util: use RTLD_LOCAL with util_dl_open()

Otherwise we risk things blowing up due to conflicting symbols. Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com> Acked-by: Rob Clark <robclark@freedesktop.org>
targets/nine: remove unused static functions
2015-11-21 12:52:21 +00:00 · 2015-11-21 12:52:21 +00:00 · 2015-11-21 12:52:21 +00:00 · 2015-11-21 12:52:21 +00:00 · 2015-11-21 12:52:21 +00:00 · 2015-11-21 12:52:20 +00:00
1284 changed files with 99981 additions and 44833 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.9
+11.1.0-devel
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -1,14 +0,0 @@
-# The commit base differs greatly between 11.0 and master
-2832ca95ecce064c7d841a3a374c2179f56161be glsl: fix stream qualifier for blocks with an instance name
-
-# Somewhat of a mixed feature/bugfix patch, causing some 200 piglit regressions
-2b676570960277d47477822ffeccc672613f9142 gallium/swrast: fix front buffer blitting. (v2)
-
-# causes regression in xwayland, kde/plasma, mpv, steam ... fdo#92759
-839793680f99b8387bee9489733d5071c10f3ace i965: Use MESA_FORMAT_B8G8R8X8_SRGB for RGB visuals
-
-# already picked as commit 94ac4b3e84737b8c5faa371834670fd25502e024
-b5b87c4ed1dfd58aec8905e0514c9ba92ba83e1d r600g: write all MRTs only if there is exactly one output (fixes a hang)
-
-# patch not applicable on branch (null check already exists)
-f7b71451231c75c36771e8b7b0d78f05e0d50f65 glx/dri3: a drawable might not be bound at wait time
--- a/bin/get-extra-pick-list.sh
+++ b/bin/get-extra-pick-list.sh
@@ -1,35 +0,0 @@
-#!/bin/sh
-
-# Script for generating a list of candidates which fix commits that have been
-# previously cherry-picked to a stable branch.
-#
-# Usage examples:
-#
-# $ bin/get-extra-pick-list.sh
-# $ bin/get-extra-pick-list.sh > picklist
-# $ bin/get-extra-pick-list.sh | tee picklist
-
-# Use the last branchpoint as our limit for the search
-# XXX: there should be a better way for this
-latest_branchpoint=`git branch | grep \* | cut -c 3-`-branchpoint
-
-# Grep for commits with "cherry picked from commit" in the commit message.
-git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
-	grep "cherry picked from commit" |\
-	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' |\
-	cut -c -8 |\
-while read sha
-do
-	# Check if the original commit is referenced in master
-	git log -n1 --pretty=oneline --grep=$sha $latest_branchpoint..origin/master |\
-		cut -c -8 |\
-	while read candidate
-	do
-		# Check if the potential fix, hasn't landed in branch yet.
-		found=`git log -n1 --pretty=oneline --reverse --grep=$candidate $latest_branchpoint..HEAD |wc -l`
-		if test $found = 0
-		then
-			echo Commit $candidate might need to be picked, as it references $sha
-		fi
-	done
-done
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -14,7 +14,7 @@ git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

 # Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*11\.0.*mesa-stable\)' HEAD..origin/master |\
+git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
--- a/configure.ac
+++ b/configure.ac
@@ -9,6 +9,7 @@ dnl Copyright © 2009-2014 Jon TURNEY
 dnl Copyright © 2011-2012 Benjamin Franzke
 dnl Copyright © 2008-2014 David Airlie
 dnl Copyright © 2009-2013 Brian Paul
+dnl Copyright © 2003-2007 Keith Packard, Daniel Stone
 dnl
 dnl Permission is hereby granted, free of charge, to any person obtaining a
 dnl copy of this software and associated documentation files (the "Software"),
@@ -73,14 +74,14 @@ LIBDRM_AMDGPU_REQUIRED=2.4.63
 LIBDRM_INTEL_REQUIRED=2.4.61
 LIBDRM_NVVIEUX_REQUIRED=2.4.33
 LIBDRM_NOUVEAU_REQUIRED=2.4.62
-LIBDRM_FREEDRENO_REQUIRED=2.4.64
+LIBDRM_FREEDRENO_REQUIRED=2.4.65
 DRI2PROTO_REQUIRED=2.6
 DRI3PROTO_REQUIRED=1.0
 PRESENTPROTO_REQUIRED=1.0
 LIBUDEV_REQUIRED=151
 GLPROTO_REQUIRED=1.4.14
 LIBOMXIL_BELLAGIO_REQUIRED=0.0
-LIBVA_REQUIRED=0.35.0
+LIBVA_REQUIRED=0.38.0
 VDPAU_REQUIRED=1.1
 WAYLAND_REQUIRED=1.2.0
 XCB_REQUIRED=1.9.3
@@ -97,7 +98,7 @@ AC_PROG_CXX
 AM_PROG_CC_C_O
 AM_PROG_AS
 AX_CHECK_GNU_MAKE
-AC_CHECK_PROGS([PYTHON2], [python2.7 python2 python])
+AC_CHECK_PROGS([PYTHON2], [python2 python])
 AC_PROG_SED
 AC_PROG_MKDIR_P

@@ -375,11 +376,10 @@ save_CFLAGS="$CFLAGS"
 CFLAGS="$SSE41_CFLAGS $CFLAGS"
 AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
 #include <smmintrin.h>
-int param;
 int main () {
-    __m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
+    __m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
    c = _mm_max_epu32(a, b);
-    return _mm_cvtsi128_si32(c);
+    return 0;
 }]])], SSE41_SUPPORTED=1)
 CFLAGS="$save_CFLAGS"
 if test "x$SSE41_SUPPORTED" = x1; then
@@ -535,15 +535,32 @@ AM_CONDITIONAL(HAVE_COMPAT_SYMLINKS, test "x$HAVE_COMPAT_SYMLINKS" = xyes)
 dnl
 dnl library names
 dnl
+dnl Unfortunately we need to do a few things that libtool can't help us with,
+dnl so we need some knowledge of shared library filenames:
+dnl
+dnl LIB_EXT is the extension used when creating symlinks for alternate
+dnl filenames for a shared library which will be dynamically loaded
+dnl
+dnl IMP_LIB_EXT is the extension used when checking for the presence of
+dnl the file for a shared library we wish to link with
+dnl
 case "$host_os" in
 darwin* )
-    LIB_EXT='dylib' ;;
+    LIB_EXT='dylib'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 cygwin* )
-    LIB_EXT='dll' ;;
+    LIB_EXT='dll'
+    IMP_LIB_EXT='dll.a'
+    ;;
 aix* )
-    LIB_EXT='a' ;;
+    LIB_EXT='a'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 * )
-    LIB_EXT='so' ;;
+    LIB_EXT='so'
+    IMP_LIB_EXT=$LIB_EXT
+    ;;
 esac

 AC_SUBST([LIB_EXT])
@@ -849,7 +866,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
    [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
        [comma delimited Gallium drivers list, e.g.
-        "i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4"
+        "i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl"
        @<:@default=r300,r600,svga,swrast@:>@])],
    [with_gallium_drivers="$withval"],
    [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -939,8 +956,13 @@ gnu*|cygwin*)
    dri_platform='drm' ;;
 esac

+if test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes; then
+    have_drisw_kms='yes'
+fi
+
 AM_CONDITIONAL(HAVE_DRICOMMON, test "x$enable_dri" = xyes )
 AM_CONDITIONAL(HAVE_DRISW, test "x$enable_dri" = xyes )
+AM_CONDITIONAL(HAVE_DRISW_KMS, test "x$have_drisw_kms" = xyes )
 AM_CONDITIONAL(HAVE_DRI2, test "x$enable_dri" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
 AM_CONDITIONAL(HAVE_DRI3, test "x$enable_dri3" = xyes -a "x$dri_platform" = xdrm -a "x$have_libdrm" = xyes )
 AM_CONDITIONAL(HAVE_APPLEDRI, test "x$enable_dri" = xyes -a "x$dri_platform" = xapple )
@@ -975,10 +997,6 @@ if test -n "$with_gallium_drivers" -a "x$enable_glx$enable_xlib_glx" = xyesyes;
    NEED_WINSYS_XLIB="yes"
 fi

-if test "x$enable_dri" = xyes; then
-    enable_gallium_loader="$enable_shared_pipe_drivers"
-fi
-
 if test "x$enable_gallium_osmesa" = xyes; then
    if ! echo "$with_gallium_drivers" | grep -q 'swrast'; then
        AC_MSG_ERROR([gallium_osmesa requires the gallium swrast driver])
@@ -990,6 +1008,149 @@ fi

 AC_SUBST([MESA_LLVM])

+# SHA1 hashing
+AC_ARG_WITH([sha1],
+        [AS_HELP_STRING([--with-sha1=libc|libmd|libnettle|libgcrypt|libcrypto|libsha1|CommonCrypto|CryptoAPI],
+        [choose SHA1 implementation])])
+case "x$with_sha1" in
+x | xlibc | xlibmd | xlibnettle | xlibgcrypt | xlibcrypto | xlibsha1 | xCommonCrypto | xCryptoAPI)
+  ;;
+*)
+        AC_MSG_ERROR([Illegal value for --with-sha1: $with_sha1])
+esac
+
+AC_CHECK_FUNC([SHA1Init], [HAVE_SHA1_IN_LIBC=yes])
+if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_LIBC" = xyes; then
+	with_sha1=libc
+fi
+if test "x$with_sha1" = xlibc && test "x$HAVE_SHA1_IN_LIBC" != xyes; then
+	AC_MSG_ERROR([sha1 in libc requested but not found])
+fi
+if test "x$with_sha1" = xlibc; then
+	AC_DEFINE([HAVE_SHA1_IN_LIBC], [1],
+		[Use libc SHA1 functions])
+	SHA1_LIBS=""
+fi
+AC_CHECK_FUNC([CC_SHA1_Init], [HAVE_SHA1_IN_COMMONCRYPTO=yes])
+if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_COMMONCRYPTO" = xyes; then
+	with_sha1=CommonCrypto
+fi
+if test "x$with_sha1" = xCommonCrypto && test "x$HAVE_SHA1_IN_COMMONCRYPTO" != xyes; then
+	AC_MSG_ERROR([CommonCrypto requested but not found])
+fi
+if test "x$with_sha1" = xCommonCrypto; then
+	AC_DEFINE([HAVE_SHA1_IN_COMMONCRYPTO], [1],
+		[Use CommonCrypto SHA1 functions])
+	SHA1_LIBS=""
+fi
+dnl stdcall functions cannot be tested with AC_CHECK_LIB
+AC_CHECK_HEADER([wincrypt.h], [HAVE_SHA1_IN_CRYPTOAPI=yes], [], [#include <windows.h>])
+if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_CRYPTOAPI" = xyes; then
+	with_sha1=CryptoAPI
+fi
+if test "x$with_sha1" = xCryptoAPI && test "x$HAVE_SHA1_IN_CRYPTOAPI" != xyes; then
+	AC_MSG_ERROR([CryptoAPI requested but not found])
+fi
+if test "x$with_sha1" = xCryptoAPI; then
+	AC_DEFINE([HAVE_SHA1_IN_CRYPTOAPI], [1],
+		[Use CryptoAPI SHA1 functions])
+	SHA1_LIBS=""
+fi
+AC_CHECK_LIB([md], [SHA1Init], [HAVE_LIBMD=yes])
+if test "x$with_sha1" = x && test "x$HAVE_LIBMD" = xyes; then
+	with_sha1=libmd
+fi
+if test "x$with_sha1" = xlibmd && test "x$HAVE_LIBMD" != xyes; then
+	AC_MSG_ERROR([libmd requested but not found])
+fi
+if test "x$with_sha1" = xlibmd; then
+	AC_DEFINE([HAVE_SHA1_IN_LIBMD], [1],
+	          [Use libmd SHA1 functions])
+	SHA1_LIBS=-lmd
+fi
+PKG_CHECK_MODULES([LIBSHA1], [libsha1], [HAVE_LIBSHA1=yes], [HAVE_LIBSHA1=no])
+if test "x$with_sha1" = x && test "x$HAVE_LIBSHA1" = xyes; then
+   with_sha1=libsha1
+fi
+if test "x$with_sha1" = xlibsha1 && test "x$HAVE_LIBSHA1" != xyes; then
+	AC_MSG_ERROR([libsha1 requested but not found])
+fi
+if test "x$with_sha1" = xlibsha1; then
+	AC_DEFINE([HAVE_SHA1_IN_LIBSHA1], [1],
+	          [Use libsha1 for SHA1])
+	SHA1_LIBS=-lsha1
+fi
+AC_CHECK_LIB([nettle], [nettle_sha1_init], [HAVE_LIBNETTLE=yes])
+if test "x$with_sha1" = x && test "x$HAVE_LIBNETTLE" = xyes; then
+	with_sha1=libnettle
+fi
+if test "x$with_sha1" = xlibnettle && test "x$HAVE_LIBNETTLE" != xyes; then
+	AC_MSG_ERROR([libnettle requested but not found])
+fi
+if test "x$with_sha1" = xlibnettle; then
+	AC_DEFINE([HAVE_SHA1_IN_LIBNETTLE], [1],
+	          [Use libnettle SHA1 functions])
+	SHA1_LIBS=-lnettle
+fi
+AC_CHECK_LIB([gcrypt], [gcry_md_open], [HAVE_LIBGCRYPT=yes])
+if test "x$with_sha1" = x && test "x$HAVE_LIBGCRYPT" = xyes; then
+	with_sha1=libgcrypt
+fi
+if test "x$with_sha1" = xlibgcrypt && test "x$HAVE_LIBGCRYPT" != xyes; then
+	AC_MSG_ERROR([libgcrypt requested but not found])
+fi
+if test "x$with_sha1" = xlibgcrypt; then
+	AC_DEFINE([HAVE_SHA1_IN_LIBGCRYPT], [1],
+	          [Use libgcrypt SHA1 functions])
+	SHA1_LIBS=-lgcrypt
+fi
+# We don't need all of the OpenSSL libraries, just libcrypto
+AC_CHECK_LIB([crypto], [SHA1_Init], [HAVE_LIBCRYPTO=yes])
+PKG_CHECK_MODULES([OPENSSL], [openssl], [HAVE_OPENSSL_PKC=yes],
+                  [HAVE_OPENSSL_PKC=no])
+if test "x$HAVE_LIBCRYPTO" = xyes || test "x$HAVE_OPENSSL_PKC" = xyes; then
+	if test "x$with_sha1" = x; then
+		with_sha1=libcrypto
+	fi
+else
+	if test "x$with_sha1" = xlibcrypto; then
+		AC_MSG_ERROR([OpenSSL libcrypto requested but not found])
+	fi
+fi
+if test "x$with_sha1" = xlibcrypto; then
+	if test "x$HAVE_LIBCRYPTO" = xyes; then
+		SHA1_LIBS=-lcrypto
+	else
+		SHA1_LIBS="$OPENSSL_LIBS"
+		SHA1_CFLAGS="$OPENSSL_CFLAGS"
+	fi
+fi
+AC_MSG_CHECKING([for SHA1 implementation])
+AC_MSG_RESULT([$with_sha1])
+AC_SUBST(SHA1_LIBS)
+AC_SUBST(SHA1_CFLAGS)
+
+# Enable a define for SHA1
+if test "x$with_sha1" != "x"; then
+	DEFINES="$DEFINES -DHAVE_SHA1"
+fi
+
+# Allow user to configure out the shader-cache feature
+AC_ARG_ENABLE([shader-cache],
+    AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
+    [enable_shader_cache="$enableval"],
+    [if test "x$with_sha1" != "x"; then
+        enable_shader_cache=yes
+     else
+        enable_shader_cache=no
+     fi])
+if test "x$with_sha1" = "x"; then
+    if test "x$enable_shader_cache" = "xyes"; then
+        AC_MSG_ERROR([Cannot enable shader cache (no SHA-1 implementation found)])
+    fi
+fi
+AM_CONDITIONAL([ENABLE_SHADER_CACHE], [test x$enable_shader_cache = xyes])
+
 case "$host_os" in
 linux*)
    need_pci_id=yes ;;
@@ -1066,7 +1227,8 @@ xyesno)

            if test x"$enable_dri3" = xyes; then
               PKG_CHECK_EXISTS([xcb >= $XCB_REQUIRED], [], AC_MSG_ERROR([DRI3 requires xcb >= $XCB_REQUIRED]))
-               dri_modules="$dri_modules xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
+               dri3_modules="xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
+               PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
            fi
        fi
        if test x"$dri_platform" = xapple ; then
@@ -1407,6 +1569,12 @@ if test "x$enable_egl" = xyes; then
            if test "x$enable_shared_glapi" = xno; then
                AC_MSG_ERROR([egl_dri2 requires --enable-shared-glapi])
            fi
+            if test "x$enable_dri3" = xyes; then
+                HAVE_EGL_DRIVER_DRI3=1
+                if test "x$enable_shared_glapi" = xno; then
+                    AC_MSG_ERROR([egl_dri3 requires --enable-shared-glapi])
+                fi
+            fi
        else
            # Avoid building an "empty" libEGL. Drop/update this
            # when other backends (haiku?) come along.
@@ -1418,6 +1586,8 @@ fi
 AM_CONDITIONAL(HAVE_EGL, test "x$enable_egl" = xyes)
 AC_SUBST([EGL_LIB_DEPS])

+gallium_st="mesa"
+
 dnl
 dnl XA configuration
 dnl
@@ -1430,7 +1600,7 @@ if test "x$enable_xa" = xyes; then
          enabling XA.
          Example: ./configure --enable-xa --with-gallium-drivers=svga...])
    fi
-    enable_gallium_loader=$enable_shared_pipe_drivers
+    gallium_st="$gallium_st xa"
 fi
 AM_CONDITIONAL(HAVE_ST_XA, test "x$enable_xa" = xyes)

@@ -1475,25 +1645,25 @@ AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)

 if test "x$enable_xvmc" = xyes; then
    PKG_CHECK_MODULES([XVMC], [xvmc >= $XVMC_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
+    gallium_st="$gallium_st xvmc"
 fi
 AM_CONDITIONAL(HAVE_ST_XVMC, test "x$enable_xvmc" = xyes)

 if test "x$enable_vdpau" = xyes; then
    PKG_CHECK_MODULES([VDPAU], [vdpau >= $VDPAU_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
+    gallium_st="$gallium_st vdpau"
 fi
 AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes)

 if test "x$enable_omx" = xyes; then
    PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
+    gallium_st="$gallium_st omx"
 fi
 AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes)

 if test "x$enable_va" = xyes; then
    PKG_CHECK_MODULES([VA], [libva >= $LIBVA_REQUIRED])
-    enable_gallium_loader=$enable_shared_pipe_drivers
+    gallium_st="$gallium_st va"
 fi
 AM_CONDITIONAL(HAVE_ST_VA, test "x$enable_va" = xyes)

@@ -1515,7 +1685,7 @@ if test "x$enable_nine" = xyes; then
        AC_MSG_WARN([using nine together with wine requires DRI3 enabled system])
    fi

-    enable_gallium_loader=$enable_shared_pipe_drivers
+    gallium_st="$gallium_st nine"
 fi
 AM_CONDITIONAL(HAVE_ST_NINE, test "x$enable_nine" = xyes)

@@ -1530,15 +1700,7 @@ AC_ARG_WITH([clang-libdir],
   [CLANG_LIBDIR=''])

 PKG_CHECK_EXISTS([libclc], [have_libclc=yes], [have_libclc=no])
-PKG_CHECK_MODULES([LIBELF], [libelf], [have_libelf=yes], [have_libelf=no])
-
-if test "x$have_libelf" = xno; then
-   LIBELF_LIBS=''
-   LIBELF_CFLAGS=''
-   AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;LIBELF_LIBS=-lelf], [have_libelf=no])
-   AC_SUBST([LIBELF_LIBS])
-   AC_SUBST([LIBELF_CFLAGS])
-fi
+AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;ELF_LIB=-lelf])

 if test "x$enable_opencl" = xyes; then
    if test -z "$with_gallium_drivers"; then
@@ -1561,8 +1723,7 @@ if test "x$enable_opencl" = xyes; then
        AC_SUBST([LIBCLC_LIBEXECDIR])
    fi

-    # XXX: Use $enable_shared_pipe_drivers once converted to use static/shared pipe-drivers
-    enable_gallium_loader=yes
+    gallium_st="$gallium_st clover"

    if test "x$enable_opencl_icd" = xyes; then
        OPENCL_LIBNAME="MesaOpenCL"
@@ -1842,10 +2003,6 @@ AC_SUBST([XVMC_LIB_INSTALL_DIR])
 dnl
 dnl Gallium Tests
 dnl
-if test "x$enable_gallium_tests" = xyes; then
-    # XXX: Use $enable_shared_pipe_drivers once converted to use static/shared pipe-drivers
-    enable_gallium_loader=yes
-fi
 AM_CONDITIONAL(HAVE_GALLIUM_TESTS, test "x$enable_gallium_tests" = xyes)

 dnl Directory for VDPAU libs
@@ -1900,14 +2057,8 @@ gallium_require_llvm() {
 }

 gallium_require_drm_loader() {
-    if test "x$enable_gallium_loader" = xyes; then
-        if test "x$need_pci_id$have_pci_id" = xyesno; then
-            AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED or sysfs])
-        fi
-        enable_gallium_drm_loader=yes
-    fi
-    if test "x$enable_va" = xyes && test "x$7" != x; then
-         GALLIUM_TARGET_DIRS="$GALLIUM_TARGET_DIRS $7"
+    if test "x$need_pci_id$have_pci_id" = xyesno; then
+        AC_MSG_ERROR([Gallium drm loader requires libudev >= $LIBUDEV_REQUIRED or sysfs])
    fi
 }

@@ -1933,7 +2084,7 @@ radeon_llvm_check() {
    if test "x$enable_gallium_llvm" != "xyes"; then
        AC_MSG_ERROR([--enable-gallium-llvm is required when building $1])
    fi
-    llvm_check_version_for "3" "4" "2" $1 
+    llvm_check_version_for "3" "5" "0" $1
    if test true && $LLVM_CONFIG --targets-built | grep -iqvw $amdgpu_llvm_target_name ; then
        AC_MSG_ERROR([LLVM $amdgpu_llvm_target_name not enabled in your LLVM build.])
    fi
@@ -2021,11 +2172,14 @@ if test -n "$with_gallium_drivers"; then
            gallium_require_drm "vc4"
            gallium_require_drm_loader

-            case "$host_cpu" in
-                i?86 | x86_64 | amd64)
-                USE_VC4_SIMULATOR=yes
-                ;;
-            esac
+            PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
+                              [USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
+            ;;
+        xvirgl)
+            HAVE_GALLIUM_VIRGL=yes
+            gallium_require_drm "virgl"
+            gallium_require_drm_loader
+            require_egl_drm "virgl"
            ;;
        *)
            AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@ -2045,10 +2199,14 @@ if test "x$MESA_LLVM" != x0; then

    LLVM_LIBS="`$LLVM_CONFIG --libs ${LLVM_COMPONENTS}`"

+    dnl llvm-config may not give the right answer when llvm is built as a
+    dnl single shared library, so we must work the library name out for
+    dnl ourselves.
+    dnl (See https://llvm.org/bugs/show_bug.cgi?id=6823)
    if test "x$enable_llvm_shared_libs" = xyes; then
        dnl We can't use $LLVM_VERSION because it has 'svn' stripped out,
        LLVM_SO_NAME=LLVM-`$LLVM_CONFIG --version`
-        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.so"], [llvm_have_one_so=yes])
+        AS_IF([test -f "$LLVM_LIBDIR/lib$LLVM_SO_NAME.$IMP_LIB_EXT"], [llvm_have_one_so=yes])

        if test "x$llvm_have_one_so" = xyes; then
            dnl LLVM was built using auto*, so there is only one shared object.
@@ -2056,7 +2214,7 @@ if test "x$MESA_LLVM" != x0; then
        else
            dnl If LLVM was built with CMake, there will be one shared object per
            dnl component.
-            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.so"],
+            AS_IF([test ! -f "$LLVM_LIBDIR/libLLVMTarget.$IMP_LIB_EXT"],
                    [AC_MSG_ERROR([Could not find llvm shared libraries:
 	Please make sure you have built llvm with the --enable-shared option
 	and that your llvm libraries are installed in $LLVM_LIBDIR
@@ -2094,26 +2252,19 @@ AM_CONDITIONAL(HAVE_GALLIUM_FREEDRENO, test "x$HAVE_GALLIUM_FREEDRENO" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_SOFTPIPE, test "x$HAVE_GALLIUM_SOFTPIPE" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_LLVMPIPE, test "x$HAVE_GALLIUM_LLVMPIPE" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_VC4, test "x$HAVE_GALLIUM_VC4" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_VIRGL, test "x$HAVE_GALLIUM_VIRGL" = xyes)

 AM_CONDITIONAL(HAVE_GALLIUM_STATIC_TARGETS, test "x$enable_shared_pipe_drivers" = xno)

-# NOTE: anything using xcb or other client side libs ends up in separate
-#       _CLIENT variables.  The pipe loader is built in two variants,
-#       one that is standalone and does not link any x client libs (for
-#       use by XA tracker in particular, but could be used in any case
-#       where communication with xserver is not desired).
-if test "x$enable_gallium_loader" = xyes; then
-    if test "x$enable_dri" = xyes; then
-        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"
-    fi
-
-    if test "x$enable_gallium_drm_loader" = xyes; then
-        GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRM"
-    fi
-
-    AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
+if test "x$enable_dri" = xyes; then
+    GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_DRI"
 fi

+if test "x$have_drisw_kms" = xyes; then
+    GALLIUM_PIPE_LOADER_DEFINES="$GALLIUM_PIPE_LOADER_DEFINES -DHAVE_PIPE_LOADER_KMS"
+fi
+AC_SUBST([GALLIUM_PIPE_LOADER_DEFINES])
+
 AM_CONDITIONAL(HAVE_I915_DRI, test x$HAVE_I915_DRI = xyes)
 AM_CONDITIONAL(HAVE_I965_DRI, test x$HAVE_I965_DRI = xyes)
 AM_CONDITIONAL(HAVE_NOUVEAU_DRI, test x$HAVE_NOUVEAU_DRI = xyes)
@@ -2127,8 +2278,6 @@ AM_CONDITIONAL(NEED_RADEON_DRM_WINSYS, test "x$HAVE_GALLIUM_R300" = xyes -o \
 AM_CONDITIONAL(NEED_WINSYS_XLIB, test "x$NEED_WINSYS_XLIB" = xyes)
 AM_CONDITIONAL(NEED_RADEON_LLVM, test x$NEED_RADEON_LLVM = xyes)
 AM_CONDITIONAL(USE_R600_LLVM_COMPILER, test x$USE_R600_LLVM_COMPILER = xyes)
-AM_CONDITIONAL(HAVE_LOADER_GALLIUM, test x$enable_gallium_loader = xyes)
-AM_CONDITIONAL(HAVE_DRM_LOADER_GALLIUM, test x$enable_gallium_drm_loader = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_COMPUTE, test x$enable_opencl = xyes)
 AM_CONDITIONAL(HAVE_MESA_LLVM, test x$MESA_LLVM = x1)
 AM_CONDITIONAL(USE_VC4_SIMULATOR, test x$USE_VC4_SIMULATOR = xyes)
@@ -2136,6 +2285,8 @@ if test "x$USE_VC4_SIMULATOR" = xyes -a "x$HAVE_GALLIUM_ILO" = xyes; then
    AC_MSG_ERROR([VC4 simulator on x86 replaces i965 driver build, so ilo must be disabled.])
 fi

+AC_SUBST([ELF_LIB])
+
 AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
 AM_CONDITIONAL(HAVE_X11_DRIVER, test "x$enable_xlib_glx" = xyes)
 AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
@@ -2197,6 +2348,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/auxiliary/Makefile
 		src/gallium/auxiliary/pipe-loader/Makefile
 		src/gallium/drivers/freedreno/Makefile
+		src/gallium/drivers/ddebug/Makefile
 		src/gallium/drivers/i915/Makefile
 		src/gallium/drivers/ilo/Makefile
 		src/gallium/drivers/llvmpipe/Makefile
@@ -2211,6 +2363,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/drivers/svga/Makefile
 		src/gallium/drivers/trace/Makefile
 		src/gallium/drivers/vc4/Makefile
+		src/gallium/drivers/virgl/Makefile
 		src/gallium/state_trackers/clover/Makefile
 		src/gallium/state_trackers/dri/Makefile
 		src/gallium/state_trackers/glx/xlib/Makefile
@@ -2251,6 +2404,8 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/winsys/sw/wrapper/Makefile
 		src/gallium/winsys/sw/xlib/Makefile
 		src/gallium/winsys/vc4/drm/Makefile
+		src/gallium/winsys/virgl/drm/Makefile
+		src/gallium/winsys/virgl/vtest/Makefile
 		src/gbm/Makefile
 		src/gbm/main/gbm.pc
 		src/glsl/Makefile
@@ -2344,6 +2499,9 @@ if test "$enable_egl" = yes; then
    if test "x$HAVE_EGL_DRIVER_DRI2" != "x"; then
        egl_drivers="$egl_drivers builtin:egl_dri2"
    fi
+    if test "x$HAVE_EGL_DRIVER_DRI3" != "x"; then
+        egl_drivers="$egl_drivers builtin:egl_dri3"
+    fi

    echo "        EGL drivers:    $egl_drivers"
 fi
@@ -2359,11 +2517,18 @@ fi

 echo ""
 if test -n "$with_gallium_drivers"; then
-    echo "        Gallium:         yes"
+    echo "        Gallium drivers: $gallium_drivers"
+    echo "        Gallium st:      $gallium_st"
 else
    echo "        Gallium:         no"
 fi

+dnl Shader cache
+echo ""
+echo "        Shader cache:    $enable_shader_cache"
+if test "x$enable_shader_cache" = "xyes"; then
+    echo "        With SHA1 from:  $with_sha1"
+fi

 dnl Libraries
 echo ""
--- a/docs/GL3.txt
+++ b/docs/GL3.txt
@@ -96,27 +96,27 @@ GL 4.0, GLSL 4.00 --- all DONE: nvc0, radeonsi

  GL_ARB_draw_buffers_blend                            DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_draw_indirect                                 DONE (i965, r600, llvmpipe, softpipe)
-  GL_ARB_gpu_shader5                                   DONE (i965)
+  GL_ARB_gpu_shader5                                   DONE (i965, r600)
  - 'precise' qualifier                                DONE
-  - Dynamically uniform sampler array indices          DONE (r600, softpipe)
-  - Dynamically uniform UBO array indices              DONE (r600)
+  - Dynamically uniform sampler array indices          DONE (softpipe)
+  - Dynamically uniform UBO array indices              DONE ()
  - Implicit signed -> unsigned conversions            DONE
  - Fused multiply-add                                 DONE ()
-  - Packing/bitfield/conversion functions              DONE (r600, softpipe)
-  - Enhanced textureGather                             DONE (r600, softpipe)
-  - Geometry shader instancing                         DONE (r600, llvmpipe, softpipe)
+  - Packing/bitfield/conversion functions              DONE (softpipe)
+  - Enhanced textureGather                             DONE (softpipe)
+  - Geometry shader instancing                         DONE (llvmpipe, softpipe)
  - Geometry shader multiple streams                   DONE ()
-  - Enhanced per-sample shading                        DONE (r600)
-  - Interpolation functions                            DONE (r600)
+  - Enhanced per-sample shading                        DONE ()
+  - Interpolation functions                            DONE ()
  - New overload resolution rules                      DONE
-  GL_ARB_gpu_shader_fp64                               DONE (llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                               DONE (r600, llvmpipe, softpipe)
  GL_ARB_sample_shading                                DONE (i965, nv50, r600)
  GL_ARB_shader_subroutine                             DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_tessellation_shader                           DONE ()
  GL_ARB_texture_buffer_object_rgb32                   DONE (i965, r600, llvmpipe, softpipe)
  GL_ARB_texture_cube_map_array                        DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_texture_gather                                DONE (i965, nv50, r600, llvmpipe, softpipe)
-  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600)
+  GL_ARB_texture_query_lod                             DONE (i965, nv50, r600, softpipe)
  GL_ARB_transform_feedback2                           DONE (i965, nv50, r600, llvmpipe, softpipe)
  GL_ARB_transform_feedback3                           DONE (i965, nv50, r600, llvmpipe, softpipe)

@@ -127,7 +127,7 @@ GL 4.1, GLSL 4.10 --- all DONE: nvc0, radeonsi
  GL_ARB_get_program_binary                            DONE (0 binary formats)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_shader_precision                              DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                           DONE (llvmpipe, softpipe)
+  GL_ARB_vertex_attrib_64bit                           DONE (r600, llvmpipe, softpipe)
  GL_ARB_viewport_array                                DONE (i965, nv50, r600, llvmpipe)


@@ -149,14 +149,14 @@ GL 4.2, GLSL 4.20:

 GL 4.3, GLSL 4.30:

-  GL_ARB_arrays_of_arrays                              started (Timothy)
+  GL_ARB_arrays_of_arrays                              DONE (i965)
  GL_ARB_ES3_compatibility                             DONE (all drivers that support GLSL 3.30)
  GL_ARB_clear_buffer_object                           DONE (all drivers)
  GL_ARB_compute_shader                                in progress (jljusten)
-  GL_ARB_copy_image                                    DONE (i965) (gallium - in progress, VMware)
+  GL_ARB_copy_image                                    DONE (i965, nv50, nvc0, radeonsi)
  GL_KHR_debug                                         DONE (all drivers)
  GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
-  GL_ARB_fragment_layer_viewport                       DONE (nv50, nvc0, r600, radeonsi, llvmpipe)
+  GL_ARB_fragment_layer_viewport                       DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe)
  GL_ARB_framebuffer_no_attachments                    DONE (i965)
  GL_ARB_internalformat_query2                         not started
  GL_ARB_invalidate_subdata                            DONE (all drivers)
@@ -164,12 +164,12 @@ GL 4.3, GLSL 4.30:
  GL_ARB_program_interface_query                       DONE (all drivers)
  GL_ARB_robust_buffer_access_behavior                 not started
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_texture_buffer_range                          DONE (nv50, nvc0, i965, r600, radeonsi, llvmpipe)
  GL_ARB_texture_query_levels                          DONE (all drivers that support GLSL 1.30)
  GL_ARB_texture_storage_multisample                   DONE (all drivers that support GL_ARB_texture_multisample)
-  GL_ARB_texture_view                                  DONE (i965, nv50, nvc0, llvmpipe, softpipe)
+  GL_ARB_texture_view                                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_vertex_attrib_binding                         DONE (all drivers)


@@ -177,8 +177,14 @@ GL 4.4, GLSL 4.40:

  GL_MAX_VERTEX_ATTRIB_STRIDE                          DONE (all drivers)
  GL_ARB_buffer_storage                                DONE (i965, nv50, nvc0, r600, radeonsi)
-  GL_ARB_clear_texture                                 DONE (i965) (gallium - in progress, VMware)
-  GL_ARB_enhanced_layouts                              not started
+  GL_ARB_clear_texture                                 DONE (i965, nv50, nvc0)
+  GL_ARB_enhanced_layouts                              in progress (Timothy)
+  - compile-time constant expressions                  DONE
+  - explicit byte offsets for blocks                   in progress
+  - forced alignment within blocks                     in progress
+  - specified vec4-slot component numbers              in progress
+  - specified transform/feedback layout                in progress
+  - input/output block locations                       in progress
  GL_ARB_multi_bind                                    DONE (all drivers)
  GL_ARB_query_buffer_object                           not started
  GL_ARB_texture_mirror_clamp_to_edge                  DONE (i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -194,16 +200,16 @@ GL 4.5, GLSL 4.50:
  GL_ARB_derivative_control                            DONE (i965, nv50, nvc0, r600, radeonsi)
  GL_ARB_direct_state_access                           DONE (all drivers)
  GL_ARB_get_texture_sub_image                         DONE (all drivers)
-  GL_ARB_shader_texture_image_samples                  not started
-  GL_ARB_texture_barrier                               DONE (nv50, nvc0, r600, radeonsi)
-  GL_KHR_context_flush_control                         DONE (all - but needs GLX/EXT extension to be useful)
+  GL_ARB_shader_texture_image_samples                  DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_ARB_texture_barrier                               DONE (i965, nv50, nvc0, r600, radeonsi)
+  GL_KHR_context_flush_control                         DONE (all - but needs GLX/EGL extension to be useful)
  GL_KHR_robust_buffer_access_behavior                 not started
  GL_KHR_robustness                                    90% done (the ARB variant)
  GL_EXT_shader_integer_mix                            DONE (all drivers that support GLSL)

 These are the extensions cherry-picked to make GLES 3.1
 GLES3.1, GLSL ES 3.1
-  GL_ARB_arrays_of_arrays                              started (Timothy)
+  GL_ARB_arrays_of_arrays                              DONE (i965)
  GL_ARB_compute_shader                                in progress (jljusten)
  GL_ARB_draw_indirect                                 DONE (i965, nvc0, r600, radeonsi, llvmpipe, softpipe)
  GL_ARB_explicit_uniform_location                     DONE (all drivers that support GLSL)
@@ -212,7 +218,7 @@ GLES3.1, GLSL ES 3.1
  GL_ARB_shader_atomic_counters                        DONE (i965)
  GL_ARB_shader_image_load_store                       DONE (i965)
  GL_ARB_shader_image_size                             DONE (i965)
-  GL_ARB_shader_storage_buffer_object                  in progress (Iago Toral, Samuel Iglesias)
+  GL_ARB_shader_storage_buffer_object                  DONE (i965)
  GL_ARB_shading_language_packing                      DONE (all drivers)
  GL_ARB_separate_shader_objects                       DONE (all drivers)
  GL_ARB_stencil_texturing                             DONE (i965/gen8+, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe)
@@ -223,10 +229,35 @@ GLES3.1, GLSL ES 3.1
  GS5 Packing/bitfield/conversion functions            DONE (i965, nvc0, r600, radeonsi)
  GL_EXT_shader_integer_mix                            DONE (all drivers that support GLSL)

-  Additional functions not covered above:
-      glMemoryBarrierByRegion
-      glGetTexLevelParameter[fi]v - needs updates to restrict to GLES enums
-      glGetBooleani_v - needs updates to restrict to GLES enums
+  Additional functionality not covered above:
+      glMemoryBarrierByRegion                          DONE
+      glGetTexLevelParameter[fi]v - needs updates      DONE
+      glGetBooleani_v - restrict to GLES enums
+      gl_HelperInvocation support
+
+GLES3.2, GLSL ES 3.2
+  GL_EXT_color_buffer_float                            DONE (all drivers)
+  GL_KHR_blend_equation_advanced                       not started
+  GL_KHR_debug                                         DONE (all drivers)
+  GL_KHR_robustness                                    90% done (the ARB variant)
+  GL_KHR_texture_compression_astc_ldr                  DONE (i965/gen9+)
+  GL_OES_copy_image                                    not started (based on GL_ARB_copy_image, which is done for some drivers)
+  GL_OES_draw_buffers_indexed                          not started
+  GL_OES_draw_elements_base_vertex                     DONE (all drivers)
+  GL_OES_geometry_shader                               not started (based on GL_ARB_geometry_shader4, which is done for all drivers)
+  GL_OES_gpu_shader5                                   not started (based on parts of GL_ARB_gpu_shader5, which is done for some drivers)
+  GL_OES_primitive_bounding_box                        not started
+  GL_OES_sample_shading                                not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_sample_variables                              not started (based on parts of GL_ARB_sample_shading, which is done for some drivers)
+  GL_OES_shader_image_atomic                           not started (based on parts of GL_ARB_shader_image_load_store, which is done for some drivers)
+  GL_OES_shader_io_blocks                              not started (based on parts of GLSL 1.50, which is done)
+  GL_OES_shader_multisample_interpolation              not started (based on parts of GL_ARB_gpu_shader5, which is done)
+  GL_OES_tessellation_shader                           not started (based on GL_ARB_tessellation_shader, which is done for some drivers)
+  GL_OES_texture_border_clamp                          not started (based on GL_ARB_texture_border_clamp, which is done)
+  GL_OES_texture_buffer                                not started (based on GL_ARB_texture_buffer_object, GL_ARB_texture_buffer_range, and GL_ARB_texture_buffer_object_rgb32 that are all done)
+  GL_OES_texture_cube_map_array                        not started (based on GL_ARB_texture_cube_map_array, which is done for all drivers)
+  GL_OES_texture_stencil8                              not started (based on GL_ARB_texture_stencil8, which is done for some drivers)
+  GL_OES_texture_storage_multisample_2d_array          DONE (all drivers that support GL_ARB_texture_multisample)

 More info about these features and the work involved can be found at
 http://dri.freedesktop.org/wiki/MissingFunctionality
--- a/docs/README.UVD
+++ b/docs/README.UVD
@@ -2,8 +2,8 @@ The software may implement third party technologies (e.g. third party
 libraries) that are not licensed to you by AMD and for which you may need
 to obtain licenses from other parties.  Unless explicitly stated otherwise,
 these third party technologies are not licensed hereunder.  Such third
-party technologies include, but are not limited, to H.264, MPEG-2, MPEG-4,
-AVC, and VC-1.
+party technologies include, but are not limited, to H.264, H.265, HEVC, MPEG-2,
+MPEG-4, AVC, and VC-1.

 For MPEG-2 Encoding Products ANY USE OF THIS PRODUCT IN ANY MANNER OTHER
 THAN PERSONAL USE THAT COMPLIES WITH THE MPEG-2 STANDARD FOR ENCODING VIDEO
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -87,6 +87,13 @@ created in a <code>lib64</code> directory at the top of the Mesa source
 tree.</p>
 </dd>

+<dt><code>--sysconfdir=DIR</code></dt>
+<dd><p>This option specifies the directory where the configuration
+files will be installed. The default is <code>${prefix}/etc</code>.
+Currently there's only one config file provided when dri drivers are
+enabled - it's <code>drirc</code>.</p>
+</dd>
+
 <dt><code>--enable-static, --disable-shared</code></dt>
 <dd><p>By default, Mesa
 will build shared libraries. Either of these options will force static
@@ -217,7 +224,7 @@ GLX.
 <dt><code>--with-expat=DIR</code>
 <dd><p><strong>DEPRECATED</strong>, use <code>PKG_CONFIG_PATH</code> instead.</p>
 <p>The DRI-enabled libGL uses expat to
-parse the DRI configuration files in <code>/etc/drirc</code> and
+parse the DRI configuration files in <code>${sysconfdir}/drirc</code> and
 <code>~/.drirc</code>. This option allows a specific expat installation
 to be used. For example, <code>--with-expat=/usr/local</code> will
 search for expat headers and libraries in <code>/usr/local/include</code>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -153,6 +153,7 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
   <li>no16 - suppress generation of 16-wide fragment shaders. useful for debugging broken shaders</li>
   <li>blorp - emit messages about the blorp operations (blits &amp; clears)</li>
   <li>nodualobj - suppress generation of dual-object geometry shader code</li>
+   <li>optimizer - dump shader assembly to files at each optimization pass and iteration that make progress</li>
 </ul>
 </ul>

@@ -178,6 +179,14 @@ Mesa EGL supports different sets of environment variables.  See the
 <li>GALLIUM_HUD - draws various information on the screen, like framerate,
    cpu load, driver statistics, performance counters, etc.
    Set GALLIUM_HUD=help and run e.g. glxgears for more info.
+<li>GALLIUM_HUD_PERIOD - sets the hud update rate in seconds (float). Use zero
+    to update every frame. The default period is 1/2 second.
+<li>GALLIUM_HUD_VISIBLE - control default visibility, defaults to true.
+<li>GALLIUM_HUD_TOGGLE_SIGNAL - toggle visibility via user specified signal.
+    Especially useful to toggle hud at specific points of application and
+    disable for unencumbered viewing the rest of the time. For example, set
+    GALLIUM_HUD_VISIBLE to false and GALLIUM_HUD_TOGGLE_SIGNAL to 10 (SIGUSR1).
+    Use kill -10 &lt;pid&gt; to toggle the hud as desired.
 <li>GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc.
    rather than stderr.
 <li>GALLIUM_PRINT_OPTIONS - if non-zero, print all the Gallium environment
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,25 +16,96 @@

 <h1>News</h1>

-<h2>August 22 2015</h2>
+<h2>November 21, 2015</h2>
+<p>
+<a href="relnotes/11.0.6.html">Mesa 11.0.6</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>November 11, 2015</h2>
+<p>
+<a href="relnotes/11.0.5.html">Mesa 11.0.5</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>October 24, 2015</h2>
+<p>
+<a href="relnotes/11.0.4.html">Mesa 11.0.4</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>October 10, 2015</h2>
+<p>
+<a href="relnotes/11.0.3.html">Mesa 11.0.3</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>October 3, 2015</h2>
+<p>
+<a href="relnotes/10.6.9.html">Mesa 10.6.9</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 10.6.9 will be the final release in the 10.6
+series. Users of 10.6 are encouraged to migrate to the 11.0 series in order
+to obtain future fixes.
+</p>
+
+<h2>September 28, 2015</h2>
+<p>
+<a href="relnotes/11.0.2.html">Mesa 11.0.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 26, 2015</h2>
+<p>
+<a href="relnotes/11.0.1.html">Mesa 11.0.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 20, 2015</h2>
+<p>
+<a href="relnotes/10.6.8.html">Mesa 10.6.8</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 12, 2015</h2>
+<p>
+<a href="relnotes/11.0.0.html">Mesa 11.0.0</a> is released.  This is a new
+development release.  See the release notes for more information about
+the release.
+</p>
+
+<h2>September 10, 2015</h2>
+<p>
+<a href="relnotes/10.6.7.html">Mesa 10.6.7</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 4, 2015</h2>
+<p>
+<a href="relnotes/10.6.6.html">Mesa 10.6.6</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>August 22, 2015</h2>
 <p>
 <a href="relnotes/10.6.5.html">Mesa 10.6.5</a> is released.
 This is a bug-fix release.
 </p>

-<h2>August 11 2015</h2>
+<h2>August 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.4.html">Mesa 10.6.4</a> is released.
 This is a bug-fix release.
 </p>

-<h2>July 26 2015</h2>
+<h2>July 26, 2015</h2>
 <p>
 <a href="relnotes/10.6.3.html">Mesa 10.6.3</a> is released.
 This is a bug-fix release.
 </p>

-<h2>July 11 2015</h2>
+<h2>July 11, 2015</h2>
 <p>
 <a href="relnotes/10.6.2.html">Mesa 10.6.2</a> is released.
 This is a bug-fix release.
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,17 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/11.0.6.html">11.0.6 release notes</a>
+<li><a href="relnotes/11.0.5.html">11.0.5 release notes</a>
+<li><a href="relnotes/11.0.4.html">11.0.4 release notes</a>
+<li><a href="relnotes/11.0.3.html">11.0.3 release notes</a>
+<li><a href="relnotes/10.6.9.html">10.6.9 release notes</a>
+<li><a href="relnotes/11.0.2.html">11.0.2 release notes</a>
+<li><a href="relnotes/11.0.1.html">11.0.1 release notes</a>
+<li><a href="relnotes/10.6.8.html">10.6.8 release notes</a>
+<li><a href="relnotes/11.0.0.html">11.0.0 release notes</a>
+<li><a href="relnotes/10.6.7.html">10.6.7 release notes</a>
+<li><a href="relnotes/10.6.6.html">10.6.6 release notes</a>
 <li><a href="relnotes/10.6.5.html">10.6.5 release notes</a>
 <li><a href="relnotes/10.6.4.html">10.6.4 release notes</a>
 <li><a href="relnotes/10.6.3.html">10.6.3 release notes</a>
--- a/docs/relnotes/10.6.6.html
+++ b/docs/relnotes/10.6.6.html
@@ -0,0 +1,164 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.6 Release Notes / September 04, 2015</h1>
+
+<p>
+Mesa 10.6.6 is a bug fix release which fixes bugs found since the 10.6.5 release.
+</p>
+<p>
+Mesa 10.6.6 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+416517aa9df4791f97d34451a9e4da33c966afcd18c115c5769b92b15b018ef5  mesa-10.6.6.tar.gz
+570f2154b7340ff5db61ff103bc6e85165b8958798b78a50fa2df488e98e5778  mesa-10.6.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is &gt; 32k</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: &quot;Found invalid pixel values&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - &quot;high fidelity&quot;: Segfault in _mesa_program_resource_find_name</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chris Wilson (2):</p>
+<ul>
+  <li>i965: Prevent coordinate overflow in intel_emit_linear_blit</li>
+  <li>i965: Always re-emit the pipeline select during invariant state emission</li>
+</ul>
+
+<p>Daniel Scharrer (1):</p>
+<ul>
+  <li>mesa: add missing queries for ARB_direct_state_access</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>mesa/arb_gpu_shader_fp64: add support for glGetUniformdv</li>
+  <li>mesa/texgetimage: fix missing stencil check</li>
+  <li>st/readpixels: fix accel path for skipimages.</li>
+  <li>texcompress_s3tc/fxt1: fix stride checks (v1.1)</li>
+  <li>mesa/readpixels: check strides are equal before skipping conversion</li>
+  <li>mesa: enable texture stencil8 for multisample</li>
+  <li>r600/sb: update last_cf for finalize if.</li>
+  <li>r600g: fix calculation for gpr allocation</li>
+</ul>
+
+<p>David Heidelberg (1):</p>
+<ul>
+  <li>st/nine: Require gcc &gt;= 4.6</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.5</li>
+  <li>get-pick-list.sh: Require explicit "10.6" for nominating stable patches</li>
+</ul>
+
+<p>Glenn Kennard (4):</p>
+<ul>
+  <li>r600g: Fix assert in tgsi_cmp</li>
+  <li>r600g/sb: Handle undef in read port tracker</li>
+  <li>r600g/sb: Don't read junk after EOP</li>
+  <li>r600g/sb: Don't crash on empty if jump target</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: fix assignments with 4-operand arguments (i.e. BFI)</li>
+  <li>st/mesa: pass through 4th opcode argument in bitmap/pixel visitors</li>
+  <li>nv50,nvc0: disable depth bounds test on blit</li>
+  <li>nv50: fix 2d engine blits for 64- and 128-bit formats</li>
+  <li>mesa: only copy the requested teximage faces</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/fs: Split VGRFs after lowering pull constants</li>
+</ul>
+
+<p>Kenneth Graunke (3):</p>
+<ul>
+  <li>i965: Fix copy propagation type changes.</li>
+  <li>Revert "i965: Advertise a line width of 40.0 on Cherryview and Skylake."</li>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>gallium/radeon: fix the ADDRESS_HI mask for EVENT_WRITE CIK packets</li>
+  <li>mesa: create multisample fallback textures like normal textures</li>
+  <li>radeonsi: fix a Unigine Heaven hang when drirc is missing</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>i965/fs: Handle MRF destinations in lower_integer_multiplication().</li>
+</ul>
+
+<p>Neil Roberts (2):</p>
+<ul>
+  <li>i965: Swap the order of the vertex ID and edge flag attributes</li>
+  <li>i965/bdw: Fix 3DSTATE_VF_INSTANCING when the edge flag is used</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>mesa: update fbo state in glTexStorage</li>
+  <li>glsl: build stageref mask using IR, not symbol table</li>
+  <li>glsl: expose build_program_resource_list function</li>
+  <li>glsl: create program resource list after LinkShader</li>
+  <li>mesa: add GL_RED, GL_RG support for floating point textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.7.html
+++ b/docs/relnotes/10.6.7.html
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.7 Release Notes / September 10, 2015</h1>
+
+<p>
+Mesa 10.6.7 is a bug fix release which fixes bugs found since the 10.6.6 release.
+</p>
+<p>
+Mesa 10.6.7 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4ba10c59abee30d72476543a57afd2f33803dabf4620dc333b335d47966ff842  mesa-10.6.7.tar.gz
+feb1f640b915dada88a7c793dfaff0ae23580f8903f87a6b76469253de0d28d8  mesa-10.6.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>mesa/teximage: use correct extension for accept stencil texture.</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.6</li>
+  <li>Revert "i965: Momentarily pretend to support ARB_texture_stencil8 for blits."</li>
+  <li>Update version to 10.6.7</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>glsl: Handle attribute aliasing in attribute storage limit check.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.8.html
+++ b/docs/relnotes/10.6.8.html
@@ -0,0 +1,136 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.8 Release Notes / September 20, 2015</h1>
+
+<p>
+Mesa 10.6.8 is a bug fix release which fixes bugs found since the 10.6.7 release.
+</p>
+<p>
+Mesa 10.6.8 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1f34dba2a8059782e3e4e0f18b9628004e253b2c69085f735b846d2e63c9e250  mesa-10.6.8.tar.gz
+e36ee5ceeadb3966fb5ce5b4cf18322dbb76a4f075558ae49c3bba94f57d58fd  mesa-10.6.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alejandro Piñeiro (1):</p>
+<ul>
+  <li>i965/vec4: fill src_reg type using the constructor type parameter</li>
+</ul>
+
+<p>Antia Puentes (1):</p>
+<ul>
+  <li>i965/vec4: Fix saturation errors when coalescing registers</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.7</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+</ul>
+
+<p>Hans de Goede (4):</p>
+<ul>
+  <li>nv30: Fix creation of scanout buffers</li>
+  <li>nv30: Implement color resolve for msaa</li>
+  <li>nv30: Fix max width / height checks in nv30 sifm code</li>
+  <li>nv30: Disable msaa unless requested from the env by NV30_MAX_MSAA</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>mesa: Pass the type to _mesa_uniform_matrix as a glsl_base_type</li>
+  <li>mesa: Don't allow wrong type setters for matrix uniforms</li>
+</ul>
+
+<p>Ilia Mirkin (5):</p>
+<ul>
+  <li>st/mesa: don't fall back to 16F when 32F is requested</li>
+  <li>nvc0: always emit a full shader colormask</li>
+  <li>nvc0: remove BGRA4 format support</li>
+  <li>st/mesa: avoid integer overflows with buffers &gt;= 512MB</li>
+  <li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>i965/vec4: Don't reswizzle hardware registers</li>
+</ul>
+
+<p>Jose Fonseca (1):</p>
+<ul>
+  <li>gallivm: Workaround LLVM PR23628.</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Momentarily pretend to support ARB_texture_stencil8 for blits.</li>
+</ul>
+
+<p>Oded Gabbay (1):</p>
+<ul>
+  <li>llvmpipe: convert double to long long instead of unsigned long long</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
+</ul>
+
+<p>Ulrich Weigand (1):</p>
+<ul>
+  <li>mesa: Fix texture compression on big-endian systems</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>gallivm: Do not use NoFramePointerElim with LLVM 3.7.</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/10.6.9.html
+++ b/docs/relnotes/10.6.9.html
@@ -0,0 +1,130 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.6.9 Release Notes / October 03, 2015</h1>
+
+<p>
+Mesa 10.6.9 is a bug fix release which fixes bugs found since the 10.6.8 release.
+</p>
+<p>
+Mesa 10.6.9 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+3406876aac67546d0c3e2cb97da330b62644c313e7992b95618662e13c54296a  mesa-10.6.9.tar.gz
+b04c4de6280b863babc2929573da17218d92e9e4ba6272d548d135415723e8c3  mesa-10.6.9.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw&#64;entry=0x7fffd4097a08, fb=fb&#64;entry=0x7fffd40fa900, buffers=buffers&#64;entry=2, partial_clear=partial_clear&#64;entry=false)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture formats</li>
+</ul>
+
+<p>Chris Wilson (1):</p>
+<ul>
+  <li>i965: Remove early release of DRI2 miptree</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 10.6.8</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>cherry-ignore: add commit non applicable for 10.6</li>
+  <li>Update version to 10.6.9</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
+</ul>
+
+<p>Ian Romanick (5):</p>
+<ul>
+  <li>t_dd_dmatmp: Make "count" actually be the count</li>
+  <li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
+  <li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
+  <li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
+  <li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
+</ul>
+
+<p>Jeremy Huddleston (1):</p>
+<ul>
+  <li>configure.ac: Add support to enable read-only text segment on x86.</li>
+</ul>
+
+<p>Kristian Høgsberg Kristensen (1):</p>
+<ul>
+  <li>i965: Respect stride and subreg_offset for ATTR registers</li>
+</ul>
+
+<p>Kyle Brenneman (3):</p>
+<ul>
+  <li>glx: Fix build errors with --enable-mangling (v2)</li>
+  <li>mapi: Make _glapi_get_stub work with "gl" or "mgl" prefix.</li>
+  <li>glx: Don't hard-code the name "libGL.so.1" in driOpenDriver (v3)</li>
+</ul>
+
+<p>Leo Liu (1):</p>
+<ul>
+  <li>radeon/vce: fix vui time_scale zero error</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: fix front buffer regression after dropping st_validate_state in Blit</li>
+</ul>
+
+<p>Roland Scheidegger (1):</p>
+<ul>
+  <li>mesa: fix mipmap generation for immutable, compressed textures</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/11.0.7.html
+++ b/docs/relnotes/11.0.7.html
@@ -1,154 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.0.7 Release Notes / December 9, 2015</h1>
-
-<p>
-Mesa 11.0.7 is a bug fix release which fixes bugs found since the 11.0.6 release.
-</p>
-<p>
-Mesa 11.0.7 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-07c27004ff68b288097d17b2faa7bdf15ec73c96b7e6c9835266e544adf0a62f  mesa-11.0.7.tar.gz
-e7e90a332ede6c8fd08eff90786a3fd1605a4e62ebf3a9b514047838194538cb  mesa-11.0.7.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90348">Bug 90348</a> - Spilling failure of b96 merged value</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92363">Bug 92363</a> - [BSW/BDW] ogles1conform Gets test fails</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92438">Bug 92438</a> - Segfault in pushbuf_kref when running the android emulator (qemu) on nv50</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93110">Bug 93110</a> - [NVE4] textureSize() and textureQueryLevels() uses a texture bound during the previous draw call</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93126">Bug 93126</a> - wrongly claim supporting GL_EXT_texture_rg</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Chris Wilson (1):</p>
-<ul>
-  <li>meta: Compute correct buffer size with SkipRows/SkipPixels</li>
-</ul>
-
-<p>Daniel Stone (1):</p>
-<ul>
-  <li>egl/wayland: Ignore rects from SwapBuffersWithDamage</li>
-</ul>
-
-<p>Dave Airlie (4):</p>
-<ul>
-  <li>texgetimage: consolidate 1D array handling code.</li>
-  <li>r600: geometry shader gsvs itemsize workaround</li>
-  <li>r600: rv670 use at least 16es/gs threads</li>
-  <li>r600: workaround empty geom shader.</li>
-</ul>
-
-<p>Emil Velikov (4):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.0.6</li>
-  <li>get-pick-list.sh: Require explicit "11.0" for nominating stable patches</li>
-  <li>mesa; add get-extra-pick-list.sh script into bin/</li>
-  <li>Update version to 11.0.7</li>
-</ul>
-
-<p>François Tigeot (1):</p>
-<ul>
-  <li>xmlconfig: Add support for DragonFly</li>
-</ul>
-
-<p>Ian Romanick (22):</p>
-<ul>
-  <li>mesa: Make bind_vertex_buffer avilable outside varray.c</li>
-  <li>mesa: Refactor update_array_format to make _mesa_update_array_format_public</li>
-  <li>mesa: Refactor enable_vertex_array_attrib to make _mesa_enable_vertex_array_attrib</li>
-  <li>i965: Pass brw_context instead of gl_context to brw_draw_rectlist</li>
-  <li>i965: Use DSA functions for VBOs in brw_meta_fast_clear</li>
-  <li>i965: Use internal functions for buffer object access</li>
-  <li>i965: Don't pollute the buffer object namespace in brw_meta_fast_clear</li>
-  <li>meta: Use DSA functions for PBO in create_texture_for_pbo</li>
-  <li>meta: Use _mesa_NamedBufferData and _mesa_NamedBufferSubData for users of _mesa_meta_setup_vertex_objects</li>
-  <li>i965: Use _mesa_NamedBufferSubData for users of _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Don't leave the VBO bound after _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Track VBO using gl_buffer_object instead of GL API object handle</li>
-  <li>meta: Use DSA functions for VBOs in _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Use internal functions for buffer object and VAO access</li>
-  <li>meta: Don't pollute the buffer object namespace in _mesa_meta_setup_vertex_objects</li>
-  <li>meta: Partially convert _mesa_meta_DrawTex to DSA</li>
-  <li>meta: Track VBO using gl_buffer_object instead of GL API object handle in _mesa_meta_DrawTex</li>
-  <li>meta: Use internal functions for buffer object and VAO access in _mesa_meta_DrawTex</li>
-  <li>meta: Don't pollute the buffer object namespace in _mesa_meta_DrawTex</li>
-  <li>meta/TexSubImage: Don't pollute the buffer object namespace</li>
-  <li>meta/generate_mipmap: Don't leak the framebuffer object</li>
-  <li>glsl: Fix off-by-one error in array size check assertion</li>
-</ul>
-
-<p>Ilia Mirkin (7):</p>
-<ul>
-  <li>nvc0/ir: actually emit AFETCH on kepler</li>
-  <li>nir: fix typo in idiv lowering, causing large-udiv-udiv failures</li>
-  <li>nouveau: use the buffer usage to determine placement when no binding</li>
-  <li>nv50,nvc0: properly handle buffer storage invalidation on dsa buffer</li>
-  <li>nv50/ir: fix (un)spilling of 3-wide results</li>
-  <li>mesa: support GL_RED/GL_RG in ES2 contexts when driver support exists</li>
-  <li>nvc0/ir: start offset at texBindBase for txq, like regular texturing</li>
-</ul>
-
-<p>Jonathan Gray (1):</p>
-<ul>
-  <li>automake: fix some occurrences of hardcoded -ldl and -lpthread</li>
-</ul>
-
-<p>Leo Liu (1):</p>
-<ul>
-  <li>radeon/vce: disable Stoney VCE for 11.0</li>
-</ul>
-
-<p>Marta Lofstedt (1):</p>
-<ul>
-  <li>gles2: Update gl2ext.h to revision: 32120</li>
-</ul>
-
-<p>Oded Gabbay (1):</p>
-<ul>
-  <li>llvmpipe: disable VSX in ppc due to LLVM PPC bug</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.0.8.html
+++ b/docs/relnotes/11.0.8.html
@@ -1,200 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.0.8 Release Notes / December 9, 2015</h1>
-
-<p>
-Mesa 11.0.8 is a bug fix release which fixes bugs found since the 11.0.7 release.
-</p>
-<p>
-Mesa 11.0.8 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-ab9db87b54d7525e4b611b82577ea9a9eae55927558df57b190059d5ecd9406f  mesa-11.0.8.tar.gz
-5696e4730518b6805d2ed5def393c4293f425a2c2c01bd5ed4bdd7ad62f7ad75  mesa-11.0.8.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Boyuan Zhang (1):</p>
-<ul>
-  <li>radeon/uvd: uv pitch separation for stoney</li>
-</ul>
-
-<p>Dave Airlie (9):</p>
-<ul>
-  <li>r600: do SQ flush ES ring rolling workaround</li>
-  <li>r600: SMX returns CONTEXT_DONE early workaround</li>
-  <li>r600/shader: split address get out to a function.</li>
-  <li>r600/shader: add utility functions to do single slot arithmatic</li>
-  <li>r600g: fix geom shader input indirect indexing.</li>
-  <li>r600: handle geometry dynamic input array index</li>
-  <li>radeonsi: handle doubles in lds load path.</li>
-  <li>mesa/varray: set double arrays to non-normalised.</li>
-  <li>mesa/shader: return correct attribute location for double matrix arrays</li>
-</ul>
-
-<p>Emil Velikov (8):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.0.7</li>
-  <li>cherry-ignore: don't pick a specific i965 formats patch</li>
-  <li>Revert "i965/nir: Remove unused indirect handling"</li>
-  <li>Revert "i965/state: Get rid of dword_pitch arguments to buffer functions"</li>
-  <li>Revert "i965/vec4: Use a stride of 1 and byte offsets for UBOs"</li>
-  <li>Revert "i965/fs: Use a stride of 1 and byte offsets for UBOs"</li>
-  <li>Revert "i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge"</li>
-  <li>Update version to 11.0.8</li>
-</ul>
-
-<p>Francisco Jerez (1):</p>
-<ul>
-  <li>i965: Resolve color and flush for all active shader images in intel_update_state().</li>
-</ul>
-
-<p>Ian Romanick (1):</p>
-<ul>
-  <li>meta/generate_mipmap: Work-around GLES 1.x problem with GL_DRAW_FRAMEBUFFER</li>
-</ul>
-
-<p>Ilia Mirkin (17):</p>
-<ul>
-  <li>freedreno/a4xx: support lod_bias</li>
-  <li>freedreno/a4xx: fix 5_5_5_1 texture sampler format</li>
-  <li>freedreno/a4xx: point regid to "red" even for alpha-only rb formats</li>
-  <li>nvc0/ir: fold postfactor into immediate</li>
-  <li>nv50/ir: deal with loops with no breaks</li>
-  <li>nv50/ir: the mad source might not have a defining instruction</li>
-  <li>nv50/ir: fix instruction permutation logic</li>
-  <li>nv50/ir: don't forget to mark flagsDef on cvt in txb lowering</li>
-  <li>nv50/ir: fix DCE to not generate 96-bit loads</li>
-  <li>nv50/ir: avoid looking at uninitialized srcMods entries</li>
-  <li>gk110/ir: fix imul hi emission with limm arg</li>
-  <li>gk104/ir: sampler doesn't matter for txf</li>
-  <li>gk110/ir: fix imad sat/hi flag emission for immediate args</li>
-  <li>nv50/ir: fix cutoff for using r63 vs r127 when replacing zero</li>
-  <li>nv50/ir: can't have predication and immediates</li>
-  <li>glsl: assign varying locations to tess shaders when doing SSO</li>
-  <li>ttn: add TEX2 support</li>
-</ul>
-
-<p>Jason Ekstrand (5):</p>
-<ul>
-  <li>i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge</li>
-  <li>i965/fs: Use a stride of 1 and byte offsets for UBOs</li>
-  <li>i965/vec4: Use a stride of 1 and byte offsets for UBOs</li>
-  <li>i965/state: Get rid of dword_pitch arguments to buffer functions</li>
-  <li>i965/nir: Remove unused indirect handling</li>
-</ul>
-
-<p>Jonathan Gray (2):</p>
-<ul>
-  <li>configure.ac: use pkg-config for libelf</li>
-  <li>configure: check for python2.7 for PYTHON2</li>
-</ul>
-
-<p>Kenneth Graunke (2):</p>
-<ul>
-  <li>i965: Fix fragment shader struct inputs.</li>
-  <li>i965: Fix scalar vertex shader struct outputs.</li>
-</ul>
-
-<p>Marek Olšák (8):</p>
-<ul>
-  <li>radeonsi: fix occlusion queries on Fiji</li>
-  <li>radeonsi: fix a hang due to uninitialized border color registers</li>
-  <li>radeonsi: fix Fiji for LLVM &lt;= 3.7</li>
-  <li>radeonsi: don't call of u_prims_for_vertices for patches and rectangles</li>
-  <li>radeonsi: apply the streamout workaround to Fiji as well</li>
-  <li>gallium/radeon: fix Hyper-Z hangs by programming PA_SC_MODE_CNTL_1 correctly</li>
-  <li>tgsi/scan: add flag colors_written</li>
-  <li>r600g: write all MRTs only if there is exactly one output (fixes a hang)</li>
-</ul>
-
-<p>Matt Turner (1):</p>
-<ul>
-  <li>glsl: Allow binding of image variables with 420pack.</li>
-</ul>
-
-<p>Neil Roberts (2):</p>
-<ul>
-  <li>i965: Add MESA_FORMAT_B8G8R8X8_SRGB to brw_format_for_mesa_format</li>
-  <li>i965: Add B8G8R8X8_SRGB to the alpha format override</li>
-</ul>
-
-<p>Oded Gabbay (1):</p>
-<ul>
-  <li>configura.ac: fix test for SSE4.1 assembler support</li>
-</ul>
-
-<p>Patrick Rudolph (2):</p>
-<ul>
-  <li>nv50,nvc0: fix use-after-free when vertex buffers are unbound</li>
-  <li>gallium/util: return correct number of bound vertex buffers</li>
-</ul>
-
-<p>Samuel Pitoiset (1):</p>
-<ul>
-  <li>nvc0: free memory allocated by the prog which reads MP perf counters</li>
-</ul>
-
-<p>Tapani Pälli (1):</p>
-<ul>
-  <li>i965: use _Shader to get fragment program when updating surface state</li>
-</ul>
-
-<p>Tom Stellard (2):</p>
-<ul>
-  <li>radeonsi: Rename si_shader::ls_rsrc{1,2} to si_shader::rsrc{1,2}</li>
-  <li>radeonsi/compute: Use the compiler's COMPUTE_PGM_RSRC* register values</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.0.9.html
+++ b/docs/relnotes/11.0.9.html
@@ -1,127 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 11.0.9 Release Notes / January 22, 2016</h1>
-
-<p>
-Mesa 11.0.9 is a bug fix release which fixes bugs found since the 11.0.8 release.
-</p>
-<p>
-Mesa 11.0.9 implements the OpenGL 4.1 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
-4.1 is <strong>only</strong> available if requested at context creation
-because compatibility contexts are not supported.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-1597c2e983f476f98efdd6cd58b5298896d18479ff542bdeff28b98b129ede05  mesa-11.0.9.tar.gz
-a1262ff1c66a16ccf341186cf0e57b306b8589eb2cc5ce92ffb6788ab01d2b01  mesa-11.0.9.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-<h2>Bug fixes</h2>
-
-<p>This list is likely incomplete.</p>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92229">Bug 92229</a> - [APITRACE] SOMA have serious graphical errors</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93257">Bug 93257</a> - [SKL, bisected] ASTC dEQP tests segfault</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Emil Velikov (6):</p>
-<ul>
-  <li>docs: add sha256 checksums for 11.0.8</li>
-  <li>cherry-ignore: add patch already in branch</li>
-  <li>cherry-ignore: add the dri3 glx null check patch</li>
-  <li>i915: correctly parse/set the context flags</li>
-  <li>egl/dri2: expose srgb configs when KHR_gl_colorspace is available</li>
-  <li>Update version to 11.0.9</li>
-</ul>
-
-<p>Grazvydas Ignotas (1):</p>
-<ul>
-  <li>r600: fix constant buffer size programming</li>
-</ul>
-
-<p>Ilia Mirkin (5):</p>
-<ul>
-  <li>nvc0: don't forget to reset VTX_TMP bufctx slot after blit completion</li>
-  <li>nv50/ir: float(s32 &amp; 0xff) = float(u8), not s8</li>
-  <li>nv50,nvc0: make sure there's pushbuf space and that we ref the bo early</li>
-  <li>nv50,nvc0: fix crash when increasing bsp bo size for h264</li>
-  <li>nvc0: scale up inter_bo size so that it's 16M for a 4K video</li>
-</ul>
-
-<p>Kenneth Graunke (2):</p>
-<ul>
-  <li>ralloc: Fix ralloc_adopt() to the old context's last child's parent.</li>
-  <li>nvc0: Set winding order regardless of domain.</li>
-</ul>
-
-<p>Marek Olšák (1):</p>
-<ul>
-  <li>radeonsi: don't miss changes to SPI_TMPRING_SIZE</li>
-</ul>
-
-<p>Miklós Máté (1):</p>
-<ul>
-  <li>mesa: Don't leak ATIfs instructions in DeleteFragmentShader</li>
-</ul>
-
-<p>Neil Roberts (1):</p>
-<ul>
-  <li>i965: Fix crash when calling glViewport with no surface bound</li>
-</ul>
-
-<p>Nicolai Hähnle (6):</p>
-<ul>
-  <li>gallium/radeon: only dispose locally created target machine in radeon_llvm_compile</li>
-  <li>mesa/bufferobj: make _mesa_delete_buffer_object externally accessible</li>
-  <li>st/mesa: use _mesa_delete_buffer_object</li>
-  <li>radeon: use _mesa_delete_buffer_object</li>
-  <li>i915: use _mesa_delete_buffer_object</li>
-  <li>i965: use _mesa_delete_buffer_object</li>
-</ul>
-
-<p>Oded Gabbay (1):</p>
-<ul>
-  <li>llvmpipe: use vpkswss when dst is signed</li>
-</ul>
-
-<p>Rob Herring (1):</p>
-<ul>
-  <li>freedreno/ir3: fix 32-bit builds with pointer-to-int-cast error enabled</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/11.1.0.html
+++ b/docs/relnotes/11.1.0.html
@@ -0,0 +1,89 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 11.1.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 11.1.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 11.1.1.
+</p>
+<p>
+Mesa 11.1.0 implements the OpenGL 4.1 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.1.  OpenGL
+4.1 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>OpenGL 3.1 support on freedreno (a3xx, a4xx)</li>
+<li>OpenGL 3.3 support for VMware guest VM driver (supported by Workstation 12
+    and Fusion 8).</li>
+<li>GL_AMD_performance_monitor on nv50</li>
+<li>GL_ARB_arrays_of_arrays on i965</li>
+<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
+<li>GL_ARB_clear_texture on nv50, nvc0</li>
+<li>GL_ARB_copy_image on nv50, nvc0, radeonsi</li>
+<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
+<li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
+<li>GL_ARB_shader_clock on i965 (gen7+)</li>
+<li>GL_ARB_shader_stencil_export on i965 (gen9+)</li>
+<li>GL_ARB_shader_storage_buffer_object on i965</li>
+<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
+<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
+<li>GL_ARB_texture_query_lod on softpipe</li>
+<li>GL_ARB_texture_view on radeonsi and r600 (for evergreen and newer)</li>
+<li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx, a4xx)</li>
+<li>GL_EXT_blend_func_extended on all drivers that support the ARB version</li>
+<li>GL_EXT_buffer_storage implemented for when ES 3.1 support is gained</li>
+<li>GL_EXT_draw_elements_base_vertex on all drivers</li>
+<li>GL_EXT_texture_compression_rgtc / latc on freedreno (a3xx &amp; a4xx)</li>
+<li>GL_KHR_debug (GLES)</li>
+<li>GL_NV_conditional_render on freedreno</li>
+<li>GL_OES_draw_elements_base_vertex on all drivers</li>
+<li>EGL_KHR_create_context on softpipe, llvmpipe</li>
+<li>EGL_KHR_gl_colorspace on softpipe, llvmpipe</li>
+<li>new virgl gallium driver for qemu virtio-gpu</li>
+<li>16x multisampling on i965 (gen9+)</li>
+<li>GL_EXT_shader_samples_identical on i965.</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+TBD.
+
+<h2>Changes</h2>
+
+TBD.
+
+</div>
+</body>
+</html>
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -63,6 +63,20 @@ execution.  These are generally used for debugging.
 Example:  export MESA_GLSL=dump,nopt
 </p>

+<p>
+Shaders can be dumped and replaced at runtime for debugging purposes. Mesa
+needs to be configured with '--with-sha1' to enable this functionality. This 
+feature is not currently supported by SCons build.
+
+This is controlled via the following environment variables:
+<ul>
+<li><b>MESA_SHADER_DUMP_PATH</b> - path where shader sources are dumped
+<li><b>MESA_SHADER_READ_PATH</b> - path where replacement shaders are read
+</ul>
+Note: each path must already exist before running for dumping or replacing to
+work. When both are set, the paths should be different so the dumped shaders do
+not clobber the replacement shaders.
+</p>

 <h2 id="support">GLSL Version</h2>

--- a/docs/specs/EXT_shader_samples_identical.txt
+++ b/docs/specs/EXT_shader_samples_identical.txt
@@ -0,0 +1,176 @@
+Name
+
+    EXT_shader_samples_identical
+
+Name Strings
+
+    GL_EXT_shader_samples_identical
+
+Contact
+
+    Ian Romanick, Intel (ian.d.romanick 'at' intel.com)
+
+Contributors
+
+    Chris Forbes, Mesa
+    Magnus Wendt, Intel
+    Neil S. Roberts, Intel
+    Graham Sellers, AMD
+
+Status
+
+    XXX - Not complete yet.
+
+Version
+
+    Last Modified Date: November 19, 2015
+    Revision: 6
+
+Number
+
+    TBD
+
+Dependencies
+
+    OpenGL 3.2, or OpenGL ES 3.1, or ARB_texture_multisample is required.
+
+    This extension is written against the OpenGL 4.5 (Core Profile)
+    Specification
+
+Overview
+
+    Multisampled antialiasing has become a common method for improving the
+    quality of rendered images.  Multisampling differs from supersampling in
+    that the color of a primitive that covers all or part of a pixel is
+    resolved once, regardless of the number of samples covered.  If a large
+    polygon is rendered, the colors of all samples in each interior pixel will
+    be the same.  This suggests a simple compression scheme that can reduce
+    the necessary memory bandwidth requirements.  In one such scheme, each
+    sample is stored in a separate slice of the multisample surface.  An
+    additional multisample control surface (MCS) contains a mapping from pixel
+    samples to slices.
+
+    If all the values stored in the MCS for a particular pixel are the same,
+    then all the samples have the same value.  Applications can take advantage
+    of this information to reduce the bandwidth of reading multisample
+    textures.  A custom multisample resolve filter could optimize resolving
+    pixels where every sample is identical by reading the color once.
+
+    color = texelFetch(sampler, coordinate, 0);
+    if (!textureSamplesIdenticalEXT(sampler, coordinate)) {
+        for (int i = 1; i < MAX_SAMPLES; i++) {
+            vec4 c = texelFetch(sampler, coordinate, i);
+
+            //... accumulate c into color
+
+        }
+    }
+
+New Procedures and Functions
+
+    None.
+
+New Tokens
+
+    None.
+
+Additions to the OpenGL 4.5 (Core Profile) Specification
+
+    None.
+
+Modifications to The OpenGL Shading Language Specification, Version 4.50.5
+
+    Including the following line in a shader can be used to control the
+    language features described in this extension:
+
+        #extension GL_EXT_shader_samples_identical
+
+    A new preprocessor #define is added to the OpenGL Shading Language:
+
+        #define GL_EXT_shader_samples_identical
+
+    Add to the table in section 8.7 "Texture Lookup Functions"
+
+    Syntax:
+
+        bool textureSamplesIdenticalEXT(gsampler2DMS sampler, ivec2 coord)
+
+        bool textureSamplesIdenticalEXT(gsampler2DMSArray sampler,
+                                        ivec3 coord)
+
+    Description:
+
+        Returns true if it can be determined that all samples within the texel
+        of the multisample texture bound to <sampler> at <coord> contain the
+        same values or false if this cannot be determined.
+
+Additions to the AGL/EGL/GLX/WGL Specifications
+
+    None
+
+Errors
+
+    None
+
+New State
+
+    None
+
+New Implementation Dependent State
+
+    None
+
+Issues
+
+    1) What should the new functions be called?
+
+    RESOLVED: textureSamplesIdenticalEXT.  Initially
+    textureAllSamplesIdenticalEXT was considered, but
+    textureSamplesIdenticalEXT is more similar to the existing textureSamples
+    function.
+
+    2) It seems like applications could implement additional optimization if
+       they were provided with raw MCS data.  Should this extension also
+       provide that data?
+
+    There are a number of challenges in providing raw MCS data.  The biggest
+    problem being that the amount of MCS data depends on the number of
+    samples, and that is not known at compile time.  Additionally, without new
+    texelFetch functions, applications would have difficulty utilizing the
+    information.
+
+    Another option is to have a function that returns an array of tuples of
+    sample number and count.  This also has difficulties with the maximum
+    array size not being known at compile time.
+
+    RESOLVED: Do not expose raw MCS data in this extension.
+
+    3) Should this extension also extend SPIR-V?
+
+    RESOLVED: Yes, but this has not yet been written.
+
+    4) Is it possible for textureSamplesIdenticalEXT to report false negatives?
+
+    RESOLVED: Yes.  It is possible that the underlying hardware may not detect
+    that separate writes of the same color to different samples of a pixel are
+    the same.  The shader function is at the whim of the underlying hardware
+    implementation.  It is also possible that a compressed multisample surface
+    is not used.  In that case the function will likely always return false.
+
+Revision History
+
+    Rev  Date        Author    Changes
+    ---  ----------  --------  ---------------------------------------------
+      1  2014/08/20  cforbes   Initial version
+      2  2015/10/23  idr       Change from MESA to EXT.  Rebase on OpenGL 4.5,
+                               and add dependency on OpenGL ES 3.1.  Initial
+                               draft of overview section and issues 1 through
+                               3.
+      3  2015/10/27  idr       Typo fixes.
+      4  2015/11/10  idr       Rename extension from EXT_shader_multisample_compression
+                               to EXT_shader_samples_identical.
+                               Add issue #4.
+      5  2015/11/18  idr       Fix some typos spotted by gsellers.  Change the
+                               name of the function to
+                               textureSamplesIdenticalEXT.
+      6  2015/11/19  idr       Fix more typos spotted by Nicolai Hähnle.
--- a/docs/utilities.html
+++ b/docs/utilities.html
@@ -30,6 +30,10 @@
  <dt><a href="http://www.valgrind.org">Valgrind</a></dt>
  <dd>is a very useful tool for tracking down
  memory-related problems in your code.</dd>
+
+  <dt><a href="http://scan.coverity.com/projects/mesa">Coverity</a></dt>
+  <dd>provides static code analysis of Mesa.  If you create an account
+  you can see the results and try to fix outstanding issues.</dd>
 </dl>

 </div>
--- a/docs/vmware-guest.html
+++ b/docs/vmware-guest.html
@@ -26,6 +26,31 @@ VMware Workstation running on Linux or Windows and VMware Fusion running on
 MacOS are all supported.
 </p>

+<p>
+With the August 2015 Workstation 12 / Fusion 8 releases, OpenGL 3.3
+is supported in the guest.
+This requires:
+<ul>
+<li>The VM is configured for virtual hardware version 12.
+<li>The host OS, GPU and graphics driver supports DX11 (Windows) or
+    OpenGL 4.0 (Linux, Mac)
+<li>On Linux, the vmwgfx kernel module must be version 2.9.0 or later.
+<li>A recent version of Mesa with the updated svga gallium driver.
+</ul>
+</p>
+
+<p>
+Otherwise, OpenGL 2.1 is supported.
+</p>
+
+<p>
+OpenGL 3.3 support can be disabled by setting the environment variable
+SVGA_VGPU10=0.
+You will then have OpenGL 2.1 support.
+This may be useful to work around application bugs (such as incorrect use
+of the OpenGL 3.x core profile).
+</p>
+
 <p>
 Most modern Linux distros include the SVGA3D driver so end users shouldn't
 be concerned with this information.
@@ -123,10 +148,33 @@ To get the latest code from git:
 <h2>Building the Code</h2>

 <ul>
-<li>Build libdrm: If you're on a 32-bit system, you should skip the --libdir configure option. Note also the comment about toolchain libdrm above. 
+<li>
+Determine where the GL-related libraries reside on your system and set
+the LIBDIR environment variable accordingly.
+<br><br>
+For 32-bit Ubuntu systems:
+<pre>
+  export LIBDIR=/usr/lib/i386-linux-gnu
+</pre>
+For 64-bit Ubuntu systems:
+<pre>
+  export LIBDIR=/usr/lib/x86_64-linux-gnu
+</pre>
+For 32-bit Fedora systems:
+<pre>
+  export LIBDIR=/usr/lib
+</pre>
+For 64-bit Fedora systems:
+<pre>
+  export LIBDIR=/usr/lib64
+</pre>
+
+</li>
+
+<li>Build libdrm:
  <pre>
  cd $TOP/drm
-  ./autogen.sh --prefix=/usr --libdir=/usr/lib64
+  ./autogen.sh --prefix=/usr --libdir=${LIBDIR}
  make
  sudo make install
  </pre>
@@ -137,12 +185,9 @@ The libxatracker library is used exclusively by the X server to do render,
 copy and video acceleration:
 <br>
 The following configure options doesn't build the EGL system.
-<br>
-As before, if you're on a 32-bit system, you should skip the --libdir
-configure option.
  <pre>
  cd $TOP/mesa
-  ./autogen.sh --prefix=/usr --libdir=/usr/lib64 --with-gallium-drivers=svga --with-dri-drivers= --enable-xa --disable-dri3
+  ./autogen.sh --prefix=/usr --libdir=${LIBDIR} --with-gallium-drivers=svga --with-dri-drivers=swrast --enable-xa --disable-dri3 --enable-glx-tls
  make
  sudo make install
  </pre>
@@ -152,25 +197,39 @@ if they're not installed in your system.  You should be told what's missing.
 <br>
 <br>

-<li>xf86-video-vmware: Now, once libxatracker is installed, we proceed with building and replacing the current Xorg driver. First check if your system is 32- or 64-bit. If you're building for a 32-bit system, you will not be needing the --libdir=/usr/lib64 option to autogen. 
+<li>xf86-video-vmware: Now, once libxatracker is installed, we proceed with
+building and replacing the current Xorg driver.
+First check if your system is 32- or 64-bit.
  <pre>
  cd $TOP/xf86-video-vmware
-  ./autogen.sh --prefix=/usr --libdir=/usr/lib64
+  ./autogen.sh --prefix=/usr --libdir=${LIBDIR}
  make
  sudo make install
  </pre>
+
 <li>vmwgfx kernel module. First make sure that any old version of this kernel module is removed from the system by issuing
-  <pre>
+<pre>
  sudo rm /lib/modules/`uname -r`/kernel/drivers/gpu/drm/vmwgfx.ko*
-  </pre>
-Then 
-  <pre>
+</pre>
+Build and install:
+<pre>
  cd $TOP/vmwgfx
  make
  sudo make install
-  sudo cp 00-vmwgfx.rules /etc/udev/rules.d
-  sudo depmod -ae
-  </pre>
+  sudo depmod -a
+</pre>
+If you're using an Ubuntu OS:
+<pre>
+  sudo update-initramfs -u
+</pre>
+If you're using a Fedora OS:
+<pre>
+  sudo dracut --force
+</pre>
+Add 'vmwgfx' to the /etc/modules file:
+<pre>
+  echo vmwgfx | sudo tee -a /etc/modules
+</pre>

 Note: some distros put DRM kernel drivers in different directories.
 For example, sometimes vmwgfx.ko might be found in
@@ -227,6 +286,16 @@ If you don't see this, try setting this environment variable:
 then rerun glxinfo and examine the output for error messages.
 </p>

+<p>
+If OpenGL 3.3 is not working (you only get OpenGL 2.1):
+</p>
+<ul>
+<li>Make sure the VM uses hardware version 12.
+<li>Make sure the vmwgfx kernel module is version 2.9.0 or later.
+<li>Check the vmware.log file for errors.
+<li>Run 'dmesg | grep vmwgfx' and look for "DX: yes".
+</ul>
+
 </div>
 </body>
 </html>
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -495,7 +495,7 @@ struct __DRIdamageExtensionRec {
 * SWRast Loader extension.
 */
 #define __DRI_SWRAST_LOADER "DRI_SWRastLoader"
-#define __DRI_SWRAST_LOADER_VERSION 2
+#define __DRI_SWRAST_LOADER_VERSION 3
 struct __DRIswrastLoaderExtensionRec {
    __DRIextension base;

@@ -528,6 +528,15 @@ struct __DRIswrastLoaderExtensionRec {
    void (*putImage2)(__DRIdrawable *drawable, int op,
                      int x, int y, int width, int height, int stride,
                      char *data, void *loaderPrivate);
+
+   /**
+     * Get image from readable
+     *
+     * \since 3
+     */
+   void (*getImage2)(__DRIdrawable *readable,
+		     int x, int y, int width, int height, int stride,
+		     char *data, void *loaderPrivate);
 };

 /**
--- a/include/GLES2/gl2ext.h
+++ b/include/GLES2/gl2ext.h
--- a/include/c11/threads_posix.h
+++ b/include/c11/threads_posix.h
@@ -102,9 +102,8 @@ call_once(once_flag *flag, void (*func)(void))
 static inline int
 cnd_broadcast(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_broadcast(cond);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_broadcast(cond) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.2
@@ -119,18 +118,16 @@ cnd_destroy(cnd_t *cond)
 static inline int
 cnd_init(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_init(cond, NULL);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_init(cond, NULL) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.4
 static inline int
 cnd_signal(cnd_t *cond)
 {
-    if (!cond) return thrd_error;
-    pthread_cond_signal(cond);
-    return thrd_success;
+    assert(cond != NULL);
+    return (pthread_cond_signal(cond) == 0) ? thrd_success : thrd_error;
 }

 // 7.25.3.5
@@ -139,7 +136,14 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 {
    struct timespec abs_time;
    int rt;
-    if (!cond || !mtx || !xt) return thrd_error;
+
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    assert(xt != NULL);
+
+    abs_time.tv_sec = xt->sec;
+    abs_time.tv_nsec = xt->nsec;
+
    rt = pthread_cond_timedwait(cond, mtx, &abs_time);
    if (rt == ETIMEDOUT)
        return thrd_busy;
@@ -150,9 +154,9 @@ cnd_timedwait(cnd_t *cond, mtx_t *mtx, const xtime *xt)
 static inline int
 cnd_wait(cnd_t *cond, mtx_t *mtx)
 {
-    if (!cond || !mtx) return thrd_error;
-    pthread_cond_wait(cond, mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    assert(cond != NULL);
+    return (pthread_cond_wait(cond, mtx) == 0) ? thrd_success : thrd_error;
 }


@@ -161,7 +165,7 @@ cnd_wait(cnd_t *cond, mtx_t *mtx)
 static inline void
 mtx_destroy(mtx_t *mtx)
 {
-    assert(mtx);
+    assert(mtx != NULL);
    pthread_mutex_destroy(mtx);
 }

@@ -170,7 +174,7 @@ static inline int
 mtx_init(mtx_t *mtx, int type)
 {
    pthread_mutexattr_t attr;
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
    if (type != mtx_plain && type != mtx_timed && type != mtx_try
      && type != (mtx_plain|mtx_recursive)
      && type != (mtx_timed|mtx_recursive)
@@ -188,9 +192,8 @@ mtx_init(mtx_t *mtx, int type)
 static inline int
 mtx_lock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_lock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_lock(mtx) == 0) ? thrd_success : thrd_error;
 }

 static inline int
@@ -203,7 +206,9 @@ thrd_yield(void);
 static inline int
 mtx_timedlock(mtx_t *mtx, const xtime *xt)
 {
-    if (!mtx || !xt) return thrd_error;
+    assert(mtx != NULL);
+    assert(xt != NULL);
+
    {
 #ifdef EMULATED_THREADS_USE_NATIVE_TIMEDLOCK
    struct timespec ts;
@@ -233,7 +238,7 @@ mtx_timedlock(mtx_t *mtx, const xtime *xt)
 static inline int
 mtx_trylock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
+    assert(mtx != NULL);
    return (pthread_mutex_trylock(mtx) == 0) ? thrd_success : thrd_busy;
 }

@@ -241,9 +246,8 @@ mtx_trylock(mtx_t *mtx)
 static inline int
 mtx_unlock(mtx_t *mtx)
 {
-    if (!mtx) return thrd_error;
-    pthread_mutex_unlock(mtx);
-    return thrd_success;
+    assert(mtx != NULL);
+    return (pthread_mutex_unlock(mtx) == 0) ? thrd_success : thrd_error;
 }


@@ -253,7 +257,7 @@ static inline int
 thrd_create(thrd_t *thr, thrd_start_t func, void *arg)
 {
    struct impl_thrd_param *pack;
-    if (!thr) return thrd_error;
+    assert(thr != NULL);
    pack = (struct impl_thrd_param *)malloc(sizeof(struct impl_thrd_param));
    if (!pack) return thrd_nomem;
    pack->func = func;
@@ -329,7 +333,7 @@ thrd_yield(void)
 static inline int
 tss_create(tss_t *key, tss_dtor_t dtor)
 {
-    if (!key) return thrd_error;
+    assert(key != NULL);
    return (pthread_key_create(key, dtor) == 0) ? thrd_success : thrd_error;
 }

--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,21 +109,25 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
-CHIPSET(0x1902, skl_gt1, "Intel(R) Skylake DT  GT1")
-CHIPSET(0x1906, skl_gt1, "Intel(R) Skylake ULT GT1")
-CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake SRV GT1")
-CHIPSET(0x190B, skl_gt1, "Intel(R) Skylake Halo GT1")
-CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake ULX GT1")
-CHIPSET(0x1912, skl_gt2, "Intel(R) Skylake DT  GT2")
-CHIPSET(0x1916, skl_gt2, "Intel(R) Skylake ULT GT2")
-CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake SRV GT2")
-CHIPSET(0x191B, skl_gt2, "Intel(R) Skylake Halo GT2")
-CHIPSET(0x191D, skl_gt2, "Intel(R) Skylake WKS GT2")
-CHIPSET(0x191E, skl_gt2, "Intel(R) Skylake ULX GT2")
-CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake ULT GT2F")
-CHIPSET(0x1926, skl_gt3, "Intel(R) Skylake ULT GT3")
-CHIPSET(0x192A, skl_gt3, "Intel(R) Skylake SRV GT3")
-CHIPSET(0x192B, skl_gt3, "Intel(R) Skylake Halo GT3")
+CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
+CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
+CHIPSET(0x190E, skl_gt1, "Intel(R) Skylake GT1")
+CHIPSET(0x1912, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x1913, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1915, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x1916, skl_gt2, "Intel(R) HD Graphics 520 (Skylake GT2)")
+CHIPSET(0x1917, skl_gt2, "Intel(R) Skylake GT2f")
+CHIPSET(0x191A, skl_gt2, "Intel(R) Skylake GT2")
+CHIPSET(0x191B, skl_gt2, "Intel(R) HD Graphics 530 (Skylake GT2)")
+CHIPSET(0x191D, skl_gt2, "Intel(R) HD Graphics P530 (Skylake GT2)")
+CHIPSET(0x191E, skl_gt2, "Intel(R) HD Graphics 515 (Skylake GT2)")
+CHIPSET(0x1921, skl_gt2, "Intel(R) Skylake GT2")
+CHIPSET(0x1923, skl_gt3, "Intel(R) Iris Graphics 540 (Skylake GT3e)")
+CHIPSET(0x1926, skl_gt3, "Intel(R) HD Graphics 535 (Skylake GT3)")
+CHIPSET(0x1927, skl_gt3, "Intel(R) Iris Graphics 550 (Skylake GT3e)")
+CHIPSET(0x192A, skl_gt4, "Intel(R) Skylake GT4")
+CHIPSET(0x192B, skl_gt3, "Intel(R) Iris Graphics (Skylake GT3fe)")
 CHIPSET(0x1932, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x193A, skl_gt4, "Intel(R) Skylake GT4")
 CHIPSET(0x193B, skl_gt4, "Intel(R) Skylake GT4")
--- a/src/egl/Makefile.am
+++ b/src/egl/Makefile.am
@@ -47,12 +47,21 @@ libEGL_la_LDFLAGS = \
 	$(LD_NO_UNDEFINED)

 dri2_backend_FILES =
+dri3_backend_FILES =

 if HAVE_EGL_PLATFORM_X11
 AM_CFLAGS += -DHAVE_X11_PLATFORM
 AM_CFLAGS += $(XCB_DRI2_CFLAGS)
 libEGL_la_LIBADD += $(XCB_DRI2_LIBS)
 dri2_backend_FILES += drivers/dri2/platform_x11.c
+
+if HAVE_DRI3
+dri3_backend_FILES += \
+	drivers/dri2/platform_x11_dri3.c \
+	drivers/dri2/platform_x11_dri3.h
+
+libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader_dri3_helper.la
+endif
 endif

 if HAVE_EGL_PLATFORM_WAYLAND
@@ -88,7 +97,8 @@ AM_CFLAGS += \

 libEGL_la_SOURCES += \
 	$(dri2_backend_core_FILES) \
-	$(dri2_backend_FILES)
+	$(dri2_backend_FILES) \
+	$(dri3_backend_FILES)

 libEGL_la_LIBADD += $(top_builddir)/src/loader/libloader.la
 libEGL_la_LIBADD += $(DLOPEN_LIBS) $(LIBDRM_LIBS)
@@ -111,7 +121,10 @@ egl_HEADERS = \
 	$(top_srcdir)/include/EGL/eglmesaext.h \
 	$(top_srcdir)/include/EGL/eglplatform.h

+TESTS = egl-symbols-check
+
 EXTRA_DIST = \
+	egl-symbols-check \
 	SConscript \
 	drivers/haiku \
 	docs \
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -8,6 +8,7 @@ env = env.Clone()

 env.Append(CPPPATH = [
    '#/include',
+    '#/include/HaikuGL',
    '#/src/egl/main',
    '#/src',
 ])
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -27,6 +27,7 @@

 #define WL_HIDE_DEPRECATED

+#include <stdbool.h>
 #include <stdint.h>
 #include <stdbool.h>
 #include <stdlib.h>
@@ -130,12 +131,10 @@ const __DRIconfig *
 dri2_get_dri_config(struct dri2_egl_config *conf, EGLint surface_type,
                    EGLenum colorspace)
 {
-   if (colorspace == EGL_GL_COLORSPACE_SRGB_KHR)
-      return surface_type == EGL_WINDOW_BIT ? conf->dri_srgb_double_config :
-                                              conf->dri_srgb_single_config;
-   else
-      return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config :
-                                              conf->dri_single_config;
+   const bool srgb = colorspace == EGL_GL_COLORSPACE_SRGB_KHR;
+   /* srgb is 0 or 1 and selects [0] non-sRGB or [1] sRGB in each array. */
+   return surface_type == EGL_WINDOW_BIT ? conf->dri_double_config[srgb] :
+                                           conf->dri_single_config[srgb];
 }

 static EGLBoolean
@@ -236,8 +235,6 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,

      case __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE:
         srgb = value != 0;
-         if (!disp->Extensions.KHR_gl_colorspace && srgb)
-            return NULL;
         break;

      default:
@@ -285,14 +282,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
   if (num_configs == 1) {
      conf = (struct dri2_egl_config *) matching_config;

-      if (double_buffer && srgb && !conf->dri_srgb_double_config)
-         conf->dri_srgb_double_config = dri_config;
-      else if (double_buffer && !srgb && !conf->dri_double_config)
-         conf->dri_double_config = dri_config;
-      else if (!double_buffer && srgb && !conf->dri_srgb_single_config)
-         conf->dri_srgb_single_config = dri_config;
-      else if (!double_buffer && !srgb && !conf->dri_single_config)
-         conf->dri_single_config = dri_config;
+      if (double_buffer && !conf->dri_double_config[srgb])
+         conf->dri_double_config[srgb] = dri_config;
+      else if (!double_buffer && !conf->dri_single_config[srgb])
+         conf->dri_single_config[srgb] = dri_config;
      else
         /* a similar config type is already added (unlikely) => discard */
         return NULL;
@@ -302,19 +295,12 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
      if (conf == NULL)
         return NULL;

-      memcpy(&conf->base, &base, sizeof base);
-      if (double_buffer) {
-         if (srgb)
-            conf->dri_srgb_double_config = dri_config;
-         else
-            conf->dri_double_config = dri_config;
-      } else {
-         if (srgb)
-            conf->dri_srgb_single_config = dri_config;
-         else
-            conf->dri_single_config = dri_config;
-      }
+      if (double_buffer)
+         conf->dri_double_config[srgb] = dri_config;
+      else
+         conf->dri_single_config[srgb] = dri_config;

+      memcpy(&conf->base, &base, sizeof base);
      conf->base.SurfaceType = 0;
      conf->base.ConfigID = config_id;

@@ -366,6 +352,12 @@ struct dri2_extension_match {
   int offset;
 };

+static struct dri2_extension_match dri3_driver_extensions[] = {
+   { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
+   { __DRI_IMAGE_DRIVER, 1, offsetof(struct dri2_egl_display, image_driver) },
+   { NULL, 0, 0 } /* sentinel: dri2_bind_extensions() stops at a NULL name */
+};
+
 static struct dri2_extension_match dri2_driver_extensions[] = {
   { __DRI_CORE, 1, offsetof(struct dri2_egl_display, core) },
   { __DRI_DRI2, 2, offsetof(struct dri2_egl_display, dri2) },
@@ -399,13 +391,13 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
   void *field;

   for (i = 0; extensions[i]; i++) {
-      _eglLog(_EGL_DEBUG, "DRI2: found extension `%s'", extensions[i]->name);
+      _eglLog(_EGL_DEBUG, "found extension `%s'", extensions[i]->name);
      for (j = 0; matches[j].name; j++) {
 	 if (strcmp(extensions[i]->name, matches[j].name) == 0 &&
 	     extensions[i]->version >= matches[j].version) {
 	    field = ((char *) dri2_dpy + matches[j].offset);
 	    *(const __DRIextension **) field = extensions[i];
-	    _eglLog(_EGL_INFO, "DRI2: found extension %s version %d",
+	    _eglLog(_EGL_INFO, "found extension %s version %d",
 		    extensions[i]->name, extensions[i]->version);
 	 }
      }
@@ -414,7 +406,7 @@ dri2_bind_extensions(struct dri2_egl_display *dri2_dpy,
   for (j = 0; matches[j].name; j++) {
      field = ((char *) dri2_dpy + matches[j].offset);
      if (*(const __DRIextension **) field == NULL) {
-	 _eglLog(_EGL_WARNING, "DRI2: did not find extension %s version %d",
+         _eglLog(_EGL_WARNING, "did not find extension %s version %d",
 		 matches[j].name, matches[j].version);
 	 ret = EGL_FALSE;
      }
@@ -507,6 +499,25 @@ dri2_open_driver(_EGLDisplay *disp)
   return extensions;
 }

+EGLBoolean
+dri2_load_driver_dri3(_EGLDisplay *disp)
+{
+   struct dri2_egl_display *dri2_dpy = disp->DriverData;
+   const __DRIextension **extensions;
+   /* Open the driver and bind the extensions in dri3_driver_extensions. */
+   extensions = dri2_open_driver(disp);
+   if (!extensions)
+      return EGL_FALSE;
+   /* On a failed bind, dlclose dri2_dpy->driver before returning EGL_FALSE. */
+   if (!dri2_bind_extensions(dri2_dpy, dri3_driver_extensions, extensions)) {
+      dlclose(dri2_dpy->driver);
+      return EGL_FALSE;
+   }
+   dri2_dpy->driver_extensions = extensions;
+   /* Only reached once the extension list has been stored on the display. */
+   return EGL_TRUE;
+}
+
 EGLBoolean
 dri2_load_driver(_EGLDisplay *disp)
 {
@@ -564,7 +575,9 @@ dri2_setup_screen(_EGLDisplay *disp)
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   unsigned int api_mask;

-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver) {
+      api_mask = dri2_dpy->image_driver->getAPIMask(dri2_dpy->dri_screen);
+   } else if (dri2_dpy->dri2) {
      api_mask = dri2_dpy->dri2->getAPIMask(dri2_dpy->dri_screen);
   } else {
      assert(dri2_dpy->swrast);
@@ -584,7 +597,7 @@ dri2_setup_screen(_EGLDisplay *disp)
   if (api_mask & (1 << __DRI_API_GLES3))
      disp->ClientAPIs |= EGL_OPENGL_ES3_BIT_KHR;

-   assert(dri2_dpy->dri2 || dri2_dpy->swrast);
+   assert(dri2_dpy->image_driver || dri2_dpy->dri2 || dri2_dpy->swrast);
   disp->Extensions.KHR_surfaceless_context = EGL_TRUE;
   disp->Extensions.MESA_configless_context = EGL_TRUE;

@@ -592,7 +605,9 @@ dri2_setup_screen(_EGLDisplay *disp)
                                   __DRI2_RENDERER_HAS_FRAMEBUFFER_SRGB))
      disp->Extensions.KHR_gl_colorspace = EGL_TRUE;

-   if (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) {
+   if (dri2_dpy->image_driver ||
+       (dri2_dpy->dri2 && dri2_dpy->dri2->base.version >= 3) ||
+       (dri2_dpy->swrast && dri2_dpy->swrast->base.version >= 3)) {
      disp->Extensions.KHR_create_context = EGL_TRUE;

      if (dri2_dpy->robustness)
@@ -654,7 +669,14 @@ dri2_create_screen(_EGLDisplay *disp)

   dri2_dpy = disp->DriverData;

-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver) {
+      dri2_dpy->dri_screen =
+         dri2_dpy->image_driver->createNewScreen2(0, dri2_dpy->fd,
+                                                  dri2_dpy->extensions,
+                                                  dri2_dpy->driver_extensions,
+                                                  &dri2_dpy->driver_configs,
+                                                  disp);
+   } else if (dri2_dpy->dri2) {
      if (dri2_dpy->dri2->base.version >= 4) {
         dri2_dpy->dri_screen =
            dri2_dpy->dri2->createNewScreen2(0, dri2_dpy->fd,
@@ -690,7 +712,7 @@ dri2_create_screen(_EGLDisplay *disp)

   extensions = dri2_dpy->core->getExtensions(dri2_dpy->dri_screen);

-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver || dri2_dpy->dri2) {
      if (!dri2_bind_extensions(dri2_dpy, dri2_core_extensions, extensions))
         goto cleanup_dri_screen;
   } else {
@@ -788,7 +810,7 @@ dri2_terminate(_EGLDriver *drv, _EGLDisplay *disp)

   if (dri2_dpy->own_dri_screen)
      dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
-   if (dri2_dpy->fd)
+   if (dri2_dpy->fd >= 0)
      close(dri2_dpy->fd);
   if (dri2_dpy->driver)
      dlclose(dri2_dpy->driver);
@@ -906,6 +928,55 @@ dri2_create_context_attribs_error(int dri_error)
   _eglError(egl_error, "dri2_create_context");
 }

+static bool
+dri2_fill_context_attribs(struct dri2_egl_context *dri2_ctx,
+                          struct dri2_egl_display *dri2_dpy,
+                          uint32_t *ctx_attribs,
+                          unsigned *num_attribs)
+{
+   int pos = 0;
+   /* Writes (name, value) attribute pairs taken from dri2_ctx->base. */
+   assert(*num_attribs >= 8);
+   /* At most 8 slots are written: version (4), flags (2), reset (2). */
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMajorVersion;
+   ctx_attribs[pos++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
+   ctx_attribs[pos++] = dri2_ctx->base.ClientMinorVersion;
+
+   if (dri2_ctx->base.Flags != 0) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it the robust-access flag.
+       * It may explode.  Instead, generate the required EGL error here.
+       */
+      if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
+            && !dri2_dpy->robustness) {
+         _eglError(EGL_BAD_MATCH, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_FLAGS;
+      ctx_attribs[pos++] = dri2_ctx->base.Flags;
+   }
+
+   if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
+      /* If the implementation doesn't support the __DRI2_ROBUSTNESS
+       * extension, don't even try to send it a reset strategy.  It may
+       * explode.  Instead, generate the required EGL error here.
+       */
+      if (!dri2_dpy->robustness) {
+         _eglError(EGL_BAD_CONFIG, "eglCreateContext");
+         return false;
+      }
+
+      ctx_attribs[pos++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
+      ctx_attribs[pos++] = __DRI_CTX_RESET_LOSE_CONTEXT;
+   }
+   /* Out: *num_attribs becomes the number of slots actually written. */
+   *num_attribs = pos;
+   /* Every false return above has already reported via _eglError(). */
+   return true;
+}
+
 /**
 * Called via eglCreateContext(), drv->API.CreateContext().
 */
@@ -974,10 +1045,10 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
       * doubleBufferMode check in
       * src/mesa/main/context.c:check_compatible()
       */
-      if (dri2_config->dri_double_config)
-         dri_config = dri2_config->dri_double_config;
+      if (dri2_config->dri_double_config[0])
+         dri_config = dri2_config->dri_double_config[0];
      else
-         dri_config = dri2_config->dri_single_config;
+         dri_config = dri2_config->dri_single_config[0];

      /* EGL_WINDOW_BIT is set only when there is a dri_double_config.  This
       * makes sure the back buffer will always be used.
@@ -988,47 +1059,34 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
   else
      dri_config = NULL;

-   if (dri2_dpy->dri2) {
+   if (dri2_dpy->image_driver) {
+      unsigned error;
+      unsigned num_attribs = 8;
+      uint32_t ctx_attribs[8];
+
+      if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+         goto cleanup;
+
+      dri2_ctx->dri_context =
+         dri2_dpy->image_driver->createContextAttribs(dri2_dpy->dri_screen,
+                                                      api,
+                                                      dri_config,
+                                                      shared,
+                                                      num_attribs / 2,
+                                                      ctx_attribs,
+                                                      & error,
+                                                      dri2_ctx);
+      dri2_create_context_attribs_error(error);
+   } else if (dri2_dpy->dri2) {
      if (dri2_dpy->dri2->base.version >= 3) {
         unsigned error;
-         unsigned num_attribs = 0;
+         unsigned num_attribs = 8;
         uint32_t ctx_attribs[8];

-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MAJOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMajorVersion;
-         ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_MINOR_VERSION;
-         ctx_attribs[num_attribs++] = dri2_ctx->base.ClientMinorVersion;
-
-         if (dri2_ctx->base.Flags != 0) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it the robust-access flag.
-             * It may explode.  Instead, generate the required EGL error here.
-             */
-            if ((dri2_ctx->base.Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) != 0
-                && !dri2_dpy->robustness) {
-               _eglError(EGL_BAD_MATCH, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_FLAGS;
-            ctx_attribs[num_attribs++] = dri2_ctx->base.Flags;
-         }
-
-         if (dri2_ctx->base.ResetNotificationStrategy != EGL_NO_RESET_NOTIFICATION_KHR) {
-            /* If the implementation doesn't support the __DRI2_ROBUSTNESS
-             * extension, don't even try to send it a reset strategy.  It may
-             * explode.  Instead, generate the required EGL error here.
-             */
-            if (!dri2_dpy->robustness) {
-               _eglError(EGL_BAD_CONFIG, "eglCreateContext");
-               goto cleanup;
-            }
-
-            ctx_attribs[num_attribs++] = __DRI_CTX_ATTRIB_RESET_STRATEGY;
-            ctx_attribs[num_attribs++] = __DRI_CTX_RESET_LOSE_CONTEXT;
-         }
-
-         assert(num_attribs <= ARRAY_SIZE(ctx_attribs));
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;

 	 dri2_ctx->dri_context =
 	    dri2_dpy->dri2->createContextAttribs(dri2_dpy->dri_screen,
@@ -1050,12 +1108,33 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
      }
   } else {
      assert(dri2_dpy->swrast);
-      dri2_ctx->dri_context =
-         dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
-                                                  api,
-                                                  dri_config,
-                                                  shared,
-                                                  dri2_ctx);
+      if (dri2_dpy->swrast->base.version >= 3) {
+         unsigned error;
+         unsigned num_attribs = 8;
+         uint32_t ctx_attribs[8];
+
+         if (!dri2_fill_context_attribs(dri2_ctx, dri2_dpy, ctx_attribs,
+                                        &num_attribs))
+            goto cleanup;
+
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createContextAttribs(dri2_dpy->dri_screen,
+                                                   api,
+                                                   dri_config,
+                                                   shared,
+                                                   num_attribs / 2,
+                                                   ctx_attribs,
+                                                   & error,
+                                                   dri2_ctx);
+         dri2_create_context_attribs_error(error);
+      } else {
+         dri2_ctx->dri_context =
+            dri2_dpy->swrast->createNewContextForAPI(dri2_dpy->dri_screen,
+                                                     api,
+                                                     dri_config,
+                                                     shared,
+                                                     dri2_ctx);
+      }
   }

   if (!dri2_ctx->dri_context)
@@ -1094,11 +1173,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
 {
   struct dri2_egl_driver *dri2_drv = dri2_egl_driver(drv);
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_dsurf = dri2_egl_surface(dsurf);
-   struct dri2_egl_surface *dri2_rsurf = dri2_egl_surface(rsurf);
   struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
   _EGLContext *old_ctx;
   _EGLSurface *old_dsurf, *old_rsurf;
+   _EGLSurface *tmp_dsurf, *tmp_rsurf;
   __DRIdrawable *ddraw, *rdraw;
   __DRIcontext *cctx;

@@ -1110,8 +1188,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
   if (old_ctx && dri2_drv->glFlush)
      dri2_drv->glFlush();

-   ddraw = (dri2_dsurf) ? dri2_dsurf->dri_drawable : NULL;
-   rdraw = (dri2_rsurf) ? dri2_rsurf->dri_drawable : NULL;
+   ddraw = (dsurf) ? dri2_dpy->vtbl->get_dri_drawable(dsurf) : NULL;
+   rdraw = (rsurf) ? dri2_dpy->vtbl->get_dri_drawable(rsurf) : NULL;
   cctx = (dri2_ctx) ? dri2_ctx->dri_context : NULL;

   if (old_ctx) {
@@ -1131,10 +1209,10 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
      return EGL_TRUE;
   } else {
      /* undo the previous _eglBindContext */
-      _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &dsurf, &rsurf);
+      _eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &tmp_dsurf, &tmp_rsurf);
      assert(&dri2_ctx->base == ctx &&
-             &dri2_dsurf->base == dsurf &&
-             &dri2_rsurf->base == rsurf);
+             tmp_dsurf == dsurf &&
+             tmp_rsurf == rsurf);

      _eglPutSurface(dsurf);
      _eglPutSurface(rsurf);
@@ -1148,6 +1226,14 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
   }
 }

+__DRIdrawable *
+dri2_surface_get_dri_drawable(_EGLSurface *surf)
+{
+   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
+   /* No NULL check: dri2_surf is dereferenced unconditionally. */
+   return dri2_surf->dri_drawable;
+}
+
 /*
 * Called from eglGetProcAddress() via drv->API.GetProcAddress().
 */
@@ -1210,7 +1296,7 @@ void
 dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(draw);
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(draw);

   if (dri2_dpy->flush) {
      if (dri2_dpy->flush->base.version >= 4) {
@@ -1228,12 +1314,12 @@ dri2_flush_drawable_for_swapbuffers(_EGLDisplay *disp, _EGLSurface *draw)
          *      after calling eglSwapBuffers."
          */
         dri2_dpy->flush->flush_with_flags(dri2_ctx->dri_context,
-                                           dri2_surf->dri_drawable,
+                                           dri_drawable,
                                           __DRI2_FLUSH_DRAWABLE |
                                           __DRI2_FLUSH_INVALIDATE_ANCILLARY,
                                           __DRI2_THROTTLE_SWAPBUFFER);
      } else {
-         dri2_dpy->flush->flush(dri2_surf->dri_drawable);
+         dri2_dpy->flush->flush(dri_drawable);
      }
   }
 }
@@ -1290,7 +1376,8 @@ static EGLBoolean
 dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(ctx->DrawSurface);
+   _EGLSurface *surf = ctx->DrawSurface;
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);

   (void) drv;

@@ -1298,7 +1385,7 @@ dri2_wait_client(_EGLDriver *drv, _EGLDisplay *disp, _EGLContext *ctx)
    * we need to copy fake to real here.*/

   if (dri2_dpy->flush != NULL)
-      dri2_dpy->flush->flush(dri2_surf->dri_drawable);
+      dri2_dpy->flush->flush(dri_drawable);

   return EGL_TRUE;
 }
@@ -1321,10 +1408,10 @@ dri2_bind_tex_image(_EGLDriver *drv,
 		    _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
   struct dri2_egl_context *dri2_ctx;
   _EGLContext *ctx;
   GLint format, target;
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);

   ctx = _eglGetCurrentContext();
   dri2_ctx = dri2_egl_context(ctx);
@@ -1332,7 +1419,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
   if (!_eglBindTexImage(drv, disp, surf, buffer))
      return EGL_FALSE;

-   switch (dri2_surf->base.TextureFormat) {
+   switch (surf->TextureFormat) {
   case EGL_TEXTURE_RGB:
      format = __DRI_TEXTURE_FORMAT_RGB;
      break;
@@ -1344,7 +1431,7 @@ dri2_bind_tex_image(_EGLDriver *drv,
      format = __DRI_TEXTURE_FORMAT_RGBA;
   }

-   switch (dri2_surf->base.TextureTarget) {
+   switch (surf->TextureTarget) {
   case EGL_TEXTURE_2D:
      target = GL_TEXTURE_2D;
      break;
@@ -1355,7 +1442,7 @@ dri2_bind_tex_image(_EGLDriver *drv,

   (*dri2_dpy->tex_buffer->setTexBuffer2)(dri2_ctx->dri_context,
 					  target, format,
-					  dri2_surf->dri_drawable);
+					  dri_drawable);

   return EGL_TRUE;
 }
@@ -1365,10 +1452,10 @@ dri2_release_tex_image(_EGLDriver *drv,
 		       _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer)
 {
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
-   struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
   struct dri2_egl_context *dri2_ctx;
   _EGLContext *ctx;
   GLint  target;
+   __DRIdrawable *dri_drawable = dri2_dpy->vtbl->get_dri_drawable(surf);

   ctx = _eglGetCurrentContext();
   dri2_ctx = dri2_egl_context(ctx);
@@ -1376,7 +1463,7 @@ dri2_release_tex_image(_EGLDriver *drv,
   if (!_eglReleaseTexImage(drv, disp, surf, buffer))
      return EGL_FALSE;

-   switch (dri2_surf->base.TextureTarget) {
+   switch (surf->TextureTarget) {
   case EGL_TEXTURE_2D:
      target = GL_TEXTURE_2D;
      break;
@@ -1388,7 +1475,7 @@ dri2_release_tex_image(_EGLDriver *drv,
       dri2_dpy->tex_buffer->releaseTexBuffer != NULL) {
      (*dri2_dpy->tex_buffer->releaseTexBuffer)(dri2_ctx->dri_context,
                                                target,
-                                                dri2_surf->dri_drawable);
+                                                dri_drawable);
   }

   return EGL_TRUE;
--- a/src/egl/drivers/dri2/egl_dri2.h
+++ b/src/egl/drivers/dri2/egl_dri2.h
@@ -35,6 +35,10 @@
 #include <xcb/dri2.h>
 #include <xcb/xfixes.h>
 #include <X11/Xlib-xcb.h>
+
+#ifdef HAVE_DRI3
+#include "loader_dri3_helper.h"
+#endif
 #endif

 #ifdef HAVE_WAYLAND_PLATFORM
@@ -145,6 +149,8 @@ struct dri2_egl_display_vtbl {
   EGLBoolean (*get_sync_values)(_EGLDisplay *display, _EGLSurface *surface,
                                 EGLuint64KHR *ust, EGLuint64KHR *msc,
                                 EGLuint64KHR *sbc);
+
+   __DRIdrawable *(*get_dri_drawable)(_EGLSurface *surf);
 };

 struct dri2_egl_display
@@ -158,6 +164,7 @@ struct dri2_egl_display
   const __DRIconfig       **driver_configs;
   void                     *driver;
   const __DRIcoreExtension       *core;
+   const __DRIimageDriverExtension *image_driver;
   const __DRIdri2Extension       *dri2;
   const __DRIswrastExtension     *swrast;
   const __DRI2flushExtension     *flush;
@@ -190,6 +197,9 @@ struct dri2_egl_display
 #ifdef HAVE_X11_PLATFORM
   xcb_connection_t         *conn;
   int                      screen;
+#ifdef HAVE_DRI3
+   struct loader_dri3_extensions loader_dri3_ext;
+#endif
 #endif

 #ifdef HAVE_WAYLAND_PLATFORM
@@ -203,8 +213,9 @@ struct dri2_egl_display
   int			     formats;
   uint32_t                  capabilities;
   int			     is_render_node;
-   int			     is_different_gpu;
 #endif
+
+   int			     is_different_gpu;
 };

 struct dri2_egl_context
@@ -284,10 +295,8 @@ struct dri2_egl_surface
 struct dri2_egl_config
 {
   _EGLConfig         base;
-   const __DRIconfig *dri_single_config;
-   const __DRIconfig *dri_double_config;
-   const __DRIconfig *dri_srgb_single_config;
-   const __DRIconfig *dri_srgb_double_config;
+   const __DRIconfig *dri_single_config[2];
+   const __DRIconfig *dri_double_config[2];
 };

 struct dri2_egl_image
@@ -326,9 +335,15 @@ dri2_setup_screen(_EGLDisplay *disp);
 EGLBoolean
 dri2_load_driver_swrast(_EGLDisplay *disp);

+EGLBoolean
+dri2_load_driver_dri3(_EGLDisplay *disp);
+
 EGLBoolean
 dri2_create_screen(_EGLDisplay *disp);

+__DRIdrawable *
+dri2_surface_get_dri_drawable(_EGLSurface *surf);
+
 __DRIimage *
 dri2_lookup_egl_image(__DRIscreen *screen, void *image, void *data);

--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -650,6 +650,7 @@ static struct dri2_egl_display_vtbl droid_display_vtbl = {
   .query_buffer_age = dri2_fallback_query_buffer_age,
   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
   .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };

 EGLBoolean
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -101,6 +101,7 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
   struct dri2_egl_surface *dri2_surf;
   struct gbm_surface *window = native_window;
   struct gbm_dri_surface *surf;
+   const __DRIconfig *config;

   (void) drv;

@@ -130,21 +131,20 @@ dri2_drm_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
      goto cleanup_surf;
   }

-   if (dri2_dpy->dri2) {
-      const __DRIconfig *config =
-         dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
-                             dri2_surf->base.GLColorspace);
+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);

+   if (dri2_dpy->dri2) {
      dri2_surf->dri_drawable =
         (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
                                              dri2_surf->gbm_surf);

   } else {
      assert(dri2_dpy->swrast != NULL);
+
      dri2_surf->dri_drawable =
-         (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-                                                 dri2_conf->dri_double_config,
-                                                 dri2_surf->gbm_surf);
+         (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                                dri2_surf->gbm_surf);

   }
   if (dri2_surf->dri_drawable == NULL) {
@@ -594,6 +594,7 @@ static struct dri2_egl_display_vtbl dri2_drm_display_vtbl = {
   .query_buffer_age = dri2_drm_query_buffer_age,
   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
   .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };

 EGLBoolean
@@ -623,27 +624,19 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
      dri2_dpy->own_device = 1;
      gbm = gbm_create_device(fd);
      if (gbm == NULL)
-         return EGL_FALSE;
+         goto cleanup;
+   } else {
+      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
+      if (fd < 0)
+         goto cleanup;
   }

-   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
+   if (strcmp(gbm_device_get_backend_name(gbm), "drm") != 0)
+      goto cleanup;

   dri2_dpy->gbm_dri = gbm_dri_device(gbm);
-   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI) {
-      free(dri2_dpy);
-      return EGL_FALSE;
-   }
-
-   if (fd < 0) {
-      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
-      if (fd < 0) {
-         free(dri2_dpy);
-         return EGL_FALSE;
-      }
-   }
+   if (dri2_dpy->gbm_dri->base.type != GBM_DRM_DRIVER_TYPE_DRI)
+      goto cleanup;

   dri2_dpy->fd = fd;
   dri2_dpy->device_name = loader_get_device_name_for_fd(dri2_dpy->fd);
@@ -727,4 +720,11 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
   dri2_dpy->vtbl = &dri2_drm_display_vtbl;

   return EGL_TRUE;
+
+cleanup:
+   if (fd >= 0)
+      close(fd);
+
+   free(dri2_dpy);
+   return EGL_FALSE;
 }
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1025,6 +1025,7 @@ static struct dri2_egl_display_vtbl dri2_wl_display_vtbl = {
   .query_buffer_age = dri2_wl_query_buffer_age,
   .create_wayland_buffer_from_image = dri2_wl_create_wayland_buffer_from_image,
   .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };

 static EGLBoolean
@@ -1637,6 +1638,7 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
   struct wl_egl_window *window = native_window;
   struct dri2_egl_surface *dri2_surf;
+   const __DRIconfig *config;

   (void) drv;

@@ -1661,10 +1663,12 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
   dri2_surf->base.Width = -1;
   dri2_surf->base.Height = -1;

+   config = dri2_get_dri_config(dri2_conf, EGL_WINDOW_BIT,
+                                dri2_surf->base.GLColorspace);
+
   dri2_surf->dri_drawable =
-      (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-                                              dri2_conf->dri_double_config,
-                                              dri2_surf);
+      (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen,
+                                             config, dri2_surf);
   if (dri2_surf->dri_drawable == NULL) {
      _eglError(EGL_BAD_ALLOC, "swrast->createNewDrawable");
      goto cleanup_dri_drawable;
@@ -1749,6 +1753,7 @@ static struct dri2_egl_display_vtbl dri2_wl_swrast_display_vtbl = {
   .query_buffer_age = dri2_fallback_query_buffer_age,
   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
   .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };

 static EGLBoolean
@@ -1796,6 +1801,7 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->formats == 0)
      goto cleanup_shm;

+   dri2_dpy->fd = -1;
   dri2_dpy->driver_name = strdup("swrast");
   if (!dri2_load_driver_swrast(disp))
      goto cleanup_shm;
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -45,6 +45,10 @@
 #include "egl_dri2_fallbacks.h"
 #include "loader.h"

+#ifdef HAVE_DRI3
+#include "platform_x11_dri3.h"
+#endif
+
 static EGLBoolean
 dri2_x11_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
                       EGLint interval);
@@ -206,6 +210,7 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
   xcb_generic_error_t *error;
   xcb_drawable_t drawable;
   xcb_screen_t *screen;
+   const __DRIconfig *config;

   STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
   drawable = (uintptr_t) native_surface;
@@ -245,19 +250,18 @@ dri2_x11_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
      dri2_surf->drawable = drawable;
   }

-   if (dri2_dpy->dri2) {
-      const __DRIconfig *config =
-         dri2_get_dri_config(dri2_conf, type, dri2_surf->base.GLColorspace);
+   config = dri2_get_dri_config(dri2_conf, type,
+                                dri2_surf->base.GLColorspace);

+   if (dri2_dpy->dri2) {
      dri2_surf->dri_drawable =
 	 (*dri2_dpy->dri2->createNewDrawable)(dri2_dpy->dri_screen, config,
 					      dri2_surf);
   } else {
      assert(dri2_dpy->swrast);
      dri2_surf->dri_drawable = 
-	 (*dri2_dpy->swrast->createNewDrawable) (dri2_dpy->dri_screen,
-						 dri2_conf->dri_double_config,
-						 dri2_surf);
+         (*dri2_dpy->swrast->createNewDrawable)(dri2_dpy->dri_screen, config,
+                                                dri2_surf);
   }

   if (dri2_surf->dri_drawable == NULL) {
@@ -703,7 +707,7 @@ dri2_x11_local_authenticate(_EGLDisplay *disp)

 static EGLBoolean
 dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
-                                 _EGLDisplay *disp)
+                                 _EGLDisplay *disp, bool supports_preserved)
 {
   xcb_screen_iterator_t s;
   xcb_depth_iterator_t d;
@@ -724,8 +728,10 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
   surface_type =
      EGL_WINDOW_BIT |
      EGL_PIXMAP_BIT |
-      EGL_PBUFFER_BIT |
-      EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
+      EGL_PBUFFER_BIT;
+
+   if (supports_preserved)
+      surface_type |= EGL_SWAP_BEHAVIOR_PRESERVED_BIT;

   while (d.rem > 0) {
      EGLBoolean class_added[6] = { 0, };
@@ -1112,6 +1118,7 @@ static struct dri2_egl_display_vtbl dri2_x11_swrast_display_vtbl = {
   .query_buffer_age = dri2_fallback_query_buffer_age,
   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
   .get_sync_values = dri2_fallback_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };

 static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = {
@@ -1130,6 +1137,7 @@ static struct dri2_egl_display_vtbl dri2_x11_display_vtbl = {
   .query_buffer_age = dri2_fallback_query_buffer_age,
   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
   .get_sync_values = dri2_x11_get_sync_values,
+   .get_dri_drawable = dri2_surface_get_dri_drawable,
 };

 static EGLBoolean
@@ -1161,6 +1169,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
    * Every hardware driver_name is set using strdup. Doing the same in
    * here will allow is to simply free the memory at dri2_terminate().
    */
+   dri2_dpy->fd = -1;
   dri2_dpy->driver_name = strdup("swrast");
   if (!dri2_load_driver_swrast(disp))
      goto cleanup_conn;
@@ -1178,7 +1187,7 @@ dri2_initialize_x11_swrast(_EGLDriver *drv, _EGLDisplay *disp)
   if (!dri2_create_screen(disp))
      goto cleanup_driver;

-   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true))
      goto cleanup_configs;

   /* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1249,6 +1258,100 @@ dri2_x11_setup_swap_interval(struct dri2_egl_display *dri2_dpy)
   }
 }

+#ifdef HAVE_DRI3
+/**
+ * Bring up an X11 display through DRI3.
+ *
+ * Opens the xcb connection (or borrows it from the application's Xlib
+ * display), lets dri3_x11_connect() obtain the device fd and driver name,
+ * loads the driver, creates the screen and adds the EGL configs.  The vtbl
+ * is installed last.
+ *
+ * Returns EGL_TRUE on success.  On failure the device fd, the connection
+ * (when it was opened here) and dri2_dpy are released and EGL_FALSE is
+ * returned.
+ */
+static EGLBoolean
+dri2_initialize_x11_dri3(_EGLDriver *drv, _EGLDisplay *disp)
+{
+   struct dri2_egl_display *dri2_dpy;
+
+   dri2_dpy = calloc(1, sizeof *dri2_dpy);
+   if (!dri2_dpy)
+      return _eglError(EGL_BAD_ALLOC, "eglInitialize");
+
+   /* calloc() leaves fd at 0, which is a valid descriptor.  Mark it as
+    * "not opened" so that the cleanup path can never close descriptor 0.
+    */
+   dri2_dpy->fd = -1;
+
+   disp->DriverData = (void *) dri2_dpy;
+   if (disp->PlatformDisplay == NULL) {
+      dri2_dpy->conn = xcb_connect(0, &dri2_dpy->screen);
+      dri2_dpy->own_device = true;
+   } else {
+      Display *dpy = disp->PlatformDisplay;
+
+      dri2_dpy->conn = XGetXCBConnection(dpy);
+      dri2_dpy->screen = DefaultScreen(dpy);
+   }
+
+   if (xcb_connection_has_error(dri2_dpy->conn)) {
+      _eglLog(_EGL_WARNING, "DRI3: xcb_connect failed");
+      /* A connection object returned by xcb_connect() has to be handed to
+       * xcb_disconnect() even when it is in the error state.
+       */
+      goto cleanup_conn;
+   }
+
+   if (dri2_dpy->conn) {
+      /* On failure dri3_x11_connect() has already closed any fd it opened. */
+      if (!dri3_x11_connect(dri2_dpy))
+         goto cleanup_conn;
+   }
+
+   /* From here on the device fd is open and must be closed on failure. */
+   if (!dri2_load_driver_dri3(disp))
+      goto cleanup_fd;
+
+   dri2_dpy->extensions[0] = &dri3_image_loader_extension.base;
+   dri2_dpy->extensions[1] = &use_invalidate.base;
+   dri2_dpy->extensions[2] = &image_lookup_extension.base;
+   dri2_dpy->extensions[3] = NULL;
+
+   dri2_dpy->swap_available = true;
+   dri2_dpy->invalidate_available = true;
+
+   if (!dri2_create_screen(disp))
+      goto cleanup_fd;
+
+   dri2_x11_setup_swap_interval(dri2_dpy);
+
+   if (!dri2_dpy->is_different_gpu)
+      disp->Extensions.KHR_image_pixmap = EGL_TRUE;
+   disp->Extensions.NOK_texture_from_pixmap = EGL_TRUE;
+   disp->Extensions.CHROMIUM_sync_control = EGL_TRUE;
+   disp->Extensions.EXT_buffer_age = EGL_TRUE;
+
+#ifdef HAVE_WAYLAND_PLATFORM
+   disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
+#endif
+
+   if (dri2_dpy->conn) {
+      /* "false": configs are added without EGL_SWAP_BEHAVIOR_PRESERVED_BIT. */
+      if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, false))
+         goto cleanup_configs;
+   }
+
+   dri2_dpy->loader_dri3_ext.core = dri2_dpy->core;
+   dri2_dpy->loader_dri3_ext.image_driver = dri2_dpy->image_driver;
+   dri2_dpy->loader_dri3_ext.flush = dri2_dpy->flush;
+   dri2_dpy->loader_dri3_ext.tex_buffer = dri2_dpy->tex_buffer;
+   dri2_dpy->loader_dri3_ext.image = dri2_dpy->image;
+   dri2_dpy->loader_dri3_ext.config = dri2_dpy->config;
+
+   /* Fill vtbl last to prevent accidentally calling virtual function during
+    * initialization.
+    */
+   dri2_dpy->vtbl = &dri3_x11_display_vtbl;
+
+   _eglLog(_EGL_INFO, "Using DRI3");
+
+   return EGL_TRUE;
+
+ cleanup_configs:
+   _eglCleanupDisplay(disp);
+   dri2_dpy->core->destroyScreen(dri2_dpy->dri_screen);
+   dlclose(dri2_dpy->driver);
+ cleanup_fd:
+   if (dri2_dpy->fd >= 0)
+      close(dri2_dpy->fd);
+ cleanup_conn:
+   /* Only tear down a connection that was opened here, never the
+    * application's own.
+    */
+   if (disp->PlatformDisplay == NULL)
+      xcb_disconnect(dri2_dpy->conn);
+   free(dri2_dpy);
+
+   return EGL_FALSE;
+}
+#endif
+
 static EGLBoolean
 dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
 {
@@ -1320,7 +1423,7 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
   disp->Extensions.WL_bind_wayland_display = EGL_TRUE;
 #endif

-   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp))
+   if (!dri2_x11_add_configs_for_visuals(dri2_dpy, disp, true))
      goto cleanup_configs;

   /* Fill vtbl last to prevent accidentally calling virtual function during
@@ -1328,6 +1431,8 @@ dri2_initialize_x11_dri2(_EGLDriver *drv, _EGLDisplay *disp)
    */
   dri2_dpy->vtbl = &dri2_x11_display_vtbl;

+   _eglLog(_EGL_INFO, "Using DRI2");
+
   return EGL_TRUE;

 cleanup_configs:
@@ -1354,9 +1459,16 @@ dri2_initialize_x11(_EGLDriver *drv, _EGLDisplay *disp)
   int x11_dri2_accel = (getenv("LIBGL_ALWAYS_SOFTWARE") == NULL);

   if (x11_dri2_accel) {
-      if (!dri2_initialize_x11_dri2(drv, disp)) {
-         initialized = dri2_initialize_x11_swrast(drv, disp);
+#ifdef HAVE_DRI3
+      if (getenv("LIBGL_DRI3_DISABLE") != NULL ||
+          !dri2_initialize_x11_dri3(drv, disp)) {
+#endif
+         if (!dri2_initialize_x11_dri2(drv, disp)) {
+            initialized = dri2_initialize_x11_swrast(drv, disp);
+         }
+#ifdef HAVE_DRI3
      }
+#endif
   } else {
      initialized = dri2_initialize_x11_swrast(drv, disp);
   }
--- a/src/egl/drivers/dri2/platform_x11_dri3.c
+++ b/src/egl/drivers/dri2/platform_x11_dri3.c
@@ -0,0 +1,547 @@
+/*
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <xcb/xcb.h>
+#include <xcb/dri3.h>
+#include <xcb/present.h>
+
+#include <xf86drm.h>
+
+#include "egl_dri2.h"
+#include "egl_dri2_fallbacks.h"
+#include "platform_x11_dri3.h"
+
+#include "loader.h"
+#include "loader_dri3_helper.h"
+
+/**
+ * Recover the dri3_egl_surface that embeds \c draw as its
+ * \c loader_drawable member.
+ */
+static struct dri3_egl_surface *
+loader_drawable_to_egl_surface(struct loader_dri3_drawable *draw) {
+   size_t offset = offsetof(struct dri3_egl_surface, loader_drawable);
+   /* Step back in bytes through a char pointer: arithmetic on void * is a
+    * GNU extension, not ISO C.
+    */
+   return (struct dri3_egl_surface *)((char *) draw - offset);
+}
+
+/** loader_dri3 callback: report the swap interval stored on the EGL surface. */
+static int
+egl_dri3_get_swap_interval(struct loader_dri3_drawable *draw)
+{
+   return loader_drawable_to_egl_surface(draw)->base.SwapInterval;
+}
+
+/**
+ * loader_dri3 callback: limit \c interval to the range
+ * [MinSwapInterval, MaxSwapInterval] of the surface's config.
+ */
+static int
+egl_dri3_clamp_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+   struct dri3_egl_surface *surf = loader_drawable_to_egl_surface(draw);
+
+   /* The upper bound is tested first, exactly as before. */
+   if (interval > surf->base.Config->MaxSwapInterval)
+      return surf->base.Config->MaxSwapInterval;
+
+   if (interval < surf->base.Config->MinSwapInterval)
+      return surf->base.Config->MinSwapInterval;
+
+   return interval;
+}
+
+/** loader_dri3 callback: store \c interval on the EGL surface. */
+static void
+egl_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
+{
+   loader_drawable_to_egl_surface(draw)->base.SwapInterval = interval;
+}
+
+/** loader_dri3 callback: store the drawable's dimensions on the EGL surface. */
+static void
+egl_dri3_set_drawable_size(struct loader_dri3_drawable *draw,
+                           int width, int height)
+{
+   struct dri3_egl_surface *surf = loader_drawable_to_egl_surface(draw);
+
+   surf->base.Height = height;
+   surf->base.Width = width;
+}
+
+/**
+ * loader_dri3 callback: tell whether the calling thread's current context
+ * lives on the same display as the surface behind \c draw.
+ *
+ * Returns false when no context is current.
+ */
+static bool
+egl_dri3_in_current_context(struct loader_dri3_drawable *draw)
+{
+   struct dri3_egl_surface *dri3_surf = loader_drawable_to_egl_surface(draw);
+   _EGLContext *ctx = _eglGetCurrentContext();
+
+   /* Without a current context there is nothing to compare against, and
+    * dereferencing ctx would crash.
+    */
+   if (ctx == NULL)
+      return false;
+
+   return ctx->Resource.Display == dri3_surf->base.Resource.Display;
+}
+
+/**
+ * loader_dri3 callback: return the __DRIcontext of the calling thread's
+ * current EGL context, or NULL when no context is current.
+ */
+static __DRIcontext *
+egl_dri3_get_dri_context(struct loader_dri3_drawable *draw)
+{
+   _EGLContext *ctx = _eglGetCurrentContext();
+   struct dri2_egl_context *dri2_ctx;
+
+   (void) draw;
+
+   /* Do not dereference a missing context. */
+   if (ctx == NULL)
+      return NULL;
+
+   dri2_ctx = dri2_egl_context(ctx);
+
+   return dri2_ctx->dri_context;
+}
+
+/**
+ * loader_dri3 callback: flush the surface via
+ * dri2_flush_drawable_for_swapbuffers().  \c flags is not used.
+ */
+static void
+egl_dri3_flush_drawable(struct loader_dri3_drawable *draw, unsigned flags)
+{
+   struct dri3_egl_surface *surf = loader_drawable_to_egl_surface(draw);
+
+   (void) flags;
+
+   dri2_flush_drawable_for_swapbuffers(surf->base.Resource.Display,
+                                       &surf->base);
+}
+
+/* Callbacks handed to loader_dri3_drawable_init() for every surface. */
+static struct loader_dri3_vtable egl_dri3_vtable = {
+   /* swap interval bookkeeping */
+   .get_swap_interval = egl_dri3_get_swap_interval,
+   .set_swap_interval = egl_dri3_set_swap_interval,
+   .clamp_swap_interval = egl_dri3_clamp_swap_interval,
+
+   /* surface / context state */
+   .set_drawable_size = egl_dri3_set_drawable_size,
+   .in_current_context = egl_dri3_in_current_context,
+   .get_dri_context = egl_dri3_get_dri_context,
+   .flush_drawable = egl_dri3_flush_drawable,
+
+   /* no show_fps callback is provided */
+   .show_fps = NULL,
+};
+
+/**
+ * destroy_surface entry of dri3_x11_display_vtbl.
+ *
+ * Drops one reference on \c surf.  When _eglPutSurface() reports that it
+ * was the last one, the loader drawable is torn down, the pixmap that
+ * dri3_create_surface() allocated for a pbuffer is released, and the
+ * surface is freed.  Always returns EGL_TRUE.
+ */
+static EGLBoolean
+dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
+   xcb_drawable_t drawable;
+
+   (void) drv;
+
+   if (!_eglPutSurface(surf))
+      return EGL_TRUE;
+
+   /* Remember the X id before the loader drawable is finalized. */
+   drawable = dri3_surf->loader_drawable.drawable;
+
+   loader_dri3_drawable_fini(&dri3_surf->loader_drawable);
+
+   /* Pbuffers are backed by a pixmap created in dri3_create_surface();
+    * nobody else owns it, so it has to be freed here.
+    */
+   if (surf->Type == EGL_PBUFFER_BIT)
+      xcb_free_pixmap(dri2_dpy->conn, drawable);
+
+   free(surf);
+
+   return EGL_TRUE;
+}
+
+/**
+ * swap_interval entry of dri3_x11_display_vtbl: forward the request to
+ * loader_dri3_set_swap_interval().  Always returns EGL_TRUE.
+ */
+static EGLBoolean
+dri3_set_swap_interval(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
+                       EGLint interval)
+{
+   (void) drv;
+   (void) disp;
+
+   loader_dri3_set_swap_interval(&dri3_egl_surface(surf)->loader_drawable,
+                                 interval);
+   return EGL_TRUE;
+}
+
+/**
+ * Walk \c iter and return the screen at index \c screen, or NULL when the
+ * iterator runs out before that index is reached.
+ */
+static xcb_screen_t *
+get_xcb_screen(xcb_screen_iterator_t iter, int screen)
+{
+   while (iter.rem) {
+      if (screen == 0)
+         return iter.data;
+
+      --screen;
+      xcb_screen_next(&iter);
+   }
+
+   return NULL;
+}
+
+/**
+ * Common back end of the window, pixmap and pbuffer surface constructors.
+ *
+ * \c type is the EGL surface type bit passed by those wrappers and
+ * \c native_surface carries the X drawable id inside a pointer.  For
+ * EGL_PBUFFER_BIT a new pixmap is created and used as the drawable instead.
+ *
+ * Returns the new surface, or NULL on failure.
+ */
+static _EGLSurface *
+dri3_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
+                    _EGLConfig *conf, void *native_surface,
+                    const EGLint *attrib_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   struct dri2_egl_config *dri2_conf = dri2_egl_config(conf);
+   struct dri3_egl_surface *dri3_surf;
+   const __DRIconfig *dri_config;
+   xcb_drawable_t drawable;
+   xcb_screen_iterator_t s;
+   xcb_screen_t *screen;
+
+   /* The drawable id travels through a void *, so both must have one size. */
+   STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_surface));
+   drawable = (uintptr_t) native_surface;
+
+   (void) drv;
+
+   dri3_surf = calloc(1, sizeof *dri3_surf);
+   if (!dri3_surf) {
+      _eglError(EGL_BAD_ALLOC, "dri3_create_surface");
+      return NULL;
+   }
+
+   if (!_eglInitSurface(&dri3_surf->base, disp, type, conf, attrib_list))
+      goto cleanup_surf;
+
+   if (type == EGL_PBUFFER_BIT) {
+      /* A pbuffer has no native drawable: back it with a pixmap of the
+       * surface's size, created against the root window of our screen.
+       */
+      s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+      screen = get_xcb_screen(s, dri2_dpy->screen);
+      if (!screen) {
+         _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_create_surface");
+         goto cleanup_surf;
+      }
+
+      drawable = xcb_generate_id(dri2_dpy->conn);
+      xcb_create_pixmap(dri2_dpy->conn, conf->BufferSize,
+                        drawable, screen->root,
+                        dri3_surf->base.Width, dri3_surf->base.Height);
+   }
+
+   dri_config = dri2_get_dri_config(dri2_conf, type,
+                                    dri3_surf->base.GLColorspace);
+
+   /* A non-zero return from loader_dri3_drawable_init() is treated as
+    * failure.
+    */
+   if (loader_dri3_drawable_init(dri2_dpy->conn, drawable,
+                                 dri2_dpy->dri_screen,
+                                 dri2_dpy->is_different_gpu, dri_config,
+                                 &dri2_dpy->loader_dri3_ext,
+                                 &egl_dri3_vtable,
+                                 &dri3_surf->loader_drawable)) {
+      _eglError(EGL_BAD_ALLOC, "dri3_surface_create");
+      goto cleanup_pixmap;
+   }
+
+   return &dri3_surf->base;
+
+ cleanup_pixmap:
+   /* Only the pbuffer path created a pixmap of its own. */
+   if (type == EGL_PBUFFER_BIT)
+      xcb_free_pixmap(dri2_dpy->conn, drawable);
+ cleanup_surf:
+   free(dri3_surf);
+
+   return NULL;
+}
+
+/**
+ * Window surface constructor, reached from eglCreateWindowSurface() through
+ * drv->API.CreateWindowSurface().
+ *
+ * A successfully created surface starts out with the display's default swap
+ * interval.
+ */
+static _EGLSurface *
+dri3_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
+                           _EGLConfig *conf, void *native_window,
+                           const EGLint *attrib_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   _EGLSurface *window_surf = dri3_create_surface(drv, disp, EGL_WINDOW_BIT,
+                                                  conf, native_window,
+                                                  attrib_list);
+
+   if (window_surf == NULL)
+      return NULL;
+
+   dri3_set_swap_interval(drv, disp, window_surf,
+                          dri2_dpy->default_swap_interval);
+   return window_surf;
+}
+
+/** Pixmap surface constructor: dri3_create_surface() with EGL_PIXMAP_BIT. */
+static _EGLSurface *
+dri3_create_pixmap_surface(_EGLDriver *drv, _EGLDisplay *disp,
+                           _EGLConfig *conf, void *native_pixmap,
+                           const EGLint *attrib_list)
+{
+   _EGLSurface *pixmap_surf;
+
+   pixmap_surf = dri3_create_surface(drv, disp, EGL_PIXMAP_BIT, conf,
+                                     native_pixmap, attrib_list);
+   return pixmap_surf;
+}
+
+/**
+ * Pbuffer surface constructor: dri3_create_surface() with EGL_PBUFFER_BIT
+ * and no native drawable (XCB_WINDOW_NONE).
+ */
+static _EGLSurface *
+dri3_create_pbuffer_surface(_EGLDriver *drv, _EGLDisplay *disp,
+                                _EGLConfig *conf, const EGLint *attrib_list)
+{
+   _EGLSurface *pbuffer_surf;
+
+   pbuffer_surf = dri3_create_surface(drv, disp, EGL_PBUFFER_BIT, conf,
+                                      XCB_WINDOW_NONE, attrib_list);
+   return pbuffer_surf;
+}
+
+/**
+ * get_sync_values entry of dri3_x11_display_vtbl: fetch ust/msc/sbc through
+ * loader_dri3_wait_for_msc() with all three targets set to 0.
+ */
+static EGLBoolean
+dri3_get_sync_values(_EGLDisplay *display, _EGLSurface *surface,
+                     EGLuint64KHR *ust, EGLuint64KHR *msc,
+                     EGLuint64KHR *sbc)
+{
+   struct loader_dri3_drawable *loader_draw =
+      &dri3_egl_surface(surface)->loader_drawable;
+
+   (void) display;
+
+   if (loader_dri3_wait_for_msc(loader_draw, 0, 0, 0,
+                                (int64_t *) ust, (int64_t *) msc,
+                                (int64_t *) sbc))
+      return EGL_TRUE;
+
+   return EGL_FALSE;
+}
+
+/**
+ * Create an EGLImage from an X pixmap.
+ *
+ * \c buffer carries the pixmap id.  The pixmap's buffer is requested from
+ * the server with DRI3 BufferFromPixmap and wrapped in a DRI image whose
+ * format is chosen from the pixmap depth (16, 24 or 32).
+ *
+ * Returns the new image, or NULL on failure.  The xcb reply is released on
+ * every path.
+ */
+static _EGLImage *
+dri3_create_image_khr_pixmap(_EGLDisplay *disp, _EGLContext *ctx,
+                             EGLClientBuffer buffer, const EGLint *attr_list)
+{
+   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
+   struct dri2_egl_image *dri2_img;
+   xcb_drawable_t drawable;
+   xcb_dri3_buffer_from_pixmap_cookie_t bp_cookie;
+   xcb_dri3_buffer_from_pixmap_reply_t  *bp_reply;
+   unsigned int format;
+
+   (void) ctx;
+   (void) attr_list;
+
+   drawable = (xcb_drawable_t) (uintptr_t) buffer;
+   bp_cookie = xcb_dri3_buffer_from_pixmap(dri2_dpy->conn, drawable);
+   bp_reply = xcb_dri3_buffer_from_pixmap_reply(dri2_dpy->conn,
+                                                bp_cookie, NULL);
+   if (!bp_reply) {
+      _eglError(EGL_BAD_ALLOC, "xcb_dri3_buffer_from_pixmap");
+      return NULL;
+   }
+
+   switch (bp_reply->depth) {
+   case 16:
+      format = __DRI_IMAGE_FORMAT_RGB565;
+      break;
+   case 24:
+      format = __DRI_IMAGE_FORMAT_XRGB8888;
+      break;
+   case 32:
+      format = __DRI_IMAGE_FORMAT_ARGB8888;
+      break;
+   default:
+      _eglError(EGL_BAD_PARAMETER,
+                "dri3_create_image_khr: unsupported pixmap depth");
+      free(bp_reply);
+      return NULL;
+   }
+
+   dri2_img = malloc(sizeof *dri2_img);
+   if (!dri2_img) {
+      _eglError(EGL_BAD_ALLOC, "dri3_create_image_khr");
+      /* The reply is owned by us; do not leak it on the error path. */
+      free(bp_reply);
+      return NULL;
+   }
+
+   if (!_eglInitImage(&dri2_img->base, disp)) {
+      free(dri2_img);
+      free(bp_reply);
+      return NULL;
+   }
+
+   dri2_img->dri_image = loader_dri3_create_image(dri2_dpy->conn,
+                                                  bp_reply,
+                                                  format,
+                                                  dri2_dpy->dri_screen,
+                                                  dri2_dpy->image,
+                                                  dri2_img);
+
+   free(bp_reply);
+
+   return &dri2_img->base;
+}
+
+/**
+ * create_image entry of dri3_x11_display_vtbl.  Native pixmaps are handled
+ * here through DRI3; every other target goes to dri2_create_image_khr().
+ */
+static _EGLImage *
+dri3_create_image_khr(_EGLDriver *drv, _EGLDisplay *disp,
+                      _EGLContext *ctx, EGLenum target,
+                      EGLClientBuffer buffer, const EGLint *attr_list)
+{
+   (void) drv;
+
+   if (target == EGL_NATIVE_PIXMAP_KHR)
+      return dri3_create_image_khr_pixmap(disp, ctx, buffer, attr_list);
+
+   return dri2_create_image_khr(drv, disp, ctx, target, buffer, attr_list);
+}
+
+/**
+ * Image loader hook the driver invokes to copy its fake front buffer to the
+ * real one.  Front-buffer rendering is not implemented here, so the call
+ * only logs a warning.
+ */
+static void
+dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)
+{
+   (void) driDrawable;
+   (void) loaderPrivate;
+
+   /* Whether front-buffer rendering should be supported at all was never
+    * settled; see
+    * http://lists.freedesktop.org/archives/mesa-dev/2013-June/040129.html
+    */
+   _eglLog(_EGL_WARNING, "FIXME: egl/x11 doesn't support front buffer rendering.");
+}
+
+/* Image loader extension (version 1) installed as extensions[0] of a DRI3
+ * display: buffers come from the shared loader_dri3 helper, front-buffer
+ * flushes from the stub above.
+ */
+const __DRIimageLoaderExtension dri3_image_loader_extension = {
+   .base = { __DRI_IMAGE_LOADER, 1 },
+
+   .getBuffers          = loader_dri3_get_buffers,
+   .flushFrontBuffer    = dri3_flush_front_buffer,
+};
+
+/**
+ * swap_buffers entry of dri3_x11_display_vtbl.
+ *
+ * Window surfaces are presented through loader_dri3_swap_buffers_msc();
+ * the call succeeds unless that helper returns -1.  Pixmap and pbuffer
+ * surfaces are left untouched and the call reports success.
+ */
+static EGLBoolean
+dri3_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
+{
+   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(draw);
+
+   (void) drv;
+   (void) disp;
+
+   /* No-op for a pixmap or pbuffer surface.  A no-op is not a failure, so
+    * report EGL_TRUE rather than 0 (EGL_FALSE).
+    */
+   if (draw->Type == EGL_PIXMAP_BIT || draw->Type == EGL_PBUFFER_BIT)
+      return EGL_TRUE;
+
+   return loader_dri3_swap_buffers_msc(&dri3_surf->loader_drawable,
+                                       0, 0, 0, 0,
+                                       draw->SwapBehavior == EGL_BUFFER_PRESERVED) != -1;
+}
+
+/**
+ * copy_buffers entry of dri3_x11_display_vtbl: copy the surface's drawable
+ * into the pixmap whose id is carried by \c native_pixmap_target.
+ * Always returns EGL_TRUE.
+ */
+static EGLBoolean
+dri3_copy_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf,
+                  void *native_pixmap_target)
+{
+   struct loader_dri3_drawable *loader_draw =
+      &dri3_egl_surface(surf)->loader_drawable;
+   xcb_pixmap_t dest;
+
+   (void) drv;
+   (void) disp;
+
+   /* The pixmap id arrives inside a pointer; both must have one size. */
+   STATIC_ASSERT(sizeof(uintptr_t) == sizeof(native_pixmap_target));
+   dest = (uintptr_t) native_pixmap_target;
+
+   loader_dri3_copy_drawable(loader_draw, dest, loader_draw->drawable);
+
+   return EGL_TRUE;
+}
+
+/**
+ * query_buffer_age entry of dri3_x11_display_vtbl: hand the question to
+ * loader_dri3_query_buffer_age().
+ */
+static int
+dri3_query_buffer_age(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf)
+{
+   (void) drv;
+   (void) dpy;
+
+   return loader_dri3_query_buffer_age(&dri3_egl_surface(surf)->loader_drawable);
+}
+
+/**
+ * get_dri_drawable entry of dri3_x11_display_vtbl: the __DRIdrawable is the
+ * one kept inside the surface's loader drawable.
+ */
+static __DRIdrawable *
+dri3_get_dri_drawable(_EGLSurface *surf)
+{
+   return dri3_egl_surface(surf)->loader_drawable.dri_drawable;
+}
+
+/* Display vtbl installed by dri2_initialize_x11_dri3().  Entries without a
+ * DRI3 implementation in this file use the shared dri2_fallback_* versions;
+ * no authenticate hook is provided.
+ */
+struct dri2_egl_display_vtbl dri3_x11_display_vtbl = {
+   .authenticate = NULL,
+   .create_window_surface = dri3_create_window_surface,
+   .create_pixmap_surface = dri3_create_pixmap_surface,
+   .create_pbuffer_surface = dri3_create_pbuffer_surface,
+   .destroy_surface = dri3_destroy_surface,
+   .create_image = dri3_create_image_khr,
+   .swap_interval = dri3_set_swap_interval,
+   .swap_buffers = dri3_swap_buffers,
+   .swap_buffers_with_damage = dri2_fallback_swap_buffers_with_damage,
+   .swap_buffers_region = dri2_fallback_swap_buffers_region,
+   .post_sub_buffer = dri2_fallback_post_sub_buffer,
+   .copy_buffers = dri3_copy_buffers,
+   .query_buffer_age = dri3_query_buffer_age,
+   .create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
+   .get_sync_values = dri3_get_sync_values,
+   .get_dri_drawable = dri3_get_dri_drawable,
+};
+
+/**
+ * Return the device name to use for \c fd.
+ *
+ * The render node name is preferred.  When none is available, the generic
+ * loader name is used only if EGL_FORCE_DRI3 is set in the environment;
+ * otherwise two warnings are logged and NULL is returned.
+ */
+static char *
+dri3_get_device_name(int fd)
+{
+   char *name = drmGetRenderDeviceNameFromFd(fd);
+
+   if (name != NULL)
+      return name;
+
+   /* For dri3, render node support is required for WL_bind_wayland_display.
+    * In order not to regress on older systems without kernel or libdrm
+    * support, fall back to dri2. User can override it with environment
+    * variable if they don't need to use that extension.
+    */
+   if (getenv("EGL_FORCE_DRI3") != NULL)
+      return loader_get_device_name_for_fd(fd);
+
+   _eglLog(_EGL_WARNING, "Render node support not available, falling back to dri2");
+   _eglLog(_EGL_WARNING, "If you want to force dri3, set EGL_FORCE_DRI3 environment variable");
+
+   return NULL;
+}
+
+/**
+ * Check that the X server speaks DRI3 and Present, then open the device.
+ *
+ * On success dri2_dpy->fd, dri2_dpy->driver_name and dri2_dpy->device_name
+ * are filled in and EGL_TRUE is returned.  On failure EGL_FALSE is returned;
+ * an fd opened here has been closed again and a driver name obtained here
+ * has been freed, so the caller has nothing of ours to release.
+ */
+EGLBoolean
+dri3_x11_connect(struct dri2_egl_display *dri2_dpy)
+{
+   xcb_dri3_query_version_reply_t *dri3_query;
+   xcb_dri3_query_version_cookie_t dri3_query_cookie;
+   xcb_present_query_version_reply_t *present_query;
+   xcb_present_query_version_cookie_t present_query_cookie;
+   xcb_generic_error_t *error;
+   xcb_screen_iterator_t s;
+   xcb_screen_t *screen;
+   const xcb_query_extension_reply_t *extension;
+
+   xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_dri3_id);
+   xcb_prefetch_extension_data (dri2_dpy->conn, &xcb_present_id);
+
+   extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_dri3_id);
+   if (!(extension && extension->present))
+      return EGL_FALSE;
+
+   extension = xcb_get_extension_data(dri2_dpy->conn, &xcb_present_id);
+   if (!(extension && extension->present))
+      return EGL_FALSE;
+
+   /* Send both version queries before waiting for either reply. */
+   dri3_query_cookie = xcb_dri3_query_version(dri2_dpy->conn,
+                                              XCB_DRI3_MAJOR_VERSION,
+                                              XCB_DRI3_MINOR_VERSION);
+
+   present_query_cookie = xcb_present_query_version(dri2_dpy->conn,
+                                                    XCB_PRESENT_MAJOR_VERSION,
+                                                    XCB_PRESENT_MINOR_VERSION);
+
+   dri3_query =
+      xcb_dri3_query_version_reply(dri2_dpy->conn, dri3_query_cookie, &error);
+   if (dri3_query == NULL || error != NULL) {
+      _eglLog(_EGL_WARNING, "DRI3: failed to query the version");
+      free(dri3_query);
+      free(error);
+      return EGL_FALSE;
+   }
+   free(dri3_query);
+
+   present_query =
+      xcb_present_query_version_reply(dri2_dpy->conn,
+                                      present_query_cookie, &error);
+   if (present_query == NULL || error != NULL) {
+      _eglLog(_EGL_WARNING, "DRI3: failed to query Present version");
+      free(present_query);
+      free(error);
+      return EGL_FALSE;
+   }
+   free(present_query);
+
+   s = xcb_setup_roots_iterator(xcb_get_setup(dri2_dpy->conn));
+   screen = get_xcb_screen(s, dri2_dpy->screen);
+   if (!screen) {
+      _eglError(EGL_BAD_NATIVE_WINDOW, "dri3_x11_connect");
+      return EGL_FALSE;
+   }
+
+   dri2_dpy->fd = loader_dri3_open(dri2_dpy->conn, screen->root, 0);
+   if (dri2_dpy->fd < 0) {
+      int conn_error = xcb_connection_has_error(dri2_dpy->conn);
+      _eglLog(_EGL_WARNING, "DRI3: Screen seems not DRI3 capable");
+
+      if (conn_error)
+         _eglLog(_EGL_WARNING, "DRI3: Failed to initialize");
+
+      return EGL_FALSE;
+   }
+
+   dri2_dpy->fd = loader_get_user_preferred_fd(dri2_dpy->fd, &dri2_dpy->is_different_gpu);
+
+   dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd, 0);
+   if (!dri2_dpy->driver_name) {
+      _eglLog(_EGL_WARNING, "DRI3: No driver found");
+      close(dri2_dpy->fd);
+      return EGL_FALSE;
+   }
+
+   dri2_dpy->device_name = dri3_get_device_name(dri2_dpy->fd);
+   if (!dri2_dpy->device_name) {
+      /* The caller frees dri2_dpy right after a failure, so the driver
+       * name obtained just above would otherwise be leaked.
+       */
+      free(dri2_dpy->driver_name);
+      dri2_dpy->driver_name = NULL;
+      close(dri2_dpy->fd);
+      return EGL_FALSE;
+   }
+
+   return EGL_TRUE;
+}
--- a/src/egl/drivers/dri2/platform_x11_dri3.h
+++ b/src/egl/drivers/dri2/platform_x11_dri3.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright © 2015 Boyan Ding
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#ifndef EGL_X11_DRI3_INCLUDED
+#define EGL_X11_DRI3_INCLUDED
+
+#include "egl_dri2.h"
+
+/* Provides dri3_egl_surface(), used to get from an _EGLSurface pointer to
+ * the struct dri3_egl_surface that contains it.
+ */
+_EGL_DRIVER_TYPECAST(dri3_egl_surface, _EGLSurface, obj)
+
+/* An EGL surface on the DRI3 path: the generic surface plus the state of
+ * the shared loader_dri3 helper.
+ */
+struct dri3_egl_surface {
+   _EGLSurface base;
+   struct loader_dri3_drawable loader_drawable;
+};
+
+extern const __DRIimageLoaderExtension dri3_image_loader_extension;
+extern struct dri2_egl_display_vtbl dri3_x11_display_vtbl;
+
+EGLBoolean
+dri3_x11_connect(struct dri2_egl_display *dri2_dpy);
+
+#endif
--- a/src/egl/egl-symbols-check
+++ b/src/egl/egl-symbols-check
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+# Check the dynamic symbols exported by libEGL.
+#
+# Usage: egl-symbols-check [path/to/libEGL.so]   (default: .libs/libEGL.so)
+#
+# Every defined text ("T") symbol reported by nm is looked up in the list
+# below.  Symbols that are not on the list are collected in FUNCS, printed,
+# and make the script exit with a non-zero status.
+
+FUNCS=$(nm -D --defined-only ${1-.libs/libEGL.so} | grep -o "T .*" | cut -c 3- | while read func; do
+( grep -q "^$func$" || echo $func )  <<EOF
+eglBindAPI
+eglBindTexImage
+eglChooseConfig
+eglClientWaitSync
+eglCopyBuffers
+eglCreateContext
+eglCreateImage
+eglCreatePbufferFromClientBuffer
+eglCreatePbufferSurface
+eglCreatePixmapSurface
+eglCreatePlatformPixmapSurface
+eglCreatePlatformWindowSurface
+eglCreateSync
+eglCreateWindowSurface
+eglDestroyContext
+eglDestroyImage
+eglDestroySurface
+eglDestroySync
+eglGetConfigAttrib
+eglGetConfigs
+eglGetCurrentContext
+eglGetCurrentDisplay
+eglGetCurrentSurface
+eglGetDisplay
+eglGetError
+eglGetPlatformDisplay
+eglGetProcAddress
+eglGetSyncAttrib
+eglInitialize
+eglMakeCurrent
+eglQueryAPI
+eglQueryContext
+eglQueryString
+eglQuerySurface
+eglReleaseTexImage
+eglReleaseThread
+eglSurfaceAttrib
+eglSwapBuffers
+eglSwapInterval
+eglTerminate
+eglWaitClient
+eglWaitGL
+eglWaitNative
+eglWaitSync
+_fini
+_init
+EOF
+done)
+
+# Print any unexpected symbols; the last test provides the exit status.
+test ! -n "$FUNCS" || echo $FUNCS
+test ! -n "$FUNCS"
--- a/src/egl/wayland/wayland-drm/wayland-drm.c
+++ b/src/egl/wayland/wayland-drm/wayland-drm.c
@@ -197,7 +197,7 @@ drm_authenticate(struct wl_client *client,
 		wl_resource_post_event(resource, WL_DRM_AUTHENTICATED);
 }

-const static struct wl_drm_interface drm_interface = {
+static const struct wl_drm_interface drm_interface = {
 	drm_authenticate,
 	drm_create_buffer,
        drm_create_planar_buffer,
--- a/src/egl/wayland/wayland-egl/wayland-egl.c
+++ b/src/egl/wayland/wayland-egl/wayland-egl.c
@@ -1,3 +1,32 @@
+/*
+ * Copyright © 2011 Kristian Høgsberg
+ * Copyright © 2011 Benjamin Franzke
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Kristian Høgsberg <krh@bitplanet.net>
+ *    Benjamin Franzke <benjaminfranzke@googlemail.com>
+ */
+
 #include <stdlib.h>

 #include <wayland-client.h>
--- a/src/gallium/Android.mk
+++ b/src/gallium/Android.mk
@@ -27,6 +27,7 @@ GALLIUM_TOP := $(call my-dir)
 GALLIUM_COMMON_MK := $(GALLIUM_TOP)/Android.common.mk

 SUBDIRS := auxiliary
+SUBDIRS += auxiliary/pipe-loader

 #
 # Gallium drivers and their respective winsys
--- a/src/gallium/Automake.inc
+++ b/src/gallium/Automake.inc
@@ -67,3 +67,9 @@ if HAVE_DRISW
 GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
 	$(top_builddir)/src/gallium/winsys/sw/dri/libswdri.la
 endif
+
+if HAVE_DRISW_KMS
+GALLIUM_PIPE_LOADER_WINSYS_LIBS += \
+	$(top_builddir)/src/gallium/winsys/sw/kms-dri/libswkmsdri.la \
+	$(LIBDRM_LIBS)
+endif
--- a/src/gallium/Makefile.am
+++ b/src/gallium/Makefile.am
@@ -5,12 +5,14 @@ SUBDIRS =
 ##

 SUBDIRS += auxiliary
+SUBDIRS += auxiliary/pipe-loader

 ##
 ## Gallium pipe drivers and their respective winsys'
 ##

 SUBDIRS += \
+	drivers/ddebug \
 	drivers/noop \
 	drivers/trace \
 	drivers/rbug
@@ -81,6 +83,11 @@ if HAVE_GALLIUM_VC4
 SUBDIRS += drivers/vc4 winsys/vc4/drm
 endif

+## virgl
+if HAVE_GALLIUM_VIRGL
+SUBDIRS += drivers/virgl winsys/virgl/drm winsys/virgl/vtest
+endif
+
 ## the sw winsys'
 SUBDIRS += winsys/sw/null

@@ -92,7 +99,7 @@ if HAVE_DRISW
 SUBDIRS += winsys/sw/dri
 endif

-if HAVE_DRI2
+if HAVE_DRISW_KMS
 SUBDIRS += winsys/sw/kms-dri
 endif

@@ -114,7 +121,8 @@ EXTRA_DIST = \
 ## Gallium state trackers and their users (targets)
 ##

-if HAVE_LOADER_GALLIUM
+## XXX: Rename the conditional once we have a config switch for static/dynamic pipe-drivers
+if HAVE_CLOVER
 SUBDIRS += targets/pipe-loader
 endif

--- a/src/gallium/SConscript
+++ b/src/gallium/SConscript
@@ -5,6 +5,7 @@ Import('env')
 #

 SConscript('auxiliary/SConscript')
+SConscript('auxiliary/pipe-loader/SConscript')

 #
 # Drivers
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,7 +1,3 @@
-if HAVE_LOADER_GALLIUM
-SUBDIRS := pipe-loader
-endif
-
 include Makefile.sources
 include $(top_srcdir)/src/gallium/Automake.inc

@@ -66,15 +62,7 @@ COMMON_VL_CFLAGS = \
 	$(AM_CFLAGS) \
 	$(VL_CFLAGS) \
 	$(DRI2PROTO_CFLAGS) \
-	$(LIBDRM_CFLAGS) \
-	$(GALLIUM_PIPE_LOADER_DEFINES) \
-	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
-
-if HAVE_GALLIUM_STATIC_TARGETS
-COMMON_VL_CFLAGS += \
-	-DGALLIUM_STATIC_TARGETS=1
-
-endif # HAVE_GALLIUM_STATIC_TARGETS
+	$(LIBDRM_CFLAGS)

 noinst_LTLIBRARIES += libgalliumvl.la

--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -129,12 +129,16 @@ C_SOURCES := \
 	rtasm/rtasm_execmem.h \
 	rtasm/rtasm_x86sse.c \
 	rtasm/rtasm_x86sse.h \
+	tgsi/tgsi_aa_point.c \
+	tgsi/tgsi_aa_point.h \
 	tgsi/tgsi_build.c \
 	tgsi/tgsi_build.h \
 	tgsi/tgsi_dump.c \
 	tgsi/tgsi_dump.h \
 	tgsi/tgsi_exec.c \
 	tgsi/tgsi_exec.h \
+	tgsi/tgsi_emulate.c \
+	tgsi/tgsi_emulate.h \
 	tgsi/tgsi_info.c \
 	tgsi/tgsi_info.h \
 	tgsi/tgsi_iterate.c \
@@ -144,6 +148,8 @@ C_SOURCES := \
 	tgsi/tgsi_opcode_tmp.h \
 	tgsi/tgsi_parse.c \
 	tgsi/tgsi_parse.h \
+	tgsi/tgsi_point_sprite.c \
+	tgsi/tgsi_point_sprite.h \
 	tgsi/tgsi_sanity.c \
 	tgsi/tgsi_sanity.h \
 	tgsi/tgsi_scan.c \
@@ -154,6 +160,8 @@ C_SOURCES := \
 	tgsi/tgsi_text.h \
 	tgsi/tgsi_transform.c \
 	tgsi/tgsi_transform.h \
+	tgsi/tgsi_two_side.c \
+	tgsi/tgsi_two_side.h \
 	tgsi/tgsi_ureg.c \
 	tgsi/tgsi_ureg.h \
 	tgsi/tgsi_util.c \
@@ -260,6 +268,8 @@ C_SOURCES := \
 	util/u_pack_color.h \
 	util/u_pointer.h \
 	util/u_prim.h \
+	util/u_prim_restart.c \
+	util/u_prim_restart.h \
 	util/u_pstipple.c \
 	util/u_pstipple.h \
 	util/u_range.h \
@@ -339,7 +349,8 @@ VL_SOURCES := \

 # XXX: Nuke this as our dri targets no longer depend on VL.
 VL_WINSYS_SOURCES := \
-	vl/vl_winsys_dri.c
+	vl/vl_winsys_dri.c \
+	vl/vl_winsys_drm.c

 VL_STUB_SOURCES := \
 	vl/vl_stubs.c
@@ -368,7 +379,9 @@ GALLIVM_SOURCES := \
 	gallivm/lp_bld_flow.h \
 	gallivm/lp_bld_format_aos_array.c \
 	gallivm/lp_bld_format_aos.c \
+	gallivm/lp_bld_format_cached.c \
 	gallivm/lp_bld_format_float.c \
+	gallivm/lp_bld_format.c \
 	gallivm/lp_bld_format.h \
 	gallivm/lp_bld_format_soa.c \
 	gallivm/lp_bld_format_srgb.c \
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -625,6 +625,7 @@ generate_vs(struct draw_llvm_variant *variant,
                     inputs,
                     outputs,
                     context_ptr,
+                     NULL,
                     draw_sampler,
                     &llvm->draw->vs.vertex_shader->info,
                     NULL);
@@ -749,7 +750,8 @@ generate_fetch(struct gallivm_state *gallivm,
                                    lp_float32_vec4_type(),
                                    FALSE,
                                    map_ptr,
-                                    zero, zero, zero);
+                                    zero, zero, zero,
+                                    NULL);
      LLVMBuildStore(builder, val, temp_ptr);
   }
   lp_build_endif(&if_ctx);
@@ -2193,6 +2195,7 @@ draw_gs_llvm_generate(struct draw_llvm *llvm,
                     NULL,
                     outputs,
                     context_ptr,
+                     NULL,
                     sampler,
                     &llvm->draw->gs.geometry_shader->info,
                     (const struct lp_build_tgsi_gs_iface *)&gs_iface);
--- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c
@@ -240,7 +240,8 @@ aa_transform_prolog(struct tgsi_transform_context *ctx)
                               TGSI_FILE_INPUT, texInput, TGSI_SWIZZLE_W);

   /* KILL_IF -tmp0.yyyy;   # if -tmp0.y < 0, KILL */
-   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y);
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_Y, TRUE);

   /* compute coverage factor = (1-d)/(1-k) */

--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -280,7 +280,8 @@ pstip_transform_prolog(struct tgsi_transform_context *ctx)

   /* KILL_IF -texTemp.wwww;   # if -texTemp < 0, KILL fragment */
   tgsi_transform_kill_inst(ctx,
-                            TGSI_FILE_TEMPORARY, pctx->texTemp, TGSI_SWIZZLE_W);
+                            TGSI_FILE_TEMPORARY, pctx->texTemp,
+                            TGSI_SWIZZLE_W, TRUE);
 }


--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -355,8 +355,9 @@ struct draw_vertex_info {
 };

 /* these flags are set if the primitive is a segment of a larger one */
-#define DRAW_SPLIT_BEFORE 0x1
-#define DRAW_SPLIT_AFTER  0x2
+#define DRAW_SPLIT_BEFORE        0x1
+#define DRAW_SPLIT_AFTER         0x2
+#define DRAW_LINE_LOOP_AS_STRIP  0x4

 struct draw_prim_info {
   boolean linear;
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c
@@ -359,6 +359,16 @@ fetch_pipeline_generic(struct draw_pt_middle_end *middle,
 }


+/* Primitive to pass on: sections of a split line loop are drawn as strips. */
+static inline unsigned
+prim_type(unsigned prim, unsigned flags)
+{
+   const unsigned as_strip = flags & DRAW_LINE_LOOP_AS_STRIP;
+
+   return as_strip ? PIPE_PRIM_LINE_STRIP : prim;
+}
+
+
 static void
 fetch_pipeline_run(struct draw_pt_middle_end *middle,
                   const unsigned *fetch_elts,
@@ -380,7 +390,7 @@ fetch_pipeline_run(struct draw_pt_middle_end *middle,
   prim_info.start = 0;
   prim_info.count = draw_count;
   prim_info.elts = draw_elts;
-   prim_info.prim = fpme->input_prim;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &draw_count;
@@ -408,7 +418,7 @@ fetch_pipeline_linear_run(struct draw_pt_middle_end *middle,
   prim_info.start = 0;
   prim_info.count = count;
   prim_info.elts = NULL;
-   prim_info.prim = fpme->input_prim;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &count;
@@ -439,7 +449,7 @@ fetch_pipeline_linear_run_elts(struct draw_pt_middle_end *middle,
   prim_info.start = 0;
   prim_info.count = draw_count;
   prim_info.elts = draw_elts;
-   prim_info.prim = fpme->input_prim;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &draw_count;
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -473,6 +473,16 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
 }


+/* Primitive to pass on: sections of a split line loop are drawn as strips. */
+static inline unsigned
+prim_type(unsigned prim, unsigned flags)
+{
+   const unsigned as_strip = flags & DRAW_LINE_LOOP_AS_STRIP;
+
+   return as_strip ? PIPE_PRIM_LINE_STRIP : prim;
+}
+
+
 static void
 llvm_middle_end_run(struct draw_pt_middle_end *middle,
                    const unsigned *fetch_elts,
@@ -494,7 +504,7 @@ llvm_middle_end_run(struct draw_pt_middle_end *middle,
   prim_info.start = 0;
   prim_info.count = draw_count;
   prim_info.elts = draw_elts;
-   prim_info.prim = fpme->input_prim;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &draw_count;
@@ -522,7 +532,7 @@ llvm_middle_end_linear_run(struct draw_pt_middle_end *middle,
   prim_info.start = 0;
   prim_info.count = count;
   prim_info.elts = NULL;
-   prim_info.prim = fpme->input_prim;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &count;
@@ -552,7 +562,7 @@ llvm_middle_end_linear_run_elts(struct draw_pt_middle_end *middle,
   prim_info.start = 0;
   prim_info.count = draw_count;
   prim_info.elts = draw_elts;
-   prim_info.prim = fpme->input_prim;
+   prim_info.prim = prim_type(fpme->input_prim, prim_flags);
   prim_info.flags = prim_flags;
   prim_info.primitive_count = 1;
   prim_info.primitive_lengths = &draw_count;
--- a/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vsplit_tmp.h
@@ -249,6 +249,9 @@ vsplit_segment_loop_linear(struct vsplit_frontend *vsplit, unsigned flags,

   assert(icount + !!close_loop <= vsplit->segment_size);

+   /* need to draw the sections of the line loop as line strips */
+   flags |= DRAW_LINE_LOOP_AS_STRIP;
+
   if (close_loop) {
      for (nr = 0; nr < icount; nr++)
         vsplit->fetch_elts[nr] = istart + nr;
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.c
@@ -0,0 +1,56 @@
+/**************************************************************************
+ *
+ * Copyright 2010 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ **************************************************************************/
+
+
+#include "lp_bld_format.h"
+
+
+
+LLVMTypeRef
+lp_build_format_cache_type(struct gallivm_state *gallivm)
+{
+   /* One LLVM member type per LP_BUILD_FORMAT_CACHE_MEMBER_* index. */
+   LLVMTypeRef members[LP_BUILD_FORMAT_CACHE_MEMBER_COUNT];
+   LLVMContextRef ctx = gallivm->context;
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(ctx);
+   LLVMTypeRef i64t = LLVMInt64TypeInContext(ctx);
+
+   /* texel storage: 16 32-bit words for each of the cache's blocks */
+   members[LP_BUILD_FORMAT_CACHE_MEMBER_DATA] =
+      LLVMArrayType(i32t, LP_BUILD_FORMAT_CACHE_SIZE * 16);
+   /* one 64-bit tag for each of the cache's blocks */
+   members[LP_BUILD_FORMAT_CACHE_MEMBER_TAGS] =
+      LLVMArrayType(i64t, LP_BUILD_FORMAT_CACHE_SIZE);
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   /* debug-only 64-bit access counters */
+   members[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL] = i64t;
+   members[LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS] = i64t;
+#endif
+
+   return LLVMStructTypeInContext(ctx, members,
+                                  LP_BUILD_FORMAT_CACHE_MEMBER_COUNT, 0);
+}
--- a/src/gallium/auxiliary/gallivm/lp_bld_format.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format.h
@@ -44,6 +44,45 @@ struct lp_type;
 struct lp_build_context;


+#define LP_BUILD_FORMAT_CACHE_DEBUG 0
+/*
+ * Block cache
+ *
+ * Optional block cache to be used when unpacking big pixel blocks.
+ * LP_BUILD_FORMAT_CACHE_SIZE (the number of cached blocks) must be a power of 2.
+ */
+
+#define LP_BUILD_FORMAT_CACHE_SIZE 128
+
+/*
+ * Note: cache_data (4x4 32-bit words per entry) needs 16 byte alignment.
+ */
+struct lp_build_format_cache
+{
+   PIPE_ALIGN_VAR(16) uint32_t cache_data[LP_BUILD_FORMAT_CACHE_SIZE][4][4];
+   uint64_t cache_tags[LP_BUILD_FORMAT_CACHE_SIZE]; /* one tag per entry */
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   uint64_t cache_access_total; /* debug builds only */
+   uint64_t cache_access_miss; /* debug builds only */
+#endif
+};
+
+
+enum {
+   LP_BUILD_FORMAT_CACHE_MEMBER_DATA = 0, /* index of cache_data */
+   LP_BUILD_FORMAT_CACHE_MEMBER_TAGS, /* index of cache_tags */
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL, /* index of cache_access_total */
+   LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS, /* index of cache_access_miss */
+#endif
+   LP_BUILD_FORMAT_CACHE_MEMBER_COUNT /* number of struct members */
+};
+
+
+LLVMTypeRef
+lp_build_format_cache_type(struct gallivm_state *gallivm);
+
+
 /*
 * AoS
 */
@@ -66,7 +105,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
-                        LLVMValueRef j);
+                        LLVMValueRef j,
+                        LLVMValueRef cache);

 LLVMValueRef
 lp_build_fetch_rgba_aos_array(struct gallivm_state *gallivm,
@@ -107,13 +147,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                        LLVMValueRef offsets,
                        LLVMValueRef i,
                        LLVMValueRef j,
+                        LLVMValueRef cache,
                        LLVMValueRef rgba_out[4]);

 /*
 * YUV
 */

-
 LLVMValueRef
 lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
                                   const struct util_format_description *format_desc,
@@ -123,6 +163,18 @@ lp_build_fetch_subsampled_rgba_aos(struct gallivm_state *gallivm,
                                   LLVMValueRef i,
                                   LLVMValueRef j);

+
+LLVMValueRef
+lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
+                             const struct util_format_description *format_desc,
+                             unsigned n,
+                             LLVMValueRef base_ptr,
+                             LLVMValueRef offset,
+                             LLVMValueRef i,
+                             LLVMValueRef j,
+                             LLVMValueRef cache);
+
+
 /*
 * special float formats
 */
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_aos.c
@@ -370,7 +370,8 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
                        LLVMValueRef base_ptr,
                        LLVMValueRef offset,
                        LLVMValueRef i,
-                        LLVMValueRef j)
+                        LLVMValueRef j,
+                        LLVMValueRef cache)
 {
   LLVMBuilderRef builder = gallivm->builder;
   unsigned num_pixels = type.length / 4;
@@ -502,6 +503,34 @@ lp_build_fetch_rgba_aos(struct gallivm_state *gallivm,
      return tmp;
   }

+   /*
+    * s3tc rgb formats
+    */
+
+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC && cache) {
+      struct lp_type tmp_type;
+      LLVMValueRef tmp;
+
+      memset(&tmp_type, 0, sizeof tmp_type);
+      tmp_type.width = 8;
+      tmp_type.length = num_pixels * 4;
+      tmp_type.norm = TRUE;
+
+      tmp = lp_build_fetch_cached_texels(gallivm,
+                                         format_desc,
+                                         num_pixels,
+                                         base_ptr,
+                                         offset,
+                                         i, j,
+                                         cache);
+
+      lp_build_conv(gallivm,
+                    tmp_type, type,
+                    &tmp, 1, &tmp, 1);
+
+       return tmp;
+   }
+
   /*
    * Fallback to util_format_description::fetch_rgba_8unorm().
    */
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_cached.c
@@ -0,0 +1,374 @@
+/**************************************************************************
+ *
+ * Copyright 2015 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#include "lp_bld_format.h"
+#include "lp_bld_type.h"
+#include "lp_bld_struct.h"
+#include "lp_bld_const.h"
+#include "lp_bld_flow.h"
+#include "lp_bld_swizzle.h"
+
+#include "util/u_math.h"
+
+
+/**
+ * @file
+ * Complex block-compression based formats are handled here by using a cache,
+ * so re-decoding of every pixel is not required.
+ * Especially for bilinear filtering, texel reuse is very high hence even
+ * a small cache helps.
+ * The elements in the cache are the decoded blocks - currently things
+ * are restricted to formats which are 4x4 block based, and the decoded
+ * texels must fit into 4x8 bits.
+ * The cache is direct mapped so hitrates aren't all that great and cache
+ * thrashing could happen.
+ *
+ * @author Roland Scheidegger <sroland@vmware.com>
+ */
+
+
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+/* Emit code adding 'count' to the 64-bit counter at struct member 'index'. */
+static void
+update_cache_access(struct gallivm_state *gallivm,
+                    LLVMValueRef ptr,
+                    unsigned count,
+                    unsigned index)
+{
+   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
+   LLVMValueRef increment = LLVMConstInt(i64t, count, 0);
+   LLVMValueRef counter_ptr, old_total, new_total;
+
+   assert(index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL ||
+          index == LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
+
+   counter_ptr = lp_build_struct_get_ptr(gallivm, ptr, index, "");
+   old_total = LLVMBuildLoad(gallivm->builder, counter_ptr, "cache_access");
+   new_total = LLVMBuildAdd(gallivm->builder, old_total, increment, "");
+   LLVMBuildStore(gallivm->builder, new_total, counter_ptr);
+}
+#endif
+
+
+static void
+store_cached_block(struct gallivm_state *gallivm,
+                   LLVMValueRef *col,
+                   LLVMValueRef tag_value,
+                   LLVMValueRef hash_index,
+                   LLVMValueRef cache)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef ptr, indices[3];
+   LLVMTypeRef type_ptr4x32;
+   unsigned count;
+   /* Emits: tags[hash_index] = tag_value, then the four col[] vectors as data. */
+   type_ptr4x32 = LLVMPointerType(LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), 4), 0);
+   indices[0] = lp_build_const_int32(gallivm, 0);
+   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
+   indices[2] = hash_index;
+   ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), "");
+   LLVMBuildStore(builder, tag_value, ptr);
+   /* data member is indexed in 32-bit words: 16 per block, 4 per col[] vector */
+   indices[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
+   hash_index = LLVMBuildMul(builder, hash_index,
+                             lp_build_const_int32(gallivm, 16), "");
+   for (count = 0; count < 4; count++) {
+      indices[2] = hash_index;
+      ptr = LLVMBuildGEP(builder, cache, indices, Elements(indices), "");
+      ptr = LLVMBuildBitCast(builder, ptr, type_ptr4x32, ""); /* store 4 words at once */
+      LLVMBuildStore(builder, col[count], ptr);
+      hash_index = LLVMBuildAdd(builder, hash_index,
+                                lp_build_const_int32(gallivm, 4), "");
+   }
+}
+
+
+static LLVMValueRef
+lookup_cached_pixel(struct gallivm_state *gallivm,
+                    LLVMValueRef ptr,
+                    LLVMValueRef index)
+{
+   LLVMValueRef path[3], texel_ptr;
+
+   /* emit a load of data word 'index' from the cache struct at 'ptr' */
+   path[0] = lp_build_const_int32(gallivm, 0);
+   path[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_DATA);
+   path[2] = index;
+   texel_ptr = LLVMBuildGEP(gallivm->builder, ptr, path, Elements(path), "");
+   return LLVMBuildLoad(gallivm->builder, texel_ptr, "cache_data");
+}
+
+
+static LLVMValueRef
+lookup_tag_data(struct gallivm_state *gallivm,
+                LLVMValueRef ptr,
+                LLVMValueRef index)
+{
+   LLVMValueRef path[3], tag_ptr;
+
+   /* emit a load of tag slot 'index' from the cache struct at 'ptr' */
+   path[0] = lp_build_const_int32(gallivm, 0);
+   path[1] = lp_build_const_int32(gallivm, LP_BUILD_FORMAT_CACHE_MEMBER_TAGS);
+   path[2] = index;
+   tag_ptr = LLVMBuildGEP(gallivm->builder, ptr, path, Elements(path), "");
+   return LLVMBuildLoad(gallivm->builder, tag_ptr, "tag_data");
+}
+
+
+/* Emit code decoding the 4x4 block at ptr_addr into cache slot hash_index. */
+static void
+update_cached_block(struct gallivm_state *gallivm,
+                    const struct util_format_description *format_desc,
+                    LLVMValueRef ptr_addr,
+                    LLVMValueRef hash_index,
+                    LLVMValueRef cache)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
+   LLVMTypeRef pi8t = LLVMPointerType(i8t, 0);
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
+   LLVMTypeRef i32x4 = LLVMVectorType(i32t, 4);
+   LLVMValueRef function;
+   LLVMValueRef tag_value, tmp_ptr;
+   LLVMValueRef col[4];
+   unsigned i, j;
+
+   /*
+    * Use format_desc->fetch_rgba_8unorm() for each pixel in the block.
+    * This doesn't actually make any sense whatsoever, someone would need
+    * to write a function doing this for all pixels in a block (either as
+    * an external c function or with generated code). Don't ask.
+    */
+
+   {
+      /*
+       * Function to call looks like:
+       *   fetch(uint8_t *dst, const uint8_t *src, unsigned i, unsigned j)
+       */
+      LLVMTypeRef ret_type;
+      LLVMTypeRef arg_types[4];
+      LLVMTypeRef function_type;
+
+      assert(format_desc->fetch_rgba_8unorm);
+
+      ret_type = LLVMVoidTypeInContext(gallivm->context);
+      arg_types[0] = pi8t;
+      arg_types[1] = pi8t;
+      arg_types[2] = i32t;
+      arg_types[3] = i32t;
+      function_type = LLVMFunctionType(ret_type, arg_types,
+                                       Elements(arg_types), 0);
+
+      /* make const pointer for the C fetch_rgba_8unorm function */
+      function = lp_build_const_int_pointer(gallivm,
+         func_to_pointer((func_pointer) format_desc->fetch_rgba_8unorm));
+
+      /* cast the callee pointer to the function's type */
+      function = LLVMBuildBitCast(builder, function,
+                                  LLVMPointerType(function_type, 0),
+                                  "cast callee");
+   }
+   /* Scratch for one decoded block: 16 texels * 4 bytes = 4 i32x4 vectors. */
+   tmp_ptr = lp_build_array_alloca(gallivm, i32x4,
+                                   lp_build_const_int32(gallivm, 4),
+                                   "tmp_decode_store");
+   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, pi8t, "");
+
+   /*
+    * Invoke format_desc->fetch_rgba_8unorm() for each pixel.
+    * This is going to be really really slow.
+    * Note: the block store format is actually
+    * x0y0x0y1x0y2x0y3 x1y0x1y1x1y2x1y3 ...
+    */
+   for (i = 0; i < 4; ++i) {
+      for (j = 0; j < 4; ++j) {
+         LLVMValueRef args[4];
+         LLVMValueRef dst_offset = lp_build_const_int32(gallivm, (i * 4 + j) * 4);
+
+         /*
+          * Note we actually supply a pointer to the start of the block,
+          * not the start of the texture.
+          */
+         args[0] = LLVMBuildGEP(builder, tmp_ptr, &dst_offset, 1, "");
+         args[1] = ptr_addr;
+         args[2] = LLVMConstInt(i32t, i, 0);
+         args[3] = LLVMConstInt(i32t, j, 0);
+         LLVMBuildCall(builder, function, args, Elements(args), "");
+      }
+   }
+
+   /* Finally store the block - pointless mem copy + update tag. */
+   tmp_ptr = LLVMBuildBitCast(builder, tmp_ptr, LLVMPointerType(i32x4, 0), "");
+   for (i = 0; i < 4; ++i) {
+      LLVMValueRef tmp_offset = lp_build_const_int32(gallivm, i);
+      LLVMValueRef ptr = LLVMBuildGEP(builder, tmp_ptr, &tmp_offset, 1, "");
+      col[i] = LLVMBuildLoad(builder, ptr, "");
+   }
+   /* The block's address, widened to 64 bits, serves as its tag. */
+   tag_value = LLVMBuildPtrToInt(builder, ptr_addr,
+                                 LLVMInt64TypeInContext(gallivm->context), "");
+   store_cached_block(gallivm, col, tag_value, hash_index, cache);
+}
+
+
+/*
+ * Do a cached lookup of n texels; i, j select the texel within its 4x4 block.
+ * On a tag mismatch the block is decoded into 'cache' before the texel is read.
+ * Returns (vectors of) 4x8 rgba aos value
+ */
+LLVMValueRef
+lp_build_fetch_cached_texels(struct gallivm_state *gallivm,
+                             const struct util_format_description *format_desc,
+                             unsigned n,
+                             LLVMValueRef base_ptr,
+                             LLVMValueRef offset,
+                             LLVMValueRef i,
+                             LLVMValueRef j,
+                             LLVMValueRef cache)
+
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   unsigned count, low_bit, log2size;
+   LLVMValueRef color, offset_stored, addr, ptr_addrtrunc, tmp;
+   LLVMValueRef ij_index, hash_index, hash_mask, block_index;
+   LLVMTypeRef i8t = LLVMInt8TypeInContext(gallivm->context);
+   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
+   LLVMTypeRef i64t = LLVMInt64TypeInContext(gallivm->context);
+   struct lp_type type;
+   struct lp_build_context bld32;
+   memset(&type, 0, sizeof type);
+   type.width = 32;
+   type.length = n;
+   /* the 16-texels-per-entry cache layout is only valid for 4x4 blocks */
+   assert(format_desc->block.width == 4);
+   assert(format_desc->block.height == 4);
+
+   lp_build_context_init(&bld32, gallivm, type);
+
+   /*
+    * compute hash - we use direct mapped cache, the hash function could
+    *                be better but it needs to be simple
+    * per-element:
+    *    compare offset with offset stored at tag (hash)
+    *    if not equal decode/store block, update tag
+    *    extract color from cache
+    *    assemble result vector
+    */
+
+   /* TODO: not ideal with 32bit pointers... */
+
+   low_bit = util_logbase2(format_desc->block.bits / 8);
+   log2size = util_logbase2(LP_BUILD_FORMAT_CACHE_SIZE);
+   addr = LLVMBuildPtrToInt(builder, base_ptr, i64t, "");
+   ptr_addrtrunc = LLVMBuildPtrToInt(builder, base_ptr, i32t, "");
+   ptr_addrtrunc = lp_build_broadcast_scalar(&bld32, ptr_addrtrunc);
+   /* For the hash function, first shift out the unused lowest bits. Then just
+      do some xor with address bits - only use lower 32bits */
+   ptr_addrtrunc = LLVMBuildAdd(builder, offset, ptr_addrtrunc, "");
+   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
+                                 lp_build_const_int_vec(gallivm, type, low_bit), "");
+   /* This only really makes sense for size 64,128,256 */
+   hash_index = ptr_addrtrunc;
+   ptr_addrtrunc = LLVMBuildLShr(builder, ptr_addrtrunc,
+                                 lp_build_const_int_vec(gallivm, type, 2*log2size), "");
+   hash_index = LLVMBuildXor(builder, ptr_addrtrunc, hash_index, "");
+   tmp = LLVMBuildLShr(builder, hash_index,
+                       lp_build_const_int_vec(gallivm, type, log2size), "");
+   hash_index = LLVMBuildXor(builder, hash_index, tmp, "");
+   /* mask to the cache size; block_index = hash_index * 16 + i * 4 + j */
+   hash_mask = lp_build_const_int_vec(gallivm, type, LP_BUILD_FORMAT_CACHE_SIZE - 1);
+   hash_index = LLVMBuildAnd(builder, hash_index, hash_mask, "");
+   ij_index = LLVMBuildShl(builder, i, lp_build_const_int_vec(gallivm, type, 2), "");
+   ij_index = LLVMBuildAdd(builder, ij_index, j, "");
+   block_index = LLVMBuildShl(builder, hash_index,
+                              lp_build_const_int_vec(gallivm, type, 4), "");
+   block_index = LLVMBuildAdd(builder, ij_index, block_index, "");
+
+   if (n > 1) {
+      color = LLVMGetUndef(LLVMVectorType(i32t, n));
+      for (count = 0; count < n; count++) {
+         LLVMValueRef index, cond, colorx;
+         LLVMValueRef block_indexx, hash_indexx, addrx, offsetx, ptr_addrx;
+         struct lp_build_if_state if_ctx;
+         /* per element: extract its offset, 64-bit address and cache slot */
+         index = lp_build_const_int32(gallivm, count);
+         offsetx = LLVMBuildExtractElement(builder, offset, index, "");
+         addrx = LLVMBuildZExt(builder, offsetx, i64t, "");
+         addrx = LLVMBuildAdd(builder, addrx, addr, "");
+         block_indexx = LLVMBuildExtractElement(builder, block_index, index, "");
+         hash_indexx = LLVMBuildLShr(builder, block_indexx,
+                                     lp_build_const_int32(gallivm, 4), "");
+         offset_stored = lookup_tag_data(gallivm, cache, hash_indexx);
+         cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addrx, "");
+         /* stored tag differs from this address: decode the block into the slot */
+         lp_build_if(&if_ctx, gallivm, cond);
+         {
+            ptr_addrx = LLVMBuildIntToPtr(builder, addrx,
+                                          LLVMPointerType(i8t, 0), "");
+            update_cached_block(gallivm, format_desc, ptr_addrx, hash_indexx, cache);
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+            update_cache_access(gallivm, cache, 1,
+                                LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
+#endif
+         }
+         lp_build_endif(&if_ctx);
+
+         colorx = lookup_cached_pixel(gallivm, cache, block_indexx);
+
+         color = LLVMBuildInsertElement(builder, color, colorx,
+                                        lp_build_const_int32(gallivm, count), "");
+      }
+   }
+   else {
+      LLVMValueRef cond;
+      struct lp_build_if_state if_ctx;
+      /* n == 1: same tag check and refill, without per-element extraction */
+      tmp = LLVMBuildZExt(builder, offset, i64t, "");
+      addr = LLVMBuildAdd(builder, tmp, addr, "");
+      offset_stored = lookup_tag_data(gallivm, cache, hash_index);
+      cond = LLVMBuildICmp(builder, LLVMIntNE, offset_stored, addr, "");
+
+      lp_build_if(&if_ctx, gallivm, cond);
+      {
+         tmp = LLVMBuildIntToPtr(builder, addr, LLVMPointerType(i8t, 0), "");
+         update_cached_block(gallivm, format_desc, tmp, hash_index, cache);
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+         update_cache_access(gallivm, cache, 1,
+                             LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_MISS);
+#endif
+      }
+      lp_build_endif(&if_ctx);
+
+      color = lookup_cached_pixel(gallivm, cache, block_index);
+   }
+#if LP_BUILD_FORMAT_CACHE_DEBUG
+   update_cache_access(gallivm, cache, n,
+                       LP_BUILD_FORMAT_CACHE_MEMBER_ACCESS_TOTAL);
+#endif
+   return LLVMBuildBitCast(builder, color, LLVMVectorType(i8t, n * 4), "");
+}
+
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -346,6 +346,7 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
 * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
 *              these will always be (0,0).  For compressed formats, i will
 *              be in [0, block_width-1] and j will be in [0, block_height-1].
+ * \param cache  optional value pointing to a lp_build_format_cache structure
 */
 void
 lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
@@ -355,6 +356,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
                        LLVMValueRef offset,
                        LLVMValueRef i,
                        LLVMValueRef j,
+                        LLVMValueRef cache,
                        LLVMValueRef rgba_out[4])
 {
   LLVMBuilderRef builder = gallivm->builder;
@@ -473,7 +475,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
      tmp_type.norm = TRUE;

      tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
-                                    TRUE, base_ptr, offset, i, j);
+                                    TRUE, base_ptr, offset, i, j, cache);

      lp_build_rgba8_to_fi32_soa(gallivm,
                                type,
@@ -483,6 +485,39 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
      return;
   }

+   if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC &&
+       /* non-srgb case is already handled above */
+       format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB &&
+       type.floating && type.width == 32 &&
+       (type.length == 1 || (type.length % 4 == 0)) &&
+       cache) {
+      const struct util_format_description *format_decompressed;
+      const struct util_format_description *flinear_desc;
+      LLVMValueRef packed;
+      flinear_desc = util_format_description(util_format_linear(format_desc->format));
+      packed = lp_build_fetch_cached_texels(gallivm,
+                                            flinear_desc,
+                                            type.length,
+                                            base_ptr,
+                                            offset,
+                                            i, j,
+                                            cache);
+      packed = LLVMBuildBitCast(builder, packed,
+                                lp_build_int_vec_type(gallivm, type), "");
+      /*
+       * The values are now packed so they match ordinary srgb RGBA8 format,
+       * hence need to use matching format for unpack.
+       */
+      format_decompressed = util_format_description(PIPE_FORMAT_R8G8B8A8_SRGB);
+
+      lp_build_unpack_rgba_soa(gallivm,
+                               format_decompressed,
+                               type,
+                               packed, rgba_out);
+
+      return;
+   }
+
   /*
    * Fallback to calling lp_build_fetch_rgba_aos for each pixel.
    *
@@ -524,7 +559,7 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
         /* Get a single float[4]={R,G,B,A} pixel */
         tmp = lp_build_fetch_rgba_aos(gallivm, format_desc, tmp_type,
                                       TRUE, base_ptr, offset_elem,
-                                       i_elem, j_elem);
+                                       i_elem, j_elem, cache);

         /*
          * Insert the AoS tmp value channels into the SoA result vectors at
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -461,49 +461,50 @@ lp_build_pack2(struct gallivm_state *gallivm,
   assert(src_type.length * 2 == dst_type.length);

   /* Check for special cases first */
-   if ((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
-        src_type.width * src_type.length >= 128) {
+   if((util_cpu_caps.has_sse2 || util_cpu_caps.has_altivec) &&
+       src_type.width * src_type.length >= 128) {
      const char *intrinsic = NULL;
      boolean swap_intrinsic_operands = FALSE;

      switch(src_type.width) {
      case 32:
         if (util_cpu_caps.has_sse2) {
-           if (dst_type.sign) {
+           if(dst_type.sign) {
              intrinsic = "llvm.x86.sse2.packssdw.128";
-           } else {
+           }
+           else {
              if (util_cpu_caps.has_sse4_1) {
                 intrinsic = "llvm.x86.sse41.packusdw";
              }
           }
         } else if (util_cpu_caps.has_altivec) {
            if (dst_type.sign) {
-               intrinsic = "llvm.ppc.altivec.vpkswss";
-            } else {
-               intrinsic = "llvm.ppc.altivec.vpkuwus";
-            }
+              intrinsic = "llvm.ppc.altivec.vpkswss";
+           } else {
+              intrinsic = "llvm.ppc.altivec.vpkuwus";
+           }
 #ifdef PIPE_ARCH_LITTLE_ENDIAN
-            swap_intrinsic_operands = TRUE;
+           swap_intrinsic_operands = TRUE;
 #endif
         }
         break;
      case 16:
         if (dst_type.sign) {
            if (util_cpu_caps.has_sse2) {
-               intrinsic = "llvm.x86.sse2.packsswb.128";
+              intrinsic = "llvm.x86.sse2.packsswb.128";
            } else if (util_cpu_caps.has_altivec) {
-               intrinsic = "llvm.ppc.altivec.vpkshss";
+              intrinsic = "llvm.ppc.altivec.vpkshss";
 #ifdef PIPE_ARCH_LITTLE_ENDIAN
-               swap_intrinsic_operands = TRUE;
+              swap_intrinsic_operands = TRUE;
 #endif
            }
         } else {
            if (util_cpu_caps.has_sse2) {
-               intrinsic = "llvm.x86.sse2.packuswb.128";
+              intrinsic = "llvm.x86.sse2.packuswb.128";
            } else if (util_cpu_caps.has_altivec) {
-               intrinsic = "llvm.ppc.altivec.vpkshus";
+	      intrinsic = "llvm.ppc.altivec.vpkshus";
 #ifdef PIPE_ARCH_LITTLE_ENDIAN
-               swap_intrinsic_operands = TRUE;
+              swap_intrinsic_operands = TRUE;
 #endif
            }
         }
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.h
@@ -99,6 +99,7 @@ struct lp_sampler_params
   unsigned sampler_index;
   unsigned sample_key;
   LLVMValueRef context_ptr;
+   LLVMValueRef thread_data_ptr;
   const LLVMValueRef *coords;
   const LLVMValueRef *offsets;
   LLVMValueRef lod;
@@ -267,6 +268,17 @@ struct lp_sampler_dynamic_state
                   struct gallivm_state *gallivm,
                   LLVMValueRef context_ptr,
                   unsigned sampler_unit);
+
+   /** 
+    * Obtain texture cache (returns ptr to lp_build_format_cache).
+    *
+    * It's optional: no caching will be done if it's NULL.
+    */
+   LLVMValueRef
+   (*cache_ptr)(const struct lp_sampler_dynamic_state *state,
+                struct gallivm_state *gallivm,
+                LLVMValueRef thread_data_ptr,
+                unsigned unit);
 };


@@ -356,6 +368,7 @@ struct lp_build_sample_context
   LLVMValueRef img_stride_array;
   LLVMValueRef base_ptr;
   LLVMValueRef mip_offsets;
+   LLVMValueRef cache;

   /** Integer vector with texture width, height, depth */
   LLVMValueRef int_size;
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_aos.c
@@ -593,7 +593,8 @@ lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld,
                                      TRUE,
                                      data_ptr, offset,
                                      x_subcoord,
-                                      y_subcoord);
+                                      y_subcoord,
+                                      bld->cache);
   }

   *colors = rgba8;
@@ -933,7 +934,8 @@ lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                               TRUE,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
-                                               y_subcoord[j]);
+                                               y_subcoord[j],
+                                               bld->cache);
            }

            neighbors[k][j][i] = rgba8;
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -161,6 +161,7 @@ lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
                           bld->texel_type,
                           data_ptr, offset,
                           i, j,
+                           bld->cache,
                           texel_out);

   /*
@@ -405,16 +406,17 @@ lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
+      if (offset) {
+         offset = lp_build_int_to_float(coord_bld, offset);
+         offset = lp_build_div(coord_bld, offset, length_f);
+         coord = lp_build_add(coord_bld, coord, offset);
+      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord);

      /* scale coord to length */
      coord = lp_build_mul(coord_bld, coord, length_f);
      coord = lp_build_sub(coord_bld, coord, half);
-      if (offset) {
-         offset = lp_build_int_to_float(coord_bld, offset);
-         coord = lp_build_add(coord_bld, coord, offset);
-      }

      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
@@ -567,12 +569,13 @@ lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

+      if (offset) {
+         offset = lp_build_int_to_float(coord_bld, offset);
+         coord = lp_build_add(coord_bld, coord, offset);
+      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);
-      if (offset) {
-         icoord = lp_build_add(int_coord_bld, icoord, offset);
-      }

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
@@ -2387,6 +2390,7 @@ lp_build_fetch_texel(struct lp_build_sample_context *bld,
                           bld->texel_type,
                           bld->base_ptr, offset,
                           i, j,
+                           bld->cache,
                           colors_out);

   if (out_of_bound_ret_zero) {
@@ -2440,6 +2444,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
                         unsigned texture_index,
                         unsigned sampler_index,
                         LLVMValueRef context_ptr,
+                         LLVMValueRef thread_data_ptr,
                         const LLVMValueRef *coords,
                         const LLVMValueRef *offsets,
                         const struct lp_derivatives *derivs, /* optional */
@@ -2586,6 +2591,10 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
      derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   }
+   /*
+    * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
+    * so AoS path could be used. Not sure it's worth the trouble...
+    */

   min_img_filter = derived_sampler_state.min_img_filter;
   mag_img_filter = derived_sampler_state.mag_img_filter;
@@ -2701,6 +2710,11 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
                                                context_ptr, texture_index);
   /* Note that mip_offsets is an array[level] of offsets to texture images */

+   if (dynamic_state->cache_ptr && thread_data_ptr) {
+      bld.cache = dynamic_state->cache_ptr(dynamic_state, gallivm,
+                                           thread_data_ptr, texture_index);
+   }
+
   /* width, height, depth as single int vector */
   if (dims <= 1) {
      bld.int_size = tex_width;
@@ -2877,6 +2891,7 @@ lp_build_sample_soa_code(struct gallivm_state *gallivm,
         bld4.base_ptr = bld.base_ptr;
         bld4.mip_offsets = bld.mip_offsets;
         bld4.int_size = bld.int_size;
+         bld4.cache = bld.cache;

         bld4.vector_width = lp_type_width(type4);

@@ -3075,12 +3090,14 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef lod = NULL;
   LLVMValueRef context_ptr;
+   LLVMValueRef thread_data_ptr = NULL;
   LLVMValueRef texel_out[4];
   struct lp_derivatives derivs;
   struct lp_derivatives *deriv_ptr = NULL;
   unsigned num_param = 0;
   unsigned i, num_coords, num_derivs, num_offsets, layer;
   enum lp_sampler_lod_control lod_control;
+   boolean need_cache = FALSE;

   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;
@@ -3088,8 +3105,19 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

+   if (dynamic_state->cache_ptr) {
+      const struct util_format_description *format_desc;
+      format_desc = util_format_description(static_texture_state->format);
+      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+         need_cache = TRUE;
+      }
+   }
+
   /* "unpack" arguments */
   context_ptr = LLVMGetParam(function, num_param++);
+   if (need_cache) {
+      thread_data_ptr = LLVMGetParam(function, num_param++);
+   }
   for (i = 0; i < num_coords; i++) {
      coords[i] = LLVMGetParam(function, num_param++);
   }
@@ -3140,6 +3168,7 @@ lp_build_sample_gen_func(struct gallivm_state *gallivm,
                            texture_index,
                            sampler_index,
                            context_ptr,
+                            thread_data_ptr,
                            coords,
                            offsets,
                            deriv_ptr,
@@ -3183,6 +3212,7 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
   const LLVMValueRef *offsets = params->offsets;
   const struct lp_derivatives *derivs = params->derivs;
   enum lp_sampler_lod_control lod_control;
+   boolean need_cache = FALSE;

   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
                    LP_SAMPLER_LOD_CONTROL_SHIFT;
@@ -3190,6 +3220,17 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
   get_target_info(static_texture_state->target,
                   &num_coords, &num_derivs, &num_offsets, &layer);

+   if (dynamic_state->cache_ptr) {
+      const struct util_format_description *format_desc;
+      format_desc = util_format_description(static_texture_state->format);
+      if (format_desc && format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
+         /*
+          * This is not 100% correct, if we have cache but the
+          * util_format_s3tc_prefer is true the cache won't get used
+          * regardless (could hook up the block decode there...) */
+         need_cache = TRUE;
+      }
+   }
   /*
    * texture function matches are found by name.
    * Thus the name has to include both the texture and sampler unit
@@ -3215,6 +3256,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,
       */

      arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
+      if (need_cache) {
+         arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
+      }
      for (i = 0; i < num_coords; i++) {
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
@@ -3274,6 +3318,9 @@ lp_build_sample_soa_func(struct gallivm_state *gallivm,

   num_args = 0;
   args[num_args++] = params->context_ptr;
+   if (need_cache) {
+      args[num_args++] = params->thread_data_ptr;
+   }
   for (i = 0; i < num_coords; i++) {
      args[num_args++] = coords[i];
   }
@@ -3378,6 +3425,7 @@ lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
                               params->texture_index,
                               params->sampler_index,
                               params->context_ptr,
+                               params->thread_data_ptr,
                               params->coords,
                               params->offsets,
                               params->derivs,
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.c
@@ -129,7 +129,8 @@ lp_build_emit_llvm_unary(
   unsigned tgsi_opcode,
   LLVMValueRef arg0)
 {
-   struct lp_build_emit_data emit_data;
+   struct lp_build_emit_data emit_data = {{0}};
+   emit_data.info = tgsi_get_opcode_info(tgsi_opcode);
   emit_data.arg_count = 1;
   emit_data.args[0] = arg0;
   return lp_build_emit_llvm(bld_base, tgsi_opcode, &emit_data);
@@ -142,7 +143,8 @@ lp_build_emit_llvm_binary(
   LLVMValueRef arg0,
   LLVMValueRef arg1)
 {
-   struct lp_build_emit_data emit_data;
+   struct lp_build_emit_data emit_data = {{0}};
+   emit_data.info = tgsi_get_opcode_info(tgsi_opcode);
   emit_data.arg_count = 2;
   emit_data.args[0] = arg0;
   emit_data.args[1] = arg1;
@@ -157,7 +159,8 @@ lp_build_emit_llvm_ternary(
   LLVMValueRef arg1,
   LLVMValueRef arg2)
 {
-   struct lp_build_emit_data emit_data;
+   struct lp_build_emit_data emit_data = {{0}};
+   emit_data.info = tgsi_get_opcode_info(tgsi_opcode);
   emit_data.arg_count = 3;
   emit_data.args[0] = arg0;
   emit_data.args[1] = arg1;
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -230,6 +230,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const LLVMValueRef (*inputs)[4],
                  LLVMValueRef (*outputs)[4],
                  LLVMValueRef context_ptr,
+                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface);
@@ -447,6 +448,7 @@ struct lp_build_tgsi_soa_context
   const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
   LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS];
   LLVMValueRef context_ptr;
+   LLVMValueRef thread_data_ptr;

   const struct lp_build_sampler_soa *sampler;

--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_action.c
@@ -538,12 +538,19 @@ lrp_emit(
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
 {
-   LLVMValueRef tmp;
-   tmp = lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_SUB,
-                                   emit_data->args[1],
-                                   emit_data->args[2]);
-   emit_data->output[emit_data->chan] = lp_build_emit_llvm_ternary(bld_base,
-                    TGSI_OPCODE_MAD, emit_data->args[0], tmp, emit_data->args[2]);
+   struct lp_build_context *bld = &bld_base->base;
+   LLVMValueRef t = emit_data->args[0];
+   LLVMValueRef one_minus_t, src1_part, src2_part;
+
+   /* LRP: dst = t * src1 + (1 - t) * src2, built as two products and a sum.
+    *
+    * The shorter form "src2 + t * (src1 - src2)" is avoided on purpose:
+    * with floating-point rounding it need not return exactly src1 at t = 1.
+    */
+   one_minus_t = lp_build_sub(bld, bld->one, t);
+   src1_part = lp_build_mul(bld, emit_data->args[1], t);
+   src2_part = lp_build_mul(bld, emit_data->args[2], one_minus_t);
+   emit_data->output[emit_data->chan] = lp_build_add(bld, src1_part, src2_part);
 }

 /* TGSI_OPCODE_MAD */
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -2321,6 +2321,7 @@ emit_tex( struct lp_build_tgsi_soa_context *bld,
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
@@ -2488,6 +2489,7 @@ emit_sample(struct lp_build_tgsi_soa_context *bld,
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
@@ -2606,8 +2608,14 @@ emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
-   params.sampler_index = unit;
+   /*
+    * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
+    * and trigger some assertions with d3d10 where the sampler view number
+    * can exceed this.
+    */
+   params.sampler_index = 0;
   params.context_ptr = bld->context_ptr;
+   params.thread_data_ptr = bld->thread_data_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
@@ -3858,6 +3866,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
                  LLVMValueRef context_ptr,
+                  LLVMValueRef thread_data_ptr,
                  struct lp_build_sampler_soa *sampler,
                  const struct tgsi_shader_info *info,
                  const struct lp_build_tgsi_gs_iface *gs_iface)
@@ -3893,6 +3902,7 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
   bld.bld_base.info = info;
   bld.indirect_files = info->indirect_files;
   bld.context_ptr = context_ptr;
+   bld.thread_data_ptr = thread_data_ptr;

   /*
    * If the number of temporaries is rather large then we just
--- a/src/gallium/auxiliary/hud/hud_context.c
+++ b/src/gallium/auxiliary/hud/hud_context.c
@@ -33,6 +33,7 @@
 * Set GALLIUM_HUD=help for more info.
 */

+#include <signal.h>
 #include <stdio.h>

 #include "hud/hud_context.h"
@@ -51,12 +52,15 @@
 #include "tgsi/tgsi_text.h"
 #include "tgsi/tgsi_dump.h"

+/* Control the visibility of all HUD contexts (written from a signal handler) */
+static volatile sig_atomic_t huds_visible = TRUE;

 struct hud_context {
   struct pipe_context *pipe;
   struct cso_context *cso;
   struct u_upload_mgr *uploader;

+   struct hud_batch_query_context *batch_query;
   struct list_head pane_list;

   /* states */
@@ -95,6 +99,13 @@ struct hud_context {
   } text, bg, whitelines;
 };

+#ifdef PIPE_OS_UNIX
+static void
+signal_visible_handler(int sig, siginfo_t *siginfo, void *context)
+{
+   huds_visible = !huds_visible; /* flip HUD visibility; arguments are unused */
+}
+#endif

 static void
 hud_draw_colored_prims(struct hud_context *hud, unsigned prim,
@@ -441,6 +452,9 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
   struct hud_pane *pane;
   struct hud_graph *gr;

+   if (!huds_visible)
+      return;
+
   hud->fb_width = tex->width0;
   hud->fb_height = tex->height0;
   hud->constants.two_div_fb_width = 2.0f / hud->fb_width;
@@ -510,6 +524,8 @@ hud_draw(struct hud_context *hud, struct pipe_resource *tex)
   hud_alloc_vertices(hud, &hud->text, 4 * 512, 4 * sizeof(float));

   /* prepare all graphs */
+   hud_batch_query_update(hud->batch_query);
+
   LIST_FOR_EACH_ENTRY(pane, &hud->pane_list, head) {
      LIST_FOR_EACH_ENTRY(gr, &pane->graph_list, head) {
         gr->query_new_value(gr);
@@ -903,17 +919,21 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
      }
      else if (strcmp(name, "samples-passed") == 0 &&
               has_occlusion_query(hud->pipe->screen)) {
-         hud_pipe_query_install(pane, hud->pipe, "samples-passed",
+         hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+                                "samples-passed",
                                PIPE_QUERY_OCCLUSION_COUNTER, 0, 0,
                                PIPE_DRIVER_QUERY_TYPE_UINT64,
-                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+                                0);
      }
      else if (strcmp(name, "primitives-generated") == 0 &&
               has_streamout(hud->pipe->screen)) {
-         hud_pipe_query_install(pane, hud->pipe, "primitives-generated",
+         hud_pipe_query_install(&hud->batch_query, pane, hud->pipe,
+                                "primitives-generated",
                                PIPE_QUERY_PRIMITIVES_GENERATED, 0, 0,
                                PIPE_DRIVER_QUERY_TYPE_UINT64,
-                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+                                PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+                                0);
      }
      else {
         boolean processed = FALSE;
@@ -938,17 +958,19 @@ hud_parse_env_var(struct hud_context *hud, const char *env)
               if (strcmp(name, pipeline_statistics_names[i]) == 0)
                  break;
            if (i < Elements(pipeline_statistics_names)) {
-               hud_pipe_query_install(pane, hud->pipe, name,
+               hud_pipe_query_install(&hud->batch_query, pane, hud->pipe, name,
                                      PIPE_QUERY_PIPELINE_STATISTICS, i,
                                      0, PIPE_DRIVER_QUERY_TYPE_UINT64,
-                                      PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE);
+                                      PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE,
+                                      0);
               processed = TRUE;
            }
         }

         /* driver queries */
         if (!processed) {
-            if (!hud_driver_query_install(pane, hud->pipe, name)){
+            if (!hud_driver_query_install(&hud->batch_query, pane, hud->pipe,
+                                          name)) {
               fprintf(stderr, "gallium_hud: unknown driver query '%s'\n", name);
            }
         }
@@ -987,6 +1009,9 @@ hud_parse_env_var(struct hud_context *hud, const char *env)

      case ',':
         env++;
+         if (!pane)
+            break;
+
         y += height + hud->font.glyph_height * (pane->num_graphs + 2);
         height = 100;

@@ -1122,6 +1147,12 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)
   struct pipe_sampler_view view_templ;
   unsigned i;
   const char *env = debug_get_option("GALLIUM_HUD", NULL);
+   unsigned signo = debug_get_num_option("GALLIUM_HUD_TOGGLE_SIGNAL", 0);
+#ifdef PIPE_OS_UNIX
+   static boolean sig_handled = FALSE;
+   struct sigaction action = {};
+#endif
+   huds_visible = debug_get_bool_option("GALLIUM_HUD_VISIBLE", TRUE);

   if (!env || !*env)
      return NULL;
@@ -1264,6 +1295,22 @@ hud_create(struct pipe_context *pipe, struct cso_context *cso)

   LIST_INITHEAD(&hud->pane_list);

+   /* setup sig handler once for all hud contexts */
+#ifdef PIPE_OS_UNIX
+   if (!sig_handled && signo != 0) {
+      action.sa_sigaction = &signal_visible_handler;
+      action.sa_flags = SA_SIGINFO;
+
+      if (signo >= NSIG)
+         fprintf(stderr, "gallium_hud: invalid signal %u\n", signo);
+      else if (sigaction(signo, &action, NULL) < 0)
+         fprintf(stderr, "gallium_hud: unable to set handler for signal %u\n", signo);
+      fflush(stderr);
+
+      sig_handled = TRUE;
+   }
+#endif
+
   hud_parse_env_var(hud, env);
   return hud;
 }
@@ -1284,6 +1331,7 @@ hud_destroy(struct hud_context *hud)
      FREE(pane);
   }

+   hud_batch_query_cleanup(&hud->batch_query);
   pipe->delete_fs_state(pipe, hud->fs_color);
   pipe->delete_fs_state(pipe, hud->fs_text);
   pipe->delete_vs_state(pipe, hud->vs);
--- a/src/gallium/auxiliary/hud/hud_cpu.c
+++ b/src/gallium/auxiliary/hud/hud_cpu.c
@@ -33,6 +33,58 @@
 #include "util/u_memory.h"
 #include <stdio.h>
 #include <inttypes.h>
+#ifdef PIPE_OS_WINDOWS
+#include <windows.h>
+#endif
+
+
+#ifdef PIPE_OS_WINDOWS
+
+/* Fold the two 32-bit halves of a FILETIME into a single 64-bit value. */
+static inline uint64_t
+filetime_to_scalar(FILETIME ft)
+{
+   const uint64_t high = ft.dwHighDateTime;
+
+   return (high << 32) | ft.dwLowDateTime;
+}
+
+static boolean
+get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
+{
+   SYSTEM_INFO sysInfo;
+   FILETIME ftNow, ftCreation, ftExit, ftKernel, ftUser;
+   /* Windows: process CPU time measured against wall time since process creation. */
+   GetSystemInfo(&sysInfo);
+   assert(sysInfo.dwNumberOfProcessors >= 1);
+   if (cpu_index != ALL_CPUS && cpu_index >= sysInfo.dwNumberOfProcessors) {
+      /* Tell hud_get_num_cpus there are only this many CPUs. */
+      return FALSE;
+   }
+
+   /* Get accumulated user and kernel time for all threads of this process */
+   if (!GetProcessTimes(GetCurrentProcess(), &ftCreation, &ftExit,
+                        &ftKernel, &ftUser))
+      return FALSE;
+
+   GetSystemTimeAsFileTime(&ftNow);
+
+   *busy_time = filetime_to_scalar(ftUser) + filetime_to_scalar(ftKernel);
+   *total_time = filetime_to_scalar(ftNow) - filetime_to_scalar(ftCreation);
+
+   /* busy_time already has the time across all cpus.
+    * XXX: if we want 100% to mean one CPU, 200% two cpus, eliminate the
+    * following line.
+    */
+   *total_time *= sysInfo.dwNumberOfProcessors;
+
+   /* XXX: we ignore cpu_index, i.e., we assume that the individual CPU usage
+    * and the system usage are one and the same.
+    */
+   return TRUE;
+}
+
+#else

 static boolean
 get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
@@ -81,6 +133,8 @@ get_cpu_stats(unsigned cpu_index, uint64_t *busy_time, uint64_t *total_time)
   fclose(f);
   return FALSE;
 }
+#endif
+

 struct cpu_info {
   unsigned cpu_index;
--- a/src/gallium/auxiliary/hud/hud_driver_query.c
+++ b/src/gallium/auxiliary/hud/hud_driver_query.c
@@ -34,13 +34,164 @@
 #include "hud/hud_private.h"
 #include "pipe/p_screen.h"
 #include "os/os_time.h"
+#include "util/u_math.h"
 #include "util/u_memory.h"
 #include <stdio.h>

+// Must be a power of two
 #define NUM_QUERIES 8

+struct hud_batch_query_context {
+   struct pipe_context *pipe;
+   unsigned num_query_types;       /* entries of query_types in use */
+   unsigned allocated_query_types; /* capacity of query_types, in entries */
+   unsigned *query_types;          /* grown on demand by batch_query_add */
+
+   boolean failed; /* once set, hud_batch_query_update returns immediately */
+   struct pipe_query *query[NUM_QUERIES];        /* ring of batch queries */
+   union pipe_query_result *result[NUM_QUERIES]; /* per-slot result buffers */
+   unsigned head, pending, results; /* current slot; unread queries; results of last update */
+};
+
+void
+hud_batch_query_update(struct hud_batch_query_context *bq)
+{
+   struct pipe_context *pipe;
+   /* Per-frame step: end the running query, read ready results, begin anew. */
+   if (!bq || bq->failed)
+      return;
+
+   pipe = bq->pipe;
+
+   if (bq->query[bq->head])
+      pipe->end_query(pipe, bq->query[bq->head]);
+
+   bq->results = 0;
+   /* Read back pending queries oldest first; stop at the first that fails. */
+   while (bq->pending) {
+      unsigned idx = (bq->head - bq->pending + 1) % NUM_QUERIES;
+      struct pipe_query *query = bq->query[idx];
+
+      if (!bq->result[idx])
+         bq->result[idx] = MALLOC(sizeof(bq->result[idx]->batch[0]) *
+                                  bq->num_query_types);
+      if (!bq->result[idx]) {
+         fprintf(stderr, "gallium_hud: out of memory.\n");
+         bq->failed = TRUE;
+         return;
+      }
+
+      if (!pipe->get_query_result(pipe, query, FALSE, bq->result[idx]))
+         break;
+
+      ++bq->results;
+      --bq->pending;
+   }
+
+   bq->head = (bq->head + 1) % NUM_QUERIES;
+
+   if (bq->pending == NUM_QUERIES) {
+      fprintf(stderr,
+              "gallium_hud: all queries busy after %i frames, dropping data.\n",
+              NUM_QUERIES);
+
+      assert(bq->query[bq->head]);
+      pipe->destroy_query(bq->pipe, bq->query[bq->head]);
+      bq->query[bq->head] = NULL;
+      --bq->pending; /* the dropped query no longer awaits a result */
+   }
+
+   ++bq->pending;
+
+   if (!bq->query[bq->head]) {
+      bq->query[bq->head] = pipe->create_batch_query(pipe,
+                                                     bq->num_query_types,
+                                                     bq->query_types);
+
+      if (!bq->query[bq->head]) {
+         fprintf(stderr,
+                 "gallium_hud: create_batch_query failed. You may have "
+                 "selected too many or incompatible queries.\n");
+         bq->failed = TRUE;
+         return;
+      }
+   }
+
+   if (!pipe->begin_query(pipe, bq->query[bq->head])) {
+      fprintf(stderr,
+              "gallium_hud: could not begin batch query. You may have "
+              "selected too many or incompatible queries.\n");
+      bq->failed = TRUE;
+   }
+}
+
+static boolean
+batch_query_add(struct hud_batch_query_context **pbq,
+                struct pipe_context *pipe, unsigned query_type,
+                unsigned *result_index)
+{
+   struct hud_batch_query_context *bq = *pbq;
+   unsigned i;
+   /* Create the batch context on first use and publish it through *pbq. */
+   if (!bq) {
+      bq = CALLOC_STRUCT(hud_batch_query_context);
+      if (!bq)
+         return false;
+      bq->pipe = pipe;
+      *pbq = bq;
+   }
+   /* A query type that is already registered reuses its existing index. */
+   for (i = 0; i < bq->num_query_types; ++i) {
+      if (bq->query_types[i] == query_type) {
+         *result_index = i;
+         return true;
+      }
+   }
+   /* Array full: grow it to at least 16 entries, doubling after that. */
+   if (bq->num_query_types == bq->allocated_query_types) {
+      unsigned new_alloc = MAX2(16, bq->allocated_query_types * 2);
+      unsigned *new_query_types
+         = REALLOC(bq->query_types,
+                   bq->allocated_query_types * sizeof(unsigned),
+                   new_alloc * sizeof(unsigned));
+      if (!new_query_types)
+         return false;
+      bq->query_types = new_query_types;
+      bq->allocated_query_types = new_alloc;
+   }
+
+   bq->query_types[bq->num_query_types] = query_type;
+   *result_index = bq->num_query_types++; /* index of the appended entry */
+   return true;
+}
+
+void
+hud_batch_query_cleanup(struct hud_batch_query_context **pbq)
+{
+   struct hud_batch_query_context *bq = *pbq;
+   unsigned idx;
+   /* Release the batch context behind *pbq, if any, and clear the pointer. */
+   if (!bq)
+      return;
+
+   *pbq = NULL;
+   /* End the head-slot query first, unless the context is marked failed. */
+   if (bq->query[bq->head] && !bq->failed)
+      bq->pipe->end_query(bq->pipe, bq->query[bq->head]);
+
+   for (idx = 0; idx < NUM_QUERIES; ++idx) {
+      if (bq->query[idx])
+         bq->pipe->destroy_query(bq->pipe, bq->query[idx]);
+      FREE(bq->result[idx]);
+   }
+
+   FREE(bq->query_types);
+   FREE(bq);
+}
+
 struct query_info {
   struct pipe_context *pipe;
+   struct hud_batch_query_context *batch;
   unsigned query_type;
   unsigned result_index; /* unit depends on query_type */
   enum pipe_driver_query_result_type result_type;
@@ -48,7 +199,6 @@ struct query_info {
   /* Ring of queries. If a query is busy, we use another slot. */
   struct pipe_query *query[NUM_QUERIES];
   unsigned head, tail;
-   unsigned num_queries;

   uint64_t last_time;
   uint64_t results_cumulative;
@@ -56,11 +206,26 @@ struct query_info {
 };

 static void
-query_new_value(struct hud_graph *gr)
+query_new_value_batch(struct query_info *info)
+{ /* Adds this graph's value from each of bq->results batch results. */
+   struct hud_batch_query_context *bq = info->batch;
+   unsigned result_index = info->result_index;
+   unsigned idx = (bq->head - bq->pending) % NUM_QUERIES;
+   unsigned results = bq->results;
+
+   while (results) { /* one ring slot per result, walking backwards */
+      info->results_cumulative += bq->result[idx]->batch[result_index].u64;
+      ++info->num_results;
+
+      --results;
+      idx = (idx - 1) % NUM_QUERIES; /* wraps at 0; assumes 2^k size - TODO confirm */
+   }
+}
+
+static void
+query_new_value_normal(struct query_info *info)
 {
-   struct query_info *info = gr->query_data;
   struct pipe_context *pipe = info->pipe;
-   uint64_t now = os_time_get();

   if (info->last_time) {
      if (info->query[info->head])
@@ -107,30 +272,9 @@ query_new_value(struct hud_graph *gr)
            break;
         }
      }
-
-      if (info->num_results && info->last_time + gr->pane->period <= now) {
-         uint64_t value;
-
-         switch (info->result_type) {
-         default:
-         case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
-            value = info->results_cumulative / info->num_results;
-            break;
-         case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
-            value = info->results_cumulative;
-            break;
-         }
-
-         hud_graph_add_value(gr, value);
-
-         info->last_time = now;
-         info->results_cumulative = 0;
-         info->num_results = 0;
-      }
   }
   else {
      /* initialize */
-      info->last_time = now;
      info->query[info->head] = pipe->create_query(pipe, info->query_type, 0);
   }

@@ -138,12 +282,50 @@ query_new_value(struct hud_graph *gr)
      pipe->begin_query(pipe, info->query[info->head]);
 }

+static void
+query_new_value(struct hud_graph *gr)
+{ /* Gathers new results; after a full pane period, adds a graph value. */
+   struct query_info *info = gr->query_data;
+   uint64_t now = os_time_get();
+
+   if (info->batch) { /* graph is fed from the batch query context */
+      query_new_value_batch(info);
+   } else {
+      query_new_value_normal(info);
+   }
+
+   if (!info->last_time) { /* first call: only record the start time */
+      info->last_time = now;
+      return;
+   }
+
+   if (info->num_results && info->last_time + gr->pane->period <= now) {
+      uint64_t value; /* what gets plotted for the elapsed period */
+
+      switch (info->result_type) {
+      default: /* unknown result types are treated as AVERAGE */
+      case PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE:
+         value = info->results_cumulative / info->num_results;
+         break;
+      case PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE:
+         value = info->results_cumulative;
+         break;
+      }
+
+      hud_graph_add_value(gr, value);
+
+      info->last_time = now; /* start accumulating the next period */
+      info->results_cumulative = 0;
+      info->num_results = 0;
+   }
+}
+
 static void
 free_query_info(void *ptr)
 {
   struct query_info *info = ptr;

-   if (info->last_time) {
+   if (!info->batch && info->last_time) {
      struct pipe_context *pipe = info->pipe;
      int i;

@@ -159,11 +341,13 @@ free_query_info(void *ptr)
 }

 void
-hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+hud_pipe_query_install(struct hud_batch_query_context **pbq,
+                       struct hud_pane *pane, struct pipe_context *pipe,
                       const char *name, unsigned query_type,
                       unsigned result_index,
                       uint64_t max_value, enum pipe_driver_query_type type,
-                       enum pipe_driver_query_result_type result_type)
+                       enum pipe_driver_query_result_type result_type,
+                       unsigned flags)
 {
   struct hud_graph *gr;
   struct query_info *info;
@@ -175,28 +359,40 @@ hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
   strncpy(gr->name, name, sizeof(gr->name));
   gr->name[sizeof(gr->name) - 1] = '\0';
   gr->query_data = CALLOC_STRUCT(query_info);
-   if (!gr->query_data) {
-      FREE(gr);
-      return;
-   }
+   if (!gr->query_data)
+      goto fail_gr;

   gr->query_new_value = query_new_value;
   gr->free_query_data = free_query_info;

   info = gr->query_data;
   info->pipe = pipe;
-   info->query_type = query_type;
-   info->result_index = result_index;
   info->result_type = result_type;

+   if (flags & PIPE_DRIVER_QUERY_FLAG_BATCH) {
+      if (!batch_query_add(pbq, pipe, query_type, &info->result_index))
+         goto fail_info;
+      info->batch = *pbq;
+   } else {
+      info->query_type = query_type;
+      info->result_index = result_index;
+   }
+
   hud_pane_add_graph(pane, gr);
   if (pane->max_value < max_value)
      hud_pane_set_max_value(pane, max_value);
   pane->type = type;
+   return;
+
+fail_info:
+   FREE(info);
+fail_gr:
+   FREE(gr);
 }

 boolean
-hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+hud_driver_query_install(struct hud_batch_query_context **pbq,
+                         struct hud_pane *pane, struct pipe_context *pipe,
                         const char *name)
 {
   struct pipe_screen *screen = pipe->screen;
@@ -220,8 +416,9 @@ hud_driver_query_install(struct hud_pane *pane, struct pipe_context *pipe,
   if (!found)
      return FALSE;

-   hud_pipe_query_install(pane, pipe, query.name, query.query_type, 0,
-                          query.max_value.u64, query.type, query.result_type);
+   hud_pipe_query_install(pbq, pane, pipe, query.name, query.query_type, 0,
+                          query.max_value.u64, query.type, query.result_type,
+                          query.flags);

   return TRUE;
 }
--- a/src/gallium/auxiliary/hud/hud_private.h
+++ b/src/gallium/auxiliary/hud/hud_private.h
@@ -80,19 +80,26 @@ void hud_pane_set_max_value(struct hud_pane *pane, uint64_t value);
 void hud_graph_add_value(struct hud_graph *gr, uint64_t value);

 /* graphs/queries */
+struct hud_batch_query_context;
+
 #define ALL_CPUS ~0 /* optionally set as cpu_index */

 int hud_get_num_cpus(void);

 void hud_fps_graph_install(struct hud_pane *pane);
 void hud_cpu_graph_install(struct hud_pane *pane, unsigned cpu_index);
-void hud_pipe_query_install(struct hud_pane *pane, struct pipe_context *pipe,
+void hud_pipe_query_install(struct hud_batch_query_context **pbq,
+                            struct hud_pane *pane, struct pipe_context *pipe,
                            const char *name, unsigned query_type,
                            unsigned result_index,
                            uint64_t max_value,
                            enum pipe_driver_query_type type,
-                            enum pipe_driver_query_result_type result_type);
-boolean hud_driver_query_install(struct hud_pane *pane,
+                            enum pipe_driver_query_result_type result_type,
+                            unsigned flags);
+boolean hud_driver_query_install(struct hud_batch_query_context **pbq,
+                                 struct hud_pane *pane,
                                 struct pipe_context *pipe, const char *name);
+void hud_batch_query_update(struct hud_batch_query_context *bq);
+void hud_batch_query_cleanup(struct hud_batch_query_context **pbq);

 #endif
--- a/src/gallium/auxiliary/indices/u_indices.c
+++ b/src/gallium/auxiliary/indices/u_indices.c
@@ -68,17 +68,18 @@ static void translate_memcpy_uint( const void *in,
 * \param out_nr  returns number of new vertices
 * \param out_translate  returns the translation function to use by the caller
 */
-int u_index_translator( unsigned hw_mask,
-                        unsigned prim,
-                        unsigned in_index_size,
-                        unsigned nr,
-                        unsigned in_pv,
-                        unsigned out_pv,
-                        unsigned prim_restart,
-                        unsigned *out_prim,
-                        unsigned *out_index_size,
-                        unsigned *out_nr,
-                        u_translate_func *out_translate )
+enum indices_mode
+u_index_translator(unsigned hw_mask,
+                   unsigned prim,
+                   unsigned in_index_size,
+                   unsigned nr,
+                   unsigned in_pv,
+                   unsigned out_pv,
+                   unsigned prim_restart,
+                   unsigned *out_prim,
+                   unsigned *out_index_size,
+                   unsigned *out_nr,
+                   u_translate_func *out_translate)
 {
   unsigned in_idx;
   unsigned out_idx;
@@ -204,17 +205,17 @@ int u_index_translator( unsigned hw_mask,
 * \param out_nr  returns new number of vertices to draw
 * \param out_generate  returns pointer to the generator function
 */
-int u_index_generator( unsigned hw_mask,
-                       unsigned prim,
-                       unsigned start,
-                       unsigned nr,
-                       unsigned in_pv,
-                       unsigned out_pv,
-                       unsigned *out_prim,
-                       unsigned *out_index_size,
-                       unsigned *out_nr,
-                       u_generate_func *out_generate )
-
+enum indices_mode
+u_index_generator(unsigned hw_mask,
+                  unsigned prim,
+                  unsigned start,
+                  unsigned nr,
+                  unsigned in_pv,
+                  unsigned out_pv,
+                  unsigned *out_prim,
+                  unsigned *out_index_size,
+                  unsigned *out_nr,
+                  u_generate_func *out_generate)
 {
   unsigned out_idx;

--- a/src/gallium/auxiliary/indices/u_indices.h
+++ b/src/gallium/auxiliary/indices/u_indices.h
@@ -67,66 +67,68 @@ typedef void (*u_generate_func)( unsigned start,
 /* Return codes describe the translate/generate operation.  Caller may
 * be able to reuse translated indices under some circumstances.
 */
-#define U_TRANSLATE_ERROR  -1
-#define U_TRANSLATE_NORMAL  1
-#define U_TRANSLATE_MEMCPY  2
-#define U_GENERATE_LINEAR   3
-#define U_GENERATE_REUSABLE 4
-#define U_GENERATE_ONE_OFF  5
-
+enum indices_mode {
+   U_TRANSLATE_ERROR = -1,
+   U_TRANSLATE_NORMAL = 1,
+   U_TRANSLATE_MEMCPY = 2,
+   U_GENERATE_LINEAR  = 3,
+   U_GENERATE_REUSABLE= 4,
+   U_GENERATE_ONE_OFF = 5,
+};

 void u_index_init( void );

-int u_index_translator( unsigned hw_mask,
-                        unsigned prim,
-                        unsigned in_index_size,
-                        unsigned nr,
-                        unsigned in_pv,   /* API */
-                        unsigned out_pv,  /* hardware */
-                        unsigned prim_restart,
-                        unsigned *out_prim,
-                        unsigned *out_index_size,
-                        unsigned *out_nr,
-                        u_translate_func *out_translate );
+enum indices_mode
+u_index_translator(unsigned hw_mask,
+                   unsigned prim,
+                   unsigned in_index_size,
+                   unsigned nr,
+                   unsigned in_pv,   /* API */
+                   unsigned out_pv,  /* hardware */
+                   unsigned prim_restart,
+                   unsigned *out_prim,
+                   unsigned *out_index_size,
+                   unsigned *out_nr,
+                   u_translate_func *out_translate);

 /* Note that even when generating it is necessary to know what the
 * API's PV is, as the indices generated will depend on whether it is
 * the same as hardware or not, and in the case of triangle strips,
 * whether it is first or last.
 */
-int u_index_generator( unsigned hw_mask,
-                       unsigned prim,
-                       unsigned start,
-                       unsigned nr,
-                       unsigned in_pv,   /* API */
-                       unsigned out_pv,  /* hardware */
-                       unsigned *out_prim,
-                       unsigned *out_index_size,
-                       unsigned *out_nr,
-                       u_generate_func *out_generate );
+enum indices_mode
+u_index_generator(unsigned hw_mask,
+                  unsigned prim,
+                  unsigned start,
+                  unsigned nr,
+                  unsigned in_pv,   /* API */
+                  unsigned out_pv,  /* hardware */
+                  unsigned *out_prim,
+                  unsigned *out_index_size,
+                  unsigned *out_nr,
+                  u_generate_func *out_generate);


 void u_unfilled_init( void );

-int u_unfilled_translator( unsigned prim,
-                           unsigned in_index_size,
-                           unsigned nr,
-                           unsigned unfilled_mode,
-                           unsigned *out_prim,
-                           unsigned *out_index_size,
-                           unsigned *out_nr,
-                           u_translate_func *out_translate );
-
-int u_unfilled_generator( unsigned prim,
-                          unsigned start,
-                          unsigned nr,
-                          unsigned unfilled_mode,
-                          unsigned *out_prim,
-                          unsigned *out_index_size,
-                          unsigned *out_nr,
-                          u_generate_func *out_generate );
-
-
+enum indices_mode
+u_unfilled_translator(unsigned prim,
+                      unsigned in_index_size,
+                      unsigned nr,
+                      unsigned unfilled_mode,
+                      unsigned *out_prim,
+                      unsigned *out_index_size,
+                      unsigned *out_nr,
+                      u_translate_func *out_translate);

+enum indices_mode
+u_unfilled_generator(unsigned prim,
+                     unsigned start,
+                     unsigned nr,
+                     unsigned unfilled_mode,
+                     unsigned *out_prim,
+                     unsigned *out_index_size,
+                     unsigned *out_nr,
+                     u_generate_func *out_generate);

 #endif
--- a/src/gallium/auxiliary/indices/u_unfilled_indices.c
+++ b/src/gallium/auxiliary/indices/u_unfilled_indices.c
@@ -111,14 +111,15 @@ static unsigned nr_lines( unsigned prim,
                              


-int u_unfilled_translator( unsigned prim,
-                        unsigned in_index_size,
-                        unsigned nr,
-                        unsigned unfilled_mode,
-                        unsigned *out_prim,
-                        unsigned *out_index_size,
-                        unsigned *out_nr,
-                        u_translate_func *out_translate )
+enum indices_mode
+u_unfilled_translator(unsigned prim,
+                      unsigned in_index_size,
+                      unsigned nr,
+                      unsigned unfilled_mode,
+                      unsigned *out_prim,
+                      unsigned *out_index_size,
+                      unsigned *out_nr,
+                      u_translate_func *out_translate)
 {
   unsigned in_idx;
   unsigned out_idx;
@@ -170,14 +171,15 @@ int u_unfilled_translator( unsigned prim,
 * different front/back fill modes, that can be handled with the
 * 'draw' module.
 */
-int u_unfilled_generator( unsigned prim,
-                          unsigned start,
-                          unsigned nr,
-                          unsigned unfilled_mode,
-                          unsigned *out_prim,
-                          unsigned *out_index_size,
-                          unsigned *out_nr,
-                          u_generate_func *out_generate )
+enum indices_mode
+u_unfilled_generator(unsigned prim,
+                     unsigned start,
+                     unsigned nr,
+                     unsigned unfilled_mode,
+                     unsigned *out_prim,
+                     unsigned *out_index_size,
+                     unsigned *out_nr,
+                     u_generate_func *out_generate)
 {
   unsigned out_idx;

--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -24,9 +24,10 @@

 #include "util/ralloc.h"
 #include "glsl/nir/nir.h"
+#include "glsl/nir/nir_control_flow.h"
 #include "glsl/nir/nir_builder.h"
 #include "glsl/list.h"
-#include "glsl/shader_enums.h"
+#include "glsl/nir/shader_enums.h"

 #include "nir/tgsi_to_nir.h"
 #include "tgsi/tgsi_parse.h"
@@ -64,24 +65,24 @@ struct ttn_compile {
   nir_register *addr_reg;

   /**
-    * Stack of cf_node_lists where instructions should be pushed as we pop
+    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * For each IF/ELSE/ENDIF block, if_stack[if_stack_pos] has where the else
    * instructions should be placed, and if_stack[if_stack_pos - 1] has where
    * the next instructions outside of the if/then/else block go.
    */
-   struct exec_list **if_stack;
+   nir_cursor *if_stack;
   unsigned if_stack_pos;

   /**
-    * Stack of cf_node_lists where instructions should be pushed as we pop
+    * Stack of nir_cursors where instructions should be pushed as we pop
    * back out of the control flow stack.
    *
    * loop_stack[loop_stack_pos - 1] contains the cf_node_list for the outside
    * of the loop.
    */
-   struct exec_list **loop_stack;
+   nir_cursor *loop_stack;
   unsigned loop_stack_pos;

   /* How many TGSI_FILE_IMMEDIATE vec4s have been parsed so far. */
@@ -93,6 +94,128 @@ struct ttn_compile {
 #define ttn_channel(b, src, swiz) \
   nir_swizzle(b, src, SWIZ(swiz, swiz, swiz, swiz), 1, false)

+static gl_varying_slot
+tgsi_varying_semantic_to_slot(unsigned semantic, unsigned index)
+{ /* Maps a TGSI semantic name/index pair to a VARYING_SLOT_* value. */
+   switch (semantic) {
+   case TGSI_SEMANTIC_POSITION:
+      return VARYING_SLOT_POS;
+   case TGSI_SEMANTIC_COLOR: /* any non-zero index selects the second slot */
+      if (index == 0)
+         return VARYING_SLOT_COL0;
+      else
+         return VARYING_SLOT_COL1;
+   case TGSI_SEMANTIC_BCOLOR:
+      if (index == 0)
+         return VARYING_SLOT_BFC0;
+      else
+         return VARYING_SLOT_BFC1;
+   case TGSI_SEMANTIC_FOG:
+      return VARYING_SLOT_FOGC;
+   case TGSI_SEMANTIC_PSIZE:
+      return VARYING_SLOT_PSIZ;
+   case TGSI_SEMANTIC_GENERIC: /* index is an offset from VAR0 */
+      return VARYING_SLOT_VAR0 + index;
+   case TGSI_SEMANTIC_FACE:
+      return VARYING_SLOT_FACE;
+   case TGSI_SEMANTIC_EDGEFLAG:
+      return VARYING_SLOT_EDGE;
+   case TGSI_SEMANTIC_PRIMID:
+      return VARYING_SLOT_PRIMITIVE_ID;
+   case TGSI_SEMANTIC_CLIPDIST:
+      if (index == 0)
+         return VARYING_SLOT_CLIP_DIST0;
+      else
+         return VARYING_SLOT_CLIP_DIST1;
+   case TGSI_SEMANTIC_CLIPVERTEX:
+      return VARYING_SLOT_CLIP_VERTEX;
+   case TGSI_SEMANTIC_TEXCOORD: /* index is an offset from TEX0 */
+      return VARYING_SLOT_TEX0 + index;
+   case TGSI_SEMANTIC_PCOORD:
+      return VARYING_SLOT_PNTC;
+   case TGSI_SEMANTIC_VIEWPORT_INDEX:
+      return VARYING_SLOT_VIEWPORT;
+   case TGSI_SEMANTIC_LAYER:
+      return VARYING_SLOT_LAYER;
+   default: /* unhandled semantic: report it and abort */
+      fprintf(stderr, "Bad TGSI semantic: %u/%u\n", semantic, index);
+      abort();
+   }
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * VARYING_SLOT_*.  Unlisted slots below ARRAY_SIZE(map) yield { 0, 0 }.
+ */
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = { /* { semantic name, semantic index } */
+      [VARYING_SLOT_POS] = { TGSI_SEMANTIC_POSITION, 0 },
+      [VARYING_SLOT_COL0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [VARYING_SLOT_COL1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [VARYING_SLOT_BFC0] = { TGSI_SEMANTIC_BCOLOR, 0 },
+      [VARYING_SLOT_BFC1] = { TGSI_SEMANTIC_BCOLOR, 1 },
+      [VARYING_SLOT_FOGC] = { TGSI_SEMANTIC_FOG, 0 },
+      [VARYING_SLOT_PSIZ] = { TGSI_SEMANTIC_PSIZE, 0 },
+      [VARYING_SLOT_FACE] = { TGSI_SEMANTIC_FACE, 0 },
+      [VARYING_SLOT_EDGE] = { TGSI_SEMANTIC_EDGEFLAG, 0 },
+      [VARYING_SLOT_PRIMITIVE_ID] = { TGSI_SEMANTIC_PRIMID, 0 },
+      [VARYING_SLOT_CLIP_DIST0] = { TGSI_SEMANTIC_CLIPDIST, 0 },
+      [VARYING_SLOT_CLIP_DIST1] = { TGSI_SEMANTIC_CLIPDIST, 1 },
+      [VARYING_SLOT_CLIP_VERTEX] = { TGSI_SEMANTIC_CLIPVERTEX, 0 },
+      [VARYING_SLOT_PNTC] = { TGSI_SEMANTIC_PCOORD, 0 },
+      [VARYING_SLOT_VIEWPORT] = { TGSI_SEMANTIC_VIEWPORT_INDEX, 0 },
+      [VARYING_SLOT_LAYER] = { TGSI_SEMANTIC_LAYER, 0 },
+   };
+
+   if (slot >= VARYING_SLOT_VAR0) { /* generic: index is offset from VAR0 */
+      *semantic_name = TGSI_SEMANTIC_GENERIC;
+      *semantic_index = slot - VARYING_SLOT_VAR0;
+      return;
+   }
+
+   if (slot >= VARYING_SLOT_TEX0 && slot <= VARYING_SLOT_TEX7) {
+      *semantic_name = TGSI_SEMANTIC_TEXCOORD;
+      *semantic_index = slot - VARYING_SLOT_TEX0;
+      return;
+   }
+
+   if (slot >= ARRAY_SIZE(map)) { /* past the table: report and abort */
+      fprintf(stderr, "Unknown varying slot %d\n", slot);
+      abort();
+   }
+
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
+/* Temporary helper to remap back to TGSI style semantic name/index
+ * values, for use in drivers that haven't been converted to using
+ * FRAG_RESULT_*.  slot must index into map; this is asserted.
+ */
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index)
+{
+   static const unsigned map[][2] = { /* { semantic name, semantic index } */
+      [FRAG_RESULT_DEPTH] = { TGSI_SEMANTIC_POSITION, 0 },
+      [FRAG_RESULT_COLOR] = { TGSI_SEMANTIC_COLOR, -1 }, /* stored as ~0u */
+      [FRAG_RESULT_DATA0 + 0] = { TGSI_SEMANTIC_COLOR, 0 },
+      [FRAG_RESULT_DATA0 + 1] = { TGSI_SEMANTIC_COLOR, 1 },
+      [FRAG_RESULT_DATA0 + 2] = { TGSI_SEMANTIC_COLOR, 2 },
+      [FRAG_RESULT_DATA0 + 3] = { TGSI_SEMANTIC_COLOR, 3 },
+      [FRAG_RESULT_DATA0 + 4] = { TGSI_SEMANTIC_COLOR, 4 },
+      [FRAG_RESULT_DATA0 + 5] = { TGSI_SEMANTIC_COLOR, 5 },
+      [FRAG_RESULT_DATA0 + 6] = { TGSI_SEMANTIC_COLOR, 6 },
+      [FRAG_RESULT_DATA0 + 7] = { TGSI_SEMANTIC_COLOR, 7 },
+   };
+   assert(slot < ARRAY_SIZE(map)); /* guard the table lookup below */
+   *semantic_name = map[slot][0];
+   *semantic_index = map[slot][1];
+}
+
 static nir_ssa_def *
 ttn_src_for_dest(nir_builder *b, nir_alu_dest *dest)
 {
@@ -215,12 +338,15 @@ ttn_emit_declaration(struct ttn_compile *c)
            var->data.mode = nir_var_shader_in;
            var->name = ralloc_asprintf(var, "in_%d", idx);

-            /* We should probably translate to a VERT_ATTRIB_* or VARYING_SLOT_*
-             * instead, but nothing in NIR core is looking at the value
-             * currently, and this is less change to drivers.
-             */
-            var->data.location = decl->Semantic.Name;
-            var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(decl->Semantic.Name,
+                                                decl->Semantic.Index);
+            } else {
+               assert(!decl->Declaration.Semantic);
+               var->data.location = VERT_ATTRIB_GENERIC0 + idx;
+            }
+            var->data.index = 0;

            /* We definitely need to translate the interpolation field, because
             * nir_print will decode it.
@@ -240,6 +366,8 @@ ttn_emit_declaration(struct ttn_compile *c)
            exec_list_push_tail(&b->shader->inputs, &var->node);
            break;
         case TGSI_FILE_OUTPUT: {
+            int semantic_name = decl->Semantic.Name;
+            int semantic_index = decl->Semantic.Index;
            /* Since we can't load from outputs in the IR, we make temporaries
             * for the outputs and emit stores to the real outputs at the end of
             * the shader.
@@ -251,14 +379,40 @@ ttn_emit_declaration(struct ttn_compile *c)

            var->data.mode = nir_var_shader_out;
            var->name = ralloc_asprintf(var, "out_%d", idx);
+            var->data.index = 0;

-            var->data.location = decl->Semantic.Name;
-            if (decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
-                decl->Semantic.Index == 0 &&
-                c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
-               var->data.index = -1;
-            else
-               var->data.index = decl->Semantic.Index;
+            if (c->scan->processor == TGSI_PROCESSOR_FRAGMENT) {
+               switch (semantic_name) {
+               case TGSI_SEMANTIC_COLOR: {
+                  /* TODO tgsi loses some information, so we cannot
+                   * actually differentiate here between DSB and MRT
+                   * at this point.  But so far no drivers using tgsi-
+                   * to-nir support dual source blend:
+                   */
+                  bool dual_src_blend = false;
+                  if (dual_src_blend && (semantic_index == 1)) {
+                     var->data.location = FRAG_RESULT_DATA0;
+                     var->data.index = 1;
+                  } else {
+                     if (c->scan->properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS])
+                        var->data.location = FRAG_RESULT_COLOR;
+                     else
+                        var->data.location = FRAG_RESULT_DATA0 + semantic_index;
+                  }
+                  break;
+               }
+               case TGSI_SEMANTIC_POSITION:
+                  var->data.location = FRAG_RESULT_DEPTH;
+                  break;
+               default:
+                  fprintf(stderr, "Bad TGSI semantic: %d/%d\n",
+                          decl->Semantic.Name, decl->Semantic.Index);
+                  abort();
+               }
+            } else {
+               var->data.location =
+                  tgsi_varying_semantic_to_slot(semantic_name, semantic_index);
+            }

            if (is_array) {
               unsigned j;
@@ -307,7 +461,7 @@ ttn_emit_immediate(struct ttn_compile *c)
   for (i = 0; i < 4; i++)
      load_const->value.u[i] = tgsi_imm->u[i].Uint;

-   nir_instr_insert_after_cf_list(b->cf_node_list, &load_const->instr);
+   nir_builder_instr_insert(b, &load_const->instr);
 }

 static nir_src
@@ -363,7 +517,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
         load->variables[0] = ttn_array_deref(c, load, var, offset, indirect);

         nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
-         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+         nir_builder_instr_insert(b, &load->instr);

         src = nir_src_for_ssa(&load->dest.ssa);

@@ -414,7 +568,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
      load->num_components = ncomp;

      nir_ssa_dest_init(&load->instr, &load->dest, ncomp, NULL);
-      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
@@ -476,7 +630,7 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
         srcn++;
      }
      nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
-      nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+      nir_builder_instr_insert(b, &load->instr);

      src = nir_src_for_ssa(&load->dest.ssa);
      break;
@@ -552,7 +706,7 @@ ttn_get_dest(struct ttn_compile *c, struct tgsi_full_dst_register *tgsi_fdst)

         load->dest = nir_dest_for_reg(reg);

-         nir_instr_insert_after_cf_list(b->cf_node_list, &load->instr);
+         nir_builder_instr_insert(b, &load->instr);
      } else {
         assert(!tgsi_dst->Indirect);
         dest.dest.reg.reg = c->temp_regs[index].reg;
@@ -667,7 +821,7 @@ ttn_alu(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
      instr->src[i].src = nir_src_for_ssa(src[i]);

   instr->dest = dest;
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
@@ -683,7 +837,7 @@ ttn_move_dest_masked(nir_builder *b, nir_alu_dest dest,
   mov->src[0].src = nir_src_for_ssa(def);
   for (unsigned i = def->num_components; i < 4; i++)
      mov->src[0].swizzle[i] = def->num_components - 1;
-   nir_instr_insert_after_cf_list(b->cf_node_list, &mov->instr);
+   nir_builder_instr_insert(b, &mov->instr);
 }

 static void
@@ -902,7 +1056,7 @@ ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
 {
   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+   nir_builder_instr_insert(b, &discard->instr);
 }

 static void
@@ -912,7 +1066,7 @@ ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
   nir_intrinsic_instr *discard =
      nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
   discard->src[0] = nir_src_for_ssa(cmp);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &discard->instr);
+   nir_builder_instr_insert(b, &discard->instr);
 }

 static void
@@ -920,10 +1074,6 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
 {
   nir_builder *b = &c->build;

-   /* Save the outside-of-the-if-statement node list. */
-   c->if_stack[c->if_stack_pos] = b->cf_node_list;
-   c->if_stack_pos++;
-
   src = ttn_channel(b, src, X);

   nir_if *if_stmt = nir_if_create(b->shader);
@@ -932,11 +1082,14 @@ ttn_if(struct ttn_compile *c, nir_ssa_def *src, bool is_uint)
   } else {
      if_stmt->condition = nir_src_for_ssa(nir_fne(b, src, nir_imm_int(b, 0)));
   }
-   nir_cf_node_insert_end(b->cf_node_list, &if_stmt->cf_node);
+   nir_builder_cf_insert(b, &if_stmt->cf_node);

-   nir_builder_insert_after_cf_list(b, &if_stmt->then_list);
+   c->if_stack[c->if_stack_pos] = nir_after_cf_node(&if_stmt->cf_node);
+   c->if_stack_pos++;

-   c->if_stack[c->if_stack_pos] = &if_stmt->else_list;
+   b->cursor = nir_after_cf_list(&if_stmt->then_list);
+
+   c->if_stack[c->if_stack_pos] = nir_after_cf_list(&if_stmt->else_list);
   c->if_stack_pos++;
 }

@@ -945,7 +1098,7 @@ ttn_else(struct ttn_compile *c)
 {
   nir_builder *b = &c->build;

-   nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos - 1]);
+   b->cursor = c->if_stack[c->if_stack_pos - 1];
 }

 static void
@@ -954,7 +1107,7 @@ ttn_endif(struct ttn_compile *c)
   nir_builder *b = &c->build;

   c->if_stack_pos -= 2;
-   nir_builder_insert_after_cf_list(b, c->if_stack[c->if_stack_pos]);
+   b->cursor = c->if_stack[c->if_stack_pos];
 }

 static void
@@ -962,28 +1115,27 @@ ttn_bgnloop(struct ttn_compile *c)
 {
   nir_builder *b = &c->build;

-   /* Save the outside-of-the-loop node list. */
-   c->loop_stack[c->loop_stack_pos] = b->cf_node_list;
+   nir_loop *loop = nir_loop_create(b->shader);
+   nir_builder_cf_insert(b, &loop->cf_node);
+
+   c->loop_stack[c->loop_stack_pos] = nir_after_cf_node(&loop->cf_node);
   c->loop_stack_pos++;

-   nir_loop *loop = nir_loop_create(b->shader);
-   nir_cf_node_insert_end(b->cf_node_list, &loop->cf_node);
-
-   nir_builder_insert_after_cf_list(b, &loop->body);
+   b->cursor = nir_after_cf_list(&loop->body);
 }

 static void
 ttn_cont(nir_builder *b)
 {
   nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_continue);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
 ttn_brk(nir_builder *b)
 {
   nir_jump_instr *instr = nir_jump_instr_create(b->shader, nir_jump_break);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);
 }

 static void
@@ -992,7 +1144,7 @@ ttn_endloop(struct ttn_compile *c)
   nir_builder *b = &c->build;

   c->loop_stack_pos--;
-   nir_builder_insert_after_cf_list(b, c->loop_stack[c->loop_stack_pos]);
+   b->cursor = c->loop_stack[c->loop_stack_pos];
 }

 static void
@@ -1286,7 +1438,7 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
   assert(src_number == num_srcs);

   nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &instr->instr);
+   nir_builder_instr_insert(b, &instr->instr);

   /* Resolve the writemask on the texture op. */
   ttn_move_dest(b, dest, &instr->dest.ssa);
@@ -1325,10 +1477,10 @@ ttn_txq(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
   txs->src[0].src_type = nir_tex_src_lod;

   nir_ssa_dest_init(&txs->instr, &txs->dest, 3, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &txs->instr);
+   nir_builder_instr_insert(b, &txs->instr);

   nir_ssa_dest_init(&qlv->instr, &qlv->dest, 1, NULL);
-   nir_instr_insert_after_cf_list(b->cf_node_list, &qlv->instr);
+   nir_builder_instr_insert(b, &qlv->instr);

   ttn_move_dest_masked(b, dest, &txs->dest.ssa, TGSI_WRITEMASK_XYZ);
   ttn_move_dest_masked(b, dest, &qlv->dest.ssa, TGSI_WRITEMASK_W);
@@ -1738,7 +1890,7 @@ ttn_emit_instruction(struct ttn_compile *c)
      store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
      store->src[0] = nir_src_for_reg(dest.dest.reg.reg);

-      nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+      nir_builder_instr_insert(b, &store->instr);
   }
 }

@@ -1767,11 +1919,26 @@ ttn_add_output_stores(struct ttn_compile *c)
         store->const_index[0] = loc;
         store->src[0].reg.reg = c->output_regs[loc].reg;
         store->src[0].reg.base_offset = c->output_regs[loc].offset;
-         nir_instr_insert_after_cf_list(b->cf_node_list, &store->instr);
+         nir_builder_instr_insert(b, &store->instr);
      }
   }
 }

+static gl_shader_stage
+tgsi_processor_to_shader_stage(unsigned processor)
+{
+   switch (processor) {
+   case TGSI_PROCESSOR_FRAGMENT:  return MESA_SHADER_FRAGMENT;
+   case TGSI_PROCESSOR_VERTEX:    return MESA_SHADER_VERTEX;
+   case TGSI_PROCESSOR_GEOMETRY:  return MESA_SHADER_GEOMETRY;
+   case TGSI_PROCESSOR_TESS_CTRL: return MESA_SHADER_TESS_CTRL;
+   case TGSI_PROCESSOR_TESS_EVAL: return MESA_SHADER_TESS_EVAL;
+   case TGSI_PROCESSOR_COMPUTE:   return MESA_SHADER_COMPUTE;
+   default:
+      unreachable("invalid TGSI processor");
+   };
+}
+
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
            const nir_shader_compiler_options *options)
@@ -1783,17 +1950,19 @@ tgsi_to_nir(const void *tgsi_tokens,
   int ret;

   c = rzalloc(NULL, struct ttn_compile);
-   s = nir_shader_create(NULL, options);
+
+   tgsi_scan_shader(tgsi_tokens, &scan);
+   c->scan = &scan;
+
+   s = nir_shader_create(NULL, tgsi_processor_to_shader_stage(scan.processor),
+                         options);

   nir_function *func = nir_function_create(s, "main");
   nir_function_overload *overload = nir_function_overload_create(func);
   nir_function_impl *impl = nir_function_impl_create(overload);

   nir_builder_init(&c->build, impl);
-   nir_builder_insert_after_cf_list(&c->build, &impl->body);
-
-   tgsi_scan_shader(tgsi_tokens, &scan);
-   c->scan = &scan;
+   c->build.cursor = nir_after_cf_list(&impl->body);

   s->num_inputs = scan.file_max[TGSI_FILE_INPUT] + 1;
   s->num_uniforms = scan.const_file_max[0] + 1;
@@ -1809,10 +1978,10 @@ tgsi_to_nir(const void *tgsi_tokens,
   c->num_samp_types = scan.file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
   c->samp_types = rzalloc_array(c, nir_alu_type, c->num_samp_types);

-   c->if_stack = rzalloc_array(c, struct exec_list *,
+   c->if_stack = rzalloc_array(c, nir_cursor,
                               (scan.opcode_count[TGSI_OPCODE_IF] +
                                scan.opcode_count[TGSI_OPCODE_UIF]) * 2);
-   c->loop_stack = rzalloc_array(c, struct exec_list *,
+   c->loop_stack = rzalloc_array(c, nir_cursor,
                                 scan.opcode_count[TGSI_OPCODE_BGNLOOP]);

   ret = tgsi_parse_init(&parser, tgsi_tokens);
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.h
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.h
@@ -28,3 +28,9 @@ struct nir_shader_compiler_options *options;
 struct nir_shader *
 tgsi_to_nir(const void *tgsi_tokens,
            const struct nir_shader_compiler_options *options);
+void
+varying_slot_to_tgsi_semantic(gl_varying_slot slot,
+                              unsigned *semantic_name, unsigned *semantic_index);
+void
+frag_result_to_tgsi_semantic(gl_frag_result slot,
+                             unsigned *semantic_name, unsigned *semantic_index);
--- a/src/gallium/auxiliary/os/os_misc.c
+++ b/src/gallium/auxiliary/os/os_misc.c
@@ -96,11 +96,13 @@ os_log_message(const char *message)
 }


+#if !defined(PIPE_SUBSYSTEM_EMBEDDED)
 const char *
 os_get_option(const char *name)
 {
   return getenv(name);
 }
+#endif /* !PIPE_SUBSYSTEM_EMBEDDED */


 /**
--- a/src/gallium/auxiliary/os/os_process.c
+++ b/src/gallium/auxiliary/os/os_process.c
@@ -54,37 +54,48 @@ boolean
 os_get_process_name(char *procname, size_t size)
 {
   const char *name;
+
+   /* First, check if the GALLIUM_PROCESS_NAME env var is set to
+    * override the normal process name query.
+    */
+   name = os_get_option("GALLIUM_PROCESS_NAME");
+
+   if (!name) {
+      /* do normal query */
+
 #if defined(PIPE_SUBSYSTEM_WINDOWS_USER)
-   char szProcessPath[MAX_PATH];
-   char *lpProcessName;
-   char *lpProcessExt;
+      char szProcessPath[MAX_PATH];
+      char *lpProcessName;
+      char *lpProcessExt;

-   GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath));
+      GetModuleFileNameA(NULL, szProcessPath, Elements(szProcessPath));

-   lpProcessName = strrchr(szProcessPath, '\\');
-   lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;
+      lpProcessName = strrchr(szProcessPath, '\\');
+      lpProcessName = lpProcessName ? lpProcessName + 1 : szProcessPath;

-   lpProcessExt = strrchr(lpProcessName, '.');
-   if (lpProcessExt) {
-      *lpProcessExt = '\0';
-   }
+      lpProcessExt = strrchr(lpProcessName, '.');
+      if (lpProcessExt) {
+         *lpProcessExt = '\0';
+      }

-   name = lpProcessName;
+      name = lpProcessName;

 #elif defined(__GLIBC__) || defined(__CYGWIN__)
-   name = program_invocation_short_name;
+      name = program_invocation_short_name;
 #elif defined(PIPE_OS_BSD) || defined(PIPE_OS_APPLE)
-   /* *BSD and OS X */
-   name = getprogname();
+      /* *BSD and OS X */
+      name = getprogname();
 #elif defined(PIPE_OS_HAIKU)
-   image_info info;
-   get_image_info(B_CURRENT_TEAM, &info);
-   name = info.name;
+      image_info info;
+      get_image_info(B_CURRENT_TEAM, &info);
+      name = info.name;
 #else
 #warning unexpected platform in os_process.c
-   return FALSE;
+      return FALSE;
 #endif

+   }
+
   assert(size > 0);
   assert(procname);

--- a/src/gallium/auxiliary/pipe-loader/Android.mk
+++ b/src/gallium/auxiliary/pipe-loader/Android.mk
@@ -0,0 +1,49 @@
+# Mesa 3-D graphics library
+#
+# Copyright (C) 2015 Emil Velikov <emil.l.velikov@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included
+# in all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+
+# NOTE: Currently we build only a 'static' pipe-loader
+LOCAL_PATH := $(call my-dir)
+
+# get COMMON_SOURCES and DRM_SOURCES
+include $(LOCAL_PATH)/Makefile.sources
+
+include $(CLEAR_VARS)
+
+LOCAL_CFLAGS := \
+	-DHAVE_PIPE_LOADER_DRI \
+	-DDROP_PIPE_LOADER_MISC \
+	-DGALLIUM_STATIC_TARGETS
+
+LOCAL_SRC_FILES := $(COMMON_SOURCES)
+
+LOCAL_MODULE := libmesa_pipe_loader
+
+ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
+LOCAL_CFLAGS += -DHAVE_LIBDRM
+LOCAL_SRC_FILES += $(DRM_SOURCES)
+
+LOCAL_SHARED_LIBRARIES := libdrm
+LOCAL_STATIC_LIBRARIES := libmesa_loader
+endif
+
+include $(GALLIUM_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
--- a/src/gallium/auxiliary/pipe-loader/Makefile.am
+++ b/src/gallium/auxiliary/pipe-loader/Makefile.am
@@ -9,20 +9,40 @@ AM_CFLAGS = \
 	$(GALLIUM_CFLAGS) \
 	$(VISIBILITY_CFLAGS)

-noinst_LTLIBRARIES = libpipe_loader.la
+noinst_LTLIBRARIES = \
+	libpipe_loader_static.la \
+	libpipe_loader_dynamic.la

-libpipe_loader_la_SOURCES = \
+libpipe_loader_static_la_CFLAGS = \
+	$(AM_CFLAGS) \
+	-DGALLIUM_STATIC_TARGETS=1
+
+libpipe_loader_dynamic_la_CFLAGS = \
+	$(AM_CFLAGS) \
+	-DPIPE_SEARCH_DIR=\"$(libdir)/gallium-pipe\"
+
+libpipe_loader_static_la_SOURCES = \
 	$(COMMON_SOURCES)

-if HAVE_DRM_LOADER_GALLIUM
+libpipe_loader_dynamic_la_SOURCES = \
+	$(COMMON_SOURCES)
+
+if HAVE_LIBDRM
 AM_CFLAGS += \
 	$(LIBDRM_CFLAGS)

-libpipe_loader_la_SOURCES += \
+libpipe_loader_static_la_SOURCES += \
 	$(DRM_SOURCES)

-libpipe_loader_la_LIBADD = \
+libpipe_loader_dynamic_la_SOURCES += \
+	$(DRM_SOURCES)
+
+libpipe_loader_static_la_LIBADD = \
+	$(top_builddir)/src/loader/libloader.la
+
+libpipe_loader_dynamic_la_LIBADD = \
 	$(top_builddir)/src/loader/libloader.la

 endif

+EXTRA_DIST = SConscript
--- a/src/gallium/auxiliary/pipe-loader/SConscript
+++ b/src/gallium/auxiliary/pipe-loader/SConscript
@@ -0,0 +1,34 @@
+Import('*')
+
+env = env.Clone()
+
+env.MSVC2008Compat()
+
+env.Append(CPPPATH = [
+    '#/src/loader',
+    '#/src/gallium/winsys',
+])
+
+env.Append(CPPDEFINES = [
+    ('HAVE_PIPE_LOADER_DRI', '1'),
+    ('DROP_PIPE_LOADER_MISC', '1'),
+    ('GALLIUM_STATIC_TARGETS', '1'),
+])
+
+source = env.ParseSourceList('Makefile.sources', 'COMMON_SOURCES')
+
+#if HAVE_LIBDRM
+source += env.ParseSourceList('Makefile.sources', 'DRM_SOURCES')
+
+env.PkgUseModules('DRM')
+env.Append(LIBS = [libloader])
+#endif
+
+pipe_loader = env.ConvenienceLibrary(
+    target = 'pipe_loader',
+    source = source,
+)
+
+env.Alias('pipe_loader', pipe_loader)
+
+Export('pipe_loader')
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
@@ -35,7 +35,7 @@
 #define MODULE_PREFIX "pipe_"

 static int (*backends[])(struct pipe_loader_device **, int) = {
-#ifdef HAVE_PIPE_LOADER_DRM
+#ifdef HAVE_LIBDRM
   &pipe_loader_drm_probe,
 #endif
   &pipe_loader_sw_probe
@@ -69,10 +69,9 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
 }

 struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev,
-                          const char *library_paths)
+pipe_loader_create_screen(struct pipe_loader_device *dev)
 {
-   return dev->ops->create_screen(dev, library_paths);
+   return dev->ops->create_screen(dev);
 }

 struct util_dl_library *
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -82,13 +82,9 @@ pipe_loader_probe(struct pipe_loader_device **devs, int ndev);
 * Create a pipe_screen for the specified device.
 *
 * \param dev Device the screen will be created for.
- * \param library_paths Colon-separated list of filesystem paths that
- *                      will be used to look for the pipe driver
- *                      module that handles this device.
 */
 struct pipe_screen *
-pipe_loader_create_screen(struct pipe_loader_device *dev,
-                          const char *library_paths);
+pipe_loader_create_screen(struct pipe_loader_device *dev);

 /**
 * Query the configuration parameters for the specified device.
@@ -112,8 +108,6 @@ pipe_loader_configuration(struct pipe_loader_device *dev,
 void
 pipe_loader_release(struct pipe_loader_device **devs, int ndev);

-#ifdef HAVE_PIPE_LOADER_DRI
-
 /**
 * Initialize sw dri device give the drisw_loader_funcs.
 *
@@ -125,7 +119,15 @@ bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs,
                         struct drisw_loader_funcs *drisw_lf);

-#endif
+/**
+ * Initialize a kms backed sw device given an fd.
+ *
+ * This function is platform-specific.
+ *
+ * \sa pipe_loader_probe
+ */
+bool
+pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd);

 /**
 * Initialize a null sw device.
@@ -158,8 +160,6 @@ boolean
 pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
                             struct pipe_screen *screen);

-#ifdef HAVE_PIPE_LOADER_DRM
-
 /**
 * Get a list of known DRM devices.
 *
@@ -180,8 +180,6 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev);
 bool
 pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd);

-#endif
-
 #ifdef __cplusplus
 }
 #endif
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -36,6 +36,7 @@
 #include <unistd.h>

 #include "loader.h"
+#include "target-helpers/drm_helper_public.h"
 #include "state_tracker/drm_driver.h"
 #include "pipe_loader_priv.h"

@@ -50,13 +51,119 @@

 struct pipe_loader_drm_device {
   struct pipe_loader_device base;
+   const struct drm_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
   struct util_dl_library *lib;
+#endif
   int fd;
 };

 #define pipe_loader_drm_device(dev) ((struct pipe_loader_drm_device *)dev)

-static struct pipe_loader_ops pipe_loader_drm_ops;
+static const struct pipe_loader_ops pipe_loader_drm_ops;
+
+#ifdef GALLIUM_STATIC_TARGETS
+static const struct drm_conf_ret throttle_ret = {
+   DRM_CONF_INT,
+   {2},
+};
+
+static const struct drm_conf_ret share_fd_ret = {
+   DRM_CONF_BOOL,
+   {true},
+};
+
+static inline const struct drm_conf_ret *
+configuration_query(enum drm_conf conf)
+{
+   switch (conf) {
+   case DRM_CONF_THROTTLE:
+      return &throttle_ret;
+   case DRM_CONF_SHARE_FD:
+      return &share_fd_ret;
+   default:
+      break;
+   }
+   return NULL;
+}
+
+static const struct drm_driver_descriptor driver_descriptors[] = {
+    {
+        .name = "i915",
+        .driver_name = "i915",
+        .create_screen = pipe_i915_create_screen,
+        .configuration = configuration_query,
+    },
+#ifdef USE_VC4_SIMULATOR
+    {
+        .name = "i965",
+        .driver_name = "vc4",
+        .create_screen = pipe_vc4_create_screen,
+        .configuration = configuration_query,
+    },
+#endif
+    {
+        .name = "i965",
+        .driver_name = "i915",
+        .create_screen = pipe_ilo_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "nouveau",
+        .driver_name = "nouveau",
+        .create_screen = pipe_nouveau_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "r300",
+        .driver_name = "radeon",
+        .create_screen = pipe_r300_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "r600",
+        .driver_name = "radeon",
+        .create_screen = pipe_r600_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "radeonsi",
+        .driver_name = "radeon",
+        .create_screen = pipe_radeonsi_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "vmwgfx",
+        .driver_name = "vmwgfx",
+        .create_screen = pipe_vmwgfx_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "kgsl",
+        .driver_name = "freedreno",
+        .create_screen = pipe_freedreno_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "msm",
+        .driver_name = "freedreno",
+        .create_screen = pipe_freedreno_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "virtio_gpu",
+        .driver_name = "virtio-gpu",
+        .create_screen = pipe_virgl_create_screen,
+        .configuration = configuration_query,
+    },
+    {
+        .name = "vc4",
+        .driver_name = "vc4",
+        .create_screen = pipe_vc4_create_screen,
+        .configuration = configuration_query,
+    },
+};
+#endif

 bool
 pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
@@ -81,10 +188,36 @@ pipe_loader_drm_probe_fd(struct pipe_loader_device **dev, int fd)
   if (!ddev->base.driver_name)
      goto fail;

+#ifdef GALLIUM_STATIC_TARGETS
+   for (int i = 0; i < ARRAY_SIZE(driver_descriptors); i++) {
+      if (strcmp(driver_descriptors[i].name, ddev->base.driver_name) == 0) {
+         ddev->dd = &driver_descriptors[i];
+         break;
+      }
+   }
+   if (!ddev->dd)
+      goto fail;
+#else
+   ddev->lib = pipe_loader_find_module(&ddev->base, PIPE_SEARCH_DIR);
+   if (!ddev->lib)
+      goto fail;
+
+   ddev->dd = (const struct drm_driver_descriptor *)
+      util_dl_get_proc_address(ddev->lib, "driver_descriptor");
+
+   /* sanity check on the name */
+   if (!ddev->dd || strcmp(ddev->dd->name, ddev->base.driver_name) != 0)
+      goto fail;
+#endif
+
   *dev = &ddev->base;
   return true;

  fail:
+#ifndef GALLIUM_STATIC_TARGETS
+   if (ddev->lib)
+      util_dl_close(ddev->lib);
+#endif
   FREE(ddev);
   return false;
 }
@@ -105,8 +238,9 @@ pipe_loader_drm_probe(struct pipe_loader_device **devs, int ndev)

   for (i = DRM_RENDER_NODE_MIN_MINOR, j = 0;
        i <= DRM_RENDER_NODE_MAX_MINOR; i++) {
-      fd = open_drm_render_node_minor(i);
      struct pipe_loader_device *dev;
+
+      fd = open_drm_render_node_minor(i);
      if (fd < 0)
         continue;

@@ -132,8 +266,10 @@ pipe_loader_drm_release(struct pipe_loader_device **dev)
 {
   struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev);

+#ifndef GALLIUM_STATIC_TARGETS
   if (ddev->lib)
      util_dl_close(ddev->lib);
+#endif

   close(ddev->fd);
   FREE(ddev->base.driver_name);
@@ -146,47 +282,22 @@ pipe_loader_drm_configuration(struct pipe_loader_device *dev,
                              enum drm_conf conf)
 {
   struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
-   const struct drm_driver_descriptor *dd;

-   if (!ddev->lib)
+   if (!ddev->dd->configuration)
      return NULL;

-   dd = (const struct drm_driver_descriptor *)
-      util_dl_get_proc_address(ddev->lib, "driver_descriptor");
-
-   /* sanity check on the name */
-   if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0)
-      return NULL;
-
-   if (!dd->configuration)
-      return NULL;
-
-   return dd->configuration(conf);
+   return ddev->dd->configuration(conf);
 }

 static struct pipe_screen *
-pipe_loader_drm_create_screen(struct pipe_loader_device *dev,
-                              const char *library_paths)
+pipe_loader_drm_create_screen(struct pipe_loader_device *dev)
 {
   struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
-   const struct drm_driver_descriptor *dd;

-   if (!ddev->lib)
-      ddev->lib = pipe_loader_find_module(dev, library_paths);
-   if (!ddev->lib)
-      return NULL;
-
-   dd = (const struct drm_driver_descriptor *)
-      util_dl_get_proc_address(ddev->lib, "driver_descriptor");
-
-   /* sanity check on the name */
-   if (!dd || strcmp(dd->name, ddev->base.driver_name) != 0)
-      return NULL;
-
-   return dd->create_screen(ddev->fd);
+   return ddev->dd->create_screen(ddev->fd);
 }

-static struct pipe_loader_ops pipe_loader_drm_ops = {
+static const struct pipe_loader_ops pipe_loader_drm_ops = {
   .create_screen = pipe_loader_drm_create_screen,
   .configuration = pipe_loader_drm_configuration,
   .release = pipe_loader_drm_release
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_priv.h
@@ -31,8 +31,7 @@
 #include "pipe_loader.h"

 struct pipe_loader_ops {
-   struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev,
-                                        const char *library_paths);
+   struct pipe_screen *(*create_screen)(struct pipe_loader_device *dev);

   const struct drm_conf_ret *(*configuration)(struct pipe_loader_device *dev,
                                               enum drm_conf conf);
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -30,45 +30,160 @@
 #include "util/u_memory.h"
 #include "util/u_dl.h"
 #include "sw/dri/dri_sw_winsys.h"
+#include "sw/kms-dri/kms_dri_sw_winsys.h"
 #include "sw/null/null_sw_winsys.h"
 #include "sw/wrapper/wrapper_sw_winsys.h"
 #include "target-helpers/inline_sw_helper.h"
 #include "state_tracker/drisw_api.h"
+#include "state_tracker/sw_driver.h"

 struct pipe_loader_sw_device {
   struct pipe_loader_device base;
+   const struct sw_driver_descriptor *dd;
+#ifndef GALLIUM_STATIC_TARGETS
   struct util_dl_library *lib;
+#endif
   struct sw_winsys *ws;
 };

 #define pipe_loader_sw_device(dev) ((struct pipe_loader_sw_device *)dev)

-static struct pipe_loader_ops pipe_loader_sw_ops;
+static const struct pipe_loader_ops pipe_loader_sw_ops;

-static struct sw_winsys *(*backends[])() = {
-   null_sw_create
+#ifdef GALLIUM_STATIC_TARGETS
+static const struct sw_driver_descriptor driver_descriptors = {
+   .create_screen = sw_screen_create,
+   .winsys = {
+#ifdef HAVE_PIPE_LOADER_DRI
+      {
+         .name = "dri",
+         .create_winsys = dri_create_sw_winsys,
+      },
+#endif
+#ifdef HAVE_PIPE_LOADER_KMS
+      {
+         .name = "kms_dri",
+         .create_winsys = kms_dri_create_winsys,
+      },
+#endif
+/**
+ * XXX: Do not include these two for non autotools builds.
+ * They don't have neither opencl nor nine, where these are used.
+ */
+#ifndef DROP_PIPE_LOADER_MISC
+      {
+         .name = "null",
+         .create_winsys = null_sw_create,
+      },
+      {
+         .name = "wrapped",
+         .create_winsys = wrapper_sw_winsys_wrap_pipe_screen,
+      },
+#endif
+      { 0 },
+   }
 };
+#endif
+
+static bool
+pipe_loader_sw_probe_init_common(struct pipe_loader_sw_device *sdev)
+{
+   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
+   sdev->base.driver_name = "swrast";
+   sdev->base.ops = &pipe_loader_sw_ops;
+
+#ifdef GALLIUM_STATIC_TARGETS
+   sdev->dd = &driver_descriptors;
+   if (!sdev->dd)
+      return false;
+#else
+   sdev->lib = pipe_loader_find_module(&sdev->base, PIPE_SEARCH_DIR);
+   if (!sdev->lib)
+      return false;
+
+   sdev->dd = (const struct sw_driver_descriptor *)
+      util_dl_get_proc_address(sdev->lib, "swrast_driver_descriptor");
+
+   if (!sdev->dd){
+      util_dl_close(sdev->lib);
+      sdev->lib = NULL;
+      return false;
+   }
+#endif
+
+   return true;
+}
+
+static void
+pipe_loader_sw_probe_teardown_common(struct pipe_loader_sw_device *sdev)
+{
+#ifndef GALLIUM_STATIC_TARGETS
+   if (sdev->lib)
+      util_dl_close(sdev->lib);
+#endif
+}

 #ifdef HAVE_PIPE_LOADER_DRI
 bool
 pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_funcs *drisw_lf)
 {
   struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;

   if (!sdev)
      return false;

-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
-   sdev->ws = dri_create_sw_winsys(drisw_lf);
-   if (!sdev->ws) {
-      FREE(sdev);
-      return false;
-   }
-   *devs = &sdev->base;
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;

+   for (i = 0; sdev->dd->winsys[i].name; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf);
+         break;
+      }
+   }
+   if (!sdev->ws)
+      goto fail;
+
+   *devs = &sdev->base;
   return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
+}
+#endif
+
+#ifdef HAVE_PIPE_LOADER_KMS
+bool
+pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
+{
+   struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;
+
+   if (!sdev)
+      return false;
+
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;
+
+   for (i = 0; sdev->dd->winsys[i].name; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys(fd);
+         break;
+      }
+   }
+   if (!sdev->ws)
+      goto fail;
+
+   *devs = &sdev->base;
+   return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
 }
 #endif

@@ -76,38 +191,40 @@ bool
 pipe_loader_sw_probe_null(struct pipe_loader_device **devs)
 {
   struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;

   if (!sdev)
      return false;

-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
-   sdev->ws = null_sw_create();
-   if (!sdev->ws) {
-      FREE(sdev);
-      return false;
-   }
-   *devs = &sdev->base;
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;

+   for (i = 0; sdev->dd->winsys[i].name; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "null") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys();
+         break;
+      }
+   }
+   if (!sdev->ws)
+      goto fail;
+
+   *devs = &sdev->base;
   return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
 }

 int
 pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev)
 {
-   int i;
+   int i = 1;

-   for (i = 0; i < Elements(backends); i++) {
-      if (i < ndev) {
-         struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
-	 /* TODO: handle CALLOC_STRUCT failure */
-
-         sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-         sdev->base.driver_name = "swrast";
-         sdev->base.ops = &pipe_loader_sw_ops;
-         sdev->ws = backends[i]();
-         devs[i] = &sdev->base;
+   if (i <= ndev) {
+      if (!pipe_loader_sw_probe_null(devs)) {
+         i--;
      }
   }

@@ -119,21 +236,30 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
                             struct pipe_screen *screen)
 {
   struct pipe_loader_sw_device *sdev = CALLOC_STRUCT(pipe_loader_sw_device);
+   int i;

   if (!sdev)
      return false;

-   sdev->base.type = PIPE_LOADER_DEVICE_SOFTWARE;
-   sdev->base.driver_name = "swrast";
-   sdev->base.ops = &pipe_loader_sw_ops;
-   sdev->ws = wrapper_sw_winsys_wrap_pipe_screen(screen);
+   if (!pipe_loader_sw_probe_init_common(sdev))
+      goto fail;

-   if (!sdev->ws) {
-      FREE(sdev);
-      return false;
+   for (i = 0; sdev->dd->winsys[i].name; i++) {
+      if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) {
+         sdev->ws = sdev->dd->winsys[i].create_winsys(screen);
+         break;
+      }
   }
+   if (!sdev->ws)
+      goto fail;
+
   *dev = &sdev->base;
   return true;
+
+fail:
+   pipe_loader_sw_probe_teardown_common(sdev);
+   FREE(sdev);
+   return false;
 }

 static void
@@ -141,8 +267,10 @@ pipe_loader_sw_release(struct pipe_loader_device **dev)
 {
   struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev);

+#ifndef GALLIUM_STATIC_TARGETS
   if (sdev->lib)
      util_dl_close(sdev->lib);
+#endif

   FREE(sdev);
   *dev = NULL;
@@ -156,28 +284,19 @@ pipe_loader_sw_configuration(struct pipe_loader_device *dev,
 }

 static struct pipe_screen *
-pipe_loader_sw_create_screen(struct pipe_loader_device *dev,
-                             const char *library_paths)
+pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
 {
   struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(dev);
-   struct pipe_screen *(*init)(struct sw_winsys *);
+   struct pipe_screen *screen;

-   if (!sdev->lib)
-      sdev->lib = pipe_loader_find_module(dev, library_paths);
-   if (!sdev->lib)
-      return NULL;
+   screen = sdev->dd->create_screen(sdev->ws);
+   if (!screen)
+      sdev->ws->destroy(sdev->ws);

-   init = (void *)util_dl_get_proc_address(sdev->lib, "swrast_create_screen");
-   if (!init){
-      util_dl_close(sdev->lib);
-      sdev->lib = NULL;
-      return NULL;
-   }
-
-   return init(sdev->ws);
+   return screen;
 }

-static struct pipe_loader_ops pipe_loader_sw_ops = {
+static const struct pipe_loader_ops pipe_loader_sw_ops = {
   .create_screen = pipe_loader_sw_create_screen,
   .configuration = pipe_loader_sw_configuration,
   .release = pipe_loader_sw_release
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -0,0 +1,275 @@
+#ifndef DRM_HELPER_H
+#define DRM_HELPER_H
+
+#include <stdio.h>
+#include "target-helpers/inline_debug_helper.h"
+#include "target-helpers/drm_helper_public.h"
+
+#ifdef GALLIUM_I915
+#include "i915/drm/i915_drm_public.h"
+#include "i915/i915_public.h"
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd)
+{
+   struct i915_winsys *iws;
+   struct pipe_screen *screen;
+
+   iws = i915_drm_winsys_create(fd);
+   if (!iws)
+      return NULL;
+
+   screen = i915_screen_create(iws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd)
+{
+   fprintf(stderr, "i915g: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_ILO
+#include "intel/drm/intel_drm_public.h"
+#include "ilo/ilo_public.h"
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd)
+{
+   struct intel_winsys *iws;
+   struct pipe_screen *screen;
+
+   iws = intel_winsys_create_for_fd(fd);
+   if (!iws)
+      return NULL;
+
+   screen = ilo_screen_create(iws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd)
+{
+   fprintf(stderr, "ilo: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_NOUVEAU
+#include "nouveau/drm/nouveau_drm_public.h"
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd)
+{
+   struct pipe_screen *screen;
+
+   screen = nouveau_drm_screen_create(fd);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd)
+{
+   fprintf(stderr, "nouveau: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_R300
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r300/r300_public.h"
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd)
+{
+   struct radeon_winsys *rw;
+
+   rw = radeon_drm_winsys_create(fd, r300_screen_create);
+   return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd)
+{
+   fprintf(stderr, "r300: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_R600
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "r600/r600_public.h"
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd)
+{
+   struct radeon_winsys *rw;
+
+   rw = radeon_drm_winsys_create(fd, r600_screen_create);
+   return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd)
+{
+   fprintf(stderr, "r600: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_RADEONSI
+#include "radeon/radeon_winsys.h"
+#include "radeon/drm/radeon_drm_public.h"
+#include "amdgpu/drm/amdgpu_public.h"
+#include "radeonsi/si_public.h"
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd)
+{
+   struct radeon_winsys *rw;
+
+   /* First, try amdgpu. */
+   rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
+
+   if (!rw)
+      rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
+
+   return rw ? debug_screen_wrap(rw->screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd)
+{
+   fprintf(stderr, "radeonsi: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VMWGFX
+#include "svga/drm/svga_drm_public.h"
+#include "svga/svga_public.h"
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd)
+{
+   struct svga_winsys_screen *sws;
+   struct pipe_screen *screen;
+
+   sws = svga_drm_winsys_screen_create(fd);
+   if (!sws)
+      return NULL;
+
+   screen = svga_screen_create(sws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd)
+{
+   fprintf(stderr, "svga: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_FREEDRENO
+#include "freedreno/drm/freedreno_drm_public.h"
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+   struct pipe_screen *screen;
+
+   screen = fd_drm_screen_create(fd);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd)
+{
+   fprintf(stderr, "freedreno: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VIRGL
+#include "virgl/drm/virgl_drm_public.h"
+#include "virgl/virgl_public.h"
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd)
+{
+   struct virgl_winsys *vws;
+   struct pipe_screen *screen;
+
+   vws = virgl_drm_winsys_create(fd);
+   if (!vws)
+      return NULL;
+
+   screen = virgl_create_screen(vws);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd)
+{
+   fprintf(stderr, "virgl: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+#ifdef GALLIUM_VC4
+#include "vc4/drm/vc4_drm_public.h"
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd)
+{
+   struct pipe_screen *screen;
+
+   screen = vc4_drm_screen_create(fd);
+   return screen ? debug_screen_wrap(screen) : NULL;
+}
+
+#else
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd)
+{
+   fprintf(stderr, "vc4: driver missing\n");
+   return NULL;
+}
+
+#endif
+
+
+#endif /* DRM_HELPER_H */
--- a/src/gallium/auxiliary/target-helpers/drm_helper_public.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper_public.h
@@ -0,0 +1,37 @@
+#ifndef _DRM_HELPER_PUBLIC_H
+#define _DRM_HELPER_PUBLIC_H
+
+
+struct pipe_screen;
+
+struct pipe_screen *
+pipe_i915_create_screen(int fd);
+
+struct pipe_screen *
+pipe_ilo_create_screen(int fd);
+
+struct pipe_screen *
+pipe_nouveau_create_screen(int fd);
+
+struct pipe_screen *
+pipe_r300_create_screen(int fd);
+
+struct pipe_screen *
+pipe_r600_create_screen(int fd);
+
+struct pipe_screen *
+pipe_radeonsi_create_screen(int fd);
+
+struct pipe_screen *
+pipe_vmwgfx_create_screen(int fd);
+
+struct pipe_screen *
+pipe_freedreno_create_screen(int fd);
+
+struct pipe_screen *
+pipe_virgl_create_screen(int fd);
+
+struct pipe_screen *
+pipe_vc4_create_screen(int fd);
+
+#endif /* _DRM_HELPER_PUBLIC_H */
--- a/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_debug_helper.h
@@ -11,6 +11,10 @@
 * one or more debug driver: rbug, trace.
 */

+#ifdef GALLIUM_DDEBUG
+#include "ddebug/dd_public.h"
+#endif
+
 #ifdef GALLIUM_TRACE
 #include "trace/tr_public.h"
 #endif
@@ -30,6 +34,10 @@
 static inline struct pipe_screen *
 debug_screen_wrap(struct pipe_screen *screen)
 {
+#if defined(GALLIUM_DDEBUG)
+   screen = ddebug_screen_create(screen);
+#endif
+
 #if defined(GALLIUM_RBUG)
   screen = rbug_screen_create(screen);
 #endif
--- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h
@@ -1,489 +0,0 @@
-#ifndef INLINE_DRM_HELPER_H
-#define INLINE_DRM_HELPER_H
-
-#include "state_tracker/drm_driver.h"
-#include "target-helpers/inline_debug_helper.h"
-#include "loader.h"
-#if defined(DRI_TARGET)
-#include "dri_screen.h"
-#endif
-
-#if GALLIUM_SOFTPIPE
-#include "target-helpers/inline_sw_helper.h"
-#include "sw/kms-dri/kms_dri_sw_winsys.h"
-#endif
-
-#if GALLIUM_I915
-#include "i915/drm/i915_drm_public.h"
-#include "i915/i915_public.h"
-#endif
-
-#if GALLIUM_ILO
-#include "intel/drm/intel_drm_public.h"
-#include "ilo/ilo_public.h"
-#endif
-
-#if GALLIUM_NOUVEAU
-#include "nouveau/drm/nouveau_drm_public.h"
-#endif
-
-#if GALLIUM_R300
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r300/r300_public.h"
-#endif
-
-#if GALLIUM_R600
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "r600/r600_public.h"
-#endif
-
-#if GALLIUM_RADEONSI
-#include "radeon/radeon_winsys.h"
-#include "radeon/drm/radeon_drm_public.h"
-#include "amdgpu/drm/amdgpu_public.h"
-#include "radeonsi/si_public.h"
-#endif
-
-#if GALLIUM_VMWGFX
-#include "svga/drm/svga_drm_public.h"
-#include "svga/svga_public.h"
-#endif
-
-#if GALLIUM_FREEDRENO
-#include "freedreno/drm/freedreno_drm_public.h"
-#endif
-
-#if GALLIUM_VC4
-#include "vc4/drm/vc4_drm_public.h"
-#endif
-
-static char* driver_name = NULL;
-
-/* XXX: We need to teardown the winsys if *screen_create() fails. */
-
-#if defined(GALLIUM_SOFTPIPE)
-#if defined(DRI_TARGET)
-#if defined(HAVE_LIBDRM)
-
-const __DRIextension **__driDriverGetExtensions_kms_swrast(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void)
-{
-   globalDriverAPI = &dri_kms_driver_api;
-   return galliumdrm_driver_extensions;
-}
-
-struct pipe_screen *
-kms_swrast_create_screen(int fd)
-{
-   struct sw_winsys *sws;
-   struct pipe_screen *screen;
-
-   sws = kms_dri_create_winsys(fd);
-   if (!sws)
-      return NULL;
-
-   screen = sw_screen_create(sws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-#endif
-#endif
-
-#if defined(GALLIUM_I915)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_i915(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i915(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_i915_create_screen(int fd)
-{
-   struct i915_winsys *iws;
-   struct pipe_screen *screen;
-
-   iws = i915_drm_winsys_create(fd);
-   if (!iws)
-      return NULL;
-
-   screen = i915_screen_create(iws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_ILO)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_i965(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_ilo_create_screen(int fd)
-{
-   struct intel_winsys *iws;
-   struct pipe_screen *screen;
-
-   iws = intel_winsys_create_for_fd(fd);
-   if (!iws)
-      return NULL;
-
-   screen = ilo_screen_create(iws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_NOUVEAU)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_nouveau(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_nouveau(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_nouveau_create_screen(int fd)
-{
-   struct pipe_screen *screen;
-
-   screen = nouveau_drm_screen_create(fd);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_R300)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_r300(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_r300(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_r300_create_screen(int fd)
-{
-   struct radeon_winsys *rw;
-
-   rw = radeon_drm_winsys_create(fd, r300_screen_create);
-   return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_R600)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_r600(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_r600(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_r600_create_screen(int fd)
-{
-   struct radeon_winsys *rw;
-
-   rw = radeon_drm_winsys_create(fd, r600_screen_create);
-   return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_RADEONSI)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_radeonsi(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_radeonsi(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_radeonsi_create_screen(int fd)
-{
-   struct radeon_winsys *rw;
-
-   /* First, try amdgpu. */
-   rw = amdgpu_winsys_create(fd, radeonsi_screen_create);
-
-   if (!rw)
-      rw = radeon_drm_winsys_create(fd, radeonsi_screen_create);
-
-   return rw ? debug_screen_wrap(rw->screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VMWGFX)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_vmwgfx(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_vmwgfx(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_vmwgfx_create_screen(int fd)
-{
-   struct svga_winsys_screen *sws;
-   struct pipe_screen *screen;
-
-   sws = svga_drm_winsys_screen_create(fd);
-   if (!sws)
-      return NULL;
-
-   screen = svga_screen_create(sws);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_FREEDRENO)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_msm(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_msm(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-
-const __DRIextension **__driDriverGetExtensions_kgsl(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_kgsl(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-static struct pipe_screen *
-pipe_freedreno_create_screen(int fd)
-{
-   struct pipe_screen *screen;
-
-   screen = fd_drm_screen_create(fd);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-#if defined(GALLIUM_VC4)
-#if defined(DRI_TARGET)
-
-const __DRIextension **__driDriverGetExtensions_vc4(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_vc4(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-
-#if defined(USE_VC4_SIMULATOR)
-const __DRIextension **__driDriverGetExtensions_i965(void);
-
-/**
- * When building using the simulator (on x86), we advertise ourselves as the
- * i965 driver so that you can just make a directory with a link from
- * i965_dri.so to the built vc4_dri.so, and point LIBGL_DRIVERS_PATH to that
- * on your i965-using host to run the driver under simulation.
- *
- * This is, of course, incompatible with building with the ilo driver, but you
- * shouldn't be building that anyway.
- */
-PUBLIC const __DRIextension **__driDriverGetExtensions_i965(void)
-{
-   globalDriverAPI = &galliumdrm_driver_api;
-   return galliumdrm_driver_extensions;
-}
-#endif
-
-#endif
-
-static struct pipe_screen *
-pipe_vc4_create_screen(int fd)
-{
-   struct pipe_screen *screen;
-
-   screen = vc4_drm_screen_create(fd);
-   return screen ? debug_screen_wrap(screen) : NULL;
-}
-#endif
-
-inline struct pipe_screen *
-dd_create_screen(int fd)
-{
-   driver_name = loader_get_driver_for_fd(fd, _LOADER_GALLIUM);
-   if (!driver_name)
-      return NULL;
-
-#if defined(GALLIUM_I915)
-   if (strcmp(driver_name, "i915") == 0)
-      return pipe_i915_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_ILO)
-   if (strcmp(driver_name, "i965") == 0)
-      return pipe_ilo_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_NOUVEAU)
-   if (strcmp(driver_name, "nouveau") == 0)
-      return pipe_nouveau_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_R300)
-   if (strcmp(driver_name, "r300") == 0)
-      return pipe_r300_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_R600)
-   if (strcmp(driver_name, "r600") == 0)
-      return pipe_r600_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_RADEONSI)
-   if (strcmp(driver_name, "radeonsi") == 0)
-      return pipe_radeonsi_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_VMWGFX)
-   if (strcmp(driver_name, "vmwgfx") == 0)
-      return pipe_vmwgfx_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_FREEDRENO)
-   if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0))
-      return pipe_freedreno_create_screen(fd);
-   else
-#endif
-#if defined(GALLIUM_VC4)
-   if (strcmp(driver_name, "vc4") == 0)
-      return pipe_vc4_create_screen(fd);
-   else
-#if defined(USE_VC4_SIMULATOR)
-   if (strcmp(driver_name, "i965") == 0)
-      return pipe_vc4_create_screen(fd);
-   else
-#endif
-#endif
-      return NULL;
-}
-
-inline const char *
-dd_driver_name(void)
-{
-   return driver_name;
-}
-
-static const struct drm_conf_ret throttle_ret = {
-   DRM_CONF_INT,
-   {2},
-};
-
-static const struct drm_conf_ret share_fd_ret = {
-   DRM_CONF_BOOL,
-   {true},
-};
-
-static inline const struct drm_conf_ret *
-configuration_query(enum drm_conf conf)
-{
-   switch (conf) {
-   case DRM_CONF_THROTTLE:
-      return &throttle_ret;
-   case DRM_CONF_SHARE_FD:
-      return &share_fd_ret;
-   default:
-      break;
-   }
-   return NULL;
-}
-
-inline const struct drm_conf_ret *
-dd_configuration(enum drm_conf conf)
-{
-   if (!driver_name)
-      return NULL;
-
-#if defined(GALLIUM_I915)
-   if (strcmp(driver_name, "i915") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_ILO)
-   if (strcmp(driver_name, "i965") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_NOUVEAU)
-   if (strcmp(driver_name, "nouveau") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_R300)
-   if (strcmp(driver_name, "r300") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_R600)
-   if (strcmp(driver_name, "r600") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_RADEONSI)
-   if (strcmp(driver_name, "radeonsi") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_VMWGFX)
-   if (strcmp(driver_name, "vmwgfx") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_FREEDRENO)
-   if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0))
-      return configuration_query(conf);
-   else
-#endif
-#if defined(GALLIUM_VC4)
-   if (strcmp(driver_name, "vc4") == 0)
-      return configuration_query(conf);
-   else
-#if defined(USE_VC4_SIMULATOR)
-   if (strcmp(driver_name, "i965") == 0)
-      return configuration_query(conf);
-   else
-#endif
-#endif
-      return NULL;
-}
-#endif /* INLINE_DRM_HELPER_H */
--- a/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
+++ b/src/gallium/auxiliary/target-helpers/inline_sw_helper.h
@@ -19,6 +19,10 @@
 #include "llvmpipe/lp_public.h"
 #endif

+#ifdef GALLIUM_VIRGL
+#include "virgl/virgl_public.h"
+#include "virgl/vtest/virgl_vtest_public.h"
+#endif

 static inline struct pipe_screen *
 sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
@@ -30,6 +34,14 @@ sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
      screen = llvmpipe_create_screen(winsys);
 #endif

+#if defined(GALLIUM_VIRGL)
+   if (screen == NULL && strcmp(driver, "virpipe") == 0) {
+      struct virgl_winsys *vws;
+      vws = virgl_vtest_winsys_wrap(winsys);
+      screen = virgl_create_screen(vws);
+   }
+#endif
+
 #if defined(GALLIUM_SOFTPIPE)
   if (screen == NULL)
      screen = softpipe_create_screen(winsys);
@@ -57,69 +69,4 @@ sw_screen_create(struct sw_winsys *winsys)
   return sw_screen_create_named(winsys, driver);
 }

-#if defined(GALLIUM_SOFTPIPE)
-#if defined(DRI_TARGET)
-#include "target-helpers/inline_debug_helper.h"
-#include "sw/dri/dri_sw_winsys.h"
-#include "dri_screen.h"
-
-const __DRIextension **__driDriverGetExtensions_swrast(void);
-
-PUBLIC const __DRIextension **__driDriverGetExtensions_swrast(void)
-{
-   globalDriverAPI = &galliumsw_driver_api;
-   return galliumsw_driver_extensions;
-}
-
-inline struct pipe_screen *
-drisw_create_screen(struct drisw_loader_funcs *lf)
-{
-   struct sw_winsys *winsys = NULL;
-   struct pipe_screen *screen = NULL;
-
-   winsys = dri_create_sw_winsys(lf);
-   if (winsys == NULL)
-      return NULL;
-
-   screen = sw_screen_create(winsys);
-   if (screen == NULL) {
-      winsys->destroy(winsys);
-      return NULL;
-   }
-
-   screen = debug_screen_wrap(screen);
-   return screen;
-}
-#endif // DRI_TARGET
-
-#if defined(NINE_TARGET)
-#include "sw/wrapper/wrapper_sw_winsys.h"
-#include "target-helpers/inline_debug_helper.h"
-
-extern struct pipe_screen *ninesw_create_screen(struct pipe_screen *screen);
-
-inline struct pipe_screen *
-ninesw_create_screen(struct pipe_screen *pscreen)
-{
-   struct sw_winsys *winsys = NULL;
-   struct pipe_screen *screen = NULL;
-
-   winsys = wrapper_sw_winsys_wrap_pipe_screen(pscreen);
-   if (winsys == NULL)
-      return NULL;
-
-   screen = sw_screen_create(winsys);
-   if (screen == NULL) {
-      winsys->destroy(winsys);
-      return NULL;
-   }
-
-   screen = debug_screen_wrap(screen);
-   return screen;
-}
-#endif // NINE_TARGET
-
-#endif // GALLIUM_SOFTPIPE
-
-
 #endif
--- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.c
@@ -0,0 +1,309 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the fragment shader to support anti-aliasing points.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_aa_point.h"
+#include "tgsi_transform.h"
+
+#define INVALID_INDEX 9999
+
+struct aa_transform_context
+{
+   struct tgsi_transform_context base;
+
+   unsigned tmp;           // temp register
+   unsigned color_out;     // frag color out register
+   unsigned color_tmp;     // frag color temp register
+   unsigned num_tmp;       // number of temp registers
+   unsigned num_imm;       // number of immediates
+   unsigned num_input;     // number of inputs
+   unsigned aa_point_coord_index;
+};
+
+static inline struct aa_transform_context *
+aa_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct aa_transform_context *) ctx;
+}
+
+/**
+ * TGSI declaration transform callback.
+ * Records the color output index and the input/temporary register counts.
+ */
+static void
+aa_decl(struct tgsi_transform_context *ctx,
+        struct tgsi_full_declaration *decl)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   if (decl->Declaration.File == TGSI_FILE_OUTPUT &&
+       decl->Semantic.Name == TGSI_SEMANTIC_COLOR &&
+       decl->Semantic.Index == 0) {
+      ts->color_out = decl->Range.First;
+   } else if (decl->Declaration.File == TGSI_FILE_INPUT) {
+      /* Highest index + 1, so the new input cannot alias a declared one. */
+      ts->num_input = MAX2(ts->num_input, decl->Range.Last + 1);
+   } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) {
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate declaration transform callback.
+ */
+static void
+aa_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   ctx->emit_immediate(ctx, imm);
+   ts->num_imm++;
+}
+
+/**
+ * TGSI transform prolog callback.
+ */
+static void
+aa_prolog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned tmp0;
+   unsigned texIn;
+   unsigned imm;
+
+   /* Declare two temporary registers, one for temporary and
+    * one for color.
+    */
+   ts->tmp = ts->num_tmp++;
+   ts->color_tmp = ts->num_tmp++;
+
+   tgsi_transform_temps_decl(ctx, ts->tmp, ts->color_tmp);
+
+   /* Declare new generic input/texcoord */
+   texIn = ts->num_input++;
+   tgsi_transform_input_decl(ctx, texIn, TGSI_SEMANTIC_GENERIC,
+                             ts->aa_point_coord_index, TGSI_INTERPOLATE_LINEAR);
+
+   /* Declare extra immediates */
+   imm = ts->num_imm++;
+   tgsi_transform_immediate_decl(ctx, 0.5, 0.5, 0.45, 1.0);
+
+   /*
+    * Emit code to compute fragment coverage.
+    * The point always has radius 0.5.  The threshold value will be a
+    * value less than, but close to 0.5, such as 0.45.
+    * We compute a coverage factor from the distance and threshold.
+    * If the coverage is negative, the fragment is outside the circle and
+    * it's discarded.
+    * If the coverage is >= 1, the fragment is fully inside the threshold
+    * distance.  We limit/clamp the coverage to 1.
+    * Otherwise, the fragment is between the threshold value and 0.5 and we
+    * compute a coverage value in [0,1].
+    *
+    * Input reg (texIn) usage:
+    *  texIn.x = x point coord in [0,1]
+    *  texIn.y = y point coord in [0,1]
+    *  texIn.z = "k" the smoothing threshold distance
+    *  texIn.w = unused
+    *
+    * Temp reg (t0) usage:
+    *  t0.x = distance of fragment from center point
+    *  t0.y = y offset from center, then 0.5 - distance
+    *  t0.z = unused
+    *  t0.w = 0.5 - k, then the final coverage value
+    */
+
+   tmp0 = ts->tmp;
+
+   /* SUB t0.xy, texIn, (0.5, 0.5) */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_SUB,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_XY,
+                           TGSI_FILE_INPUT, texIn,
+                           TGSI_FILE_IMMEDIATE, imm);
+
+   /* DP2 t0.x, t0.xy, t0.xy;  # t0.x = x^2 + y^2 */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_DP2,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* SQRT t0.x, t0.x */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_SQRT,
+                           TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_X,
+                           TGSI_FILE_TEMPORARY, tmp0);
+
+   /* compute coverage factor = (0.5-d)/(0.5-k) */
+
+   /* SUB t0.w, 0.5, texIn.z;  # t0.w = 0.5-k */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_INPUT, texIn, TGSI_SWIZZLE_Z);
+
+   /* SUB t0.y, 0.5, t0.x;  # t0.y = 0.5-d */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_Y,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_X,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_X);
+
+   /* DIV t0.w, t0.y, t0.w;  # coverage = (0.5-d)/(0.5-k) */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_Y,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W);
+
+   /* If the coverage value is negative, it means the fragment is outside
+    * the point's circular boundary.  Kill it.
+    */
+   /* KILL_IF tmp0.w;  # if tmp0.w < 0 KILL */
+   tgsi_transform_kill_inst(ctx, TGSI_FILE_TEMPORARY, tmp0,
+                            TGSI_SWIZZLE_W, FALSE);
+
+   /* If the distance is less than the threshold, the coverage/alpha value
+    * will be greater than one.  Clamp to one here.
+    */
+   /* MIN tmp0.w, tmp0.w, 1.0 */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_WRITEMASK_W,
+                               TGSI_FILE_TEMPORARY, tmp0, TGSI_SWIZZLE_W,
+                               TGSI_FILE_IMMEDIATE, imm, TGSI_SWIZZLE_W);
+}
+
+/**
+ * TGSI instruction transform callback.
+ */
+static void
+aa_inst(struct tgsi_transform_context *ctx,
+        struct tgsi_full_instruction *inst)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+   unsigned i;
+
+   /* Look for writes to color output reg and replace it with
+    * color temp reg.
+    */
+   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
+      struct tgsi_full_dst_register *dst = &inst->Dst[i];
+      if (dst->Register.File == TGSI_FILE_OUTPUT &&
+          dst->Register.Index == ts->color_out) {
+         dst->Register.File = TGSI_FILE_TEMPORARY;
+         dst->Register.Index = ts->color_tmp;
+      }
+   }
+
+   ctx->emit_instruction(ctx, inst);
+}
+
+/**
+ * TGSI transform epilog callback.
+ */
+static void
+aa_epilog(struct tgsi_transform_context *ctx)
+{
+   struct aa_transform_context *ts = aa_transform_context(ctx);
+
+   /* add alpha modulation code at tail of program */
+   assert(ts->color_out != INVALID_INDEX);
+   assert(ts->color_tmp != INVALID_INDEX);
+
+   /* MOV output.color.xyz colorTmp */
+   tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_XYZ,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp);
+
+   /* MUL output.color.w colorTmp.w tmp0.w */
+   tgsi_transform_op2_inst(ctx, TGSI_OPCODE_MUL,
+                           TGSI_FILE_OUTPUT, ts->color_out,
+                           TGSI_WRITEMASK_W,
+                           TGSI_FILE_TEMPORARY, ts->color_tmp,
+                           TGSI_FILE_TEMPORARY, ts->tmp);
+}
+
+/**
+ * TGSI utility to transform a fragment shader to support antialiasing point.
+ *
+ * This utility accepts two inputs:
+ *\param tokens_in  -- the original token string of the shader
+ *\param aa_point_coord_index -- the semantic index of the generic register
+ *                            that contains the point sprite texture coord
+ *
+ * For each fragment in the point, we compute the distance of the fragment
+ * from the point center using the point sprite texture coordinates.
+ * If the distance is greater than 0.5, we'll discard the fragment.
+ * Otherwise, we'll compute a coverage value which approximates how much
+ * of the fragment is inside the bounding circle of the point. If the distance
+ * is less than 'k', the coverage is 1. Else, the coverage is between 0 and 1.
+ * The final fragment color's alpha channel is then modulated by the coverage
+ * value.
+ */
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index)
+{
+   struct aa_transform_context transform;
+   const uint num_new_tokens = 200; /* should be enough */
+   const uint new_len = tgsi_num_tokens(tokens_in) + num_new_tokens;
+   struct tgsi_token *new_tokens;
+
+   /* allocate new tokens buffer */
+   new_tokens = tgsi_alloc_tokens(new_len);
+   if (!new_tokens)
+      return NULL;
+
+   /* setup transformation context */
+   memset(&transform, 0, sizeof(transform));
+   transform.base.transform_declaration = aa_decl;
+   transform.base.transform_instruction = aa_inst;
+   transform.base.transform_immediate = aa_immediate;
+   transform.base.prolog = aa_prolog;
+   transform.base.epilog = aa_epilog;
+
+   transform.tmp = INVALID_INDEX;
+   transform.color_out = INVALID_INDEX;
+   transform.color_tmp = INVALID_INDEX;
+
+   assert(aa_point_coord_index != -1);
+   transform.aa_point_coord_index = (unsigned)aa_point_coord_index;
+
+   transform.num_tmp = 0;
+   transform.num_imm = 0;
+   transform.num_input = 0;
+
+   /* transform the shader */
+   tgsi_transform_shader(tokens_in, new_tokens, new_len, &transform.base);
+
+   return new_tokens;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_aa_point.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef TGSI_AA_POINT_H
+#define TGSI_AA_POINT_H
+
+struct tgsi_token;
+
+struct tgsi_token *
+tgsi_add_aa_point(const struct tgsi_token *tokens_in,
+                  const int aa_point_coord_index);
+
+#endif /* TGSI_AA_POINT_H */
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -29,6 +29,7 @@
 #include "util/u_string.h"
 #include "util/u_math.h"
 #include "util/u_memory.h"
+#include "util/u_math.h"
 #include "tgsi_dump.h"
 #include "tgsi_info.h"
 #include "tgsi_iterate.h"
@@ -43,6 +44,8 @@ struct dump_ctx
 {
   struct tgsi_iterate_context iter;

+   boolean dump_float_as_hex;
+
   uint instno;
   uint immno;
   int indent;
@@ -88,6 +91,7 @@ dump_enum(
 #define SID(I)          ctx->dump_printf( ctx, "%d", I )
 #define FLT(F)          ctx->dump_printf( ctx, "%10.4f", F )
 #define DBL(D)          ctx->dump_printf( ctx, "%10.8f", D )
+#define HFLT(F)         ctx->dump_printf( ctx, "0x%08x", fui((F)) )
 #define ENM(E,ENUMS)    dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) )

 const char *
@@ -251,7 +255,10 @@ dump_imm_data(struct tgsi_iterate_context *iter,
         break;
      }
      case TGSI_IMM_FLOAT32:
-         FLT( data[i].Float );
+         if (ctx->dump_float_as_hex)
+            HFLT( data[i].Float );
+         else
+            FLT( data[i].Float );
         break;
      case TGSI_IMM_UINT32:
         UID(data[i].Uint);
@@ -648,6 +655,7 @@ tgsi_dump_instruction(
   ctx.indent = 0;
   ctx.dump_printf = dump_ctx_printf;
   ctx.indentation = 0;
+   ctx.file = NULL;

   iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst );
 }
@@ -681,6 +689,11 @@ tgsi_dump_to_file(const struct tgsi_token *tokens, uint flags, FILE *file)
   ctx.indentation = 0;
   ctx.file = file;

+   if (flags & TGSI_DUMP_FLOAT_AS_HEX)
+      ctx.dump_float_as_hex = TRUE;
+   else
+      ctx.dump_float_as_hex = FALSE;
+
   tgsi_iterate_shader( tokens, &ctx.iter );
 }

@@ -696,6 +709,7 @@ struct str_dump_ctx
   char *str;
   char *ptr;
   int left;
+   bool nospace;
 };

 static void
@@ -718,10 +732,11 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
         sctx->ptr += written;
         sctx->left -= written;
      }
-   }
+   } else
+      sctx->nospace = true;
 }

-void
+bool
 tgsi_dump_str(
   const struct tgsi_token *tokens,
   uint flags,
@@ -748,8 +763,16 @@ tgsi_dump_str(
   ctx.str[0] = 0;
   ctx.ptr = str;
   ctx.left = (int)size;
+   ctx.nospace = false;
+
+   if (flags & TGSI_DUMP_FLOAT_AS_HEX)
+      ctx.base.dump_float_as_hex = TRUE;
+   else
+      ctx.base.dump_float_as_hex = FALSE;

   tgsi_iterate_shader( tokens, &ctx.base.iter );
+
+   return !ctx.nospace;
 }

 void
@@ -772,6 +795,7 @@ tgsi_dump_instruction_str(
   ctx.str[0] = 0;
   ctx.ptr = str;
   ctx.left = (int)size;
+   ctx.nospace = false;

   iter_instruction( &ctx.base.iter, (struct tgsi_full_instruction *)inst );
 }
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h
@@ -38,7 +38,9 @@
 extern "C" {
 #endif

-void
+#define TGSI_DUMP_FLOAT_AS_HEX (1 << 0)
+
+bool
 tgsi_dump_str(
   const struct tgsi_token *tokens,
   uint flags,
--- a/src/gallium/auxiliary/tgsi/tgsi_emulate.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_emulate.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include "tgsi/tgsi_transform.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_debug.h"
+
+#include "tgsi_emulate.h"
+
+struct tgsi_emulation_context {
+   struct tgsi_transform_context base;   /* must stay first: tctx is cast back */
+   struct tgsi_shader_info info;         /* filled by tgsi_scan_shader() */
+   unsigned flags;                       /* TGSI_EMU_* bits from the caller */
+   bool first_instruction_emitted;       /* set by first transform_instr() */
+};
+
+static inline struct tgsi_emulation_context *
+tgsi_emulation_context(struct tgsi_transform_context *tctx)
+{
+   return (struct tgsi_emulation_context *)tctx; /* valid when tctx is &ctx->base */
+}
+
+/* Declaration callback: force per-sample interpolation on inputs if asked. */
+static void
+transform_decl(struct tgsi_transform_context *tctx,
+               struct tgsi_full_declaration *decl)
+{
+   const struct tgsi_emulation_context *emu = tgsi_emulation_context(tctx);
+   const bool per_sample = (emu->flags & TGSI_EMU_FORCE_PERSAMPLE_INTERP) != 0;
+
+   if (per_sample && decl->Declaration.File == TGSI_FILE_INPUT) {
+      assert(decl->Declaration.Interpolate);
+      decl->Interp.Location = TGSI_INTERPOLATE_LOC_SAMPLE;
+   }
+   tctx->emit_declaration(tctx, decl);   /* always forward the declaration */
+}
+
+/* Forward the edge flag: declare one extra input and one EDGEFLAG output,
+ * then copy input.xxxx to the output with a MOV. */
+static void
+passthrough_edgeflag(struct tgsi_transform_context *tctx)
+{
+   const struct tgsi_emulation_context *emu = tgsi_emulation_context(tctx);
+   struct tgsi_full_declaration in_decl = tgsi_default_full_declaration();
+   struct tgsi_full_declaration out_decl = tgsi_default_full_declaration();
+   struct tgsi_full_instruction mov = tgsi_default_full_instruction();
+
+   /* New input, indexed by the scanned shader's input count. */
+   in_decl.Declaration.File = TGSI_FILE_INPUT;
+   in_decl.Range.Last = emu->info.num_inputs;
+   in_decl.Range.First = in_decl.Range.Last;
+   tctx->emit_declaration(tctx, &in_decl);
+
+   /* New EDGEFLAG output, indexed by the scanned shader's output count. */
+   out_decl.Declaration.File = TGSI_FILE_OUTPUT;
+   out_decl.Declaration.Semantic = true;
+   out_decl.Semantic.Name = TGSI_SEMANTIC_EDGEFLAG;
+   out_decl.Semantic.Index = 0;
+   out_decl.Range.Last = emu->info.num_outputs;
+   out_decl.Range.First = out_decl.Range.Last;
+   tctx->emit_declaration(tctx, &out_decl);
+
+   /* MOV OUT[num_outputs].xyzw, IN[num_inputs].xxxx */
+   mov.Instruction.Opcode = TGSI_OPCODE_MOV;
+   mov.Instruction.NumDstRegs = 1;
+   mov.Instruction.NumSrcRegs = 1;
+   mov.Dst[0].Register.File = TGSI_FILE_OUTPUT;
+   mov.Dst[0].Register.Index = emu->info.num_outputs;
+   mov.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
+   mov.Src[0].Register.File = TGSI_FILE_INPUT;
+   mov.Src[0].Register.Index = emu->info.num_inputs;
+   mov.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
+   mov.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
+   mov.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
+   mov.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
+
+   tctx->emit_instruction(tctx, &mov);
+}
+
+/* Per-instruction hook: one-time edgeflag prologue, then color saturation. */
+static void
+transform_instr(struct tgsi_transform_context *tctx,
+                struct tgsi_full_instruction *inst)
+{
+   struct tgsi_emulation_context *emu = tgsi_emulation_context(tctx);
+
+   /* Runs once, right before the first instruction is forwarded. */
+   if (!emu->first_instruction_emitted) {
+      emu->first_instruction_emitted = true;
+      if (emu->flags & TGSI_EMU_PASSTHROUGH_EDGEFLAG)
+         passthrough_edgeflag(tctx);
+   }
+
+   /* Saturate anything written directly to a COLOR/BCOLOR output. */
+   if (emu->flags & TGSI_EMU_CLAMP_COLOR_OUTPUTS) {
+      int d;
+      for (d = 0; d < inst->Instruction.NumDstRegs; d++) {
+         const int reg = inst->Dst[d].Register.Index;
+         if (inst->Dst[d].Register.File == TGSI_FILE_OUTPUT &&
+             !inst->Dst[d].Register.Indirect) {
+            switch (emu->info.output_semantic_name[reg]) {
+            case TGSI_SEMANTIC_COLOR:
+            case TGSI_SEMANTIC_BCOLOR:
+               inst->Instruction.Saturate = true;
+               break;
+            default:
+               break;
+            }
+         }
+      }
+   }
+
+   tctx->emit_instruction(tctx, inst);
+}
+
+/* Return a transformed copy of "tokens" with the emulations selected by
+ * "flags" applied, or NULL if no known flag is set or allocation fails. */
+const struct tgsi_token *
+tgsi_emulate(const struct tgsi_token *tokens, unsigned flags)
+{
+   const unsigned instr_flags = TGSI_EMU_CLAMP_COLOR_OUTPUTS |
+                                TGSI_EMU_PASSTHROUGH_EDGEFLAG;
+   struct tgsi_emulation_context ctx;
+   struct tgsi_token *result;
+   int capacity;
+
+   if ((flags & (instr_flags | TGSI_EMU_FORCE_PERSAMPLE_INTERP)) == 0)
+      return NULL;
+
+   memset(&ctx, 0, sizeof(ctx));
+   ctx.flags = flags;
+   tgsi_scan_shader(tokens, &ctx.info);
+
+   /* Only install the callbacks that the requested emulations need. */
+   if (flags & TGSI_EMU_FORCE_PERSAMPLE_INTERP)
+      ctx.base.transform_declaration = transform_decl;
+   if (flags & instr_flags)
+      ctx.base.transform_instruction = transform_instr;
+
+   /* Output buffer: input size plus 20 tokens of headroom for additions. */
+   capacity = tgsi_num_tokens(tokens) + 20;
+   result = tgsi_alloc_tokens(capacity);
+   if (result)
+      tgsi_transform_shader(tokens, result, capacity, &ctx.base);
+   return result;
+}
--- a/src/gallium/auxiliary/tgsi/tgsi_emulate.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_emulate.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#ifndef TGSI_GL_EMULATION_H_
+#define TGSI_GL_EMULATION_H_
+
+#include "pipe/p_shader_tokens.h"
+
+#define TGSI_EMU_CLAMP_COLOR_OUTPUTS      (1 << 0) /* saturate COLOR/BCOLOR writes */
+#define TGSI_EMU_PASSTHROUGH_EDGEFLAG     (1 << 1) /* add input -> EDGEFLAG MOV */
+#define TGSI_EMU_FORCE_PERSAMPLE_INTERP   (1 << 2) /* inputs use sample location */
+/* Returns new tokens, or NULL if no flag above is set or allocation fails. */
+const struct tgsi_token *
+tgsi_emulate(const struct tgsi_token *tokens, unsigned flags);
+
+#endif /* TGSI_GL_EMULATION_H_ */
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -2021,7 +2021,7 @@ fetch_sampler_unit(struct tgsi_exec_machine *mach,
 /*
 * execute a texture instruction.
 *
- * modifier is used to control the channel routing for the\
+ * modifier is used to control the channel routing for the
 * instruction variants like proj, lod, and texture with lod bias.
 * sampler indicates which src register the sampler is contained in.
 */
@@ -2032,7 +2032,7 @@ exec_tex(struct tgsi_exec_machine *mach,
 {
   const union tgsi_exec_channel *args[5], *proj = NULL;
   union tgsi_exec_channel r[5];
-   enum tgsi_sampler_control control =  tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
   uint chan;
   uint unit;
   int8_t offsets[3];
@@ -2078,11 +2078,11 @@ exec_tex(struct tgsi_exec_machine *mach,
         args[i] = &ZeroVec;

      if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
      else if (modifier == TEX_MODIFIER_LOD_BIAS)
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
      else if (modifier == TEX_MODIFIER_GATHER)
-         control = tgsi_sampler_gather;
+         control = TGSI_SAMPLER_GATHER;
   }
   else {
      for (i = dim; i < Elements(args); i++)
@@ -2132,6 +2132,46 @@ exec_tex(struct tgsi_exec_machine *mach,
   }
 }

+/* Execute LODQ: ask the sampler for the mipmap/lod values of the texcoord in
+ * src[0] (sampler unit from src[1]) and store them in dst.x and dst.y. */
+static void
+exec_lodq(struct tgsi_exec_machine *mach,
+          const struct tgsi_full_instruction *inst)
+{
+   union tgsi_exec_channel coords[4];
+   const union tgsi_exec_channel *args[Elements(coords)];
+   union tgsi_exec_channel r[2];   /* r[0] = mipmap, r[1] = lod */
+   const uint unit = fetch_sampler_unit(mach, inst, 1);
+   const int dim =
+      tgsi_util_get_texture_coord_dim(inst->Texture.Texture, NULL);
+   const unsigned writemask = inst->Dst[0].Register.WriteMask;
+   int c;
+
+   assert(dim <= Elements(coords));
+
+   /* Fetch the coordinates the target uses; the rest read as zero. */
+   for (c = 0; c < dim; c++) {
+      FETCH(&coords[c], 0, TGSI_CHAN_X + c);
+      args[c] = &coords[c];
+   }
+   for (c = dim; c < Elements(coords); c++)
+      args[c] = &ZeroVec;
+
+   /* The same unit is passed as both sampler view and sampler index. */
+   mach->Sampler->query_lod(mach->Sampler, unit, unit,
+                            args[0]->f, args[1]->f, args[2]->f, args[3]->f,
+                            TGSI_SAMPLER_LOD_NONE, r[0].f, r[1].f);
+
+   /* Store only the components enabled in the destination writemask. */
+   if (writemask & TGSI_WRITEMASK_X) {
+      store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+   if (writemask & TGSI_WRITEMASK_Y) {
+      store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y,
+                 TGSI_EXEC_DATA_FLOAT);
+   }
+}

 static void
 exec_txd(struct tgsi_exec_machine *mach,
@@ -2155,7 +2195,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2171,7 +2211,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2185,7 +2225,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2205,7 +2245,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2225,7 +2265,7 @@ exec_txd(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, unit, unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2364,7 +2404,7 @@ exec_sample(struct tgsi_exec_machine *mach,
   const uint sampler_unit = inst->Src[2].Register.Index;
   union tgsi_exec_channel r[5], c1;
   const union tgsi_exec_channel *lod = &ZeroVec;
-   enum tgsi_sampler_control control = tgsi_sampler_lod_none;
+   enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
   uint chan;
   unsigned char swizzles[4];
   int8_t offsets[3];
@@ -2378,16 +2418,16 @@ exec_sample(struct tgsi_exec_machine *mach,
      if (modifier == TEX_MODIFIER_LOD_BIAS) {
         FETCH(&c1, 3, TGSI_CHAN_X);
         lod = &c1;
-         control = tgsi_sampler_lod_bias;
+         control = TGSI_SAMPLER_LOD_BIAS;
      }
      else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
         FETCH(&c1, 3, TGSI_CHAN_X);
         lod = &c1;
-         control = tgsi_sampler_lod_explicit;
+         control = TGSI_SAMPLER_LOD_EXPLICIT;
      }
      else {
         assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
-         control = tgsi_sampler_lod_zero;
+         control = TGSI_SAMPLER_LOD_ZERO;
      }
   }

@@ -2513,7 +2553,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec,   /* S, T, P, C, LOD */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);           /* R, G, B, A */
      break;

@@ -2529,7 +2569,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec,   /* inputs */
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);     /* outputs */
      break;

@@ -2547,7 +2587,7 @@ exec_sample_d(struct tgsi_exec_machine *mach,

      fetch_texel(mach->Sampler, resource_unit, sampler_unit,
                  &r[0], &r[1], &r[2], &r[3], &ZeroVec,
-                  derivs, offsets, tgsi_sampler_derivs_explicit,
+                  derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
                  &r[0], &r[1], &r[2], &r[3]);
      break;

@@ -4378,6 +4418,12 @@ exec_instruction(
      exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
      break;

+   case TGSI_OPCODE_LODQ:
+      /* src[0] = texcoord */
+      /* src[1] = sampler unit */
+      exec_lodq(mach, inst);
+      break;
+
   case TGSI_OPCODE_UP2H:
      assert (0);
      break;
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -88,13 +88,14 @@ struct tgsi_interp_coef
   float dady[TGSI_NUM_CHANNELS];
 };

-enum tgsi_sampler_control {
-   tgsi_sampler_lod_none,
-   tgsi_sampler_lod_bias,
-   tgsi_sampler_lod_explicit,
-   tgsi_sampler_lod_zero,
-   tgsi_sampler_derivs_explicit,
-   tgsi_sampler_gather,
+enum tgsi_sampler_control
+{
+   TGSI_SAMPLER_LOD_NONE,
+   TGSI_SAMPLER_LOD_BIAS,
+   TGSI_SAMPLER_LOD_EXPLICIT,
+   TGSI_SAMPLER_LOD_ZERO,
+   TGSI_SAMPLER_DERIVS_EXPLICIT,
+   TGSI_SAMPLER_GATHER,
 };

 /**
@@ -138,6 +139,16 @@ struct tgsi_sampler
                     const int j[TGSI_QUAD_SIZE], const int k[TGSI_QUAD_SIZE],
                     const int lod[TGSI_QUAD_SIZE], const int8_t offset[3],
                     float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]);
+   void (*query_lod)(const struct tgsi_sampler *tgsi_sampler,
+                     const unsigned sview_index,
+                     const unsigned sampler_index,
+                     const float s[TGSI_QUAD_SIZE],
+                     const float t[TGSI_QUAD_SIZE],
+                     const float p[TGSI_QUAD_SIZE],
+                     const float c0[TGSI_QUAD_SIZE],
+                     const enum tgsi_sampler_control control,
+                     float mipmap[TGSI_QUAD_SIZE],
+                     float lod[TGSI_QUAD_SIZE]);
 };

 #define TGSI_EXEC_NUM_TEMPS       4096
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -141,7 +141,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
   { 0, 0, 0, 1, 1, 0, NONE, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
   { 0, 0, 0, 0, 1, 0, NONE, "ENDSUB", TGSI_OPCODE_ENDSUB },
   { 1, 1, 1, 0, 0, 0, OTHR, "TXQ_LZ", TGSI_OPCODE_TXQ_LZ },
-   { 0, 0, 0, 0, 0, 0, NONE, "", 104 },     /* removed */
+   { 1, 1, 1, 0, 0, 0, OTHR, "TXQS", TGSI_OPCODE_TXQS },
   { 0, 0, 0, 0, 0, 0, NONE, "", 105 },     /* removed */
   { 0, 0, 0, 0, 0, 0, NONE, "", 106 },     /* removed */
   { 0, 0, 0, 0, 0, 0, NONE, "NOP", TGSI_OPCODE_NOP },
@@ -331,6 +331,7 @@ tgsi_opcode_infer_type( uint opcode )
   case TGSI_OPCODE_SAD: /* XXX some src args may be signed for SAD ? */
   case TGSI_OPCODE_TXQ:
   case TGSI_OPCODE_TXQ_LZ:
+   case TGSI_OPCODE_TXQS:
   case TGSI_OPCODE_F2U:
   case TGSI_OPCODE_UDIV:
   case TGSI_OPCODE_UMAD:
--- a/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_point_sprite.c
@@ -0,0 +1,582 @@
+/*
+ * Copyright 2014 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * This utility transforms the geometry shader to emulate point sprite by
+ * drawing a quad. It also adds an extra output for the original point position
+ * if the point position is to be written to a stream output buffer.
+ * Note: It assumes the driver will add a constant for the inverse viewport
+ *       after the user defined constants.
+ */
+
+#include "util/u_debug.h"
+#include "util/u_math.h"
+#include "tgsi_info.h"
+#include "tgsi_point_sprite.h"
+#include "tgsi_transform.h"
+#include "pipe/p_state.h"
+
+#define INVALID_INDEX 9999
+
+/* Pack a swizzle picking -1, 0 or 1 per component from imm (0, 1, 0.5, -1) */
+static inline unsigned
+set_swizzle(int x, int y, int z, int w)
+{
+   static const unsigned map[3] = {TGSI_SWIZZLE_W, TGSI_SWIZZLE_X,
+                                   TGSI_SWIZZLE_Y};  /* indexed by value + 1 */
+   assert(x >= -1);
+   assert(x <= 1);
+   assert(y >= -1);
+   assert(y <= 1);
+   assert(z >= -1);
+   assert(z <= 1);
+   assert(w >= -1);
+   assert(w <= 1);
+   /* Two bits per component: x at bit 0, y at bit 2, z at bit 4, w at bit 6. */
+   return map[x+1] | (map[y+1] << 2) | (map[z+1] << 4) | (map[w+1] << 6);
+}
+
+static inline unsigned
+get_swizzle(unsigned swizzle, unsigned component)
+{
+   assert(component < 4);   /* a packed swizzle holds four 2-bit fields */
+   return (swizzle >> (component * 2)) & 0x3;   /* field of "component" */
+}
+
+struct psprite_transform_context
+{
+   struct tgsi_transform_context base; // must stay first: ctx is cast to this
+   unsigned num_tmp;                // temporaries in use (next free index)
+   unsigned num_out;                // outputs in use (next free index)
+   unsigned num_orig_out;           // num_out before the prolog adds outputs
+   unsigned num_const;              // constants in use (next free index)
+   unsigned num_imm;                // immediates emitted so far
+   unsigned point_size_in;          // point size input
+   unsigned point_size_out;         // point size output
+   unsigned point_size_tmp;         // point size temp
+   unsigned point_pos_in;           // point pos input
+   unsigned point_pos_out;          // point pos output
+   unsigned point_pos_sout;         // original point pos for streamout
+   unsigned point_pos_tmp;          // point pos temp
+   unsigned point_scale_tmp;        // point scale temp
+   unsigned point_color_out;        // point color output
+   unsigned point_color_tmp;        // point color temp
+   unsigned point_imm;              // point immediates
+   unsigned point_ivp;              // point inverseViewport constant
+   unsigned point_dir_swz[4];       // point direction swizzle
+   unsigned point_coord_swz[4];     // point coord swizzle
+   unsigned point_coord_enable;     // point coord enable mask
+   unsigned point_coord_decl;       // point coord output declared mask
+   unsigned point_coord_out;        // point coord output starting index
+   unsigned point_coord_aa;         // aa point coord semantic index
+   unsigned point_coord_k;          // temp for aa point coord threshold distance
+   unsigned stream_out_point_pos:1; // set if to stream out original point pos
+   unsigned aa_point:1;             // set if doing aa point
+   unsigned out_tmp_index[PIPE_MAX_SHADER_OUTPUTS]; // temp used for each output
+   int max_generic;                 // highest GENERIC output semantic index
+};
+
+static inline struct psprite_transform_context *
+psprite_transform_context(struct tgsi_transform_context *ctx)
+{
+   return (struct psprite_transform_context *) ctx;   /* "base" is first member */
+}
+
+
+/**
+ * TGSI declaration transform callback.
+ */
+static void
+psprite_decl(struct tgsi_transform_context *ctx,
+             struct tgsi_full_declaration *decl)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   const unsigned sem_name = decl->Semantic.Name;
+
+   /* Track special registers and per-file register counts. */
+   switch (decl->Declaration.File) {
+   case TGSI_FILE_INPUT:
+      if (sem_name == TGSI_SEMANTIC_PSIZE)
+         ts->point_size_in = decl->Range.First;
+      else if (sem_name == TGSI_SEMANTIC_POSITION)
+         ts->point_pos_in = decl->Range.First;
+      break;
+   case TGSI_FILE_OUTPUT:
+      if (sem_name == TGSI_SEMANTIC_PSIZE)
+         ts->point_size_out = decl->Range.First;
+      else if (sem_name == TGSI_SEMANTIC_POSITION)
+         ts->point_pos_out = decl->Range.First;
+      else if (sem_name == TGSI_SEMANTIC_GENERIC &&
+               decl->Semantic.Index < 32) {
+         ts->point_coord_decl |= 1 << decl->Semantic.Index;
+         ts->max_generic = MAX2(ts->max_generic, decl->Semantic.Index);
+      }
+      ts->num_out = MAX2(ts->num_out, decl->Range.Last + 1);
+      break;
+   case TGSI_FILE_TEMPORARY:
+      ts->num_tmp = MAX2(ts->num_tmp, decl->Range.Last + 1);
+      break;
+   case TGSI_FILE_CONSTANT:
+      ts->num_const = MAX2(ts->num_const, decl->Range.Last + 1);
+      break;
+   }
+
+   ctx->emit_declaration(ctx, decl);
+}
+
+/**
+ * TGSI immediate transform callback: forward the immediate and count it,
+ * so the prolog knows the index its own immediate will get.
+ */
+static void
+psprite_immediate(struct tgsi_transform_context *ctx,
+                  struct tgsi_full_immediate *imm)
+{
+   ctx->emit_immediate(ctx, imm);
+
+   psprite_transform_context(ctx)->num_imm += 1;
+}
+
+
+/**
+ * TGSI transform prolog callback: sets up temps, outputs, immediate, constant.
+ */
+static void
+psprite_prolog(struct tgsi_transform_context *ctx)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   unsigned point_coord_enable, en;
+   int i;
+
+   /* Allocate one temporary register to stand in for each output register */
+   for (i = 0; i < ts->num_out; i++) {
+      ts->out_tmp_index[i] = ts->num_tmp++;
+   }
+   ts->num_orig_out = ts->num_out;
+
+   /* Declare a tmp register for point scale */
+   ts->point_scale_tmp = ts->num_tmp++;
+   /* Reuse the temp standing in for the PSIZE output, or allocate a new one */
+   if (ts->point_size_out != INVALID_INDEX)
+      ts->point_size_tmp = ts->out_tmp_index[ts->point_size_out];
+   else
+      ts->point_size_tmp = ts->num_tmp++;
+   /* Position: INVALID_INDEX makes the per-vertex output copy skip it */
+   assert(ts->point_pos_out != INVALID_INDEX);
+   ts->point_pos_tmp = ts->out_tmp_index[ts->point_pos_out];
+   ts->out_tmp_index[ts->point_pos_out] = INVALID_INDEX;
+
+   /* Declare one more tmp register for point coord threshold distance
+    * if we are generating anti-aliased point.
+    */
+   if (ts->aa_point)
+      ts->point_coord_k = ts->num_tmp++;
+   /* NOTE(review): range starts at point_size_tmp, not the first new temp */
+   tgsi_transform_temps_decl(ctx, ts->point_size_tmp, ts->num_tmp-1);
+
+   /* Declare an extra output for the original point position for stream out */
+   if (ts->stream_out_point_pos) {
+      ts->point_pos_sout = ts->num_out++;
+      tgsi_transform_output_decl(ctx, ts->point_pos_sout,
+                                 TGSI_SEMANTIC_GENERIC, 0, 0);
+   }
+
+   /* point coord outputs to be declared */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* Declare outputs for the point coords that are enabled but are not
+    * already declared in this shader.
+    */
+   ts->point_coord_out = ts->num_out;
+   if (point_coord_enable) {
+      for (i = 0, en = point_coord_enable; en; en>>=1, i++) {
+         if (en & 0x1) {
+            tgsi_transform_output_decl(ctx, ts->num_out++,
+                                       TGSI_SEMANTIC_GENERIC, i, 0);
+            ts->max_generic = MAX2(ts->max_generic, i);
+         }
+      }
+   }
+
+   /* add an extra generic output for aa point texcoord */
+   if (ts->aa_point) {
+      ts->point_coord_aa = ts->max_generic + 1;
+      assert((ts->point_coord_enable & (1 << ts->point_coord_aa)) == 0);
+      ts->point_coord_enable |= 1 << (ts->point_coord_aa);
+      tgsi_transform_output_decl(ctx, ts->num_out++, TGSI_SEMANTIC_GENERIC,
+                                 ts->point_coord_aa, 0);
+   }
+
+   /* Declare the extra immediate; its index is the current immediate count */
+   ts->point_imm = ts->num_imm;
+   tgsi_transform_immediate_decl(ctx, 0, 1, 0.5, -1);
+
+   /* Declare point constant -
+    * constant.xy -- inverseViewport
+    * constant.z -- current point size
+    * constant.w -- max point size
+    * The driver needs to add this constant to the constant buffer
+    */
+   ts->point_ivp = ts->num_const++;
+   tgsi_transform_const_decl(ctx, ts->point_ivp, ts->point_ivp);
+
+   /* If this geometry shader does not specify point size,
+    * get the current point size from the point constant.
+    */
+   if (ts->point_size_out == INVALID_INDEX) {
+      struct tgsi_full_instruction inst;
+      /* MOV TEMP[point_size_tmp].xyzw, CONST[point_ivp].zzzz */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                             ts->point_size_tmp, TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 1;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_CONSTANT,
+                             ts->point_ivp, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+      ctx->emit_instruction(ctx, &inst);
+   }
+}
+
+
+/**
+ * Replace one EMIT with four emitted quad vertices followed by an ENDPRIM.
+ */
+static void
+psprite_emit_vertex_inst(struct tgsi_transform_context *ctx,
+                         struct tgsi_full_instruction *vert_inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+   struct tgsi_full_instruction inst;
+   unsigned point_coord_enable, en;
+   unsigned i, j, s;
+
+   /* point coord outputs that the prolog declared (enabled, not in shader) */
+   point_coord_enable = ts->point_coord_enable & ~ts->point_coord_decl;
+
+   /* OUTPUT[pos_sout] = TEMP[pos] */
+   if (ts->point_pos_sout != INVALID_INDEX) {
+      tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                              TGSI_FILE_OUTPUT, ts->point_pos_sout,
+                              TGSI_WRITEMASK_XYZW,
+                              TGSI_FILE_TEMPORARY, ts->point_pos_tmp);
+   }
+
+   /**
+    * Set up the point scale vector
+    * scale = pointSize * pos.w * inverseViewport
+    */
+
+   /* MUL point_scale.x, point_size.x, point_pos.w */
+   tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MUL,
+                  TGSI_FILE_TEMPORARY, ts->point_scale_tmp, TGSI_WRITEMASK_X,
+                  TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                  TGSI_FILE_TEMPORARY, ts->point_pos_tmp, TGSI_SWIZZLE_W);
+
+   /* MUL point_scale.xy, point_scale.xx, inverseViewport.xy */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_MUL;
+   inst.Instruction.NumDstRegs = 1;
+   tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_WRITEMASK_XY);
+   inst.Instruction.NumSrcRegs = 2;
+   tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY,
+                          ts->point_scale_tmp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
+   tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_CONSTANT,
+                          ts->point_ivp, TGSI_SWIZZLE_X,
+                          TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
+   ctx->emit_instruction(ctx, &inst);
+
+   /**
+    * Set up the point coord threshold distance
+    * k = 0.5 - 1 / pointsize
+    */
+   if (ts->aa_point) {
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_DIV,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Y,
+                                  TGSI_FILE_TEMPORARY, ts->point_size_tmp,
+                                  TGSI_SWIZZLE_X);
+
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_SUB,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_WRITEMASK_X,
+                                  TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                  TGSI_SWIZZLE_Z,
+                                  TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                  TGSI_SWIZZLE_X);
+   }
+
+   /* One iteration per quad vertex, each with its own direction/coord swizzle */
+   for (i = 0; i < 4; i++) {
+      unsigned point_dir_swz = ts->point_dir_swz[i];
+      unsigned point_coord_swz = ts->point_coord_swz[i];
+
+      /* All outputs need to be emitted for each vertex */
+      for (j = 0; j < ts->num_orig_out; j++) {
+         if (ts->out_tmp_index[j] != INVALID_INDEX) {
+            tgsi_transform_op1_inst(ctx, TGSI_OPCODE_MOV,
+                                    TGSI_FILE_OUTPUT, j,
+                                    TGSI_WRITEMASK_XYZW,
+                                    TGSI_FILE_TEMPORARY, ts->out_tmp_index[j]);
+         }
+      }
+
+      /* MAD: pos = point_scale.xyxx * point_dir + point_pos */
+      inst = tgsi_default_full_instruction();
+      inst.Instruction.Opcode = TGSI_OPCODE_MAD;
+      inst.Instruction.NumDstRegs = 1;
+      tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT, ts->point_pos_out,
+                             TGSI_WRITEMASK_XYZW);
+      inst.Instruction.NumSrcRegs = 3;
+      tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_TEMPORARY, ts->point_scale_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X,
+                             TGSI_SWIZZLE_X);
+      tgsi_transform_src_reg(&inst.Src[1], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                             get_swizzle(point_dir_swz, 0),
+                             get_swizzle(point_dir_swz, 1),
+                             get_swizzle(point_dir_swz, 2),
+                             get_swizzle(point_dir_swz, 3));
+      tgsi_transform_src_reg(&inst.Src[2], TGSI_FILE_TEMPORARY, ts->point_pos_tmp,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z,
+                             TGSI_SWIZZLE_W);
+      ctx->emit_instruction(ctx, &inst);
+
+      /* point coord: s walks the enable-mask bits, j counts outputs written */
+      for (j = 0, s = 0, en = point_coord_enable; en; en>>=1, s++) {
+         unsigned dstReg;
+
+         if (en & 0x1) {
+            dstReg = ts->point_coord_out + j;
+
+            inst = tgsi_default_full_instruction();
+            inst.Instruction.Opcode = TGSI_OPCODE_MOV;
+            inst.Instruction.NumDstRegs = 1;
+            tgsi_transform_dst_reg(&inst.Dst[0], TGSI_FILE_OUTPUT,
+                                   dstReg, TGSI_WRITEMASK_XYZW);
+            inst.Instruction.NumSrcRegs = 1;
+            tgsi_transform_src_reg(&inst.Src[0], TGSI_FILE_IMMEDIATE, ts->point_imm,
+                                   get_swizzle(point_coord_swz, 0),
+                                   get_swizzle(point_coord_swz, 1),
+                                   get_swizzle(point_coord_swz, 2),
+                                   get_swizzle(point_coord_swz, 3));
+            ctx->emit_instruction(ctx, &inst);
+
+            /* MOV point_coord.z, point_coord_k.x (aa point coord output only) */
+            if (s == ts->point_coord_aa) {
+               tgsi_transform_op1_swz_inst(ctx, TGSI_OPCODE_MOV,
+                                           TGSI_FILE_OUTPUT, dstReg, TGSI_WRITEMASK_Z,
+                                           TGSI_FILE_TEMPORARY, ts->point_coord_k,
+                                           TGSI_SWIZZLE_X);
+            }
+            j++;  /* the next point coord output offset */
+         }
+      }
+
+      /* Emit the EMIT instruction for each vertex of the quad */
+      ctx->emit_instruction(ctx, vert_inst);
+   }
+
+   /* Emit the ENDPRIM instruction for the quad, reusing the EMIT's Src[0] */
+   inst = tgsi_default_full_instruction();
+   inst.Instruction.Opcode = TGSI_OPCODE_ENDPRIM;
+   inst.Instruction.NumDstRegs = 0;
+   inst.Instruction.NumSrcRegs = 1;
+   inst.Src[0] = vert_inst->Src[0];
+   ctx->emit_instruction(ctx, &inst);
+}
+
+
+/**
+ * Per-instruction callback of the point sprite transform.
+ *
+ * EMIT instructions are handed over to psprite_emit_vertex_inst().
+ * Any other instruction whose first destination is an OUTPUT register is
+ * redirected to a TEMPORARY register before being emitted; a write to the
+ * point size output is additionally followed by a MAX/MIN clamp of the
+ * temporary.  All remaining instructions are emitted unchanged.
+ *
+ * \param ctx   the transform context; psprite_transform_context() yields
+ *              the point sprite state that goes with it
+ * \param inst  the instruction being visited; Dst[0] may be rewritten in place
+ */
+static void
+psprite_inst(struct tgsi_transform_context *ctx,
+             struct tgsi_full_instruction *inst)
+{
+   struct psprite_transform_context *ts = psprite_transform_context(ctx);
+
+   /* Vertex emission is expanded by a dedicated helper. */
+   if (inst->Instruction.Opcode == TGSI_OPCODE_EMIT) {
+      psprite_emit_vertex_inst(ctx, inst);
+      return;
+   }
+
+   /* Instructions that do not write an output register pass through as is. */
+   if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT) {
+      ctx->emit_instruction(ctx, inst);
+      return;
+   }
+
+   if (inst->Dst[0].Register.Index == ts->point_size_out) {
+      /*
+       * Point size: write to the point size temporary instead of the
+       * output.  The tmp reg will be later used as a src reg for computing
+       * the point scale factor.
+       */
+      inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+      inst->Dst[0].Register.Index = ts->point_size_tmp;
+      ctx->emit_instruction(ctx, inst);
+
+      /* Clamp the point size, lower bound first. */
+
+      /* MAX point_size_tmp.x, point_size_tmp.x, point_imm.y */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MAX,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_IMMEDIATE, ts->point_imm, TGSI_SWIZZLE_Y);
+
+      /* MIN point_size_tmp.x, point_size_tmp.x, point_ivp.w */
+      tgsi_transform_op2_swz_inst(ctx, TGSI_OPCODE_MIN,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_WRITEMASK_X,
+                 TGSI_FILE_TEMPORARY, ts->point_size_tmp, TGSI_SWIZZLE_X,
+                 TGSI_FILE_CONSTANT, ts->point_ivp, TGSI_SWIZZLE_W);
+      return;
+   }
+
+   /*
+    * Remaining outputs: pick the temporary that stands in for the output.
+    * The position output has its own temporary, every other output is
+    * looked up in the per-output table.
+    */
+   if (inst->Dst[0].Register.Index == ts->point_pos_out) {
+      inst->Dst[0].Register.Index = ts->point_pos_tmp;
+   }
+   else {
+      inst->Dst[0].Register.Index =
+         ts->out_tmp_index[inst->Dst[0].Register.Index];
+   }
+   inst->Dst[0].Register.File = TGSI_FILE_TEMPORARY;
+   ctx->emit_instruction(ctx, inst);
+}
+
+
+/**
+ * Property callback of the point sprite transform.
+ *
+ * Each point is turned into a 4-vertex triangle strip, so the geometry
+ * shader output primitive is forced to PIPE_PRIM_TRIANGLE_STRIP and the
+ * maximum output vertex count is multiplied by four.  Every property,
+ * modified or not, is then emitted.
+ *
+ * \param ctx   the transform context
+ * \param prop  the property being visited; u[0].Data may be rewritten in place
+ */
+static void
+psprite_property(struct tgsi_transform_context *ctx,
+                 struct tgsi_full_property *prop)
+{
+   if (prop->Property.PropertyName == TGSI_PROPERTY_GS_OUTPUT_PRIM) {
+      /* points become quads drawn as triangle strips */
+      prop->u[0].Data = PIPE_PRIM_TRIANGLE_STRIP;
+   }
+   else if (prop->Property.PropertyName ==
+            TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
+      /* four vertices are emitted per original point */
+      prop->u[0].Data = prop->u[0].Data * 4;
+   }
+
+   ctx->emit_property(ctx, prop);
+}
+
+/**
+ * TGSI utility to transform a geometry shader to support point sprite.
+ *
+ * Sets up a psprite_transform_context, runs tgsi_transform_shader() over
+ * the incoming tokens and returns the resulting token buffer.
+ *
+ * \param tokens_in                 the shader to transform
+ * \param point_coord_enable        stored in the transform context
+ *                                  (presumably a bitmask of the point coord
+ *                                  outputs to generate -- confirm with caller)
+ * \param sprite_origin_lower_left  selects which of the two point coord
+ *                                  orderings is assigned to the quad corners
+ * \param stream_out_point_pos      stored in the transform context
+ * \param aa_point_coord_index      optional; when non-NULL, aa_point is set
+ *                                  in the context and the context's
+ *                                  point_coord_aa value is written back
+ *                                  through this pointer
+ * \return the buffer obtained from tgsi_alloc_tokens() holding the
+ *         transformed shader, or NULL if that allocation fails
+ */
+struct tgsi_token *
+tgsi_add_point_sprite(const struct tgsi_token *tokens_in,
+                      const unsigned point_coord_enable,
+                      const bool sprite_origin_lower_left,
+                      const bool stream_out_point_pos,
+                      int *aa_point_coord_index)
+{
+   const uint extra_tokens = 200; /* should be enough */
+   const uint out_len = tgsi_num_tokens(tokens_in) + extra_tokens;
+   struct psprite_transform_context transform;
+   struct tgsi_token *tokens_out;
+   int corner;
+
+   memset(&transform, 0, sizeof(transform));
+
+   /* callbacks driving the transformation */
+   transform.base.prolog = psprite_prolog;
+   transform.base.transform_declaration = psprite_decl;
+   transform.base.transform_immediate = psprite_immediate;
+   transform.base.transform_instruction = psprite_inst;
+   transform.base.transform_property = psprite_property;
+
+   /* no register has been located or allocated yet */
+   transform.point_size_in = INVALID_INDEX;
+   transform.point_size_out = INVALID_INDEX;
+   transform.point_size_tmp = INVALID_INDEX;
+   transform.point_scale_tmp = INVALID_INDEX;
+   transform.point_pos_in = INVALID_INDEX;
+   transform.point_pos_out = INVALID_INDEX;
+   transform.point_pos_sout = INVALID_INDEX;
+   transform.point_pos_tmp = INVALID_INDEX;
+   transform.point_coord_aa = INVALID_INDEX;
+   transform.point_coord_k = INVALID_INDEX;
+   transform.point_imm = INVALID_INDEX;
+   transform.max_generic = -1;
+
+   /* caller supplied options */
+   transform.point_coord_enable = point_coord_enable;
+   transform.stream_out_point_pos = stream_out_point_pos;
+   transform.aa_point = aa_point_coord_index != NULL;
+
+   /*
+    * Per-corner swizzles for the four vertices of the quad.
+    *
+    * Directions are, in order:
+    *    (-1, -1, 0, 0), (-1, 1, 0, 0), (1, -1, 0, 0), (1, 1, 0, 0)
+    *
+    * Point coords are, in order, for a lower left sprite origin:
+    *    (0, 0, 0, 1), (0, 1, 0, 1), (1, 0, 0, 1), (1, 1, 0, 1)
+    * and otherwise, with the second component flipped:
+    *    (0, 1, 0, 1), (0, 0, 0, 1), (1, 1, 0, 1), (1, 0, 0, 1)
+    *
+    * NOTE(review): set_swizzle() presumably maps each of these values to
+    * the matching component of the point_imm immediate -- confirm against
+    * its definition.
+    */
+   for (corner = 0; corner < 4; corner++) {
+      const int s = corner >> 1;   /* 0, 0, 1, 1 */
+      const int t = corner & 1;    /* 0, 1, 0, 1 */
+
+      transform.point_dir_swz[corner] =
+         set_swizzle(2 * s - 1, 2 * t - 1, 0, 0);
+      transform.point_coord_swz[corner] =
+         set_swizzle(s, sprite_origin_lower_left ? t : 1 - t, 0, 1);
+   }
+
+   /* room for the original shader plus the instructions added above */
+   tokens_out = tgsi_alloc_tokens(out_len);
+   if (!tokens_out)
+      return NULL;
+
+   tgsi_transform_shader(tokens_in, tokens_out, out_len, &transform.base);
+
+   /* report the anti-aliasing point coord index if the caller asked for it */
+   if (aa_point_coord_index)
+      *aa_point_coord_index = transform.point_coord_aa;
+
+   return tokens_out;
+}
--- a/Show More
+++ b/Show More