Update version to 11.0.0-rc3

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
nouveau: don't mark full range as used on unmap with explicit flush
2015-09-06 19:30:23 +01:00 · 2015-09-06 19:11:00 +01:00 · 2015-09-06 19:09:59 +01:00 · 2015-09-06 19:09:11 +01:00 · 2015-09-06 19:08:22 +01:00 · 2015-09-06 19:07:37 +01:00
87 changed files with 922 additions and 899 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-11.0.0-devel
+11.0.0-rc3
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,6 @@ dnl Copyright © 2009-2014 Jon TURNEY
 dnl Copyright © 2011-2012 Benjamin Franzke
 dnl Copyright © 2008-2014 David Airlie
 dnl Copyright © 2009-2013 Brian Paul
-dnl Copyright © 2003-2007 Keith Packard, Daniel Stone
 dnl
 dnl Permission is hereby granted, free of charge, to any person obtaining a
 dnl copy of this software and associated documentation files (the "Software"),
@@ -988,144 +987,6 @@ fi

 AC_SUBST([MESA_LLVM])

-# SHA1 hashing
-AC_ARG_WITH([sha1],
-        [AS_HELP_STRING([--with-sha1=libc|libmd|libnettle|libgcrypt|libcrypto|libsha1|CommonCrypto|CryptoAPI],
-        [choose SHA1 implementation])])
-case "x$with_sha1" in
-x | xlibc | xlibmd | xlibnettle | xlibgcrypt | xlibcrypto | xlibsha1 | xCommonCrypto | xCryptoAPI)
-  ;;
-*)
-        AC_MSG_ERROR([Illegal value for --with-sha1: $with_sha1])
-esac
-
-AC_CHECK_FUNC([SHA1Init], [HAVE_SHA1_IN_LIBC=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_LIBC" = xyes; then
-	with_sha1=libc
-fi
-if test "x$with_sha1" = xlibc && test "x$HAVE_SHA1_IN_LIBC" != xyes; then
-	AC_MSG_ERROR([sha1 in libc requested but not found])
-fi
-if test "x$with_sha1" = xlibc; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBC], [1],
-		[Use libc SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_FUNC([CC_SHA1_Init], [HAVE_SHA1_IN_COMMONCRYPTO=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_COMMONCRYPTO" = xyes; then
-	with_sha1=CommonCrypto
-fi
-if test "x$with_sha1" = xCommonCrypto && test "x$HAVE_SHA1_IN_COMMONCRYPTO" != xyes; then
-	AC_MSG_ERROR([CommonCrypto requested but not found])
-fi
-if test "x$with_sha1" = xCommonCrypto; then
-	AC_DEFINE([HAVE_SHA1_IN_COMMONCRYPTO], [1],
-		[Use CommonCrypto SHA1 functions])
-	SHA1_LIBS=""
-fi
-dnl stdcall functions cannot be tested with AC_CHECK_LIB
-AC_CHECK_HEADER([wincrypt.h], [HAVE_SHA1_IN_CRYPTOAPI=yes], [], [#include <windows.h>])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_CRYPTOAPI" = xyes; then
-	with_sha1=CryptoAPI
-fi
-if test "x$with_sha1" = xCryptoAPI && test "x$HAVE_SHA1_IN_CRYPTOAPI" != xyes; then
-	AC_MSG_ERROR([CryptoAPI requested but not found])
-fi
-if test "x$with_sha1" = xCryptoAPI; then
-	AC_DEFINE([HAVE_SHA1_IN_CRYPTOAPI], [1],
-		[Use CryptoAPI SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_LIB([md], [SHA1Init], [HAVE_LIBMD=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBMD" = xyes; then
-	with_sha1=libmd
-fi
-if test "x$with_sha1" = xlibmd && test "x$HAVE_LIBMD" != xyes; then
-	AC_MSG_ERROR([libmd requested but not found])
-fi
-if test "x$with_sha1" = xlibmd; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBMD], [1],
-	          [Use libmd SHA1 functions])
-	SHA1_LIBS=-lmd
-fi
-PKG_CHECK_MODULES([LIBSHA1], [libsha1], [HAVE_LIBSHA1=yes], [HAVE_LIBSHA1=no])
-if test "x$with_sha1" = x && test "x$HAVE_LIBSHA1" = xyes; then
-   with_sha1=libsha1
-fi
-if test "x$with_sha1" = xlibsha1 && test "x$HAVE_LIBSHA1" != xyes; then
-	AC_MSG_ERROR([libsha1 requested but not found])
-fi
-if test "x$with_sha1" = xlibsha1; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBSHA1], [1],
-	          [Use libsha1 for SHA1])
-	SHA1_LIBS=-lsha1
-fi
-AC_CHECK_LIB([nettle], [nettle_sha1_init], [HAVE_LIBNETTLE=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBNETTLE" = xyes; then
-	with_sha1=libnettle
-fi
-if test "x$with_sha1" = xlibnettle && test "x$HAVE_LIBNETTLE" != xyes; then
-	AC_MSG_ERROR([libnettle requested but not found])
-fi
-if test "x$with_sha1" = xlibnettle; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBNETTLE], [1],
-	          [Use libnettle SHA1 functions])
-	SHA1_LIBS=-lnettle
-fi
-AC_CHECK_LIB([gcrypt], [gcry_md_open], [HAVE_LIBGCRYPT=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBGCRYPT" = xyes; then
-	with_sha1=libgcrypt
-fi
-if test "x$with_sha1" = xlibgcrypt && test "x$HAVE_LIBGCRYPT" != xyes; then
-	AC_MSG_ERROR([libgcrypt requested but not found])
-fi
-if test "x$with_sha1" = xlibgcrypt; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBGCRYPT], [1],
-	          [Use libgcrypt SHA1 functions])
-	SHA1_LIBS=-lgcrypt
-fi
-# We don't need all of the OpenSSL libraries, just libcrypto
-AC_CHECK_LIB([crypto], [SHA1_Init], [HAVE_LIBCRYPTO=yes])
-PKG_CHECK_MODULES([OPENSSL], [openssl], [HAVE_OPENSSL_PKC=yes],
-                  [HAVE_OPENSSL_PKC=no])
-if test "x$HAVE_LIBCRYPTO" = xyes || test "x$HAVE_OPENSSL_PKC" = xyes; then
-	if test "x$with_sha1" = x; then
-		with_sha1=libcrypto
-	fi
-else
-	if test "x$with_sha1" = xlibcrypto; then
-		AC_MSG_ERROR([OpenSSL libcrypto requested but not found])
-	fi
-fi
-if test "x$with_sha1" = xlibcrypto; then
-	if test "x$HAVE_LIBCRYPTO" = xyes; then
-		SHA1_LIBS=-lcrypto
-	else
-		SHA1_LIBS="$OPENSSL_LIBS"
-		SHA1_CFLAGS="$OPENSSL_CFLAGS"
-	fi
-fi
-AC_MSG_CHECKING([for SHA1 implementation])
-AC_MSG_RESULT([$with_sha1])
-AC_SUBST(SHA1_LIBS)
-AC_SUBST(SHA1_CFLAGS)
-
-# Allow user to configure out the shader-cache feature
-AC_ARG_ENABLE([shader-cache],
-    AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
-    [enable_shader_cache="$enableval"],
-    [if test "x$with_sha1" != "x"; then
-        enable_shader_cache=yes
-     else
-        enable_shader_cache=no
-     fi])
-if test "x$with_sha1" = "x"; then
-    if test "x$enable_shader_cache" = "xyes"; then
-        AC_MSG_ERROR([Cannot enable shader cache (no SHA-1 implementation found)])
-    fi
-fi
-AM_CONDITIONAL([ENABLE_SHADER_CACHE], [test x$enable_shader_cache = xyes])
-
 case "$host_os" in
 linux*)
    need_pci_id=yes ;;
@@ -2484,12 +2345,6 @@ else
    echo "        Gallium:         no"
 fi

-dnl Shader cache
-echo ""
-echo "        Shader cache:    $enable_shader_cache"
-if test "x$enable_shader_cache" = "xyes"; then
-    echo "        With SHA1 from:  $with_sha1"
-fi

 dnl Libraries
 echo ""
--- a/src/egl/SConscript
+++ b/src/egl/SConscript
@@ -15,7 +15,6 @@ env.Append(CPPPATH = [

 # parse Makefile.sources
 egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
-egl_sources.append(env.ParseSourceList('Makefile.sources', 'dri2_backend_core_FILES'))

 env.Append(CPPDEFINES = [
    '_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
--- a/src/gallium/auxiliary/gallivm/lp_bld_const.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_const.c
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
   else {
      double dscale = lp_const_scale(type);

-      elem = LLVMConstInt(elem_type, round(val*dscale), 0);
+      elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
   }

   return elem;
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider,
                        unsigned bypass_usage,
                        uint64_t maximum_cache_size);

+/**
+ * Remove a buffer from the cache, but keep it alive.
+ */
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *buf);

 struct pb_fence_ops;

--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr)
 }


+static void
+_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
+{
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (buf->head.next) {
+      LIST_DEL(&buf->head);
+      assert(mgr->numDelayed);
+      --mgr->numDelayed;
+      mgr->cache_size -= buf->base.size;
+   }
+   buf->mgr = NULL;
+}
+
+void
+pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
+{
+   struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
+   struct pb_cache_manager *mgr = buf->mgr;
+
+   if (!mgr)
+      return;
+
+   pipe_mutex_lock(mgr->mutex);
+   _pb_cache_manager_remove_buffer_locked(buf);
+   pipe_mutex_unlock(mgr->mutex);
+}
+
 /**
 * Actually destroy the buffer.
 */
 static inline void
 _pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
 {
-   struct pb_cache_manager *mgr = buf->mgr;
-
-   LIST_DEL(&buf->head);
-   assert(mgr->numDelayed);
-   --mgr->numDelayed;
-   mgr->cache_size -= buf->base.size;
+   if (buf->mgr)
+      _pb_cache_manager_remove_buffer_locked(buf);
   assert(!pipe_is_referenced(&buf->base.reference));
   pb_reference(&buf->buffer, NULL);
   FREE(buf);
@@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
   struct pb_cache_buffer *buf = pb_cache_buffer(_buf);   
   struct pb_cache_manager *mgr = buf->mgr;

+   if (!mgr) {
+      pb_reference(&buf->buffer, NULL);
+      FREE(buf);
+      return;
+   }
+
   pipe_mutex_lock(mgr->mutex);
   assert(!pipe_is_referenced(&buf->base.reference));
   
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -41,6 +41,7 @@
 #include "util/u_tile.h" 
 #include "util/u_prim.h"
 #include "util/u_surface.h"
+#include <inttypes.h>

 #include <stdio.h>
 #include <limits.h> /* CHAR_BIT */
@@ -275,7 +276,7 @@ debug_get_flags_option(const char *name,
      for (; flags->name; ++flags)
         namealign = MAX2(namealign, strlen(flags->name));
      for (flags = orig; flags->name; ++flags)
-         _debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
+         _debug_printf("| %*s [0x%0*"PRIu64"]%s%s\n", namealign, flags->name,
                      (int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
                      flags->desc ? " " : "", flags->desc ? flags->desc : "");
   }
@@ -290,9 +291,9 @@ debug_get_flags_option(const char *name,

   if (debug_get_option_should_print()) {
      if (str) {
-         debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
+         debug_printf("%s: %s = 0x%"PRIu64" (%s)\n", __FUNCTION__, name, result, str);
      } else {
-         debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
+         debug_printf("%s: %s = 0x%"PRIu64"\n", __FUNCTION__, name, result);
      }
   }

--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_emit.c
@@ -563,10 +563,29 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
 		val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
 				A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
+		/* TODO only use if prog doesn't use clipvertex/clipdist */
+		val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
 		OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
 		OUT_RING(ring, val);
 	}

+	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
+		uint32_t planes = ctx->rasterizer->clip_plane_enable;
+		int count = 0;
+
+		while (planes && count < 6) {
+			int i = ffs(planes) - 1;
+
+			planes &= ~(1U << i);
+			fd_wfi(ctx, ring);
+			OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
+			OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
+		}
+	}
+
 	/* NOTE: since primitive_restart is not actually part of any
 	 * state object, we need to make sure that we always emit
 	 * PRIM_VTX_CNTL.. either that or be more clever and detect
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -162,10 +162,13 @@ enum a4xx_tex_fmt {
 	TFMT4_8_UNORM = 4,
 	TFMT4_8_8_UNORM = 14,
 	TFMT4_8_8_8_8_UNORM = 28,
+	TFMT4_8_SNORM = 5,
 	TFMT4_8_8_SNORM = 15,
 	TFMT4_8_8_8_8_SNORM = 29,
+	TFMT4_8_UINT = 6,
 	TFMT4_8_8_UINT = 16,
 	TFMT4_8_8_8_8_UINT = 30,
+	TFMT4_8_SINT = 7,
 	TFMT4_8_8_SINT = 17,
 	TFMT4_8_8_8_8_SINT = 31,
 	TFMT4_16_UINT = 21,
--- a/src/gallium/drivers/freedreno/a4xx/fd4_format.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_format.c
@@ -79,9 +79,9 @@ struct fd4_format {
 static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
 	/* 8-bit */
 	VT(R8_UNORM,   8_UNORM, R8_UNORM, WZYX),
-	V_(R8_SNORM,   8_SNORM, NONE,     WZYX),
-	V_(R8_UINT,    8_UINT,  NONE,     WZYX),
-	V_(R8_SINT,    8_SINT,  NONE,     WZYX),
+	VT(R8_SNORM,   8_SNORM, NONE,     WZYX),
+	VT(R8_UINT,    8_UINT,  NONE,     WZYX),
+	VT(R8_SINT,    8_SINT,  NONE,     WZYX),
 	V_(R8_USCALED, 8_UINT,  NONE,     WZYX),
 	V_(R8_SSCALED, 8_UINT,  NONE,     WZYX),

@@ -115,8 +115,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {

 	VT(R8G8_UNORM,   8_8_UNORM, R8G8_UNORM, WZYX),
 	VT(R8G8_SNORM,   8_8_SNORM, R8G8_SNORM, WZYX),
-	VT(R8G8_UINT,    8_8_UINT,  NONE,       WZYX),
-	VT(R8G8_SINT,    8_8_SINT,  NONE,       WZYX),
+	VT(R8G8_UINT,    8_8_UINT,  R8G8_UINT,  WZYX),
+	VT(R8G8_SINT,    8_8_SINT,  R8G8_SINT,  WZYX),
 	V_(R8G8_USCALED, 8_8_UINT,  NONE,       WZYX),
 	V_(R8G8_SSCALED, 8_8_SINT,  NONE,       WZYX),

--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2015-05-20 20:03:14)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  14968 bytes, from 2015-05-20 20:12:27)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  67120 bytes, from 2015-08-14 23:22:03)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63785 bytes, from 2015-08-14 18:27:06)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  63915 bytes, from 2015-08-24 16:56:28)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/freedreno_context.h
+++ b/src/gallium/drivers/freedreno/freedreno_context.h
@@ -334,6 +334,7 @@ struct fd_context {
 		FD_DIRTY_INDEXBUF    = (1 << 16),
 		FD_DIRTY_SCISSOR     = (1 << 17),
 		FD_DIRTY_STREAMOUT   = (1 << 18),
+		FD_DIRTY_UCP         = (1 << 19),
 	} dirty;

 	struct pipe_blend_state *blend;
@@ -355,6 +356,7 @@ struct fd_context {
 	struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
 	struct pipe_index_buffer indexbuf;
 	struct fd_streamout_stateobj streamout;
+	struct pipe_clip_state ucp;

 	/* GMEM/tile handling fxns: */
 	void (*emit_tile_init)(struct fd_context *ctx);
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -65,7 +65,9 @@ static void
 fd_set_clip_state(struct pipe_context *pctx,
 		const struct pipe_clip_state *clip)
 {
-	DBG("TODO: ");
+	struct fd_context *ctx = fd_context(pctx);
+	ctx->ucp = *clip;
+	ctx->dirty |= FD_DIRTY_UCP;
 }

 static void
--- a/src/gallium/drivers/nouveau/nouveau_buffer.c
+++ b/src/gallium/drivers/nouveau/nouveau_buffer.c
@@ -532,8 +532,13 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
   struct nv04_resource *buf = nv04_resource(transfer->resource);

   if (tx->base.usage & PIPE_TRANSFER_WRITE) {
-      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
-         nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+      if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
+         if (tx->map)
+            nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
+
+         util_range_add(&buf->valid_buffer_range,
+                        tx->base.box.x, tx->base.box.x + tx->base.box.width);
+      }

      if (likely(buf->domain)) {
         const uint8_t bind = buf->base.bind;
@@ -541,9 +546,6 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
         if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
            nv->vbo_dirty = true;
      }
-
-      util_range_add(&buf->valid_buffer_range,
-                     tx->base.box.x, tx->base.box.x + tx->base.box.width);
   }

   if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
--- a/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_miptree.c
@@ -28,6 +28,7 @@
 #include "util/u_surface.h"

 #include "nv_m2mf.xml.h"
+#include "nv_object.xml.h"
 #include "nv30/nv30_screen.h"
 #include "nv30/nv30_context.h"
 #include "nv30/nv30_resource.h"
@@ -144,21 +145,18 @@ nv30_resource_copy_region(struct pipe_context *pipe,
   nv30_transfer_rect(nv30, NEAREST, &src, &dst);
 }

-void
-nv30_resource_resolve(struct pipe_context *pipe,
-                      const struct pipe_resolve_info *info)
+static void
+nv30_resource_resolve(struct nv30_context *nv30,
+                      const struct pipe_blit_info *info)
 {
-#if 0
-   struct nv30_context *nv30 = nv30_context(pipe);
   struct nv30_rect src, dst;

-   define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
-               info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
-   define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
-               info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
+   define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
+      info->src.box.y, info->src.box.width, info->src.box.height, &src);
+   define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
+      info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);

   nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
-#endif
 }

 void
@@ -172,7 +170,7 @@ nv30_blit(struct pipe_context *pipe,
       info.dst.resource->nr_samples <= 1 &&
       !util_format_is_depth_or_stencil(info.src.resource->format) &&
       !util_format_is_pure_integer(info.src.resource->format)) {
-      debug_printf("nv30: color resolve unimplemented\n");
+      nv30_resource_resolve(nv30, blit_info);
      return;
   }

@@ -362,6 +360,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
   blocksz = util_format_get_blocksize(pt->format);

   if ((pt->target == PIPE_TEXTURE_RECT) ||
+       (pt->bind & PIPE_BIND_SCANOUT) ||
       !util_is_power_of_two(pt->width0) ||
       !util_is_power_of_two(pt->height0) ||
       !util_is_power_of_two(pt->depth0) ||
@@ -369,6 +368,14 @@ nv30_miptree_create(struct pipe_screen *pscreen,
       util_format_is_float(pt->format) || mt->ms_mode) {
      mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
      mt->uniform_pitch = align(mt->uniform_pitch, 64);
+      if (pt->bind & PIPE_BIND_SCANOUT) {
+         struct nv30_screen *screen = nv30_screen(pscreen);
+         int pitch_align = MAX2(
+               screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
+               /* round_down_pow2(mt->uniform_pitch / 4) */
+               1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
+         mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
+      }
   }

   if (!mt->uniform_pitch)
--- a/src/gallium/drivers/nouveau/nv30/nv30_resource.h
+++ b/src/gallium/drivers/nouveau/nv30/nv30_resource.h
@@ -65,9 +65,6 @@ nv30_resource_copy_region(struct pipe_context *pipe,
                          struct pipe_resource *src, unsigned src_level,
                          const struct pipe_box *src_box);

-void
-nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
-
 void
 nv30_blit(struct pipe_context *pipe,
          const struct pipe_blit_info *blit_info);
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.c
@@ -199,9 +199,13 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
         }
      }

-      if (nv50->idxbuf.buffer == res)
+      if (nv50->idxbuf.buffer == res) {
+         /* Just rebind to the bufctx as there is no separate dirty bit */
+         nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
+         BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
         if (!--ref)
            return ref;
+      }

      for (s = 0; s < 3; ++s) {
      assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -197,7 +197,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);

 /* nv50_query.c */
 void nv50_init_query_functions(struct nv50_context *);
-void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
+void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
                               struct pipe_query *, unsigned result_offset);
 void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
 void nva0_so_target_save_offset(struct pipe_context *,
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.c
@@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
      case TGSI_SEMANTIC_VERTEXID:
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
         prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
+         prog->vp.vertexid = 1;
         continue;
      default:
         break;
--- a/src/gallium/drivers/nouveau/nv50/nv50_program.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_program.h
@@ -76,6 +76,7 @@ struct nv50_program {
      ubyte psiz;        /* output slot of point size */
      ubyte bfc[2];      /* indices into varying for FFC (FP) or BFC (VP) */
      ubyte edgeflag;
+      ubyte vertexid;
      ubyte clpd[2];     /* output slot of clip distance[i]'s 1st component */
      ubyte clpd_nr;
   } vp;
--- a/src/gallium/drivers/nouveau/nv50/nv50_query.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c
@@ -266,6 +266,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
      nv50_query_get(push, q, 0, 0x1000f010);
      break;
   case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
+      q->sequence++;
      nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
      break;
   case PIPE_QUERY_TIMESTAMP_DISJOINT:
@@ -451,18 +452,18 @@ nv50_render_condition(struct pipe_context *pipe,
 }

 void
-nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
+nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
                          struct pipe_query *pq, unsigned result_offset)
 {
   struct nv50_query *q = nv50_query(pq);

-   /* XXX: does this exist ? */
-#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
+   nv50_query_update(q);
+   if (q->state != NV50_QUERY_STATE_READY)
+      nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
+   q->state = NV50_QUERY_STATE_READY;

-   PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
-   nouveau_pushbuf_space(push, 0, 0, 1);
-   nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
-                        NV50_IB_ENTRY_1_NO_PREFETCH);
+   BEGIN_NV04(push, SUBC_3D(method), 1);
+   PUSH_DATA (push, q->data[result_offset / 4]);
 }

 void
--- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c
@@ -641,12 +641,12 @@ nv50_stream_output_validate(struct nv50_context *nv50)
      PUSH_DATA (push, so->num_attribs[i]);
      if (n == 4) {
         PUSH_DATA(push, targ->pipe.buffer_size);
-
-         BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
         if (!targ->clean) {
            assert(targ->pq);
-            nv50_query_pushbuf_submit(push, targ->pq, 0x4);
+            nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
+                                      targ->pq, 0x4);
         } else {
+            BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
            PUSH_DATA(push, 0);
            targ->clean = false;
         }
@@ -655,6 +655,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
            (so->stride[i] * nv50->state.prim_size);
         prims = MIN2(prims, limit);
      }
+      targ->stride = so->stride[i];
      BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
   }
   if (prims != ~0) {
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -117,7 +117,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
   struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
   int i;
   bool emit_common_func = cso->rt[0].blend_enable;
-   uint32_t ms;

   if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
      SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
@@ -189,15 +188,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
      SB_DATA    (so, nv50_colormask(cso->rt[0].colormask));
   }

-   ms = 0;
-   if (cso->alpha_to_coverage)
-      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
-   if (cso->alpha_to_one)
-      ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
-
-   SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
-   SB_DATA    (so, ms);
-
   assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
   return so;
 }
--- a/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state_validate.c
@@ -1,4 +1,6 @@

+#include "util/u_format.h"
+
 #include "nv50/nv50_context.h"
 #include "nv50/nv50_defs.xml.h"

@@ -313,6 +315,25 @@ nv50_validate_derived_2(struct nv50_context *nv50)
   }
 }

+static void
+nv50_validate_derived_3(struct nv50_context *nv50)
+{
+   struct nouveau_pushbuf *push = nv50->base.pushbuf;
+   struct pipe_framebuffer_state *fb = &nv50->framebuffer;
+   uint32_t ms = 0;
+
+   if ((!fb->nr_cbufs || !fb->cbufs[0] ||
+        !util_format_is_pure_integer(fb->cbufs[0]->format)) && nv50->blend) {
+      if (nv50->blend->pipe.alpha_to_coverage)
+         ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+      if (nv50->blend->pipe.alpha_to_one)
+         ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+   }
+
+   BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
+   PUSH_DATA (push, ms);
+}
+
 static void
 nv50_validate_clip(struct nv50_context *nv50)
 {
@@ -474,6 +495,7 @@ static struct state_validate {
    { nv50_validate_derived_rs,    NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
    { nv50_validate_derived_2,     NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
+    { nv50_validate_derived_3,     NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
    { nv50_validate_clip,          NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
    { nv50_constbufs_validate,     NV50_NEW_CONSTBUF },
@@ -481,7 +503,8 @@ static struct state_validate {
    { nv50_validate_samplers,      NV50_NEW_SAMPLERS },
    { nv50_stream_output_validate, NV50_NEW_STRMOUT |
                                   NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
-    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
+    { nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
+                                   NV50_NEW_VERTPROG },
    { nv50_validate_min_samples,   NV50_NEW_MIN_SAMPLES },
 };
 #define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
--- a/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_stateobj.h
@@ -19,7 +19,7 @@
 struct nv50_blend_stateobj {
   struct pipe_blend_state pipe;
   int size;
-   uint32_t state[84]; // TODO: allocate less if !independent_blend_enable
+   uint32_t state[82]; // TODO: allocate less if !independent_blend_enable
 };

 struct nv50_rasterizer_stateobj {
--- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c
@@ -68,6 +68,10 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
      return NV50_SURFACE_FORMAT_R16_UNORM;
   case 4:
      return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+   case 8:
+      return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
+   case 16:
+      return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
   default:
      return 0;
   }
@@ -1003,6 +1007,8 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
   /* zsa state */
   BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
   PUSH_DATA (push, 0);
+   BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1);
+   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
   PUSH_DATA (push, 0);
   BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -293,7 +293,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
   uint64_t addrs[PIPE_MAX_ATTRIBS];
   uint32_t limits[PIPE_MAX_ATTRIBS];
   struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   struct nv50_vertex_stateobj *vertex = nv50->vertex;
+   struct nv50_vertex_stateobj dummy = {};
+   struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
   struct pipe_vertex_buffer *vb;
   struct nv50_vertex_element *ve;
   uint32_t mask;
@@ -301,6 +302,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
   unsigned i;
   const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);

+   /* A vertexid is not generated for inline data uploads. Have to use a
+    * VBO. This check must come after the vertprog has been validated,
+    * otherwise vertexid may be unset.
+    */
+   assert(nv50->vertprog->translated);
+   if (nv50->vertprog->vp.vertexid)
+      nv50->vbo_push_hint = 0;
+
   if (unlikely(vertex->need_conversion))
      nv50->vbo_fifo = ~0;
   else
@@ -317,7 +326,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
         if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
            buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
            nv50->base.vbo_dirty = true;
-            break;
         }
      }
   }
@@ -736,9 +744,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
      PUSH_DATA (push, 0);
      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
-      PUSH_DATA (push, 0);
-      BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
-      nv50_query_pushbuf_submit(push, so->pq, 0x4);
+      PUSH_DATA (push, so->stride);
+      nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
      BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
      PUSH_DATA (push, 0);

@@ -838,10 +845,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
         nv50->base.vbo_dirty = true;
   }

-   if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
-       nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
-      nv50->base.vbo_dirty = true;
-
   if (nv50->base.vbo_dirty) {
      BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
      PUSH_DATA (push, 0);
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -887,6 +887,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)

   /* zsa state */
   IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+   IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
   IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
   IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);

--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
 					fprintf(stderr, "CND:%X ", cf->cond);
 				if (cf->pop_count)
 					fprintf(stderr, "POP:%X ", cf->pop_count);
+				if (cf->end_of_program)
+					fprintf(stderr, "EOP ");
 				fprintf(stderr, "\n");
 			}
 		}
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -936,28 +936,5 @@ static inline bool r600_can_read_depth(struct r600_texture *rtex)
 #define     V_028A6C_OUTPRIM_TYPE_LINESTRIP            1
 #define     V_028A6C_OUTPRIM_TYPE_TRISTRIP             2

-static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
-{
-	static const int prim_conv[] = {
-		V_028A6C_OUTPRIM_TYPE_POINTLIST,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_LINESTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP,
-		V_028A6C_OUTPRIM_TYPE_TRISTRIP
-	};
-	assert(mode < Elements(prim_conv));
-
-	return prim_conv[mode];
-}
-
+unsigned r600_conv_prim_to_gs_out(unsigned mode);
 #endif
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6151,10 +6151,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
 		r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
 		if (r)
 			return r;
-		r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]);
+		r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
 		if (r)
 			return r;
 		tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
--- a/src/gallium/drivers/r600/r600_state.c
+++ b/src/gallium/drivers/r600/r600_state.c
@@ -2051,7 +2051,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
 			/* always privilege vs stage so that at worst we have the
 			 * pixel stage producing wrong output (not the vertex
 			 * stage) */
-			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
+			new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
 			new_num_vs_gprs = num_vs_gprs;
 			new_num_gs_gprs = num_gs_gprs;
 			new_num_es_gprs = num_es_gprs;
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -123,6 +123,31 @@ static unsigned r600_conv_pipe_prim(unsigned prim)
 	return prim_conv[prim];
 }

+unsigned r600_conv_prim_to_gs_out(unsigned mode)
+{
+	static const int prim_conv[] = {
+		[PIPE_PRIM_POINTS]			= V_028A6C_OUTPRIM_TYPE_POINTLIST,
+		[PIPE_PRIM_LINES]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_LOOP]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_STRIP]			= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_TRIANGLES]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_STRIP]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_FAN]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_QUADS]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_QUAD_STRIP]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_POLYGON]			= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_LINES_ADJACENCY]		= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_LINE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_LINESTRIP,
+		[PIPE_PRIM_TRIANGLES_ADJACENCY]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY]	= V_028A6C_OUTPRIM_TYPE_TRISTRIP,
+		[PIPE_PRIM_PATCHES]			= V_028A6C_OUTPRIM_TYPE_POINTLIST,
+		[R600_PRIM_RECTANGLE_LIST]		= V_028A6C_OUTPRIM_TYPE_TRISTRIP
+	};
+	assert(mode < Elements(prim_conv));
+
+	return prim_conv[mode];
+}
+
 /* common state between evergreen and r600 */

 static void r600_bind_blend_state_internal(struct r600_context *rctx,
--- a/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_decoder.cpp
@@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) {
 	int r = 0;
 	uint32_t dw0 = dw[i];
 	uint32_t dw1 = dw[i+1];
+	assert(i+1 <= ndw);

 	if ((dw1 >> 29) & 1) { // CF_ALU
 		return decode_cf_alu(i, bc);
--- a/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_finalize.cpp
@@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
 		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
 		cf_node *if_pop = sh.create_cf(CF_OP_POP);

+		if (!last_cf || last_cf->get_parent_region() == r) {
+			last_cf = if_pop;
+		}
 		if_pop->bc.pop_count = 1;
 		if_pop->jump_after(if_pop);

--- a/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_parser.cpp
@@ -95,7 +95,7 @@ int bc_parser::decode_shader() {
 		if ((r = decode_cf(i, eop)))
 			return r;

-	} while (!eop || (i >> 1) <= max_cf);
+	} while (!eop || (i >> 1) < max_cf);

 	return 0;
 }
@@ -769,6 +769,7 @@ int bc_parser::prepare_ir() {
 }

 int bc_parser::prepare_loop(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());

 	cf_node *end = cf_map[c->bc.addr - 1];
 	assert(end->bc.op == CF_OP_LOOP_END);
@@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) {
 }

 int bc_parser::prepare_if(cf_node* c) {
+	assert(c->bc.addr-1 < cf_map.size());
 	cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

+	if (!end)
+		return 0; // not quite sure how this happens, malformed input?
+
 	BCP_DUMP(
 		sblog << "parsing JUMP @" << c->bc.id;
 		sblog << "\n";
@@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) {
 	if (c_else->parent != c->parent)
 		c_else = NULL;

-	if (end->parent != c->parent)
+	if (end && end->parent != c->parent)
 		end = NULL;

 	region_node *reg = sh->create_region();
--- a/src/gallium/drivers/r600/sb/sb_sched.cpp
+++ b/src/gallium/drivers/r600/sb/sb_sched.cpp
@@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) {

 	for (i = 0; i < nsrc; ++i) {
 		value *v = n->src[i];
-		if (v->is_readonly())
+		if (v->is_readonly() || v->is_undef())
 			continue;
 		if (i == 1 && opt)
 			continue;
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -197,7 +197,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -206,13 +206,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -220,7 +220,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
@@ -254,7 +254,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
@@ -264,7 +264,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		va += query->buffer.results_end + query->result_size/2;
@@ -273,7 +273,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
+		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
 		break;
@@ -282,7 +282,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
-		radeon_emit(cs, (va >> 32UL) & 0xFF);
+		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	default:
 		assert(0);
--- a/src/gallium/drivers/radeonsi/si_hw_context.c
+++ b/src/gallium/drivers/radeonsi/si_hw_context.c
@@ -33,14 +33,6 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
 	int i;

-	/* If the CS is sufficiently large, don't count the space needed
-	 * and just flush if there is less than 8096 dwords left. */
-	if (cs->max_dw >= 24 * 1024) {
-		if (cs->cdw > cs->max_dw - 8 * 1024)
-			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
-		return;
-	}
-
 	/* There are two memory usage counters in the winsys for all buffers
 	 * that have been added (cs_add_reloc) and two counters in the pipe
 	 * driver for those that haven't been added yet.
@@ -54,6 +46,15 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
 	ctx->b.gtt = 0;
 	ctx->b.vram = 0;

+	/* If the CS is sufficiently large, don't count the space needed
+	 * and just flush if there is less than 8096 dwords left.
+	 */
+	if (cs->max_dw >= 24 * 1024) {
+		if (cs->cdw > cs->max_dw - 8 * 1024)
+			ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
+		return;
+	}
+
 	/* The number of dwords we already used in the CS so far. */
 	num_dw += cs->cdw;

--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -190,6 +190,7 @@ struct si_shader_selector {
 	uint64_t	inputs_read;
 	uint64_t	outputs_written;
 	uint32_t	patch_outputs_written;
+	uint32_t	ps_colors_written;
 };

 /* Valid shader configurations:
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -29,6 +29,7 @@
 #include "sid.h"
 #include "radeon/r600_cs.h"

+#include "util/u_dual_blend.h"
 #include "util/u_format.h"
 #include "util/u_format_s3tc.h"
 #include "util/u_memory.h"
@@ -233,8 +234,10 @@ static unsigned si_pack_float_12p4(float x)
 * - The COLOR1 format isn't INVALID because of possible dual-source blending,
 *   so COLOR1 is enabled pretty much all the time.
 * So CB_TARGET_MASK is the only register that can disable COLOR1.
+ *
+ * Another reason is to avoid a hang with dual source blending.
 */
-static void si_update_fb_blend_state(struct si_context *sctx)
+void si_update_fb_blend_state(struct si_context *sctx)
 {
 	struct si_pm4_state *pm4;
 	struct si_state_blend *blend = sctx->queued.named.blend;
@@ -252,6 +255,16 @@ static void si_update_fb_blend_state(struct si_context *sctx)
 			mask |= 0xf << (4*i);
 	mask &= blend->cb_target_mask;

+	/* Avoid a hang that happens when dual source blending is enabled
+	 * but there is not enough color outputs. This is undefined behavior,
+	 * so disable color writes completely.
+	 *
+	 * Reproducible with Unigine Heaven 4.0 and drirc missing.
+	 */
+	if (blend->dual_src_blend &&
+	    (sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
+		mask = 0;
+
 	si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
 	si_pm4_set_state(sctx, fb_blend, pm4);
 }
@@ -343,6 +356,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
 		return NULL;

 	blend->alpha_to_one = state->alpha_to_one;
+	blend->dual_src_blend = util_blend_state_is_dual(state, 0);

 	if (state->logicop_enable) {
 		color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
@@ -3166,6 +3180,7 @@ static void si_init_config(struct si_context *sctx)
 	unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
 	unsigned raster_config, raster_config_1;
 	struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
+	int i;

 	if (pm4 == NULL)
 		return;
@@ -3196,6 +3211,11 @@ static void si_init_config(struct si_context *sctx)

 	si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);

+	for (i = 0; i < 16; i++) {
+		si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
+		si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
+	}
+
 	switch (sctx->screen->b.family) {
 	case CHIP_TAHITI:
 	case CHIP_PITCAIRN:
@@ -3282,8 +3302,6 @@ static void si_init_config(struct si_context *sctx)
 	si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
 	/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
 	si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
-	si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
-	si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
 	si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
 	si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
 	si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -39,6 +39,7 @@ struct si_state_blend {
 	struct si_pm4_state	pm4;
 	uint32_t		cb_target_mask;
 	bool			alpha_to_one;
+	bool			dual_src_blend;
 };

 struct si_state_sample_mask {
@@ -251,6 +252,7 @@ void si_shader_change_notify(struct si_context *sctx);
 /* si_state.c */
 struct si_shader_selector;

+void si_update_fb_blend_state(struct si_context *sctx);
 boolean si_is_format_supported(struct pipe_screen *screen,
                               enum pipe_format format,
                               enum pipe_texture_target target,
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -713,6 +713,15 @@ static void *si_create_shader_state(struct pipe_context *ctx,
 			}
 		}
 		break;
+	case PIPE_SHADER_FRAGMENT:
+		for (i = 0; i < sel->info.num_outputs; i++) {
+			unsigned name = sel->info.output_semantic_name[i];
+			unsigned index = sel->info.output_semantic_index[i];
+
+			if (name == TGSI_SEMANTIC_COLOR)
+				sel->ps_colors_written |= 1 << index;
+		}
+		break;
 	}

 	if (sscreen->b.debug_flags & DBG_PRECOMPILE)
@@ -840,6 +849,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 	}

 	sctx->ps_shader = sel;
+	si_update_fb_blend_state(sctx);
 }

 static void si_delete_shader_selector(struct pipe_context *ctx,
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -314,6 +314,7 @@ qir_get_temp(struct vc4_compile *c)

        reg.file = QFILE_TEMP;
        reg.index = c->num_temps++;
+        reg.pack = 0;

        if (c->num_temps > c->defs_array_size) {
                uint32_t old_size = c->defs_array_size;
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -684,6 +684,9 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
   enum amdgpu_bo_handle_type type;
   int r;

+   if ((void*)bo != (void*)buffer)
+      pb_cache_manager_remove_buffer(buffer);
+
   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.c
@@ -1126,6 +1126,9 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,

    memset(&flink, 0, sizeof(flink));

+    if ((void*)bo != (void*)buffer)
+       pb_cache_manager_remove_buffer(buffer);
+
    if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
        if (!bo->flink_name) {
            flink.handle = bo->handle;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -97,22 +97,17 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
 {
    int i;

-    csc->buf = MALLOC(ws->ib_max_size);
-    if (!csc->buf)
-        return FALSE;
    csc->fd = ws->fd;
    csc->nrelocs = 512;
    csc->relocs_bo = (struct radeon_bo**)
                     CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
    if (!csc->relocs_bo) {
-        FREE(csc->buf);
        return FALSE;
    }

    csc->relocs = (struct drm_radeon_cs_reloc*)
                  CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
    if (!csc->relocs) {
-        FREE(csc->buf);
        FREE(csc->relocs_bo);
        return FALSE;
    }
@@ -165,7 +160,6 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
    radeon_cs_context_cleanup(csc);
    FREE(csc->relocs_bo);
    FREE(csc->relocs);
-    FREE(csc->buf);
 }


@@ -206,7 +200,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
    cs->cst = &cs->csc2;
    cs->base.buf = cs->csc->buf;
    cs->base.ring_type = ring_type;
-    cs->base.max_dw = ws->ib_max_size / 4;
+    cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);

    p_atomic_inc(&ws->num_cs);
    return &cs->base;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -30,7 +30,7 @@
 #include "radeon_drm_bo.h"

 struct radeon_cs_context {
-    uint32_t                    *buf;
+    uint32_t                    buf[16 * 1024];

    int                         fd;
    struct drm_radeon_cs        cs;
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -395,20 +395,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
        }

        ws->info.r600_virtual_address = FALSE;
-        ws->ib_max_size = 64 * 1024;
-
        if (ws->info.drm_minor >= 13) {
+            uint32_t ib_vm_max_size;
+
            ws->info.r600_virtual_address = TRUE;
            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
                                      &ws->va_start))
                ws->info.r600_virtual_address = FALSE;
-
-            if (radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
-                                     &ws->ib_max_size))
-                ws->ib_max_size *= 4; /* the kernel returns the size in dwords */
-            else
+            if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
+                                      &ib_vm_max_size))
                ws->info.r600_virtual_address = FALSE;
-
            radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
                                 &ws->va_unmap_working);
        }
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.h
@@ -73,7 +73,6 @@ struct radeon_drm_winsys {

    enum radeon_generation gen;
    struct radeon_info info;
-    uint32_t ib_max_size;
    uint32_t va_start;
    uint32_t va_unmap_working;
    uint32_t accel_working2;
--- a/src/glsl/linker.cpp
+++ b/src/glsl/linker.cpp
@@ -2339,6 +2339,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
    */
   unsigned used_locations = (max_index >= 32)
      ? ~0 : ~((1 << max_index) - 1);
+   unsigned double_storage_locations = 0;

   assert((target_index == MESA_SHADER_VERTEX)
 	  || (target_index == MESA_SHADER_FRAGMENT));
@@ -2452,34 +2453,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,

      const unsigned slots = var->type->count_attribute_slots();

-      /* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
-       *
-       * "A program with more than the value of MAX_VERTEX_ATTRIBS active
-       * attribute variables may fail to link, unless device-dependent
-       * optimizations are able to make the program fit within available
-       * hardware resources. For the purposes of this test, attribute variables
-       * of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
-       * and dmat4 may count as consuming twice as many attributes as equivalent
-       * single-precision types. While these types use the same number of
-       * generic attributes as their single-precision equivalents,
-       * implementations are permitted to consume two single-precision vectors
-       * of internal storage for each three- or four-component double-precision
-       * vector."
-       * Until someone has a good reason in Mesa, enforce that now.
-       */
-      if (target_index == MESA_SHADER_VERTEX) {
-	 total_attribs_size += slots;
-	 if (var->type->without_array() == glsl_type::dvec3_type ||
-	     var->type->without_array() == glsl_type::dvec4_type ||
-	     var->type->without_array() == glsl_type::dmat2x3_type ||
-	     var->type->without_array() == glsl_type::dmat2x4_type ||
-	     var->type->without_array() == glsl_type::dmat3_type ||
-	     var->type->without_array() == glsl_type::dmat3x4_type ||
-	     var->type->without_array() == glsl_type::dmat4x3_type ||
-	     var->type->without_array() == glsl_type::dmat4_type)
-	    total_attribs_size += slots;
-      }
-
      /* If the variable is not a built-in and has a location statically
       * assigned in the shader (presumably via a layout qualifier), make sure
       * that it doesn't collide with other assigned locations.  Otherwise,
@@ -2594,6 +2567,38 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
 	    }

 	    used_locations |= (use_mask << attr);
+
+            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+             *
+             * "A program with more than the value of MAX_VERTEX_ATTRIBS
+             *  active attribute variables may fail to link, unless
+             *  device-dependent optimizations are able to make the program
+             *  fit within available hardware resources. For the purposes
+             *  of this test, attribute variables of the type dvec3, dvec4,
+             *  dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+             *  count as consuming twice as many attributes as equivalent
+             *  single-precision types. While these types use the same number
+             *  of generic attributes as their single-precision equivalents,
+             *  implementations are permitted to consume two single-precision
+             *  vectors of internal storage for each three- or four-component
+             *  double-precision vector."
+             *
+             * Mark this attribute slot as taking up twice as much space
+             * so we can count it properly against limits.  According to
+             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+             * is optional behavior, but it seems preferable.
+             */
+            const glsl_type *type = var->type->without_array();
+            if (type == glsl_type::dvec3_type ||
+                type == glsl_type::dvec4_type ||
+                type == glsl_type::dmat2x3_type ||
+                type == glsl_type::dmat2x4_type ||
+                type == glsl_type::dmat3_type ||
+                type == glsl_type::dmat3x4_type ||
+                type == glsl_type::dmat4x3_type ||
+                type == glsl_type::dmat4_type) {
+               double_storage_locations |= (use_mask << attr);
+            }
 	 }

 	 continue;
@@ -2605,6 +2610,9 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
   }

   if (target_index == MESA_SHADER_VERTEX) {
+      unsigned total_attribs_size =
+         _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
+         _mesa_bitcount(double_storage_locations);
      if (total_attribs_size > max_index) {
 	 linker_error(prog,
 		      "attempt to use %d vertex attribute slots only %d available ",
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -793,21 +793,6 @@ brw_emit_vertices(struct brw_context *brw)
                    ((i * 4) << BRW_VE1_DST_OFFSET_SHIFT));
   }

-   if (brw->gen >= 6 && gen6_edgeflag_input) {
-      uint32_t format =
-         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
-
-      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
-                GEN6_VE0_VALID |
-                GEN6_VE0_EDGE_FLAG_ENABLE |
-                (format << BRW_VE0_FORMAT_SHIFT) |
-                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
-
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      uint32_t dw0 = 0, dw1 = 0;
      uint32_t comp0 = BRW_VE1_COMPONENT_STORE_0;
@@ -848,6 +833,21 @@ brw_emit_vertices(struct brw_context *brw)
      OUT_BATCH(dw1);
   }

+   if (brw->gen >= 6 && gen6_edgeflag_input) {
+      uint32_t format =
+         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+
+      OUT_BATCH((gen6_edgeflag_input->buffer << GEN6_VE0_INDEX_SHIFT) |
+                GEN6_VE0_VALID |
+                GEN6_VE0_EDGE_FLAG_ENABLE |
+                (format << BRW_VE0_FORMAT_SHIFT) |
+                (gen6_edgeflag_input->offset << BRW_VE0_SRC_OFFSET_SHIFT));
+      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+   }
+
   ADVANCE_BATCH();
 }

--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -3211,7 +3211,8 @@ fs_visitor::lower_integer_multiplication()
             * schedule multi-component multiplications much better.
             */

-            if (inst->conditional_mod && inst->dst.is_null()) {
+            fs_reg orig_dst = inst->dst;
+            if (orig_dst.is_null() || orig_dst.file == MRF) {
               inst->dst = fs_reg(GRF, alloc.allocate(dispatch_width / 8),
                                  inst->dst.type);
            }
@@ -3277,10 +3278,9 @@ fs_visitor::lower_integer_multiplication()

            ibld.ADD(dst, low, high);

-            if (inst->conditional_mod) {
-               fs_reg null(retype(ibld.null_reg_f(), inst->dst.type));
+            if (inst->conditional_mod || orig_dst.file == MRF) {
               set_condmod(inst->conditional_mod,
-                           ibld.MOV(null, inst->dst));
+                           ibld.MOV(orig_dst, inst->dst));
            }
         }

@@ -4801,12 +4801,12 @@ fs_visitor::optimize()
    */
   bld = fs_builder(this, 64);

-   split_virtual_grfs();
-
   move_uniform_array_access_to_pull_constants();
   assign_constant_locations();
   demote_pull_constants();

+   split_virtual_grfs();
+
 #define OPT(pass, args...) ({                                           \
      pass_num++;                                                       \
      bool this_progress = pass(args);                                  \
--- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp
@@ -279,6 +279,7 @@ static bool
 can_change_source_types(fs_inst *inst)
 {
   return !inst->src[0].abs && !inst->src[0].negate &&
+          inst->dst.type == inst->src[0].type &&
          (inst->opcode == BRW_OPCODE_MOV ||
           (inst->opcode == BRW_OPCODE_SEL &&
            inst->predicate != BRW_PREDICATE_NONE &&
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -878,7 +878,8 @@ brw_upload_invariant_state(struct brw_context *brw)
 {
   const bool is_965 = brw->gen == 4 && !brw->is_g4x;

-   brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw_emit_select_pipeline(brw, BRW_RENDER_PIPELINE);
+   brw->last_pipeline = BRW_RENDER_PIPELINE;

   if (brw->gen < 6) {
      /* Disable depth offset clamping. */
--- a/src/mesa/drivers/dri/i965/gen8_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/gen8_draw_upload.c
@@ -40,16 +40,25 @@ gen8_emit_vertices(struct brw_context *brw)
 {
   struct gl_context *ctx = &brw->ctx;
   uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
+   bool uses_edge_flag;

   brw_prepare_vertices(brw);
   brw_prepare_shader_draw_parameters(brw);

+   uses_edge_flag = (ctx->Polygon.FrontMode != GL_FILL ||
+                     ctx->Polygon.BackMode != GL_FILL);
+
   if (brw->vs.prog_data->uses_vertexid || brw->vs.prog_data->uses_instanceid) {
      unsigned vue = brw->vb.nr_enabled;

-      WARN_ONCE(brw->vs.prog_data->inputs_read & VERT_BIT_EDGEFLAG,
-                "Using VID/IID with edgeflags, need to reorder the "
-                "vertex attributes");
+      /* The element for the edge flags must always be last, so we have to
+       * insert the SGVS before it in that case.
+       */
+      if (uses_edge_flag) {
+         assert(vue > 0);
+         vue--;
+      }
+
      WARN_ONCE(vue >= 33,
                "Trying to insert VID/IID past 33rd vertex element, "
                "need to reorder the vertex attrbutes.");
@@ -138,7 +147,18 @@ gen8_emit_vertices(struct brw_context *brw)
      ADVANCE_BATCH();
   }

-   unsigned nr_elements = brw->vb.nr_enabled + brw->vs.prog_data->uses_vertexid;
+   /* Normally we don't need an element for the SGVS attribute because the
+    * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
+    * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if the
+    * vertex ID is used then it needs an element for the base vertex buffer.
+    * Additionally if there is an edge flag element then the SGVS can't be
+    * inserted past that so we need a dummy element to ensure that the edge
+    * flag is the last one.
+    */
+   bool needs_sgvs_element = (brw->vs.prog_data->uses_vertexid ||
+                              (brw->vs.prog_data->uses_instanceid &&
+                               uses_edge_flag));
+   unsigned nr_elements = brw->vb.nr_enabled + needs_sgvs_element;

   /* The hardware allows one more VERTEX_ELEMENTS than VERTEX_BUFFERS,
    * presumably for VertexID/InstanceID.
@@ -192,6 +212,24 @@ gen8_emit_vertices(struct brw_context *brw)
                (comp3 << BRW_VE1_COMPONENT_3_SHIFT));
   }

+   if (needs_sgvs_element) {
+      if (brw->vs.prog_data->uses_vertexid) {
+         OUT_BATCH(GEN6_VE0_VALID |
+                   brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
+                   BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      } else {
+         OUT_BATCH(GEN6_VE0_VALID);
+         OUT_BATCH((BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_0_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
+                   (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
+      }
+   }
+
   if (gen6_edgeflag_input) {
      uint32_t format =
         brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
@@ -206,25 +244,26 @@ gen8_emit_vertices(struct brw_context *brw)
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
   }
-
-   if (brw->vs.prog_data->uses_vertexid) {
-      OUT_BATCH(GEN6_VE0_VALID |
-                brw->vb.nr_buffers << GEN6_VE0_INDEX_SHIFT |
-                BRW_SURFACEFORMAT_R32_UINT << BRW_VE0_FORMAT_SHIFT);
-      OUT_BATCH((BRW_VE1_COMPONENT_STORE_SRC << BRW_VE1_COMPONENT_0_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_1_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_2_SHIFT) |
-                (BRW_VE1_COMPONENT_STORE_0 << BRW_VE1_COMPONENT_3_SHIFT));
-   }
   ADVANCE_BATCH();

-   for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
+   for (unsigned i = 0, j = 0; i < brw->vb.nr_enabled; i++) {
      const struct brw_vertex_element *input = brw->vb.enabled[i];
      const struct brw_vertex_buffer *buffer = &brw->vb.buffers[input->buffer];
+      unsigned element_index;
+
+      /* The edge flag element is reordered to be the last one in the code
+       * above so we need to compensate for that in the element indices used
+       * below.
+       */
+      if (input == gen6_edgeflag_input)
+         element_index = nr_elements - 1;
+      else
+         element_index = j++;

      BEGIN_BATCH(3);
      OUT_BATCH(_3DSTATE_VF_INSTANCING << 16 | (3 - 2));
-      OUT_BATCH(i | (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
+      OUT_BATCH(element_index |
+                (buffer->step_rate ? GEN8_VF_INSTANCING_ENABLE : 0));
      OUT_BATCH(buffer->step_rate);
      ADVANCE_BATCH();
   }
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -796,47 +796,43 @@ intel_emit_linear_blit(struct brw_context *brw,
   int16_t src_x, dst_x;
   bool ok;

-   /* The pitch given to the GPU must be DWORD aligned, and
-    * we want width to match pitch. Max width is (1 << 15 - 1),
-    * rounding that down to the nearest DWORD is 1 << 15 - 4
-    */
-   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
-   height = (pitch == 0) ? 1 : size / pitch;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   ok = intelEmitCopyBlit(brw, 1,
-			  pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-			  pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
-                          INTEL_MIPTREE_TRMODE_NONE,
-			  src_x, 0, /* src x/y */
-			  dst_x, 0, /* dst x/y */
-			  pitch, height, /* w, h */
-			  GL_COPY);
-   if (!ok)
-      _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);
+   do {
+      /* The pitch given to the GPU must be DWORD aligned, and
+       * we want width to match pitch. Max width is (1 << 15 - 1),
+       * rounding that down to the nearest DWORD is 1 << 15 - 4
+       */
+      pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 64), 4);
+      height = (size < pitch || pitch == 0) ? 1 : size / pitch;

-   src_offset += pitch * height;
-   dst_offset += pitch * height;
-   src_x = src_offset % 64;
-   dst_x = dst_offset % 64;
-   size -= pitch * height;
-   assert (size < (1 << 15));
-   pitch = ALIGN(size, 4);
+      src_x = src_offset % 64;
+      dst_x = dst_offset % 64;
+      pitch = ALIGN(MIN2(size, (1 << 15) - 64), 4);
+      assert(src_x + pitch < 1 << 15);
+      assert(dst_x + pitch < 1 << 15);

-   if (size != 0) {
      ok = intelEmitCopyBlit(brw, 1,
-			     pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
+                             pitch, src_bo, src_offset - src_x, I915_TILING_NONE,
                             INTEL_MIPTREE_TRMODE_NONE,
-			     pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
+                             pitch, dst_bo, dst_offset - dst_x, I915_TILING_NONE,
                             INTEL_MIPTREE_TRMODE_NONE,
-			     src_x, 0, /* src x/y */
-			     dst_x, 0, /* dst x/y */
-			     size, 1, /* w, h */
-			     GL_COPY);
-      if (!ok)
-         _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
-   }
+                             src_x, 0, /* src x/y */
+                             dst_x, 0, /* dst x/y */
+                             MIN2(size, pitch), height, /* w, h */
+                             GL_COPY);
+      if (!ok) {
+         _mesa_problem(ctx, "Failed to linear blit %dx%d\n",
+                       MIN2(size, pitch), height);
+         return;
+      }
+
+      pitch *= height;
+      if (size <= pitch)
+         return;
+
+      src_offset += pitch;
+      dst_offset += pitch;
+      size -= pitch;
+   } while (1);
 }

 /**
--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -109,6 +109,10 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
       pack->Invert)
      return false;

+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
   /* This renderbuffer can come from a texture.  In this case, we impose
    * some of the same restrictions we have for textures and adjust for
    * miplevels.
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -118,6 +118,10 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
       packing->Invert)
      return false;

+   /* Only a simple blit, no scale, bias or other mapping. */
+   if (ctx->_ImageTransferState)
+      return false;
+
   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
                         INTEL_UPLOAD))
      return false;
--- a/src/mesa/main/formats.c
+++ b/src/mesa/main/formats.c
@@ -1013,6 +1013,10 @@ _mesa_format_to_type_and_comps(mesa_format format,
   case MESA_FORMAT_R8G8B8X8_UNORM:
   case MESA_FORMAT_B8G8R8X8_UNORM:
   case MESA_FORMAT_X8R8G8B8_UNORM:
+   case MESA_FORMAT_A8B8G8R8_UINT:
+   case MESA_FORMAT_R8G8B8A8_UINT:
+   case MESA_FORMAT_B8G8R8A8_UINT:
+   case MESA_FORMAT_A8R8G8B8_UINT:
      *datatype = GL_UNSIGNED_BYTE;
      *comps = 4;
      return;
@@ -1023,6 +1027,8 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;
   case MESA_FORMAT_B5G6R5_UNORM:
   case MESA_FORMAT_R5G6B5_UNORM:
+   case MESA_FORMAT_B5G6R5_UINT:
+   case MESA_FORMAT_R5G6B5_UINT:
      *datatype = GL_UNSIGNED_SHORT_5_6_5;
      *comps = 3;
      return;
@@ -1030,6 +1036,8 @@ _mesa_format_to_type_and_comps(mesa_format format,
   case MESA_FORMAT_B4G4R4A4_UNORM:
   case MESA_FORMAT_A4R4G4B4_UNORM:
   case MESA_FORMAT_B4G4R4X4_UNORM:
+   case MESA_FORMAT_B4G4R4A4_UINT:
+   case MESA_FORMAT_A4R4G4B4_UINT:
      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
      *comps = 4;
      return;
@@ -1037,6 +1045,8 @@ _mesa_format_to_type_and_comps(mesa_format format,
   case MESA_FORMAT_B5G5R5A1_UNORM:
   case MESA_FORMAT_A1R5G5B5_UNORM:
   case MESA_FORMAT_B5G5R5X1_UNORM:
+   case MESA_FORMAT_B5G5R5A1_UINT:
+   case MESA_FORMAT_A1R5G5B5_UINT:
      *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      *comps = 4;
      return;
@@ -1047,6 +1057,7 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;

   case MESA_FORMAT_A1B5G5R5_UNORM:
+   case MESA_FORMAT_A1B5G5R5_UINT:
      *datatype = GL_UNSIGNED_SHORT_5_5_5_1;
      *comps = 4;
      return;
@@ -1081,19 +1092,23 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;

   case MESA_FORMAT_R3G3B2_UNORM:
+   case MESA_FORMAT_R3G3B2_UINT:
      *datatype = GL_UNSIGNED_BYTE_2_3_3_REV;
      *comps = 3;
      return;
   case MESA_FORMAT_A4B4G4R4_UNORM:
+   case MESA_FORMAT_A4B4G4R4_UINT:
      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
      *comps = 4;
      return;

   case MESA_FORMAT_R4G4B4A4_UNORM:
+   case MESA_FORMAT_R4G4B4A4_UINT:
      *datatype = GL_UNSIGNED_SHORT_4_4_4_4;
      *comps = 4;
      return;
   case MESA_FORMAT_R5G5B5A1_UNORM:
+   case MESA_FORMAT_R5G5B5A1_UINT:
      *datatype = GL_UNSIGNED_SHORT_1_5_5_5_REV;
      *comps = 4;
      return;
@@ -1109,6 +1124,7 @@ _mesa_format_to_type_and_comps(mesa_format format,
      return;

   case MESA_FORMAT_B2G3R3_UNORM:
+   case MESA_FORMAT_B2G3R3_UINT:
      *datatype = GL_UNSIGNED_BYTE_3_3_2;
      *comps = 3;
      return;
@@ -2138,6 +2154,96 @@ _mesa_format_matches_format_and_type(mesa_format mesa_format,
              type == GL_UNSIGNED_INT_2_10_10_10_REV &&
              !swapBytes);

+   case MESA_FORMAT_B5G6R5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5;
+
+   case MESA_FORMAT_R5G6B5_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_SHORT_5_6_5_REV;
+
+   case MESA_FORMAT_B2G3R3_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_3_3_2;
+
+   case MESA_FORMAT_R3G3B2_UINT:
+      return format == GL_RGB_INTEGER && type == GL_UNSIGNED_BYTE_2_3_3_REV;
+
+   case MESA_FORMAT_A4B4G4R4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_R4G4B4A4_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B4G4R4A4_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_4_4_4_4_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A4R4G4B4_UINT:
+      return GL_FALSE;
+
+   case MESA_FORMAT_A1B5G5R5_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_B5G5R5A1_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV &&
+         !swapBytes;
+
+   case MESA_FORMAT_A1R5G5B5_UINT:
+      return format == GL_BGRA_INTEGER && type == GL_UNSIGNED_SHORT_5_5_5_1 &&
+         !swapBytes;
+
+   case MESA_FORMAT_R5G5B5A1_UINT:
+      return format == GL_RGBA_INTEGER && type == GL_UNSIGNED_SHORT_1_5_5_5_REV;
+
+   case MESA_FORMAT_A8B8G8R8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV && swapBytes)
+         return GL_TRUE;
+      return GL_FALSE;
+
+   case MESA_FORMAT_A8R8G8B8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_R8G8B8A8_UINT:
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_RGBA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
+   case MESA_FORMAT_B8G8R8A8_UINT:
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8_REV &&
+          !swapBytes)
+         return GL_TRUE;
+
+      if (format == GL_BGRA_INTEGER && type == GL_UNSIGNED_INT_8_8_8_8 && swapBytes)
+         return GL_TRUE;
+
+      return GL_FALSE;
+
   case MESA_FORMAT_R9G9B9E5_FLOAT:
      return format == GL_RGB && type == GL_UNSIGNED_INT_5_9_9_9_REV &&
         !swapBytes;
--- a/src/mesa/main/formats.csv
+++ b/src/mesa/main/formats.csv
@@ -186,10 +186,26 @@ MESA_FORMAT_RGBX_FLOAT32                  , array , 1, 1, f32 , f32 , f32 , x32
 MESA_FORMAT_Z_FLOAT32                     , array , 1, 1, f32 ,     ,     ,     , x___, zs

 # Packed signed/unsigned non-normalized integer formats
+MESA_FORMAT_A8B8G8R8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , wzyx, rgb
+MESA_FORMAT_A8R8G8B8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , yzwx, rgb
+MESA_FORMAT_R8G8B8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , xyzw, rgb
+MESA_FORMAT_B8G8R8A8_UINT                 , packed, 1, 1, u8  , u8  , u8  , u8  , zyxw, rgb
 MESA_FORMAT_B10G10R10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , zyxw, rgb
 MESA_FORMAT_R10G10B10A2_UINT              , packed, 1, 1, u10 , u10 , u10 , u2  , xyzw, rgb
 MESA_FORMAT_A2B10G10R10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , wzyx, rgb
 MESA_FORMAT_A2R10G10B10_UINT              , packed, 1, 1, u2  , u10 , u10 , u10 , yzwx, rgb
+MESA_FORMAT_B5G6R5_UINT                   , packed, 1, 1, u5  , u6  ,  u5 ,     , zyx1, rgb
+MESA_FORMAT_R5G6B5_UINT                   , packed, 1, 1, u5  , u6  ,  u5 ,     , xyz1, rgb
+MESA_FORMAT_B2G3R3_UINT                   , packed, 1, 1, u2  , u3  ,  u3 ,     , zyx1, rgb
+MESA_FORMAT_R3G3B2_UINT                   , packed, 1, 1, u3  , u3  ,  u2 ,     , xyz1, rgb
+MESA_FORMAT_A4B4G4R4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , wzyx, rgb
+MESA_FORMAT_R4G4B4A4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , xyzw, rgb
+MESA_FORMAT_B4G4R4A4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , zyxw, rgb
+MESA_FORMAT_A4R4G4B4_UINT                 , packed, 1, 1, u4  , u4  ,  u4 , u4  , yzwx, rgb
+MESA_FORMAT_A1B5G5R5_UINT                 , packed, 1, 1, u1  , u5  ,  u5 , u5  , wzyx, rgb
+MESA_FORMAT_B5G5R5A1_UINT                 , packed, 1, 1, u5  , u5  ,  u5 , u1  , zyxw, rgb
+MESA_FORMAT_A1R5G5B5_UINT                 , packed, 1, 1, u1  , u5  ,  u5 , u5  , yzwx, rgb
+MESA_FORMAT_R5G5B5A1_UINT                 , packed, 1, 1, u5  , u5  ,  u5 , u1  , xyzw, rgb

 # Array signed/unsigned non-normalized integer formats
 MESA_FORMAT_A_UINT8                       , array , 1, 1, u8  ,     ,     ,     , 000x, rgb
--- a/src/mesa/main/formats.h
+++ b/src/mesa/main/formats.h
@@ -470,10 +470,27 @@ typedef enum
   MESA_FORMAT_Z_FLOAT32,

   /* Packed signed/unsigned non-normalized integer formats */
+
+   MESA_FORMAT_A8B8G8R8_UINT,    /* RRRR RRRR GGGG GGGG BBBB BBBB AAAA AAAA */
+   MESA_FORMAT_A8R8G8B8_UINT,    /* BBBB BBBB GGGG GGGG RRRR RRRR AAAA AAAA */
+   MESA_FORMAT_R8G8B8A8_UINT,    /* AAAA AAAA BBBB BBBB GGGG GGGG RRRR RRRR */
+   MESA_FORMAT_B8G8R8A8_UINT,    /* AAAA AAAA RRRR RRRR GGGG GGGG BBBB BBBB */
   MESA_FORMAT_B10G10R10A2_UINT, /* AARR RRRR RRRR GGGG GGGG GGBB BBBB BBBB */
   MESA_FORMAT_R10G10B10A2_UINT, /* AABB BBBB BBBB GGGG GGGG GGRR RRRR RRRR */
   MESA_FORMAT_A2B10G10R10_UINT, /* RRRR RRRR RRGG GGGG GGGG BBBB BBBB BBAA */
   MESA_FORMAT_A2R10G10B10_UINT, /* BBBB BBBB BBGG GGGG GGGG RRRR RRRR RRAA */
+   MESA_FORMAT_B5G6R5_UINT,                          /* RRRR RGGG GGGB BBBB */
+   MESA_FORMAT_R5G6B5_UINT,                          /* BBBB BGGG GGGR RRRR */
+   MESA_FORMAT_B2G3R3_UINT,                                    /* RRRG GGBB */
+   MESA_FORMAT_R3G3B2_UINT,                                    /* BBGG GRRR */
+   MESA_FORMAT_A4B4G4R4_UINT,                        /* RRRR GGGG BBBB AAAA */
+   MESA_FORMAT_R4G4B4A4_UINT,                        /* AAAA BBBB GGGG RRRR */
+   MESA_FORMAT_B4G4R4A4_UINT,                        /* AAAA RRRR GGGG BBBB */
+   MESA_FORMAT_A4R4G4B4_UINT,                        /* BBBB GGGG RRRR AAAA */
+   MESA_FORMAT_A1B5G5R5_UINT,                        /* RRRR RGGG GGBB BBBA */
+   MESA_FORMAT_B5G5R5A1_UINT,                        /* ARRR RRGG GGGB BBBB */
+   MESA_FORMAT_A1R5G5B5_UINT,                        /* BBBB BGGG GGRR RRRA */
+   MESA_FORMAT_R5G5B5A1_UINT,                        /* ABBB BBGG GGGR RRRR */

   /* Array signed/unsigned non-normalized integer formats */
   MESA_FORMAT_A_UINT8,
--- a/src/mesa/main/get.c
+++ b/src/mesa/main/get.c
@@ -35,6 +35,7 @@
 #include "mtypes.h"
 #include "state.h"
 #include "texcompress.h"
+#include "texstate.h"
 #include "framebuffer.h"
 #include "samplerobj.h"
 #include "stencil.h"
@@ -1785,6 +1786,52 @@ _mesa_GetDoublev(GLenum pname, GLdouble *params)
   }
 }

+/**
+ * Convert a GL texture binding enum such as GL_TEXTURE_BINDING_2D
+ * into the corresponding Mesa texture target index.
+ * \return TEXTURE_x_INDEX or -1 if binding is invalid
+ */
+static int
+tex_binding_to_index(const struct gl_context *ctx, GLenum binding)
+{
+   switch (binding) {
+   case GL_TEXTURE_BINDING_1D:
+      return _mesa_is_desktop_gl(ctx) ? TEXTURE_1D_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D:
+      return TEXTURE_2D_INDEX;
+   case GL_TEXTURE_BINDING_3D:
+      return ctx->API != API_OPENGLES ? TEXTURE_3D_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+      return ctx->Extensions.ARB_texture_cube_map
+         ? TEXTURE_CUBE_INDEX : -1;
+   case GL_TEXTURE_BINDING_RECTANGLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.NV_texture_rectangle
+         ? TEXTURE_RECT_INDEX : -1;
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array
+         ? TEXTURE_1D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+      return (_mesa_is_desktop_gl(ctx) && ctx->Extensions.EXT_texture_array)
+         || _mesa_is_gles3(ctx)
+         ? TEXTURE_2D_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_BUFFER:
+      return ctx->API == API_OPENGL_CORE &&
+             ctx->Extensions.ARB_texture_buffer_object ?
+             TEXTURE_BUFFER_INDEX : -1;
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_cube_map_array
+         ? TEXTURE_CUBE_ARRAY_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_INDEX : -1;
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+      return _mesa_is_desktop_gl(ctx) && ctx->Extensions.ARB_texture_multisample
+         ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : -1;
+   default:
+      return -1;
+   }
+}
+
 static enum value_type
 find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
 {
@@ -2048,6 +2095,45 @@ find_value_indexed(const char *func, GLenum pname, GLuint index, union value *v)
      v->value_int = ctx->ImageUnits[index].Format;
      return TYPE_INT;

+   /* ARB_direct_state_access */
+   case GL_TEXTURE_BINDING_1D:
+   case GL_TEXTURE_BINDING_1D_ARRAY:
+   case GL_TEXTURE_BINDING_2D:
+   case GL_TEXTURE_BINDING_2D_ARRAY:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE:
+   case GL_TEXTURE_BINDING_2D_MULTISAMPLE_ARRAY:
+   case GL_TEXTURE_BINDING_3D:
+   case GL_TEXTURE_BINDING_BUFFER:
+   case GL_TEXTURE_BINDING_CUBE_MAP:
+   case GL_TEXTURE_BINDING_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_BINDING_RECTANGLE: {
+      int target;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      target = tex_binding_to_index(ctx, pname);
+      if (target < 0)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      v->value_int = ctx->Texture.Unit[index].CurrentTex[target]->Name;
+      return TYPE_INT;
+   }
+
+   case GL_SAMPLER_BINDING: {
+      struct gl_sampler_object *samp;
+
+      if (ctx->API != API_OPENGL_CORE)
+         goto invalid_enum;
+      if (index >= _mesa_max_tex_unit(ctx))
+         goto invalid_value;
+
+      samp = ctx->Texture.Unit[index].Sampler;
+      v->value_int = samp ? samp->Name : 0;
+      return TYPE_INT;
+   }
+
   case GL_MAX_COMPUTE_WORK_GROUP_COUNT:
      if (!_mesa_is_desktop_gl(ctx) || !ctx->Extensions.ARB_compute_shader)
         goto invalid_enum;
--- a/src/mesa/main/glformats.c
+++ b/src/mesa/main/glformats.c
@@ -2718,12 +2718,16 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
         return MESA_FORMAT_B5G6R5_UNORM;
      else if (format == GL_BGR)
         return MESA_FORMAT_R5G6B5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B5G6R5_UINT;
      break;
   case GL_UNSIGNED_SHORT_5_6_5_REV:
      if (format == GL_RGB)
         return MESA_FORMAT_R5G6B5_UNORM;
      else if (format == GL_BGR)
         return MESA_FORMAT_B5G6R5_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R5G6B5_UINT;
      break;
   case GL_UNSIGNED_SHORT_4_4_4_4:
      if (format == GL_RGBA)
@@ -2732,6 +2736,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
         return MESA_FORMAT_A4R4G4B4_UNORM;
      else if (format == GL_ABGR_EXT)
         return MESA_FORMAT_R4G4B4A4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A4B4G4R4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A4R4G4B4_UINT;
      break;
   case GL_UNSIGNED_SHORT_4_4_4_4_REV:
      if (format == GL_RGBA)
@@ -2740,26 +2748,42 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
         return MESA_FORMAT_B4G4R4A4_UNORM;
      else if (format == GL_ABGR_EXT)
         return MESA_FORMAT_A4B4G4R4_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R4G4B4A4_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B4G4R4A4_UINT;
      break;
   case GL_UNSIGNED_SHORT_5_5_5_1:
      if (format == GL_RGBA)
         return MESA_FORMAT_A1B5G5R5_UNORM;
      else if (format == GL_BGRA)
         return MESA_FORMAT_A1R5G5B5_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A1B5G5R5_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A1R5G5B5_UINT;
      break;
   case GL_UNSIGNED_SHORT_1_5_5_5_REV:
      if (format == GL_RGBA)
         return MESA_FORMAT_R5G5B5A1_UNORM;
      else if (format == GL_BGRA)
         return MESA_FORMAT_B5G5R5A1_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R5G5B5A1_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B5G5R5A1_UINT;
      break;
   case GL_UNSIGNED_BYTE_3_3_2:
      if (format == GL_RGB)
         return MESA_FORMAT_B2G3R3_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_B2G3R3_UINT;
      break;
   case GL_UNSIGNED_BYTE_2_3_3_REV:
      if (format == GL_RGB)
         return MESA_FORMAT_R3G3B2_UNORM;
+      else if (format == GL_RGB_INTEGER)
+         return MESA_FORMAT_R3G3B2_UINT;
      break;
   case GL_UNSIGNED_INT_5_9_9_9_REV:
      if (format == GL_RGB)
@@ -2794,6 +2818,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
         return MESA_FORMAT_A8R8G8B8_UNORM;
      else if (format == GL_ABGR_EXT)
         return MESA_FORMAT_R8G8B8A8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_A8B8G8R8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_A8R8G8B8_UINT;
      break;
   case GL_UNSIGNED_INT_8_8_8_8_REV:
      if (format == GL_RGBA)
@@ -2802,6 +2830,10 @@ _mesa_format_from_format_and_type(GLenum format, GLenum type)
         return MESA_FORMAT_B8G8R8A8_UNORM;
      else if (format == GL_ABGR_EXT)
         return MESA_FORMAT_A8B8G8R8_UNORM;
+      else if (format == GL_RGBA_INTEGER)
+         return MESA_FORMAT_R8G8B8A8_UINT;
+      else if (format == GL_BGRA_INTEGER)
+         return MESA_FORMAT_B8G8R8A8_UINT;
      break;
   case GL_UNSIGNED_SHORT_8_8_MESA:
      if (format == GL_YCBCR_MESA)
--- a/src/mesa/main/image.c
+++ b/src/mesa/main/image.c
@@ -49,8 +49,8 @@
 * \param src the array with the source data we want to byte-swap.
 * \param n number of words.
 */
-void
-_mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
+static void
+swap2_copy( GLushort *dst, GLushort *src, GLuint n )
 {
   GLuint i;
   for (i = 0; i < n; i++) {
@@ -58,7 +58,11 @@ _mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
   }
 }

-
+void
+_mesa_swap2(GLushort *p, GLuint n)
+{
+   swap2_copy(p, p, n);
+}

 /*
 * Flip the order of the 4 bytes in each word in the given array (src) and
@@ -69,8 +73,8 @@ _mesa_swap2_copy( GLushort *dst, GLushort *src, GLuint n )
 * \param src the array with the source data we want to byte-swap.
 * \param n number of words.
 */
-void
-_mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
+static void
+swap4_copy( GLuint *dst, GLuint *src, GLuint n )
 {
   GLuint i, a, b;
   for (i = 0; i < n; i++) {
@@ -83,6 +87,11 @@ _mesa_swap4_copy( GLuint *dst, GLuint *src, GLuint n )
   }
 }

+void
+_mesa_swap4(GLuint *p, GLuint n)
+{
+   swap4_copy(p, p, n);
+}

 /**
 * Return the byte offset of a specific pixel in an image (1D, 2D or 3D).
@@ -958,3 +967,42 @@ _mesa_clip_blit(struct gl_context *ctx,

   return GL_TRUE;
 }
+
+/**
+ * Swap the bytes in a 2D image.
+ *
+ * using the packing information this swaps the bytes
+ * according to the format and type of data being input.
+ * It takes into a/c various packing parameters like
+ * Alignment and RowLength.
+ */
+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src)
+{
+   GLint swapSize = _mesa_sizeof_packed_type(type);
+
+   assert(packing->SwapBytes);
+
+   if (swapSize == 2 || swapSize == 4) {
+      int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
+      int stride = _mesa_image_row_stride(packing, width, format, type);
+      int row;
+      uint8_t *dstrow;
+      const uint8_t *srcrow;
+      assert(swapsPerPixel > 0);
+      assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
+      dstrow = dst;
+      srcrow = src;
+      for (row = 0; row < height; row++) {
+         if (swapSize == 2)
+            swap2_copy((GLushort *)dstrow, (GLushort *)srcrow, width * swapsPerPixel);
+         else if (swapSize == 4)
+            swap4_copy((GLuint *)dstrow, (GLuint *)srcrow, width * swapsPerPixel);
+         dstrow += stride;
+         srcrow += stride;
+      }
+   }
+}
--- a/src/mesa/main/image.h
+++ b/src/mesa/main/image.h
@@ -35,22 +35,11 @@ struct gl_pixelstore_attrib;
 struct gl_framebuffer;

 extern void
-_mesa_swap2_copy(GLushort *dst, GLushort *src, GLuint n);
+_mesa_swap2(GLushort *p, GLuint n);

 extern void
-_mesa_swap4_copy(GLuint *dst, GLuint *src, GLuint n);
+_mesa_swap4(GLuint *p, GLuint n);

-static inline void
-_mesa_swap2(GLushort *p, GLuint n)
-{
-   _mesa_swap2_copy(p, p, n);
-}
-
-static inline void
-_mesa_swap4(GLuint *p, GLuint n)
-{
-   _mesa_swap4_copy(p, p, n);
-}

 extern GLintptr
 _mesa_image_offset( GLuint dimensions,
@@ -146,5 +135,10 @@ _mesa_clip_blit(struct gl_context *ctx,
                GLint *srcX0, GLint *srcY0, GLint *srcX1, GLint *srcY1,
                GLint *dstX0, GLint *dstY0, GLint *dstX1, GLint *dstY1);

+void
+_mesa_swap_bytes_2d_image(GLenum format, GLenum type,
+                          const struct gl_pixelstore_attrib *packing,
+                          GLsizei width, GLsizei height,
+                          GLvoid *dst, const GLvoid *src);

 #endif
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -523,7 +523,8 @@ read_rgba_pixels( struct gl_context *ctx,
       * convert to, then we can convert directly into the dst buffer and avoid
       * the final conversion/copy from the rgba buffer to the dst buffer.
       */
-      if (dst_format == rgba_format) {
+      if (dst_format == rgba_format &&
+          dst_stride == rgba_stride) {
         need_convert = false;
         rgba = dst;
      } else {
@@ -613,15 +614,8 @@ read_rgba_pixels( struct gl_context *ctx,
 done_swap:
   /* Handle byte swapping if required */
   if (packing->SwapBytes) {
-      GLint swapSize = _mesa_sizeof_packed_type(type);
-      if (swapSize == 2 || swapSize == 4) {
-         int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-         assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-         if (swapSize == 2)
-            _mesa_swap2((GLushort *) dst, width * height * swapsPerPixel);
-         else if (swapSize == 4)
-            _mesa_swap4((GLuint *) dst, width * height * swapsPerPixel);
-      }
+      _mesa_swap_bytes_2d_image(format, type, packing,
+                                width, height, dst, dst);
   }

 done_unmap:
--- a/src/mesa/main/texcompress_fxt1.c
+++ b/src/mesa/main/texcompress_fxt1.c
@@ -65,7 +65,7 @@ _mesa_texstore_rgb_fxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGB ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGB/GLubyte */
      GLubyte *tempImageSlices[1];
--- a/src/mesa/main/texcompress_s3tc.c
+++ b/src/mesa/main/texcompress_s3tc.c
@@ -130,7 +130,7 @@ _mesa_texstore_rgb_dxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGB ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGB/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -187,7 +187,7 @@ _mesa_texstore_rgba_dxt1(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -244,7 +244,7 @@ _mesa_texstore_rgba_dxt3(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
@@ -300,7 +300,7 @@ _mesa_texstore_rgba_dxt5(TEXSTORE_PARAMS)
   if (srcFormat != GL_RGBA ||
       srcType != GL_UNSIGNED_BYTE ||
       ctx->_ImageTransferState ||
-       srcPacking->RowLength != srcWidth ||
+       ALIGN(srcPacking->RowLength, srcPacking->Alignment) != srcWidth ||
       srcPacking->SwapBytes) {
      /* convert image to RGBA/GLubyte */
      GLubyte *tempImageSlices[1];
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -361,6 +361,13 @@ get_tex_rgba_compressed(struct gl_context *ctx, GLuint dimensions,
                           tempSlice, RGBA32_FLOAT, srcStride,
                           width, height,
                           needsRebase ? rebaseSwizzle : NULL);
+
+      /* Handle byte swapping if required */
+      if (ctx->Pack.SwapBytes) {
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);
+      }
+
      tempSlice += 4 * width * height;
   }

@@ -557,17 +564,9 @@ get_tex_rgba_uncompressed(struct gl_context *ctx, GLuint dimensions,

   do_swap:
      /* Handle byte swapping if required */
-      if (ctx->Pack.SwapBytes) {
-         GLint swapSize = _mesa_sizeof_packed_type(type);
-         if (swapSize == 2 || swapSize == 4) {
-            int swapsPerPixel = _mesa_bytes_per_pixel(format, type) / swapSize;
-            assert(_mesa_bytes_per_pixel(format, type) % swapSize == 0);
-            if (swapSize == 2)
-               _mesa_swap2((GLushort *) dest, width * height * swapsPerPixel);
-            else if (swapSize == 4)
-               _mesa_swap4((GLuint *) dest, width * height * swapsPerPixel);
-         }
-      }
+      if (ctx->Pack.SwapBytes)
+         _mesa_swap_bytes_2d_image(format, type, &ctx->Pack,
+                                   width, height, dest, dest);

      /* Unmap the src texture buffer */
      ctx->Driver.UnmapTextureImage(ctx, texImage, zoffset + img);
@@ -1213,6 +1212,13 @@ getteximage_error_check(struct gl_context *ctx,
                  "%s(format=GL_STENCIL_INDEX)", caller);
      return true;
   }
+   else if (_mesa_is_stencil_format(format)
+	    && !_mesa_is_depthstencil_format(baseFormat)
+	    && !_mesa_is_stencil_format(baseFormat)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(format mismatch)", caller);
+      return true;
+   }
   else if (_mesa_is_ycbcr_format(format)
            && !_mesa_is_ycbcr_format(baseFormat)) {
      _mesa_error(ctx, GL_INVALID_OPERATION,
--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -3711,12 +3711,12 @@ texturesubimage(struct gl_context *ctx, GLuint dims,
      rowStride = _mesa_image_image_stride(&ctx->Unpack, width, height,
                                           format, type);
      /* Copy in each face. */
-      for (i = 0; i < 6; ++i) {
+      for (i = zoffset; i < zoffset + depth; ++i) {
         texImage = texObj->Image[i][level];
         assert(texImage);

         _mesa_texture_sub_image(ctx, 3, texObj, texImage, texObj->Target,
-                                 level, xoffset, yoffset, zoffset,
+                                 level, xoffset, yoffset, 0,
                                 width, height, 1, format,
                                 type, pixels, true);
         pixels = (GLubyte *) pixels + rowStride;
@@ -5569,10 +5569,13 @@ static GLboolean
 is_renderable_texture_format(struct gl_context *ctx, GLenum internalformat)
 {
   /* Everything that is allowed for renderbuffers,
-    * except for a base format of GL_STENCIL_INDEX.
+    * except for a base format of GL_STENCIL_INDEX, unless supported.
    */
   GLenum baseFormat = _mesa_base_fbo_format(ctx, internalformat);
-   return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
+   if (ctx->Extensions.ARB_texture_stencil8)
+      return baseFormat != 0;
+   else
+      return baseFormat != 0 && baseFormat != GL_STENCIL_INDEX;
 }


--- a/src/mesa/main/texparam.c
+++ b/src/mesa/main/texparam.c
@@ -1914,6 +1914,12 @@ get_tex_parameterfv(struct gl_context *ctx,
         *params = (GLfloat) obj->ImageFormatCompatibilityType;
         break;

+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = ENUM_TO_FLOAT(obj->Target);
+         break;
+
      default:
         goto invalid_pname;
   }
@@ -2139,6 +2145,12 @@ get_tex_parameteriv(struct gl_context *ctx,
         *params = obj->ImageFormatCompatibilityType;
         break;

+      case GL_TEXTURE_TARGET:
+         if (ctx->API != API_OPENGL_CORE)
+            goto invalid_pname;
+         *params = (GLint) obj->Target;
+         break;
+
      default:
         goto invalid_pname;
   }
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -727,19 +727,25 @@ texstore_rgba(TEXSTORE_PARAMS)
       */
      GLint swapSize = _mesa_sizeof_packed_type(srcType);
      if (swapSize == 2 || swapSize == 4) {
-         int bytesPerPixel = _mesa_bytes_per_pixel(srcFormat, srcType);
-         int swapsPerPixel = bytesPerPixel / swapSize;
-         int elementCount = srcWidth * srcHeight * srcDepth;
-         assert(bytesPerPixel % swapSize == 0);
-         tempImage = malloc(elementCount * bytesPerPixel);
+         int imageStride = _mesa_image_image_stride(srcPacking, srcWidth, srcHeight, srcFormat, srcType);
+         int bufferSize = imageStride * srcDepth;
+         int layer;
+         const uint8_t *src;
+         uint8_t *dst;
+
+         tempImage = malloc(bufferSize);
         if (!tempImage)
            return GL_FALSE;
-         if (swapSize == 2)
-            _mesa_swap2_copy(tempImage, (GLushort *) srcAddr,
-                             elementCount * swapsPerPixel);
-         else
-            _mesa_swap4_copy(tempImage, (GLuint *) srcAddr,
-                             elementCount * swapsPerPixel);
+         src = srcAddr;
+         dst = tempImage;
+         for (layer = 0; layer < srcDepth; layer++) {
+            _mesa_swap_bytes_2d_image(srcFormat, srcType,
+                                      srcPacking,
+                                      srcWidth, srcHeight,
+                                      dst, src);
+            src += imageStride;
+            dst += imageStride;
+         }
         srcAddr = tempImage;
      }
   }
@@ -1004,6 +1010,7 @@ store_texsubimage(struct gl_context *ctx,
   /* compute slice info (and do some sanity checks) */
   switch (target) {
   case GL_TEXTURE_2D:
+   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_RECTANGLE:
   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_EXTERNAL_OES:
@@ -1025,6 +1032,7 @@ store_texsubimage(struct gl_context *ctx,
      srcImageStride = _mesa_image_row_stride(packing, width, format, type);
      break;
   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      numSlices = depth;
      sliceOffset = zoffset;
      depth = 1;
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -319,24 +319,31 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,

      return;
   }
+   if ((uni->type->base_type == GLSL_TYPE_DOUBLE &&
+        returnType != GLSL_TYPE_DOUBLE) ||
+       (uni->type->base_type != GLSL_TYPE_DOUBLE &&
+        returnType == GLSL_TYPE_DOUBLE)) {
+	 _mesa_error( ctx, GL_INVALID_OPERATION,
+	             "glGetnUniform*vARB(incompatible uniform types)");
+	return;
+   }

   {
      unsigned elements = (uni->type->is_sampler())
 	 ? 1 : uni->type->components();
+      const int dmul = uni->type->base_type == GLSL_TYPE_DOUBLE ? 2 : 1;

      /* Calculate the source base address *BEFORE* modifying elements to
       * account for the size of the user's buffer.
       */
      const union gl_constant_value *const src =
-	 &uni->storage[offset * elements];
+	 &uni->storage[offset * elements * dmul];

      assert(returnType == GLSL_TYPE_FLOAT || returnType == GLSL_TYPE_INT ||
-             returnType == GLSL_TYPE_UINT);
-      /* The three (currently) supported types all have the same size,
-       * which is of course the same as their union. That'll change
-       * with glGetUniformdv()...
-       */
-      unsigned bytes = sizeof(src[0]) * elements;
+             returnType == GLSL_TYPE_UINT || returnType == GLSL_TYPE_DOUBLE);
+
+      /* doubles have a different size than the other 3 types */
+      unsigned bytes = sizeof(src[0]) * elements * dmul;
      if (bufSize < 0 || bytes > (unsigned) bufSize) {
 	 _mesa_error( ctx, GL_INVALID_OPERATION,
 	             "glGetnUniform*vARB(out of bounds: bufSize is %d,"
@@ -866,7 +873,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                     GLint location, GLsizei count,
                     GLboolean transpose,
-                     const GLvoid *values, GLenum type)
+                     const GLvoid *values, enum glsl_base_type basicType)
 {
   unsigned offset;
   unsigned vectors;
@@ -885,8 +892,8 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
      return;
   }

-   assert(type == GL_FLOAT || type == GL_DOUBLE);
-   size_mul = type == GL_DOUBLE ? 2 : 1;
+   assert(basicType == GLSL_TYPE_FLOAT || basicType == GLSL_TYPE_DOUBLE);
+   size_mul = basicType == GLSL_TYPE_DOUBLE ? 2 : 1;

   assert(!uni->type->is_sampler());
   vectors = uni->type->matrix_columns;
@@ -912,6 +919,31 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
      }
   }

+   /* Section 2.11.7 (Uniform Variables) of the OpenGL 4.2 Core Profile spec
+    * says:
+    *
+    *     "If any of the following conditions occur, an INVALID_OPERATION
+    *     error is generated by the Uniform* commands, and no uniform values
+    *     are changed:
+    *
+    *     ...
+    *
+    *     - if the uniform declared in the shader is not of type boolean and
+    *       the type indicated in the name of the Uniform* command used does
+    *       not match the type of the uniform"
+    *
+    * There are no Boolean matrix types, so we do not need to allow
+    * GLSL_TYPE_BOOL here (as _mesa_uniform does).
+    */
+   if (uni->type->base_type != basicType) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "glUniformMatrix%ux%u(\"%s\"@%d is %s, not %s)",
+                  cols, rows, uni->name, location,
+                  glsl_type_name(uni->type->base_type),
+                  glsl_type_name(basicType));
+      return;
+   }
+
   if (unlikely(ctx->_Shader->Flags & GLSL_UNIFORMS)) {
      log_uniform(values, uni->type->base_type, components, vectors, count,
 		  bool(transpose), shProg, location, uni);
@@ -941,7 +973,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
   if (!transpose) {
      memcpy(&uni->storage[elements * offset], values,
 	     sizeof(uni->storage[0]) * elements * count * size_mul);
-   } else if (type == GL_FLOAT) {
+   } else if (basicType == GLSL_TYPE_FLOAT) {
      /* Copy and transpose the matrix.
       */
      const float *src = (const float *)values;
@@ -958,7 +990,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 	 src += elements;
      }
   } else {
-      assert(type == GL_DOUBLE);
+      assert(basicType == GLSL_TYPE_DOUBLE);
      const double *src = (const double *)values;
      double *dst = (double *)&uni->storage[elements * offset].f;

--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -553,7 +553,7 @@ _mesa_UniformMatrix2fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_FLOAT);
+			2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -562,7 +562,7 @@ _mesa_UniformMatrix3fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_FLOAT);
+			3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -571,7 +571,7 @@ _mesa_UniformMatrix4fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_FLOAT);
+			4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 /** Same as above with direct state access **/
@@ -683,7 +683,7 @@ _mesa_ProgramUniformMatrix2fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -694,7 +694,7 @@ _mesa_ProgramUniformMatrix3fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -705,7 +705,7 @@ _mesa_ProgramUniformMatrix4fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }


@@ -718,7 +718,7 @@ _mesa_UniformMatrix2x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_FLOAT);
+			2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -727,7 +727,7 @@ _mesa_UniformMatrix3x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_FLOAT);
+			3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -736,7 +736,7 @@ _mesa_UniformMatrix2x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_FLOAT);
+			2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -745,7 +745,7 @@ _mesa_UniformMatrix4x2fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_FLOAT);
+			4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -754,7 +754,7 @@ _mesa_UniformMatrix3x4fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_FLOAT);
+			3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -763,7 +763,7 @@ _mesa_UniformMatrix4x3fv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_FLOAT);
+			4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 /** Same as above with direct state access **/
@@ -776,7 +776,7 @@ _mesa_ProgramUniformMatrix2x3fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -787,7 +787,7 @@ _mesa_ProgramUniformMatrix3x2fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -798,7 +798,7 @@ _mesa_ProgramUniformMatrix2x4fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -809,7 +809,7 @@ _mesa_ProgramUniformMatrix4x2fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x2fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -820,7 +820,7 @@ _mesa_ProgramUniformMatrix3x4fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x4fv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }

 void GLAPIENTRY
@@ -831,7 +831,7 @@ _mesa_ProgramUniformMatrix4x3fv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x3fv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_FLOAT);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_FLOAT);
 }


@@ -888,16 +888,7 @@ _mesa_GetnUniformdvARB(GLuint program, GLint location,
 {
   GET_CURRENT_CONTEXT(ctx);

-   (void) program;
-   (void) location;
-   (void) bufSize;
-   (void) params;
-
-   /*
   _mesa_get_uniform(ctx, program, location, bufSize, GLSL_TYPE_DOUBLE, params);
-   */
-   _mesa_error(ctx, GL_INVALID_OPERATION, "glGetUniformdvARB"
-               "(GL_ARB_gpu_shader_fp64 not implemented)");
 }

 void GLAPIENTRY
@@ -1312,7 +1303,7 @@ _mesa_UniformMatrix2dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 2, location, count, transpose, value, GL_DOUBLE);
+			2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1321,7 +1312,7 @@ _mesa_UniformMatrix3dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 3, location, count, transpose, value, GL_DOUBLE);
+			3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1330,7 +1321,7 @@ _mesa_UniformMatrix4dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 4, location, count, transpose, value, GL_DOUBLE);
+			4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1339,7 +1330,7 @@ _mesa_UniformMatrix2x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 3, location, count, transpose, value, GL_DOUBLE);
+			2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1348,7 +1339,7 @@ _mesa_UniformMatrix3x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 2, location, count, transpose, value, GL_DOUBLE);
+			3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1357,7 +1348,7 @@ _mesa_UniformMatrix2x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			2, 4, location, count, transpose, value, GL_DOUBLE);
+			2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1366,7 +1357,7 @@ _mesa_UniformMatrix4x2dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 2, location, count, transpose, value, GL_DOUBLE);
+			4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1375,7 +1366,7 @@ _mesa_UniformMatrix3x4dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			3, 4, location, count, transpose, value, GL_DOUBLE);
+			3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1384,7 +1375,7 @@ _mesa_UniformMatrix4x3dv(GLint location, GLsizei count, GLboolean transpose,
 {
   GET_CURRENT_CONTEXT(ctx);
   _mesa_uniform_matrix(ctx, ctx->_Shader->ActiveProgram,
-			4, 3, location, count, transpose, value, GL_DOUBLE);
+			4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1490,7 +1481,7 @@ _mesa_ProgramUniformMatrix2dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1501,7 +1492,7 @@ _mesa_ProgramUniformMatrix3dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1512,7 +1503,7 @@ _mesa_ProgramUniformMatrix4dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1523,7 +1514,7 @@ _mesa_ProgramUniformMatrix2x3dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1534,7 +1525,7 @@ _mesa_ProgramUniformMatrix3x2dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1545,7 +1536,7 @@ _mesa_ProgramUniformMatrix2x4dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix2x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 2, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1556,7 +1547,7 @@ _mesa_ProgramUniformMatrix4x2dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x2dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 2, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1567,7 +1558,7 @@ _mesa_ProgramUniformMatrix3x4dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix3x4dv");
-   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 3, 4, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }

 void GLAPIENTRY
@@ -1578,5 +1569,5 @@ _mesa_ProgramUniformMatrix4x3dv(GLuint program, GLint location, GLsizei count,
   struct gl_shader_program *shProg =
      _mesa_lookup_shader_program_err(ctx, program,
            "glProgramUniformMatrix4x3dv");
-   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GL_DOUBLE);
+   _mesa_uniform_matrix(ctx, shProg, 4, 3, location, count, transpose, value, GLSL_TYPE_DOUBLE);
 }
--- a/src/mesa/main/uniforms.h
+++ b/src/mesa/main/uniforms.h
@@ -355,7 +355,7 @@ _mesa_uniform_matrix(struct gl_context *ctx, struct gl_shader_program *shProg,
 		     GLuint cols, GLuint rows,
                     GLint location, GLsizei count,
                     GLboolean transpose,
-                     const GLvoid *values, GLenum type);
+                     const GLvoid *values, enum glsl_base_type basicType);

 void
 _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
--- a/src/mesa/state_tracker/st_cb_readpixels.c
+++ b/src/mesa/state_tracker/st_cb_readpixels.c
@@ -238,9 +238,9 @@ st_readpixels(struct gl_context *ctx, GLint x, GLint y,
      GLuint row;

      for (row = 0; row < (unsigned) height; row++) {
-         GLvoid *dest = _mesa_image_address3d(pack, pixels,
+         GLvoid *dest = _mesa_image_address2d(pack, pixels,
                                              width, height, format,
-                                              type, 0, row, 0);
+                                              type, row, 0);
         memcpy(dest, map, bytesPerRow);
         map += tex_xfer->stride;
      }
--- a/src/mesa/swrast/s_drawpix.c
+++ b/src/mesa/swrast/s_drawpix.c
@@ -481,17 +481,17 @@ draw_rgba_pixels( struct gl_context *ctx, GLint x, GLint y,
          */
         GLint swapSize = _mesa_sizeof_packed_type(type);
         if (swapSize == 2 || swapSize == 4) {
-            int components = _mesa_components_in_format(format);
-            int elementCount = width * height * components;
-            tempImage = malloc(elementCount * swapSize);
+            int imageStride = _mesa_image_image_stride(unpack, width, height, format, type);
+
+            tempImage = malloc(imageStride);
            if (!tempImage) {
               _mesa_error(ctx, GL_OUT_OF_MEMORY, "glDrawPixels");
               return;
            }
-            if (swapSize == 2)
-               _mesa_swap2_copy(tempImage, (GLushort *) pixels, elementCount);
-            else
-               _mesa_swap4_copy(tempImage, (GLuint *) pixels, elementCount);
+
+            _mesa_swap_bytes_2d_image(format, type, unpack,
+                                      width, height, tempImage, pixels);
+
            pixels = tempImage;
         }
      }
--- a/src/mesa/swrast/s_texfetch.c
+++ b/src/mesa/swrast/s_texfetch.c
@@ -282,10 +282,26 @@ texfetch_funcs[] =
   },

   /* Packed signed/unsigned non-normalized integer formats */
+   FETCH_NULL(A8B8G8R8_UINT),
+   FETCH_NULL(A8R8G8B8_UINT),
+   FETCH_NULL(R8G8B8A8_UINT),
+   FETCH_NULL(B8G8R8A8_UINT),
   FETCH_NULL(B10G10R10A2_UINT),
   FETCH_NULL(R10G10B10A2_UINT),
   FETCH_NULL(A2B10G10R10_UINT),
   FETCH_NULL(A2R10G10B10_UINT),
+   FETCH_NULL(B5G6R5_UINT),
+   FETCH_NULL(R5G6B5_UINT),
+   FETCH_NULL(B2G3R3_UINT),
+   FETCH_NULL(R3G3B2_UINT),
+   FETCH_NULL(A4B4G4R4_UINT),
+   FETCH_NULL(R4G4B4A4_UINT),
+   FETCH_NULL(B4G4R4A4_UINT),
+   FETCH_NULL(A4R4G4B4_UINT),
+   FETCH_NULL(A1B5G5R5_UINT),
+   FETCH_NULL(B5G5R5A1_UINT),
+   FETCH_NULL(A1R5G5B5_UINT),
+   FETCH_NULL(R5G5B5A1_UINT),

   /* Array signed/unsigned non-normalized integer formats */
   FETCH_NULL(A_UINT8),
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -36,7 +36,6 @@ libmesautil_la_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_srcdir)/src/gallium/include \
 	-I$(top_srcdir)/src/gallium/auxiliary \
-	$(SHA1_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	$(MSVC2008_COMPAT_CFLAGS)

@@ -44,12 +43,6 @@ libmesautil_la_SOURCES = \
 	$(MESA_UTIL_FILES) \
 	$(MESA_UTIL_GENERATED_FILES)

-if ENABLE_SHADER_CACHE
-libmesautil_la_SOURCES += $(MESA_UTIL_SHADER_CACHE_FILES)
-
-libmesautil_la_LIBADD = $(SHA1_LIBS)
-endif
-
 roundeven_test_LDADD = -lm

 check_PROGRAMS = u_atomic_test roundeven_test
--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -1,7 +1,3 @@
-MESA_UTIL_SHADER_CACHE_FILES := \
-	mesa-sha1.c \
-	mesa-sha1.h
-
 MESA_UTIL_FILES :=	\
 	bitset.h \
 	format_srgb.h \
--- a/src/util/SConscript
+++ b/src/util/SConscript
@@ -33,11 +33,6 @@ mesautil_sources = (
    source_lists['MESA_UTIL_GENERATED_FILES']
 )

-# XXX We don't yet have scons support for detecting any of the various
-# HAVE_SHA1_* definitions, so for now simply disable the shader cache.
-if False:
-    mesautil_sources += source_lists['MESA_UTIL_SHADER_CACHE_FILES']
-
 mesautil = env.ConvenienceLibrary(
    target = 'mesautil',
    source = mesautil_sources,
--- a/src/util/mesa-sha1.c
+++ b/src/util/mesa-sha1.c
@@ -1,316 +0,0 @@
-/* Copyright © 2007 Carl Worth
- * Copyright © 2009 Jeremy Huddleston, Julien Cristau, and Matthieu Herrb
- * Copyright © 2009-2010 Mikhail Gusarov
- * Copyright © 2012 Yaakov Selkowitz and Keith Packard
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "mesa-sha1.h"
-
-#if defined(HAVE_SHA1_IN_LIBMD)  /* Use libmd for SHA1 */ \
-	|| defined(HAVE_SHA1_IN_LIBC)   /* Use libc for SHA1 */
-
-#include <sha1.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   SHA1_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   SHA1Init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
-
-   SHA1Update(sha1_ctx, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
-
-   SHA1Final(result, sha1_ctx);
-   free(sha1_ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_COMMONCRYPTO)        /* Use CommonCrypto for SHA1 */
-
-#include <CommonCrypto/CommonDigest.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   CC_SHA1_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   CC_SHA1_Init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
-
-   CC_SHA1_Update(sha1_ctx, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
-
-   CC_SHA1_Final(result, sha1_ctx);
-   free(sha1_ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_CRYPTOAPI)        /* Use CryptoAPI for SHA1 */
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <wincrypt.h>
-
-static HCRYPTPROV hProv;
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   HCRYPTHASH *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   CryptAcquireContext(&hProv, NULL, MS_DEF_PROV, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT);
-   CryptCreateHash(hProv, CALG_SHA1, 0, 0, ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
-
-   CryptHashData(*hHash, data, size, 0);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
-   DWORD len = 20;
-
-   CryptGetHashParam(*hHash, HP_HASHVAL, result, &len, 0);
-   CryptDestroyHash(*hHash);
-   CryptReleaseContext(hProv, 0);
-   free(ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBNETTLE)   /* Use libnettle for SHA1 */
-
-#include <nettle/sha.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   struct sha1_ctx *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   sha1_init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   sha1_update((struct sha1_ctx *) ctx, size, data);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   sha1_digest((struct sha1_ctx *) ctx, 20, result);
-   free(ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBGCRYPT)   /* Use libgcrypt for SHA1 */
-
-#include <gcrypt.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   static int init;
-   gcry_md_hd_t h;
-   gcry_error_t err;
-
-   if (!init) {
-      if (!gcry_check_version(NULL))
-         return NULL;
-      gcry_control(GCRYCTL_DISABLE_SECMEM, 0);
-      gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
-      init = 1;
-   }
-
-   err = gcry_md_open(&h, GCRY_MD_SHA1, 0);
-   if (err)
-      return NULL;
-   return (struct mesa_sha1 *) h;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
-
-   gcry_md_write(h, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
-
-   memcpy(result, gcry_md_read(h, GCRY_MD_SHA1), 20);
-   gcry_md_close(h);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBSHA1)     /* Use libsha1 */
-
-#include <libsha1.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   sha1_ctx *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   sha1_begin(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   sha1_hash(data, size, (sha1_ctx *) ctx);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   sha1_end(result, (sha1_ctx *) ctx);
-   free(ctx);
-   return 1;
-}
-
-#else                           /* Use OpenSSL's libcrypto */
-
-#include <stddef.h>             /* buggy openssl/sha.h wants size_t */
-#include <openssl/sha.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   int ret;
-   SHA_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   ret = SHA1_Init(ctx);
-   if (!ret) {
-      free(ctx);
-      return NULL;
-   }
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   int ret;
-   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
-
-   ret = SHA1_Update(sha_ctx, data, size);
-   if (!ret)
-      free(sha_ctx);
-   return ret;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   int ret;
-   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
-
-   ret = SHA1_Final(result, (SHA_CTX *) sha_ctx);
-   free(sha_ctx);
-   return ret;
-}
-
-#endif
-
-void
-_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20])
-{
-   struct mesa_sha1 *ctx;
-
-   ctx = _mesa_sha1_init();
-   _mesa_sha1_update(ctx, data, size);
-   _mesa_sha1_final(ctx, result);
-}
-
-char *
-_mesa_sha1_format(char *buf, const unsigned char *sha1)
-{
-   static const char hex_digits[] = "0123456789abcdef";
-   int i;
-
-   for (i = 0; i < 40; i += 2) {
-      buf[i] = hex_digits[sha1[i >> 1] >> 4];
-      buf[i + 1] = hex_digits[sha1[i >> 1] & 0x0f];
-   }
-   buf[i] = '\0';
-
-   return buf;
-}
--- a/src/util/mesa-sha1.h
+++ b/src/util/mesa-sha1.h
@@ -1,53 +0,0 @@
-/* Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SHA1_H
-#define SHA1_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdlib.h>
-
-struct mesa_sha1;
-
-struct mesa_sha1 *
-_mesa_sha1_init(void);
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size);
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20]);
-
-char *
-_mesa_sha1_format(char *buf, const unsigned char *sha1);
-
-void
-_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20]);
-
-#ifdef __cplusplus
-} /* extern C */
-#endif
-
-#endif