Add release notes for the 10.5.1 release

Signed-off-by: Emil Velikov <emil.l.velikov@gmail.com>
Update version to 10.5.1
2015-03-13 22:35:01 +00:00 · 2015-03-13 22:32:35 +00:00 · 2015-03-12 12:45:48 +00:00 · 2015-03-12 12:45:48 +00:00 · 2015-03-12 12:45:45 +00:00 · 2015-03-12 12:38:54 +00:00
77 changed files with 1243 additions and 1061 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-10.5.0
+10.5.1
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -10,3 +10,14 @@

 # auxiliary/vl: bring back the VL code for the dri targets
 c39dbfdd0f764b1aaa7319b4694e7335692993dd
+
+# mesa: rename format_info.c to format_info.h
+3f6c28f2a976e35128b7a4a513cfa60af00301e1
+# mesa: fix dependency tracking of generated sources
+d22391cb165af4ed2f9a9e5d6233072a432cc969
+# mesa: drop Makefile from get_hash.h dependency list
+2c0f72d5389a9838cc4fbf4cc4f4291aa56c7845
+# mapi: fix *glapi dependency tracking
+fe5fddd7e2df74233a2a02ae021418485f39d11c
+# xmlpool: make sure we ship options.h
+8d8ca64c28170ec7e9ffa01638bcf8fd30a96088
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,6 @@ dnl Copyright © 2009-2014 Jon TURNEY
 dnl Copyright © 2011-2012 Benjamin Franzke
 dnl Copyright © 2008-2014 David Airlie
 dnl Copyright © 2009-2013 Brian Paul
-dnl Copyright © 2003-2007 Keith Packard, Daniel Stone
 dnl
 dnl Permission is hereby granted, free of charge, to any person obtaining a
 dnl copy of this software and associated documentation files (the "Software"),
@@ -935,144 +934,6 @@ fi

 AC_SUBST([MESA_LLVM])

-# SHA1 hashing
-AC_ARG_WITH([sha1],
-        [AS_HELP_STRING([--with-sha1=libc|libmd|libnettle|libgcrypt|libcrypto|libsha1|CommonCrypto|CryptoAPI],
-        [choose SHA1 implementation])])
-case "x$with_sha1" in
-x | xlibc | xlibmd | xlibnettle | xlibgcrypt | xlibcrypto | xlibsha1 | xCommonCrypto | xCryptoAPI)
-  ;;
-*)
-        AC_MSG_ERROR([Illegal value for --with-sha1: $with_sha1])
-esac
-
-AC_CHECK_FUNC([SHA1Init], [HAVE_SHA1_IN_LIBC=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_LIBC" = xyes; then
-	with_sha1=libc
-fi
-if test "x$with_sha1" = xlibc && test "x$HAVE_SHA1_IN_LIBC" != xyes; then
-	AC_MSG_ERROR([sha1 in libc requested but not found])
-fi
-if test "x$with_sha1" = xlibc; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBC], [1],
-		[Use libc SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_FUNC([CC_SHA1_Init], [HAVE_SHA1_IN_COMMONCRYPTO=yes])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_COMMONCRYPTO" = xyes; then
-	with_sha1=CommonCrypto
-fi
-if test "x$with_sha1" = xCommonCrypto && test "x$HAVE_SHA1_IN_COMMONCRYPTO" != xyes; then
-	AC_MSG_ERROR([CommonCrypto requested but not found])
-fi
-if test "x$with_sha1" = xCommonCrypto; then
-	AC_DEFINE([HAVE_SHA1_IN_COMMONCRYPTO], [1],
-		[Use CommonCrypto SHA1 functions])
-	SHA1_LIBS=""
-fi
-dnl stdcall functions cannot be tested with AC_CHECK_LIB
-AC_CHECK_HEADER([wincrypt.h], [HAVE_SHA1_IN_CRYPTOAPI=yes], [], [#include <windows.h>])
-if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_CRYPTOAPI" = xyes; then
-	with_sha1=CryptoAPI
-fi
-if test "x$with_sha1" = xCryptoAPI && test "x$HAVE_SHA1_IN_CRYPTOAPI" != xyes; then
-	AC_MSG_ERROR([CryptoAPI requested but not found])
-fi
-if test "x$with_sha1" = xCryptoAPI; then
-	AC_DEFINE([HAVE_SHA1_IN_CRYPTOAPI], [1],
-		[Use CryptoAPI SHA1 functions])
-	SHA1_LIBS=""
-fi
-AC_CHECK_LIB([md], [SHA1Init], [HAVE_LIBMD=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBMD" = xyes; then
-	with_sha1=libmd
-fi
-if test "x$with_sha1" = xlibmd && test "x$HAVE_LIBMD" != xyes; then
-	AC_MSG_ERROR([libmd requested but not found])
-fi
-if test "x$with_sha1" = xlibmd; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBMD], [1],
-	          [Use libmd SHA1 functions])
-	SHA1_LIBS=-lmd
-fi
-PKG_CHECK_MODULES([LIBSHA1], [libsha1], [HAVE_LIBSHA1=yes], [HAVE_LIBSHA1=no])
-if test "x$with_sha1" = x && test "x$HAVE_LIBSHA1" = xyes; then
-   with_sha1=libsha1
-fi
-if test "x$with_sha1" = xlibsha1 && test "x$HAVE_LIBSHA1" != xyes; then
-	AC_MSG_ERROR([libsha1 requested but not found])
-fi
-if test "x$with_sha1" = xlibsha1; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBSHA1], [1],
-	          [Use libsha1 for SHA1])
-	SHA1_LIBS=-lsha1
-fi
-AC_CHECK_LIB([nettle], [nettle_sha1_init], [HAVE_LIBNETTLE=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBNETTLE" = xyes; then
-	with_sha1=libnettle
-fi
-if test "x$with_sha1" = xlibnettle && test "x$HAVE_LIBNETTLE" != xyes; then
-	AC_MSG_ERROR([libnettle requested but not found])
-fi
-if test "x$with_sha1" = xlibnettle; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBNETTLE], [1],
-	          [Use libnettle SHA1 functions])
-	SHA1_LIBS=-lnettle
-fi
-AC_CHECK_LIB([gcrypt], [gcry_md_open], [HAVE_LIBGCRYPT=yes])
-if test "x$with_sha1" = x && test "x$HAVE_LIBGCRYPT" = xyes; then
-	with_sha1=libgcrypt
-fi
-if test "x$with_sha1" = xlibgcrypt && test "x$HAVE_LIBGCRYPT" != xyes; then
-	AC_MSG_ERROR([libgcrypt requested but not found])
-fi
-if test "x$with_sha1" = xlibgcrypt; then
-	AC_DEFINE([HAVE_SHA1_IN_LIBGCRYPT], [1],
-	          [Use libgcrypt SHA1 functions])
-	SHA1_LIBS=-lgcrypt
-fi
-# We don't need all of the OpenSSL libraries, just libcrypto
-AC_CHECK_LIB([crypto], [SHA1_Init], [HAVE_LIBCRYPTO=yes])
-PKG_CHECK_MODULES([OPENSSL], [openssl], [HAVE_OPENSSL_PKC=yes],
-                  [HAVE_OPENSSL_PKC=no])
-if test "x$HAVE_LIBCRYPTO" = xyes || test "x$HAVE_OPENSSL_PKC" = xyes; then
-	if test "x$with_sha1" = x; then
-		with_sha1=libcrypto
-	fi
-else
-	if test "x$with_sha1" = xlibcrypto; then
-		AC_MSG_ERROR([OpenSSL libcrypto requested but not found])
-	fi
-fi
-if test "x$with_sha1" = xlibcrypto; then
-	if test "x$HAVE_LIBCRYPTO" = xyes; then
-		SHA1_LIBS=-lcrypto
-	else
-		SHA1_LIBS="$OPENSSL_LIBS"
-		SHA1_CFLAGS="$OPENSSL_CFLAGS"
-	fi
-fi
-AC_MSG_CHECKING([for SHA1 implementation])
-AC_MSG_RESULT([$with_sha1])
-AC_SUBST(SHA1_LIBS)
-AC_SUBST(SHA1_CFLAGS)
-
-# Allow user to configure out the shader-cache feature
-AC_ARG_ENABLE([shader-cache],
-    AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
-    [enable_shader_cache="$enableval"],
-    [if test "x$with_sha1" != "x"; then
-        enable_shader_cache=yes
-     else
-        enable_shader_cache=no
-     fi])
-if test "x$with_sha1" = "x"; then
-    if test "x$enable_shader_cache" = "xyes"; then
-        AC_MSG_ERROR([Cannot enable shader cache (no SHA-1 implementation found)])
-    fi
-fi
-AM_CONDITIONAL([ENABLE_SHADER_CACHE], [test x$enable_shader_cache = xyes])
-
 # Check for libdrm
 PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
                  [have_libdrm=yes], [have_libdrm=no])
@@ -2529,12 +2390,6 @@ else
    echo "        Gallium:         no"
 fi

-dnl Shader cache
-echo ""
-echo "        Shader cache:    $enable_shader_cache"
-if test "x$enable_shader_cache" = "xyes"; then
-    echo "        With SHA1 from:  $with_sha1"
-fi

 dnl Libraries
 echo ""
--- a/docs/relnotes/10.5.0.html
+++ b/docs/relnotes/10.5.0.html
@@ -31,9 +31,10 @@ because compatibility contexts are not supported.
 </p>


-<h2>MD5 checksums</h2>
+<h2>SHA256 checksums</h2>
 <pre>
-TBD.
+2bb6e2e982ee4d8264d52d638c2a4e3f8a164190336d72d4e34ae1304d87ed91  mesa-10.5.0.tar.gz
+d7ca9f9044bbdd674377e3eebceef1fae339c8817b9aa435c2053e4fea44e5d3  mesa-10.5.0.tar.xz
 </pre>


--- a/docs/relnotes/10.5.1.html
+++ b/docs/relnotes/10.5.1.html
@@ -0,0 +1,216 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 10.5.1 Release Notes / March 13, 2015</h1>
+
+<p>
+Mesa 10.5.1 is a bug fix release which fixes bugs found since the 10.5.0 release.
+</p>
+<p>
+Mesa 10.5.1 implements the OpenGL 3.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 3.3.  OpenGL
+3.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79202">Bug 79202</a> - valgrind errors in glsl-fs-uniform-array-loop-unroll.shader_test; random code generation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84613">Bug 84613</a> - [G965, bisected] piglit regressions : glslparsertest.glsl2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86747">Bug 86747</a> - Noise in Football Manager 2014 textures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86974">Bug 86974</a> - INTEL_DEBUG=shader_time always asserts in fs_generator::generate_code() when Mesa is built with --enable-debug (= with asserts)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=88246">Bug 88246</a> - Commit 2881b12 causes 43 DrawElements test regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=88793">Bug 88793</a> - [BDW/BSW Bisected]Piglit/shaders_glsl-max-varyings fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=88883">Bug 88883</a> - ir-a2xx.c: variable changed in assert statement</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=88885">Bug 88885</a> - Transform feedback uses incorrect interleaving if a previous draw did not write gl_Position</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89095">Bug 89095</a> - [SNB/IVB/BYT Bisected]Webglc conformance/glsl/functions/glsl-function-mix-float.html fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89156">Bug 89156</a> - r300g: GL_COMPRESSED_RED_RGTC1 / ATI1N support broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89224">Bug 89224</a> - Incorrect rendering of Unigine Valley running in VM on VMware Workstation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89292">Bug 89292</a> - [regression,bisected] incomplete screenshots in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89311">Bug 89311</a> - [regression, bisected] dEQP: Added entry points for glCompressedTextureSubImage*D.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89312">Bug 89312</a> - [regression, bisected] main: Added entry points for CopyTextureSubImage*D. (d6b7c40cecfe01)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89315">Bug 89315</a> - [HSW, regression, bisected] i965/fs: Emit MAD instructions when possible.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89317">Bug 89317</a> - [HSW, regression, bisected] i965: Add LINTERP/CINTERP to can_do_cmod() (d91390634)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89416">Bug 89416</a> - UE4Editor crash after load project</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89430">Bug 89430</a> - [g965][bisected] arb_copy_image-targets gl_texture* tests fail</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andrey Sudnik (1):</p>
+<ul>
+  <li>i965/vec4: Don't lose the saturate modifier in copy propagation.</li>
+</ul>
+
+<p>Chris Forbes (1):</p>
+<ul>
+  <li>i965/gs: Check newly-generated GS-out VUE map against correct stage</li>
+</ul>
+
+<p>Daniel Stone (1):</p>
+<ul>
+  <li>egl: Take alpha bits into account when selecting GBM formats</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: Add sha256 sums for the 10.5.0 release</li>
+  <li>egl/main: no longer export internal function</li>
+  <li>cherry-ignore: ignore a few more commits picked without -x</li>
+  <li>mapi: fix commit 90411b56f6bc817e229d8801ac0adad6d4e3fb7a</li>
+  <li>Update version to 10.5.1</li>
+</ul>
+
+<p>Frank Henigman (1):</p>
+<ul>
+  <li>intel: fix EGLImage renderbuffer _BaseFormat</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>i965: Fix out-of-bounds accesses into pull_constant_loc array</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>i965/fs/nir: Use emit_math for nir_op_fpow</li>
+</ul>
+
+<p>Ilia Mirkin (3):</p>
+<ul>
+  <li>freedreno: move fb state copy after checking for size change</li>
+  <li>freedreno/ir3: fix array count returned by TXQ</li>
+  <li>freedreno/ir3: get the # of miplevels from getinfo</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>meta/TexSubImage: Stash everything other than PIXEL_TRANSFER/store in meta_begin</li>
+  <li>main/base_tex_format: Properly handle STENCIL_INDEX1/4/16</li>
+</ul>
+
+<p>Kenneth Graunke (8):</p>
+<ul>
+  <li>i965: Split Gen4-5 BlitFramebuffer code; prefer BLT over Meta.</li>
+  <li>glsl: Mark array access when copying to a temporary for the ?: operator.</li>
+  <li>i965/fs: Set force_writemask_all on shader_time instructions.</li>
+  <li>i965/fs: Set smear on shader_time diff register.</li>
+  <li>i965/fs: Make emit_shader_time_write return rather than emit.</li>
+  <li>i965/fs: Make get_timestamp() pass back the MOV rather than emitting it.</li>
+  <li>i965/fs: Make emit_shader_time_end() insert before EOT.</li>
+  <li>i965/fs: Don't issue FB writes for bound but unwritten color targets.</li>
+</ul>
+
+<p>Laura Ekstrand (2):</p>
+<ul>
+  <li>main: Fix target checking for CompressedTexSubImage*D.</li>
+  <li>main: Fix target checking for CopyTexSubImage*D.</li>
+</ul>
+
+<p>Marc-Andre Lureau (1):</p>
+<ul>
+  <li>gallium/auxiliary/indices: fix start param</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>r300g: fix RGTC1 and LATC1 SNORM formats</li>
+  <li>r300g: fix a crash when resolving into an sRGB texture</li>
+  <li>r300g: fix sRGB-&gt;sRGB blits</li>
+</ul>
+
+<p>Matt Turner (12):</p>
+<ul>
+  <li>i965/vec4: Fix implementation of i2b.</li>
+  <li>mesa: Indent break statements and add a missing one.</li>
+  <li>mesa: Free memory allocated for luminance in readpixels.</li>
+  <li>mesa: Correct backwards NULL check.</li>
+  <li>i965: Consider scratch writes to have side effects.</li>
+  <li>i965/fs: Don't use backend_visitor::instructions after creating the CFG.</li>
+  <li>r300g: Use PATH_MAX instead of limiting ourselves to 100 chars.</li>
+  <li>r300g: Check return value of snprintf().</li>
+  <li>i965/fs: Don't propagate cmod to inst with different type.</li>
+  <li>i965: Tell intel_get_memcpy() which direction the memcpy() is going.</li>
+  <li>Revert SHA1 additions.</li>
+  <li>i965: Avoid applying negate to wrong MAD source.</li>
+</ul>
+
+<p>Neil Roberts (4):</p>
+<ul>
+  <li>meta: In pbo_{Get,}TexSubImage don't repeatedly rebind the source tex</li>
+  <li>Revert "common: Fix PBOs for 1D_ARRAY."</li>
+  <li>meta: Allow GL_UN/PACK_IMAGE_HEIGHT in _mesa_meta_pbo_Get/TexSubImage</li>
+  <li>meta: Fix the y offset for 1D_ARRAY in _mesa_meta_pbo_TexSubImage</li>
+</ul>
+
+<p>Rob Clark (11):</p>
+<ul>
+  <li>freedreno/ir3: fix silly typo for binning pass shaders</li>
+  <li>freedreno/a2xx: fix increment in assert</li>
+  <li>freedreno/a4xx: bit of cleanup</li>
+  <li>freedreno: update generated headers</li>
+  <li>freedreno/a4xx: set PC_PRIM_VTX_CNTL.VAROUT properly</li>
+  <li>freedreno: update generated headers</li>
+  <li>freedreno/a4xx: aniso filtering</li>
+  <li>freedreno/ir3: fix up cat6 instruction encodings</li>
+  <li>freedreno/ir3: add support for memory (cat6) instructions</li>
+  <li>freedreno/ir3: handle flat bypass for a4xx</li>
+  <li>freedreno/ir3: fix failed assert in grouping</li>
+</ul>
+
+<p>Stefan Dösinger (1):</p>
+<ul>
+  <li>r300g: Fix the ATI1N swizzle (RGTC1 and LATC1)</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -668,15 +668,21 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)

   for (i = 0; dri2_dpy->driver_configs[i]; i++) {
      EGLint format, attr_list[3];
-      unsigned int mask;
+      unsigned int red, alpha;

      dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i],
-                                       __DRI_ATTRIB_RED_MASK, &mask);
-      if (mask == 0x3ff00000)
+                                       __DRI_ATTRIB_RED_MASK, &red);
+      dri2_dpy->core->getConfigAttrib(dri2_dpy->driver_configs[i],
+                                       __DRI_ATTRIB_ALPHA_MASK, &alpha);
+      if (red == 0x3ff00000 && alpha == 0x00000000)
         format = GBM_FORMAT_XRGB2101010;
-      else if (mask == 0x00ff0000)
+      else if (red == 0x3ff00000 && alpha == 0xc0000000)
+         format = GBM_FORMAT_ARGB2101010;
+      else if (red == 0x00ff0000 && alpha == 0x00000000)
         format = GBM_FORMAT_XRGB8888;
-      else if (mask == 0xf800)
+      else if (red == 0x00ff0000 && alpha == 0xff000000)
+         format = GBM_FORMAT_ARGB8888;
+      else if (red == 0xf800)
         format = GBM_FORMAT_RGB565;
      else
         continue;
--- a/src/egl/main/eglarray.h
+++ b/src/egl/main/eglarray.h
@@ -49,7 +49,7 @@ extern _EGLArray *
 _eglCreateArray(const char *name, EGLint init_size);


-PUBLIC void
+extern void
 _eglDestroyArray(_EGLArray *array, void (*free_cb)(void *));


@@ -65,7 +65,7 @@ void *
 _eglFindArray(_EGLArray *array, void *elem);


-PUBLIC EGLint
+extern EGLint
 _eglFilterArray(_EGLArray *array, void **data, EGLint size,
                _EGLArrayForEach filter, void *filter_data);

--- a/src/egl/main/eglconfig.c
+++ b/src/egl/main/eglconfig.c
@@ -75,7 +75,7 @@ _eglInitConfig(_EGLConfig *conf, _EGLDisplay *dpy, EGLint id)
 *
 * Note that we just save the ptr to the config (we don't copy the config).
 */
-PUBLIC EGLConfig
+EGLConfig
 _eglLinkConfig(_EGLConfig *conf)
 {
   _EGLDisplay *dpy = conf->Display;
--- a/src/egl/main/eglconfig.h
+++ b/src/egl/main/eglconfig.h
@@ -162,11 +162,11 @@ _eglGetConfigKey(const _EGLConfig *conf, EGLint key)
 }


-PUBLIC void
+extern void
 _eglInitConfig(_EGLConfig *config, _EGLDisplay *dpy, EGLint id);


-PUBLIC EGLConfig
+extern EGLConfig
 _eglLinkConfig(_EGLConfig *conf);


@@ -184,25 +184,25 @@ _eglGetConfigHandle(_EGLConfig *conf)
 }


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglMatchConfig(const _EGLConfig *conf, const _EGLConfig *criteria);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglParseConfigAttribList(_EGLConfig *conf, _EGLDisplay *dpy,
                          const EGLint *attrib_list);


-PUBLIC EGLint
+extern EGLint
 _eglCompareConfigs(const _EGLConfig *conf1, const _EGLConfig *conf2,
                   const _EGLConfig *criteria, EGLBoolean compare_id);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglFilterConfigArray(_EGLArray *array, EGLConfig *configs,
                      EGLint config_size, EGLint *num_configs,
                      EGLBoolean (*match)(const _EGLConfig *, void *),
--- a/src/egl/main/eglcontext.h
+++ b/src/egl/main/eglcontext.h
@@ -63,7 +63,7 @@ struct _egl_context
 };


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglInitContext(_EGLContext *ctx, _EGLDisplay *dpy,
                _EGLConfig *config, const EGLint *attrib_list);

@@ -72,7 +72,7 @@ extern EGLBoolean
 _eglQueryContext(_EGLDriver *drv, _EGLDisplay *dpy, _EGLContext *ctx, EGLint attribute, EGLint *value);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglBindContext(_EGLContext *ctx, _EGLSurface *draw, _EGLSurface *read,
                _EGLContext **old_ctx,
                _EGLSurface **old_draw, _EGLSurface **old_read);
--- a/src/egl/main/eglcurrent.c
+++ b/src/egl/main/eglcurrent.c
@@ -242,7 +242,7 @@ _eglIsCurrentThreadDummy(void)
 /**
 * Return the currently bound context of the given API, or NULL.
 */
-PUBLIC _EGLContext *
+_EGLContext *
 _eglGetAPIContext(EGLenum api)
 {
   _EGLThreadInfo *t = _eglGetCurrentThread();
--- a/src/egl/main/eglcurrent.h
+++ b/src/egl/main/eglcurrent.h
@@ -90,7 +90,7 @@ _eglConvertApiFromIndex(EGLint idx)
 }


-PUBLIC _EGLThreadInfo *
+extern _EGLThreadInfo *
 _eglGetCurrentThread(void);


@@ -102,15 +102,15 @@ extern EGLBoolean
 _eglIsCurrentThreadDummy(void);


-PUBLIC _EGLContext *
+extern _EGLContext *
 _eglGetAPIContext(EGLenum api);


-PUBLIC _EGLContext *
+extern _EGLContext *
 _eglGetCurrentContext(void);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglError(EGLint errCode, const char *msg);


--- a/src/egl/main/egldisplay.h
+++ b/src/egl/main/egldisplay.h
@@ -177,11 +177,11 @@ extern _EGLDisplay *
 _eglFindDisplay(_EGLPlatformType plat, void *plat_dpy);


-PUBLIC void
+extern void
 _eglReleaseDisplayResources(_EGLDriver *drv, _EGLDisplay *dpy);


-PUBLIC void
+extern void
 _eglCleanupDisplay(_EGLDisplay *disp);


@@ -189,7 +189,7 @@ extern EGLBoolean
 _eglCheckDisplayHandle(EGLDisplay dpy);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglCheckResource(void *res, _EGLResourceType type, _EGLDisplay *dpy);


@@ -221,11 +221,11 @@ extern void
 _eglInitResource(_EGLResource *res, EGLint size, _EGLDisplay *dpy);


-PUBLIC void
+extern void
 _eglGetResource(_EGLResource *res);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglPutResource(_EGLResource *res);


--- a/src/egl/main/egldriver.h
+++ b/src/egl/main/egldriver.h
@@ -96,7 +96,7 @@ extern _EGLDriver *
 _eglBuiltInDriverGLX(const char *args);


-PUBLIC _EGLDriver *
+extern _EGLDriver *
 _eglMain(const char *args);


@@ -113,11 +113,11 @@ _eglUnloadDrivers(void);


 /* defined in eglfallbacks.c */
-PUBLIC void
+extern void
 _eglInitDriverFallbacks(_EGLDriver *drv);


-PUBLIC void
+extern void
 _eglSearchPathForEach(EGLBoolean (*callback)(const char *, size_t, void *),
                      void *callback_data);

--- a/src/egl/main/eglimage.h
+++ b/src/egl/main/eglimage.h
@@ -80,12 +80,12 @@ struct _egl_image
 };


-PUBLIC EGLint
+extern EGLint
 _eglParseImageAttribList(_EGLImageAttribs *attrs, _EGLDisplay *dpy,
                         const EGLint *attrib_list);


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglInitImage(_EGLImage *img, _EGLDisplay *dpy);


--- a/src/egl/main/egllog.h
+++ b/src/egl/main/egllog.h
@@ -43,15 +43,15 @@
 typedef void (*_EGLLogProc)(EGLint level, const char *msg);


-PUBLIC void
+extern void
 _eglSetLogProc(_EGLLogProc logger);


-PUBLIC void
+extern void
 _eglSetLogLevel(EGLint level);


-PUBLIC void
+extern void
 _eglLog(EGLint level, const char *fmtStr, ...);


--- a/src/egl/main/eglscreen.h
+++ b/src/egl/main/eglscreen.h
@@ -67,11 +67,11 @@ struct _egl_screen
 };


-PUBLIC void
+extern void
 _eglInitScreen(_EGLScreen *screen, _EGLDisplay *dpy, EGLint num_modes);


-PUBLIC EGLScreenMESA
+extern EGLScreenMESA
 _eglLinkScreen(_EGLScreen *screen);


--- a/src/egl/main/eglsurface.h
+++ b/src/egl/main/eglsurface.h
@@ -78,7 +78,7 @@ struct _egl_surface
 };


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
                _EGLConfig *config, const EGLint *attrib_list);

@@ -91,10 +91,10 @@ extern EGLBoolean
 _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, EGLint attribute, EGLint value);


-PUBLIC extern EGLBoolean
+extern EGLBoolean
 _eglBindTexImage(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, EGLint buffer);

-PUBLIC extern EGLBoolean
+extern EGLBoolean
 _eglReleaseTexImage(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLint buffer);


--- a/src/egl/main/eglsync.h
+++ b/src/egl/main/eglsync.h
@@ -48,7 +48,7 @@ struct _egl_sync
 };


-PUBLIC EGLBoolean
+extern EGLBoolean
 _eglInitSync(_EGLSync *sync, _EGLDisplay *dpy, EGLenum type,
             const EGLint *attrib_list);

--- a/src/gallium/auxiliary/indices/u_indices_gen.py
+++ b/src/gallium/auxiliary/indices/u_indices_gen.py
@@ -193,7 +193,7 @@ def lineloop(intype, outtype, inpv, outpv):
    print '  for (i = start, j = 0; j < nr - 2; j+=2, i++) { '
    do_line( intype, outtype, 'out+j',  'i', 'i+1', inpv, outpv );
    print '   }'
-    do_line( intype, outtype, 'out+j',  'i', '0', inpv, outpv );
+    do_line( intype, outtype, 'out+j',  'i', 'start', inpv, outpv );
    postamble()

 def tris(intype, outtype, inpv, outpv):
@@ -218,7 +218,7 @@ def tristrip(intype, outtype, inpv, outpv):
 def trifan(intype, outtype, inpv, outpv):
    preamble(intype, outtype, inpv, outpv, prim='trifan')
    print '  for (i = start, j = 0; j < nr; j+=3, i++) { '
-    do_tri( intype, outtype, 'out+j',  '0', 'i+1', 'i+2', inpv, outpv );
+    do_tri( intype, outtype, 'out+j',  'start', 'i+1', 'i+2', inpv, outpv );
    print '   }'
    postamble()

@@ -228,9 +228,9 @@ def polygon(intype, outtype, inpv, outpv):
    preamble(intype, outtype, inpv, outpv, prim='polygon')
    print '  for (i = start, j = 0; j < nr; j+=3, i++) { '
    if inpv == FIRST:
-        do_tri( intype, outtype, 'out+j',  '0', 'i+1', 'i+2', inpv, outpv );
+        do_tri( intype, outtype, 'out+j',  'start', 'i+1', 'i+2', inpv, outpv );
    else:
-        do_tri( intype, outtype, 'out+j',  'i+1', 'i+2', '0', inpv, outpv );
+        do_tri( intype, outtype, 'out+j',  'i+1', 'i+2', 'start', inpv, outpv );
    print '   }'
    postamble()

--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  15085 bytes, from 2014-12-20 21:49:41)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64344 bytes, from 2014-12-12 20:22:26)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51270 bytes, from 2015-01-18 23:05:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64848 bytes, from 2015-02-20 18:21:24)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51942 bytes, from 2015-02-24 17:14:02)

 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir-a2xx.c
@@ -439,7 +439,8 @@ static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
 		assert(sdst_reg->flags == dst_reg->flags);

 		if (src3_reg) {
-			assert(src3_reg == instr->regs[reg++]);
+			assert(src3_reg == instr->regs[reg]);
+			reg++;
 		} else {
 			src3_reg = instr->regs[reg++];
 		}
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -13,10 +13,10 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  15085 bytes, from 2014-12-20 21:49:41)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64344 bytes, from 2014-12-12 20:22:26)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51270 bytes, from 2015-01-18 23:05:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64848 bytes, from 2015-02-20 18:21:24)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51942 bytes, from 2015-02-24 17:14:02)

-Copyright (C) 2013-2014 by the following authors:
+Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)

 Permission is hereby granted, free of charge, to any person obtaining
@@ -130,6 +130,10 @@ enum a3xx_tex_fmt {
 	TFMT_I420_Y = 24,
 	TFMT_I420_U = 26,
 	TFMT_I420_V = 27,
+	TFMT_ATC_RGB = 32,
+	TFMT_ATC_RGBA_EXPLICIT = 33,
+	TFMT_ETC1 = 34,
+	TFMT_ATC_RGBA_INTERPOLATED = 35,
 	TFMT_DXT1 = 36,
 	TFMT_DXT3 = 37,
 	TFMT_DXT5 = 38,
@@ -854,6 +858,12 @@ static inline uint32_t A3XX_RB_MODE_CONTROL_RENDER_MODE(enum a3xx_render_mode va
 {
 	return ((val) << A3XX_RB_MODE_CONTROL_RENDER_MODE__SHIFT) & A3XX_RB_MODE_CONTROL_RENDER_MODE__MASK;
 }
+#define A3XX_RB_MODE_CONTROL_MRT__MASK				0x00003000
+#define A3XX_RB_MODE_CONTROL_MRT__SHIFT				12
+static inline uint32_t A3XX_RB_MODE_CONTROL_MRT(uint32_t val)
+{
+	return ((val) << A3XX_RB_MODE_CONTROL_MRT__SHIFT) & A3XX_RB_MODE_CONTROL_MRT__MASK;
+}
 #define A3XX_RB_MODE_CONTROL_MARB_CACHE_SPLIT_MODE		0x00008000
 #define A3XX_RB_MODE_CONTROL_PACKER_TIMER_ENABLE		0x00010000

@@ -2107,6 +2117,12 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
 #define REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_1			0x000022e9

 #define REG_A3XX_SP_FS_OUTPUT_REG				0x000022ec
+#define A3XX_SP_FS_OUTPUT_REG_MRT__MASK				0x00000003
+#define A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT			0
+static inline uint32_t A3XX_SP_FS_OUTPUT_REG_MRT(uint32_t val)
+{
+	return ((val) << A3XX_SP_FS_OUTPUT_REG_MRT__SHIFT) & A3XX_SP_FS_OUTPUT_REG_MRT__MASK;
+}
 #define A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE			0x00000080
 #define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__MASK			0x0000ff00
 #define A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID__SHIFT		8
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -365,7 +365,10 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
 				COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
 		OUT_RING(ring, 0x00000000);
 	} else {
-		uint32_t vinterp[4] = {0}, flatshade[2] = {0};
+		uint32_t vinterp[4], flatshade[2];
+
+		memset(vinterp, 0, sizeof(vinterp));
+		memset(flatshade, 0, sizeof(flatshade));

 		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
 		for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) {
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  15085 bytes, from 2014-12-20 21:49:41)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64344 bytes, from 2014-12-12 20:22:26)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51270 bytes, from 2015-01-18 23:05:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64848 bytes, from 2015-02-20 18:21:24)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51942 bytes, from 2015-02-24 17:14:02)

 Copyright (C) 2013-2015 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
@@ -150,6 +150,7 @@ enum a4xx_depth_format {
 enum a4xx_tex_filter {
 	A4XX_TEX_NEAREST = 0,
 	A4XX_TEX_LINEAR = 1,
+	A4XX_TEX_ANISO = 2,
 };

 enum a4xx_tex_clamp {
@@ -159,6 +160,14 @@ enum a4xx_tex_clamp {
 	A4XX_TEX_CLAMP_NONE = 3,
 };

+enum a4xx_tex_aniso {
+	A4XX_TEX_ANISO_1 = 0,
+	A4XX_TEX_ANISO_2 = 1,
+	A4XX_TEX_ANISO_4 = 2,
+	A4XX_TEX_ANISO_8 = 3,
+	A4XX_TEX_ANISO_16 = 4,
+};
+
 enum a4xx_tex_swiz {
 	A4XX_TEX_X = 0,
 	A4XX_TEX_Y = 1,
@@ -936,6 +945,10 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)

 #define REG_A4XX_CP_IB2_BUFSZ					0x00000209

+#define REG_A4XX_CP_ME_NRT_ADDR					0x0000020c
+
+#define REG_A4XX_CP_ME_NRT_DATA					0x0000020d
+
 #define REG_A4XX_CP_ME_RB_DONE_DATA				0x00000217

 #define REG_A4XX_CP_QUEUE_THRESH2				0x00000219
@@ -946,9 +959,9 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)

 #define REG_A4XX_CP_ROQ_DATA					0x0000021d

-#define REG_A4XX_CP_MEQ_ADDR 					0x0000021e
+#define REG_A4XX_CP_MEQ_ADDR					0x0000021e

-#define REG_A4XX_CP_MEQ_DATA 					0x0000021f
+#define REG_A4XX_CP_MEQ_DATA					0x0000021f

 #define REG_A4XX_CP_MERCIU_ADDR					0x00000220

@@ -1424,6 +1437,10 @@ static inline uint32_t REG_A4XX_VSC_PIPE_DATA_LENGTH_REG(uint32_t i0) { return 0

 #define REG_A4XX_VFD_PERFCTR_VFD_SEL_7				0x00000e4a

+#define REG_A4XX_VGT_CL_INITIATOR				0x000021d0
+
+#define REG_A4XX_VGT_EVENT_INITIATOR				0x000021d9
+
 #define REG_A4XX_VFD_CONTROL_0					0x00002200
 #define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__MASK			0x000000ff
 #define A4XX_VFD_CONTROL_0_TOTALATTRTOVS__SHIFT			0
@@ -2041,7 +2058,12 @@ static inline uint32_t A4XX_HLSQ_GS_CONTROL_REG_INSTRLENGTH(uint32_t val)
 #define REG_A4XX_PC_BIN_BASE					0x000021c0

 #define REG_A4XX_PC_PRIM_VTX_CNTL				0x000021c4
-#define A4XX_PC_PRIM_VTX_CNTL_VAROUT				0x00000001
+#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK			0x0000000f
+#define A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT			0
+static inline uint32_t A4XX_PC_PRIM_VTX_CNTL_VAROUT(uint32_t val)
+{
+	return ((val) << A4XX_PC_PRIM_VTX_CNTL_VAROUT__SHIFT) & A4XX_PC_PRIM_VTX_CNTL_VAROUT__MASK;
+}
 #define A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST		0x02000000
 #define A4XX_PC_PRIM_VTX_CNTL_PSIZE				0x04000000

@@ -2166,6 +2188,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_WRAP_R(enum a4xx_tex_clamp val)
 {
 	return ((val) << A4XX_TEX_SAMP_0_WRAP_R__SHIFT) & A4XX_TEX_SAMP_0_WRAP_R__MASK;
 }
+#define A4XX_TEX_SAMP_0_ANISO__MASK				0x0001c000
+#define A4XX_TEX_SAMP_0_ANISO__SHIFT				14
+static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
+{
+	return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
+}

 #define REG_A4XX_TEX_SAMP_1					0x00000001
 #define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK			0x0000000e
--- a/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_draw.c
@@ -100,6 +100,9 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
 		if (last_key->alpha != key->alpha)
 			ctx->prog.dirty |= FD_SHADER_DIRTY_FP;

+		if (last_key->rasterflat != key->rasterflat)
+			ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
 		fd4_ctx->last_key = *key;
 	}
 }
@@ -118,6 +121,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 			.binning_pass = true,
 			.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
 			.alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])),
+			.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
 			// TODO set .half_precision based on render target format,
 			// ie. float16 and smaller use half, float32 use full..
 			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
@@ -130,7 +134,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
 			.fsaturate_r = fd4_ctx->fsaturate_r,
 		},
 		.format = fd4_emit_format(pfb->cbufs[0]),
-		.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
 	};
 	unsigned dirty;

--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -425,13 +425,9 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 		OUT_PKT0(ring, REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
 		OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
 		OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
-	}

-	if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
-		uint32_t val = fd4_rasterizer_stateobj(ctx->rasterizer)
-				->gras_cl_clip_cntl;
 		OUT_PKT0(ring, REG_A4XX_GRAS_CL_CLIP_CNTL, 1);
-		OUT_RING(ring, val);
+		OUT_RING(ring, rasterizer->gras_cl_clip_cntl);
 	}

 	/* NOTE: since primitive_restart is not actually part of any
@@ -444,7 +440,12 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
 				->pc_prim_vtx_cntl;

 		val |= COND(vp->writes_psize, A4XX_PC_PRIM_VTX_CNTL_PSIZE);
-		val |= COND(fp->total_in > 0, A4XX_PC_PRIM_VTX_CNTL_VAROUT);
+		if (fp->total_in > 0) {
+			uint32_t varout = align(fp->total_in, 16) / 16;
+			if (varout > 1)
+				varout = align(varout, 2);
+			val |= A4XX_PC_PRIM_VTX_CNTL_VAROUT(varout);
+		}

 		OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 2);
 		OUT_RING(ring, val);
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.h
@@ -55,7 +55,6 @@ struct fd4_emit {
 	struct ir3_shader_key key;
 	enum a4xx_color_fmt format;
 	uint32_t dirty;
-	bool rasterflat;

 	/* cached to avoid repeated lookups of same variants: */
 	struct ir3_shader_variant *vp, *fp;
--- a/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_gmem.c
@@ -375,7 +375,7 @@ fd4_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)

 	OUT_PKT0(ring, REG_A4XX_PC_PRIM_VTX_CNTL, 1);
 	OUT_RING(ring, A4XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST |
-			A4XX_PC_PRIM_VTX_CNTL_VAROUT);
+			A4XX_PC_PRIM_VTX_CNTL_VAROUT(1));

 	OUT_PKT0(ring, REG_A4XX_VFD_INDEX_OFFSET, 2);
 	OUT_RING(ring, 0);            /* VFD_INDEX_OFFSET */
@@ -436,13 +436,6 @@ fd4_emit_sysmem_prep(struct fd_context *ctx)
 {
 	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
 	struct fd_ringbuffer *ring = ctx->ring;
-	uint32_t pitch = 0;
-
-	if (pfb->cbufs[0]) {
-		struct pipe_surface *psurf = pfb->cbufs[0];
-		unsigned lvl = psurf->u.tex.level;
-		pitch = fd_resource(psurf->texture)->slices[lvl].pitch;
-	}

 	fd4_emit_restore(ctx);

--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -420,8 +420,28 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
 				COND(s[VS].v->writes_psize, A4XX_VPC_ATTR_PSIZE));
 		OUT_RING(ring, 0x00000000);
 	} else {
-		uint32_t vinterp[8] = {0}, flatshade[2] = {0};
+		uint32_t vinterp[8], flatshade[2];

+		memset(vinterp, 0, sizeof(vinterp));
+		memset(flatshade, 0, sizeof(flatshade));
+
+		/* TODO: looks like we need to do int varyings in the frag
+		 * shader on a4xx (no flatshad reg?):
+		 *
+		 *    (sy)(ss)nop
+		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
+		 *    ldlv.u32 r0.y,l[r0.x+1], 1
+		 *    (ss)bary.f (ei)r63.x, 0, r0.x
+		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
+		 *    (rpt5)nop
+		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
+		 *
+		 * for now, don't set FLAT on vinterp[], since that
+		 * at least works well enough for pure float impl (ie.
+		 * pre glsl130).. we'll have to do a bit more work to
+		 * handle this properly:
+		 */
+#if 0
 		/* figure out VARYING_INTERP / FLAT_SHAD register values: */
 		for (j = -1; (j = ir3_next_varying(s[FS].v, j)) < (int)s[FS].v->inputs_count; ) {
 			uint32_t interp = s[FS].v->inputs[j].interpolate;
@@ -443,25 +463,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit)
 				}
 			}
 		}
-
-		/* HACK: looks like we need to do int varyings in the frag
-		 * shader on a4xx (no flatshad reg?):
-		 *
-		 *    (sy)(ss)nop
-		 *    (sy)ldlv.u32 r0.x,l[r0.x], 1
-		 *    ldlv.u32 r0.y,l[r0.x+1], 1
-		 *    (ss)bary.f (ei)r63.x, 0, r0.x
-		 *    (ss)(rpt1)cov.s32f16 hr0.x, (r)r0.x
-		 *    (rpt5)nop
-		 *    sam (f16)(xyzw)hr0.x, hr0.x, s#0, t#0
-		 *
-		 * for now, don't set FLAT on vinterp[], since that
-		 * at least works well enough for pure float impl (ie.
-		 * pre glsl130).. we'll have to do a bit more work to
-		 * handle this properly:
-		 */
-		for (i = 0; i < ARRAY_SIZE(vinterp); i++)
-			vinterp[i] = 0;
+#endif

 		OUT_PKT0(ring, REG_A4XX_VPC_ATTR, 2);
 		OUT_RING(ring, A4XX_VPC_ATTR_TOTALATTR(s[FS].v->total_in) |
--- a/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_texture.c
@@ -68,13 +68,13 @@ tex_clamp(unsigned wrap)
 }

 static enum a4xx_tex_filter
-tex_filter(unsigned filter)
+tex_filter(unsigned filter, bool aniso)
 {
 	switch (filter) {
 	case PIPE_TEX_FILTER_NEAREST:
 		return A4XX_TEX_NEAREST;
 	case PIPE_TEX_FILTER_LINEAR:
-		return A4XX_TEX_LINEAR;
+		return aniso ? A4XX_TEX_ANISO : A4XX_TEX_LINEAR;
 	default:
 		DBG("invalid filter: %u", filter);
 		return 0;
@@ -86,6 +86,7 @@ fd4_sampler_state_create(struct pipe_context *pctx,
 		const struct pipe_sampler_state *cso)
 {
 	struct fd4_sampler_stateobj *so = CALLOC_STRUCT(fd4_sampler_stateobj);
+	unsigned aniso = util_last_bit(MIN2(cso->max_anisotropy >> 1, 8));
 	bool miplinear = false;

 	if (!so)
@@ -98,8 +99,9 @@ fd4_sampler_state_create(struct pipe_context *pctx,

 	so->texsamp0 =
 		COND(miplinear, A4XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
-		A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
-		A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
+		A4XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter, aniso)) |
+		A4XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter, aniso)) |
+		A4XX_TEX_SAMP_0_ANISO(aniso) |
 		A4XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
 		A4XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
 		A4XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  15085 bytes, from 2014-12-20 21:49:41)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64344 bytes, from 2014-12-12 20:22:26)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51270 bytes, from 2015-01-18 23:05:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64848 bytes, from 2015-02-20 18:21:24)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51942 bytes, from 2015-02-24 17:14:02)

 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -13,8 +13,8 @@ The rules-ng-ng source files this header was generated from are:
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml          (  32901 bytes, from 2014-06-02 15:21:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml (  10551 bytes, from 2014-11-13 22:44:30)
 - /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml    (  15085 bytes, from 2014-12-20 21:49:41)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64344 bytes, from 2014-12-12 20:22:26)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51270 bytes, from 2015-01-18 23:05:48)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml          (  64848 bytes, from 2015-02-20 18:21:24)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml          (  51942 bytes, from 2015-02-24 17:14:02)

 Copyright (C) 2013-2014 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
--- a/src/gallium/drivers/freedreno/freedreno_state.c
+++ b/src/gallium/drivers/freedreno/freedreno_state.c
@@ -123,12 +123,12 @@ fd_set_framebuffer_state(struct pipe_context *pctx,

 	fd_context_render(pctx);

-	util_copy_framebuffer_state(cso, framebuffer);
-
 	if ((cso->width != framebuffer->width) ||
 			(cso->height != framebuffer->height))
 		ctx->needs_rb_fbd = true;

+	util_copy_framebuffer_state(cso, framebuffer);
+
 	ctx->dirty |= FD_DIRTY_FRAMEBUFFER;

 	ctx->disabled_scissor.minx = 0;
--- a/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
+++ b/src/gallium/drivers/freedreno/ir3/disasm-a3xx.c
@@ -448,117 +448,114 @@ static void print_instr_cat5(instr_t *instr)
 	}
 }

-static int32_t u2i(uint32_t val, int nbits)
-{
-	return ((val >> (nbits-1)) * ~((1 << nbits) - 1)) | val;
-}
-
 static void print_instr_cat6(instr_t *instr)
 {
 	instr_cat6_t *cat6 = &instr->cat6;
+	char sd = 0, ss = 0;  /* dst/src address space */
+	bool full = type_size(cat6->type) == 32;
+	bool nodst = false;

 	printf(".%s ", type[cat6->type]);

 	switch (cat6->opc) {
+	case OPC_STG:
+		sd = 'g';
+		break;
+	case OPC_STP:
+		sd = 'p';
+		break;
+	case OPC_STL:
+	case OPC_STLW:
+		sd = 'l';
+		break;
+
 	case OPC_LDG:
+		ss = 'g';
+		break;
 	case OPC_LDP:
+		ss = 'p';
+		break;
 	case OPC_LDL:
 	case OPC_LDLW:
 	case OPC_LDLV:
-		/* load instructions: */
-		print_reg_dst((reg_t)(cat6->a.dst), type_size(cat6->type) == 32, false);
-		printf(",");
-		switch (cat6->opc) {
-		case OPC_LDG:
-			printf("g");
-			break;
-		case OPC_LDP:
-			printf("p");
-			break;
-		case OPC_LDL:
-		case OPC_LDLW:
-		case OPC_LDLV:
-			printf("l");
-			break;
-		}
-		printf("[");
-		print_reg_src((reg_t)(cat6->a.src), true,
-				false, false, false, false, false, false);
-		if (cat6->a.off)
-			printf("%+d", cat6->a.off);
-		printf("]");
+		ss = 'l';
 		break;
-	case OPC_PREFETCH:
-		/* similar to load instructions: */
-		printf("g[");
-		print_reg_src((reg_t)(cat6->a.src), true,
-				false, false, false, false, false, false);
-		if (cat6->a.off)
-			printf("%+d", cat6->a.off);
-		printf("]");
-		break;
-	case OPC_STG:
-	case OPC_STP:
-	case OPC_STL:
-	case OPC_STLW:
-		/* store instructions: */
-		switch (cat6->opc) {
-		case OPC_STG:
-			printf("g");
-			break;
-		case OPC_STP:
-			printf("p");
-			break;
-		case OPC_STL:
-		case OPC_STLW:
-			printf("l");
-			break;
-		}
-		printf("[");
-		print_reg_dst((reg_t)(cat6->b.dst), true, false);
-		if (cat6->b.off || cat6->b.off_hi)
-			printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13));
-		printf("]");
-		printf(",");
-		print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32,
-				false, false, false, false, false, false);

+	case OPC_L2G:
+		ss = 'l';
+		sd = 'g';
 		break;
+
+	case OPC_G2L:
+		ss = 'g';
+		sd = 'l';
+		break;
+
+	case OPC_PREFETCH:
+		ss = 'g';
+		nodst = true;
+		break;
+
 	case OPC_STI:
-		/* sti has same encoding as other store instructions, but
-		 * slightly different syntax:
-		 */
-		print_reg_dst((reg_t)(cat6->b.dst), false /* XXX is it always half? */, false);
-		if (cat6->b.off || cat6->b.off_hi)
-			printf("%+d", u2i((cat6->b.off_hi << 8) | cat6->b.off, 13));
-		printf(",");
-		print_reg_src((reg_t)(cat6->b.src), type_size(cat6->type) == 32,
-				false, false, false, false, false, false);
+		full = false;  // XXX or inverts??
 		break;
 	}

-	printf(", %d", cat6->iim_val);
+	if (cat6->has_off) {
+		if (!nodst) {
+			if (sd)
+				printf("%c[", sd);
+			print_reg_dst((reg_t)(cat6->a.dst), full, false);
+			if (sd)
+				printf("]");
+			printf(", ");
+		}
+		if (ss)
+			printf("%c[", ss);
+		print_reg_src((reg_t)(cat6->a.src1), true,
+				false, false, cat6->a.src1_im, false, false, false);
+		printf("%+d", cat6->a.off);
+		if (ss)
+			printf("]");
+		printf(", ");
+		print_reg_src((reg_t)(cat6->a.src2), full,
+				false, false, cat6->a.src2_im, false, false, false);
+	} else {
+		if (!nodst) {
+			if (sd)
+				printf("%c[", sd);
+			print_reg_dst((reg_t)(cat6->b.dst), full, false);
+			if (sd)
+				printf("]");
+			printf(", ");
+		}
+		if (ss)
+			printf("%c[", ss);
+		print_reg_src((reg_t)(cat6->b.src1), true,
+				false, false, cat6->b.src1_im, false, false, false);
+		if (ss)
+			printf("]");
+		printf(", ");
+		print_reg_src((reg_t)(cat6->b.src2), full,
+				false, false, cat6->b.src2_im, false, false, false);
+	}

 	if (debug & PRINT_VERBOSE) {
 		switch (cat6->opc) {
 		case OPC_LDG:
 		case OPC_LDP:
 			/* load instructions: */
-			if (cat6->a.dummy1|cat6->a.dummy2|cat6->a.dummy3)
-				printf("\t{6: %x,%x,%x}", cat6->a.dummy1, cat6->a.dummy2, cat6->a.dummy3);
-			if ((cat6->a.must_be_one1 != 1) || (cat6->a.must_be_one2 != 1))
-				printf("{?? %d,%d ??}", cat6->a.must_be_one1, cat6->a.must_be_one2);
+			if (cat6->a.dummy2|cat6->a.dummy3)
+				printf("\t{6: %x,%x}", cat6->a.dummy2, cat6->a.dummy3);
 			break;
 		case OPC_STG:
 		case OPC_STP:
 		case OPC_STI:
 			/* store instructions: */
-			if (cat6->b.dummy1|cat6->b.dummy2)
-				printf("\t{6: %x,%x}", cat6->b.dummy1, cat6->b.dummy2);
-			if ((cat6->b.must_be_one1 != 1) || (cat6->b.must_be_one2 != 1) ||
-					(cat6->b.must_be_zero1 != 0))
-				printf("{?? %d,%d,%d ??}", cat6->b.must_be_one1, cat6->b.must_be_one2,
-						cat6->b.must_be_zero1);
+			if (cat6->b.dummy2|cat6->b.dummy3)
+				printf("\t{6: %x,%x}", cat6->b.dummy2, cat6->b.dummy3);
+			if (cat6->b.ignore0)
+				printf("\t{?? %x}", cat6->b.ignore0);
 			break;
 		}
 	}
--- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
+++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h
@@ -572,15 +572,15 @@ typedef struct PACKED {
 	uint32_t opc_cat  : 3;
 } instr_cat5_t;

-/* used for load instructions: */
+/* [src1 + off], src2: */
 typedef struct PACKED {
 	/* dword0: */
-	uint32_t must_be_one1 : 1;
-	int16_t  off      : 13;
-	uint32_t src      : 8;
-	uint32_t dummy1   : 1;
-	uint32_t must_be_one2 : 1;
-	int32_t  iim_val  : 8;
+	uint32_t mustbe1  : 1;
+	int32_t  off      : 13;
+	uint32_t src1     : 8;
+	uint32_t src1_im  : 1;
+	uint32_t src2_im  : 1;
+	uint32_t src2     : 8;

 	/* dword1: */
 	uint32_t dst      : 8;
@@ -593,35 +593,38 @@ typedef struct PACKED {
 	uint32_t opc_cat  : 3;
 } instr_cat6a_t;

-/* used for store instructions: */
+/* [src1], src2: */
 typedef struct PACKED {
 	/* dword0: */
-	uint32_t must_be_zero1 : 1;
-	uint32_t src      : 8;
-	uint32_t off_hi   : 5;   /* high bits of 'off'... ugly! */
-	uint32_t dummy1   : 9;
-	uint32_t must_be_one1 : 1;
-	int32_t  iim_val  : 8;
+	uint32_t mustbe0  : 1;
+	uint32_t src1     : 8;
+	uint32_t ignore0  : 13;
+	uint32_t src1_im  : 1;
+	uint32_t src2_im  : 1;
+	uint32_t src2     : 8;

 	/* dword1: */
-	uint16_t off      : 8;
-	uint32_t must_be_one2 : 1;
 	uint32_t dst      : 8;
+	uint32_t dummy2   : 9;
 	uint32_t type     : 3;
-	uint32_t dummy2   : 2;
+	uint32_t dummy3   : 2;
 	uint32_t opc      : 5;
 	uint32_t jmp_tgt  : 1;
 	uint32_t sync     : 1;
 	uint32_t opc_cat  : 3;
 } instr_cat6b_t;

+/* I think some of the other cat6 instructions use additional
+ * sub-encodings..
+ */
+
 typedef union PACKED {
 	instr_cat6a_t a;
 	instr_cat6b_t b;
 	struct PACKED {
 		/* dword0: */
-		uint32_t pad1     : 24;
-		int32_t  iim_val  : 8;
+		uint32_t has_off  : 1;
+		uint32_t pad1     : 31;

 		/* dword1: */
 		uint32_t pad2     : 17;
--- a/src/gallium/drivers/freedreno/ir3/ir3.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3.c
@@ -113,6 +113,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,

 		if (reg->flags & IR3_REG_CONST) {
 			info->max_const = MAX2(info->max_const, max);
+		} else if (val.num == 63) {
+			/* ignore writes to dummy register r63.x */
 		} else if ((max != REG_A0) && (max != REG_P0)) {
 			if (reg->flags & IR3_REG_HALF) {
 				info->max_half_reg = MAX2(info->max_half_reg, max);
@@ -474,58 +476,40 @@ static int emit_cat5(struct ir3_instruction *instr, void *ptr,
 static int emit_cat6(struct ir3_instruction *instr, void *ptr,
 		struct ir3_info *info)
 {
-	struct ir3_register *dst = instr->regs[0];
-	struct ir3_register *src = instr->regs[1];
+	struct ir3_register *dst  = instr->regs[0];
+	struct ir3_register *src1 = instr->regs[1];
+	struct ir3_register *src2 = (instr->regs_count >= 3) ? instr->regs[2] : NULL;
 	instr_cat6_t *cat6 = ptr;

-	iassert(instr->regs_count == 2);
+	iassert(instr->regs_count >= 2);

-	switch (instr->opc) {
-	/* load instructions: */
-	case OPC_LDG:
-	case OPC_LDP:
-	case OPC_LDL:
-	case OPC_LDLW:
-	case OPC_LDLV:
-	case OPC_PREFETCH: {
+	if (instr->cat6.offset) {
 		instr_cat6a_t *cat6a = ptr;

-		iassert(!((dst->flags ^ type_flags(instr->cat6.type)) & IR3_REG_HALF));
+		cat6->has_off = true;

-		cat6a->must_be_one1  = 1;
-		cat6a->must_be_one2  = 1;
-		cat6a->off = instr->cat6.offset;
-		cat6a->src = reg(src, info, instr->repeat, 0);
 		cat6a->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
-		break;
-	}
-	/* store instructions: */
-	case OPC_STG:
-	case OPC_STP:
-	case OPC_STL:
-	case OPC_STLW:
-	case OPC_STI: {
+		cat6a->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
+		cat6a->src1_im = !!(src1->flags & IR3_REG_IMMED);
+		if (src2) {
+			cat6a->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+			cat6a->src2_im = !!(src2->flags & IR3_REG_IMMED);
+		}
+		cat6a->off = instr->cat6.offset;
+	} else {
 		instr_cat6b_t *cat6b = ptr;
-		uint32_t src_flags = type_flags(instr->cat6.type);
-		uint32_t dst_flags = (instr->opc == OPC_STI) ? IR3_REG_HALF : 0;

-		iassert(!((src->flags ^ src_flags) & IR3_REG_HALF));
+		cat6->has_off = false;

-		cat6b->must_be_one1  = 1;
-		cat6b->must_be_one2  = 1;
-		cat6b->src    = reg(src, info, instr->repeat, src_flags);
-		cat6b->off_hi = instr->cat6.offset >> 8;
-		cat6b->off    = instr->cat6.offset;
-		cat6b->dst    = reg(dst, info, instr->repeat, IR3_REG_R | dst_flags);
-
-		break;
-	}
-	default:
-		// TODO
-		break;
+		cat6b->dst = reg(dst, info, instr->repeat, IR3_REG_R | IR3_REG_HALF);
+		cat6b->src1 = reg(src1, info, instr->repeat, IR3_REG_IMMED);
+		cat6b->src1_im = !!(src1->flags & IR3_REG_IMMED);
+		if (src2) {
+			cat6b->src2 = reg(src2, info, instr->repeat, IR3_REG_IMMED);
+			cat6b->src2_im = !!(src2->flags & IR3_REG_IMMED);
+		}
 	}

-	cat6->iim_val  = instr->cat6.iim_val;
 	cat6->type     = instr->cat6.type;
 	cat6->opc      = instr->opc;
 	cat6->jmp_tgt  = !!(instr->flags & IR3_INSTR_JP);
--- a/src/gallium/drivers/freedreno/ir3/ir3.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3.h
@@ -420,8 +420,19 @@ static inline bool is_tex(struct ir3_instruction *instr)
 	return (instr->category == 5);
 }

+static inline bool is_mem(struct ir3_instruction *instr)
+{
+	return (instr->category == 6);
+}
+
 static inline bool is_input(struct ir3_instruction *instr)
 {
+	/* in some cases, ldlv is used to fetch varying without
+	 * interpolation.. fortunately inloc is the first src
+	 * register in either case
+	 */
+	if (is_mem(instr) && (instr->opc == OPC_LDLV))
+		return true;
 	return (instr->category == 2) && (instr->opc == OPC_BARY_F);
 }

--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
@@ -102,6 +102,11 @@ struct ir3_compile_context {
 	/* for calculating input/output positions/linkages: */
 	unsigned next_inloc;

+	/* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
+	 * so we need to use ldlv.u32 to load the varying directly:
+	 */
+	bool flat_bypass;
+
 	unsigned num_internal_temps;
 	struct tgsi_src_register internal_temps[8];

@@ -198,9 +203,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
 	} else if (ir3_shader_gpuid(so->shader) >= 400) {
 		/* a4xx seems to have *no* sam.p */
 		lconfig.lower_TXP = ~0;  /* lower all txp */
+		/* need special handling for "flat" */
+		ctx->flat_bypass = true;
 	} else {
 		/* a3xx just needs to avoid sam.p for 3d tex */
 		lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
+		/* no special handling for "flat" */
+		ctx->flat_bypass = false;
 	}

 	ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
@@ -1537,6 +1546,7 @@ trans_txq(const struct instr_translater *t,
 	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
 	struct tgsi_src_register *level = &inst->Src[0].Register;
 	struct tgsi_src_register *samp = &inst->Src[1].Register;
+	const struct target_info *tgt = &tex_targets[inst->Texture.Texture];
 	struct tex_info tinf;

 	memset(&tinf, 0, sizeof(tinf));
@@ -1550,8 +1560,67 @@ trans_txq(const struct instr_translater *t,
 	instr->cat5.tex  = samp->Index;
 	instr->flags |= tinf.flags;

-	add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
-	add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
+	if (tgt->array && (dst->WriteMask & (1 << tgt->dims))) {
+		/* Array size actually ends up in .w rather than .z. This doesn't
+		 * matter for miplevel 0, but for higher mips the value in z is
+		 * minified whereas w stays. Also, the value in TEX_CONST_3_DEPTH is
+		 * returned, which means that we have to add 1 to it for arrays.
+		 */
+		struct tgsi_dst_register tmp_dst;
+		struct tgsi_src_register *tmp_src;
+		type_t type_mov = get_utype(ctx);
+
+		tmp_src = get_internal_temp(ctx, &tmp_dst);
+		add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0,
+						   dst->WriteMask | TGSI_WRITEMASK_W);
+		add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
+
+		if (dst->WriteMask & TGSI_WRITEMASK_X) {
+			instr = instr_create(ctx, 1, 0);
+			instr->cat1.src_type = type_mov;
+			instr->cat1.dst_type = type_mov;
+			add_dst_reg(ctx, instr, dst, 0);
+			add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 0));
+		}
+
+		if (tgt->dims == 2) {
+			if (dst->WriteMask & TGSI_WRITEMASK_Y) {
+				instr = instr_create(ctx, 1, 0);
+				instr->cat1.src_type = type_mov;
+				instr->cat1.dst_type = type_mov;
+				add_dst_reg(ctx, instr, dst, 1);
+				add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 1));
+			}
+		}
+
+		instr = instr_create(ctx, 2, OPC_ADD_U);
+		add_dst_reg(ctx, instr, dst, tgt->dims);
+		add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 3));
+		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
+	} else {
+		add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
+		add_src_reg_wrmask(ctx, instr, level, level->SwizzleX, 0x1);
+	}
+
+	if (dst->WriteMask & TGSI_WRITEMASK_W) {
+		/* The # of levels comes from getinfo.z. We need to add 1 to it, since
+		 * the value in TEX_CONST_0 is zero-based.
+		 */
+		struct tgsi_dst_register tmp_dst;
+		struct tgsi_src_register *tmp_src;
+
+		tmp_src = get_internal_temp(ctx, &tmp_dst);
+		instr = instr_create(ctx, 5, OPC_GETINFO);
+		instr->cat5.type = get_utype(ctx);
+		instr->cat5.samp = samp->Index;
+		instr->cat5.tex  = samp->Index;
+		add_dst_reg_wrmask(ctx, instr, &tmp_dst, 0, TGSI_WRITEMASK_Z);
+
+		instr = instr_create(ctx, 2, OPC_ADD_U);
+		add_dst_reg(ctx, instr, dst, 3);
+		add_src_reg(ctx, instr, tmp_src, src_swiz(tmp_src, 2));
+		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
+	}
 }

 /* DDX/DDY */
@@ -2718,11 +2787,23 @@ decl_semantic(const struct tgsi_declaration_semantic *sem)

 static struct ir3_instruction *
 decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
-		unsigned j, unsigned inloc)
+		unsigned j, unsigned inloc, bool use_ldlv)
 {
 	struct ir3_instruction *instr;
 	struct ir3_register *src;

+	if (use_ldlv) {
+		/* ldlv.u32 dst, l[#inloc], 1 */
+		instr = instr_create(ctx, 6, OPC_LDLV);
+		instr->cat6.type = TYPE_U32;
+		instr->cat6.iim_val = 1;
+		ir3_reg_create(instr, regid, 0);   /* dummy dst */
+		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
+		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
+
+		return instr;
+	}
+
 	/* bary.f dst, #inloc, r0.x */
 	instr = instr_create(ctx, 2, OPC_BARY_F);
 	ir3_reg_create(instr, regid, 0);   /* dummy dst */
@@ -2916,9 +2997,31 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
 					so->frag_face = true;
 					instr = decl_in_frag_face(ctx, r + j, j);
 				} else {
+					bool use_ldlv = false;
+
+					/* I don't believe it is valid to not have Interp
+					 * on a normal frag shader input, and various parts
+					 * that that handle flat/smooth shading make this
+					 * assumption as well.
+					 */
+					compile_assert(ctx, decl->Declaration.Interpolate);
+
+					if (ctx->flat_bypass) {
+						switch (decl->Interp.Interpolate) {
+						case TGSI_INTERPOLATE_COLOR:
+							if (!ctx->so->key.rasterflat)
+								break;
+							/* fallthrough */
+						case TGSI_INTERPOLATE_CONSTANT:
+							use_ldlv = true;
+							break;
+						}
+					}
+
 					so->inputs[n].bary = true;
+
 					instr = decl_in_frag_bary(ctx, r + j, j,
-							so->inputs[n].inloc + j - 8);
+							so->inputs[n].inloc + j - 8, use_ldlv);
 				}
 			} else {
 				instr = create_input(ctx->block, NULL, (i * 4) + j);
@@ -3220,7 +3323,7 @@ ir3_compile_shader(struct ir3_shader_variant *so,
 	if (key.binning_pass) {
 		for (i = 0, j = 0; i < so->outputs_count; i++) {
 			unsigned name = sem2name(so->outputs[i].semantic);
-			unsigned idx = sem2name(so->outputs[i].semantic);
+			unsigned idx = sem2idx(so->outputs[i].semantic);

 			/* throw away everything but first position/psize */
 			if ((idx == 0) && ((name == TGSI_SEMANTIC_POSITION) ||
--- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c
@@ -67,7 +67,7 @@ int ir3_delayslots(struct ir3_instruction *assigner,
 		return 6;

 	/* handled via sync flags: */
-	if (is_sfu(assigner) || is_tex(assigner))
+	if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
 		return 0;

 	/* assigner must be alu: */
--- a/src/gallium/drivers/freedreno/ir3/ir3_group.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_group.c
@@ -50,50 +50,7 @@ static bool check_stop(struct ir3_instruction *instr)
 	return false;
 }

-/* bleh.. we need to do the same group_n() thing for both inputs/outputs
- * (where we have a simple instr[] array), and fanin nodes (where we have
- * an extra indirection via reg->instr).
- */
-struct group_ops {
-	struct ir3_instruction *(*get)(void *arr, int idx);
-	void (*set)(void *arr, int idx, struct ir3_instruction *instr);
-};
-
-static struct ir3_instruction *arr_get(void *arr, int idx)
-{
-	return ((struct ir3_instruction **)arr)[idx];
-}
-static void arr_set_out(void *arr, int idx, struct ir3_instruction *instr)
-{
-	((struct ir3_instruction **)arr)[idx] = instr;
-}
-static void arr_set_in(void *arr, int idx, struct ir3_instruction *instr)
-{
-	debug_printf("cannot insert mov before input!\n");
-	debug_assert(0);
-}
-static struct group_ops arr_ops_out = { arr_get, arr_set_out };
-static struct group_ops arr_ops_in = { arr_get, arr_set_in };
-
-static struct ir3_instruction *instr_get(void *arr, int idx)
-{
-	return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
-}
-static void instr_set(void *arr, int idx, struct ir3_instruction *instr)
-{
-	((struct ir3_instruction *)arr)->regs[idx+1]->instr = instr;
-}
-static struct group_ops instr_ops = { instr_get, instr_set };
-
-
-
-static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
-{
-	return (a && b) && (a != b);
-}
-
-static struct ir3_instruction *
-create_mov(struct ir3_instruction *instr)
+static struct ir3_instruction * create_mov(struct ir3_instruction *instr)
 {
 	struct ir3_instruction *mov;

@@ -106,6 +63,67 @@ create_mov(struct ir3_instruction *instr)
 	return mov;
 }

+/* bleh.. we need to do the same group_n() thing for both inputs/outputs
+ * (where we have a simple instr[] array), and fanin nodes (where we have
+ * an extra indirection via reg->instr).
+ */
+struct group_ops {
+	struct ir3_instruction *(*get)(void *arr, int idx);
+	void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr);
+};
+
+static struct ir3_instruction *arr_get(void *arr, int idx)
+{
+	return ((struct ir3_instruction **)arr)[idx];
+}
+static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
+{
+	((struct ir3_instruction **)arr)[idx] = create_mov(instr);
+}
+static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
+{
+	/* so, we can't insert a mov in front of a meta:in.. and the downstream
+	 * instruction already has a pointer to 'instr'.  So we cheat a bit and
+	 * morph the meta:in instruction into a mov and insert a new meta:in
+	 * in front.
+	 */
+	struct ir3_instruction *in;
+
+	debug_assert(instr->regs_count == 1);
+
+	in = ir3_instr_create(instr->block, -1, OPC_META_INPUT);
+	in->inout.block = instr->block;
+	ir3_reg_create(in, instr->regs[0]->num, 0);
+
+	/* create src reg for meta:in and fixup to now be a mov: */
+	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in;
+	instr->category = 1;
+	instr->opc = 0;
+	instr->cat1.src_type = TYPE_F32;
+	instr->cat1.dst_type = TYPE_F32;
+
+	((struct ir3_instruction **)arr)[idx] = in;
+}
+static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out };
+static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in };
+
+static struct ir3_instruction *instr_get(void *arr, int idx)
+{
+	return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
+}
+static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
+{
+	((struct ir3_instruction *)arr)->regs[idx+1]->instr = create_mov(instr);
+}
+static struct group_ops instr_ops = { instr_get, instr_insert_mov };
+
+
+
+static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
+{
+	return (a && b) && (a != b);
+}
+
 static void group_n(struct group_ops *ops, void *arr, unsigned n)
 {
 	unsigned i, j;
@@ -135,8 +153,7 @@ restart:
 					conflict = true;

 			if (conflict) {
-				instr = create_mov(instr);
-				ops->set(arr, i, instr);
+				ops->insert_mov(arr, i, instr);
 				/* inserting the mov may have caused a conflict
 				 * against the previous:
 				 */
--- a/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_legalize.c
@@ -168,6 +168,8 @@ static void legalize(struct ir3_legalize_ctx *ctx)
 			 */
 			ctx->has_samp = true;
 			regmask_set(&needs_sy, n->regs[0]);
+		} else if (is_mem(n)) {
+			regmask_set(&needs_sy, n->regs[0]);
 		}

 		/* both tex/sfu appear to not always immediately consume
@@ -185,8 +187,44 @@ static void legalize(struct ir3_legalize_ctx *ctx)
 			last_input = n;
 	}

-	if (last_input)
+	if (last_input) {
+		/* special hack.. if using ldlv to bypass interpolation,
+		 * we need to insert a dummy bary.f on which we can set
+		 * the (ei) flag:
+		 */
+		if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
+			int i, cnt;
+
+			/* note that ir3_instr_create() inserts into
+			 * shader->instrs[] and increments the count..
+			 * so we need to bump up the cnt initially (to
+			 * avoid it clobbering the last real instr) and
+			 * restore it after.
+			 */
+			cnt = ++shader->instrs_count;
+
+			/* inserting instructions would be a bit nicer if list.. */
+			for (i = cnt - 2; i >= 0; i--) {
+				if (shader->instrs[i] == last_input) {
+
+					/* (ss)bary.f (ei)r63.x, 0, r0.x */
+					last_input = ir3_instr_create(block, 2, OPC_BARY_F);
+					last_input->flags |= IR3_INSTR_SS;
+					ir3_reg_create(last_input, regid(63, 0), 0);
+					ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
+					ir3_reg_create(last_input, regid(0, 0), 0);
+
+					shader->instrs[i + 1] = last_input;
+
+					break;
+				}
+				shader->instrs[i + 1] = shader->instrs[i];
+			}
+
+			shader->instrs_count = cnt;
+		}
 		last_input->regs[0]->flags |= IR3_REG_EI;
+	}

 	if (last_rel)
 		last_rel->flags |= IR3_INSTR_UL;
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -246,6 +246,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
 		key.color_two_side = false;
 		key.half_precision = false;
 		key.alpha = false;
+		key.rasterflat = false;
 		if (key.has_per_samp) {
 			key.fsaturate_s = 0;
 			key.fsaturate_t = 0;
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -77,6 +77,10 @@ struct ir3_shader_key {
 			 * let's start with this and see how it goes:
 			 */
 			unsigned alpha : 1;
+			/* used when shader needs to handle flat varyings (a4xx),
+			 * for TGSI_INTERPOLATE_COLOR:
+			 */
+			unsigned rasterflat : 1;
 		};
 		uint32_t global;
 	};
--- a/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
+++ b/src/gallium/drivers/r300/compiler/tests/rc_test_helpers.c
@@ -28,6 +28,7 @@
 */

 #include <errno.h>
+#include <limits.h>
 #include <regex.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -528,7 +529,6 @@ void init_compiler(
 }

 #define MAX_LINE_LENGTH 100
-#define MAX_PATH_LENGTH 100

 unsigned load_program(
 	struct radeon_compiler *c,
@@ -536,14 +536,19 @@ unsigned load_program(
 	const char *filename)
 {
 	char line[MAX_LINE_LENGTH];
-	char path[MAX_PATH_LENGTH];
+	char path[PATH_MAX];
 	FILE *file;
 	unsigned *count;
 	char **string_store;
 	unsigned i = 0;
+	int n;

 	memset(line, 0, sizeof(line));
-	snprintf(path, MAX_PATH_LENGTH, TEST_PATH "/%s", filename);
+	n = snprintf(path, PATH_MAX, TEST_PATH "/%s", filename);
+	if (n < 0 || n >= PATH_MAX) {
+		return 0;
+	}
+
 	file = fopen(path, "r");
 	if (!file) {
 		return 0;
--- a/src/gallium/drivers/r300/r300_blit.c
+++ b/src/gallium/drivers/r300/r300_blit.c
@@ -803,6 +803,15 @@ static void r300_blit(struct pipe_context *pipe,
        (struct pipe_framebuffer_state*)r300->fb_state.state;
    struct pipe_blit_info info = *blit;

+    /* The driver supports sRGB textures but not framebuffers. Blitting
+     * from sRGB to sRGB should be the same as blitting from linear
+     * to linear, so use that. This avoids incorrect linearization.
+     */
+    if (util_format_is_srgb(info.src.format)) {
+      info.src.format = util_format_linear(info.src.format);
+      info.dst.format = util_format_linear(info.dst.format);
+    }
+
    /* MSAA resolve. */
    if (info.src.resource->nr_samples > 1 &&
        !util_format_is_depth_or_stencil(info.src.resource->format)) {
--- a/src/gallium/drivers/r300/r300_fs.c
+++ b/src/gallium/drivers/r300/r300_fs.c
@@ -170,24 +170,10 @@ static void get_external_state(
        }

        state->unit[i].non_normalized_coords = !s->state.normalized_coords;
-        state->unit[i].convert_unorm_to_snorm =
-                v->base.format == PIPE_FORMAT_RGTC1_SNORM ||
-                v->base.format == PIPE_FORMAT_LATC1_SNORM;
+        state->unit[i].convert_unorm_to_snorm = 0;

        /* Pass texture swizzling to the compiler, some lowering passes need it. */
-        if (v->base.format == PIPE_FORMAT_RGTC1_SNORM ||
-            v->base.format == PIPE_FORMAT_LATC1_SNORM) {
-            unsigned char swizzle[4];
-
-            util_format_compose_swizzles(
-                            util_format_description(v->base.format)->swizzle,
-                            v->swizzle,
-                            swizzle);
-
-            state->unit[i].texture_swizzle =
-                    RC_MAKE_SWIZZLE(swizzle[0], swizzle[1],
-                                    swizzle[2], swizzle[3]);
-        } else if (state->unit[i].compare_mode_enabled) {
+        if (state->unit[i].compare_mode_enabled) {
            state->unit[i].texture_swizzle =
                RC_MAKE_SWIZZLE(v->swizzle[0], v->swizzle[1],
                                v->swizzle[2], v->swizzle[3]);
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -169,20 +169,21 @@ uint32_t r300_translate_texformat(enum pipe_format format,

    /* Add swizzling. */
    /* The RGTC1_SNORM and LATC1_SNORM swizzle is done in the shader. */
-    if (format != PIPE_FORMAT_RGTC1_SNORM &&
+    if (util_format_is_compressed(format) &&
+        dxtc_swizzle &&
+        format != PIPE_FORMAT_RGTC2_UNORM &&
+        format != PIPE_FORMAT_RGTC2_SNORM &&
+        format != PIPE_FORMAT_LATC2_UNORM &&
+        format != PIPE_FORMAT_LATC2_SNORM &&
+        format != PIPE_FORMAT_RGTC1_UNORM &&
+        format != PIPE_FORMAT_RGTC1_SNORM &&
+        format != PIPE_FORMAT_LATC1_UNORM &&
        format != PIPE_FORMAT_LATC1_SNORM) {
-        if (util_format_is_compressed(format) &&
-            dxtc_swizzle &&
-            format != PIPE_FORMAT_RGTC2_UNORM &&
-            format != PIPE_FORMAT_RGTC2_SNORM &&
-            format != PIPE_FORMAT_LATC2_UNORM &&
-            format != PIPE_FORMAT_LATC2_SNORM) {
-            result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
-                                                TRUE);
-        } else {
-            result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
-                                                FALSE);
-        }
+        result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+                                            TRUE);
+    } else {
+        result |= r300_get_swizzle_combined(desc->swizzle, swizzle_view,
+                                            FALSE);
    }

    /* S3TC formats. */
@@ -213,6 +214,7 @@ uint32_t r300_translate_texformat(enum pipe_format format,
        switch (format) {
            case PIPE_FORMAT_RGTC1_SNORM:
            case PIPE_FORMAT_LATC1_SNORM:
+                result |= sign_bit[0];
            case PIPE_FORMAT_LATC1_UNORM:
            case PIPE_FORMAT_RGTC1_UNORM:
                return R500_TX_FORMAT_ATI1N | result;
@@ -936,14 +938,16 @@ static void r300_texture_setup_fb_state(struct r300_surface *surf)
        surf->pitch_zmask = tex->tex.zmask_stride_in_pixels[level];
        surf->pitch_hiz = tex->tex.hiz_stride_in_pixels[level];
    } else {
+        enum pipe_format format = util_format_linear(surf->base.format);
+
        surf->pitch =
                stride |
-                r300_translate_colorformat(surf->base.format) |
+                r300_translate_colorformat(format) |
                R300_COLOR_TILE(tex->tex.macrotile[level]) |
                R300_COLOR_MICROTILE(tex->tex.microtile);
-        surf->format = r300_translate_out_fmt(surf->base.format);
+        surf->format = r300_translate_out_fmt(format);
        surf->colormask_swizzle =
-            r300_translate_colormask_swizzle(surf->base.format);
+            r300_translate_colormask_swizzle(format);
        surf->pitch_cmask = tex->tex.cmask_stride_in_pixels;
    }
 }
--- a/src/glsl/ast_to_hir.cpp
+++ b/src/glsl/ast_to_hir.cpp
@@ -1603,6 +1603,12 @@ ast_expression::do_hir(exec_list *instructions,
          && cond_val != NULL) {
         result = cond_val->value.b[0] ? op[1] : op[2];
      } else {
+         /* The copy to conditional_tmp reads the whole array. */
+         if (type->is_array()) {
+            mark_whole_array_access(op[1]);
+            mark_whole_array_access(op[2]);
+         }
+
         ir_variable *const tmp =
            new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary);
         instructions->push_tail(tmp);
--- a/src/mapi/Makefile.am
+++ b/src/mapi/Makefile.am
@@ -115,7 +115,7 @@ glapi_libglapi_la_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa

 if HAVE_SHARED_GLAPI
-glapi_libglapi_la_SOURCES += $(MAPI_BRIDGE_FILES)
+glapi_libglapi_la_SOURCES += $(MAPI_BRIDGE_FILES) glapi/glapi_mapi_tmp.h
 glapi_libglapi_la_CPPFLAGS += \
 	-DMAPI_MODE_BRIDGE \
 	-DMAPI_ABI_HEADER=\"glapi/glapi_mapi_tmp.h\"
--- a/src/mesa/drivers/common/meta_tex_subimage.c
+++ b/src/mesa/drivers/common/meta_tex_subimage.c
@@ -134,10 +134,11 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
                           const struct gl_pixelstore_attrib *packing)
 {
   GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
+   int full_height, image_height;
   struct gl_texture_image *pbo_tex_image;
   GLenum status;
   bool success = false;
-   int z, iters;
+   int z;

   /* XXX: This should probably be passed in from somewhere */
   const char *where = "_mesa_meta_pbo_TexSubImage";
@@ -167,14 +168,16 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
      return true;
   }

-   /* Only accept tightly packed pixels from the user. */
-   if (packing->ImageHeight != 0 && packing->ImageHeight != height)
-      return false;
+   /* For arrays, use a tall (height * depth) 2D texture but taking into
+    * account the inter-image padding specified with the image height packing
+    * property.
+    */
+   image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
+   full_height = image_height * (depth - 1) + height;

-   /* For arrays, use a tall (height * depth) 2D texture. */
   pbo_tex_image = create_texture_for_pbo(ctx, create_pbo,
                                          GL_PIXEL_UNPACK_BUFFER,
-                                          width, height * depth,
+                                          width, full_height,
                                          format, type, pixels, packing,
                                          &pbo, &pbo_tex);
   if (!pbo_tex_image)
@@ -183,13 +186,23 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
   if (allocate_storage)
      ctx->Driver.AllocTextureImageBuffer(ctx, tex_image);

-   /* Only stash the current FBO */
-   _mesa_meta_begin(ctx, 0);
+   _mesa_meta_begin(ctx, ~(MESA_META_PIXEL_TRANSFER |
+                           MESA_META_PIXEL_STORE));

   _mesa_GenFramebuffers(2, fbos);
   _mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbos[0]);
   _mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbos[1]);

+   if (tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
+      assert(depth == 1);
+      assert(zoffset == 0);
+      depth = height;
+      height = 1;
+      image_height = 1;
+      zoffset = yoffset;
+      yoffset = 0;
+   }
+
   _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                             pbo_tex_image, 0);
   /* If this passes on the first layer it should pass on the others */
@@ -213,29 +226,18 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
                                  GL_COLOR_BUFFER_BIT, GL_NEAREST))
      goto fail;

-   iters = tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY ?
-           height : depth;
-
-   for (z = 1; z < iters; z++) {
-      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
-                                pbo_tex_image, z);
+   for (z = 1; z < depth; z++) {
      _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                                tex_image, zoffset + z);

      _mesa_update_state(ctx);

-      if (tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY)
-         _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
-                                    0, z, width, z + 1,
-                                    xoffset, yoffset,
-                                    xoffset + width, yoffset + 1,
-                                    GL_COLOR_BUFFER_BIT, GL_NEAREST);
-      else
-         _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
-                                    0, z * height, width, (z + 1) * height,
-                                    xoffset, yoffset,
-                                    xoffset + width, yoffset + height,
-                                    GL_COLOR_BUFFER_BIT, GL_NEAREST);
+      _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                                 0, z * image_height,
+                                 width, z * image_height + height,
+                                 xoffset, yoffset,
+                                 xoffset + width, yoffset + height,
+                                 GL_COLOR_BUFFER_BIT, GL_NEAREST);
   }

   success = true;
@@ -259,10 +261,11 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                              const struct gl_pixelstore_attrib *packing)
 {
   GLuint pbo = 0, pbo_tex = 0, fbos[2] = { 0, 0 };
+   int full_height, image_height;
   struct gl_texture_image *pbo_tex_image;
   GLenum status;
   bool success = false;
-   int z, iters;
+   int z;

   /* XXX: This should probably be passed in from somewhere */
   const char *where = "_mesa_meta_pbo_GetTexSubImage";
@@ -292,23 +295,35 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
      return true;
   }

-   /* Only accept tightly packed pixels from the user. */
-   if (packing->ImageHeight != 0 && packing->ImageHeight != height)
-      return false;
+   /* For arrays, use one tall 2D texture.  full_height already spans all
+    * depth layers, including the inter-image padding given by the image
+    * height packing property, so it must not be scaled by depth again.
+    */
+   image_height = packing->ImageHeight == 0 ? height : packing->ImageHeight;
+   full_height = image_height * (depth - 1) + height;

-   /* For arrays, use a tall (height * depth) 2D texture. */
   pbo_tex_image = create_texture_for_pbo(ctx, false, GL_PIXEL_PACK_BUFFER,
-                                          width, height * depth,
+                                          width, full_height,
                                          format, type, pixels, packing,
                                          &pbo, &pbo_tex);
   if (!pbo_tex_image)
      return false;

-   /* Only stash the current FBO */
-   _mesa_meta_begin(ctx, 0);
+   _mesa_meta_begin(ctx, ~(MESA_META_PIXEL_TRANSFER |
+                           MESA_META_PIXEL_STORE));

   _mesa_GenFramebuffers(2, fbos);

+   if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
+      assert(depth == 1);
+      assert(zoffset == 0);
+      depth = height;
+      height = 1;
+      image_height = 1;
+      zoffset = yoffset;
+      yoffset = 0;
+   }
+
   /* If we were given a texture, bind it to the read framebuffer.  If not,
    * we're doing a ReadPixels and we should just use whatever framebuffer
    * the client has bound.
@@ -342,31 +357,18 @@ _mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
                                  GL_COLOR_BUFFER_BIT, GL_NEAREST))
      goto fail;

-   if (tex_image && tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY)
-      iters = height;
-   else
-      iters = depth;
-
-   for (z = 1; z < iters; z++) {
+   for (z = 1; z < depth; z++) {
      _mesa_meta_bind_fbo_image(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
                                tex_image, zoffset + z);
-      _mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
-                                pbo_tex_image, z);

      _mesa_update_state(ctx);

-      if (tex_image->TexObject->Target == GL_TEXTURE_1D_ARRAY)
-         _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
-                                    xoffset, yoffset,
-                                    xoffset + width, yoffset + 1,
-                                    0, z, width, z + 1,
-                                    GL_COLOR_BUFFER_BIT, GL_NEAREST);
-      else
-         _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
-                                    xoffset, yoffset,
-                                    xoffset + width, yoffset + height,
-                                    0, z * height, width, (z + 1) * height,
-                                    GL_COLOR_BUFFER_BIT, GL_NEAREST);
+      _mesa_meta_BlitFramebuffer(ctx, ctx->ReadBuffer, ctx->DrawBuffer,
+                                 xoffset, yoffset,
+                                 xoffset + width, yoffset + height,
+                                 0, z * image_height,
+                                 width, z * image_height + height,
+                                 GL_COLOR_BUFFER_BIT, GL_NEAREST);
   }

   success = true;
--- a/src/mesa/drivers/dri/i915/intel_fbo.c
+++ b/src/mesa/drivers/dri/i915/intel_fbo.c
@@ -287,8 +287,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
   rb->Width = image->region->width;
   rb->Height = image->region->height;
   rb->Format = image->format;
-   rb->_BaseFormat = _mesa_base_fbo_format(&intel->ctx,
-					   image->internal_format);
+   rb->_BaseFormat = _mesa_get_format_base_format(image->format);
   rb->NeedsFinishRenderTexture = true;
 }

--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -678,8 +678,14 @@ fs_visitor::type_size(const struct glsl_type *type)
   return 0;
 }

+/**
+ * Create a MOV to read the timestamp register.
+ *
+ * The caller is responsible for emitting the MOV.  The return value is
+ * the destination of the MOV, with extra parameters set.
+ */
 fs_reg
-fs_visitor::get_timestamp()
+fs_visitor::get_timestamp(fs_inst **out_mov)
 {
   assert(brw->gen >= 7);

@@ -690,7 +696,7 @@ fs_visitor::get_timestamp()

   fs_reg dst = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 4);

-   fs_inst *mov = emit(MOV(dst, ts));
+   fs_inst *mov = MOV(dst, ts);
   /* We want to read the 3 fields we care about even if it's not enabled in
    * the dispatch.
    */
@@ -708,6 +714,7 @@ fs_visitor::get_timestamp()
    */
   dst.set_smear(0);

+   *out_mov = mov;
   return dst;
 }

@@ -715,7 +722,9 @@ void
 fs_visitor::emit_shader_time_begin()
 {
   current_annotation = "shader time start";
-   shader_start_time = get_timestamp();
+   fs_inst *mov;
+   shader_start_time = get_timestamp(&mov);
+   emit(mov);
 }

 void
@@ -751,38 +760,50 @@ fs_visitor::emit_shader_time_end()
      unreachable("fs_visitor::emit_shader_time_end missing code");
   }

-   fs_reg shader_end_time = get_timestamp();
+   /* Insert our code just before the final SEND with EOT. */
+   exec_node *end = this->instructions.get_tail();
+   assert(end && ((fs_inst *) end)->eot);
+
+   fs_inst *tm_read;
+   fs_reg shader_end_time = get_timestamp(&tm_read);
+   end->insert_before(tm_read);

   /* Check that there weren't any timestamp reset events (assuming these
    * were the only two timestamp reads that happened).
    */
   fs_reg reset = shader_end_time;
   reset.set_smear(2);
-   fs_inst *test = emit(AND(reg_null_d, reset, fs_reg(1u)));
+   fs_inst *test = AND(reg_null_d, reset, fs_reg(1u));
   test->conditional_mod = BRW_CONDITIONAL_Z;
-   emit(IF(BRW_PREDICATE_NORMAL));
+   test->force_writemask_all = true;
+   end->insert_before(test);
+   end->insert_before(IF(BRW_PREDICATE_NORMAL));

   fs_reg start = shader_start_time;
   start.negate = true;
   fs_reg diff = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD, 1);
-   emit(ADD(diff, start, shader_end_time));
+   diff.set_smear(0);
+   fs_inst *add = ADD(diff, start, shader_end_time);
+   add->force_writemask_all = true;
+   end->insert_before(add);

   /* If there were no instructions between the two timestamp gets, the diff
    * is 2 cycles.  Remove that overhead, so I can forget about that when
    * trying to determine the time taken for single instructions.
    */
-   emit(ADD(diff, diff, fs_reg(-2u)));
+   add = ADD(diff, diff, fs_reg(-2u));
+   add->force_writemask_all = true;
+   end->insert_before(add);

-   emit_shader_time_write(type, diff);
-   emit_shader_time_write(written_type, fs_reg(1u));
-   emit(BRW_OPCODE_ELSE);
-   emit_shader_time_write(reset_type, fs_reg(1u));
-   emit(BRW_OPCODE_ENDIF);
+   end->insert_before(SHADER_TIME_ADD(type, diff));
+   end->insert_before(SHADER_TIME_ADD(written_type, fs_reg(1u)));
+   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ELSE, dispatch_width));
+   end->insert_before(SHADER_TIME_ADD(reset_type, fs_reg(1u)));
+   end->insert_before(new(mem_ctx) fs_inst(BRW_OPCODE_ENDIF, dispatch_width));
 }

-void
-fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
-                                   fs_reg value)
+fs_inst *
+fs_visitor::SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value)
 {
   int shader_time_index =
      brw_get_shader_time_index(brw, shader_prog, prog, type);
@@ -794,8 +815,8 @@ fs_visitor::emit_shader_time_write(enum shader_time_shader_type type,
   else
      payload = vgrf(glsl_type::uint_type);

-   emit(new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
-                             fs_reg(), payload, offset, value));
+   return new(mem_ctx) fs_inst(SHADER_OPCODE_SHADER_TIME_ADD,
+                               fs_reg(), payload, offset, value);
 }

 void
@@ -2265,8 +2286,13 @@ fs_visitor::demote_pull_constants()
 	 if (inst->src[i].file != UNIFORM)
 	    continue;

-         int pull_index = pull_constant_loc[inst->src[i].reg +
-                                            inst->src[i].reg_offset];
+         int pull_index;
+         unsigned location = inst->src[i].reg + inst->src[i].reg_offset;
+         if (location >= uniforms) /* Out of bounds access */
+            pull_index = -1;
+         else
+            pull_index = pull_constant_loc[location];
+
         if (pull_index == -1)
 	    continue;

@@ -2956,16 +2982,6 @@ fs_visitor::insert_gen4_post_send_dependency_workarounds(bblock_t *block, fs_ins
      if (i == write_len)
         return;
   }
-
-   /* If we hit the end of the program, resolve all remaining dependencies out
-    * of paranoia.
-    */
-   fs_inst *last_inst = (fs_inst *)this->instructions.get_tail();
-   assert(last_inst->eot);
-   for (int i = 0; i < write_len; i++) {
-      if (needs_dep[i])
-         last_inst->insert_before(block, DEP_RESOLVE_MOV(first_write_grf + i));
-   }
 }

 void
@@ -3817,6 +3833,9 @@ fs_visitor::run_fs()

      emit_fb_writes();

+      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
+         emit_shader_time_end();
+
      optimize();

      assign_curb_setup();
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -601,8 +601,7 @@ public:

   void emit_shader_time_begin();
   void emit_shader_time_end();
-   void emit_shader_time_write(enum shader_time_shader_type type,
-                               fs_reg value);
+   fs_inst *SHADER_TIME_ADD(enum shader_time_shader_type type, fs_reg value);

   void emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
                            fs_reg dst, fs_reg offset, fs_reg src0,
@@ -623,7 +622,7 @@ public:
   void resolve_ud_negate(fs_reg *reg);
   void resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg);

-   fs_reg get_timestamp();
+   fs_reg get_timestamp(fs_inst **out_mov);

   struct brw_reg interp_reg(int location, int channel);
   void setup_uniform_values(ir_variable *ir);
--- a/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_cmod_propagation.cpp
@@ -80,6 +80,10 @@ opt_cmod_propagation_local(fs_visitor *v, bblock_t *block)
                scan_inst->dst.reg_offset != inst->src[0].reg_offset)
               break;

+            /* Comparisons operate differently for ints and floats */
+            if (scan_inst->dst.type != inst->dst.type)
+               break;
+
            /* If the instruction generating inst's source also wrote the
             * flag, and inst is doing a simple .nz comparison, then inst
             * is redundant - the appropriate value is already in the flag
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -986,7 +986,7 @@ fs_visitor::nir_emit_alu(nir_alu_instr *instr)
      break;

   case nir_op_fpow:
-      inst = emit(SHADER_OPCODE_POW, result, op[0], op[1]);
+      inst = emit_math(SHADER_OPCODE_POW, result, op[0], op[1]);
      inst->saturate = instr->dest.saturate;
      break;

--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -428,21 +428,16 @@ fs_visitor::try_emit_mad(ir_expression *ir)
   if (ir->type != glsl_type::float_type)
      return false;

-   ir_rvalue *nonmul = ir->operands[1];
-   ir_expression *mul = ir->operands[0]->as_expression();
+   ir_rvalue *nonmul;
+   ir_expression *mul;
+   bool mul_negate, mul_abs;

-   bool mul_negate = false, mul_abs = false;
-   if (mul && mul->operation == ir_unop_abs) {
-      mul = mul->operands[0]->as_expression();
-      mul_abs = true;
-   } else if (mul && mul->operation == ir_unop_neg) {
-      mul = mul->operands[0]->as_expression();
-      mul_negate = true;
-   }
+   for (int i = 0; i < 2; i++) {
+      mul_negate = false;
+      mul_abs = false;

-   if (!mul || mul->operation != ir_binop_mul) {
-      nonmul = ir->operands[0];
-      mul = ir->operands[1]->as_expression();
+      mul = ir->operands[i]->as_expression();
+      nonmul = ir->operands[1 - i];

      if (mul && mul->operation == ir_unop_abs) {
         mul = mul->operands[0]->as_expression();
@@ -452,10 +447,13 @@ fs_visitor::try_emit_mad(ir_expression *ir)
         mul_negate = true;
      }

-      if (!mul || mul->operation != ir_binop_mul)
-         return false;
+      if (mul && mul->operation == ir_binop_mul)
+         break;
   }

+   if (!mul || mul->operation != ir_binop_mul)
+      return false;
+
   if (nonmul->as_constant() ||
       mul->operands[0]->as_constant() ||
       mul->operands[1]->as_constant())
@@ -3522,19 +3520,20 @@ fs_visitor::emit_fb_writes()
         do_dual_src = false;
   }

-   fs_inst *inst;
+   fs_inst *inst = NULL;
   if (do_dual_src) {
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-         emit_shader_time_end();
-
      this->current_annotation = ralloc_asprintf(this->mem_ctx,
 						 "FB dual-source write");
      inst = emit_single_fb_write(this->outputs[0], this->dual_src_output,
                                  reg_undef, 4);
      inst->target = 0;
      prog_data->dual_src_blend = true;
-   } else if (key->nr_color_regions > 0) {
+   } else {
      for (int target = 0; target < key->nr_color_regions; target++) {
+         /* Skip over outputs that weren't written. */
+         if (this->outputs[target].file == BAD_FILE)
+            continue;
+
         this->current_annotation = ralloc_asprintf(this->mem_ctx,
                                                    "FB write target %d",
                                                    target);
@@ -3542,19 +3541,14 @@ fs_visitor::emit_fb_writes()
         if (brw->gen >= 6 && key->replicate_alpha && target != 0)
            src0_alpha = offset(outputs[0], 3);

-         if (target == key->nr_color_regions - 1 &&
-             (INTEL_DEBUG & DEBUG_SHADER_TIME))
-            emit_shader_time_end();
-
         inst = emit_single_fb_write(this->outputs[target], reg_undef,
                                     src0_alpha,
                                     this->output_components[target]);
         inst->target = target;
      }
-   } else {
-      if (INTEL_DEBUG & DEBUG_SHADER_TIME)
-         emit_shader_time_end();
+   }

+   if (inst == NULL) {
      /* Even if there's no color buffers enabled, we still need to send
       * alpha out the pipeline to our null renderbuffer to support
       * alpha-testing, alpha-to-coverage, and so on.
@@ -3765,9 +3759,6 @@ fs_visitor::emit_urb_writes()
      if (length == 8 || last)
         flush = true;
      if (flush) {
-         if (last && (INTEL_DEBUG & DEBUG_SHADER_TIME))
-            emit_shader_time_end();
-
         fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
         fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1),
                                 BRW_REGISTER_TYPE_F);
--- a/src/mesa/drivers/dri/i965/brw_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_gs.c
@@ -351,7 +351,7 @@ brw_upload_gs_prog(struct brw_context *brw)
   }
   brw->gs.base.prog_data = &brw->gs.prog_data->base.base;

-   if (memcmp(&brw->vs.prog_data->base.vue_map, &brw->vue_map_geom_out,
+   if (memcmp(&brw->gs.prog_data->base.vue_map, &brw->vue_map_geom_out,
              sizeof(brw->vue_map_geom_out)) != 0) {
      brw->vue_map_geom_out = brw->gs.prog_data->base.vue_map;
      brw->state.dirty.brw |= BRW_NEW_VUE_MAP_GEOM_OUT;
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -945,6 +945,7 @@ backend_instruction::has_side_effects() const
 {
   switch (opcode) {
   case SHADER_OPCODE_UNTYPED_ATOMIC:
+   case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
   case SHADER_OPCODE_URB_WRITE_SIMD8:
   case FS_OPCODE_FB_WRITE:
      return true;
--- a/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp
@@ -428,7 +428,7 @@ vec4_visitor::opt_copy_propagation(bool do_constant_prop)
 	 entries[reg].saturatemask = 0x0;
 	 for (int i = 0; i < 4; i++) {
 	    if (inst->dst.writemask & (1 << i)) {
-               entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
+               entries[reg].value[i] = (!inst->saturate && direct_copy) ? &inst->src[0] : NULL;
               entries[reg].saturatemask |= (((inst->saturate && direct_copy) ? 1 : 0) << i);
 	    }
 	 }
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1645,7 +1645,7 @@ vec4_visitor::visit(ir_expression *ir)
      emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
      break;
   case ir_unop_i2b:
-      emit(AND(result_dst, op[0], src_reg(1)));
+      emit(CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
      break;

   case ir_unop_trunc:
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -398,7 +398,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
   rb->Width = image->width;
   rb->Height = image->height;
   rb->Format = image->format;
-   rb->_BaseFormat = _mesa_base_fbo_format(ctx, image->internal_format);
+   rb->_BaseFormat = _mesa_get_format_base_format(image->format);
   rb->NeedsFinishRenderTexture = true;
   irb->layer_count = 1;
 }
@@ -915,6 +915,51 @@ intel_blit_framebuffer(struct gl_context *ctx,
                           mask, filter);
 }

+/**
+ * Gen4-5 implementation of glBlitFramebuffer().
+ *
+ * Tries BLT, Meta, then swrast.
+ *
+ * Gen4-5 have a single ring for both 3D and BLT operations, so there are no
+ * inter-ring synchronization issues like on Gen6+.  The blitter is apparently
+ * faster than using the 3D pipeline.  Original Gen4 also has to rebase and
+ * copy miptree slices in order to render to unaligned locations.
+ */
+static void
+gen4_blit_framebuffer(struct gl_context *ctx,
+                      struct gl_framebuffer *readFb,
+                      struct gl_framebuffer *drawFb,
+                      GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
+                      GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
+                      GLbitfield mask, GLenum filter)
+{
+   /* Page 679 of OpenGL 4.4 spec says:
+    * "Added BlitFramebuffer to commands affected by conditional rendering in
+    *  section 10.10 (Bug 9562)."
+    */
+   if (!_mesa_check_conditional_render(ctx))
+      return;
+
+   mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
+                                              srcX0, srcY0, srcX1, srcY1,
+                                              dstX0, dstY0, dstX1, dstY1,
+                                              mask, filter);
+   if (mask == 0x0)
+      return;
+
+   mask = _mesa_meta_BlitFramebuffer(ctx, readFb, drawFb,
+                                     srcX0, srcY0, srcX1, srcY1,
+                                     dstX0, dstY0, dstX1, dstY1,
+                                     mask, filter);
+   if (mask == 0x0)
+      return;
+
+   _swrast_BlitFramebuffer(ctx, readFb, drawFb,
+                           srcX0, srcY0, srcX1, srcY1,
+                           dstX0, dstY0, dstX1, dstY1,
+                           mask, filter);
+}
+
 /**
 * Does the renderbuffer have hiz enabled?
 */
@@ -1049,7 +1094,10 @@ intel_fbo_init(struct brw_context *brw)
   dd->UnmapRenderbuffer = intel_unmap_renderbuffer;
   dd->RenderTexture = intel_render_texture;
   dd->ValidateFramebuffer = intel_validate_framebuffer;
-   dd->BlitFramebuffer = intel_blit_framebuffer;
+   if (brw->gen >= 6)
+      dd->BlitFramebuffer = intel_blit_framebuffer;
+   else
+      dd->BlitFramebuffer = gen4_blit_framebuffer;
   dd->EGLImageTargetRenderbufferStorage =
      intel_image_target_renderbuffer_storage;

--- a/src/mesa/drivers/dri/i965/intel_pixel_read.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_read.c
@@ -139,7 +139,8 @@ intel_readpixels_tiled_memcpy(struct gl_context * ctx,
       rb->Format == MESA_FORMAT_R8G8B8X8_UNORM)
      return false;

-   if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp))
+   if (!intel_get_memcpy(rb->Format, format, type, &mem_copy, &cpp,
+                         INTEL_DOWNLOAD))
      return false;

   if (!irb->mt ||
--- a/src/mesa/drivers/dri/i965/intel_tex_image.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_image.c
@@ -399,7 +399,8 @@ intel_gettexsubimage_tiled_memcpy(struct gl_context *ctx,
       texImage->TexFormat == MESA_FORMAT_R8G8B8X8_UNORM)
      return false;

-   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
+                         INTEL_DOWNLOAD))
      return false;

   /* If this is a nontrivial texture view, let another path handle it instead. */
--- a/src/mesa/drivers/dri/i965/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_subimage.c
@@ -118,7 +118,8 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
       packing->Invert)
      return false;

-   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp))
+   if (!intel_get_memcpy(texImage->TexFormat, format, type, &mem_copy, &cpp,
+                         INTEL_UPLOAD))
      return false;

   /* If this is a nontrivial texture view, let another path handle it instead. */
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.c
@@ -60,42 +60,79 @@ static const uint32_t ytile_span = 16;
 static const uint8_t rgba8_permutation[16] =
   { 2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15 };

-/* NOTE: dst must be 16 byte aligned */
-#define rgba8_copy_16(dst, src)                     \
-   *(__m128i *)(dst) = _mm_shuffle_epi8(            \
-      (__m128i) _mm_loadu_ps((float *)(src)),       \
-      *(__m128i *) rgba8_permutation                \
-   )
+/* NOTE: dst must be 16-byte aligned. src may be unaligned. */
+#define rgba8_copy_16_aligned_dst(dst, src)                            \
+   _mm_store_si128((__m128i *)(dst),                                   \
+                   _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)(src)), \
+                                    *(__m128i *) rgba8_permutation))
+
+/* NOTE: src must be 16-byte aligned. dst may be unaligned. */
+#define rgba8_copy_16_aligned_src(dst, src)                            \
+   _mm_storeu_si128((__m128i *)(dst),                                  \
+                    _mm_shuffle_epi8(_mm_load_si128((__m128i *)(src)), \
+                                     *(__m128i *) rgba8_permutation))
 #endif

 /**
- * Copy RGBA to BGRA - swap R and B.
+ * Copy RGBA to BGRA - swap R and B, with the destination 16-byte aligned.
 */
 static inline void *
-rgba8_copy(void *dst, const void *src, size_t bytes)
+rgba8_copy_aligned_dst(void *dst, const void *src, size_t bytes)
 {
   uint8_t *d = dst;
   uint8_t const *s = src;

 #ifdef __SSSE3__
-   /* Fast copying for tile spans.
-    *
-    * As long as the destination texture is 16 aligned,
-    * any 16 or 64 spans we get here should also be 16 aligned.
-    */
-
   if (bytes == 16) {
      assert(!(((uintptr_t)dst) & 0xf));
-      rgba8_copy_16(d+ 0, s+ 0);
+      rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
      return dst;
   }

   if (bytes == 64) {
      assert(!(((uintptr_t)dst) & 0xf));
-      rgba8_copy_16(d+ 0, s+ 0);
-      rgba8_copy_16(d+16, s+16);
-      rgba8_copy_16(d+32, s+32);
-      rgba8_copy_16(d+48, s+48);
+      rgba8_copy_16_aligned_dst(d+ 0, s+ 0);
+      rgba8_copy_16_aligned_dst(d+16, s+16);
+      rgba8_copy_16_aligned_dst(d+32, s+32);
+      rgba8_copy_16_aligned_dst(d+48, s+48);
+      return dst;
+   }
+#endif
+
+   while (bytes >= 4) {
+      d[0] = s[2];
+      d[1] = s[1];
+      d[2] = s[0];
+      d[3] = s[3];
+      d += 4;
+      s += 4;
+      bytes -= 4;
+   }
+   return dst;
+}
+
+/**
+ * Copy RGBA to BGRA - swap R and B, with the source 16-byte aligned.
+ */
+static inline void *
+rgba8_copy_aligned_src(void *dst, const void *src, size_t bytes)
+{
+   uint8_t *d = dst;
+   uint8_t const *s = src;
+
+#ifdef __SSSE3__
+   if (bytes == 16) {
+      assert(!(((uintptr_t)src) & 0xf));
+      rgba8_copy_16_aligned_src(d+ 0, s+ 0);
+      return dst;
+   }
+
+   if (bytes == 64) {
+      assert(!(((uintptr_t)src) & 0xf));
+      rgba8_copy_16_aligned_src(d+ 0, s+ 0);
+      rgba8_copy_16_aligned_src(d+16, s+16);
+      rgba8_copy_16_aligned_src(d+32, s+32);
+      rgba8_copy_16_aligned_src(d+48, s+48);
      return dst;
   }
 #endif
@@ -357,16 +394,18 @@ linear_to_xtiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
      if (mem_copy == memcpy)
         return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
                                 dst, src, src_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_dst)
         return linear_to_xtiled(0, 0, xtile_width, xtile_width, 0, xtile_height,
-                                 dst, src, src_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, src_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_dst);
   } else {
      if (mem_copy == memcpy)
         return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
                                 dst, src, src_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_dst)
         return linear_to_xtiled(x0, x1, x2, x3, y0, y1,
-                                 dst, src, src_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, src_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_dst);
   }
   linear_to_xtiled(x0, x1, x2, x3, y0, y1,
                    dst, src, src_pitch, swizzle_bit, mem_copy);
@@ -393,16 +432,18 @@ linear_to_ytiled_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
      if (mem_copy == memcpy)
         return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
                                 dst, src, src_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_dst)
         return linear_to_ytiled(0, 0, ytile_width, ytile_width, 0, ytile_height,
-                                 dst, src, src_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, src_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_dst);
   } else {
      if (mem_copy == memcpy)
         return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
                                 dst, src, src_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_dst)
         return linear_to_ytiled(x0, x1, x2, x3, y0, y1,
-                                 dst, src, src_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, src_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_dst);
   }
   linear_to_ytiled(x0, x1, x2, x3, y0, y1,
                    dst, src, src_pitch, swizzle_bit, mem_copy);
@@ -429,16 +470,18 @@ xtiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
      if (mem_copy == memcpy)
         return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
                                 dst, src, dst_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_src)
         return xtiled_to_linear(0, 0, xtile_width, xtile_width, 0, xtile_height,
-                                 dst, src, dst_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_src);
   } else {
      if (mem_copy == memcpy)
         return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
                                 dst, src, dst_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_src)
         return xtiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_src);
   }
   xtiled_to_linear(x0, x1, x2, x3, y0, y1,
                    dst, src, dst_pitch, swizzle_bit, mem_copy);
@@ -465,16 +508,18 @@ ytiled_to_linear_faster(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3,
      if (mem_copy == memcpy)
         return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
                                 dst, src, dst_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_src)
         return ytiled_to_linear(0, 0, ytile_width, ytile_width, 0, ytile_height,
-                                 dst, src, dst_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_src);
   } else {
      if (mem_copy == memcpy)
         return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
                                 dst, src, dst_pitch, swizzle_bit, memcpy);
-      else if (mem_copy == rgba8_copy)
+      else if (mem_copy == rgba8_copy_aligned_src)
         return ytiled_to_linear(x0, x1, x2, x3, y0, y1,
-                                 dst, src, dst_pitch, swizzle_bit, rgba8_copy);
+                                 dst, src, dst_pitch, swizzle_bit,
+                                 rgba8_copy_aligned_src);
   }
   ytiled_to_linear(x0, x1, x2, x3, y0, y1,
                    dst, src, dst_pitch, swizzle_bit, mem_copy);
@@ -684,7 +729,8 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
 * \return true if the format and type combination are valid
 */
 bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
-                      GLenum type, mem_copy_fn* mem_copy, uint32_t* cpp)
+                      GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp,
+                      enum intel_memcpy_direction direction)
 {
   if (type == GL_UNSIGNED_INT_8_8_8_8_REV &&
       !(format == GL_RGBA || format == GL_BGRA))
@@ -700,7 +746,8 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
      if (format == GL_BGRA) {
         *mem_copy = memcpy;
      } else if (format == GL_RGBA) {
-         *mem_copy = rgba8_copy;
+         *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst
+                                               : rgba8_copy_aligned_src;
      }
   } else if ((tiledFormat == MESA_FORMAT_R8G8B8A8_UNORM) ||
              (tiledFormat == MESA_FORMAT_R8G8B8X8_UNORM)) {
@@ -709,7 +756,8 @@ bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
         /* Copying from RGBA to BGRA is the same as BGRA to RGBA so we can
          * use the same function.
          */
-         *mem_copy = rgba8_copy;
+         *mem_copy = direction == INTEL_UPLOAD ? rgba8_copy_aligned_dst
+                                               : rgba8_copy_aligned_src;
      } else if (format == GL_RGBA) {
         *mem_copy = memcpy;
      }
--- a/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
+++ b/src/mesa/drivers/dri/i965/intel_tiled_memcpy.h
@@ -55,7 +55,20 @@ tiled_to_linear(uint32_t xt1, uint32_t xt2,
                uint32_t tiling,
                mem_copy_fn mem_copy);

+/* Tells intel_get_memcpy() whether the memcpy() is
+ *
+ *  - an upload to the GPU with an aligned destination and a potentially
+ *    unaligned source; or
+ *  - a download from the GPU with an aligned source and a potentially
+ *    unaligned destination.
+ */
+enum intel_memcpy_direction {
+   INTEL_UPLOAD,
+   INTEL_DOWNLOAD
+};
+
 bool intel_get_memcpy(mesa_format tiledFormat, GLenum format,
-                      GLenum type, mem_copy_fn* mem_copy, uint32_t* cpp);
+                      GLenum type, mem_copy_fn *mem_copy, uint32_t *cpp,
+                      enum intel_memcpy_direction direction);

 #endif /* INTEL_TILED_MEMCPY */
--- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
+++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp
@@ -415,3 +415,37 @@ TEST_F(cmod_propagation_test, movnz)
   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 0)->opcode);
   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 0)->conditional_mod);
 }
+
+TEST_F(cmod_propagation_test, different_types_cmod_with_zero)
+{
+   fs_reg dest = v->vgrf(glsl_type::int_type);
+   fs_reg src0 = v->vgrf(glsl_type::int_type);
+   fs_reg src1 = v->vgrf(glsl_type::int_type);
+   fs_reg zero(0.0f);
+   v->emit(BRW_OPCODE_ADD, dest, src0, src1);
+   v->emit(BRW_OPCODE_CMP, v->reg_null_f, retype(dest, BRW_REGISTER_TYPE_F),
+                                          zero)
+      ->conditional_mod = BRW_CONDITIONAL_GE;
+
+   /* = Before =
+    *
+    * 0: add(8)        dest:D  src0:D  src1:D
+    * 1: cmp.ge.f0(8)  null:F  dest:F  0.0f
+    *
+    * = After =
+    * (no changes)
+    */
+
+   v->calculate_cfg();
+   bblock_t *block0 = v->cfg->blocks[0];
+
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+
+   EXPECT_FALSE(cmod_propagation(v));
+   EXPECT_EQ(0, block0->start_ip);
+   EXPECT_EQ(1, block0->end_ip);
+   EXPECT_EQ(BRW_OPCODE_ADD, instruction(block0, 0)->opcode);
+   EXPECT_EQ(BRW_OPCODE_CMP, instruction(block0, 1)->opcode);
+   EXPECT_EQ(BRW_CONDITIONAL_GE, instruction(block0, 1)->conditional_mod);
+}
--- a/src/mesa/main/pack.c
+++ b/src/mesa/main/pack.c
@@ -1487,20 +1487,20 @@ _mesa_pack_luminance_from_rgba_integer(GLuint n,
         case GL_UNSIGNED_BYTE: {
            GLbyte *dst = (GLbyte *) dstAddr;
            dst[i] = lum32;
+            break;
         }
-         break;
         case GL_SHORT:
         case GL_UNSIGNED_SHORT: {
            GLshort *dst = (GLshort *) dstAddr;
            dst[i] = lum32;
+            break;
         }
-         break;
         case GL_INT:
         case GL_UNSIGNED_INT: {
            GLint *dst = (GLint *) dstAddr;
            dst[i] = lum32;
+            break;
         }
-         break;
         }
      }
      return;
@@ -1525,21 +1525,22 @@ _mesa_pack_luminance_from_rgba_integer(GLuint n,
            GLbyte *dst = (GLbyte *) dstAddr;
            dst[2*i] = lum32;
            dst[2*i+1] = alpha;
+            break;
         }
         case GL_SHORT:
         case GL_UNSIGNED_SHORT: {
            GLshort *dst = (GLshort *) dstAddr;
            dst[i] = lum32;
            dst[2*i+1] = alpha;
+            break;
         }
-         break;
         case GL_INT:
         case GL_UNSIGNED_INT: {
            GLint *dst = (GLint *) dstAddr;
            dst[i] = lum32;
            dst[2*i+1] = alpha;
+            break;
         }
-         break;
         }
      }
      return;
--- a/src/mesa/main/readpix.c
+++ b/src/mesa/main/readpix.c
@@ -594,6 +594,7 @@ read_rgba_pixels( struct gl_context *ctx,
      _mesa_format_convert(dst, dst_format, dst_stride,
                           luminance, luminance_format, luminance_stride,
                           width, height, NULL);
+      free(luminance);
   } else {
      _mesa_pack_luminance_from_rgba_integer(width * height, src, !src_is_uint,
                                             dst, format, type);
--- a/src/mesa/main/shaderapi.c
+++ b/src/mesa/main/shaderapi.c
@@ -1713,7 +1713,7 @@ _mesa_GetProgramBinary(GLuint program, GLsizei bufSize, GLsizei *length,
    * Ensure that length always points to valid storage to avoid multiple NULL
    * pointer checks below.
    */
-   if (length != NULL)
+   if (length == NULL)
      length = &length_dummy;


--- a/src/mesa/main/teximage.c
+++ b/src/mesa/main/teximage.c
@@ -224,7 +224,10 @@ _mesa_base_tex_format( struct gl_context *ctx, GLint internalFormat )
   if (ctx->Extensions.ARB_stencil_texturing) {
      switch (internalFormat) {
      case GL_STENCIL_INDEX:
+      case GL_STENCIL_INDEX1:
+      case GL_STENCIL_INDEX4:
      case GL_STENCIL_INDEX8:
+      case GL_STENCIL_INDEX16:
         return GL_STENCIL_INDEX;
      default:
         ; /* fallthrough */
@@ -2815,14 +2818,6 @@ copytexsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
      }
   }

-   /* check target (proxies not allowed) */
-   if (!legal_texsubimage_target(ctx, dimensions, target, dsa)) {
-      _mesa_error(ctx, GL_INVALID_ENUM, "glCopyTex%sSubImage%uD(target=%s)",
-                  suffix, dimensions,
-                  _mesa_lookup_enum_by_nr(target));
-      return GL_TRUE;
-   }
-
   /* Check level */
   if (level < 0 || level >= _mesa_max_texture_levels(ctx, target)) {
      _mesa_error(ctx, GL_INVALID_VALUE,
@@ -4088,6 +4083,16 @@ _mesa_CopyTexSubImage1D( GLenum target, GLint level,
   struct gl_texture_object* texObj;
   GET_CURRENT_CONTEXT(ctx);

+   /* Check target (proxies not allowed). Target must be checked prior to
+    * calling _mesa_get_current_tex_object.
+    */
+   if (!legal_texsubimage_target(ctx, 1, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCopyTexSubImage1D(invalid target %s)",
+                  _mesa_lookup_enum_by_nr(target));
+      return;
+   }
+
   texObj = _mesa_get_current_tex_object(ctx, target);
   if (!texObj)
      return;
@@ -4106,6 +4111,16 @@ _mesa_CopyTexSubImage2D( GLenum target, GLint level,
   struct gl_texture_object* texObj;
   GET_CURRENT_CONTEXT(ctx);

+   /* Check target (proxies not allowed). Target must be checked prior to
+    * calling _mesa_get_current_tex_object.
+    */
+   if (!legal_texsubimage_target(ctx, 2, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCopyTexSubImage2D(invalid target %s)",
+                  _mesa_lookup_enum_by_nr(target));
+      return;
+   }
+
   texObj = _mesa_get_current_tex_object(ctx, target);
   if (!texObj)
      return;
@@ -4125,6 +4140,16 @@ _mesa_CopyTexSubImage3D( GLenum target, GLint level,
   struct gl_texture_object* texObj;
   GET_CURRENT_CONTEXT(ctx);

+   /* Check target (proxies not allowed). Target must be checked prior to
+    * calling _mesa_get_current_tex_object.
+    */
+   if (!legal_texsubimage_target(ctx, 3, target, false)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCopyTexSubImage3D(invalid target %s)",
+                  _mesa_lookup_enum_by_nr(target));
+      return;
+   }
+
   texObj = _mesa_get_current_tex_object(ctx, target);
   if (!texObj)
      return;
@@ -4145,6 +4170,14 @@ _mesa_CopyTextureSubImage1D(GLuint texture, GLint level,
   if (!texObj)
      return;

+   /* Check target (proxies not allowed). */
+   if (!legal_texsubimage_target(ctx, 1, texObj->Target, true)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCopyTextureSubImage1D(invalid target %s)",
+                  _mesa_lookup_enum_by_nr(texObj->Target));
+      return;
+   }
+
   _mesa_copy_texture_sub_image(ctx, 1, texObj, texObj->Target, level,
                                xoffset, 0, 0, x, y, width, 1, true);
 }
@@ -4161,6 +4194,14 @@ _mesa_CopyTextureSubImage2D(GLuint texture, GLint level,
   if (!texObj)
      return;

+   /* Check target (proxies not allowed). */
+   if (!legal_texsubimage_target(ctx, 2, texObj->Target, true)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCopyTextureSubImage2D(invalid target %s)",
+                  _mesa_lookup_enum_by_nr(texObj->Target));
+      return;
+   }
+
   _mesa_copy_texture_sub_image(ctx, 2, texObj, texObj->Target, level,
                                xoffset, yoffset, 0,
                                x, y, width, height, true);
@@ -4180,6 +4221,14 @@ _mesa_CopyTextureSubImage3D(GLuint texture, GLint level,
   if (!texObj)
      return;

+   /* Check target (proxies not allowed). */
+   if (!legal_texsubimage_target(ctx, 3, texObj->Target, true)) {
+      _mesa_error(ctx, GL_INVALID_ENUM,
+                  "glCopyTextureSubImage3D(invalid target %s)",
+                  _mesa_lookup_enum_by_nr(texObj->Target));
+      return;
+   }
+
   _mesa_copy_texture_sub_image(ctx, 3, texObj, texObj->Target, level,
                                xoffset, yoffset, zoffset,
                                x, y, width, height, true);
@@ -4454,25 +4503,21 @@ out:


 /**
- * Error checking for glCompressedTexSubImage[123]D().
+ * Target checking for glCompressedTexSubImage[123]D().
 * \return GL_TRUE if error, GL_FALSE if no error
+ * Must come before other error checking so that the texture object can
+ * be correctly retrieved using _mesa_get_current_tex_object.
 */
 static GLboolean
-compressed_subtexture_error_check(struct gl_context *ctx, GLint dims,
-                                  const struct gl_texture_object *texObj,
-                                  GLenum target, GLint level,
-                                  GLint xoffset, GLint yoffset, GLint zoffset,
-                                  GLsizei width, GLsizei height, GLsizei depth,
-                                  GLenum format, GLsizei imageSize, bool dsa)
+compressed_subtexture_target_check(struct gl_context *ctx, GLenum target,
+                                   GLint dims, GLenum format, bool dsa,
+                                   const char *caller)
 {
-   struct gl_texture_image *texImage;
-   GLint expectedSize;
   GLboolean targetOK;
-   const char *suffix = dsa ? "ture" : "";

   if (dsa && target == GL_TEXTURE_RECTANGLE) {
-      _mesa_error(ctx, GL_INVALID_OPERATION,
-                  "glCompressedSubTexture%dD(target)", dims);
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid target %s)", caller,
+                  _mesa_lookup_enum_by_nr(target));
      return GL_TRUE;
   }

@@ -4535,7 +4580,9 @@ compressed_subtexture_error_check(struct gl_context *ctx, GLint dims,
         }
         if (invalidformat) {
            _mesa_error(ctx, GL_INVALID_OPERATION,
-                        "glCompressedTex%sSubImage%uD(target)", suffix, dims);
+                        "%s(invalid target %s for format %s)", caller,
+                        _mesa_lookup_enum_by_nr(target),
+                        _mesa_lookup_enum_by_nr(format));
            return GL_TRUE;
         }
      }
@@ -4549,11 +4596,30 @@ compressed_subtexture_error_check(struct gl_context *ctx, GLint dims,
   }

   if (!targetOK) {
-      _mesa_error(ctx, GL_INVALID_ENUM,
-                  "glCompressedTex%sSubImage%uD(target)", suffix, dims);
+      _mesa_error(ctx, GL_INVALID_ENUM, "%s(invalid target %s)", caller,
+                  _mesa_lookup_enum_by_nr(target));
      return GL_TRUE;
   }

+   return GL_FALSE;
+}
+
+/**
+ * Error checking for glCompressedTexSubImage[123]D().
+ * \return GL_TRUE if error, GL_FALSE if no error
+ */
+static GLboolean
+compressed_subtexture_error_check(struct gl_context *ctx, GLint dims,
+                                  const struct gl_texture_object *texObj,
+                                  GLenum target, GLint level,
+                                  GLint xoffset, GLint yoffset, GLint zoffset,
+                                  GLsizei width, GLsizei height, GLsizei depth,
+                                  GLenum format, GLsizei imageSize, bool dsa)
+{
+   struct gl_texture_image *texImage;
+   GLint expectedSize;
+   const char *suffix = dsa ? "ture" : "";
+
   /* this will catch any invalid compressed format token */
   if (!_mesa_is_compressed_format(ctx, format)) {
      _mesa_error(ctx, GL_INVALID_ENUM,
@@ -4709,6 +4775,11 @@ _mesa_CompressedTexSubImage1D(GLenum target, GLint level, GLint xoffset,
   struct gl_texture_object *texObj;
   GET_CURRENT_CONTEXT(ctx);

+   if (compressed_subtexture_target_check(ctx, target, 1, format, false,
+                                          "glCompressedTexSubImage1D")) {
+      return;
+   }
+
   texObj = _mesa_get_current_tex_object(ctx, target);
   if (!texObj)
      return;
@@ -4731,6 +4802,12 @@ _mesa_CompressedTextureSubImage1D(GLuint texture, GLint level, GLint xoffset,
   if (!texObj)
      return;

+   if (compressed_subtexture_target_check(ctx, texObj->Target, 1, format,
+                                          true,
+                                          "glCompressedTextureSubImage1D")) {
+      return;
+   }
+
   _mesa_compressed_texture_sub_image(ctx, 1, texObj, texObj->Target, level,
                                      xoffset, 0, 0, width, 1, 1,
                                      format, imageSize, data, true);
@@ -4746,6 +4823,11 @@ _mesa_CompressedTexSubImage2D(GLenum target, GLint level, GLint xoffset,
   struct gl_texture_object *texObj;
   GET_CURRENT_CONTEXT(ctx);

+   if (compressed_subtexture_target_check(ctx, target, 2, format, false,
+                                          "glCompressedTexSubImage2D")) {
+      return;
+   }
+
   texObj = _mesa_get_current_tex_object(ctx, target);
   if (!texObj)
      return;
@@ -4770,6 +4852,12 @@ _mesa_CompressedTextureSubImage2D(GLuint texture, GLint level, GLint xoffset,
   if (!texObj)
      return;

+   if (compressed_subtexture_target_check(ctx, texObj->Target, 2, format,
+                                          true,
+                                          "glCompressedTextureSubImage2D")) {
+      return;
+   }
+
   _mesa_compressed_texture_sub_image(ctx, 2, texObj, texObj->Target, level,
                                      xoffset, yoffset, 0, width, height, 1,
                                      format, imageSize, data, true);
@@ -4784,6 +4872,11 @@ _mesa_CompressedTexSubImage3D(GLenum target, GLint level, GLint xoffset,
   struct gl_texture_object *texObj;
   GET_CURRENT_CONTEXT(ctx);

+   if (compressed_subtexture_target_check(ctx, target, 3, format, false,
+                                          "glCompressedTexSubImage3D")) {
+      return;
+   }
+
   texObj = _mesa_get_current_tex_object(ctx, target);
   if (!texObj)
      return;
@@ -4809,6 +4902,12 @@ _mesa_CompressedTextureSubImage3D(GLuint texture, GLint level, GLint xoffset,
   if (!texObj)
      return;

+   if (compressed_subtexture_target_check(ctx, texObj->Target, 3, format,
+                                          true,
+                                          "glCompressedTextureSubImage3D")) {
+      return;
+   }
+
   _mesa_compressed_texture_sub_image(ctx, 3, texObj, texObj->Target, level,
                                      xoffset, yoffset, zoffset,
                                      width, height, depth,
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -31,19 +31,12 @@ libmesautil_la_CPPFLAGS = \
 	-I$(top_srcdir)/src \
 	-I$(top_srcdir)/src/mapi \
 	-I$(top_srcdir)/src/mesa \
-	$(SHA1_CFLAGS) \
 	$(VISIBILITY_CFLAGS)

 libmesautil_la_SOURCES = \
 	$(MESA_UTIL_FILES) \
 	$(MESA_UTIL_GENERATED_FILES)

-if ENABLE_SHADER_CACHE
-libmesautil_la_SOURCES += $(MESA_UTIL_SHADER_CACHE_FILES)
-endif
-
-libmesautil_la_LIBADD = $(SHA1_LIBS)
-
 check_PROGRAMS = u_atomic_test
 TESTS = $(check_PROGRAMS)

--- a/src/util/Makefile.sources
+++ b/src/util/Makefile.sources
@@ -1,7 +1,3 @@
-MESA_UTIL_SHADER_CACHE_FILES := \
-	mesa-sha1.c \
-	mesa-sha1.h
-
 MESA_UTIL_FILES :=	\
 	format_srgb.h \
 	hash_table.c	\
--- a/src/util/SConscript
+++ b/src/util/SConscript
@@ -29,11 +29,6 @@ mesautil_sources = (
    source_lists['MESA_UTIL_GENERATED_FILES']
 )

-# XXX We don't yet have scons support for detecting any of the various
-# HAVE_SHA1_* definitions, so for now simply disable the shader cache.
-if False:
-    mesautil_sources += source_lists['MESA_UTIL_SHADER_CACHE_FILES']
-
 mesautil = env.ConvenienceLibrary(
    target = 'mesautil',
    source = mesautil_sources,
--- a/src/util/mesa-sha1.c
+++ b/src/util/mesa-sha1.c
@@ -1,316 +0,0 @@
-/* Copyright © 2007 Carl Worth
- * Copyright © 2009 Jeremy Huddleston, Julien Cristau, and Matthieu Herrb
- * Copyright © 2009-2010 Mikhail Gusarov
- * Copyright © 2012 Yaakov Selkowitz and Keith Packard
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "mesa-sha1.h"
-
-#if defined(HAVE_SHA1_IN_LIBMD)  /* Use libmd for SHA1 */ \
-	|| defined(HAVE_SHA1_IN_LIBC)   /* Use libc for SHA1 */
-
-#include <sha1.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   SHA1_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   SHA1Init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
-
-   SHA1Update(sha1_ctx, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   SHA1_CTX *sha1_ctx = (SHA1_CTX *) ctx;
-
-   SHA1Final(result, sha1_ctx);
-   free(sha1_ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_COMMONCRYPTO)        /* Use CommonCrypto for SHA1 */
-
-#include <CommonCrypto/CommonDigest.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   CC_SHA1_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   CC_SHA1_Init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
-
-   CC_SHA1_Update(sha1_ctx, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   CC_SHA1_CTX *sha1_ctx = (CC_SHA1_CTX *) ctx;
-
-   CC_SHA1_Final(result, sha1_ctx);
-   free(sha1_ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_CRYPTOAPI)        /* Use CryptoAPI for SHA1 */
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-#include <wincrypt.h>
-
-static HCRYPTPROV hProv;
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   HCRYPTHASH *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-
-   CryptAcquireContext(&hProv, NULL, MS_DEF_PROV, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT);
-   CryptCreateHash(hProv, CALG_SHA1, 0, 0, ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
-
-   CryptHashData(*hHash, data, size, 0);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   HCRYPTHASH *hHash = (HCRYPTHASH *) ctx;
-   DWORD len = 20;
-
-   CryptGetHashParam(*hHash, HP_HASHVAL, result, &len, 0);
-   CryptDestroyHash(*hHash);
-   CryptReleaseContext(hProv, 0);
-   free(ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBNETTLE)   /* Use libnettle for SHA1 */
-
-#include <nettle/sha.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   struct sha1_ctx *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   sha1_init(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   sha1_update((struct sha1_ctx *) ctx, size, data);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   sha1_digest((struct sha1_ctx *) ctx, 20, result);
-   free(ctx);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBGCRYPT)   /* Use libgcrypt for SHA1 */
-
-#include <gcrypt.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   static int init;
-   gcry_md_hd_t h;
-   gcry_error_t err;
-
-   if (!init) {
-      if (!gcry_check_version(NULL))
-         return NULL;
-      gcry_control(GCRYCTL_DISABLE_SECMEM, 0);
-      gcry_control(GCRYCTL_INITIALIZATION_FINISHED, 0);
-      init = 1;
-   }
-
-   err = gcry_md_open(&h, GCRY_MD_SHA1, 0);
-   if (err)
-      return NULL;
-   return (struct mesa_sha1 *) h;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
-
-   gcry_md_write(h, data, size);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   gcry_md_hd_t h = (gcry_md_hd_t) ctx;
-
-   memcpy(result, gcry_md_read(h, GCRY_MD_SHA1), 20);
-   gcry_md_close(h);
-   return 1;
-}
-
-#elif defined(HAVE_SHA1_IN_LIBSHA1)     /* Use libsha1 */
-
-#include <libsha1.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   sha1_ctx *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   sha1_begin(ctx);
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   sha1_hash(data, size, (sha1_ctx *) ctx);
-   return 1;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   sha1_end(result, (sha1_ctx *) ctx);
-   free(ctx);
-   return 1;
-}
-
-#else                           /* Use OpenSSL's libcrypto */
-
-#include <stddef.h>             /* buggy openssl/sha.h wants size_t */
-#include <openssl/sha.h>
-
-struct mesa_sha1 *
-_mesa_sha1_init(void)
-{
-   int ret;
-   SHA_CTX *ctx = malloc(sizeof(*ctx));
-
-   if (!ctx)
-      return NULL;
-   ret = SHA1_Init(ctx);
-   if (!ret) {
-      free(ctx);
-      return NULL;
-   }
-   return (struct mesa_sha1 *) ctx;
-}
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size)
-{
-   int ret;
-   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
-
-   ret = SHA1_Update(sha_ctx, data, size);
-   if (!ret)
-      free(sha_ctx);
-   return ret;
-}
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20])
-{
-   int ret;
-   SHA_CTX *sha_ctx = (SHA_CTX *) ctx;
-
-   ret = SHA1_Final(result, (SHA_CTX *) sha_ctx);
-   free(sha_ctx);
-   return ret;
-}
-
-#endif
-
-void
-_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20])
-{
-   struct mesa_sha1 *ctx;
-
-   ctx = _mesa_sha1_init();
-   _mesa_sha1_update(ctx, data, size);
-   _mesa_sha1_final(ctx, result);
-}
-
-char *
-_mesa_sha1_format(char *buf, const unsigned char *sha1)
-{
-   static const char hex_digits[] = "0123456789abcdef";
-   int i;
-
-   for (i = 0; i < 40; i += 2) {
-      buf[i] = hex_digits[sha1[i >> 1] >> 4];
-      buf[i + 1] = hex_digits[sha1[i >> 1] & 0x0f];
-   }
-   buf[i] = '\0';
-
-   return buf;
-}
--- a/src/util/mesa-sha1.h
+++ b/src/util/mesa-sha1.h
@@ -1,53 +0,0 @@
-/* Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef SHA1_H
-#define SHA1_H
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdlib.h>
-
-struct mesa_sha1;
-
-struct mesa_sha1 *
-_mesa_sha1_init(void);
-
-int
-_mesa_sha1_update(struct mesa_sha1 *ctx, const void *data, int size);
-
-int
-_mesa_sha1_final(struct mesa_sha1 *ctx, unsigned char result[20]);
-
-char *
-_mesa_sha1_format(char *buf, const unsigned char *sha1);
-
-void
-_mesa_sha1_compute(const void *data, size_t size, unsigned char result[20]);
-
-#ifdef __cplusplus
-} /* extern C */
-#endif
-
-#endif
@@ -1 +1 @@
 .5.0
 .5.1