vc4: Use NEON to speed up utile stores on Pi2+.

Improves 1024x1024 TexSubImage2D by 41.2371% +/- 3.52799% (n=10).
vc4: Use NEON to speed up utile loads on Pi2.
2017-01-26 12:50:05 -08:00 · 2017-01-26 12:48:10 -08:00 · 2017-01-26 12:23:31 -08:00 · 2017-01-26 12:23:31 -08:00 · 2017-01-26 19:40:52 +01:00 · 2017-01-26 19:40:14 +01:00
1443 changed files with 109565 additions and 38404 deletions
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -1,4 +1,5 @@
-((prog-mode
+((nil . ((show-trailing-whitespace . t)))
+ (prog-mode
  (indent-tabs-mode . nil)
  (tab-width . 8)
  (c-basic-offset . 3)
@@ -8,6 +9,10 @@
 	    (c-set-offset 'case-label '0)
 	    (c-set-offset 'innamespace '0)
 	    (c-set-offset 'inline-open '0)))
-  )
+  (whitespace-style face indentation)
+  (whitespace-line-column . 79)
+  (eval ignore-errors
+        (require 'whitespace)
+        (whitespace-mode 1)))
 (makefile-mode (indent-tabs-mode . t))
 )
--- a/.editorconfig
+++ b/.editorconfig
@@ -6,6 +6,7 @@ root = true
 [*]
 charset = utf-8
 insert_final_newline = true
+tab_width = 8

 [*.{c,h,cpp,hpp,cc,hh}]
 indent_style = space
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,7 +32,7 @@ env:
    - DRI3PROTO_VERSION=dri3proto-1.0
    - PRESENTPROTO_VERSION=presentproto-1.0
    - LIBPCIACCESS_VERSION=libpciaccess-0.13.4
-    - LIBDRM_VERSION=libdrm-2.4.65
+    - LIBDRM_VERSION=libdrm-2.4.74
    - XCBPROTO_VERSION=xcb-proto-1.11
    - LIBXCB_VERSION=libxcb-1.11
    - LIBXSHMFENCE_VERSION=libxshmfence-1.2
@@ -92,7 +92,7 @@ install:

  - wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
  - tar -jxvf $LIBDRM_VERSION.tar.bz2
-  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 && make install)
+  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-etnaviv-experimental-api && make install)

  - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
  - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
@@ -103,7 +103,7 @@ script:
      ./autogen.sh --enable-debug
        --with-egl-platforms=x11,drm
        --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
-        --with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600
+        --with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600,etnaviv,imx
        --disable-llvm-shared-libs
        ;
      make && make check;
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -43,6 +43,7 @@ LOCAL_CFLAGS += \
 	-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

 LOCAL_CFLAGS += \
+	-DENABLE_SHADER_CACHE \
 	-D__STDC_LIMIT_MACROS \
 	-DHAVE___BUILTIN_EXPECT \
 	-DHAVE___BUILTIN_FFS \
@@ -51,6 +52,7 @@ LOCAL_CFLAGS += \
 	-DHAVE_FUNC_ATTRIBUTE_UNUSED \
 	-DHAVE_FUNC_ATTRIBUTE_FORMAT \
 	-DHAVE_FUNC_ATTRIBUTE_PACKED \
+	-DHAVE_FUNC_ATTRIBUTE_ALIAS \
 	-DHAVE___BUILTIN_CTZ \
 	-DHAVE___BUILTIN_POPCOUNT \
 	-DHAVE___BUILTIN_POPCOUNTLL \
@@ -82,11 +84,13 @@ LOCAL_CFLAGS += \
 	-D__STDC_LIMIT_MACROS
 endif

+ifneq ($(LOCAL_IS_HOST_MODULE),true)
 # add libdrm if there are hardware drivers
 ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DHAVE_LIBDRM
 LOCAL_SHARED_LIBRARIES += libdrm
 endif
+endif

 LOCAL_CPPFLAGS += \
 	$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-D_USING_LIBCXX) \
--- a/Makefile.am
+++ b/Makefile.am
@@ -40,10 +40,10 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-vdpau \
 	--enable-xa \
 	--enable-xvmc \
-	--disable-llvm-shared-libs \
+	--enable-llvm-shared-libs \
 	--with-egl-platforms=x11,wayland,drm,surfaceless \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
-	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr \
+	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
 	--with-vulkan-drivers=intel,radeon

 ACLOCAL_AMFLAGS = -I m4
@@ -62,6 +62,7 @@ noinst_HEADERS = \
 	include/c99_math.h \
 	include/c11 \
 	include/D3D9 \
+	include/GL/wglext.h \
 	include/HaikuGL \
 	include/no_extern_c.h \
 	include/pci_ids
--- a/6
+++ b/6
@@ -92,10 +92,16 @@ F: */Makefile.sources

 ANDROID BUILD
 R: Emil Velikov <emil.l.velikov@gmail.com>
+R: Rob Herring <robh@kernel.org>
 F: CleanSpec.mk
 F: */Android.*mk
 F: */Makefile.sources

+ANDROID EGL SUPPORT
+R: Rob Herring <robh@kernel.org>
+R: Tomasz Figa <tfiga@chromium.org>
+F: src/egl/drivers/dri2/platform_android.c
+
 WAYLAND EGL SUPPORT
 R: Daniel Stone <daniels@collabora.com>
 F: src/egl/wayland/*
--- a/2
+++ b/2
@@ -1 +1 @@
-12.1.0-devel
+17.1.0-devel
--- a/bin/get-typod-pick-list.sh
+++ b/bin/get-typod-pick-list.sh
@@ -0,0 +1,39 @@
+#!/bin/sh
+
+# Script for generating a list of candidates which have typos in the nomination line
+#
+# Usage examples:
+#
+# $ bin/get-typod-pick-list.sh
+# $ bin/get-typod-pick-list.sh > picklist
+# $ bin/get-typod-pick-list.sh | tee picklist
+
+# NB:
+# This script intentionally _never_ checks for a specific version tag.
+# Should we consider folding it into the original get-pick-list.sh?
+
+# Grep for commits with "cherry picked from commit" in the commit message.
+git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
+	grep "cherry picked from commit" |\
+	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
+
+# Grep for commits that were marked as a candidate for the stable tree.
+git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' HEAD..origin/master |\
+while read sha
+do
+	# Check to see whether the patch is on the ignore list.
+	if [ -f bin/.cherry-ignore ] ; then
+		if grep -q ^$sha bin/.cherry-ignore ; then
+			continue
+		fi
+	fi
+
+	# Check to see if it has already been picked over.
+	if grep -q ^$sha already_picked ; then
+		continue
+	fi
+
+	git log -n1 --pretty=oneline $sha | cat
+done
+
+rm -f already_picked
--- a/common.py
+++ b/common.py
@@ -59,7 +59,7 @@ if target_platform == 'windows' and host_platform != 'windows':


 # find default_llvm value
-if 'LLVM' in os.environ:
+if 'LLVM' in os.environ or 'LLVM_CONFIG' in os.environ:
    default_llvm = 'yes'
 else:
    default_llvm = 'no'
@@ -110,5 +110,6 @@ def AddOptions(opts):
    opts.Add(BoolOption('texture_float',
                        'enable floating-point textures and renderbuffers',
                        'no'))
+    opts.Add(BoolOption('swr', 'Build OpenSWR', 'no'))
    if host_platform == 'windows':
        opts.Add('MSVC_VERSION', 'Microsoft Visual C/C++ version')
--- a/configure.ac
+++ b/configure.ac
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -55,7 +55,7 @@ to your preference, type:
 </pre>

 <p>
-This will produce libGL.so and several other libraries depending on the
+This will produce libGL.so and/or several other libraries depending on the
 options you have chosen. Later, if you want to rebuild for a different
 configuration run <code>make realclean</code> before rebuilding.
 </p>
@@ -133,9 +133,11 @@ There are also a few general options for altering the Mesa build:
 </p>
 <dl>
 <dt><code>--enable-debug</code></dt>
-<dd><p>This option will enable compiler
-options and macros to aid in debugging the Mesa libraries.</p>
-</dd>
+<dd><p>This option will set the compiler debug/optimisation levels (if the user
+hasn't already set them via the CFLAGS/CXXFLAGS) and macros to aid in
+debugging the Mesa libraries.</p>
+
+<p>Note that enabling this option can lead to noticeable loss of performance.</p>

 <dt><code>--disable-asm</code></dt>
 <dd><p>There are assembly routines
@@ -174,27 +176,22 @@ architecture, the following should be sufficient to configure multilib Mesa</p>
 </dl>


-<h2 id="driver">2. Driver Options</h2>
+<h2 id="driver">2. GL Driver Options</h2>

 <p>
 There are several different driver modes that Mesa can use. These are
 described in more detail in the <a href="install.html">basic
 installation instructions</a>. The Mesa driver is controlled through the
-configure options <code>--enable-xlib-glx</code>, <code>--enable-osmesa</code>,
-and <code>--enable-dri</code>.
+configure options <code>--enable-glx</code> and <code>--enable-osmesa</code>.
 </p>

 <h3 id="xlib">Xlib</h3><p>
 It uses Xlib as a software renderer to do all rendering. It corresponds
-to the option <code>--enable-xlib-glx</code>. The libX11 and libXext
-libraries, as well as the X11 development headers, will be need to
-support the Xlib driver.
+to the option <code>--enable-glx=xlib</code> or <code>--enable-glx=gallium-xlib</code>.

 <h3 id="dri">DRI</h3><p>This mode uses the DRI hardware drivers for
-accelerated OpenGL rendering. Enable the DRI drivers with the option
-<code>--enable-dri</code>. See the <a href="install.html">basic
-installation instructions</a> for details on prerequisites for the DRI
-drivers.
+accelerated OpenGL rendering. To enable use <code>--enable-glx=dri
+--enable-dri</code>.

 <!-- DRI specific options -->
 <dl>
@@ -252,10 +249,8 @@ will create the libOSMesa16 library with a 16-bit color channel.
 <h2 id="library">3. Library Options</h2>

 <p>
-The configure script provides more fine grained control over the GL
-libraries that will be built. More details on the specific GL libraries
-can be found in the <a href="install.html">basic installation
-instructions</a>.
+The configure script provides more fine grained control over the libraries
+that will be built.

 </div>
 </body>
--- a/docs/codingstyle.html
+++ b/docs/codingstyle.html
@@ -0,0 +1,142 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Coding Style</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Coding Style</h1>
+
+<p>
+Mesa is over 20 years old and the coding style has evolved over time.
+Some old parts use a style that's a bit out of date.
+
+Different sections of mesa can use different coding styles as set in the local
+EditorConfig (.editorconfig) and/or Emacs (.dir-locals.el) file.
+
+Alternatively the following is applicable.
+
+If the guidelines below don't cover something, try following the format of
+existing, neighboring code.
+</p>
+
+<p>
+Basic formatting guidelines
+</p>
+
+<ul>
+<li>3-space indentation, no tabs.
+<li>Limit lines to 78 or fewer characters.  The idea is to prevent line
+wrapping in 80-column editors and terminals.  There are exceptions, such
+as if you're defining a large, static table of information.
+<li>Opening braces go on the same line as the if/for/while statement.
+For example:
+<pre>
+   if (condition) {
+      foo;
+   } else {
+      bar;
+   }
+</pre>
+
+<li>Put a space before/after operators.  For example, <tt>a = b + c;</tt>
+and not <tt>a=b+c;</tt>
+
+<li>This GNU indent command generally does the right thing for formatting:
+<pre>
+   indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
+</pre>
+
+<li>Use comments wherever you think it would be helpful for other developers.
+Several specific cases and style examples follow.  Note that we roughly
+follow <a href="http://www.stack.nl/~dimitri/doxygen/">Doxygen</a> conventions.
+<br>
+<br>
+Single-line comments:
+<pre>
+   /* null-out pointer to prevent dangling reference below */
+   bufferObj = NULL;
+</pre>
+Or,
+<pre>
+   bufferObj = NULL;  /* prevent dangling reference below */
+</pre>
+Multi-line comment:
+<pre>
+   /* If this is a new buffer object id, or one which was generated but
+    * never used before, allocate a buffer object now.
+    */
+</pre>
+We try to quote the OpenGL specification where prudent:
+<pre>
+   /* Page 38 of the PDF of the OpenGL ES 3.0 spec says:
+    *
+    *     "An INVALID_OPERATION error is generated for any of the following
+    *     conditions:
+    *
+    *     * <length> is zero."
+    *
+    * Additionally, page 94 of the PDF of the OpenGL 4.5 core spec
+    * (30.10.2014) also says this, so it's no longer allowed for desktop GL,
+    * either.
+    */
+</pre>
+Function comment example:
+<pre>
+   /**
+    * Create and initialize a new buffer object.  Called via the
+    * ctx->Driver.CreateObject() driver callback function.
+    * \param  name  integer name of the object
+    * \param  type  one of GL_FOO, GL_BAR, etc.
+    * \return  pointer to new object or NULL if error
+    */
+   struct gl_object *
+   _mesa_create_object(GLuint name, GLenum type)
+   {
+      /* function body */
+   }
+</pre>
+
+<li>Put the function return type and qualifiers on one line and the function
+name and parameters on the next, as seen above.  This makes it easy to use
+<code>grep ^function_name dir/*</code> to find function definitions.  Also,
+the opening brace goes on the next line by itself (see above.)
+
+<li>Function names follow various conventions depending on the type of function:
+<pre>
+   glFooBar()       - a public GL entry point (in glapi_dispatch.c)
+   _mesa_FooBar()   - the internal immediate mode function
+   save_FooBar()    - retained mode (display list) function in dlist.c
+   foo_bar()        - a static (private) function
+   _mesa_foo_bar()  - an internal non-static Mesa function
+</pre>
+
+<li>Constants, macros and enumerant names are ALL_UPPERCASE, with _ between
+words.
+<li>Mesa usually uses camel case for local variables (Ex: "localVarname")
+while gallium typically uses underscores (Ex: "local_var_name").
+<li>Global variables are almost never used because Mesa should be thread-safe.
+
+<li>Booleans.  Places that are not directly visible to the GL API
+should prefer the use of <tt>bool</tt>, <tt>true</tt>, and
+<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
+<tt>GL_FALSE</tt>.  In C code, this may mean that
+<tt>#include &lt;stdbool.h&gt;</tt> needs to be added.  The
+<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
+src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.
+
+</ul>
+</p>
+
+</div>
+</body>
+</html>
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -66,7 +66,7 @@
 <li><a href="debugging.html" target="_parent">Debugging Tips</a>
 <li><a href="perf.html" target="_parent">Performance Tips</a>
 <li><a href="extensions.html" target="_parent">Mesa Extensions</a>
-<li><a href="mangling.html" target="_parent">Function Name Mangling</a>
+<li><a href="mangling.html" target="_parent">GL Function Name Mangling</a>
 <li><a href="llvmpipe.html" target="_parent">Gallium llvmpipe driver</a>
 <li><a href="vmware-guest.html" target="_parent">VMware SVGA3D guest driver</a>
 <li><a href="postprocess.html" target="_parent">Gallium post-processing</a>
@@ -81,6 +81,8 @@
 <li><a href="utilities.html" target="_parent">Utilities</a>
 <li><a href="helpwanted.html" target="_parent">Help Wanted</a>
 <li><a href="devinfo.html" target="_parent">Development Notes</a>
+<li><a href="codingstyle.html" target="_parent">Coding Style</a>
+<li><a href="submittingpatches.html" target="_parent">Submitting patches</a>
 <li><a href="sourcedocs.html" target="_parent">Source Documentation</a>
 <li><a href="dispatch.html" target="_parent">GL Dispatch</a>
 </ul>
--- a/docs/devinfo.html
+++ b/docs/devinfo.html
@@ -18,650 +18,9 @@


 <ul>
-<li><a href="#style">Coding Style</a>
-<li><a href="#submitting">Submitting Patches</a>
-<li><a href="#release">Making a New Mesa Release</a>
 <li><a href="#extensions">Adding Extensions</a>
 </ul>

-
-<h2 id="style">Coding Style</h2>
-
-<p>
-Mesa is over 20 years old and the coding style has evolved over time.
-Some old parts use a style that's a bit out of date.
-If the guidelines below don't cover something, try following the format of
-existing, neighboring code.
-</p>
-
-<p>
-Basic formatting guidelines
-</p>
-
-<ul>
-<li>3-space indentation, no tabs.
-<li>Limit lines to 78 or fewer characters.  The idea is to prevent line
-wrapping in 80-column editors and terminals.  There are exceptions, such
-as if you're defining a large, static table of information.
-<li>Opening braces go on the same line as the if/for/while statement.
-For example:
-<pre>
-   if (condition) {
-      foo;
-   } else {
-      bar;
-   }
-</pre>
-
-<li>Put a space before/after operators.  For example, <tt>a = b + c;</tt>
-and not <tt>a=b+c;</tt>
-
-<li>This GNU indent command generally does the right thing for formatting:
-<pre>
-   indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
-</pre>
-
-<li>Use comments wherever you think it would be helpful for other developers.
-Several specific cases and style examples follow.  Note that we roughly
-follow <a href="http://www.stack.nl/~dimitri/doxygen/">Doxygen</a> conventions.
-<br>
-<br>
-Single-line comments:
-<pre>
-   /* null-out pointer to prevent dangling reference below */
-   bufferObj = NULL;
-</pre>
-Or,
-<pre>
-   bufferObj = NULL;  /* prevent dangling reference below */
-</pre>
-Multi-line comment:
-<pre>
-   /* If this is a new buffer object id, or one which was generated but
-    * never used before, allocate a buffer object now.
-    */
-</pre>
-We try to quote the OpenGL specification where prudent:
-<pre>
-   /* Page 38 of the PDF of the OpenGL ES 3.0 spec says:
-    *
-    *     "An INVALID_OPERATION error is generated for any of the following
-    *     conditions:
-    *
-    *     * <length> is zero."
-    *
-    * Additionally, page 94 of the PDF of the OpenGL 4.5 core spec
-    * (30.10.2014) also says this, so it's no longer allowed for desktop GL,
-    * either.
-    */
-</pre>
-Function comment example:
-<pre>
-   /**
-    * Create and initialize a new buffer object.  Called via the
-    * ctx->Driver.CreateObject() driver callback function.
-    * \param  name  integer name of the object
-    * \param  type  one of GL_FOO, GL_BAR, etc.
-    * \return  pointer to new object or NULL if error
-    */
-   struct gl_object *
-   _mesa_create_object(GLuint name, GLenum type)
-   {
-      /* function body */
-   }
-</pre>
-
-<li>Put the function return type and qualifiers on one line and the function
-name and parameters on the next, as seen above.  This makes it easy to use
-<code>grep ^function_name dir/*</code> to find function definitions.  Also,
-the opening brace goes on the next line by itself (see above.)
-
-<li>Function names follow various conventions depending on the type of function:
-<pre>
-   glFooBar()       - a public GL entry point (in glapi_dispatch.c)
-   _mesa_FooBar()   - the internal immediate mode function
-   save_FooBar()    - retained mode (display list) function in dlist.c
-   foo_bar()        - a static (private) function
-   _mesa_foo_bar()  - an internal non-static Mesa function
-</pre>
-
-<li>Constants, macros and enumerant names are ALL_UPPERCASE, with _ between
-words.
-<li>Mesa usually uses camel case for local variables (Ex: "localVarname")
-while gallium typically uses underscores (Ex: "local_var_name").
-<li>Global variables are almost never used because Mesa should be thread-safe.
-
-<li>Booleans.  Places that are not directly visible to the GL API
-should prefer the use of <tt>bool</tt>, <tt>true</tt>, and
-<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
-<tt>GL_FALSE</tt>.  In C code, this may mean that
-<tt>#include &lt;stdbool.h&gt;</tt> needs to be added.  The
-<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
-src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.
-
-</ul>
-
-
-<h2 id="submitting">Submitting patches</h2>
-
-<p>
-The basic guidelines for submitting patches are:
-</p>
-
-<ul>
-<li>Patches should be sufficiently tested before submitting.
-<li>Code patches should follow Mesa coding conventions.
-<li>Whenever possible, patches should only effect individual Mesa/Gallium
-components.
-<li>Patches should never introduce build breaks and should be bisectable (see
-<code>git bisect</code>.)
-<li>Patches should be properly formatted (see below).
-<li>Patches should be submitted to mesa-dev for review using
-<code>git send-email</code>.
-<li>Patches should not mix code changes with code formatting changes (except,
-perhaps, in very trivial cases.)
-</ul>
-
-<h3>Patch formatting</h3>
-
-<p>
-The basic rules for patch formatting are:
-</p>
-
-<ul>
-<li>Lines should be limited to 75 characters or less so that git logs
-displayed in 80-column terminals avoid line wrapping.  Note that git
-log uses 4 spaces of indentation (4 + 75 &lt; 80).
-<li>The first line should be a short, concise summary of the change prefixed
-with a module name.  Examples:
-<pre>
-    mesa: Add support for querying GL_VERTEX_ATTRIB_ARRAY_LONG
-
-    gallium: add PIPE_CAP_DEVICE_RESET_STATUS_QUERY
-
-    i965: Fix missing type in local variable declaration.
-</pre>
-<li>Subsequent patch comments should describe the change in more detail,
-if needed.  For example:
-<pre>
-    i965: Remove end-of-thread SEND alignment code.
-    
-    This was present in Eric's initial implementation of the compaction code
-    for Sandybridge (commit 077d01b6). There is no documentation saying this
-    is necessary, and removing it causes no regressions in piglit on any
-    platform.
-</pre>
-<li>A "Signed-off-by:" line is not required, but not discouraged either.
-<li>If a patch address a bugzilla issue, that should be noted in the
-patch comment.  For example:
-<pre>
-   Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
-</pre>
-<li>If there have been several revisions to a patch during the review
-process, they should be noted such as in this example:
-<pre>
-    st/mesa: add ARB_texture_stencil8 support (v4)
-    
-    if we support stencil texturing, enable texture_stencil8
-    there is no requirement to support native S8 for this,
-    the texture can be converted to x24s8 fine.
-    
-    v2: fold fixes from Marek in:
-       a) put S8 last in the list
-       b) fix renderable to always test for d/s renderable
-        fixup the texture case to use a stencil only format
-        for picking the format for the texture view.
-    v3: hit fallback for getteximage
-    v4: put s8 back in front, it shouldn't get picked now (Ilia)
-</pre>
-<li>If someone tested your patch, document it with a line like this:
-<pre>
-    Tested-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-<li>If the patch was reviewed (usually the case) or acked by someone,
-that should be documented with:
-<pre>
-    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
-    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-</ul>
-
-
-
-<h3>Testing Patches</h3>
-
-<p>
-It should go without saying that patches must be tested.  In general,
-do whatever testing is prudent.
-</p>
-
-<p>
-You should always run the Mesa test suite before submitting patches.
-The test suite can be run using the 'make check' command. All tests
-must pass before patches will be accepted, this may mean you have
-to update the tests themselves.
-</p>
-
-<p>
-Whenever possible and applicable, test the patch with
-<a href="http://piglit.freedesktop.org">Piglit</a> to
-check for regressions.
-</p>
-
-
-<h3>Mailing Patches</h3>
-
-<p>
-Patches should be sent to the mesa-dev mailing list for review:
-<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev">
-mesa-dev@lists.freedesktop.org<a/>.
-When submitting a patch make sure to use
-<a href="https://git-scm.com/docs/git-send-email">git send-email</a>
-rather than attaching patches to emails. Sending patches as
-attachments prevents people from being able to provide in-line review
-comments.
-</p>
-
-<p>
-When submitting follow-up patches you can use --in-reply-to to make v2, v3,
-etc patches show up as replies to the originals. This usually works well
-when you're sending out updates to individual patches (as opposed to
-re-sending the whole series). Using --in-reply-to makes
-it harder for reviewers to accidentally review old patches.
-</p>
-
-<p>
-When submitting follow-up patches you should also login to
-<a href="https://patchwork.freedesktop.org">patchwork</a> and change the
-state of your old patches to Superseded.
-</p>
-
-<h3>Reviewing Patches</h3>
-
-<p>
-When you've reviewed a patch on the mailing list, please be unambiguous
-about your review.  That is, state either
-<pre>
-    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-or
-<pre>
-    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-Rather than saying just "LGTM" or "Seems OK".
-</p>
-
-<p>
-If small changes are suggested, it's OK to say something like:
-<pre>
-   With the above fixes, Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-which tells the patch author that the patch can be committed, as long
-as the issues are resolved first.
-</p>
-
-
-<h3>Marking a commit as a candidate for a stable branch</h3>
-
-<p>
-If you want a commit to be applied to a stable branch,
-you should add an appropriate note to the commit message.
-</p>
-
-<p>
-Here are some examples of such a note:
-</p>
-<ul>
-  <li>CC: &lt;mesa-stable@lists.freedesktop.org&gt;</li>
-  <li>CC: "9.2 10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
-  <li>CC: "10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
-</ul>
-
-Simply adding the CC to the mesa-stable list address is adequate to nominate
-the commit for the most-recently-created stable branch. It is only necessary
-to specify a specific branch name, (such as "9.2 10.0" or "10.0" in the
-examples above), if you want to nominate the commit for an older stable
-branch. And, as in these examples, you can nominate the commit for the older
-branch in addition to the more recent branch, or nominate the commit
-exclusively for the older branch.
-
-This "CC" syntax for patch nomination will cause patches to automatically be
-copied to the mesa-stable@ mailing list when you use "git send-email" to send
-patches to the mesa-dev@ mailing list. Also, if you realize that a commit
-should be nominated for the stable branch after it has already been committed,
-you can send a note directly to the mesa-stable@lists.freedesktop.org where
-the Mesa stable-branch maintainers will receive it. Be sure to mention the
-commit ID of the commit of interest (as it appears in the mesa master branch).
-
-The latest set of patches that have been nominated, accepted, or rejected for
-the upcoming stable release can always be seen on the
-<a href="http://cworth.org/~cworth/mesa-stable-queue/">Mesa Stable Queue</a>
-page.
-
-<h3>Criteria for accepting patches to the stable branch</h3>
-
-Mesa has a designated release manager for each stable branch, and the release
-manager is the only developer that should be pushing changes to these
-branches. Everyone else should simply nominate patches using the mechanism
-described above.
-
-The stable-release manager will work with the list of nominated patches, and
-for each patch that meets the crtieria below will cherry-pick the patch with:
-<code>git cherry-pick -x &lt;commit&gt;</code>. The <code>-x</code> option is
-important so that the picked patch references the comit ID of the original
-patch.
-
-The stable-release manager may at times need to force-push changes to the
-stable branches, for example, to drop a previously-picked patch that was later
-identified as causing a regression). These force-pushes may cause changes to
-be lost from the stable branch if developers push things directly. Consider
-yourself warned.
-
-The stable-release manager is also given broad discretion in rejecting patches
-that have been nominated for the stable branch. The most basic rule is that
-the stable branch is for bug fixes only, (no new features, no
-regressions). Here is a non-exhaustive list of some reasons that a patch may
-be rejected:
-
-<ul>
-  <li>Patch introduces a regression. Any reported build breakage or other
-  regression caused by a particular patch, (game no longer work, piglit test
-  changes from PASS to FAIL), is justification for rejecting a patch.</li>
-
-  <li>Patch is too large, (say, larger than 100 lines)</li>
-
-  <li>Patch is not a fix. For example, a commit that moves code around with no
-  functional change should be rejected.</li>
-
-  <li>Patch fix is not clearly described. For example, a commit message
-  of only a single line, no description of the bug, no mention of bugzilla,
-  etc.</li>
-
-  <li>Patch has not obviously been reviewed, For example, the commit message
-  has no Reviewed-by, Signed-off-by, nor Tested-by tags from anyone but the
-  author.</li>
-
-  <li>Patch has not already been merged to the master branch. As a rule, bug
-  fixes should never be applied first to a stable branch. Patches should land
-  first on the master branch and then be cherry-picked to a stable
-  branch. (This is to avoid future releases causing regressions if the patch
-  is not also applied to master.) The only things that might look like
-  exceptions would be backports of patches from master that happen to look
-  significantly different.</li>
-
-  <li>Patch depends on too many other patches. Ideally, all stable-branch
-  patches should be self-contained. It sometimes occurs that a single, logical
-  bug-fix occurs as two separate patches on master, (such as an original
-  patch, then a subsequent fix-up to that patch). In such a case, these two
-  patches should be squashed into a single, self-contained patch for the
-  stable branch. (Of course, if the squashing makes the patch too large, then
-  that could be a reason to reject the patch.)</li>
-
-  <li>Patch includes new feature development, not bug fixes. New OpenGL
-  features, extensions, etc. should be applied to Mesa master and included in
-  the next major release. Stable releases are intended only for bug fixes.
-
-  Note: As an exception to this rule, the stable-release manager may accept
-  hardware-enabling "features". For example, backports of new code to support
-  a newly-developed hardware product can be accepted if they can be reasonably
-  determined to not have effects on other hardware.</li>
-
-  <li>Patch is a performance optimization. As a rule, performance patches are
-  not candidates for the stable branch. The only exception might be a case
-  where an application's performance was recently severely impacted so as to
-  become unusable. The fix for this performance regression could then be
-  considered for a stable branch. The optimization must also be
-  non-controversial and the patches still need to meet the other criteria of
-  being simple and self-contained</li>
-
-  <li>Patch introduces a new failure mode (such as an assert). While the new
-  assert might technically be correct, for example to make Mesa more
-  conformant, this is not the kind of "bug fix" we want in a stable
-  release. The potential problem here is that an OpenGL program that was
-  previously working, (even if technically non-compliant with the
-  specification), could stop working after this patch. So that would be a
-  regression that is unaacceptable for the stable branch.</li>
-</ul>
-
-
-<h2 id="release">Making a New Mesa Release</h2>
-
-<p>
-These are the instructions for making a new Mesa release.
-</p>
-
-<h3>Get latest source files</h3>
-<p>
-Use git to get the latest Mesa files from the git repository, from whatever
-branch is relevant. This document uses the convention X.Y.Z for the release
-being created, which should be created from a branch named X.Y.
-</p>
-
-<h3>Perform basic testing</h3>
-<p>
-The release manager should, at the very least, test the code by compiling it,
-installing it, and running the latest piglit to ensure that no piglit tests
-have regressed since the previous release.
-</p>
-
-<p>
-The release manager should do this testing with at least one hardware driver,
-(say, whatever is contained in the local development machine), as well as on
-both Gallium and non-Gallium software drivers. The software testing can be
-performed by running piglit with the following environment-variable set:
-</p>
-
-<pre>
-LIBGL_ALWAYS_SOFTWARE=1
-</pre>
-
-And Gallium vs. non-Gallium software drivers can be obtained by using the
-following configure flags on separate builds:
-
-<pre>
--with-dri-drivers=swrast
--with-gallium-drivers=swrast
-</pre>
-
-<p>
-Note: If both options are given in one build, both swrast_dri.so drivers will
-be compiled, but only one will be installed. The following command can be used
-to ensure the correct driver is being tested:
-</p>
-
-<pre>
-LIBGL_ALWAYS_SOFTWARE=1 glxinfo | grep "renderer string"
-</pre>
-
-If any regressions are found in this testing with piglit, stop here, and do
-not perform a release until regressions are fixed.
-
-<h3>Update version in file VERSION</h3>
-
-<p>
-Increment the version contained in the file VERSION at Mesa's top-level, then
-commit this change.
-</p>
-
-<h3>Create release notes for the new release</h3>
-
-<p>
-Create a new file docs/relnotes/X.Y.Z.html, (follow the style of the previous
-release notes). Note that the sha256sums section of the release notes should
-be empty at this point.
-</p>
-
-<p>
-Two scripts are available to help generate portions of the release notes:
-
-<pre>
-	./bin/bugzilla_mesa.sh
-	./bin/shortlog_mesa.sh
-</pre>
-
-<p>
-The first script identifies commits that reference bugzilla bugs and obtains
-the descriptions of those bugs from bugzilla. The second script generates a
-log of all commits. In both cases, HTML-formatted lists are printed to stdout
-to be included in the release notes.
-</p>
-
-<p>
-Commit these changes
-</p>
-
-<h3>Make the release archives, signatures, and the release tag</h3>
-<p>
-From inside the Mesa directory:
-<pre>
-	./autogen.sh
-	make -j1 tarballs
-</pre>
-
-<p>
-After the tarballs are created, the sha256 checksums for the files will
-be computed and printed. These will be used in a step below.
-</p>
-
-<p>
-It's important at this point to also verify that the constructed tar file
-actually builds:
-</p>
-
-<pre>
-	tar xjf MesaLib-X.Y.Z.tar.bz2
-	cd Mesa-X.Y.Z
-	./configure --enable-gallium-llvm
-	make -j6
-	make install
-</pre>
-
-<p>
-Some touch testing should also be performed at this point, (run glxgears or
-more involved OpenGL programs against the installed Mesa).
-</p>
-
-<p>
-Create detached GPG signatures for each of the archive files created above:
-</p>
-
-<pre>
-	gpg --sign --detach MesaLib-X.Y.Z.tar.gz
-	gpg --sign --detach MesaLib-X.Y.Z.tar.bz2
-	gpg --sign --detach MesaLib-X.Y.Z.zip
-</pre>
-
-<p>
-Tag the commit used for the build:
-</p>
-
-<pre>
-	git tag -s mesa-X.Y.X -m "Mesa X.Y.Z release"
-</pre>
-
-<p>
-Note: It would be nice to investigate and fix the issue that causes the
-tarballs target to fail with multiple build process, such as with "-j4". It
-would also be nice to incorporate all of the above commands into a single
-makefile target. And instead of a custom "tarballs" target, we should
-incorporate things into the standard "make dist" and "make distcheck" targets.
-</p>
-
-<h3>Add the sha256sums to the release notes</h3>
-
-<p>
-Edit docs/relnotes/X.Y.Z.html to add the sha256sums printed as part of "make
-tarballs" in the previous step. Commit this change.
-</p>
-
-<h3>Push all commits and the tag created above</h3>
-
-<p>
-This is the first step that cannot easily be undone. The release is going
-forward from this point:
-</p>
-
-<pre>
-	git push origin X.Y --tags
-</pre>
-
-<h3>Install the release files and signatures on the distribution server</h3>
-
-<p>
-The following commands can be used to copy the release archive files and
-signatures to the freedesktop.org server:
-</p>
-
-<pre>
-	scp MesaLib-X.Y.Z* people.freedesktop.org:
-	ssh people.freedesktop.org
-	cd /srv/ftp.freedesktop.org/pub/mesa
-	mkdir X.Y.Z
-	cd X.Y.Z
-	mv ~/MesaLib-X.Y.Z* .
-</pre>
-
-<h3>Back on mesa master, add the new release notes into the tree</h3>
-
-<p>
-Something like the following steps will do the trick:
-</p>
-
-<pre>
-	cp docs/relnotes/X.Y.Z.html /tmp
-        git checkout master
-        cp /tmp/X.Y.Z.html docs/relnotes
-        git add docs/relnotes/X.Y.Z.html
-</pre>
-
-<p>
-Also, edit docs/relnotes.html to add a link to the new release notes, and edit
-docs/index.html to add a news entry. Then commit and push:
-</p>
-
-<pre>
-	git commit -a -m "docs: Import X.Y.Z release notes, add news item."
-        git push origin
-</pre>
-
-<h3>Update the mesa3d.org website</h3>
-
-<p>
-NOTE: The recent release managers have not been performing this step
-themselves, but leaving this to Brian Paul, (who has access to the
-sourceforge.net hosting for mesa3d.org). Brian is more than willing to grant
-the permission necessary to future release managers to do this step on their
-own.
-</p>
-
-<p>
-Update the web site by copying the docs/ directory's files to 
-/home/users/b/br/brianp/mesa-www/htdocs/ with:
-<br>
-<code>
-sftp USERNAME,mesa3d@web.sourceforge.net
-</code>
-</p>
-
-
-<h3>Announce the release</h3>
-<p>
-Make an announcement on the mailing lists:
-
-<em>mesa-dev@lists.freedesktop.org</em>,
-and
-<em>mesa-announce@lists.freedesktop.org</em>
-
-Follow the template of previously-sent release announcements. The following
-command can be used to generate the log of changes to be included in the
-release announcement:
-
-<pre>
-	git shortlog mesa-X.Y.Z-1..mesa-X.Y.Z
-</pre>
-</p>
-
-
 <h2 id="extensions">Adding Extensions</h2>

 <p>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -60,6 +60,8 @@ sometimes be useful for debugging end-user issues.
     <li>flush - flush after each drawing command</li>
     <li>incomplete_tex - extra debug messages when a texture is incomplete</li>
     <li>incomplete_fbo - extra debug messages when a fbo is incomplete</li>
+     <li>context - create a debug context (see GLX_CONTEXT_DEBUG_BIT_ARB) and
+         print error and performance messages to stderr (or MESA_LOG_FILE).</li>
   </ul>
 <li>MESA_LOG_FILE - specifies a file name for logging all errors, warnings,
 etc., rather than stderr
@@ -185,6 +187,8 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
   <li>do32 - generate compute shader SIMD32 programs even if workgroup size doesn't exceed the SIMD16 limit</li>
   <li>norbc - disable single sampled render buffer compression</li>
 </ul>
+<li>INTEL_PRECISE_TRIG - if set to 1, true or yes, then the driver prefers
+   accuracy over performance in trig functions.</li>
 </ul>


@@ -217,6 +221,8 @@ Mesa EGL supports different sets of environment variables.  See the
    disable for unencumbered viewing the rest of the time. For example, set
    GALLIUM_HUD_VISIBLE to false and GALLIUM_HUD_TOGGLE_SIGNAL to 10 (SIGUSR1).
    Use kill -10 <pid> to toggle the hud as desired.
+<li>GALLIUM_HUD_DUMP_DIR - specifies a directory for writing the displayed
+    hud values into files.
 <li>GALLIUM_DRIVER - useful in combination with LIBGL_ALWAYS_SOFTWARE=1 for
    choosing one of the software renderers "softpipe", "llvmpipe" or "swr".
 <li>GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc.
@@ -235,6 +241,21 @@ Setting to "tgsi", for example, will print all the TGSI shaders.
 See src/mesa/state_tracker/st_debug.c for other options.
 </ul>

+<h3>Clover state tracker environment variables</h3>
+
+<ul>
+<li>CLOVER_EXTRA_BUILD_OPTIONS - allows specifying additional compiler and linker
+    options. Specified options are appended after the options set by the OpenCL
+    program in clBuildProgram.
+<li>CLOVER_EXTRA_COMPILE_OPTIONS - allows specifying additional compiler
+    options. Specified options are appended after the options set by the OpenCL
+    program in clCompileProgram.
+<li>CLOVER_EXTRA_LINK_OPTIONS - allows specifying additional linker
+    options. Specified options are appended after the options set by the OpenCL
+    program in clLinkProgram.
+</ul>
+
+
 <h3>Softpipe driver environment variables</h3>
 <ul>
 <li>SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment shaders
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -33,7 +33,7 @@ are exposed in the 3.0 context as extensions.
 Feature                                                 Status
 ------------------------------------------------------- ------------------------

-GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr
+GL 3.0, GLSL 1.30 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr

  glBindFragDataLocation, glGetFragDataLocation         DONE
  GL_NV_conditional_render (Conditional rendering)      DONE ()
@@ -60,12 +60,12 @@ GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
  glVertexAttribI commands                              DONE
  Depth format cube textures                            DONE ()
  GLX_ARB_create_context (GLX 1.4 is required)          DONE
-  Multisample anti-aliasing                             DONE (llvmpipe (*), softpipe (*), swr (*))
+  Multisample anti-aliasing                             DONE (freedreno (*), llvmpipe (*), softpipe (*), swr (*))

-(*) llvmpipe, softpipe, and swr have fake Multisample anti-aliasing support
+(*) freedreno, llvmpipe, softpipe, and swr have fake Multisample anti-aliasing support


-GL 3.1, GLSL 1.40 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr
+GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr

  Forward compatible context support/deprecations       DONE ()
  GL_ARB_draw_instanced (Instanced drawing)             DONE ()
@@ -82,34 +82,34 @@ GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft

  Core/compatibility profiles                           DONE
  Geometry shaders                                      DONE ()
-  GL_ARB_vertex_array_bgra (BGRA vertex order)          DONE (swr)
-  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (swr)
-  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (swr)
-  GL_ARB_provoking_vertex (Provoking vertex)            DONE (swr)
-  GL_ARB_seamless_cube_map (Seamless cubemaps)          DONE (swr)
+  GL_ARB_vertex_array_bgra (BGRA vertex order)          DONE (freedreno, swr)
+  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (freedreno, swr)
+  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (freedreno, swr)
+  GL_ARB_provoking_vertex (Provoking vertex)            DONE (freedreno, swr)
+  GL_ARB_seamless_cube_map (Seamless cubemaps)          DONE (freedreno, swr)
  GL_ARB_texture_multisample (Multisample textures)     DONE (swr)
-  GL_ARB_depth_clamp (Frag depth clamp)                 DONE (swr)
-  GL_ARB_sync (Fence objects)                           DONE (swr)
+  GL_ARB_depth_clamp (Frag depth clamp)                 DONE (freedreno, swr)
+  GL_ARB_sync (Fence objects)                           DONE (freedreno, swr)
  GLX_ARB_create_context_profile                        DONE


 GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe

-  GL_ARB_blend_func_extended                            DONE (swr)
+  GL_ARB_blend_func_extended                            DONE (freedreno/a3xx, swr)
  GL_ARB_explicit_attrib_location                       DONE (all drivers that support GLSL)
-  GL_ARB_occlusion_query2                               DONE (swr)
+  GL_ARB_occlusion_query2                               DONE (freedreno, swr)
  GL_ARB_sampler_objects                                DONE (all drivers)
-  GL_ARB_shader_bit_encoding                            DONE (swr)
-  GL_ARB_texture_rgb10_a2ui                             DONE (swr)
-  GL_ARB_texture_swizzle                                DONE (swr)
+  GL_ARB_shader_bit_encoding                            DONE (freedreno, swr)
+  GL_ARB_texture_rgb10_a2ui                             DONE (freedreno, swr)
+  GL_ARB_texture_swizzle                                DONE (freedreno, swr)
  GL_ARB_timer_query                                    DONE (swr)
-  GL_ARB_instanced_arrays                               DONE (swr)
-  GL_ARB_vertex_type_2_10_10_10_rev                     DONE (swr)
+  GL_ARB_instanced_arrays                               DONE (freedreno, swr)
+  GL_ARB_vertex_type_2_10_10_10_rev                     DONE (freedreno, swr)


-GL 4.0, GLSL 4.00 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
+GL 4.0, GLSL 4.00 --- all DONE: i965/hsw+, nvc0, r600, radeonsi

-  GL_ARB_draw_buffers_blend                             DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
+  GL_ARB_draw_buffers_blend                             DONE (freedreno, i965/gen6+, nv50, llvmpipe, softpipe, swr)
  GL_ARB_draw_indirect                                  DONE (i965/gen7+, llvmpipe, softpipe, swr)
  GL_ARB_gpu_shader5                                    DONE (i965/gen7+)
  - 'precise' qualifier                                 DONE
@@ -124,7 +124,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
  - Enhanced per-sample shading                         DONE ()
  - Interpolation functions                             DONE ()
  - New overload resolution rules                       DONE
-  GL_ARB_gpu_shader_fp64                                DONE (llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                                DONE (i965/hsw+, llvmpipe, softpipe)
  GL_ARB_sample_shading                                 DONE (i965/gen6+, nv50)
  GL_ARB_shader_subroutine                              DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
  GL_ARB_tessellation_shader                            DONE (i965/gen7+)
@@ -133,20 +133,20 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
  GL_ARB_texture_gather                                 DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
  GL_ARB_texture_query_lod                              DONE (i965, nv50, softpipe)
  GL_ARB_transform_feedback2                            DONE (i965/gen7+, nv50, llvmpipe, softpipe, swr)
-  GL_ARB_transform_feedback3                            DONE (i965/gen7+, nv50, llvmpipe, softpipe, swr)
+  GL_ARB_transform_feedback3                            DONE (i965/gen7+, llvmpipe, softpipe, swr)


-GL 4.1, GLSL 4.10 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
+GL 4.1, GLSL 4.10 --- all DONE: i965/hsw+, nvc0, r600, radeonsi

  GL_ARB_ES2_compatibility                              DONE (i965, nv50, llvmpipe, softpipe, swr)
  GL_ARB_get_program_binary                             DONE (0 binary formats)
  GL_ARB_separate_shader_objects                        DONE (all drivers)
-  GL_ARB_shader_precision                               DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                            DONE (llvmpipe, softpipe)
+  GL_ARB_shader_precision                               DONE (i965/hsw+, all drivers that support GLSL 4.10)
+  GL_ARB_vertex_attrib_64bit                            DONE (i965/hsw+, llvmpipe, softpipe)
  GL_ARB_viewport_array                                 DONE (i965, nv50, llvmpipe, softpipe)


-GL 4.2, GLSL 4.20 -- all DONE: i965/gen8+, nvc0, radeonsi
+GL 4.2, GLSL 4.20 -- all DONE: i965/hsw+, nvc0, radeonsi

  GL_ARB_texture_compression_bptc                       DONE (i965, r600)
  GL_ARB_compressed_texture_pixel_storage               DONE (all drivers)
@@ -253,25 +253,25 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
 GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+

  GL_EXT_color_buffer_float                             DONE (all drivers)
-  GL_KHR_blend_equation_advanced                        DONE (i965)
+  GL_KHR_blend_equation_advanced                        DONE (i965, nvc0)
  GL_KHR_debug                                          DONE (all drivers)
  GL_KHR_robustness                                     DONE (i965, nvc0, radeonsi)
  GL_KHR_texture_compression_astc_ldr                   DONE (i965/gen9+)
  GL_OES_copy_image                                     DONE (all drivers)
  GL_OES_draw_buffers_indexed                           DONE (all drivers that support GL_ARB_draw_buffers_blend)
  GL_OES_draw_elements_base_vertex                      DONE (all drivers)
-  GL_OES_geometry_shader                                DONE (i965/gen8+, nvc0, radeonsi)
+  GL_OES_geometry_shader                                DONE (i965/hsw+, nvc0, radeonsi)
  GL_OES_gpu_shader5                                    DONE (all drivers that support GL_ARB_gpu_shader5)
  GL_OES_primitive_bounding_box                         DONE (i965/gen7+, nvc0, radeonsi)
  GL_OES_sample_shading                                 DONE (i965, nvc0, r600, radeonsi)
  GL_OES_sample_variables                               DONE (i965, nvc0, r600, radeonsi)
  GL_OES_shader_image_atomic                            DONE (all drivers that support GL_ARB_shader_image_load_store)
-  GL_OES_shader_io_blocks                               DONE (i965/gen8+, nvc0, radeonsi)
+  GL_OES_shader_io_blocks                               DONE (All drivers that support GLES 3.1)
  GL_OES_shader_multisample_interpolation               DONE (i965, nvc0, r600, radeonsi)
  GL_OES_tessellation_shader                            DONE (all drivers that support GL_ARB_tessellation_shader)
  GL_OES_texture_border_clamp                           DONE (all drivers)
  GL_OES_texture_buffer                                 DONE (i965, nvc0, radeonsi)
-  GL_OES_texture_cube_map_array                         DONE (i965/gen8+, nvc0, radeonsi)
+  GL_OES_texture_cube_map_array                         DONE (i965/hsw+, nvc0, radeonsi)
  GL_OES_texture_stencil8                               DONE (all drivers that support GL_ARB_texture_stencil8)
  GL_OES_texture_storage_multisample_2d_array           DONE (all drivers that support GL_ARB_texture_multisample)

@@ -287,7 +287,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
  GL_ARB_indirect_parameters                            DONE (nvc0, radeonsi)
  GL_ARB_parallel_shader_compile                        not started, but Chia-I Wu did some related work in 2014
  GL_ARB_pipeline_statistics_query                      DONE (i965, nvc0, radeonsi, softpipe, swr)
-  GL_ARB_post_depth_coverage                            not started
+  GL_ARB_post_depth_coverage                            DONE (i965)
  GL_ARB_robustness_isolation                           not started
  GL_ARB_sample_locations                               not started
  GL_ARB_seamless_cubemap_per_texture                   DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,58 @@

 <h1>News</h1>

+<h2>January 23, 2017</h2>
+<p>
+<a href="relnotes/12.0.6.html">Mesa 12.0.6</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: This is an extra release for the 12.0 stable branch, as per developers'
+feedback. It is anticipated that 12.0.6 will be the final release in the 12.0
+series. Users of 12.0 are encouraged to migrate to the 13.0 series in order
+to obtain future fixes.
+</p>
+
+<h2>January 5, 2017</h2>
+<p>
+<a href="relnotes/13.0.3.html">Mesa 13.0.3</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>December 5, 2016</h2>
+<p>
+<a href="relnotes/12.0.5.html">Mesa 12.0.5</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 12.0.5 will be the final release in the 12.0
+series. Users of 12.0 are encouraged to migrate to the 13.0 series in order
+to obtain future fixes.
+</p>
+
+<h2>November 28, 2016</h2>
+<p>
+<a href="relnotes/13.0.2.html">Mesa 13.0.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>November 14, 2016</h2>
+<p>
+<a href="relnotes/13.0.1.html">Mesa 13.0.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>November 10, 2016</h2>
+<p>
+<a href="relnotes/12.0.4.html">Mesa 12.0.4</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>November 1, 2016</h2>
+<p>
+<a href="relnotes/13.0.0.html">Mesa 13.0.0</a> is released.  This is a
+new development release.  See the release notes for more information
+about the release.
+</p>
+
 <h2>September 15, 2016</h2>
 <p>
 <a href="relnotes/12.0.3.html">Mesa 12.0.3</a> is released.
--- a/docs/install.html
+++ b/docs/install.html
@@ -24,7 +24,7 @@
  </ul>
 <li><a href="#autoconf">Building with autoconf (Linux/Unix/X11)</a>
 <li><a href="#scons">Building with SCons (Windows/Linux)</a>
-<li><a href="#other">Building for other systems</a>
+<li><a href="#android">Building with AOSP (Android)</a>
 <li><a href="#libs">Library Information</a>
 <li><a href="#pkg-config">Building OpenGL programs with pkg-config</a>
 </ol>
@@ -33,62 +33,85 @@
 <h1 id="prereq-general">1. Prerequisites for building</h1>

 <h2>1.1 General</h2>
+
+<p>
+Build system.
+</p>
+
+<ul>
+<li>Autoconf is required when building on *nix platforms.
+<li><a href="http://www.scons.org/">SCons</a> is required for building on
+Windows and optional for Linux (it's an alternative to autoconf/automake.)
+</li>
+<li>Android Build system when building as native Android component. Autoconf
+is used when building ARC.
+</li>
+</ul>
+
+
+<p>
+The following compilers are known to work, if you know of others or you're
+willing to maintain support for another compiler, get in touch.
+</p>
+
+<ul>
+<li>GCC 4.2.0 or later (some parts of Mesa may require later versions)
+<li>clang - exact minimum requirement is currently unknown.
+<li>Microsoft Visual Studio 2013 Update 4 or later is required, for building on Windows.
+</ul>
+
+
+<p>
+Third party/extra tools.
+<br>
+<strong>Note</strong>: These should not be required, when building from a release tarball. If
+you think you've spotted a bug let developers know by filing a
+<a href="bugs.html">bug report</a>.
+</p>
+
+
 <ul>
 <li><a href="http://www.python.org/">Python</a> - Python is required.
 Version 2.6.4 or later should work.
 </li>
-<br>
 <li><a href="http://www.makotemplates.org/">Python Mako module</a> -
 Python Mako module is required. Version 0.3.4 or later should work.
 </li>
-</br>
-<li><a href="http://www.scons.org/">SCons</a> is required for building on
-Windows and optional for Linux (it's an alternative to autoconf/automake.)
-</li>
-<br>
-<li>lex / yacc - for building the GLSL compiler.
-<br>
-<br>
-On Linux systems, flex and bison are used.
-Versions 2.5.35 and 2.4.1, respectively, (or later) should work.
-<br>
-<br>
+<li>lex / yacc - for building the Mesa IR and GLSL compiler.
+<div>
+On Linux systems, flex and bison versions 2.5.35 and 2.4.1, respectively,
+(or later) should work.
 On Windows with MinGW, install flex and bison with:
 <pre>mingw-get install msys-flex msys-bison</pre>
 For MSVC on Windows, install
 <a href="http://winflexbison.sourceforge.net/">Win flex-bison</a>.
-</li>
-<br>
-<li>For building on Windows, Microsoft Visual Studio 2013 or later is required.
-</li>
+</div>
 </ul>
+<p><strong>Note</strong>: Some versions can be buggy (eg. flex 2.6.2) so do try others if things fail.</p>


-<h3 id="prereq-dri">1.2 For DRI and hardware acceleration</h3>
+<h3 id="prereq-dri">1.2 Requirements</h3>

 <p>
-The following are required for DRI-based hardware acceleration with Mesa:
+The requirements depend on the features selected at configure stage.
+Check/install the respective -devel package as prompted by the configure error
+message.
 </p>

-<ul>
-<li><a href="http://xorg.freedesktop.org/releases/individual/proto/">
-dri2proto</a> version 2.6 or later
-<li><a href="http://dri.freedesktop.org/libdrm/">libDRM</a> latest version
-<li>Xorg server version 1.5 or later
-<li>Linux 2.6.28 or later
-</ul>
 <p>
-If you're using a fedora distro the following command should install all
-the needed dependencies:
+Here are some common ways to retrieve most/all of the dependencies based on
+the packaging tool used by your distro.
 </p>
+
 <pre>
-  sudo yum install flex bison imake libtool xorg-x11-proto-devel libdrm-devel \
-  gcc-c++ xorg-x11-server-devel libXi-devel libXmu-devel libXdamage-devel git \
-  expat-devel llvm-devel python-mako
+  zypper source-install --build-deps-only Mesa # openSUSE/SLED/SLES
+  yum-builddep mesa # yum Fedora, OpenSuse(?)
+  dnf builddep mesa # dnf Fedora
+  apt-get build-dep mesa # Debian and derivatives
+  ... # others
 </pre>


-
 <h1 id="autoconf">2. Building with autoconf (Linux/Unix/X11)</h1>

 <p>
@@ -139,22 +162,30 @@ This will create:
 </ul>
 <p>
 Put them all in the same directory to test them.
+
+Additional information is available in <a href="README.WIN32">README.WIN32</a>.
+
 </p>



-<h1 id="other">4. Building for other systems</h1>
+<h1 id="android">4. Building with AOSP (Android)</h1>

 <p>
-Documentation for other environments (some may be very out of date):
+Currently one can build Mesa for Android as part of the AOSP project, yet
+your experience might vary.
 </p>

-<ul>
-<li><a href="README.VMS">README.VMS</a> - VMS
-<li><a href="README.CYGWIN">README.CYGWIN</a> - Cygwin
-<li><a href="README.WIN32">README.WIN32</a> - Win32
-</ul>
+<p>
+In order to achieve that one should update their local manifest to point to the
+upstream repo, set the appropriate BOARD_GPU_DRIVERS and build the
+libGLES_mesa library.
+</p>

+<p>
+FINISHME: Improve on the instructions add references to Rob H repos/Jenkins,
+Android-x86 and/or other resources.
+</p>


 <h1 id="libs">5. Library Information</h1>
--- a/docs/mangling.html
+++ b/docs/mangling.html
@@ -2,7 +2,7 @@
 <html lang="en">
 <head>
  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Function Name Mangling</title>
+  <title>GL Function Name Mangling</title>
  <link rel="stylesheet" type="text/css" href="mesa.css">
 </head>
 <body>
@@ -14,7 +14,7 @@
 <iframe src="contents.html"></iframe>
 <div class="content">

-<h1>Function Name Mangling</h1>
+<h1>GL Function Name Mangling</h1>

 <p>
 If you want to use both Mesa and another OpenGL library in the same
@@ -25,12 +25,11 @@ This results in all the Mesa functions being prefixed with
 </p>

 <p>
-To do this, recompile Mesa with the compiler flag -DUSE_MGL_NAMESPACE.
-Add the flag to CFLAGS in the configuration file which you want to use.
-For example:
+This option is supported only with the autoconf build. To use it add
+--enable-mangling to your configure line.
 </p>
 <pre>
-CFLAGS += -DUSE_MGL_NAMESPACE
+<code>./configure --enable-mangling ...</code>
 </pre>

 </div>
--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -0,0 +1,512 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Releasing process</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Releasing process</h1>
+
+<ul>
+<li><a href="#overview">Overview</a>
+<li><a href="#schedule">Release schedule</a>
+<li><a href="#pickntest">Cherry-pick and test</a>
+<li><a href="#branch">Making a branchpoint</a>
+<li><a href="#prerelease">Pre-release announcement</a>
+<li><a href="#release">Making a new release</a>
+<li><a href="#announce">Announce the release</a>
+<li><a href="#website">Update the mesa3d.org website</a>
+<li><a href="#bugzilla">Update Bugzilla</a>
+</ul>
+
+<h1 id="overview">Overview</h1>
+
+<p>
+This document uses the convention X.Y.Z for the release number with X.Y being
+the stable branch name.
+<br>
+Mesa provides feature and bugfix releases. The former use zero as patch version (Z),
+while the latter have a non-zero one.
+</p>
+
+<p>
+For example:
+</p>
+<pre>
+	Mesa 10.1.0 - 10.1 branch, feature
+	Mesa 10.1.4 - 10.1 branch, bugfix
+	Mesa 12.0.0 - 12.0 branch, feature
+	Mesa 12.0.2 - 12.0 branch, bugfix
+</pre>
+
+<h1 id="schedule">Release schedule</h1>
+
+<p>
+Releases should happen on Fridays. Delays can occur although those should be kept
+to a minimum.
+</p>
+
+<h2>Feature releases</h2>
+<ul>
+<li>Available approximately every three months.
+<li>Initial timeplan available 2-4 weeks before the planned branchpoint (rc1)
+on the mesa-announce@ mailing list.
+<li>A <a href="#prerelease">pre-release</a> announcement should be available
+approximately 24 hours before the final (non-rc) release.
+</ul>
+
+<h2>Stable releases</h2>
+<ul>
+<li>Normally available once every two weeks.
+<li>Only the latest branch has releases. See note below.
+<li>A <a href="#prerelease">pre-release</a> announcement should be available
+approximately 48 hours before the actual release.
+</ul>
+
+<p>
+Note: There is a one or two release overlap when changing branches. For example:
+<br>
+The final release from the 12.0 series Mesa 12.0.5 will be out around the same
+time (or shortly after) 13.0.1 is out.
+</p>
+
+<h1 id="pickntest">Cherry-picking and testing</h1>
+
+<p>
+Commits nominated for the active branch are picked based on the
+<a href="submittingpatches.html#criteria" target="_parent">criteria</a> as
+described in the same section.
+
+<p>
+Maintainer is responsible for testing in various possible permutations of
+the autoconf and scons build.
+</p>
+
+<h2>Cherry-picking and build/check testing</h2>
+
+<p>Done continuously up-to the <a href="#prerelease">pre-release</a> announcement.</p>
+
+<p>
+As an exception, patches can be applied up-to the last ~1h before the actual
+release. This is made <strong>only</strong> with explicit permission/request,
+and the patch <strong>must</strong> be very well contained. Thus it cannot
+affect more than one driver/subsystem.
+</p>
+<p>
+Currently Ilia Mirkin and AMD devs have requested "permanent" exception.
+</p>
+
+
+<ul>
+<li>make distcheck, scons and scons check must pass
+<li>Testing with different version of system components - LLVM and others is also
+performed where possible.
+</ul>
+<p>
+Achieved by combination of local ad-hoc scripts and AppVeyor plus Travis-CI,
+the latter as part of their Github integration.
+</p>
+
+<h2>Regression/functionality testing</h2>
+
+<p>
+Less often (once or twice), shortly before the pre-release announcement.
+Ensure that testing is redone if Intel devs have requested an exception, as per above.
+</p>
+<ul>
+<li><em>no regressions should be observed for Piglit/dEQP/CTS/Vulkan on Intel platforms</em>
+<li><em>no regressions should be observed for Piglit using the swrast, softpipe
+and llvmpipe drivers</em>
+</ul>
+<p>
+Currently testing is performed courtesy of the Intel OTC team and their Jenkins CI setup. Check with the Intel team over IRC how to get things setup.
+</p>
+
+
+<h1 id="branch">Making a branchpoint</h1>
+
+<p>
+A branchpoint is made such that new development can continue in parallel to
+stabilisation and bugfixing.
+</p>
+
+<p>
+Note: Before doing a branch ensure that basic build and <code>make check</code>
+testing is done and there are little to-no issues.
+<br>
+Ideally all of those should be tackled already.
+</p>
+
+<p>
+Check if the version number is going to remain as is, alternatively
+<code> git mv docs/relnotes/{current,new}.html </code> as appropriate.
+</p>
+
+<p>
+To setup the branchpoint:
+</p>
+<pre>
+	git checkout master # make sure we're in master first
+	git tag -s X.Y-branchpoint -m "Mesa X.Y branchpoint"
+	git checkout -b X.Y
+	git checkout master
+	$EDITOR VERSION # bump the version number
+	git commit -as
+	git push origin X.Y-branchpoint X.Y
+</pre>
+
+<p>
+Now go to
+<a href="https://bugs.freedesktop.org/editversions.cgi?action=add&amp;product=Mesa" target="_parent">Bugzilla</a> and add the new Mesa version X.Y.
+</p>
+<p>
+Check that there are no distribution breaking changes and revert them
+if needed. Extremely rare - we had only one case so far (see
+commit 2ced8eb136528914e1bf4e000dea06a9d53c7e04).
+</p>
+<p>
+Proceed to <a href="#release">release</a> -rc1.
+</p>
+
+<h1 id="prerelease">Pre-release announcement</h1>
+
+<p>
+It comes shortly after outstanding patches in the respective branch are pushed.
+Developers can check, in brief, what's the status of their patches. They,
+alongside very early testers, are strongly encouraged to test the branch and
+report any regressions.
+<br>
+It is followed by a brief period (normally 24 or 48 hours) before the actual
+release is made.
+</p>
+
+<h2>Terminology used</h2>
+<ul><li>Nominated</ul>
+<p>
+Patch that is nominated but yet to be merged in the patch queue/branch.
+</p>
+
+<ul><li>Queued</ul>
+<p>
+Patch is in the queue/branch and will feature in the next release,
+barring reported regressions or objections from developers.
+</p>
+
+<ul><li>Rejected</ul>
+<p>
+Patch does not fit the
+<a href="submittingpatches.html#criteria" target="_parent">criteria</a> and
+is followed by a brief explanation.
+<br>
+The release maintainer is human so if you believe you've spotted a mistake do
+let them know.
+</p>
+
+<h2>Format/template</h2>
+<pre>
+Subject: [ANNOUNCE] Mesa X.Y.Z release candidate
+To: mesa-announce@...
+Cc: mesa-dev@...
+
+Hello list,
+
+The candidate for the Mesa X.Y.Z is now available. Currently we have:
+ - NUMBER queued
+ - NUMBER nominated (outstanding)
+ - and NUMBER rejected patches
+
+BRIEF SUMMARY OF CHANGES
+
+Take a look at section "Mesa stable queue" for more information.
+
+
+Testing reports/general approval
+--------------------------------
+Any testing reports (or general approval of the state of the branch) will be
+greatly appreciated.
+
+The plan is to have X.Y.Z this DAY (DATE), around or shortly after TIME.
+
+If you have any questions or suggestions - be that about the current patch
+queue or otherwise, please go ahead.
+
+
+Trivial merge conflicts
+-----------------------
+List of commits where manual intervention was required.
+Keep the authors in the CC list.
+
+commit SHA
+Author: AUTHOR
+
+    COMMIT SUMMARY
+
+    CHERRY PICKED FROM
+
+
+For example:
+
+commit 990f395e007c3204639daa34efc3049f350ee819
+Author: Emil Velikov &lt;emil.velikov@collabora.com&gt;
+
+    anv: automake: cleanup the generated json file during make clean
+
+    (cherry picked from commit 8df581520a823564be0ab5af7dbb7d501b1c9670)
+
+
+Cheers,
+Emil
+
+
+Mesa stable queue
+-----------------
+
+Nominated (NUMBER)
+==================
+
+AUTHOR (NUMBER):
+      SHA     COMMIT SUMMARY
+
+For example:
+
+Dave Airlie (1):
+      2de85eb radv: fix texturesamples to handle single sample case
+
+
+Queued (NUMBER)
+===============
+
+AUTHOR (NUMBER):
+      COMMIT SUMMARY
+
+
+Rejected (NUMBER)
+=================
+
+Rejected (11)
+=============
+
+AUTHOR (NUMBER):
+      SHA     COMMIT SUMMARY
+
+Reason: ...
+</pre>
+
+<h1 id="release">Making a new release</h1>
+
+<p>
+These are the instructions for making a new Mesa release.
+</p>
+
+<h3>Get latest source files</h3>
+<p>
+Ensure the latest code is available - both in your local master and the
+relevant branch.
+</p>
+
+<h3>Perform basic testing</h3>
+<p>
+Most of the testing should already be done during the
+<a href="#pickntest">cherry-pick</a> and
+<a href="#prerelease">pre-announce</a> stages.
+
+So we do a quick 'touch test'
+<ul>
+<li>make distcheck (you can omit this if you're not using --dist below)
+<li>scons (from release tarball)
+<li>the produced binaries work
+</ul>
+
+<p>
+Here is one solution that I've been using.
+</p>
+
+<pre>
+	git clean -fXd; git clean -nxd
+	read # quick cross check any outstanding files
+	export __version=`cat VERSION`
+	export __mesa_root=../
+	export __build_root=./foo
+	chmod 755 -fR $__build_root; rm -rf $__build_root
+	mkdir -p $__build_root &amp;&amp; cd $__build_root
+
+	$__mesa_root/autogen.sh --enable-llvm-shared-libs &amp;&amp; make -j2 distcheck
+
+	# Build check the tarballs (scons)
+	tar -xaf mesa-$__version.tar.xz &amp;&amp; cd mesa-$__version &amp;&amp; scons &amp;&amp; cd ..
+
+	# Test the automake binaries
+	rm -rf mesa-$__version
+	tar -xaf mesa-$__version.tar.xz &amp;&amp; cd mesa-$__version
+	./configure \
+		--with-dri-drivers=i965,swrast \
+		--with-gallium-drivers=swrast \
+		--with-vulkan-drivers=intel \
+		--enable-llvm-shared-libs \
+		--enable-gallium-llvm \
+		--enable-glx-tls \
+		--enable-gbm \
+		--enable-egl \
+		--with-egl-platforms=x11,drm,wayland
+	make -j2 &amp;&amp; DESTDIR=`pwd`/test make -j6 install
+	export LD_LIBRARY_PATH=`pwd`/test/usr/local/lib/
+	export LIBGL_DRIVERS_PATH=`pwd`/test/usr/local/lib/dri/
+	export LIBGL_DEBUG=verbose
+	glxinfo | egrep -o "Mesa.*"
+	glxgears
+	es2_info | egrep "GL_VERSION|GL_RENDERER"
+	es2gears_x11
+	export LIBGL_ALWAYS_SOFTWARE=1
+	glxinfo | egrep -o "Mesa.*|Gallium.*"
+	glxgears
+	es2_info | egrep "GL_VERSION|GL_RENDERER"
+	es2gears_x11
+	export LIBGL_ALWAYS_SOFTWARE=1
+	export GALLIUM_DRIVER=softpipe
+	glxinfo | egrep -o "Mesa.*|Gallium.*"
+	glxgears
+	es2_info | egrep "GL_VERSION|GL_RENDERER"
+	es2gears_x11
+	# Smoke test DOTA2
+	unset LD_LIBRARY_PATH
+	unset LIBGL_DRIVERS_PATH
+	unset LIBGL_DEBUG
+	unset LIBGL_ALWAYS_SOFTWARE
+	export VK_ICD_FILENAMES=`pwd`/src/intel/vulkan/dev_icd.json
+	steam steam://rungameid/570  -vconsole -vulkan
+</pre>
+
+<h3>Update version in file VERSION</h3>
+
+<p>
+Increment the version contained in the file VERSION at Mesa's top-level, then
+commit this change.
+</p>
+
+<h3>Create release notes for the new release</h3>
+
+<p>
+Create a new file docs/relnotes/X.Y.Z.html, (follow the style of the previous
+release notes). Note that the sha256sums section of the release notes should
+be empty (TBD) at this point.
+</p>
+
+<p>
+Two scripts are available to help generate portions of the release notes:
+
+<pre>
+	./bin/bugzilla_mesa.sh
+	./bin/shortlog_mesa.sh
+</pre>
+
+<p>
+The first script identifies commits that reference bugzilla bugs and obtains
+the descriptions of those bugs from bugzilla. The second script generates a
+log of all commits. In both cases, HTML-formatted lists are printed to stdout
+to be included in the release notes.
+</p>
+
+<p>
+Commit these changes and push the branch.
+</p>
+<pre>
+	git push origin HEAD
+</pre>
+
+
+<h3>Use the release.sh script from xorg util-macros</h3>
+
+<p>
+Ensure that the mesa git tree is clean via <code>git clean -fXd</code> and
+start the release process.
+</p>
+<pre>
+	../relative/path/to/release.sh . # append --dist if you've already done distcheck above
+</pre>
+
+<p>
+Pay close attention to the prompts as you might be required to enter your GPG
+and SSH passphrase(s) to sign and upload the files, respectively.
+</p>
+
+<h3>Add the sha256sums to the release notes</h3>
+
+<p>
+Edit docs/relnotes/X.Y.Z.html to add the sha256sums as available in the mesa-X.Y.Z.announce template. Commit this change.
+</p>
+
+<h3>Back on mesa master, add the new release notes into the tree</h3>
+
+<p>
+Something like the following steps will do the trick:
+</p>
+
+<pre>
+	git cherry-pick -x X.Y~1
+	git cherry-pick -x X.Y
+</pre>
+
+<p>
+Also, edit docs/relnotes.html to add a link to the new release notes, and edit
+docs/index.html to add a news entry. Then commit and push:
+</p>
+
+<pre>
+	git commit -as -m "docs: add news item and link release notes for X.Y.Z"
+	git push origin master X.Y
+</pre>
+
+
+<h1 id="announce">Announce the release</h1>
+<p>
+Use the generated template during the releasing process.
+</p>
+
+
+<h1 id="website">Update the mesa3d.org website</h1>
+
+<p>
+NOTE: The recent release managers have not been performing this step
+themselves, but leaving this to Brian Paul, (who has access to the
+sourceforge.net hosting for mesa3d.org). Brian is more than willing to grant
+the permission necessary to future release managers to do this step on their
+own.
+</p>
+
+<p>
+Update the web site by copying the docs/ directory's files to
+/home/users/b/br/brianp/mesa-www/htdocs/ with:
+<br>
+<code>
+sftp USERNAME,mesa3d@web.sourceforge.net
+</code>
+</p>
+
+
+<h1 id="bugzilla">Update Bugzilla</h1>
+
+<p>
+Parse through the bug reports as listed in the docs/relnotes/X.Y.Z.html
+document.
+<br>
+If there's outstanding action, close the bug referencing the commit ID which
+addresses the bug and mention the Mesa version that has the fix.
+</p>
+
+<p>
+Note: the above is not applicable to all the reports, so use common sense.
+</p>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,13 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/12.0.6.html">12.0.6 release notes</a>
+<li><a href="relnotes/13.0.3.html">13.0.3 release notes</a>
+<li><a href="relnotes/12.0.5.html">12.0.5 release notes</a>
+<li><a href="relnotes/13.0.2.html">13.0.2 release notes</a>
+<li><a href="relnotes/13.0.1.html">13.0.1 release notes</a>
+<li><a href="relnotes/12.0.4.html">12.0.4 release notes</a>
+<li><a href="relnotes/13.0.0.html">13.0.0 release notes</a>
 <li><a href="relnotes/12.0.3.html">12.0.3 release notes</a>
 <li><a href="relnotes/12.0.2.html">12.0.2 release notes</a>
 <li><a href="relnotes/12.0.1.html">12.0.1 release notes</a>
--- a/docs/relnotes/12.0.4.html
+++ b/docs/relnotes/12.0.4.html
@@ -0,0 +1,321 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.4 Release Notes / November 10, 2016</h1>
+
+<p>
+Mesa 12.0.4 is a bug fix release which fixes bugs found since the 12.0.3 release.
+</p>
+<p>
+Mesa 12.0.4 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+22026ce4f1c6a7908b0d10ff057decec0a5633afe7f38a0cef5c08d0689f02a6 mesa-12.0.4.tar.gz
+5d6003da867d3f54e5000b4acdfc37e6cce5b6a4459274fdad73e24bd2f0065e mesa-12.0.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with &quot;intel_do_flush_locked failed: No such file or directory&quot; if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from &quot;loader/dri3: Overhaul dri3_update_num_back&quot; commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Axel Davy (4):</p>
+<ul>
+  <li>gallium/util: Really allow aliasing of dst for u_box_union_*</li>
+  <li>st/nine: Fix the calculation of the number of vs inputs</li>
+  <li>st/nine: Fix mistake in Volume9 UnlockBox</li>
+  <li>st/nine: Fix locking CubeTexture surfaces.</li>
+</ul>
+
+<p>Brendan King (1):</p>
+<ul>
+  <li>configure.ac: fix the name of the Wayland Scanner pc file</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: fix swizzle issue in st_create_sampler_view_from_stobj()</li>
+</ul>
+
+<p>Chad Versace (3):</p>
+<ul>
+  <li>egl: Fix truncation error in _eglParseSyncAttribList64</li>
+  <li>i965/sync: Fix uninitalized usage and leak of mutex</li>
+  <li>egl: Don't advertise unsupported platform extensions</li>
+</ul>
+
+<p>Chuanbo Weng (1):</p>
+<ul>
+  <li>gbm: fix potential NULL deref of mapImage/unmapImage.</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>autoconf: Make header install distinct for various APIs (v2)</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>anv: initialise and increment send_sbc</li>
+  <li>anv/wsi: fix apps that acquire multiple images up front</li>
+  <li>Revert "st/vdpau: use linear layout for output surfaces"</li>
+</ul>
+
+<p>Emil Velikov (12):</p>
+<ul>
+  <li>docs: add sha256 checksums for 12.0.3</li>
+  <li>cherry-ignore: add non-applicable i965 commit</li>
+  <li>cherry-ignore: add vaapi encode fix</li>
+  <li>cherry-ignore: add EGL_KHR_debug fix</li>
+  <li>cherry-ignore: add update_renderbuffer_read_surfaces()</li>
+  <li>isl/gen6: correctly check msaa layout samples count</li>
+  <li>egl/x11: don't crash if dri2_dpy-&gt;conn is NULL</li>
+  <li>get-pick-list.sh: Require explicit "12.0" for nominating stable patches</li>
+  <li>automake: don't forget to pick wglext.h in the tarball</li>
+  <li>cherry-ignore: add N/A EGL revert</li>
+  <li>cherry-ignore: add ClientWaitSync fixes</li>
+  <li>Update version to 12.0.4</li>
+</ul>
+
+<p>Eric Anholt (5):</p>
+<ul>
+  <li>travis: Parse configure.ac to pick an updated LIBDRM_VERSION.</li>
+  <li>travis: Update to the Ubuntu Trusty image.</li>
+  <li>travis: Enable vc4 in libdrm to satisfy vc4 test build dependency.</li>
+  <li>travis: Upgrade LLVM dependency to 3.5 and enable LLVM drivers.</li>
+  <li>gallium: Fix install-gallium-links.mk on non-bash /bin/sh</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>pipe_loader_sw: Fix fd leak when instantiated via pipe_loader_sw_probe_kms</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>glsl: Fix cut-and-paste bug in hierarchical visitor ir_expression::accept</li>
+</ul>
+
+<p>Ilia Mirkin (16):</p>
+<ul>
+  <li>nv30: set usage to staging so that the buffer is allocated in GART</li>
+  <li>a3xx: make sure to actually clamp depth as requested</li>
+  <li>a3xx: make use of software clipping when hw can't handle it</li>
+  <li>a3xx: use window scissor to simulate viewport xy clip</li>
+  <li>main: GL_RGB10_A2UI does not come with GL 3.0/EXT_texture_integer</li>
+  <li>mesa/formatquery: limit ES target support, fix core context support</li>
+  <li>nir: fix definition of pack_uvec2_to_uint</li>
+  <li>gm107/ir: AL2P writes to a predicate register</li>
+  <li>st/mesa: fix is_scissor_enabled when X/Y are negative</li>
+  <li>nvc0/ir: fix overwriting of value backing non-constant gather offset</li>
+  <li>nv50/ir: copy over value's register id when resolving merge of a phi</li>
+  <li>nvc0/ir: fix textureGather with a single offset</li>
+  <li>gm107/ir: fix texturing with indirect samplers</li>
+  <li>gm107/ir: fix bit offset of tex lod setting for indirect texturing</li>
+  <li>nv50,nvc0: avoid reading out of bounds when getting bogus so info</li>
+  <li>nv50/ir: process texture offset sources as regular sources</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+  <li>radeonsi: Fix primitive restart when index changes</li>
+</ul>
+
+<p>Jason Ekstrand (9):</p>
+<ul>
+  <li>nir/spirv: Swap the argument order for AtomicCompareExchange</li>
+  <li>nir/spirv: Use the correct sources for CompareExchange on images</li>
+  <li>nir/spirv: Break variable decoration handling into a helper</li>
+  <li>nir/spirv: Refactor variable deocration handling</li>
+  <li>nir/spirv/cfg: Handle switches whose break block is a loop continue</li>
+  <li>nir/spirv/cfg: Detect switch_break after loop_break/continue</li>
+  <li>nir: Add a nop intrinsic</li>
+  <li>nir/spirv/cfg: Use a nop intrinsic for tagging the ends of blocks</li>
+  <li>intel/blorp: Rework our usage of ralloc when compiling shaders</li>
+</ul>
+
+<p>Jonathan Gray (3):</p>
+<ul>
+  <li>genxml: add generated headers to EXTRA_DIST</li>
+  <li>mapi: automake: set VISIBILITY_CFLAGS for shared glapi</li>
+  <li>mesa: automake: include mesa_glinterop.h in distfile</li>
+</ul>
+
+<p>Julien Isorce (1):</p>
+<ul>
+  <li>st/va: also honors interlaced preference when providing a video format</li>
+</ul>
+
+<p>Kenneth Graunke (8):</p>
+<ul>
+  <li>nir: Call nir_metadata_preserve from nir_lower_alu_to_scalar().</li>
+  <li>mesa: Expose RESET_NOTIFICATION_STRATEGY with KHR_robustness.</li>
+  <li>i965: Fix missing _NEW_TRANSFORM in Gen8+ 3DSTATE_DS atom.</li>
+  <li>i965: Add missing BRW_NEW_VS_PROG_DATA to 3DSTATE_CLIP.</li>
+  <li>i965: Move BRW_NEW_FRAGMENT_PROGRAM from 3DSTATE_PS to PS_EXTRA.</li>
+  <li>i965: Add missing BRW_NEW_CS_PROG_DATA to compute constant atom.</li>
+  <li>i965: Add missing BRW_CS_PROG_DATA to CS work group surface atom.</li>
+  <li>i965: Fix gl_InvocationID in dual object GS where invocations == 1.</li>
+</ul>
+
+<p>Marek Olšák (12):</p>
+<ul>
+  <li>radeonsi: fix cubemaps viewed as 2D</li>
+  <li>radeonsi: take compute shader and dispatch indirect memory usage into account</li>
+  <li>radeonsi: fix FP64 UBO loads with indirect uniform block indexing</li>
+  <li>mesa: fix glGetFramebufferAttachmentParameteriv w/ on-demand FRONT_BACK alloc</li>
+  <li>radeonsi: fix interpolateAt opcodes for .zw components</li>
+  <li>radeonsi: fix texture border colors for compute shaders</li>
+  <li>radeonsi: disable ReZ</li>
+  <li>gallium/radeon: make sure the address of separate CMASK is aligned properly</li>
+  <li>winsys/amdgpu: fix radeon_surf::macro_tile_index for imported textures</li>
+  <li>egl: use util/macros.h</li>
+  <li>egl: make interop ABI visible again</li>
+  <li>glx: make interop ABI visible again</li>
+</ul>
+
+<p>Mario Kleiner (1):</p>
+<ul>
+  <li>glx: Perform check for valid fbconfig against proper X-Screen.</li>
+</ul>
+
+<p>Martin Peres (2):</p>
+<ul>
+  <li>loader/dri3: add get_dri_screen() to the vtable</li>
+  <li>loader/dri3: import prime buffers in the currently-bound screen</li>
+</ul>
+
+<p>Matt Whitlock (5):</p>
+<ul>
+  <li>egl/android: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>gallium/auxiliary: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>st/dri: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>st/xa: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>gallium/winsys: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+</ul>
+
+<p>Max Staudt (1):</p>
+<ul>
+  <li>r300g: Set R300_VAP_CNTL on RSxxx to avoid triangle flickering</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>loader/dri3: Overhaul dri3_update_num_back</li>
+</ul>
+
+<p>Nicholas Bishop (2):</p>
+<ul>
+  <li>gbm: return appropriate error when queryImage() fails</li>
+  <li>st/dri: check pipe_screen-&gt;resource_get_handle() return value</li>
+</ul>
+
+<p>Nicolai Hähnle (10):</p>
+<ul>
+  <li>gallium/radeon: cleanup and fix branch emits</li>
+  <li>st/glsl_to_tgsi: disable on-the-fly peephole for 64-bit operations</li>
+  <li>st/glsl_to_tgsi: simplify translate_tex_offset</li>
+  <li>st/glsl_to_tgsi: fix textureGatherOffset with indirectly loaded offsets</li>
+  <li>st/mesa: fix vertex elements setup for doubles</li>
+  <li>radeonsi: fix indirect loads of 64 bit constants</li>
+  <li>st/glsl_to_tgsi: fix atomic counter addressing</li>
+  <li>st/glsl_to_tgsi: fix block copies of arrays of doubles</li>
+  <li>st/mesa: only set primitive_restart when the restart index is in range</li>
+  <li>radeonsi: fix 64-bit loads from LDS</li>
+</ul>
+
+<p>Samuel Pitoiset (4):</p>
+<ul>
+  <li>nvc0/ir: fix subops for IMAD</li>
+  <li>gk110/ir: fix wrong emission of OP_NOT</li>
+  <li>nvc0: use correct bufctx when invalidating CP textures</li>
+  <li>nvc0/ir: fix emission of IMAD with NEG modifiers</li>
+</ul>
+
+<p>Stencel, Joanna (1):</p>
+<ul>
+  <li>egl/wayland: add missing destroy_window callback</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>egl: stop claiming support for pbuffer + msaa</li>
+  <li>egl/dri2: set max values for pbuffer width and height</li>
+  <li>egl: add check that eglCreateContext gets a valid config</li>
+  <li>mesa: fix error handling in DrawBuffers</li>
+  <li>egl: set preserved behavior for surface only if config supports it</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>configure.ac: add llvm inteljitevents component if enabled</li>
+</ul>
+
+<p>Vedran Miletić (1):</p>
+<ul>
+  <li>clover: Fix build against clang SVN &gt;= r273191</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>Revert "mesa_glinterop: remove inclusion of GLX header"</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/12.0.5.html
+++ b/docs/relnotes/12.0.5.html
@@ -0,0 +1,138 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.5 Release Notes / December 5, 2016</h1>
+
+<p>
+Mesa 12.0.5 is a bug fix release which fixes bugs found since the 12.0.4 release.
+</p>
+<p>
+Mesa 12.0.5 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+44d08a27d98bfeacd864381189e434d98afbf451689d01f80380dc1d66450e5b  mesa-12.0.5.tar.gz
+2b0a972d8282860a11291c09c3ef01ac45171405951eb21a83c45ed2b4321924  mesa-12.0.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (2):</p>
+<ul>
+  <li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
+  <li>glx/glvnd: Fix dispatch function names and indices</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add release notes for 12.0.4</li>
+  <li>docs: add sha256 checksums for 12.0.4</li>
+  <li>cherry-ignore: add reverted LLVM_LIBDIR patch</li>
+  <li>Update version to 12.0.5</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>mesa: change state query return value for RGB565</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>i965/fs/generator: Don't use the address immediate for MOV_INDIRECT</li>
+  <li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
+  <li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>intel: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
+</ul>
+
+<p>Marek Olšák (13):</p>
+<ul>
+  <li>gallium/radeon: fix behavior of GLSL findLSB(0)</li>
+  <li>gallium/radeon: make sure HTILE address is aligned properly</li>
+  <li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
+  <li>gallium/radeon: unify viewport emission code</li>
+  <li>gallium/radeon: set VPORT_ZMIN/MAX registers correctly</li>
+  <li>radeonsi: fix gl_PatchVerticesIn for tessellation evaluation shader</li>
+  <li>radeonsi: fix a crash in imageSize for cubemap arrays</li>
+  <li>radeonsi: emit TA_CS_BC_BASE_ADDR on SI only if the kernel allows it</li>
+  <li>gallium/radeon: add support for sharing textures with DCC between processes</li>
+  <li>radeonsi: always set all blend registers</li>
+  <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+  <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+  <li>radeonsi: silence runtime warnings with LLVM 3.9</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>anv: Replace "abi_versions" with correct "api_version".</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Steinar H. Gunderson (1):</p>
+<ul>
+  <li>Fix races during _mesa_HashWalk().</li>
+</ul>
+
+<p>Tim Rowley (3):</p>
+<ul>
+  <li>swr: [rasterizer jitter] cleanup supporting different llvm versions</li>
+  <li>swr: [rasterizer jitter] fix llvm-3.7 compile</li>
+  <li>swr: [rasterizer] add support for llvm-3.9</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/12.0.6.html
+++ b/docs/relnotes/12.0.6.html
@@ -0,0 +1,148 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.6 Release Notes / January 23, 2017</h1>
+
+<p>
+Mesa 12.0.6 is a bug fix release which fixes bugs found since the 12.0.5 release.
+</p>
+<p>
+Mesa 12.0.6 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+65339ba5d76a45225b8b56f9a1da9db15c569e1d163760faa2921da0a8461741  mesa-12.0.6.tar.gz
+7d6da9744c1022a4c2ab6ad01a206984d00443fb691568011d01b3dd97e36448  mesa-12.0.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] &quot;Assertion `bkref' failed&quot; reproducible with glmark2</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+<p>Chad Versace (3):</p>
+<ul>
+  <li>i965/mt: Disable aux surfaces after making miptree shareable</li>
+  <li>i965/mt: Disable HiZ when sharing depth buffer externally (v2)</li>
+  <li>anv: Handle vkGetPhysicalDeviceQueueFamilyProperties with count == 0</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 12.0.5</li>
+  <li>get-typod-pick-list.sh: add new script</li>
+  <li>automake: use shared llvm libs for make distcheck</li>
+  <li>egl/wayland: use the destroy_window_callback for swrast</li>
+  <li>Update version to 12.0.6</li>
+</ul>
+
+<p>Fredrik Höglund (1):</p>
+<ul>
+  <li>dri3: Fix MakeCurrent without a default framebuffer</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nouveau: take extra push space into account for pushbuf_space calls</li>
+</ul>
+
+<p>Jason Ekstrand (19):</p>
+<ul>
+  <li>spirv/nir: Fix some texture opcode asserts</li>
+  <li>spirv/nir: Add support for shadow samplers that return vec4</li>
+  <li>spirv/nir: Properly handle gather components</li>
+  <li>anv/pipeline: Set binding_table.gather_texture_start</li>
+  <li>nir: Add a helper for determining the type of a texture source</li>
+  <li>nir/lower_tex: Add some helpers for working with tex sources</li>
+  <li>nir/lower_tex: Add support for lowering coordinate offsets</li>
+  <li>i965/nir: Enable NIR lowering of txf and rect offsets</li>
+  <li>i965: Get rid of the do_lower_unnormalized_offsets pass</li>
+  <li>spirv/nir: Don't increment coord_components for array lod queries</li>
+  <li>anv/image: Assert that the image format is actually supported</li>
+  <li>spirv/nir: Move opcode selection higher up in handle_texture</li>
+  <li>spirv/nir: Refactor type handling in handle_texture</li>
+  <li>nir/spirv: Refactor coordinate handling in handle_texture</li>
+  <li>spirv/nir: Handle texture projectors</li>
+  <li>spirv/nir: Add support for ImageQuerySamples</li>
+  <li>anv/device: Return the right error for failed maps</li>
+  <li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
+  <li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.</li>
+  <li>i965: Properly flush in hsw_pause_transform_feedback().</li>
+</ul>
+
+<p>Marek Olšák (6):</p>
+<ul>
+  <li>cso: don't release sampler states that are bound</li>
+  <li>radeonsi: always restore sampler states when unbinding sampler views</li>
+  <li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
+  <li>radeonsi: disable CE on SI + AMDGPU</li>
+  <li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
+  <li>gallium/radeon: fix the draw-calls HUD query</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
+  <li>i965/fs: Add unit tests for copy propagation pass.</li>
+  <li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>cso: Don't restore nr_samplers in cso_restore_fragment_samplers</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>radeonsi: enable WQM in PS prolog when needed</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.0.html
+++ b/docs/relnotes/13.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 13.0.0 Release Notes / TBD</h1>
+<h1>Mesa 13.0.0 Release Notes / November 1, 2016</h1>

 <p>
 Mesa 13.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+4a54d7cdc1a94a8dae05a75ccff48356406d51b0d6a64cbdc641c266e3e008eb  mesa-13.0.0.tar.gz
+94edb4ebff82066a68be79d9c2627f15995e1fe10f67ab3fc63deb842027d727  mesa-13.0.0.tar.xz
 </pre>


@@ -74,11 +75,236 @@ Note: some of the new features are only available with certain drivers.

 <h2>Bug fixes</h2>

-TBD.
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61907">Bug 61907</a> - Indirect rendering of multi-texture vertex arrays broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crashes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with &quot;intel_do_flush_locked failed: No such file or directory&quot; if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83036">Bug 83036</a> - [ILK]Piglit spec_ARB_copy_image_arb_copy_image-formats fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90513">Bug 90513</a> - Odd gray and red flicker in The Talos Principle on GK104</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94561">Bug 94561</a> - [llvmpipe] PIPE_CAP_VIDEO_MEMORY reports negative value on 32 bits (with 16GB ram)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94627">Bug 94627</a> - Game Risen on wine black grass</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94681">Bug 94681</a> - dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 takes 25 minutes to compile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95000">Bug 95000</a> - deqp: assert in dEQP-GLES3.functional.vertex_arrays.single_attribute.strides.fixed.user_ptr_stride17_components2_quads1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95246">Bug 95246</a> - Segfault in glBindFramebuffer()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95419">Bug 95419</a> - [HSW][regression][bisect] RPG Maker game gives &quot;invalid floating point operation&quot; at startup</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95462">Bug 95462</a> - [BXT,BSW] arb_gpu_shader_fp64 causes gpu hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95529">Bug 95529</a> - [regression, bisected] Image corruption in Chrome</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96235">Bug 96235</a> - st_nir.h:34: error: redefinition of typedef ‘nir_shader’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb-&gt;getFirst()-&gt;serial &lt;= bb-&gt;getExit()-&gt;serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96285">Bug 96285</a> - Mesa build broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96299">Bug 96299</a> - [vulkan] 64 regressions due to mesa d5f2f32</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96343">Bug 96343</a> - oom since st/mesa: implement PBO downloads for ReadPixels</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96346">Bug 96346</a> - [SNB,CTS] es2-cts.gtf.gl.atan regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96349">Bug 96349</a> - [CTS,SKL,BSW,BDW,KBL,BXT] es31-cts.arrays_of_arrays.interactionuniformbuffers3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96351">Bug 96351</a> - [CTS,SKL,KBL,BXT] es2-cts.gtf.gl2extensiontests.egl_image.egl_image</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regression due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96425">Bug 96425</a> - [bisected] occasional dark render in The Talos Principle</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96484">Bug 96484</a> - [vulkan] deqp-vk.glsl.builtin.precision.sin / cos regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96504">Bug 96504</a> - [vulkancts] compute tests crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96516">Bug 96516</a> - [bisected: 482526] &quot;clover: Update OpenCL version string to match OpenGL&quot;: clover's build fails because of missing git_sha1.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96528">Bug 96528</a> - Location qualifier segfaults during shader compilation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96541">Bug 96541</a> - Tonga Unreal elemental bad rendering since radeonsi: Decompress DCC textures in a render feedback loop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96565">Bug 96565</a> - Clive Barker's Jericho displays strange, vivid colors when motion blur enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96607">Bug 96607</a> - [bisected] texture misrender / flicker in The Talos Principle on SKL</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96617">Bug 96617</a> - gl_SecondaryFragDataEXT doesn't work for extended blend func</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96629">Bug 96629</a> - dEQP-GLES2.functional.texture.completeness.cube.not_positive_level_0: Assertion `width &gt;= 1' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96639">Bug 96639</a> - st/mesa: transfer_map with too-high level with dEQP-GLES2.functional.texture.completeness.cube.extra_level</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96674">Bug 96674</a> - [SNB, ILK] spec.ext_image_dma_buf_import.ext_image_dma_buf_import-sample_nv1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96729">Bug 96729</a> - Wrong shader compilation error message</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96765">Bug 96765</a> - BindFragDataLocationIndexed on array fragment shader output.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96782">Bug 96782</a> - [regression bisected] R600 fp64 and glsl-4.00 piglit failures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96791">Bug 96791</a> - Cannot use image from swapchains for sampling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96825">Bug 96825</a> - anv_device.c:31:27: fatal error: anv_timestamp.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - &quot;gallium: Force blend color to 16-byte alignment&quot; crash with &quot;-march=native -O3&quot; causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96878">Bug 96878</a> - [Bisected: cc2d0e6][HSW] &quot;GPU HANG&quot; msg after autologin to gnome-session</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96949">Bug 96949</a> - [regression] Piglit numSamples assertion failures with 9a23a177b90</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96950">Bug 96950</a> - Another regression from bc4e0c486: vbo: Use a bitmask to track the active arrays in vbo_exec*.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97019">Bug 97019</a> - [clover] build failure in llvm/codegen/native.cpp:129:52</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97032">Bug 97032</a> - [BDW,SKL] piglit.spec.arb_gpu_shader5.arb_gpu_shader5-interpolateatcentroid-flat</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97033">Bug 97033</a> - [BDW,SKL] piglit.spec.arb_gpu_shader_fp64.varying-packing.simple regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97083">Bug 97083</a> - [IVB,BYT] GPU hang on deqp-gles31.functional.separate.shader.random</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97140">Bug 97140</a> - dd_draw.c:949:11: error: implicit declaration of function 'fmemopen' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error &quot;Failed to make EGL context current&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97267">Bug 97267</a> - [BDW] GL45-CTS.texture_cube_map_array.sampling asserts inside brw_fs.cpp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97278">Bug 97278</a> - [vulkancts,HSW] all vulkancts tests assert on HSW</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97285">Bug 97285</a> - Darkness in Dota 2 after Patch &quot;Make Gallium's BlitFramebuffer follow the GL 4.4 sRGB rules&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97286">Bug 97286</a> - `make check` fails uniform-initializer-test</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97305">Bug 97305</a> - Gallium: TBOs and images set the offset in elements, not bytes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97309">Bug 97309</a> - piglit.spec.glsl-1_30.compiler.switch-statement.switch-case-duplicated.vert regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97322">Bug 97322</a> - GenerateMipmap creates wrong mipmap for sRGB texture</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97413">Bug 97413</a> - BioShock Infinite crashes on startup with Mesa Git version, R7 370</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97448">Bug 97448</a> - [HSW] deqp-vk.api_.copy_and_blit.image_to_image_stencil regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97477">Bug 97477</a> - i915g: gl_FragCoord is always (0.0, max_y)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97513">Bug 97513</a> - clover reports wrong device pointer size</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from &quot;loader/dri3: Overhaul dri3_update_num_back&quot; commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97587">Bug 97587</a> - make check nir/tests/control_flow_tests regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97761">Bug 97761</a> - es2-cts.gtf.gl2extensiontests.egl_image_external.testsimpleunassociated crashes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97773">Bug 97773</a> - New Mesa master now results in warnings in glrender (and subsurfaces and simple-egl), black screen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97790">Bug 97790</a> - Vulkan cts regressions due to 24be63066</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97804">Bug 97804</a> - Later precision statement isn't overriding earlier one</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97808">Bug 97808</a> - &quot;tgsi/scan: don't set interp flags for inputs only used by INTERP instructions&quot; causes glitches in wine with gallium nine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97894">Bug 97894</a> - Crash in u_transfer_unmap_vtbl when unmapping a buffer mapped in different context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97952">Bug 97952</a> - /usr/include/string.h:518:12: error: exception specification in declaration does not match previous declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97969">Bug 97969</a> - [radeonsi, bisected: fb827c0] Video decoding shows green artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97976">Bug 97976</a> - VCE regression BO to small for addr since winsys/amdgpu: enable buffer allocation from slabs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98005">Bug 98005</a> - VCE dual instance encoding inconsistent since st/va: enable dual instances encode by sync surface</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98128">Bug 98128</a> - nir/tests/control_flow_tests.cpp:79:73: error: ‘nir_loop_first_cf_node’ was not declared in this scope</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98131">Bug 98131</a> - Compiler should reject lowp/mediump qualifiers on atomic_uints</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98133">Bug 98133</a> - GetSynciv should raise an error if bufSize &lt; 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98135">Bug 98135</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.shader.transform_feedback_varyings wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98167">Bug 98167</a> - [vulkan, radv] missing libgcrypt and openssl devel results in linker error in libvulkan_common</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98172">Bug 98172</a> - Concurrent call to glClientWaitSync results in segfault in one of the waiters.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98244">Bug 98244</a> - dEQP: textureOffset(sampler2DArrayShadow, ...) should not exist.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98264">Bug 98264</a> - Build broken for i965 due to multiple definitions of intelFenceExtension</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98307">Bug 98307</a> - &quot;st/glsl_to_tgsi: explicitly track all input and output declaration&quot; broke flightgear colors on rs780</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98431">Bug 98431</a> - UnrealEngine v4 demos startup fails to blorp blit assert</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+<p>Mesa no longer depends on libudev.</p>

 </div>
 </body>
--- a/docs/relnotes/13.0.1.html
+++ b/docs/relnotes/13.0.1.html
@@ -0,0 +1,188 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.1 Release Notes / November 14, 2016</h1>
+
+<p>
+Mesa 13.0.1 is a bug fix release which fixes bugs found since the 13.0.0 release.
+</p>
+<p>
+Mesa 13.0.1 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+7cbb91dead05cde279ee95f86e8321c8e1c8fc9deb88f12e0f587672a10d88c5  mesa-13.0.1.tar.gz
+71962fb2bf77d33b0ad4a565b490dbbeaf4619099c6d9722f04a73187957a731  mesa-13.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97715">Bug 97715</a> - [ILK,G45,G965] piglit.spec.arb_separate_shader_objects.misc api error checks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98012">Bug 98012</a> - [IVB] Segfault when running Dolphin twice with Vulkan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98512">Bug 98512</a> - radeon r600 vdpau: Invalid command stream: texture bo too small</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (2):</p>
+<ul>
+  <li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
+  <li>glx/glvnd: Fix dispatch function names and indices</li>
+</ul>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>glx/windows: Add wgl.h to the sources list</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
+</ul>
+
+<p>Chih-Wei Huang (1):</p>
+<ul>
+  <li>android: avoid using libdrm with host modules</li>
+</ul>
+
+<p>Darren Salt (1):</p>
+<ul>
+  <li>radv/pipeline: Don't dereference NULL dynamic state pointers</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>radv: expose xlib platform extension</li>
+  <li>radv: fix dual source blending</li>
+  <li>Revert "st/vdpau: use linear layout for output surfaces"</li>
+  <li>radv: emit correct last export when Z/stencil export is enabled</li>
+  <li>ac/nir: add support for discard_if intrinsic (v2)</li>
+  <li>nir: add conditional discard optimisation (v4)</li>
+  <li>radv: enable conditional discard optimisation on radv.</li>
+  <li>radv: fix GetFenceStatus for signaled fences</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.0</li>
+  <li>amd/addrlib: limit fastcall/regparm to GCC i386</li>
+  <li>anv: use correct .specVersion for extensions</li>
+  <li>radv: use correct .specVersion for extensions</li>
+  <li>radv: Suffix the radeon_icd file with the host CPU</li>
+  <li>Update version to 13.0.1</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Use Newton-Raphson on the 1/W write to fix glmark2 terrain.</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().</li>
+</ul>
+
+<p>Fredrik Höglund (1):</p>
+<ul>
+  <li>radv: add support for anisotropic filtering on VI+</li>
+</ul>
+
+<p>Jason Ekstrand (21):</p>
+<ul>
+  <li>anv/device: Return DEVICE_LOST if execbuf2 fails</li>
+  <li>vulkan/wsi/x11: Better handle wsi_x11_connection_create failure</li>
+  <li>vulkan/wsi/x11: Clean up connections in finish_wsi</li>
+  <li>anv: Better handle return codes from anv_physical_device_init</li>
+  <li>intel/blorp: Use wm_prog_data instead of hand-rolling our own</li>
+  <li>intel/blorp: Pass a brw_stage_prog_data to upload_shader</li>
+  <li>anv/pipeline: Put actual pointers in anv_shader_bin</li>
+  <li>anv/pipeline: Properly cache prog_data::param</li>
+  <li>intel/blorp: Emit all the binding tables</li>
+  <li>anv/device: Add an execbuf wrapper</li>
+  <li>anv: Add a cmd_buffer_execbuf helper</li>
+  <li>anv: Don't presume to know what address is in a surface relocation</li>
+  <li>anv: Add a new bo_pool_init helper</li>
+  <li>anv/allocator: Simplify anv_scratch_pool</li>
+  <li>anv: Initialize anv_bo::offset to -1</li>
+  <li>anv/batch_chain: Improve write_reloc</li>
+  <li>anv: Add an anv_execbuf helper struct</li>
+  <li>anv/batch: Move last_ss_pool_bo_offset to the command buffer</li>
+  <li>anv: Move relocation handling from EndCommandBuffer to QueueSubmit</li>
+  <li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
+  <li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>glsl: Update deref types when resizing implicitly sized arrays.</li>
+  <li>mesa: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
+</ul>
+
+<p>Kristian Høgsberg (1):</p>
+<ul>
+  <li>anv: Do relocations in userspace before execbuf ioctl</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>egl: use util/macros.h</li>
+  <li>egl: make interop ABI visible again</li>
+  <li>glx: make interop ABI visible again</li>
+  <li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix BFE/BFI lowering for GLSL semantics</li>
+  <li>glsl: fix lowering of UBO references of named blocks</li>
+  <li>st/glsl_to_tgsi: fix dvec[34] loads from SSBO</li>
+  <li>st/mesa: fix the layer of VDPAU surface samplers</li>
+</ul>
+
+<p>Steven Toth (3):</p>
+<ul>
+  <li>gallium/hud: fix a problem where objects are free'd while in use.</li>
+  <li>gallium/hud: close a previously opened handle</li>
+  <li>gallium/hud: protect against an initialization race</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>mesa/glsl: delete previously linked shaders earlier when linking</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.2.html
+++ b/docs/relnotes/13.0.2.html
@@ -0,0 +1,189 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.2 Release Notes / November 28, 2016</h1>
+
+<p>
+Mesa 13.0.2 is a bug fix release which fixes bugs found since the 13.0.1 release.
+</p>
+<p>
+Mesa 13.0.2 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6014233a5db6032ab8de4881384871bbe029de684502707794ce7b3e6beec308  mesa-13.0.2.tar.gz
+a6ed622645f4ed61da418bf65adde5bcc4bb79023c36ba7d6b45b389da4416d5  mesa-13.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97321">Bug 97321</a> - Query INFO_LOG_LENGTH for empty info log should return 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97420">Bug 97420</a> - &quot;#version 0&quot; crashes glsl_compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98632">Bug 98632</a> - Fix build on Hurd without PATH_MAX</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (3):</p>
+<ul>
+  <li>i965: Add some APL and KBL SKU strings</li>
+  <li>i965: Reorder PCI ID list to match release order</li>
+  <li>i965/glk: Add basic Geminilake support</li>
+</ul>
+
+<p>Dave Airlie (14):</p>
+<ul>
+  <li>radv: fix texturesamples to handle single sample case</li>
+  <li>wsi: fix VK_INCOMPLETE for vkGetSwapchainImagesKHR</li>
+  <li>radv: don't crash on null swapchain destroy.</li>
+  <li>ac/nir/llvm: fix channel in texture gather lowering code.</li>
+  <li>radv: make sure to flush input attachments correctly.</li>
+  <li>radv: fix image view creation for depth and stencil only</li>
+  <li>radv: spir-v allows texture size query with and without lod.</li>
+  <li>vulkan/wsi/x11: handle timeouts properly in next image acquire (v1.1)</li>
+  <li>vulkan/wsi: store present mode in swapchain base class</li>
+  <li>vulkan/wsi/x11: add support for IMMEDIATE present mode</li>
+  <li>radv: fix texel fetch offset with 2d arrays.</li>
+  <li>radv/si: fix optimal micro tile selection</li>
+  <li>radv/ac/llvm: shadow samplers only return one value.</li>
+  <li>radv: fix 3D clears with baseMiplevel</li>
+</ul>
+
+<p>Eduardo Lima Mitev (2):</p>
+<ul>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfaceFormatsKHR</li>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfacePresentModesKHR</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.1</li>
+  <li>cherry-ignore: add reverted LLVM_LIBDIR patch</li>
+  <li>anv: fix enumeration of properties</li>
+  <li>radv: honour the number of properties available</li>
+  <li>Update version to 13.0.2</li>
+</ul>
+
+<p>Eric Anholt (3):</p>
+<ul>
+  <li>vc4: Don't abort when a shader compile fails.</li>
+  <li>vc4: Clamp the shadow comparison value.</li>
+  <li>vc4: Fix register class handling of DDX/DDY arguments.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (2):</p>
+<ul>
+  <li>util/disk_cache: close a previously opened handle in disk_cache_put (v2)</li>
+  <li>anv: Fix unintentional integer overflow in anv_CreateDmaBufImageINTEL</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>anv/format: handle unsupported formats properly</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>glcpp: Handle '#version 0' and other invalid values</li>
+  <li>glsl: Parse 0 as a preprocessor INTCONSTANT</li>
+</ul>
+
+<p>Jason Ekstrand (15):</p>
+<ul>
+  <li>anv/gen8: Stall when needed in Cmd(Set|Reset)Event</li>
+  <li>anv/wsi: Set the fence to signaled in AcquireNextImageKHR</li>
+  <li>anv: Rework fences</li>
+  <li>vulkan/wsi/wayland: Include pthread.h</li>
+  <li>vulkan/wsi/wayland: Clean up some error handling paths</li>
+  <li>vulkan/wsi: Report the correct min/maxImageCount</li>
+  <li>i965/gs: Allow primitive id to be a system value</li>
+  <li>anv: Handle null in all destructors</li>
+  <li>anv/fence: Handle ANV_FENCE_CREATE_SIGNALED_BIT</li>
+  <li>nir/spirv: Fix handling of gl_PrimitiveId</li>
+  <li>anv/blorp: Ignore clears for attachments first used as resolve destinations</li>
+  <li>anv: Implement a depth stall restriction on gen7</li>
+  <li>anv/cmd_buffer: Handle running out of binding tables in compute shaders</li>
+  <li>anv/cmd_buffer: Emit a CS stall before setting a CS pipeline</li>
+  <li>vulkan/wsi/x11: Implement FIFO mode.</li>
+</ul>
+
+<p>Jordan Justen (2):</p>
+<ul>
+  <li>isl: Fix height calculation in isl_msaa_interleaved_scale_px_to_sa</li>
+  <li>i965/hsw: Set integer mode in sampling state for stencil texturing</li>
+</ul>
+
+<p>Kenneth Graunke (4):</p>
+<ul>
+  <li>intel: Set min_ds_entries on Broxton.</li>
+  <li>i965: Fix compute shader crash.</li>
+  <li>mesa: Drop PATH_MAX usage.</li>
+  <li>i965: Fix GS push inputs with enhanced layouts.</li>
+</ul>
+
+<p>Kevin Strasser (1):</p>
+<ul>
+  <li>vulkan/wsi: Add a thread-safe queue implementation</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: fix multi level clears with VK_REMAINING_MIP_LEVELS</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>gbm: request correct version of the DRI2_FENCE extension</li>
+</ul>
+
+<p>Nicolai Hähnle (2):</p>
+<ul>
+  <li>radeonsi: store group_size_variable in struct si_compute</li>
+  <li>glsl/lower_output_reads: fix geometry shader output handling with conditional emit</li>
+</ul>
+
+<p>Steinar H. Gunderson (1):</p>
+<ul>
+  <li>Fix races during _mesa_HashWalk().</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: fix empty program log length</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.3.html
+++ b/docs/relnotes/13.0.3.html
@@ -0,0 +1,177 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.3 Release Notes / January 5, 2017</h1>
+
+<p>
+Mesa 13.0.3 is a bug fix release which fixes bugs found since the 13.0.2 release.
+</p>
+<p>
+Mesa 13.0.3 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+55b07d056f9b855ba9d7c8b2ddc7d3b220a61c6ab1bdc73cbfc2f607721094c2  mesa-13.0.3.tar.gz
+d9aa8be5c176d00d0cd503cb2f64a5a403ea471ec819c022581414860d7ba40e  mesa-13.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99038">Bug 99038</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>i965/mt: Disable aux surfaces after making miptree shareable</li>
+  <li>egl: Fix crashes in eglCreate*Surface()</li>
+</ul>
+
+<p>Dave Airlie (4):</p>
+<ul>
+  <li>anv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: fix another regression since shadow fixes.</li>
+  <li>radv: add missing license file to radv_meta_bufimage.</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.2</li>
+  <li>anv: don't double-close the same fd</li>
+  <li>anv: don't leak memory if anv_init_wsi() fails</li>
+  <li>radv: don't leak the fd if radv_physical_device_init() succeeds</li>
+  <li>Update version to 13.0.3</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: In a loop break/continue, jump if everyone has taken the path.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (3):</p>
+<ul>
+  <li>anv: Add missing error-checking to anv_block_pool_init (v2)</li>
+  <li>anv: Update the teardown in reverse order of the anv_CreateDevice</li>
+  <li>vulkan/wsi: Fix resource leak in success path of wsi_queue_init()</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>compiler/glsl: fix precision problem of tanh</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>mesa: only verify that enabled arrays have backing buffers</li>
+</ul>
+
+<p>Jason Ekstrand (8):</p>
+<ul>
+  <li>anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push constants are dirty</li>
+  <li>anv/image: Rename hiz_surface to aux_surface</li>
+  <li>anv/cmd_buffer: Remove the 1-D case from the HiZ QPitch calculation</li>
+  <li>genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode</li>
+  <li>anv/device: Return the right error for failed maps</li>
+  <li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
+  <li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
+  <li>spirv: Use a simpler and more correct implementaiton of tanh()</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Allocate at least some URB space even when max_vertices = 0.</li>
+</ul>
+
+<p>Marek Olšák (17):</p>
+<ul>
+  <li>radeonsi: always set all blend registers</li>
+  <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+  <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+  <li>radeonsi: consolidate max-work-group-size computation</li>
+  <li>radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips</li>
+  <li>radeonsi: apply a TC L1 write corruption workaround for SI</li>
+  <li>radeonsi: apply a tessellation bug workaround for SI</li>
+  <li>radeonsi: add a tess+GS hang workaround for VI dGPUs</li>
+  <li>radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well</li>
+  <li>cso: don't release sampler states that are bound</li>
+  <li>radeonsi: always restore sampler states when unbinding sampler views</li>
+  <li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
+  <li>radeonsi: allow specifying simm16 of emit_waitcnt at call sites</li>
+  <li>radeonsi: wait for outstanding memory instructions in TCS barriers</li>
+  <li>tgsi: fix the src type of TGSI_OPCODE_MEMBAR</li>
+  <li>radeonsi: wait for outstanding LDS instructions in memory barriers if needed</li>
+  <li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
+  <li>i965/fs: Add unit tests for copy propagation pass.</li>
+  <li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix isolines tess factor writes to control ring</li>
+  <li>radeonsi: update all GSVS ring descriptors for new buffer allocations</li>
+  <li>radeonsi: do not kill GS with memory writes</li>
+  <li>radeonsi: fix an off-by-one error in the bounds check for max_vertices</li>
+</ul>
+
+<p>Rhys Kidd (1):</p>
+<ul>
+  <li>glsl: Add pthread libs to cache_test</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>mesa: fix active subroutine uniforms properly</li>
+  <li>Revert "nir: Turn imov/fmov of undef into undef."</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.0.0.html
+++ b/docs/relnotes/17.0.0.html
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.0.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 17.0.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 17.0.1.
+</p>
+<p>
+Mesa 17.0.0 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_ARB_post_depth_coverage on i965/gen9+</li>
+<li>GL_KHR_blend_equation_advanced on nvc0</li>
+<li>GL_INTEL_conservative_rasterization on i965/gen9+</li>
+<li>GL_NV_image_formats on any driver supporting GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)</li>
+<li>GL_ARB_gpu_shader_fp64 in i965/haswell</li>
+<li>GL_ARB_vertex_attrib_64bit in i965/haswell</li>
+<li>GL_ARB_shader_precision in i965/haswell</li>
+<li>Intel Haswell now supports OpenGL 4.2</li>
+<li>GL_OES_geometry_shader on i965/haswell</li>
+<li>GL_OES_texture_cube_map_array on i965/haswell</li>
+<li>GL_OES_viewport_array on i965/haswell</li>
+<li>Vulkan Float64 capability support on Intel's ANV driver</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98480">Bug 98480</a> - Support R8 image texture in ES 3.1</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+TBD.
+
+</div>
+</body>
+</html>
--- a/docs/repository.html
+++ b/docs/repository.html
@@ -75,7 +75,8 @@ follow this procedure:
 <li>Subscribe to the
 <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">mesa-dev</a>
 mailing list.
-<li>Start contributing to the project by posting patches / review requests to
+<li>Start contributing to the project by
+<a href="submittingpatches.html" target="_parent">submitting patches</a> to
 the mesa-dev list.  Specifically,
 <ul>
 <li>Use <code>git send-mail</code> to post your patches to mesa-dev.
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -172,7 +172,7 @@ This tool is useful for:
 </ul>

 <p>
-After building Mesa, the compiler can be found at src/glsl/glsl_compiler
+After building Mesa, the compiler can be found at src/compiler/glsl/glsl_compiler
 </p>

 <p>
@@ -180,7 +180,7 @@ Here's an example of using the compiler to compile a vertex shader and
 emit GL_ARB_vertex_program-style instructions:
 </p>
 <pre>
-    src/glsl/glsl_compiler --dump-ast myshader.vert
+    src/compiler/glsl/glsl_compiler --version XXX --dump-ast myshader.vert
 </pre>

 Options include
@@ -188,7 +188,11 @@ Options include
 <li><b>--dump-ast</b> - dump GPU code
 <li><b>--dump-hir</b> - dump high-level IR code
 <li><b>--dump-lir</b> - dump low-level IR code
-<li><b>--link</b> - ???
+<li><b>--dump-builder</b> - dump GLSL IR code
+<li><b>--link</b> - link shaders
+<li><b>--just-log</b> - display only shader / linker info if it exists,
+without any header or separator
+<li><b>--version</b> - [Mandatory] define the GLSL version to use
 </ul>


@@ -196,7 +200,7 @@ Options include

 <p>
 The source code for Mesa's shading language compiler is in the
-<code>src/glsl/</code> directory.
+<code>src/compiler/glsl/</code> directory.
 </p>

 <p>
--- a/docs/sourcetree.html
+++ b/docs/sourcetree.html
@@ -27,14 +27,18 @@ each directory.
 <li><b>include</b> - Public OpenGL header files
 <li><b>src</b>
  <ul>
+  <li><b>compiler</b> - Common utility sources for different compilers.
+    <ul>
+    <li><b>glsl</b> - the GLSL IR and compiler
+    <li><b>nir</b> - the NIR IR and compiler
+    <li><b>spirv</b> - the SPIR-V compiler
+    </ul>
  <li><b>egl</b> - EGL library sources
    <ul>
-    <li><b>docs</b> - EGL documentation
    <li><b>drivers</b> - EGL drivers
    <li><b>main</b> - main EGL library implementation.  This is where all
        the EGL API functions are implemented, like eglCreateContext().
    </ul>
-  <li><b>glsl</b> - the GLSL compiler
  <li><b>mapi</b> - Mesa APIs
    <li><b>glapi</b> - OpenGL API dispatch layer.  This is where all the
        GL entrypoints like glClear, glBegin, etc. are generated, as well as
@@ -94,7 +98,8 @@ each directory.
      <ul>
      <li><b>i915</b> - Driver for Intel i915/i945.
      <li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation.
-      <li><b>nv*</b> - Drivers for NVIDIA GPUs.
+      <li><b>nouveau</b> - Driver for NVIDIA GPUs.
+      <li><b>radeon</b> - Shared module for the r600 and radeonsi drivers.
      <li><b>radeonsi</b> - Driver for AMD Southern Island.
      <li><b>r300</b> - Driver for ATI R300 - R500.
      <li><b>r600</b> - Driver for ATI/AMD R600 - Northern Island.
@@ -128,16 +133,19 @@ each directory.
          to another.
      <li><b>util</b> - assorted utilities for arithmetic, hashing, surface
          creation, memory management, 2D blitting, simple rendering, etc.
+      <li>XXX more
      </ul>
    <li><b>state_trackers</b> -
       <ul>
       <li><b>clover</b> - OpenCL state tracker
       <li><b>dri</b> - Meta state tracker for DRI drivers
       <li><b>glx</b> - Meta state tracker for GLX
-       <li><b>vdpau</b> - VDPAU state tracker
-       <li><b>wgl</b> -
-       <li><b>xorg</b> - Meta state tracker for Xorg video drivers
+       <li><b>wgl</b> - Windows WGL state tracker
+       <li><b>xa</b> - XA state tracker
       <li><b>xvmc</b> - XvMC state tracker
+       <li><b>vdpau</b> - VDPAU state tracker
+       <li><b>va</b> - VA-API state tracker
+       <li><b>omx</b> - OpenMAX state tracker
       </ul>
    <li><b>winsys</b> -
       <ul>
@@ -148,11 +156,11 @@ each directory.
    </ul>
  </ul>
  <ul>
-  <li><b>glx</b> - The GLX library code for building libGL.  This is used for
-         direct rendering drivers.  It will dynamically load one of the 
-         xxx_dri.so drivers.
+  <li><b>glx</b> - The GLX library code for building libGL using DRI drivers.
  </ul>
-<li><b>lib</b> - where the GL libraries are placed
+<li><b>lib</b> - hardlinks to most binaries as produced by <strong>make</strong>.
+        These (shortcuts) are used for development purposes in conjunction with
+        LD_LIBRARY_PATH and/or LIBGL_DRIVERS_PATH.
 </ul>

 </div>
--- a/docs/specs/WL_bind_wayland_display.spec
+++ b/docs/specs/WL_bind_wayland_display.spec
@@ -75,6 +75,7 @@ New Tokens
        EGL_TEXTURE_Y_U_V_WL                    0x31D7
        EGL_TEXTURE_Y_UV_WL                     0x31D8
        EGL_TEXTURE_Y_XUXV_WL                   0x31D9
+        EGL_TEXTURE_EXTERNAL_WL                 0x31DA

    Accepted in the <attribute> parameter of eglQueryWaylandBufferWL:

@@ -148,6 +149,10 @@ Additions to the EGL 1.4 Specification:
                Two planes, samples Y from the first plane to r in
                the shader, U and V from the second plane to g and a.

+        EGL_TEXTURE_EXTERNAL_WL
+                Treated as a single plane texture, but sampled with
+                samplerExternalOES according to OES_EGL_image_external
+
    After querying the wl_buffer layout, create EGLImages for the
    planes by calling eglCreateImageKHR with wl_buffer as
    EGLClientBuffer, EGL_WAYLAND_BUFFER_WL as the target, NULL
--- a/docs/submittingpatches.html
+++ b/docs/submittingpatches.html
@@ -0,0 +1,378 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Submitting patches</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Submitting patches</h1>
+
+
+<ul>
+<li><a href="#guidelines">Basic guidelines</a>
+<li><a href="#formatting">Patch formatting</a>
+<li><a href="#testing">Testing Patches</a>
+<li><a href="#mailing">Mailing Patches</a>
+<li><a href="#reviewing">Reviewing Patches</a>
+<li><a href="#nominations">Nominating a commit for a stable branch</a>
+<li><a href="#criteria">Criteria for accepting patches to the stable branch</a>
+<li><a href="#gittips">Git tips</a>
+</ul>
+
+<h2 id="guidelines">Basic guidelines</h2>
+
+<ul>
+<li>Patches should not mix code changes with code formatting changes (except,
+perhaps, in very trivial cases.)
+<li>Code patches should follow Mesa
+<a href="codingstyle.html" target="_parent">coding conventions</a>.
+<li>Whenever possible, patches should only affect individual Mesa/Gallium
+components.
+<li>Patches should never introduce build breaks and should be bisectable (see
+<code>git bisect</code>.)
+<li>Patches should be properly <a href="#formatting">formatted</a>.
+<li>Patches should be sufficiently <a href="#testing">tested</a> before submitting.
+<li>Patches should be submitted to <a href="#mailing">mesa-dev</a>
+for <a href="#reviewing">review</a> using <code>git send-email</code>.
+
+</ul>
+
+<h2 id="formatting">Patch formatting</h2>
+
+<ul>
+<li>Lines should be limited to 75 characters or less so that git logs
+displayed in 80-column terminals avoid line wrapping.  Note that git
+log uses 4 spaces of indentation (4 + 75 &lt; 80).
+<li>The first line should be a short, concise summary of the change prefixed
+with a module name.  Examples:
+<pre>
+    mesa: Add support for querying GL_VERTEX_ATTRIB_ARRAY_LONG
+
+    gallium: add PIPE_CAP_DEVICE_RESET_STATUS_QUERY
+
+    i965: Fix missing type in local variable declaration.
+</pre>
+<li>Subsequent patch comments should describe the change in more detail,
+if needed.  For example:
+<pre>
+    i965: Remove end-of-thread SEND alignment code.
+    
+    This was present in Eric's initial implementation of the compaction code
+    for Sandybridge (commit 077d01b6). There is no documentation saying this
+    is necessary, and removing it causes no regressions in piglit on any
+    platform.
+</pre>
+<li>A "Signed-off-by:" line is not required, but not discouraged either.
+<li>If a patch addresses a bugzilla issue, that should be noted in the
+patch comment.  For example:
+<pre>
+   Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
+</pre>
+<li>If there have been several revisions to a patch during the review
+process, they should be noted such as in this example:
+<pre>
+    st/mesa: add ARB_texture_stencil8 support (v4)
+    
+    if we support stencil texturing, enable texture_stencil8
+    there is no requirement to support native S8 for this,
+    the texture can be converted to x24s8 fine.
+    
+    v2: fold fixes from Marek in:
+       a) put S8 last in the list
+       b) fix renderable to always test for d/s renderable
+        fixup the texture case to use a stencil only format
+        for picking the format for the texture view.
+    v3: hit fallback for getteximage
+    v4: put s8 back in front, it shouldn't get picked now (Ilia)
+</pre>
+<li>If someone tested your patch, document it with a line like this:
+<pre>
+    Tested-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<li>If the patch was reviewed (usually the case) or acked by someone,
+that should be documented with:
+<pre>
+    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<li>If sending a later revision of a patch, add all the tags - ack, r-b,
+Cc: mesa-stable and/or other. This provides reviewers with quick feedback if the
+patch has already been reviewed.
+<li>In order for your patch to reach the prospective reviewer easier/faster,
+use the script scripts/get_reviewer.pl to get a list of individuals and include
+them in the CC list.
+<br>
+Please use common sense and do <strong>not</strong> blindly add everyone.
+<br>
+<pre>
+    $ scripts/get_reviewer.pl --help # to get the help screen
+    $ scripts/get_reviewer.pl -f src/egl/drivers/dri2/platform_android.c
+    Rob Herring &lt;robh@kernel.org&gt; (reviewer:ANDROID EGL SUPPORT,added_lines:188/700=27%,removed_lines:58/283=20%)
+    Tomasz Figa &lt;tfiga@chromium.org&gt; (reviewer:ANDROID EGL SUPPORT,authored:12/41=29%,added_lines:308/700=44%,removed_lines:115/283=41%)
+    Emil Velikov &lt;emil.l.velikov@gmail.com&gt; (authored:13/41=32%,removed_lines:76/283=27%)
+</pre>
+</ul>
+
+
+
+<h2 id="testing">Testing Patches</h2>
+
+<p>
+It should go without saying that patches must be tested.  In general,
+do whatever testing is prudent.
+</p>
+
+<p>
+You should always run the Mesa test suite before submitting patches.
+The test suite can be run using the 'make check' command. All tests
+must pass before patches will be accepted; this may mean you have
+to update the tests themselves.
+</p>
+
+<p>
+Whenever possible and applicable, test the patch with
+<a href="http://piglit.freedesktop.org">Piglit</a> and/or
+<a href="https://android.googlesource.com/platform/external/deqp/">dEQP</a>
+to check for regressions.
+</p>
+
+
+<h2 id="mailing">Mailing Patches</h2>
+
+<p>
+Patches should be sent to the mesa-dev mailing list for review:
+<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev">
+mesa-dev@lists.freedesktop.org</a>.
+When submitting a patch make sure to use
+<a href="https://git-scm.com/docs/git-send-email">git send-email</a>
+rather than attaching patches to emails. Sending patches as
+attachments prevents people from being able to provide in-line review
+comments.
+</p>
+
+<p>
+When submitting follow-up patches you can use --in-reply-to to make v2, v3,
+etc patches show up as replies to the originals. This usually works well
+when you're sending out updates to individual patches (as opposed to
+re-sending the whole series). Using --in-reply-to makes
+it harder for reviewers to accidentally review old patches.
+</p>
+
+<p>
+When submitting follow-up patches you should also log in to
+<a href="https://patchwork.freedesktop.org">patchwork</a> and change the
+state of your old patches to Superseded.
+</p>
+
+<h2 id="reviewing">Reviewing Patches</h2>
+
+<p>
+When you've reviewed a patch on the mailing list, please be unambiguous
+about your review.  That is, state either
+</p>
+<pre>
+    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+or
+<pre>
+    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<p>
+Rather than saying just "LGTM" or "Seems OK".
+</p>
+
+<p>
+If small changes are suggested, it's OK to say something like:
+</p>
+<pre>
+   With the above fixes, Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<p>
+which tells the patch author that the patch can be committed, as long
+as the issues are resolved first.
+</p>
+
+
+<h2 id="nominations">Nominating a commit for a stable branch</h2>
+
+<p>
+There are three ways to nominate a patch for inclusion in the stable branch and
+release.
+</p>
+<ul>
+<li> By adding the Cc: mesa-stable@ tag as described below.
+<li> Sending the commit ID (as seen in master branch) to the mesa-stable@ mailing list.
+<li> Forwarding the patch from the mesa-dev@ mailing list.
+</li>
+</ul>
+<p>
+Note: resending a patch identical to one on mesa-dev@ or one that differs only
+by the extra mesa-stable@ tag is <strong>not</strong> recommended.
+</p>
+
+
+<h3 id="thetag">The stable tag</h3>
+
+<p>
+If you want a commit to be applied to a stable branch,
+you should add an appropriate note to the commit message.
+</p>
+
+<p>
+Here are some examples of such a note:
+</p>
+<ul>
+  <li>CC: &lt;mesa-stable@lists.freedesktop.org&gt;</li>
+  <li>CC: "9.2 10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
+  <li>CC: "10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
+</ul>
+
+Simply adding the CC to the mesa-stable list address is adequate to nominate
+the commit for the most-recently-created stable branch. It is only necessary
+to specify a specific branch name, (such as "9.2 10.0" or "10.0" in the
+examples above), if you want to nominate the commit for an older stable
+branch. And, as in these examples, you can nominate the commit for the older
+branch in addition to the more recent branch, or nominate the commit
+exclusively for the older branch.
+
+This "CC" syntax for patch nomination will cause patches to automatically be
+copied to the mesa-stable@ mailing list when you use "git send-email" to send
+patches to the mesa-dev@ mailing list. If you prefer using --suppress-cc that
+won't have any negative effect on the patch nomination.
+
+<p>
+Note: by removing the tag [as the commit is pushed] the patch is
+<strong>explicitly</strong> rejected from inclusion in the stable branch(es).
+<br>
+Thus, drop the line <strong>only</strong> if you want to cancel the nomination.
+</p>
+
+<h2 id="criteria">Criteria for accepting patches to the stable branch</h2>
+
+Mesa has a designated release manager for each stable branch, and the release
+manager is the only developer that should be pushing changes to these
+branches. Everyone else should simply nominate patches using the mechanism
+described above.
+
+The stable-release manager will work with the list of nominated patches, and
+for each patch that meets the criteria below will cherry-pick the patch with:
+<code>git cherry-pick -x &lt;commit&gt;</code>. The <code>-x</code> option is
+important so that the picked patch references the commit ID of the original
+patch.
+
+The stable-release manager may at times need to force-push changes to the
+stable branches (for example, to drop a previously-picked patch that was later
+identified as causing a regression). These force-pushes may cause changes to
+be lost from the stable branch if developers push things directly. Consider
+yourself warned.
+
+The stable-release manager is also given broad discretion in rejecting patches
+that have been nominated for the stable branch. The most basic rule is that
+the stable branch is for bug fixes only, (no new features, no
+regressions). Here is a non-exhaustive list of some reasons that a patch may
+be rejected:
+
+<ul>
+  <li>Patch introduces a regression. Any reported build breakage or other
+  regression caused by a particular patch, (game no longer works, piglit test
+  changes from PASS to FAIL), is justification for rejecting a patch.</li>
+
+  <li>Patch is too large, (say, larger than 100 lines)</li>
+
+  <li>Patch is not a fix. For example, a commit that moves code around with no
+  functional change should be rejected.</li>
+
+  <li>Patch fix is not clearly described. For example, a commit message
+  of only a single line, no description of the bug, no mention of bugzilla,
+  etc.</li>
+
+  <li>Patch has not obviously been reviewed. For example, the commit message
+  has no Reviewed-by, Signed-off-by, nor Tested-by tags from anyone but the
+  author.</li>
+
+  <li>Patch has not already been merged to the master branch. As a rule, bug
+  fixes should never be applied first to a stable branch. Patches should land
+  first on the master branch and then be cherry-picked to a stable
+  branch. (This is to avoid future releases causing regressions if the patch
+  is not also applied to master.) The only things that might look like
+  exceptions would be backports of patches from master that happen to look
+  significantly different.</li>
+
+  <li>Patch depends on too many other patches. Ideally, all stable-branch
+  patches should be self-contained. It sometimes occurs that a single, logical
+  bug-fix occurs as two separate patches on master, (such as an original
+  patch, then a subsequent fix-up to that patch). In such a case, these two
+  patches should be squashed into a single, self-contained patch for the
+  stable branch. (Of course, if the squashing makes the patch too large, then
+  that could be a reason to reject the patch.)</li>
+
+  <li>Patch includes new feature development, not bug fixes. New OpenGL
+  features, extensions, etc. should be applied to Mesa master and included in
+  the next major release. Stable releases are intended only for bug fixes.
+
+  Note: As an exception to this rule, the stable-release manager may accept
+  hardware-enabling "features". For example, backports of new code to support
+  a newly-developed hardware product can be accepted if they can be reasonably
+  determined to not have effects on other hardware.</li>
+
+  <li>Patch is a performance optimization. As a rule, performance patches are
+  not candidates for the stable branch. The only exception might be a case
+  where an application's performance was recently severely impacted so as to
+  become unusable. The fix for this performance regression could then be
+  considered for a stable branch. The optimization must also be
+  non-controversial and the patches still need to meet the other criteria of
+  being simple and self-contained</li>
+
+  <li>Patch introduces a new failure mode (such as an assert). While the new
+  assert might technically be correct, for example to make Mesa more
+  conformant, this is not the kind of "bug fix" we want in a stable
+  release. The potential problem here is that an OpenGL program that was
+  previously working, (even if technically non-compliant with the
+  specification), could stop working after this patch. So that would be a
+  regression that is unacceptable for the stable branch.</li>
+</ul>
+
+<h2 id="gittips">Git tips</h2>
+
+<ul>
+<li><code>git rebase -i ...</code> is your friend. Don't be afraid to use it.
+<li>Apply a fixup to commit FOO.
+<pre>
+    git add ...
+    git commit --fixup=FOO
+    git rebase -i --autosquash ...
+</pre>
+<li>Test for build breakage between patches, e.g. for the last 8 commits.
+<pre>
+    git rebase -i --exec="make -j4" HEAD~8
+</pre>
+<li>Sets the default mailing address for your repo.
+<pre>
+    git config --local sendemail.to mesa-dev@lists.freedesktop.org
+</pre>
+<li> Add a version to the subject line of a patch series (in this case for
+the last 8 commits) before sending.
+<pre>
+    git send-email --subject-prefix="PATCH v4" HEAD~8
+    git send-email -v4 @~8 # shorter version, inherited from git format-patch
+</pre>
+<li> Configure git to use the get_reviewer.pl script interactively. Thus you
+can avoid adding the world to the CC list.
+<pre>
+    git config sendemail.cccmd "./scripts/get_reviewer.pl -i"
+</pre>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/EGL/eglmesaext.h
+++ b/include/EGL/eglmesaext.h
@@ -52,6 +52,7 @@ extern "C" {
 #define EGL_TEXTURE_Y_U_V_WL            0x31D7
 #define EGL_TEXTURE_Y_UV_WL             0x31D8
 #define EGL_TEXTURE_Y_XUXV_WL           0x31D9
+#define EGL_TEXTURE_EXTERNAL_WL         0x31DA

 struct wl_display;
 struct wl_resource;
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -340,12 +340,19 @@ struct __DRI2throttleExtensionRec {
 */

 #define __DRI2_FENCE "DRI2_Fence"
-#define __DRI2_FENCE_VERSION 1
+#define __DRI2_FENCE_VERSION 2

 #define __DRI2_FENCE_TIMEOUT_INFINITE     0xffffffffffffffffllu

 #define __DRI2_FENCE_FLAG_FLUSH_COMMANDS  (1 << 0)

+/**
+ * \name Capabilities that might be returned by __DRI2fenceExtensionRec::get_capabilities
+ */
+/*@{*/
+#define __DRI_FENCE_CAP_NATIVE_FD 1
+/*@}*/
+
 struct __DRI2fenceExtensionRec {
   __DRIextension base;

@@ -390,6 +397,41 @@ struct __DRI2fenceExtensionRec {
    *                sense with this function (right now there are none)
    */
   void (*server_wait_sync)(__DRIcontext *ctx, void *fence, unsigned flags);
+
+   /**
+    * Query for general capabilities of the driver that concern fences.
+    * Returns a bitmask of __DRI_FENCE_CAP_x
+    *
+    * \since 2
+    */
+   unsigned (*get_capabilities)(__DRIscreen *screen);
+
+   /**
+    * Create an fd (file descriptor) associated fence.  If the fence fd
+    * is -1, this behaves similarly to create_fence() except that when
+    * rendering is flushed the driver creates a fence fd.  Otherwise,
+    * the driver wraps an existing fence fd.
+    *
+    * This is used to implement the EGL_ANDROID_native_fence_sync extension.
+    *
+    * \since 2
+    *
+    * \param ctx     the context associated with the fence
+    * \param fd      the fence fd or -1
+    */
+   void *(*create_fence_fd)(__DRIcontext *ctx, int fd);
+
+   /**
+    * For fences created with create_fence_fd(), after rendering is flushed,
+    * this retrieves the native fence fd.  Caller takes ownership of the
+    * fd and will close() it when it is no longer needed.
+    *
+    * \since 2
+    *
+    * \param screen  the screen associated with the fence
+    * \param fence   the fence
+    */
+   int (*get_fence_fd)(__DRIscreen *screen, void *fence);
 };


@@ -1121,6 +1163,9 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FORMAT_XRGB2101010  0x1009
 #define __DRI_IMAGE_FORMAT_ARGB2101010  0x100a
 #define __DRI_IMAGE_FORMAT_SARGB8       0x100b
+#define __DRI_IMAGE_FORMAT_ARGB1555     0x100c
+#define __DRI_IMAGE_FORMAT_R16          0x100d
+#define __DRI_IMAGE_FORMAT_GR1616       0x100e

 #define __DRI_IMAGE_USE_SHARE		0x0001
 #define __DRI_IMAGE_USE_SCANOUT		0x0002
@@ -1148,6 +1193,9 @@ struct __DRIdri2ExtensionRec {

 #define __DRI_IMAGE_FOURCC_R8		0x20203852
 #define __DRI_IMAGE_FOURCC_GR88		0x38385247
+#define __DRI_IMAGE_FOURCC_ARGB1555	0x35315241
+#define __DRI_IMAGE_FOURCC_R16		0x20363152
+#define __DRI_IMAGE_FOURCC_GR1616	0x32335247
 #define __DRI_IMAGE_FOURCC_RGB565	0x36314752
 #define __DRI_IMAGE_FOURCC_ARGB8888	0x34325241
 #define __DRI_IMAGE_FOURCC_XRGB8888	0x34325258
--- a/include/c99_compat.h
+++ b/include/c99_compat.h
@@ -163,6 +163,7 @@ test_c99_compat_h(const void * restrict a,
 #    define HAVE_FUNC_ATTRIBUTE_UNUSED 1
 #    define HAVE_FUNC_ATTRIBUTE_FORMAT 1
 #    define HAVE_FUNC_ATTRIBUTE_PACKED 1
+#    define HAVE_FUNC_ATTRIBUTE_ALIAS 1

 #    if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
       /* https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html */
--- a/include/d3dadapter/present.h
+++ b/include/d3dadapter/present.h
@@ -35,6 +35,22 @@ typedef struct ID3DPresentGroup ID3DPresentGroup;
 typedef struct ID3DAdapter9 ID3DAdapter9;
 typedef struct D3DWindowBuffer D3DWindowBuffer;

+/* Available since version 1.3 */
+typedef struct _D3DPRESENT_PARAMETERS2_ {
+    /* Whether D3DSWAPEFFECT_DISCARD is allowed to release the
+     * D3DWindowBuffers in any order, and possibly with a delay.
+     * FALSE (Default): buffers should be released as soon as possible.
+     * TRUE: it is allowed to release some buffers with a delay, and in
+     * a random order. */
+    BOOL AllowDISCARDDelayedRelease;
+    /* User preference for D3DSWAPEFFECT_DISCARD with D3DPRESENT_INTERVAL_IMMEDIATE.
+     * FALSE (Default): User prefers presentation to occur as soon as possible,
+     * with potential tearing.
+     * TRUE: User prefers presentation to be tear free. Requires
+     * AllowDISCARDDelayedRelease to have any effect. */
+    BOOL TearFreeDISCARD;
+} D3DPRESENT_PARAMETERS2, *PD3DPRESENT_PARAMETERS2, *LPD3DPRESENT_PARAMETERS2;
+
 /* Presentation backend for drivers to display their brilliant work */
 typedef struct ID3DPresentVtbl
 {
@@ -54,7 +70,10 @@ typedef struct ID3DPresentVtbl
    HRESULT (WINAPI *DestroyD3DWindowBuffer)(ID3DPresent *This, D3DWindowBuffer *buffer);
    /* After presenting a buffer to the window system, the buffer
     * may be used as is (no copy of the content) by the window system.
-     * You must not use a non-released buffer, else the user may see undefined content. */
+     * You must not use a non-released buffer, else the user may see undefined content.
+     * Note: This function also waits until the buffer content has been displayed (this
+     * can be after the release of the buffer if the window system decided to make
+     * an internal copy and release early). */
    HRESULT (WINAPI *WaitBufferReleased)(ID3DPresent *This, D3DWindowBuffer *buffer);
    HRESULT (WINAPI *FrontBufferCopy)(ID3DPresent *This, D3DWindowBuffer *buffer);
    /* It is possible to do partial copy, but impossible to do resizing, which must
@@ -75,6 +94,11 @@ typedef struct ID3DPresentVtbl
    BOOL (WINAPI *ResolutionMismatch)(ID3DPresent *This);
    HANDLE (WINAPI *CreateThread)(ID3DPresent *This, void *pThreadfunc, void *pParam);
    BOOL (WINAPI *WaitForThread)(ID3DPresent *This, HANDLE thread);
+    /* Available since version 1.3 */
+    HRESULT (WINAPI *SetPresentParameters2)(ID3DPresent *This, D3DPRESENT_PARAMETERS2 *pParameters);
+    BOOL (WINAPI *IsBufferReleased)(ID3DPresent *This, D3DWindowBuffer *buffer);
+    /* Wait until a buffer gets released. */
+    HRESULT (WINAPI *WaitBufferReleaseEvent)(ID3DPresent *This);
 } ID3DPresentVtbl;

 struct ID3DPresent
@@ -106,6 +130,9 @@ struct ID3DPresent
 #define ID3DPresent_ResolutionMismatch(p) (p)->lpVtbl->ResolutionMismatch(p)
 #define ID3DPresent_CreateThread(p,a,b) (p)->lpVtbl->CreateThread(p,a,b)
 #define ID3DPresent_WaitForThread(p,a) (p)->lpVtbl->WaitForThread(p,a)
+#define ID3DPresent_SetPresentParameters2(p,a) (p)->lpVtbl->SetPresentParameters2(p,a)
+#define ID3DPresent_IsBufferReleased(p,a) (p)->lpVtbl->IsBufferReleased(p,a)
+#define ID3DPresent_WaitBufferReleaseEvent(p) (p)->lpVtbl->WaitBufferReleaseEvent(p)

 typedef struct ID3DPresentGroupVtbl
 {
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,6 +109,10 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
+CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
+CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
+CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
@@ -134,6 +138,11 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
 CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
+CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics 505 (Broxton)")
+CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)")
 CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
@@ -144,22 +153,15 @@ CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kabylake GT2)")
 CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kabylake GT2)")
 CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
 CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5927, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
-CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
-CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
-CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x3184, glk,     "Intel(R) HD Graphics (Geminilake)")
+CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -205,3 +205,10 @@ CHIPSET(0x67CF, POLARIS10_, POLARIS10)
 CHIPSET(0x67DF, POLARIS10_, POLARIS10)

 CHIPSET(0x98E4, STONEY_, STONEY)
+
+CHIPSET(0x6980, POLARIS12_, POLARIS12)
+CHIPSET(0x6981, POLARIS12_, POLARIS12)
+CHIPSET(0x6985, POLARIS12_, POLARIS12)
+CHIPSET(0x6986, POLARIS12_, POLARIS12)
+CHIPSET(0x6987, POLARIS12_, POLARIS12)
+CHIPSET(0x699F, POLARIS12_, POLARIS12)
--- a/include/vulkan/vk_icd.h
+++ b/include/vulkan/vk_icd.h
@@ -1,28 +1,56 @@
+//
+// File: vk_icd.h
+//
+/*
+ * Copyright (c) 2015-2016 The Khronos Group Inc.
+ * Copyright (c) 2015-2016 Valve Corporation
+ * Copyright (c) 2015-2016 LunarG, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
 #ifndef VKICD_H
 #define VKICD_H

-#include "vk_platform.h"
+#include "vulkan.h"

+/*
+ * Loader-ICD version negotiation API
+ */
+#define CURRENT_LOADER_ICD_INTERFACE_VERSION 3
+#define MIN_SUPPORTED_LOADER_ICD_INTERFACE_VERSION 0
+typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderICDInterfaceVersion)(uint32_t *pVersion);
 /*
 * The ICD must reserve space for a pointer for the loader's dispatch
 * table, at the start of <each object>.
 * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro.
 */

-#define ICD_LOADER_MAGIC   0x01CDC0DE
+#define ICD_LOADER_MAGIC 0x01CDC0DE

-typedef union _VK_LOADER_DATA {
-  uintptr_t loaderMagic;
-  void *loaderData;
+typedef union {
+    uintptr_t loaderMagic;
+    void *loaderData;
 } VK_LOADER_DATA;

-static inline void set_loader_magic_value(void* pNewObject) {
-    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline void set_loader_magic_value(void *pNewObject) {
+    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    loader_info->loaderMagic = ICD_LOADER_MAGIC;
 }

-static inline bool valid_loader_magic_value(void* pNewObject) {
-    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline bool valid_loader_magic_value(void *pNewObject) {
+    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC;
 }

@@ -30,56 +58,74 @@ static inline bool valid_loader_magic_value(void* pNewObject) {
 * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that
 * contains the platform-specific connection and surface information.
 */
-typedef enum _VkIcdWsiPlatform {
+typedef enum {
    VK_ICD_WSI_PLATFORM_MIR,
    VK_ICD_WSI_PLATFORM_WAYLAND,
    VK_ICD_WSI_PLATFORM_WIN32,
    VK_ICD_WSI_PLATFORM_XCB,
    VK_ICD_WSI_PLATFORM_XLIB,
+    VK_ICD_WSI_PLATFORM_DISPLAY
 } VkIcdWsiPlatform;

-typedef struct _VkIcdSurfaceBase {
-    VkIcdWsiPlatform   platform;
+typedef struct {
+    VkIcdWsiPlatform platform;
 } VkIcdSurfaceBase;

 #ifdef VK_USE_PLATFORM_MIR_KHR
-typedef struct _VkIcdSurfaceMir {
-    VkIcdSurfaceBase   base;
-    MirConnection*     connection;
-    MirSurface*        mirSurface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    MirConnection *connection;
+    MirSurface *mirSurface;
 } VkIcdSurfaceMir;
 #endif // VK_USE_PLATFORM_MIR_KHR

 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
-typedef struct _VkIcdSurfaceWayland {
-    VkIcdSurfaceBase   base;
-    struct wl_display* display;
-    struct wl_surface* surface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    struct wl_display *display;
+    struct wl_surface *surface;
 } VkIcdSurfaceWayland;
 #endif // VK_USE_PLATFORM_WAYLAND_KHR

 #ifdef VK_USE_PLATFORM_WIN32_KHR
-typedef struct _VkIcdSurfaceWin32 {
-    VkIcdSurfaceBase   base;
-    HINSTANCE          hinstance;
-    HWND               hwnd;
+typedef struct {
+    VkIcdSurfaceBase base;
+    HINSTANCE hinstance;
+    HWND hwnd;
 } VkIcdSurfaceWin32;
 #endif // VK_USE_PLATFORM_WIN32_KHR

 #ifdef VK_USE_PLATFORM_XCB_KHR
-typedef struct _VkIcdSurfaceXcb {
-    VkIcdSurfaceBase   base;
-    xcb_connection_t*  connection;
-    xcb_window_t       window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    xcb_connection_t *connection;
+    xcb_window_t window;
 } VkIcdSurfaceXcb;
 #endif // VK_USE_PLATFORM_XCB_KHR

 #ifdef VK_USE_PLATFORM_XLIB_KHR
-typedef struct _VkIcdSurfaceXlib {
-    VkIcdSurfaceBase   base;
-    Display*           dpy;
-    Window             window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    Display *dpy;
+    Window window;
 } VkIcdSurfaceXlib;
 #endif // VK_USE_PLATFORM_XLIB_KHR

+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+typedef struct {
+    ANativeWindow* window;
+} VkIcdSurfaceAndroid;
+#endif //VK_USE_PLATFORM_ANDROID_KHR
+
+typedef struct {
+    VkIcdSurfaceBase base;
+    VkDisplayModeKHR displayMode;
+    uint32_t planeIndex;
+    uint32_t planeStackIndex;
+    VkSurfaceTransformFlagBitsKHR transform;
+    float globalAlpha;
+    VkDisplayPlaneAlphaFlagBitsKHR alphaMode;
+    VkExtent2D imageExtent;
+} VkIcdSurfaceDisplay;
+
 #endif // VKICD_H
--- a/include/vulkan/vk_platform.h
+++ b/include/vulkan/vk_platform.h
@@ -2,26 +2,19 @@
 // File: vk_platform.h
 //
 /*
-** Copyright (c) 2014-2015 The Khronos Group Inc.
+** Copyright (c) 2014-2017 The Khronos Group Inc.
 **
-** Permission is hereby granted, free of charge, to any person obtaining a
-** copy of this software and/or associated documentation files (the
-** "Materials"), to deal in the Materials without restriction, including
-** without limitation the rights to use, copy, modify, merge, publish,
-** distribute, sublicense, and/or sell copies of the Materials, and to
-** permit persons to whom the Materials are furnished to do so, subject to
-** the following conditions:
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
 **
-** The above copyright notice and this permission notice shall be included
-** in all copies or substantial portions of the Materials.
+**     http://www.apache.org/licenses/LICENSE-2.0
 **
-** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
 */


@@ -58,13 +51,13 @@ extern "C"
    #define VKAPI_ATTR
    #define VKAPI_CALL __stdcall
    #define VKAPI_PTR  VKAPI_CALL
-#elif defined(__ANDROID__) && defined(__ARM_EABI__) && !defined(__ARM_ARCH_7A__)
-    // Android does not support Vulkan in native code using the "armeabi" ABI.
-    #error "Vulkan requires the 'armeabi-v7a' or 'armeabi-v7a-hard' ABI on 32-bit ARM CPUs"
-#elif defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
-    // On Android/ARMv7a, Vulkan functions use the armeabi-v7a-hard calling
-    // convention, even if the application's native code is compiled with the
-    // armeabi-v7a calling convention.
+#elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH < 7
+    #error "Vulkan isn't supported for the 'armeabi' NDK ABI"
+#elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH >= 7 && defined(__ARM_32BIT_STATE)
+    // On Android 32-bit ARM targets, Vulkan functions use the "hardfloat"
+    // calling convention, i.e. float parameters are passed in registers. This
+    // is true even if the rest of the application passes floats on the stack,
+    // as it does by default when compiling for the armeabi-v7a NDK ABI.
    #define VKAPI_ATTR __attribute__((pcs("aapcs-vfp")))
    #define VKAPI_CALL
    #define VKAPI_PTR  VKAPI_ATTR
--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -281,7 +281,7 @@ def parse_source_list(env, filename, names=None):
                    # cause duplicate actions.
                    f = f[len(cur_srcdir + '/'):]
                # do not include any headers
-                if f.endswith('.h'):
+                if f.endswith(tuple(['.h','.hpp'])):
                    continue
                srcs.append(f)

--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -323,10 +323,6 @@ def generate(env):
                'GLX_DIRECT_RENDERING',
                'GLX_INDIRECT_RENDERING',
            ]
-        if env['platform'] in ('linux', 'freebsd'):
-            cppdefines += ['HAVE_ALIAS']
-        else:
-            cppdefines += ['GLX_ALIAS_UNSUPPORTED']

        if env['platform'] in ('linux', 'darwin'):
            cppdefines += ['HAVE_XLOCALE_H']
@@ -651,7 +647,7 @@ def generate(env):
    env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
    env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
    env.PkgCheckModules('XF86VIDMODE', ['xxf86vm'])
-    env.PkgCheckModules('DRM', ['libdrm >= 2.4.38'])
+    env.PkgCheckModules('DRM', ['libdrm >= 2.4.66'])

    if env['x11']:
        env.Append(CPPPATH = env['X11_CPPPATH'])
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -106,7 +106,24 @@ def generate(env):
        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
-        if llvm_version >= distutils.version.LooseVersion('3.7'):
+        if llvm_version >= distutils.version.LooseVersion('3.9'):
+            env.Prepend(LIBS = [
+                'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
+                'LLVMDebugInfoCodeView', 'LLVMCodeGen',
+                'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMInstrumentation', 'LLVMTransformUtils',
+                'LLVMBitWriter', 'LLVMX86Desc',
+                'LLVMMCDisassembler', 'LLVMX86Info',
+                'LLVMX86AsmPrinter', 'LLVMX86Utils',
+                'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
+                'LLVMAnalysis', 'LLVMProfileData',
+                'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
+                'LLVMBitReader', 'LLVMMC', 'LLVMCore',
+                'LLVMSupport',
+                'LLVMIRReader', 'LLVMASMParser'
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('3.7'):
            env.Prepend(LIBS = [
                'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
@@ -177,11 +194,12 @@ def generate(env):
                # that.
                env.Append(LINKFLAGS = ['/nodefaultlib:LIBCMT'])
    else:
-        if not env.Detect('llvm-config'):
-            print 'scons: llvm-config script not found'
+        llvm_config = os.environ.get('LLVM_CONFIG', 'llvm-config')
+        if not env.Detect(llvm_config):
+            print 'scons: %s script not found' % llvm_config
            return

-        llvm_version = env.backtick('llvm-config --version').rstrip()
+        llvm_version = env.backtick('%s --version' % llvm_config).rstrip()
        llvm_version = distutils.version.LooseVersion(llvm_version)

        if llvm_version < distutils.version.LooseVersion(required_llvm_version):
@@ -191,7 +209,7 @@ def generate(env):
        try:
            # Treat --cppflags specially to prevent NDEBUG from disabling
            # assertion failures in debug builds.
-            cppflags = env.ParseFlags('!llvm-config --cppflags')
+            cppflags = env.ParseFlags('!%s --cppflags' % llvm_config)
            try:
                cppflags['CPPDEFINES'].remove('NDEBUG')
            except ValueError:
@@ -199,16 +217,16 @@ def generate(env):
            env.MergeFlags(cppflags)

            # Match llvm --fno-rtti flag
-            cxxflags = env.backtick('llvm-config --cxxflags').split()
+            cxxflags = env.backtick('%s --cxxflags' % llvm_config).split()
            if '-fno-rtti' in cxxflags:
                env.Append(CXXFLAGS = ['-fno-rtti'])

-            components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 'mcdisassembler']
+            components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 'mcdisassembler', 'irreader']

-            env.ParseConfig('llvm-config --libs ' + ' '.join(components))
-            env.ParseConfig('llvm-config --ldflags')
+            env.ParseConfig('%s --libs ' % llvm_config + ' '.join(components))
+            env.ParseConfig('%s --ldflags' % llvm_config)
            if llvm_version >= distutils.version.LooseVersion('3.5'):
-                env.ParseConfig('llvm-config --system-libs')
+                env.ParseConfig('%s --system-libs' % llvm_config)
                env.Append(CXXFLAGS = ['-std=c++11'])
        except OSError:
            print 'scons: llvm-config version %s failed' % llvm_version
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,6 +44,7 @@ git_sha1.h: git_sha1.h.tmp

 BUILT_SOURCES = git_sha1.h
 CLEANFILES = $(BUILT_SOURCES)
+EXTRA_DIST =

 SUBDIRS = . gtest util mapi/glapi/gen mapi

@@ -118,6 +119,7 @@ endif
 if HAVE_VULKAN_COMMON
 SUBDIRS += vulkan/wsi
 endif
+EXTRA_DIST += vulkan/registry/vk.xml

 ## Requires the i965 compiler (part of mesa) and wayland-drm
 if HAVE_INTEL_VULKAN
@@ -126,7 +128,6 @@ endif

 # Requires wayland-drm
 if HAVE_RADEON_VULKAN
-SUBDIRS += amd/common
 SUBDIRS += amd/vulkan
 endif

@@ -134,7 +135,7 @@ if HAVE_GALLIUM
 SUBDIRS += gallium
 endif

-EXTRA_DIST = \
+EXTRA_DIST += \
 	getopt hgl SConscript \
 	$(top_srcdir)/include/GL/mesa_glinterop.h

--- a/src/amd/Android.common.mk
+++ b/src/amd/Android.common.mk
@@ -0,0 +1,63 @@
+# Copyright © 2016 Red Hat.
+# Copyright © 2016 Mauro Rossi <issor.oruam@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+# ---------------------------------------
+# Build libmesa_amd_common
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libmesa_amd_common
+
+LOCAL_SRC_FILES := \
+	$(AMD_COMPILER_FILES) \
+	$(AMD_DEBUG_FILES)
+
+LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU   # instructs LLVM to declare LLVMInitializeAMDGPU* functions
+
+# generate sources
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+intermediates := $(call local-generated-sources-dir)
+LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(AMD_GENERATED_FILES))
+
+$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2)
+$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
+
+$(intermediates)/common/sid_tables.h: $(LOCAL_PATH)/common/sid_tables.py $(MESA_TOP)/src/amd/common/sid.h
+	$(transform-generated-source)
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/include \
+	$(MESA_TOP)/src \
+	$(MESA_TOP)/src/amd/common \
+	$(MESA_TOP)/src/gallium/include \
+	$(MESA_TOP)/src/gallium/auxiliary \
+	$(intermediates)/common \
+	external/llvm/include \
+	external/llvm/device/include \
+	external/libcxx/include \
+	external/elfutils/$(if $(filter 5,$(MESA_ANDROID_MAJOR_VERSION)),0.153/,$(if $(filter 6,$(MESA_ANDROID_MAJOR_VERSION)),src/))libelf
+
+LOCAL_STATIC_LIBRARIES := libLLVMCore
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
--- a/src/amd/Android.mk
+++ b/src/amd/Android.mk
@@ -26,3 +26,4 @@ LOCAL_PATH := $(call my-dir)
 include $(LOCAL_PATH)/Makefile.sources

 include $(LOCAL_PATH)/Android.addrlib.mk
+include $(LOCAL_PATH)/Android.common.mk
--- a/src/amd/Makefile.am
+++ b/src/amd/Makefile.am
@@ -23,5 +23,6 @@ include Makefile.sources

 noinst_LTLIBRARIES =

-EXTRA_DIST = $(COMMON_HEADER_FILES)
+EXTRA_DIST = $(COMMON_HEADER_FILES) common/sid_tables.py
 include Makefile.addrlib.am
+include Makefile.common.am
--- a/src/amd/Makefile.common.am
+++ b/src/amd/Makefile.common.am
@@ -19,15 +19,19 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.

-include Makefile.sources
+if NEED_RADEON_LLVM
+
+COMMON_LIBS = common/libamd_common.la

 # TODO cleanup these
-AM_CPPFLAGS = \
+common_libamd_common_la_CPPFLAGS = \
 	$(VALGRIND_CFLAGS) \
 	$(DEFINES) \
 	-I$(top_srcdir)/include \
 	-I$(top_builddir)/src \
 	-I$(top_srcdir)/src \
+	-I$(top_builddir)/src/amd/common \
+	-I$(top_srcdir)/src/amd/common \
 	-I$(top_builddir)/src/compiler \
 	-I$(top_builddir)/src/compiler/nir \
 	-I$(top_srcdir)/src/compiler \
@@ -37,15 +41,32 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/gallium/include

-AM_CFLAGS = $(VISIBILITY_CFLAGS) \
+common_libamd_common_la_CFLAGS = \
+	$(VISIBILITY_CFLAGS) \
 	$(PTHREAD_CFLAGS) \
 	$(LLVM_CFLAGS) \
 	$(LIBELF_CFLAGS)

-AM_CXXFLAGS = \
+common_libamd_common_la_CXXFLAGS = \
 	$(VISIBILITY_CXXFLAGS) \
 	$(LLVM_CXXFLAGS)

-noinst_LTLIBRARIES = libamd_common.la
+noinst_LTLIBRARIES += $(COMMON_LIBS)

-libamd_common_la_SOURCES = $(AMD_COMPILER_SOURCES)
+common_libamd_common_la_SOURCES = \
+	$(AMD_COMPILER_FILES) \
+	$(AMD_DEBUG_FILES) \
+	$(AMD_GENERATED_FILES)
+
+# nir_to_llvm requires LLVM 3.9, which is only required as a minimum when
+# radv is built.
+if HAVE_RADEON_VULKAN
+common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
+endif
+endif
+
+common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h
+	$(AM_V_at)$(MKDIR_P) $(@D)
+	$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h > $@
+
+BUILT_SOURCES = $(AMD_GENERATED_FILES)
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources
@@ -25,3 +25,21 @@ ADDRLIB_FILES = \
 	addrlib/r800/egbaddrlib.h \
 	addrlib/r800/siaddrlib.cpp \
 	addrlib/r800/siaddrlib.h
+
+AMD_COMPILER_FILES = \
+	common/ac_binary.c \
+	common/ac_binary.h \
+	common/ac_llvm_helper.cpp \
+	common/ac_llvm_util.c \
+	common/ac_llvm_util.h
+
+AMD_NIR_FILES = \
+	common/ac_nir_to_llvm.c \
+	common/ac_nir_to_llvm.h
+
+AMD_DEBUG_FILES = \
+	common/ac_debug.c \
+	common/ac_debug.h
+
+AMD_GENERATED_FILES = \
+	common/sid_tables.h
--- a/src/amd/addrlib/addrtypes.h
+++ b/src/amd/addrlib/addrtypes.h
@@ -88,7 +88,11 @@ typedef int            INT;

 #ifndef ADDR_FASTCALL
    #if defined(__GNUC__)
-        #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #if defined(__i386__)
+            #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #else
+            #define ADDR_FASTCALL
+        #endif
    #else
        #define ADDR_FASTCALL __fastcall
    #endif
--- a/src/amd/addrlib/r800/ciaddrlib.cpp
+++ b/src/amd/addrlib/r800/ciaddrlib.cpp
@@ -353,6 +353,7 @@ AddrChipFamily CIAddrLib::HwlConvertChipFamily(
            m_settings.isFiji            = ASICREV_IS_FIJI_P(uChipRevision);
            m_settings.isPolaris10       = ASICREV_IS_POLARIS10_P(uChipRevision);
            m_settings.isPolaris11       = ASICREV_IS_POLARIS11_M(uChipRevision);
+            m_settings.isPolaris12       = ASICREV_IS_POLARIS12_V(uChipRevision);
            break;
        case FAMILY_CZ:
            m_settings.isCarrizo         = 1;
@@ -417,7 +418,7 @@ BOOL_32 CIAddrLib::HwlInitGlobalParams(
    {
        m_pipes = 16;
    }
-    else if (m_settings.isPolaris11)
+    else if (m_settings.isPolaris11 || m_settings.isPolaris12)
    {
        m_pipes = 4;
    }
--- a/src/amd/addrlib/r800/ciaddrlib.h
+++ b/src/amd/addrlib/r800/ciaddrlib.h
@@ -62,6 +62,7 @@ struct CIChipSettings
        UINT_32 isFiji            : 1;
        UINT_32 isPolaris10       : 1;
        UINT_32 isPolaris11       : 1;
+        UINT_32 isPolaris12       : 1;
        // VI fusion (Carrizo)
        UINT_32 isCarrizo         : 1;
    };
--- a/src/gallium/drivers/radeonsi/.gitignore
+++ b/src/gallium/drivers/radeonsi/.gitignore
--- a/src/amd/common/Makefile.sources
+++ b/src/amd/common/Makefile.sources
@@ -1,29 +0,0 @@
-# Copyright © 2016 Bas Nieuwenhuizen
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-AMD_COMPILER_SOURCES := \
-	ac_binary.c \
-	ac_binary.h \
-	ac_llvm_helper.cpp \
-	ac_llvm_util.c \
-	ac_llvm_util.h \
-	ac_nir_to_llvm.c \
-	ac_nir_to_llvm.h
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#include "ac_debug.h"
+
+#include "sid.h"
+#include "sid_tables.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+/* Parsed IBs are difficult to read without colors. Use "less -R file" to
+ * read them, or use "aha -b -f file" to convert them to html.
+ */
+#define COLOR_RESET	"\033[0m"
+#define COLOR_RED	"\033[31m"
+#define COLOR_GREEN	"\033[1;32m"
+#define COLOR_YELLOW	"\033[1;33m"
+#define COLOR_CYAN	"\033[1;36m"
+
+#define INDENT_PKT 8
+
+static void print_spaces(FILE *f, unsigned num)
+{
+	fprintf(f, "%*s", num, "");
+}
+
+/* Guess whether value is an int or a float and print it accordingly. */
+static void print_value(FILE *file, uint32_t value, int bits)
+{
+	const int hex_digits = bits / 4;
+
+	if (value <= 9) {
+		fprintf(file, "%u\n", value);
+	} else if (value <= (1 << 15)) {
+		fprintf(file, "%u (0x%0*x)\n", value, hex_digits, value);
+	} else {
+		const float as_float = uif(value);
+
+		if (fabs(as_float) < 100000 && as_float*10 == floor(as_float*10))
+			fprintf(file, "%.1ff (0x%0*x)\n", as_float, hex_digits, value);
+		else /* Don't print more leading zeros than there are bits. */
+			fprintf(file, "0x%0*x\n", hex_digits, value);
+	}
+}
+
+static void print_named_value(FILE *file, const char *name, uint32_t value,
+			      int bits)
+{
+	print_spaces(file, INDENT_PKT);
+	fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name);
+	print_value(file, value, bits);
+}
+
+void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
+		 uint32_t field_mask)
+{
+	int r, f;
+	/* Find "offset" in sid_reg_table and print "value" field by field. */
+	for (r = 0; r < ARRAY_SIZE(sid_reg_table); r++) {
+		const struct si_reg *reg = &sid_reg_table[r];
+		const char *reg_name = sid_strings + reg->name_offset;
+
+		if (reg->offset == offset) {
+			bool first_field = true;
+
+			print_spaces(file, INDENT_PKT);
+			fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ",
+				reg_name);
+
+			if (!reg->num_fields) { /* no field table: print the raw 32-bit value */
+				print_value(file, value, 32);
+				return;
+			}
+
+			for (f = 0; f < reg->num_fields; f++) {
+				const struct si_field *field = sid_fields_table + reg->fields_offset + f;
+				const int *values_offsets = sid_strings_offsets + field->values_offset;
+				uint32_t val = (value & field->mask) >>
+					       (ffs(field->mask) - 1); /* shift down to the lowest set mask bit */
+
+				if (!(field->mask & field_mask)) /* field not selected by the caller */
+					continue;
+
+				/* Indent the field so it lines up after "<reg_name> <- ". */
+				if (!first_field)
+					print_spaces(file,
+						     INDENT_PKT + strlen(reg_name) + 4);
+
+				/* Print the field name, then its symbolic value if the table has one. */
+				fprintf(file, "%s = ", sid_strings + field->name_offset);
+
+				if (val < field->num_values && values_offsets[val] >= 0)
+					fprintf(file, "%s\n", sid_strings + values_offsets[val]);
+				else
+					print_value(file, val,
+						    util_bitcount(field->mask));
+
+				first_field = false;
+			}
+			return;
+		}
+	}
+	/* No table entry matched: print the raw offset and value. */
+	print_spaces(file, INDENT_PKT);
+	fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value);
+}
+
+/* Dump the "count" consecutive registers written by a SET_*_REG packet. */
+static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
+				    unsigned reg_offset)
+{
+	const uint32_t *values = &ib[2];
+	unsigned reg = (ib[1] << 2) + reg_offset;
+
+	for (unsigned n = 0; n < count; n++, reg += 4)
+		ac_dump_reg(f, reg, values[n], ~0);
+}
+
+/* Print one type-3 packet starting at ib[0] and return the pointer to the
+ * next packet; *num_dw is decreased by the number of dwords consumed.
+ */
+static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
+				  int trace_id, enum chip_class chip_class,
+				  ac_debug_addr_callback addr_callback,
+				  void *addr_callback_data)
+{
+	unsigned count = PKT_COUNT_G(ib[0]);
+	unsigned op = PKT3_IT_OPCODE_G(ib[0]);
+	const char *predicate = PKT3_PREDICATE(ib[0]) ? "(predicate)" : "";
+	int i;
+
+	/* Print the name first. */
+	for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
+		if (packet3_table[i].op == op)
+			break;
+
+	if (i < ARRAY_SIZE(packet3_table)) {
+		const char *name = sid_strings + packet3_table[i].name_offset;
+
+		if (op == PKT3_SET_CONTEXT_REG ||
+		    op == PKT3_SET_CONFIG_REG ||
+		    op == PKT3_SET_UCONFIG_REG ||
+		    op == PKT3_SET_SH_REG)
+			fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n", name, predicate);
+		else
+			fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n", name, predicate);
+	} else
+		fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n", op, predicate);
+
+	/* Print the contents. */
+	switch (op) {
+	case PKT3_SET_CONTEXT_REG:
+		ac_parse_set_reg_packet(f, ib, count, SI_CONTEXT_REG_OFFSET);
+		break;
+	case PKT3_SET_CONFIG_REG:
+		ac_parse_set_reg_packet(f, ib, count, SI_CONFIG_REG_OFFSET);
+		break;
+	case PKT3_SET_UCONFIG_REG:
+		ac_parse_set_reg_packet(f, ib, count, CIK_UCONFIG_REG_OFFSET);
+		break;
+	case PKT3_SET_SH_REG:
+		ac_parse_set_reg_packet(f, ib, count, SI_SH_REG_OFFSET);
+		break;
+	case PKT3_ACQUIRE_MEM:
+		ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0);
+		ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0);
+		ac_dump_reg(f, R_030230_CP_COHER_SIZE_HI, ib[3], ~0);
+		ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[4], ~0);
+		ac_dump_reg(f, R_0301E4_CP_COHER_BASE_HI, ib[5], ~0);
+		print_named_value(f, "POLL_INTERVAL", ib[6], 16);
+		break;
+	case PKT3_SURFACE_SYNC:
+		if (chip_class >= CIK) {
+			ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0);
+			ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0);
+			ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[3], ~0);
+		} else {
+			ac_dump_reg(f, R_0085F0_CP_COHER_CNTL, ib[1], ~0);
+			ac_dump_reg(f, R_0085F4_CP_COHER_SIZE, ib[2], ~0);
+			ac_dump_reg(f, R_0085F8_CP_COHER_BASE, ib[3], ~0);
+		}
+		print_named_value(f, "POLL_INTERVAL", ib[4], 16);
+		break;
+	case PKT3_EVENT_WRITE:
+		ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
+			    S_028A90_EVENT_TYPE(~0));
+		print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
+		print_named_value(f, "INV_L2", (ib[1] >> 20) & 0x1, 1);
+		if (count > 0) {
+			print_named_value(f, "ADDRESS_LO", ib[2], 32);
+			print_named_value(f, "ADDRESS_HI", ib[3], 16);
+		}
+		break;
+	case PKT3_DRAW_INDEX_AUTO:
+		ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[1], ~0);
+		ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[2], ~0);
+		break;
+	case PKT3_DRAW_INDEX_2:
+		ac_dump_reg(f, R_028A78_VGT_DMA_MAX_SIZE, ib[1], ~0);
+		ac_dump_reg(f, R_0287E8_VGT_DMA_BASE, ib[2], ~0);
+		ac_dump_reg(f, R_0287E4_VGT_DMA_BASE_HI, ib[3], ~0);
+		ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[4], ~0);
+		ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[5], ~0);
+		break;
+	case PKT3_INDEX_TYPE:
+		ac_dump_reg(f, R_028A7C_VGT_DMA_INDEX_TYPE, ib[1], ~0);
+		break;
+	case PKT3_NUM_INSTANCES:
+		ac_dump_reg(f, R_030934_VGT_NUM_INSTANCES, ib[1], ~0);
+		break;
+	case PKT3_WRITE_DATA:
+		ac_dump_reg(f, R_370_CONTROL, ib[1], ~0);
+		ac_dump_reg(f, R_371_DST_ADDR_LO, ib[2], ~0);
+		ac_dump_reg(f, R_372_DST_ADDR_HI, ib[3], ~0);
+		for (i = 2; i < count; i++) {
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, "0x%08x\n", ib[2+i]);
+		}
+		break;
+	case PKT3_CP_DMA:
+		ac_dump_reg(f, R_410_CP_DMA_WORD0, ib[1], ~0);
+		ac_dump_reg(f, R_411_CP_DMA_WORD1, ib[2], ~0);
+		ac_dump_reg(f, R_412_CP_DMA_WORD2, ib[3], ~0);
+		ac_dump_reg(f, R_413_CP_DMA_WORD3, ib[4], ~0);
+		ac_dump_reg(f, R_414_COMMAND, ib[5], ~0);
+		break;
+	case PKT3_DMA_DATA:
+		ac_dump_reg(f, R_500_DMA_DATA_WORD0, ib[1], ~0);
+		ac_dump_reg(f, R_501_SRC_ADDR_LO, ib[2], ~0);
+		ac_dump_reg(f, R_502_SRC_ADDR_HI, ib[3], ~0);
+		ac_dump_reg(f, R_503_DST_ADDR_LO, ib[4], ~0);
+		ac_dump_reg(f, R_504_DST_ADDR_HI, ib[5], ~0);
+		ac_dump_reg(f, R_414_COMMAND, ib[6], ~0);
+		break;
+	case PKT3_INDIRECT_BUFFER_SI:
+	case PKT3_INDIRECT_BUFFER_CONST:
+	case PKT3_INDIRECT_BUFFER_CIK:
+		ac_dump_reg(f, R_3F0_IB_BASE_LO, ib[1], ~0);
+		ac_dump_reg(f, R_3F1_IB_BASE_HI, ib[2], ~0);
+		ac_dump_reg(f, R_3F2_CONTROL, ib[3], ~0);
+
+		if (addr_callback) {
+			uint64_t addr = ((uint64_t)ib[2] << 32) | ib[1];
+			void *data = addr_callback(addr_callback_data, addr);
+			const char *name = G_3F2_CHAIN(ib[3]) ? "chained" : "nested";
+
+			if (data)
+				ac_parse_ib(f, data,  G_3F2_IB_SIZE(ib[3]),
+					    trace_id, name, chip_class,
+					    addr_callback, addr_callback_data);
+		}
+		break;
+	case PKT3_CLEAR_STATE:
+	case PKT3_INCREMENT_DE_COUNTER:
+	case PKT3_PFP_SYNC_ME:
+		break;
+	case PKT3_NOP:
+		if (ib[0] == 0xffff1000) {
+			count = -1; /* One dword NOP: count + 2 == 1 below. */
+			break;
+		} else if (count == 0 && AC_IS_TRACE_POINT(ib[1])) {
+			unsigned packet_id = AC_GET_TRACE_POINT_ID(ib[1]);
+
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, COLOR_RED "Trace point ID: %u" COLOR_RESET "\n", packet_id);
+
+			if (trace_id == -1)
+				break; /* tracing was disabled */
+
+			print_spaces(f, INDENT_PKT);
+			if (packet_id < trace_id)
+				fprintf(f, COLOR_RED
+					"This trace point was reached by the CP."
+					COLOR_RESET "\n");
+			else if (packet_id == trace_id)
+				fprintf(f, COLOR_RED
+					"!!!!! This is the last trace point that "
+					"was reached by the CP !!!!!"
+					COLOR_RESET "\n");
+			else if (packet_id == (unsigned)trace_id + 1) /* first ID past the last reached one */
+				fprintf(f, COLOR_RED
+					"!!!!! This is the first trace point that "
+					"was NOT been reached by the CP !!!!!"
+					COLOR_RESET "\n");
+			else
+				fprintf(f, COLOR_RED
+					"!!!!! This trace point was NOT reached "
+					"by the CP !!!!!"
+					COLOR_RESET "\n");
+			break;
+		}
+		/* fall through, print all dwords */
+	default:
+		for (i = 0; i < count+1; i++) {
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, "0x%08x\n", ib[1+i]);
+		}
+	}
+
+	ib += count + 2;
+	*num_dw -= count + 2;
+	return ib;
+}
+
+/** Parse and print an IB into a file, one packet at a time.
+ *
+ * \param f		file
+ * \param ib		IB
+ * \param num_dw	size of the IB in dwords
+ * \param trace_id	the last trace ID that is known to have been reached
+ *			and executed by the CP, typically read from a buffer
+ * \param name		label printed in the begin/end separator lines
+ * \param chip_class	chip class
+ * \param addr_callback Get a mapped pointer of the IB at a given address. Can
+ *                      be NULL.
+ * \param addr_callback_data user data for addr_callback
+ */
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
+		 const char *name, enum chip_class chip_class,
+		 ac_debug_addr_callback addr_callback, void *addr_callback_data)
+{
+	fprintf(f, "------------------ %s begin ------------------\n", name);
+
+	while (num_dw > 0) {
+		unsigned type = PKT_TYPE_G(ib[0]);
+
+		switch (type) {
+		case 3: /* the helper advances ib and decreases num_dw itself */
+			ib = ac_parse_packet3(f, ib, &num_dw, trace_id,
+					      chip_class, addr_callback,
+					      addr_callback_data);
+			break;
+		case 2:
+			/* type-2 nop */
+			if (ib[0] == 0x80000000) {
+				fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n");
+				ib++;
+				num_dw--;
+				break;
+			}
+			/* fall through: any other type-2 header is reported as unknown */
+		default:
+			fprintf(f, "Unknown packet type %i\n", type);
+			return; /* stop parsing; the "end" separator is not printed */
+		}
+	}
+
+	fprintf(f, "------------------- %s end -------------------\n", name);
+	if (num_dw < 0) { /* the last packet consumed more dwords than were left */
+		printf("Packet ends after the end of IB.\n");
+		exit(0); /* NOTE(review): message goes to stdout, not f, and the process exits with status 0 */
+	}
+	fprintf(f, "\n");
+}
--- a/src/amd/common/ac_debug.h
+++ b/src/amd/common/ac_debug.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+#ifndef AC_DEBUG_H
+#define AC_DEBUG_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "amd_family.h"
+
+#define AC_ENCODE_TRACE_POINT(id)       (0xcafe0000 | ((id) & 0xffff))
+#define AC_IS_TRACE_POINT(x)            (((x) & 0xffff0000) == 0xcafe0000) /* upper half must equal the tag exactly */
+#define AC_GET_TRACE_POINT_ID(x)        ((x) & 0xffff)
+
+typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
+
+void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
+		 uint32_t field_mask);
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
+		 const char *name, enum chip_class chip_class,
+		 ac_debug_addr_callback addr_callback, void *addr_callback_data);
+
+#endif
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -31,16 +31,25 @@
 #  undef DEBUG
 #endif

-#include "ac_nir_to_llvm.h"
+#include "ac_llvm_util.h"
 #include <llvm-c/Core.h>
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
+#include <llvm/IR/Attributes.h>

-extern "C" void
-ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
+void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
 {
   llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
   llvm::AttrBuilder B;
   B.addDereferenceableAttr(bytes);
   A->addAttr(llvm::AttributeSet::get(A->getContext(), A->getArgNo() + 1,  B));
 }
+
+bool ac_is_sgpr_param(LLVMValueRef arg) /* true for byval or inreg arguments */
+{
+	llvm::Argument *param = llvm::unwrap<llvm::Argument>(arg);
+	const unsigned attr_idx = param->getArgNo() + 1; /* attribute slot queried for this argument */
+	llvm::AttributeSet attrs = param->getParent()->getAttributes();
+	return attrs.hasAttribute(attr_idx, llvm::Attribute::ByVal) ||
+	       attrs.hasAttribute(attr_idx, llvm::Attribute::InReg);
+}
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -32,6 +32,9 @@
 #include <assert.h>
 #include <stdio.h>

+#include "util/bitscan.h"
+#include "util/macros.h"
+
 static void ac_init_llvm_target()
 {
 #if HAVE_LLVM < 0x0307
@@ -140,3 +143,364 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)

 	return tm;
 }
+
+/* Initialize module-independent parts of the context.
+ *
+ * The caller is responsible for initializing ctx::module and ctx::builder.
+ */
+void
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
+{
+	LLVMValueRef args[1];
+
+	ctx->context = context;
+	ctx->module = NULL;
+	ctx->builder = NULL;
+
+	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
+	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+
+	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
+
+	args[0] = LLVMConstReal(ctx->f32, 2.5);
+	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
+}
+
+#if HAVE_LLVM < 0x0400
+static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
+{
+   switch (attr) {
+   case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
+   case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
+   case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
+   case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
+   case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
+   case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
+   case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
+   default:
+	   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
+	   return 0;
+   }
+}
+
+#else
+
+static const char *attr_to_str(enum ac_func_attr attr)
+{
+   switch (attr) {
+   case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
+   case AC_FUNC_ATTR_BYVAL: return "byval";
+   case AC_FUNC_ATTR_INREG: return "inreg";
+   case AC_FUNC_ATTR_NOALIAS: return "noalias";
+   case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
+   case AC_FUNC_ATTR_READNONE: return "readnone";
+   case AC_FUNC_ATTR_READONLY: return "readonly";
+   default:
+	   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
+	   return 0;
+   }
+}
+
+#endif
+
+/* Add attr to the function (attr_idx == -1) or to parameter attr_idx (1-based). */
+void
+ac_add_function_attr(LLVMValueRef function, int attr_idx,
+                     enum ac_func_attr attr)
+{
+#if HAVE_LLVM < 0x0400
+   LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
+   if (attr_idx == -1) {
+      LLVMAddFunctionAttr(function, llvm_attr);
+   } else {
+      LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
+   }
+#else
+   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
+   const char *attr_name = attr_to_str(attr);
+   if (!attr_name)
+      return; /* attr_to_str() already reported it; strlen(NULL) would be UB */
+   unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));
+   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
+   LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
+#endif
+}
+
+LLVMValueRef
+ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
+		       LLVMTypeRef return_type, LLVMValueRef *params,
+		       unsigned param_count, unsigned attrib_mask)
+{
+	LLVMValueRef function;
+
+	function = LLVMGetNamedFunction(ctx->module, name);
+	if (!function) {
+		LLVMTypeRef param_types[32], function_type;
+		unsigned i;
+
+		assert(param_count <= 32);
+
+		for (i = 0; i < param_count; ++i) {
+			assert(params[i]);
+			param_types[i] = LLVMTypeOf(params[i]);
+		}
+		function_type =
+		    LLVMFunctionType(return_type, param_types, param_count, 0);
+		function = LLVMAddFunction(ctx->module, name, function_type);
+
+		LLVMSetFunctionCallConv(function, LLVMCCallConv);
+		LLVMSetLinkage(function, LLVMExternalLinkage);
+
+		attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
+		while (attrib_mask) {
+			enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
+			ac_add_function_attr(function, -1, attr);
+		}
+	}
+	return LLVMBuildCall(ctx->builder, function, params, param_count, "");
+}
+
+LLVMValueRef
+ac_build_gather_values_extended(struct ac_llvm_context *ctx,
+				LLVMValueRef *values,
+				unsigned value_count,
+				unsigned value_stride,
+				bool load)
+{
+	LLVMBuilderRef builder = ctx->builder;
+	LLVMValueRef vec;
+	unsigned i;
+
+
+	if (value_count == 1) {
+		if (load)
+			return LLVMBuildLoad(builder, values[0], "");
+		return values[0];
+	} else if (!value_count)
+		unreachable("value_count is 0");
+
+	for (i = 0; i < value_count; i++) {
+		LLVMValueRef value = values[i * value_stride];
+		if (load)
+			value = LLVMBuildLoad(builder, value, "");
+
+		if (!i)
+			vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
+		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
+		vec = LLVMBuildInsertElement(builder, vec, value, index, "");
+	}
+	return vec;
+}
+
+LLVMValueRef
+ac_build_gather_values(struct ac_llvm_context *ctx,
+		       LLVMValueRef *values,
+		       unsigned value_count)
+{
+	return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
+}
+
+LLVMValueRef
+ac_emit_fdiv(struct ac_llvm_context *ctx,
+	     LLVMValueRef num,
+	     LLVMValueRef den)
+{
+	LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
+
+	if (!LLVMIsConstant(ret))
+		LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+	return ret;
+}
+
+/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
+ * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
+ * already multiplied by two. id is the cube face number.
+ */
+struct cube_selection_coords {
+	LLVMValueRef stc[2];
+	LLVMValueRef ma;
+	LLVMValueRef id;
+};
+
+static void
+build_cube_intrinsic(struct ac_llvm_context *ctx,
+		     LLVMValueRef in[3],
+		     struct cube_selection_coords *out)
+{
+	LLVMBuilderRef builder = ctx->builder;
+
+	if (HAVE_LLVM >= 0x0309) {
+		LLVMTypeRef f32 = ctx->f32;
+
+		out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
+					f32, in, 3, AC_FUNC_ATTR_READNONE);
+		out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
+					f32, in, 3, AC_FUNC_ATTR_READNONE);
+		out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
+					f32, in, 3, AC_FUNC_ATTR_READNONE);
+		out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
+					f32, in, 3, AC_FUNC_ATTR_READNONE);
+	} else {
+		LLVMValueRef c[4] = {
+			in[0],
+			in[1],
+			in[2],
+			LLVMGetUndef(LLVMTypeOf(in[0]))
+		};
+		LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);
+
+		LLVMValueRef tmp =
+			ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube",
+					  LLVMTypeOf(vec), &vec, 1,
+					  AC_FUNC_ATTR_READNONE);
+
+		out->stc[1] = LLVMBuildExtractElement(builder, tmp,
+				LLVMConstInt(ctx->i32, 0, 0), "");
+		out->stc[0] = LLVMBuildExtractElement(builder, tmp,
+				LLVMConstInt(ctx->i32, 1, 0), "");
+		out->ma = LLVMBuildExtractElement(builder, tmp,
+				LLVMConstInt(ctx->i32, 2, 0), "");
+		out->id = LLVMBuildExtractElement(builder, tmp,
+				LLVMConstInt(ctx->i32, 3, 0), "");
+	}
+}
+
+/**
+ * Build a manual selection sequence for cube face sc/tc coordinates and
+ * major axis vector (multiplied by 2 for consistency) for the given
+ * vec3 \p coords, for the face implied by \p selcoords.
+ *
+ * For the major axis, we always adjust the sign to be in the direction of
+ * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
+ * the selcoords major axis.
+ */
+static void build_cube_select(LLVMBuilderRef builder,
+			      const struct cube_selection_coords *selcoords,
+			      const LLVMValueRef *coords,
+			      LLVMValueRef *out_st,
+			      LLVMValueRef *out_ma)
+{
+	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
+	LLVMValueRef is_ma_positive;
+	LLVMValueRef sgn_ma;
+	LLVMValueRef is_ma_z, is_not_ma_z;
+	LLVMValueRef is_ma_y;
+	LLVMValueRef is_ma_x;
+	LLVMValueRef sgn;
+	LLVMValueRef tmp;
+
+	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
+		selcoords->ma, LLVMConstReal(f32, 0.0), "");
+	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
+		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
+
+	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
+	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
+	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
+		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
+	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
+
+	/* Select sc */
+	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
+		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
+	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+	/* Select tc */
+	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
+	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+		LLVMConstReal(f32, -1.0), "");
+	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+	/* Select ma */
+	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
+		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
+	sgn = LLVMBuildSelect(builder, is_ma_positive,
+		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
+	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+}
+
+void
+ac_prepare_cube_coords(struct ac_llvm_context *ctx,
+		       bool is_deriv, bool is_array,
+		       LLVMValueRef *coords_arg,
+		       LLVMValueRef *derivs_arg)
+{
+
+	LLVMBuilderRef builder = ctx->builder;
+	struct cube_selection_coords selcoords;
+	LLVMValueRef coords[3];
+	LLVMValueRef invma;
+
+	build_cube_intrinsic(ctx, coords_arg, &selcoords);
+
+	invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
+			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
+	invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
+
+	for (int i = 0; i < 2; ++i)
+		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
+
+	coords[2] = selcoords.id;
+
+	if (is_deriv && derivs_arg) {
+		LLVMValueRef derivs[4];
+		int axis;
+
+		/* Convert cube derivatives to 2D derivatives. */
+		for (axis = 0; axis < 2; axis++) {
+			LLVMValueRef deriv_st[2];
+			LLVMValueRef deriv_ma;
+
+			/* Transform the derivative alongside the texture
+			 * coordinate. Mathematically, the correct formula is
+			 * as follows. Assume we're projecting onto the +Z face
+			 * and denote by dx/dh the derivative of the (original)
+			 * X texture coordinate with respect to horizontal
+			 * window coordinates. The projection onto the +Z face
+			 * plane is:
+			 *
+			 *   f(x,z) = x/z
+			 *
+			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
+			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
+			 *
+			 * This motivates the implementation below.
+			 *
+			 * Whether this actually gives the expected results for
+			 * apps that might feed in derivatives obtained via
+			 * finite differences is anyone's guess. The OpenGL spec
+			 * seems awfully quiet about how textureGrad for cube
+			 * maps should be handled.
+			 */
+			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+					  deriv_st, &deriv_ma);
+
+			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
+
+			for (int i = 0; i < 2; ++i)
+				derivs[axis * 2 + i] =
+					LLVMBuildFSub(builder,
+						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
+						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
+		}
+
+		memcpy(derivs_arg, derivs, sizeof(derivs));
+	}
+
+	/* Shift the texture coordinate. This must be applied after the
+	 * derivative calculation.
+	 */
+	for (int i = 0; i < 2; ++i)
+		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
+
+	if (is_array) {
+		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
+		/* coords_arg.w component - array_index for cube arrays */
+		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
+		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
+	}
+
+	memcpy(coords_arg, coords, sizeof(coords));
+}
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -24,8 +24,77 @@
 */
 #pragma once

+#include <stdbool.h>
 #include <llvm-c/TargetMachine.h>

 #include "amd_family.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum ac_func_attr {
+	AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
+	AC_FUNC_ATTR_BYVAL        = (1 << 1),
+	AC_FUNC_ATTR_INREG        = (1 << 2),
+	AC_FUNC_ATTR_NOALIAS      = (1 << 3),
+	AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
+	AC_FUNC_ATTR_READNONE     = (1 << 5),
+	AC_FUNC_ATTR_READONLY     = (1 << 6),
+	AC_FUNC_ATTR_LAST         = (1 << 7)
+};
+
+struct ac_llvm_context {
+	LLVMContextRef context;
+	LLVMModuleRef module;
+	LLVMBuilderRef builder;
+
+	LLVMTypeRef i32;
+	LLVMTypeRef f32;
+
+	unsigned fpmath_md_kind;
+	LLVMValueRef fpmath_md_2p5_ulp;
+};
+
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family);
+
+void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
+bool ac_is_sgpr_param(LLVMValueRef param);
+
+void
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context);
+
+void
+ac_add_function_attr(LLVMValueRef function,
+                     int attr_idx,
+                     enum ac_func_attr attr);
+LLVMValueRef
+ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
+		       LLVMTypeRef return_type, LLVMValueRef *params,
+		       unsigned param_count, unsigned attrib_mask);
+
+LLVMValueRef
+ac_build_gather_values_extended(struct ac_llvm_context *ctx,
+				LLVMValueRef *values,
+				unsigned value_count,
+				unsigned value_stride,
+				bool load);
+LLVMValueRef
+ac_build_gather_values(struct ac_llvm_context *ctx,
+		       LLVMValueRef *values,
+		       unsigned value_count);
+
+LLVMValueRef
+ac_emit_fdiv(struct ac_llvm_context *ctx,
+	     LLVMValueRef num,
+	     LLVMValueRef den);
+
+void
+ac_prepare_cube_coords(struct ac_llvm_context *ctx,
+		       bool is_deriv, bool is_array,
+		       LLVMValueRef *coords_arg,
+		       LLVMValueRef *derivs_arg);
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -56,7 +56,35 @@ struct ac_nir_compiler_options {
 	enum chip_class chip_class;
 };

+struct ac_userdata_info {
+	int8_t sgpr_idx;
+	uint8_t num_sgprs;
+	bool indirect;
+	uint32_t indirect_offset;
+};
+
+enum ac_ud_index {
+	AC_UD_PUSH_CONSTANTS = 0,
+	AC_UD_SHADER_START = 1,
+	AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
+	AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+	AC_UD_VS_MAX_UD,
+	AC_UD_PS_SAMPLE_POS = AC_UD_SHADER_START,
+	AC_UD_PS_MAX_UD,
+	AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
+	AC_UD_CS_MAX_UD,
+	AC_UD_MAX_UD = AC_UD_VS_MAX_UD,
+};
+
+#define AC_UD_MAX_SETS 4
+
+struct ac_userdata_locations {
+	struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS];
+	struct ac_userdata_info shader_data[AC_UD_MAX_UD];
+};
+
 struct ac_shader_variant_info {
+	struct ac_userdata_locations user_sgprs_locs;
 	unsigned num_user_sgprs;
 	unsigned num_input_sgprs;
 	unsigned num_input_vgprs;
@@ -67,6 +95,8 @@ struct ac_shader_variant_info {
 			unsigned vgpr_comp_cnt;
 			uint32_t export_mask;
 			bool writes_pointsize;
+			bool writes_layer;
+			bool writes_viewport_index;
 			uint8_t clip_dist_mask;
 			uint8_t cull_dist_mask;
 		} vs;
@@ -81,6 +111,7 @@ struct ac_shader_variant_info {
 			bool writes_stencil;
 			bool early_fragment_test;
 			bool writes_memory;
+			bool force_persample;
 		} fs;
 		struct {
 			unsigned block_size[3];
@@ -96,24 +127,4 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
                           const struct ac_nir_compiler_options *options,
 			   bool dump_shader);

-/* SHADER ABI defines */

-/* offset in dwords */
-#define AC_USERDATA_DESCRIPTOR_SET_0 0
-#define AC_USERDATA_DESCRIPTOR_SET_1 2
-#define AC_USERDATA_DESCRIPTOR_SET_2 4
-#define AC_USERDATA_DESCRIPTOR_SET_3 6
-#define AC_USERDATA_PUSH_CONST_DYN 8
-
-#define AC_USERDATA_VS_VERTEX_BUFFERS 10
-#define AC_USERDATA_VS_BASE_VERTEX 12
-#define AC_USERDATA_VS_START_INSTANCE 13
-
-#define AC_USERDATA_PS_SAMPLE_POS 10
-
-#define AC_USERDATA_CS_GRID_SIZE 10
-
-#ifdef __cplusplus
-extern "C"
-#endif
-void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -91,6 +91,7 @@ enum radeon_family {
    CHIP_STONEY,
    CHIP_POLARIS10,
    CHIP_POLARIS11,
+    CHIP_POLARIS12,
    CHIP_LAST,
 };

--- a/src/amd/common/amdgpu_id.h
+++ b/src/amd/common/amdgpu_id.h
@@ -142,6 +142,8 @@ enum {

 	VI_POLARIS11_M_A0 = 90,

+	VI_POLARIS12_V_A0 = 100,
+
 	VI_UNKNOWN        = 0xFF
 };

@@ -156,6 +158,8 @@ enum {
 	((eChipRev >= VI_POLARIS10_P_A0) && (eChipRev < VI_POLARIS11_M_A0))
 #define ASICREV_IS_POLARIS11_M(eChipRev)   \
 	(eChipRev >= VI_POLARIS11_M_A0)
+#define ASICREV_IS_POLARIS12_V(eChipRev)\
+	(eChipRev >= VI_POLARIS12_V_A0)

 /* CZ specific rev IDs */
 enum {
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -133,7 +133,9 @@
 #define   R_3F1_IB_BASE_HI                     0x3F1
 #define   R_3F2_CONTROL                        0x3F2
 #define     S_3F2_IB_SIZE(x)                   (((unsigned)(x) & 0xfffff) << 0)
+#define     G_3F2_IB_SIZE(x)                   (((unsigned)(x) >> 0) & 0xfffff)
 #define     S_3F2_CHAIN(x)                     (((unsigned)(x) & 0x1) << 20)
+#define     G_3F2_CHAIN(x)                     (((unsigned)(x) >> 20) & 0x1)
 #define     S_3F2_VALID(x)                     (((unsigned)(x) & 0x1) << 23)

 #define PKT3_COPY_DATA			       0x40
@@ -151,7 +153,12 @@
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47
-#define PKT3_EVENT_WRITE_EOS                   0x48
+/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
+ * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
+ * DST_SEL=MC. Only CIK chips are affected.
+ */
+/*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* fix CP DMA before uncommenting */
+#define PKT3_RELEASE_MEM                       0x49
 #define PKT3_ONE_REG_WRITE                     0x57 /* not on CIK */
 #define PKT3_ACQUIRE_MEM                       0x58 /* new for CIK */
 #define PKT3_SET_CONFIG_REG                    0x68
@@ -7761,7 +7768,7 @@
 #define     V_028A90_FLUSH_HS_OUTPUT                                0x11
 #define     V_028A90_FLUSH_LS_OUTPUT                                0x12
 #define     V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT                   0x14
-#define     V_028A90_ZPASS_DONE                                     0x15 /* not on CIK */
+#define     V_028A90_ZPASS_DONE                                     0x15
 #define     V_028A90_CACHE_FLUSH_AND_INV_EVENT                      0x16
 #define     V_028A90_PERFCOUNTER_START                              0x17
 #define     V_028A90_PERFCOUNTER_STOP                               0x18
@@ -7795,7 +7802,7 @@
 /* CIK */
 #define     V_028A90_PIXEL_PIPE_STAT_CONTROL                        0x38
 #define     V_028A90_PIXEL_PIPE_STAT_DUMP                           0x39
-#define     V_028A90_PIXEL_PIPE_STAT_RESET                          0x40
+#define     V_028A90_PIXEL_PIPE_STAT_RESET                          0x3A
 /*     */
 #define   S_028A90_ADDRESS_HI(x)                                      (((unsigned)(x) & 0x1FF) << 18)
 #define   G_028A90_ADDRESS_HI(x)                                      (((x) >> 18) & 0x1FF)
@@ -9016,8 +9023,10 @@
 /* SI async DMA Packet types */
 #define    SI_DMA_PACKET_WRITE                     0x2
 #define    SI_DMA_PACKET_COPY                      0x3
-#define    SI_DMA_COPY_MAX_SIZE                    0xfffe0
-#define    SI_DMA_COPY_MAX_SIZE_DW                 0xffff8
+#define    SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE       0xfffe0
+/* The documentation says 0xffff8 is the maximum size in dwords, which is
+ * 0x3fffe0 in bytes. */
+#define    SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE      0x3fffe0
 #define    SI_DMA_COPY_DWORD_ALIGNED               0x00
 #define    SI_DMA_COPY_BYTE_ALIGNED                0x40
 #define    SI_DMA_COPY_TILED                       0x8
--- a/src/gallium/drivers/radeonsi/sid_tables.py
+++ b/src/gallium/drivers/radeonsi/sid_tables.py
--- a/src/amd/vulkan/.gitignore
+++ b/src/amd/vulkan/.gitignore
@@ -4,3 +4,4 @@
 /radv_timestamp.h
 /dev_icd.json
 /vk_format_table.c
+/radeon_icd.*.json
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -32,9 +32,6 @@ lib_LTLIBRARIES = libvulkan_radeon.la
 # The gallium includes are for the util/u_math.h include from main/macros.h

 AM_CPPFLAGS = \
-	$(AMDGPU_CFLAGS) \
-	$(VALGRIND_CFLAGS) \
-	$(DEFINES) \
 	-I$(top_srcdir)/include \
 	-I$(top_builddir)/src \
 	-I$(top_srcdir)/src \
@@ -48,7 +45,10 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_srcdir)/src/mesa/drivers/dri/common \
 	-I$(top_srcdir)/src/gallium/auxiliary \
-	-I$(top_srcdir)/src/gallium/include
+	-I$(top_srcdir)/src/gallium/include \
+	$(AMDGPU_CFLAGS) \
+	$(VALGRIND_CFLAGS) \
+	$(DEFINES)

 AM_CFLAGS = \
 	$(VISIBILITY_CFLAGS) \
@@ -111,31 +111,27 @@ VULKAN_LIB_DEPS += \
 nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
 libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

-radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
-	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
+
+radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_api_xml)
+	$(AM_V_GEN) cat $(vulkan_api_xml) |\
 	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py header > $@

-radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
-	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_api_xml)
+	$(AM_V_GEN) cat $(vulkan_api_xml) |\
 	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py code > $@

-.PHONY: radv_timestamp.h
-
-radv_timestamp.h:
-	@echo "Updating radv_timestamp.h"
-	$(AM_V_GEN) echo "#define RADV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@
-
 vk_format_table.c: vk_format_table.py \
 		   vk_format_parse.py \
                   vk_format_layout.csv
 	$(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@

 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
-CLEANFILES = $(BUILT_SOURCES) dev_icd.json radv_timestamp.h
+CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json
 EXTRA_DIST = \
 	$(top_srcdir)/include/vulkan/vk_icd.h \
 	dev_icd.json.in \
-	radeon_icd.json \
+	radeon_icd.json.in \
 	radv_entrypoints_gen.py \
 	vk_format_layout.csv \
 	vk_format_parse.py \
@@ -155,7 +151,7 @@ libvulkan_radeon_la_LDFLAGS = \


 icdconfdir = @VULKAN_ICD_INSTALL_DIR@
-icdconf_DATA = radeon_icd.json
+icdconf_DATA = radeon_icd.@host_cpu@.json
 # The following is used for development purposes, by setting VK_ICD_FILENAMES.
 noinst_DATA = dev_icd.json

@@ -164,4 +160,9 @@ dev_icd.json : dev_icd.json.in
 		-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
 		< $(srcdir)/dev_icd.json.in > $@

+radeon_icd.@host_cpu@.json : radeon_icd.json.in
+	$(AM_V_GEN) $(SED) \
+		-e "s#@install_libdir@#${libdir}#" \
+		< $(srcdir)/radeon_icd.json.in > $@
+
 include $(top_srcdir)/install-lib-links.mk
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -72,6 +72,5 @@ VULKAN_WSI_X11_FILES := \

 VULKAN_GENERATED_FILES := \
 	radv_entrypoints.c \
-	radv_entrypoints.h \
-	radv_timestamp.h
+	radv_entrypoints.h

--- a/src/amd/vulkan/dev_icd.json.in
+++ b/src/amd/vulkan/dev_icd.json.in
@@ -2,6 +2,6 @@
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "@build_libdir@/libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
+        "api_version": "1.0.3"
    }
 }
--- a/src/amd/vulkan/radeon_icd.json
+++ b/src/amd/vulkan/radeon_icd.json
@@ -1,7 +0,0 @@
-{
-    "file_format_version": "1.0.0",
-    "ICD": {
-        "library_path": "libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
-    }
-}
--- a/src/amd/vulkan/radeon_icd.json.in
+++ b/src/amd/vulkan/radeon_icd.json.in
@@ -0,0 +1,7 @@
+{
+    "file_format_version": "1.0.0",
+    "ICD": {
+        "library_path": "@install_libdir@/libvulkan_radeon.so",
+        "api_version": "1.0.3"
+    }
+}
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
--- a/src/amd/vulkan/radv_entrypoints_gen.py
+++ b/src/amd/vulkan/radv_entrypoints_gen.py
@@ -22,14 +22,8 @@
 # IN THE SOFTWARE.
 #

-import fileinput, re, sys
-
-# Each function typedef in the vulkan.h header is all on one line and matches
-# this regepx. We hope that won't change.
-
-p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);')
-
-entrypoints = []
+import sys
+import xml.etree.ElementTree as ET

 # We generate a static hash table for entry point lookup
 # (vkGetProcAddress). We use a linear congruential generator for our hash
@@ -51,29 +45,11 @@ def hash(name):

    return h

-def get_platform_guard_macro(name):
-    if "Xlib" in name:
-        return "VK_USE_PLATFORM_XLIB_KHR"
-    elif "Xcb" in name:
-        return "VK_USE_PLATFORM_XCB_KHR"
-    elif "Wayland" in name:
-        return "VK_USE_PLATFORM_WAYLAND_KHR"
-    elif "Mir" in name:
-        return "VK_USE_PLATFORM_MIR_KHR"
-    elif "Android" in name:
-        return "VK_USE_PLATFORM_ANDROID_KHR"
-    elif "Win32" in name:
-        return "VK_USE_PLATFORM_WIN32_KHR"
-    else:
-        return None
-
-def print_guard_start(name):
-    guard = get_platform_guard_macro(name)
+def print_guard_start(guard):
    if guard is not None:
        print "#ifdef {0}".format(guard)

-def print_guard_end(name):
-    guard = get_platform_guard_macro(name)
+def print_guard_end(guard):
    if guard is not None:
        print "#endif // {0}".format(guard)

@@ -87,18 +63,37 @@ elif (sys.argv[1] == "code"):
    opt_code = True
    sys.argv.pop()

-# Parse the entry points in the header
+# Extract the entry points from the registry
+def get_entrypoints(doc, entrypoints_to_defines):
+    """Build one (return type, short name, parameter list, index, hash,
+    guard) tuple per <command> found under <commands> in the registry."""
+    result = []
+    for index, cmd in enumerate(doc.findall('./commands/command')):
+        ret_type = cmd.find('./proto/type').text
+        fullname = cmd.find('./proto/name').text
+        # Each parameter is the concatenated text of its <param> element.
+        args = ', '.join("".join(param.itertext())
+                         for param in cmd.findall('./param'))
+        # Names absent from the map get a guard of None.
+        guard = entrypoints_to_defines.get(fullname)
+        result.append((ret_type, fullname[2:], args, index,
+                       hash(fullname), guard))
+    return result

-i = 0
-for line in fileinput.input():
-    m  = p.match(line)
-    if (m):
-        if m.group(2) == 'VoidFunction':
-            continue
-        fullname = "vk" + m.group(2)
-        h = hash(fullname)
-        entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
-        i = i + 1
+# Maps entry points to extension defines
+def get_entrypoints_defines(doc):
+    """Map each command name required by an extension to that extension's
+    'protect' attribute (None when the extension has no such attribute).
+    A command listed by several extensions keeps the last one seen."""
+    guards = {}
+    for ext in doc.findall('./extensions/extension'):
+        protect = ext.get('protect')
+        for cmd in ext.findall('./require/command'):
+            guards[cmd.get('name')] = protect
+    return guards
+
+doc = ET.parse(sys.stdin)
+entrypoints = get_entrypoints(doc, get_entrypoints_defines(doc))

 # For outputting entrypoints.h we generate a radv_EntryPoint() prototype
 # per entry point.
@@ -111,8 +106,7 @@ if opt_header:
    print "      void *entrypoints[%d];" % len(entrypoints)
    print "      struct {"

-    for type, name, args, num, h in entrypoints:
-        guard = get_platform_guard_macro(name)
+    for type, name, args, num, h, guard in entrypoints:
        if guard is not None:
            print "#ifdef {0}".format(guard)
            print "         PFN_vk{0} {0};".format(name)
@@ -125,10 +119,10 @@ if opt_header:
    print "   };\n"
    print "};\n"

-    for type, name, args, num, h in entrypoints:
-        print_guard_start(name)
-        print "%s radv_%s%s;" % (type, name, args)
-        print_guard_end(name)
+    for type, name, args, num, h, guard in entrypoints:
+        print_guard_start(guard)
+        print "%s radv_%s(%s);" % (type, name, args)
+        print_guard_end(guard)
    exit()


@@ -174,7 +168,7 @@ static const char strings[] ="""

 offsets = []
 i = 0;
-for type, name, args, num, h in entrypoints:
+for type, name, args, num, h, guard in entrypoints:
    print "   \"vk%s\\0\"" % name
    offsets.append(i)
    i += 2 + len(name) + 1
@@ -183,7 +177,7 @@ print "   ;"
 # Now generate the table of all entry points

 print "\nstatic const struct radv_entrypoint entrypoints[] = {"
-for type, name, args, num, h in entrypoints:
+for type, name, args, num, h, guard in entrypoints:
    print "   { %5d, 0x%08x }," % (offsets[num], h)
 print "};\n"

@@ -196,15 +190,15 @@ print """
 """

 for layer in [ "radv" ]:
-    for type, name, args, num, h in entrypoints:
-        print_guard_start(name)
-        print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
-        print_guard_end(name)
+    for type, name, args, num, h, guard in entrypoints:
+        print_guard_start(guard)
+        print "%s %s_%s(%s) __attribute__ ((weak));" % (type, layer, name, args)
+        print_guard_end(guard)
    print "\nconst struct radv_dispatch_table %s_layer = {" % layer
-    for type, name, args, num, h in entrypoints:
-        print_guard_start(name)
+    for type, name, args, num, h, guard in entrypoints:
+        print_guard_start(guard)
        print "   .%s = %s_%s," % (name, layer, name)
-        print_guard_end(name)
+        print_guard_end(guard)
    print "};\n"

 print """
@@ -222,7 +216,7 @@ radv_resolve_entrypoint(uint32_t index)

 map = [none for f in xrange(hash_size)]
 collisions = [0 for f in xrange(10)]
-for type, name, args, num, h in entrypoints:
+for type, name, args, num, h, guard in entrypoints:
    level = 0
    while map[h & hash_mask] != none:
        h = h + prime_step
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -154,6 +154,7 @@ uint32_t radv_translate_tex_dataformat(VkFormat format,
 		case VK_FORMAT_D16_UNORM:
 			return V_008F14_IMG_DATA_FORMAT_16;
 		case VK_FORMAT_D24_UNORM_S8_UINT:
+		case VK_FORMAT_X8_D24_UNORM_PACK32:
 			return V_008F14_IMG_DATA_FORMAT_8_24;
 		case VK_FORMAT_S8_UINT:
 			return V_008F14_IMG_DATA_FORMAT_8;
@@ -393,7 +394,7 @@ uint32_t radv_translate_color_numformat(VkFormat format,
 					int first_non_void)
 {
 	unsigned ntype;
-	if (first_non_void == 4 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT)
+	if (first_non_void == -1 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT)
 		ntype = V_028C70_NUMBER_FLOAT;
 	else {
 		ntype = V_028C70_NUMBER_UNORM;
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -112,8 +112,8 @@ radv_init_surface(struct radv_device *device,
 	                           VK_IMAGE_USAGE_STORAGE_BIT)) ||
 	    (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
            (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
-            device->instance->physicalDevice.rad_info.chip_class < VI ||
-            create_info->scanout || !device->allow_dcc ||
+            device->physical_device->rad_info.chip_class < VI ||
+            create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
            !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
 		surface->flags |= RADEON_SURF_DISABLE_DCC;
 	if (create_info->scanout)
@@ -123,7 +123,7 @@ radv_init_surface(struct radv_device *device,
 #define ATI_VENDOR_ID 0x1002
 static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
 {
-	return (ATI_VENDOR_ID << 16) | device->instance->physicalDevice.rad_info.pci_id;
+	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
 }

 static inline unsigned
@@ -267,17 +267,7 @@ si_make_texture_descriptor(struct radv_device *device,

 	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
-		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
-
-		switch (vk_format) {
-		case VK_FORMAT_X8_D24_UNORM_PACK32:
-		case VK_FORMAT_D24_UNORM_S8_UINT:
-		case VK_FORMAT_D32_SFLOAT_S8_UINT:
-			vk_format_compose_swizzles(mapping, swizzle_yyyy, swizzle);
-			break;
-		default:
-			vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
-		}
+		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
 	} else {
 		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
 	}
@@ -336,7 +326,7 @@ si_make_texture_descriptor(struct radv_device *device,
 		/* The last dword is unused by hw. The shader uses it to clear
 		 * bits in the first dword of sampler state.
 		 */
-		if (device->instance->physicalDevice.rad_info.chip_class <= CIK && image->samples <= 1) {
+		if (device->physical_device->rad_info.chip_class <= CIK && image->samples <= 1) {
 			if (first_level == last_level)
 				state[7] = C_008F30_MAX_ANISO_RATIO;
 			else
@@ -527,8 +517,8 @@ radv_image_get_cmask_info(struct radv_device *device,
 			  struct radv_image *image,
 			  struct radv_cmask_info *out)
 {
-	unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
-	unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes;
+	unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
+	unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
 	unsigned cl_width, cl_height;

 	switch (num_pipes) {
@@ -562,10 +552,6 @@ radv_image_get_cmask_info(struct radv_device *device,
 	/* Each element of CMASK is a nibble. */
 	unsigned slice_bytes = slice_elements / 2;

-	out->pitch = width;
-	out->height = height;
-	out->xalign = cl_width * 8;
-	out->yalign = cl_height * 8;
 	out->slice_tile_max = (width * height) / (128*128);
 	if (out->slice_tile_max)
 		out->slice_tile_max -= 1;
@@ -603,8 +589,8 @@ radv_image_get_htile_size(struct radv_device *device,
 {
 	unsigned cl_width, cl_height, width, height;
 	unsigned slice_elements, slice_bytes, base_align;
-	unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes;
-	unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
+	unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
+	unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;

 	/* Overalign HTILE on P2 configs to work around GPU hangs in
 	 * piglit/depthstencil-render-miplevels 585.
@@ -613,7 +599,7 @@ radv_image_get_htile_size(struct radv_device *device,
 	 * are always reproducible. I think I have seen the test hang
 	 * on Carrizo too, though it was very rare there.
 	 */
-	if (device->instance->physicalDevice.rad_info.chip_class >= CIK && num_pipes < 4)
+	if (device->physical_device->rad_info.chip_class >= CIK && num_pipes < 4)
 		num_pipes = 4;

 	switch (num_pipes) {
@@ -663,7 +649,7 @@ static void
 radv_image_alloc_htile(struct radv_device *device,
 		       struct radv_image *image)
 {
-	if (env_var_as_boolean("RADV_HIZ_DISABLE", false))
+	if (device->debug_flags & RADV_DEBUG_NO_HIZ)
 		return;

 	image->htile.size = radv_image_get_htile_size(device, image);
@@ -688,7 +674,7 @@ radv_image_create(VkDevice _device,
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
 	struct radv_image *image = NULL;
-
+	bool can_cmask_dcc = false;
 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

 	radv_assert(pCreateInfo->mipLevels > 0);
@@ -712,6 +698,13 @@ radv_image_create(VkDevice _device,
 	image->samples = pCreateInfo->samples;
 	image->tiling = pCreateInfo->tiling;
 	image->usage = pCreateInfo->usage;
+
+	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
+	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
+		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
+			image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
+	}
+
 	radv_init_surface(device, &image->surface, create_info);

 	device->ws->surface_init(device->ws, &image->surface);
@@ -719,15 +712,18 @@ radv_image_create(VkDevice _device,
 	image->size = image->surface.bo_size;
 	image->alignment = image->surface.bo_alignment;

+	if (image->exclusive || image->queue_family_mask == 1)
+		can_cmask_dcc = true;
+
 	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
-	    image->surface.dcc_size)
+	    image->surface.dcc_size && can_cmask_dcc)
 		radv_image_alloc_dcc(device, image);
 	else
 		image->surface.dcc_size = 0;

 	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
 	    pCreateInfo->mipLevels == 1 &&
-	    !image->surface.dcc_size && image->extent.depth == 1)
+	    !image->surface.dcc_size && image->extent.depth == 1 && can_cmask_dcc)
 		radv_image_alloc_cmask(device, image);
 	if (image->samples > 1 && vk_format_is_color(pCreateInfo->format)) {
 		radv_image_alloc_fmask(device, image);
@@ -756,6 +752,7 @@ radv_image_view_init(struct radv_image_view *iview,
 {
 	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
 	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
+	uint32_t blk_w;
 	bool is_stencil = false;
 	switch (image->type) {
 	case VK_IMAGE_TYPE_1D:
@@ -775,8 +772,13 @@ radv_image_view_init(struct radv_image_view *iview,
 	iview->vk_format = pCreateInfo->format;
 	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;

-	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
 		is_stencil = true;
+		iview->vk_format = vk_format_stencil_only(iview->vk_format);
+	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+		iview->vk_format = vk_format_depth_only(iview->vk_format);
+	}
+
 	iview->extent = (VkExtent3D) {
 		.width  = radv_minify(image->extent.width , range->baseMipLevel),
 		.height = radv_minify(image->extent.height, range->baseMipLevel),
@@ -788,13 +790,15 @@ radv_image_view_init(struct radv_image_view *iview,
 	iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format),
 					    vk_format_get_blockheight(image->vk_format));

+	assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
+	blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);
 	iview->base_layer = range->baseArrayLayer;
 	iview->layer_count = radv_get_layerCount(image, range);
 	iview->base_mip = range->baseMipLevel;

 	si_make_texture_descriptor(device, image, false,
 				   iview->type,
-				   pCreateInfo->format,
+				   iview->vk_format,
 				   &pCreateInfo->components,
 				   0, radv_get_levelCount(image, range) - 1,
 				   range->baseArrayLayer,
@@ -807,7 +811,7 @@ radv_image_view_init(struct radv_image_view *iview,
 	si_set_mutable_tex_desc_fields(device, image,
 				       is_stencil ? &image->surface.stencil_level[range->baseMipLevel] : &image->surface.level[range->baseMipLevel], range->baseMipLevel,
 				       range->baseMipLevel,
-				       image->surface.blk_w, is_stencil, iview->descriptor);
+				       blk_w, is_stencil, iview->descriptor);
 }

 void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
@@ -817,7 +821,7 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
 	 * definitions for them either. They are all 2D_TILED_THIN1 modes with
 	 * different bpp and micro tile mode.
 	 */
-	if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+	if (device->physical_device->rad_info.chip_class >= CIK) {
 		switch (micro_tile_mode) {
 		case 0: /* displayable */
 			image->surface.tiling_index[0] = 10;
@@ -836,29 +840,29 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
 		switch (micro_tile_mode) {
 		case 0: /* displayable */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                            image->surface.tiling_index[0] = 10;
                            break;
-			case 16:
+			case 2:
                            image->surface.tiling_index[0] = 11;
                            break;
-			default: /* 32, 64 */
+			default: /* 4, 8 */
                            image->surface.tiling_index[0] = 12;
                            break;
 			}
 			break;
 		case 1: /* thin */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                                image->surface.tiling_index[0] = 14;
                                break;
-			case 16:
+			case 2:
                                image->surface.tiling_index[0] = 15;
                                break;
-			case 32:
+			case 4:
                                image->surface.tiling_index[0] = 16;
                                break;
-			default: /* 64, 128 */
+			default: /* 8, 16 */
                                image->surface.tiling_index[0] = 17;
                                break;
 			}
@@ -892,11 +896,19 @@ bool radv_layout_can_expclear(const struct radv_image *image,
 		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
 }

-bool radv_layout_has_cmask(const struct radv_image *image,
-			   VkImageLayout layout)
+bool radv_layout_can_fast_clear(const struct radv_image *image,
+			        VkImageLayout layout,
+			        unsigned queue_mask)
 {
-	return (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ||
-		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+		queue_mask == (1u << RADV_QUEUE_GENERAL);
+}
+
+
+/* One bit for `family` if exclusive, else the image's shared-family mask. */
+unsigned radv_image_queue_family_mask(const struct radv_image *image, int family)
+{
+	return image->exclusive ? (1u << family) : image->queue_family_mask;
 }

 VkResult
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -159,13 +159,34 @@ void radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer,
 			      struct radv_meta_saved_compute_state *save);
 void radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer,
 			    struct radv_meta_saved_compute_state *save);
-
+void radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer,
+			  struct radv_meta_saved_compute_state *save);
+void radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer,
+			struct radv_meta_saved_compute_state *save);
+void radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer,
+			    struct radv_meta_saved_compute_state *save);
+void radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
+			  struct radv_meta_saved_compute_state *save);
 void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 			       struct radv_meta_blit2d_surf *src,
 			       struct radv_meta_blit2d_buffer *dst,
 			       unsigned num_rects,
 			       struct radv_meta_blit2d_rect *rects);

+void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
+				  struct radv_meta_blit2d_buffer *src,
+				  struct radv_meta_blit2d_surf *dst,
+				  unsigned num_rects,
+				  struct radv_meta_blit2d_rect *rects);
+void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
+				 struct radv_meta_blit2d_surf *src,
+				 struct radv_meta_blit2d_surf *dst,
+				 unsigned num_rects,
+				 struct radv_meta_blit2d_rect *rects);
+void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
+			      struct radv_meta_blit2d_surf *dst,
+			      const VkClearColorValue *clear_color);
+
 void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageSubresourceRange *subresourceRange);
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -38,7 +38,7 @@ build_nir_vertex_shader(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");

 	nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						   vec4, "a_pos");
@@ -70,7 +70,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_fs.%d", tex_dim);
-	b.shader->info.name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info->name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -124,7 +124,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_depth_fs.%d", tex_dim);
-	b.shader->info.name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info->name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -178,7 +178,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_stencil_fs.%d", tex_dim);
-	b.shader->info.name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info->name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -226,12 +226,13 @@ static void
 meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               struct radv_image *src_image,
               struct radv_image_view *src_iview,
-               VkOffset3D src_offset,
-               VkExtent3D src_extent,
+               VkOffset3D src_offset_0,
+               VkOffset3D src_offset_1,
               struct radv_image *dest_image,
               struct radv_image_view *dest_iview,
-               VkOffset3D dest_offset,
-               VkExtent3D dest_extent,
+               VkOffset3D dest_offset_0,
+               VkOffset3D dest_offset_1,
+               VkRect2D dest_box,
               VkFilter blit_filter)
 {
 	struct radv_device *device = cmd_buffer->device;
@@ -245,38 +246,37 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 	unsigned vb_size = 3 * sizeof(*vb_data);
 	vb_data[0] = (struct blit_vb_data) {
 		.pos = {
-			dest_offset.x,
-			dest_offset.y,
+			dest_offset_0.x,
+			dest_offset_0.y,
 		},
 		.tex_coord = {
-			(float)(src_offset.x) / (float)src_iview->extent.width,
-			(float)(src_offset.y) / (float)src_iview->extent.height,
-			(float)src_offset.z / (float)src_iview->extent.depth,
+			(float)src_offset_0.x / (float)src_iview->extent.width,
+			(float)src_offset_0.y / (float)src_iview->extent.height,
+			(float)src_offset_0.z / (float)src_iview->extent.depth,
 		},
 	};

 	vb_data[1] = (struct blit_vb_data) {
 		.pos = {
-			dest_offset.x,
-			dest_offset.y + dest_extent.height,
+			dest_offset_0.x,
+			dest_offset_1.y,
 		},
 		.tex_coord = {
-			(float)src_offset.x / (float)src_iview->extent.width,
-			(float)(src_offset.y + src_extent.height) /
-			(float)src_iview->extent.height,
-			(float)src_offset.z / (float)src_iview->extent.depth,
+			(float)src_offset_0.x / (float)src_iview->extent.width,
+			(float)src_offset_1.y / (float)src_iview->extent.height,
+			(float)src_offset_0.z / (float)src_iview->extent.depth,
 		},
 	};

 	vb_data[2] = (struct blit_vb_data) {
 		.pos = {
-			dest_offset.x + dest_extent.width,
-			dest_offset.y,
+			dest_offset_1.x,
+			dest_offset_0.y,
 		},
 		.tex_coord = {
-			(float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width,
-			(float)src_offset.y / (float)src_iview->extent.height,
-			(float)src_offset.z / (float)src_iview->extent.depth,
+			(float)src_offset_1.x / (float)src_iview->extent.width,
+			(float)src_offset_0.y / (float)src_iview->extent.height,
+			(float)src_offset_0.z / (float)src_iview->extent.depth,
 		},
 	};
 	radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
@@ -355,8 +355,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 							      .renderPass = device->meta_state.blit.render_pass[fs_key],
 							      .framebuffer = fb,
 							      .renderArea = {
-							      .offset = { dest_offset.x, dest_offset.y },
-							      .extent = { dest_extent.width, dest_extent.height },
+							      .offset = { dest_box.offset.x, dest_box.offset.y },
+							      .extent = { dest_box.extent.width, dest_box.extent.height },
 						      },
 							      .clearValueCount = 0,
 								       .pClearValues = NULL,
@@ -383,8 +383,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 							      .renderPass = device->meta_state.blit.depth_only_rp,
 							      .framebuffer = fb,
 							      .renderArea = {
-							      .offset = { dest_offset.x, dest_offset.y },
-							      .extent = { dest_extent.width, dest_extent.height },
+							      .offset = { dest_box.offset.x, dest_box.offset.y },
+							      .extent = { dest_box.extent.width, dest_box.extent.height },
 						      },
 							      .clearValueCount = 0,
 								       .pClearValues = NULL,
@@ -410,9 +410,9 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 							      .renderPass = device->meta_state.blit.stencil_only_rp,
 							      .framebuffer = fb,
 							      .renderArea = {
-							      .offset = { dest_offset.x, dest_offset.y },
-							      .extent = { dest_extent.width, dest_extent.height },
-						      },
+							      .offset = { dest_box.offset.x, dest_box.offset.y },
+							      .extent = { dest_box.extent.width, dest_box.extent.height },
+						              },
 							      .clearValueCount = 0,
 								       .pClearValues = NULL,
 						       }, VK_SUBPASS_CONTENTS_INLINE);
@@ -461,6 +461,26 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 				&cmd_buffer->pool->alloc);
 }

+/* Order each coordinate pair ascending; true if exactly one pair was swapped. */
+static bool
+flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
+{
+	const bool src_reversed = *src0 > *src1;
+	if (src_reversed) {
+		const unsigned lo = *src1;
+		*src1 = *src0;
+		*src0 = lo;
+	}
+	const bool dst_reversed = *dst0 > *dst1;
+	if (dst_reversed) {
+		const unsigned lo = *dst1;
+		*dst1 = *dst0;
+		*dst0 = lo;
+	}
+	/* Swapping both pairs cancels out, so only a lone swap is a flip. */
+	return src_reversed != dst_reversed;
+}
+
 void radv_CmdBlitImage(
 	VkCommandBuffer                             commandBuffer,
 	VkImage                                     srcImage,
@@ -488,6 +508,8 @@ void radv_CmdBlitImage(
 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {
+		const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
+		const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
 		struct radv_image_view src_iview;
 		radv_image_view_init(&src_iview, cmd_buffer->device,
 				     &(VkImageViewCreateInfo) {
@@ -496,59 +518,92 @@ void radv_CmdBlitImage(
 						     .viewType = radv_meta_get_view_type(src_image),
 						     .format = src_image->vk_format,
 						     .subresourceRange = {
-						     .aspectMask = pRegions[r].srcSubresource.aspectMask,
-						     .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
+						     .aspectMask = src_res->aspectMask,
+						     .baseMipLevel = src_res->mipLevel,
 						     .levelCount = 1,
-						     .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
+						     .baseArrayLayer = src_res->baseArrayLayer,
 						     .layerCount = 1
 					     },
 						     },
 				     cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);

-		if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
-		    pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
-		    pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
-		    pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
-			radv_finishme("FINISHME: Allow flipping in blits");
+		unsigned dst_start, dst_end;
+		if (dest_image->type == VK_IMAGE_TYPE_3D) {
+			assert(dst_res->baseArrayLayer == 0);
+			dst_start = pRegions[r].dstOffsets[0].z;
+			dst_end = pRegions[r].dstOffsets[1].z;
+		} else {
+			dst_start = dst_res->baseArrayLayer;
+			dst_end = dst_start + dst_res->layerCount;
+		}

-		const VkExtent3D dest_extent = {
-			.width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
-			.height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
-			.depth = 1,
-		};
+		unsigned src_start, src_end;
+		if (src_image->type == VK_IMAGE_TYPE_3D) {
+			assert(src_res->baseArrayLayer == 0);
+			src_start = pRegions[r].srcOffsets[0].z;
+			src_end = pRegions[r].srcOffsets[1].z;
+		} else {
+			src_start = src_res->baseArrayLayer;
+			src_end = src_start + src_res->layerCount;
+		}

-		const VkExtent3D src_extent = {
-			.width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
-			.height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
-			.depth = pRegions[r].srcOffsets[1].z - pRegions[r].srcOffsets[0].z,
-		};
+		bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
+		float src_z_step = (float)(src_end + 1 - src_start) /
+			(float)(dst_end + 1 - dst_start);

+		if (flip_z) {
+			src_start = src_end;
+			src_z_step *= -1;
+		}

-		if (pRegions[r].srcSubresource.layerCount > 1)
-			radv_finishme("FINISHME: copy multiple array layers");
+		unsigned src_x0 = pRegions[r].srcOffsets[0].x;
+		unsigned src_x1 = pRegions[r].srcOffsets[1].x;
+		unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
+		unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
+
+		unsigned src_y0 = pRegions[r].srcOffsets[0].y;
+		unsigned src_y1 = pRegions[r].srcOffsets[1].y;
+		unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
+		unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
+
+		VkRect2D dest_box;
+		dest_box.offset.x = MIN2(dst_x0, dst_x1);
+		dest_box.offset.y = MIN2(dst_y0, dst_y1);
+		dest_box.extent.width = abs(dst_x1 - dst_x0);
+		dest_box.extent.height = abs(dst_y1 - dst_y0);

 		struct radv_image_view dest_iview;
 		unsigned usage;
-		if (pRegions[r].dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
+		if (dst_res->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
 			usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
 		else
 			usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;

-		for (unsigned i = pRegions[r].dstOffsets[0].z; i < pRegions[r].dstOffsets[1].z; i++) {
-
-			const VkOffset3D dest_offset = {
-				.x = pRegions[r].dstOffsets[0].x,
-				.y = pRegions[r].dstOffsets[0].y,
-				.z = i,
+		const unsigned num_layers = dst_end - dst_start;
+		for (unsigned i = 0; i < num_layers; i++) {
+			const VkOffset3D dest_offset_0 = {
+				.x = dst_x0,
+				.y = dst_y0,
+				.z = dst_start + i ,
 			};
-			VkOffset3D src_offset = {
-				.x = pRegions[r].srcOffsets[0].x,
-				.y = pRegions[r].srcOffsets[0].y,
-				.z = i,
+			const VkOffset3D dest_offset_1 = {
+				.x = dst_x1,
+				.y = dst_y1,
+				.z = dst_start + i ,
+			};
+			VkOffset3D src_offset_0 = {
+				.x = src_x0,
+				.y = src_y0,
+				.z = src_start + i * src_z_step,
+			};
+			VkOffset3D src_offset_1 = {
+				.x = src_x1,
+				.y = src_y1,
+				.z = src_start + i * src_z_step,
 			};
 			const uint32_t dest_array_slice =
-				radv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
-							  &dest_offset);
+				radv_meta_get_iview_layer(dest_image, dst_res,
+							  &dest_offset_0);

 			radv_image_view_init(&dest_iview, cmd_buffer->device,
 					     &(VkImageViewCreateInfo) {
@@ -557,8 +612,8 @@ void radv_CmdBlitImage(
 							     .viewType = radv_meta_get_view_type(dest_image),
 							     .format = dest_image->vk_format,
 							     .subresourceRange = {
-							     .aspectMask = pRegions[r].dstSubresource.aspectMask,
-							     .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+							     .aspectMask = dst_res->aspectMask,
+							     .baseMipLevel = dst_res->mipLevel,
 							     .levelCount = 1,
 							     .baseArrayLayer = dest_array_slice,
 							     .layerCount = 1
@@ -567,9 +622,10 @@ void radv_CmdBlitImage(
 					     cmd_buffer, usage);
 			meta_emit_blit(cmd_buffer,
 				       src_image, &src_iview,
-				       src_offset, src_extent,
+				       src_offset_0, src_offset_1,
 				       dest_image, &dest_iview,
-				       dest_offset, dest_extent,
+				       dest_offset_0, dest_offset_1,
+				       dest_box,
 				       filter);
 		}
 	}
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -112,7 +112,6 @@ static void
 blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
                struct radv_meta_blit2d_surf *src_img,
                struct radv_meta_blit2d_buffer *src_buf,
-                struct radv_meta_blit2d_rect *rect,
                struct blit2d_src_temps *tmp,
                enum blit2d_src_type src_type, VkFormat depth_format)
 {
@@ -164,7 +163,7 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
 								  .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 								  .pImageInfo = (VkDescriptorImageInfo[]) {
 								  {
-									  .sampler = NULL,
+									  .sampler = VK_NULL_HANDLE,
 									  .imageView = radv_image_view_to_handle(&tmp->iview),
 									  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 								  },
@@ -287,7 +286,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 		if (dst->aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
 			depth_format = dst->image->vk_format;
 		struct blit2d_src_temps src_temps;
-		blit2d_bind_src(cmd_buffer, src_img, src_buf, &rects[r], &src_temps, src_type, depth_format);
+		blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);

 		uint32_t offset = 0;
 		struct blit2d_dst_temps dst_temps;
@@ -439,7 +438,7 @@ build_nir_vertex_shader(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");

 	nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						   vec4, "a_pos");
@@ -574,7 +573,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info->name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
@@ -603,7 +602,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info->name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
@@ -632,7 +631,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info->name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
-	b.shader->info.cs.local_size[0] = 64;
-	b.shader->info.cs.local_size[1] = 1;
-	b.shader->info.cs.local_size[2] = 1;
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
+	b.shader->info->cs.local_size[0] = 64;
+	b.shader->info->cs.local_size[1] = 1;
+	b.shader->info->cs.local_size[2] = 1;

 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info.cs.local_size[0],
-						b.shader->info.cs.local_size[1],
-						b.shader->info.cs.local_size[2], 0);
+						b.shader->info->cs.local_size[0],
+						b.shader->info->cs.local_size[1],
+						b.shader->info->cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

@@ -60,17 +60,17 @@ build_buffer_copy_shader(struct radv_device *dev)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
-	b.shader->info.cs.local_size[0] = 64;
-	b.shader->info.cs.local_size[1] = 1;
-	b.shader->info.cs.local_size[2] = 1;
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
+	b.shader->info->cs.local_size[0] = 64;
+	b.shader->info->cs.local_size[1] = 1;
+	b.shader->info->cs.local_size[2] = 1;

 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info.cs.local_size[0],
-						b.shader->info.cs.local_size[1],
-						b.shader->info.cs.local_size[2], 0);
+						b.shader->info->cs.local_size[0],
+						b.shader->info->cs.local_size[1],
+						b.shader->info->cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

@@ -511,10 +511,11 @@ void radv_CmdUpdateBuffer(
 	VkBuffer                                    dstBuffer,
 	VkDeviceSize                                dstOffset,
 	VkDeviceSize                                dataSize,
-	const uint32_t*                             pData)
+	const void*                                 pData)
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
 	uint64_t words = dataSize / 4;
 	uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
 	va += dstOffset + dst_buffer->offset;
@@ -528,7 +529,8 @@ void radv_CmdUpdateBuffer(
 		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

 		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
-		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
+		                                V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
 		                            S_370_WR_CONFIRM(1) |
 		                            S_370_ENGINE_SEL(V_370_ME));
 		radeon_emit(cmd_buffer->cs, va);
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -56,8 +56,8 @@ build_color_shaders(struct nir_shader **out_vs,
 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

-	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
-	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
+	vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
+	fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");

 	const struct glsl_type *position_type = glsl_vec4_type();
 	const struct glsl_type *color_type = glsl_vec4_type();
@@ -98,6 +98,16 @@ build_color_shaders(struct nir_shader **out_vs,
 	nir_copy_var(&vs_b, vs_out_color, vs_in_color);
 	nir_copy_var(&fs_b, fs_out_color, fs_in_color);

+	const struct glsl_type *layer_type = glsl_int_type();
+	nir_variable *vs_out_layer =
+		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
+				    "v_layer");
+	vs_out_layer->data.location = VARYING_SLOT_LAYER;
+	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+	nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
+
+	nir_store_var(&vs_b, vs_out_layer, inst_id, 0x1);
+
 	*out_vs = vs_b.shader;
 	*out_fs = fs_b.shader;
 }
@@ -204,13 +214,51 @@ create_pipeline(struct radv_device *device,
 	return result;
 }

+/* Create a one-subpass render pass whose only attachment is a color target of
+ * the given format and sample count (LOAD/STORE, GENERAL layout). */
+static VkResult
+create_color_renderpass(struct radv_device *device, VkFormat vk_format,
+			uint32_t samples, VkRenderPass *pass)
+{
+	const VkAttachmentDescription color_att = {
+		.format = vk_format,
+		.samples = samples,
+		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+		.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	const VkAttachmentReference color_ref = {
+		.attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL };
+	const VkAttachmentReference no_ds_ref = {
+		.attachment = VK_ATTACHMENT_UNUSED, .layout = VK_IMAGE_LAYOUT_GENERAL };
+	const uint32_t preserved[] = { 0 };
+	/* Members left out of the initializers below are zero/NULL. */
+	const VkSubpassDescription subpass = {
+		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+		.colorAttachmentCount = 1,
+		.pColorAttachments = &color_ref,
+		.pDepthStencilAttachment = &no_ds_ref,
+		.preserveAttachmentCount = 1,
+		.pPreserveAttachments = preserved,
+	};
+	const VkRenderPassCreateInfo info = {
+		.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+		.attachmentCount = 1,
+		.pAttachments = &color_att,
+		.subpassCount = 1,
+		.pSubpasses = &subpass,
+	};
+	return radv_CreateRenderPass(radv_device_to_handle(device), &info,
+				     &device->meta_state.alloc, pass);
+}
+
 static VkResult
 create_color_pipeline(struct radv_device *device,
-                      VkFormat vk_format,
 		      uint32_t samples,
                      uint32_t frag_output,
                      struct radv_pipeline **pipeline,
-		      VkRenderPass *pass)
+		      VkRenderPass pass)
 {
 	struct nir_shader *vs_nir;
 	struct nir_shader *fs_nir;
@@ -270,44 +318,11 @@ create_color_pipeline(struct radv_device *device,
 		.pAttachments = blend_attachment_state
 	};

-	result = radv_CreateRenderPass(radv_device_to_handle(device),
-				       &(VkRenderPassCreateInfo) {
-					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-						       .attachmentCount = 1,
-						       .pAttachments = &(VkAttachmentDescription) {
-						       .format = vk_format,
-						       .samples = samples,
-						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-					       },
-						       .subpassCount = 1,
-								.pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 1,
-						       .pColorAttachments = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
-						       },
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = VK_ATTACHMENT_UNUSED,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
-						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-								.dependencyCount = 0,
-									 }, &device->meta_state.alloc, pass);

-	if (result != VK_SUCCESS)
-		return result;
 	struct radv_graphics_pipeline_create_info extra = {
 		.use_rectlist = true,
 	};
-	result = create_pipeline(device, radv_render_pass_from_handle(*pass),
+	result = create_pipeline(device, radv_render_pass_from_handle(pass),
 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
 				 &extra, &device->meta_state.alloc, pipeline);

@@ -346,12 +361,10 @@ radv_device_finish_meta_clear_state(struct radv_device *device)

 		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
 			destroy_pipeline(device, state->clear[i].depth_only_pipeline[j]);
-			destroy_render_pass(device, state->clear[i].depth_only_rp[j]);
 			destroy_pipeline(device, state->clear[i].stencil_only_pipeline[j]);
-			destroy_render_pass(device, state->clear[i].stencil_only_rp[j]);
 			destroy_pipeline(device, state->clear[i].depthstencil_pipeline[j]);
-			destroy_render_pass(device, state->clear[i].depthstencil_rp[j]);
 		}
+		destroy_render_pass(device, state->clear[i].depthstencil_rp);
 	}

 }
@@ -444,7 +457,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 					   pipeline_h);
 	}

-	radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+	radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);

 	radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
 }
@@ -458,8 +471,8 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

-	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
-	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
+	vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
+	fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
 	const struct glsl_type *position_type = glsl_vec4_type();

 	nir_variable *vs_in_pos =
@@ -474,17 +487,61 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs

 	nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);

+	const struct glsl_type *layer_type = glsl_int_type();
+	nir_variable *vs_out_layer =
+		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
+				    "v_layer");
+	vs_out_layer->data.location = VARYING_SLOT_LAYER;
+	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+	nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
+	nir_store_var(&vs_b, vs_out_layer, inst_id, 0x1);
+
 	*out_vs = vs_b.shader;
 	*out_fs = fs_b.shader;
 }

+/* Create a one-subpass render pass whose only attachment (format UNDEFINED,
+ * given sample count, LOAD/STORE, GENERAL layout) is the depth/stencil one. */
+static VkResult
+create_depthstencil_renderpass(struct radv_device *device, uint32_t samples,
+			       VkRenderPass *render_pass)
+{
+	const VkAttachmentDescription ds_att = {
+		.format = VK_FORMAT_UNDEFINED,
+		.samples = samples,
+		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+		.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	const VkAttachmentReference ds_ref = {
+		.attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL };
+	const uint32_t preserved[] = { 0 };
+	/* Members left out of the initializers below are zero/NULL. */
+	const VkSubpassDescription subpass = {
+		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+		.pDepthStencilAttachment = &ds_ref,
+		.preserveAttachmentCount = 1,
+		.pPreserveAttachments = preserved,
+	};
+	const VkRenderPassCreateInfo info = {
+		.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+		.attachmentCount = 1,
+		.pAttachments = &ds_att,
+		.subpassCount = 1,
+		.pSubpasses = &subpass,
+	};
+	return radv_CreateRenderPass(radv_device_to_handle(device), &info,
+				     &device->meta_state.alloc, render_pass);
+}
+
 static VkResult
 create_depthstencil_pipeline(struct radv_device *device,
                             VkImageAspectFlags aspects,
 			     uint32_t samples,
 			     int index,
                             struct radv_pipeline **pipeline,
-			     VkRenderPass *render_pass)
+			     VkRenderPass render_pass)
 {
 	struct nir_shader *vs_nir, *fs_nir;
 	VkResult result;
@@ -535,36 +592,6 @@ create_depthstencil_pipeline(struct radv_device *device,
 		.pAttachments = NULL,
 	};

-	result = radv_CreateRenderPass(radv_device_to_handle(device),
-				       &(VkRenderPassCreateInfo) {
-					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-						       .attachmentCount = 1,
-						       .pAttachments = &(VkAttachmentDescription) {
-						       .format = VK_FORMAT_UNDEFINED,
-						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-					       },
-						       .subpassCount = 1,
-								.pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 0,
-						       .pColorAttachments = NULL,
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
-						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-								.dependencyCount = 0,
-									 }, &device->meta_state.alloc, render_pass);
-	if (result != VK_SUCCESS)
-		return result;
-
 	struct radv_graphics_pipeline_create_info extra = {
 		.use_rectlist = true,
 	};
@@ -577,7 +604,7 @@ create_depthstencil_pipeline(struct radv_device *device,
 		extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
 		extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
 	}
-	result = create_pipeline(device, radv_render_pass_from_handle(*render_pass),
+	result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
 				 &extra, &device->meta_state.alloc, pipeline);
 	return result;
@@ -709,7 +736,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 	if (depth_view_can_fast_clear(iview, subpass->depth_stencil_attachment.layout, clear_rect))
 		radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);

-	radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+	radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);
 }


@@ -740,20 +767,32 @@ radv_device_init_meta_clear_state(struct radv_device *device)
 			VkFormat format = pipeline_formats[j];
 			unsigned fs_key = radv_format_meta_fs_key(format);
 			assert(!state->clear[i].color_pipelines[fs_key]);
-			res = create_color_pipeline(device, format, samples, 0, &state->clear[i].color_pipelines[fs_key],
-						    &state->clear[i].render_pass[fs_key]);
+
+			res = create_color_renderpass(device, format, samples,
+						      &state->clear[i].render_pass[fs_key]);
+			if (res != VK_SUCCESS)
+				goto fail;
+
+			res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
+						    state->clear[i].render_pass[fs_key]);
 			if (res != VK_SUCCESS)
 				goto fail;

 		}

+		res = create_depthstencil_renderpass(device,
+						     samples,
+						     &state->clear[i].depthstencil_rp);
+		if (res != VK_SUCCESS)
+			goto fail;
+
 		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
 			res = create_depthstencil_pipeline(device,
 							   VK_IMAGE_ASPECT_DEPTH_BIT,
 							   samples,
 							   j,
 							   &state->clear[i].depth_only_pipeline[j],
-							   &state->clear[i].depth_only_rp[j]);
+							   state->clear[i].depthstencil_rp);
 			if (res != VK_SUCCESS)
 				goto fail;

@@ -762,7 +801,7 @@ radv_device_init_meta_clear_state(struct radv_device *device)
 							   samples,
 							   j,
 							   &state->clear[i].stencil_only_pipeline[j],
-							   &state->clear[i].stencil_only_rp[j]);
+							   state->clear[i].depthstencil_rp);
 			if (res != VK_SUCCESS)
 				goto fail;

@@ -772,7 +811,7 @@ radv_device_init_meta_clear_state(struct radv_device *device)
 							   samples,
 							   j,
 							   &state->clear[i].depthstencil_pipeline[j],
-							   &state->clear[i].depthstencil_rp[j]);
+							   state->clear[i].depthstencil_rp);
 			if (res != VK_SUCCESS)
 				goto fail;
 		}
@@ -802,10 +841,10 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
 	if (!iview->image->cmask.size && !iview->image->surface.dcc_size)
 		return false;

-	if (!cmd_buffer->device->allow_fast_clears)
+	if (!(cmd_buffer->device->debug_flags & RADV_DEBUG_FAST_CLEARS))
 		return false;

-	if (!radv_layout_has_cmask(iview->image, image_layout))
+	if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index)))
 		goto fail;
 	if (vk_format_get_blocksizebits(iview->image->vk_format) > 64)
 		goto fail;
@@ -928,13 +967,10 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)

 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

-	if (cmd_state->framebuffer->layers > 1)
-		radv_finishme("clearing multi-layer framebuffer");
-
 	VkClearRect clear_rect = {
 		.rect = cmd_state->render_area,
 		.baseArrayLayer = 0,
-		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+		.layerCount = cmd_state->framebuffer->layers,
 	};

 	for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
@@ -975,15 +1011,141 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
 	radv_meta_restore(&saved_state, cmd_buffer);
 }

+/*
+ * Clear one mip level / array layer of an image by drawing to it through a
+ * temporary one-layer view, framebuffer and render pass and emit_clear().
+ * The subresource is mip (range->baseMipLevel + level), layer
+ * (range->baseArrayLayer + layer).  Returns void, so if a temporary object
+ * cannot be created the clear is skipped rather than using an unset handle.
+ */
+static void
+radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
+		       struct radv_image *image,
+		       VkImageLayout image_layout,
+		       const VkImageSubresourceRange *range,
+		       VkFormat format, int level, int layer,
+		       const VkClearValue *clear_val)
+{
+	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
+	struct radv_image_view iview;
+	radv_image_view_init(&iview, cmd_buffer->device,
+			     &(VkImageViewCreateInfo) {
+				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+				     .image = radv_image_to_handle(image),
+				     .viewType = radv_meta_get_view_type(image),
+				     .format = format,
+				     .subresourceRange = {
+					     .aspectMask = range->aspectMask,
+					     .baseMipLevel = range->baseMipLevel + level,
+					     .levelCount = 1,
+					     .baseArrayLayer = range->baseArrayLayer + layer,
+					     .layerCount = 1
+				     },
+			     },
+			     cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+
+	VkFramebuffer fb;
+	VkResult result = radv_CreateFramebuffer(device_h,
+			       &(VkFramebufferCreateInfo) {
+				       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+				       .attachmentCount = 1,
+				       .pAttachments = (VkImageView[]) {
+					       radv_image_view_to_handle(&iview),
+				       },
+				       .width = iview.extent.width,
+				       .height = iview.extent.height,
+				       .layers = 1
+			       },
+			       &cmd_buffer->pool->alloc, &fb);
+	if (result != VK_SUCCESS)
+		return;
+
+	VkAttachmentDescription att_desc = {
+		.format = iview.vk_format,
+		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.initialLayout = image_layout,
+		.finalLayout = image_layout,
+	};
+
+	const VkAttachmentReference att_ref = {
+		.attachment = 0,
+		.layout = image_layout,
+	};
+
+	/* Every member not set here or in the branch below stays zero/NULL. */
+	VkSubpassDescription subpass_desc = {
+		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+	};
+
+	if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+		subpass_desc.colorAttachmentCount = 1;
+		subpass_desc.pColorAttachments = &att_ref;
+	} else {
+		subpass_desc.pDepthStencilAttachment = &att_ref;
+	}
+
+	VkRenderPass pass;
+	result = radv_CreateRenderPass(device_h,
+			      &(VkRenderPassCreateInfo) {
+				      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+				      .attachmentCount = 1,
+				      .pAttachments = &att_desc,
+				      .subpassCount = 1,
+				      .pSubpasses = &subpass_desc,
+			      },
+			      &cmd_buffer->pool->alloc, &pass);
+	if (result != VK_SUCCESS) {
+		/* Only the framebuffer exists at this point. */
+		radv_DestroyFramebuffer(device_h, fb,
+					&cmd_buffer->pool->alloc);
+		return;
+	}
+
+	radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+				&(VkRenderPassBeginInfo) {
+					.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+					.renderArea = {
+						.offset = { 0, 0, },
+						.extent = { iview.extent.width, iview.extent.height },
+					},
+					.renderPass = pass,
+					.framebuffer = fb,
+				},
+				VK_SUBPASS_CONTENTS_INLINE);
+
+	VkClearAttachment clear_att = {
+		.aspectMask = range->aspectMask,
+		.colorAttachment = 0,
+		.clearValue = *clear_val,
+	};
+
+	VkClearRect clear_rect = {
+		.rect = {
+			.offset = { 0, 0 },
+			.extent = { iview.extent.width, iview.extent.height },
+		},
+		.baseArrayLayer = range->baseArrayLayer,
+		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+	};
+
+	emit_clear(cmd_buffer, &clear_att, &clear_rect);
+
+	radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+	radv_DestroyRenderPass(device_h, pass, &cmd_buffer->pool->alloc);
+	radv_DestroyFramebuffer(device_h, fb, &cmd_buffer->pool->alloc);
+}
 static void
 radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
 		     struct radv_image *image,
 		     VkImageLayout image_layout,
 		     const VkClearValue *clear_value,
 		     uint32_t range_count,
-		     const VkImageSubresourceRange *ranges)
+		     const VkImageSubresourceRange *ranges,
+		     bool cs)
 {
-	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
 	VkFormat format = image->vk_format;
 	VkClearValue internal_clear_value = *clear_value;

@@ -998,130 +1160,33 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
 		const VkImageSubresourceRange *range = &ranges[r];
 		for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
 			const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
-				radv_minify(image->extent.depth, l) :
+				radv_minify(image->extent.depth, range->baseMipLevel + l) :
 				radv_get_layerCount(image, range);
 			for (uint32_t s = 0; s < layer_count; ++s) {
-				struct radv_image_view iview;
-				radv_image_view_init(&iview, cmd_buffer->device,
-						     &(VkImageViewCreateInfo) {
-							     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-								     .image = radv_image_to_handle(image),
-								     .viewType = radv_meta_get_view_type(image),
-								     .format = format,
-								     .subresourceRange = {
-								     .aspectMask = range->aspectMask,
-								     .baseMipLevel = range->baseMipLevel + l,
-								     .levelCount = 1,
-								     .baseArrayLayer = range->baseArrayLayer + s,
-								     .layerCount = 1
-							     },
-								     },
-						     cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);

-				VkFramebuffer fb;
-				radv_CreateFramebuffer(device_h,
-						       &(VkFramebufferCreateInfo) {
-							       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
-								       .attachmentCount = 1,
-								       .pAttachments = (VkImageView[]) {
-								       radv_image_view_to_handle(&iview),
-							       },
-								       .width = iview.extent.width,
-										.height = iview.extent.height,
-										.layers = 1
-										},
-						       &cmd_buffer->pool->alloc,
-						       &fb);
-
-				VkAttachmentDescription att_desc = {
-					.format = iview.vk_format,
-					.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-					.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-					.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-					.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
-					.initialLayout = image_layout,
-					.finalLayout = image_layout,
-				};
-
-				VkSubpassDescription subpass_desc = {
-					.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-					.inputAttachmentCount = 0,
-					.colorAttachmentCount = 0,
-					.pColorAttachments = NULL,
-					.pResolveAttachments = NULL,
-					.pDepthStencilAttachment = NULL,
-					.preserveAttachmentCount = 0,
-					.pPreserveAttachments = NULL,
-				};
-
-				const VkAttachmentReference att_ref = {
-					.attachment = 0,
-					.layout = image_layout,
-				};
-
-				if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
-					subpass_desc.colorAttachmentCount = 1;
-					subpass_desc.pColorAttachments = &att_ref;
+				if (cs) {
+					struct radv_meta_blit2d_surf surf;
+					surf.format = format;
+					surf.image = image;
+					surf.level = range->baseMipLevel + l;
+					surf.layer = range->baseArrayLayer + s;
+					surf.aspect_mask = range->aspectMask;
+					radv_meta_clear_image_cs(cmd_buffer, &surf,
+								 &internal_clear_value.color);
 				} else {
-					subpass_desc.pDepthStencilAttachment = &att_ref;
+					radv_clear_image_layer(cmd_buffer, image, image_layout,
+							       range, format, l, s, &internal_clear_value);
 				}
-
-				VkRenderPass pass;
-				radv_CreateRenderPass(device_h,
-						      &(VkRenderPassCreateInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-								      .attachmentCount = 1,
-								      .pAttachments = &att_desc,
-								      .subpassCount = 1,
-								      .pSubpasses = &subpass_desc,
-								      },
-						      &cmd_buffer->pool->alloc,
-						      &pass);
-
-				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-							      &(VkRenderPassBeginInfo) {
-								      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-									      .renderArea = {
-									      .offset = { 0, 0, },
-									      .extent = {
-										      .width = iview.extent.width,
-										      .height = iview.extent.height,
-									      },
-								      },
-									      .renderPass = pass,
-										       .framebuffer = fb,
-										       .clearValueCount = 0,
-										       .pClearValues = NULL,
-										       },
-							      VK_SUBPASS_CONTENTS_INLINE);
-
-				VkClearAttachment clear_att = {
-					.aspectMask = range->aspectMask,
-					.colorAttachment = 0,
-					.clearValue = internal_clear_value,
-				};
-
-				VkClearRect clear_rect = {
-					.rect = {
-						.offset = { 0, 0 },
-						.extent = { iview.extent.width, iview.extent.height },
-					},
-					.baseArrayLayer = range->baseArrayLayer,
-					.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
-				};
-
-				emit_clear(cmd_buffer, &clear_att, &clear_rect);
-
-				radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
-				radv_DestroyRenderPass(device_h, pass,
-							     &cmd_buffer->pool->alloc);
-				radv_DestroyFramebuffer(device_h, fb,
-							      &cmd_buffer->pool->alloc);
 			}
 		}
 	}
 }

+union meta_saved_state { /* saved state for a meta op; one member per call */
+	struct radv_meta_saved_state gfx; /* used when the clear takes the graphics path */
+	struct radv_meta_saved_compute_state compute; /* used when the clear takes the compute path */
+};
+
 void radv_CmdClearColorImage(
 	VkCommandBuffer                             commandBuffer,
 	VkImage                                     image_h,
@@ -1132,15 +1197,22 @@ void radv_CmdClearColorImage(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_image, image, image_h);
-	struct radv_meta_saved_state saved_state;
+	union meta_saved_state saved_state;
+	bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_begin_cleari(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);

 	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
 			     (const VkClearValue *) pColor,
-			     rangeCount, pRanges);
+			     rangeCount, pRanges, cs);

-	radv_meta_restore(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_end_cleari(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_restore(&saved_state.gfx, cmd_buffer);
 }

 void radv_CmdClearDepthStencilImage(
@@ -1159,7 +1231,7 @@ void radv_CmdClearDepthStencilImage(

 	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
 			     (const VkClearValue *) pDepthStencil,
-			     rangeCount, pRanges);
+			     rangeCount, pRanges, false);

 	radv_meta_restore(&saved_state, cmd_buffer);
 }
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -78,13 +78,13 @@ vk_format_for_size(int bs)
 }

 static struct radv_meta_blit2d_surf
-blit_surf_for_image_level_layer(struct radv_image* image, VkImageAspectFlags aspectMask,
-				int level, int layer)
+blit_surf_for_image_level_layer(struct radv_image *image,
+				const VkImageSubresourceLayers *subres)
 {
 	VkFormat format = image->vk_format;
-	if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+	if (subres->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
 		format = vk_format_depth_only(format);
-	else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+	else if (subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
 		format = vk_format_stencil_only(format);

 	if (!image->surface.dcc_size)
@@ -93,13 +93,18 @@ blit_surf_for_image_level_layer(struct radv_image* image, VkImageAspectFlags asp
 	return (struct radv_meta_blit2d_surf) {
 		.format = format,
 		.bs = vk_format_get_blocksize(format),
-		.level = level,
-		.layer = layer,
+		.level = subres->mipLevel,
+		.layer = subres->baseArrayLayer,
 		.image = image,
-		.aspect_mask = aspectMask,
+		.aspect_mask = subres->aspectMask,
 	};
 }

+union meta_saved_state { /* saved state for a meta copy; one member per call */
+	struct radv_meta_saved_state gfx; /* used when the copy takes the graphics path */
+	struct radv_meta_saved_compute_state compute; /* used when the copy takes the compute path */
+};
+
 static void
 meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
                          struct radv_buffer* buffer,
@@ -107,14 +112,18 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
                          uint32_t regionCount,
                          const VkBufferImageCopy* pRegions)
 {
-	struct radv_meta_saved_state saved_state;
+	bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+	union meta_saved_state saved_state;

 	/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
 	 * VK_SAMPLE_COUNT_1_BIT."
 	 */
 	assert(image->samples == 1);

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {

@@ -150,9 +159,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf img_bsurf =
 			blit_surf_for_image_level_layer(image,
-							pRegions[r].imageSubresource.aspectMask,
-							pRegions[r].imageSubresource.mipLevel,
-							pRegions[r].imageSubresource.baseArrayLayer);
+							&pRegions[r].imageSubresource);

 		struct radv_meta_blit2d_buffer buf_bsurf = {
 			.bs = img_bsurf.bs,
@@ -174,7 +181,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,


 			/* Perform Blit */
-			radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
+			if (cs)
+				radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
+			else
+				radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);

 			/* Once we've done the blit, all of the actual information about
 			 * the image is embedded in the command buffer so we can just
@@ -190,7 +200,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 				slice_array++;
 		}
 	}
-	radv_meta_restore(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_end_bufimage(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_restore(&saved_state.gfx, cmd_buffer);
 }

 void radv_CmdCopyBufferToImage(
@@ -253,9 +266,8 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf img_info =
 			blit_surf_for_image_level_layer(image,
-							pRegions[r].imageSubresource.aspectMask,
-							pRegions[r].imageSubresource.mipLevel,
-							pRegions[r].imageSubresource.baseArrayLayer);
+							&pRegions[r].imageSubresource);
+
 		struct radv_meta_blit2d_buffer buf_info = {
 			.bs = img_info.bs,
 			.format = img_info.format,
@@ -306,19 +318,15 @@ void radv_CmdCopyImageToBuffer(
 				  regionCount, pRegions);
 }

-void radv_CmdCopyImage(
-	VkCommandBuffer                             commandBuffer,
-	VkImage                                     srcImage,
-	VkImageLayout                               srcImageLayout,
-	VkImage                                     destImage,
-	VkImageLayout                               destImageLayout,
-	uint32_t                                    regionCount,
-	const VkImageCopy*                          pRegions)
+static void
+meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
+		struct radv_image *src_image,
+		struct radv_image *dest_image,
+		uint32_t regionCount,
+		const VkImageCopy *pRegions)
 {
-	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	RADV_FROM_HANDLE(radv_image, src_image, srcImage);
-	RADV_FROM_HANDLE(radv_image, dest_image, destImage);
-	struct radv_meta_saved_state saved_state;
+	bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+	union meta_saved_state saved_state;

 	/* From the Vulkan 1.0 spec:
 	 *
@@ -326,8 +334,10 @@ void radv_CmdCopyImage(
 	 *    images, but both images must have the same number of samples.
 	 */
 	assert(src_image->samples == dest_image->samples);
-
-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_begin_itoi(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {
 		assert(pRegions[r].srcSubresource.aspectMask ==
@@ -336,14 +346,11 @@ void radv_CmdCopyImage(
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf b_src =
 			blit_surf_for_image_level_layer(src_image,
-							pRegions[r].srcSubresource.aspectMask,
-							pRegions[r].srcSubresource.mipLevel,
-							pRegions[r].srcSubresource.baseArrayLayer);
+							&pRegions[r].srcSubresource);
+
 		struct radv_meta_blit2d_surf b_dst =
 			blit_surf_for_image_level_layer(dest_image,
-							pRegions[r].dstSubresource.aspectMask,
-							pRegions[r].dstSubresource.mipLevel,
-							pRegions[r].dstSubresource.baseArrayLayer);
+							&pRegions[r].dstSubresource);

 		/* for DCC */
 		b_src.format = b_dst.format;
@@ -384,7 +391,10 @@ void radv_CmdCopyImage(
 			rect.src_y = src_offset_el.y;

 			/* Perform Blit */
-			radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
+			if (cs)
+				radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
+			else
+				radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);

 			b_src.layer++;
 			b_dst.layer++;
@@ -395,5 +405,25 @@ void radv_CmdCopyImage(
 		}
 	}

-	radv_meta_restore(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_end_itoi(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_restore(&saved_state.gfx, cmd_buffer);
+}
+
+void radv_CmdCopyImage(
+	VkCommandBuffer                             commandBuffer,
+	VkImage                                     srcImage,
+	VkImageLayout                               srcImageLayout,
+	VkImage                                     destImage,
+	VkImageLayout                               destImageLayout,
+	uint32_t                                    regionCount,
+	const VkImageCopy*                          pRegions)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_image, src_image, srcImage);
+	RADV_FROM_HANDLE(radv_image, dest_image, destImage);
+
+	meta_copy_image(cmd_buffer, src_image, dest_image,
+			regionCount, pRegions);
 }
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -46,7 +46,7 @@ build_nir_vs(void)
 	nir_variable *v_position;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");

 	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 					 "a_position");
@@ -68,8 +68,8 @@ build_nir_fs(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_asprintf(b.shader,
-					      "meta_depth_decomp_noop_fs");
+	b.shader->info->name = ralloc_asprintf(b.shader,
+					       "meta_depth_decomp_noop_fs");

 	return b.shader;
 }
@@ -382,7 +382,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,

 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

-	for (uint32_t layer = 0; layer < subresourceRange->layerCount; layer++) {
+	for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
 		struct radv_image_view iview;

 		radv_image_view_init(&iview, cmd_buffer->device,
@@ -450,6 +450,7 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageSubresourceRange *subresourceRange)
 {
+	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
 					 cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline);
 }
@@ -458,6 +459,7 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageSubresourceRange *subresourceRange)
 {
+	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
 					 cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline);
 }
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -46,7 +46,7 @@ build_nir_vs(void)
 	nir_variable *v_position;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_fast_clear_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_fast_clear_vs");

 	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 					 "a_position");
@@ -68,7 +68,7 @@ build_nir_fs(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_asprintf(b.shader,
+	b.shader->info->name = ralloc_asprintf(b.shader,
 					      "meta_fast_clear_noop_fs");

 	return b.shader;
@@ -419,6 +419,7 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);

+	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 	radv_meta_save_pass(&saved_pass_state, cmd_buffer);
 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -33,7 +33,6 @@
 */
 struct vertex_attrs {
 	float position[2]; /**< 3DPRIM_RECTLIST */
-	float tex_position[2];
 };

 /* passthrough vertex shader */
@@ -45,11 +44,9 @@ build_nir_vs(void)
 	nir_builder b;
 	nir_variable *a_position;
 	nir_variable *v_position;
-	nir_variable *a_tex_position;
-	nir_variable *v_tex_position;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_resolve_vs");

 	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 					 "a_position");
@@ -59,16 +56,7 @@ build_nir_vs(void)
 					 "gl_Position");
 	v_position->data.location = VARYING_SLOT_POS;

-	a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-					     "a_tex_position");
-	a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
-
-	v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
-					     "v_tex_position");
-	v_tex_position->data.location = VARYING_SLOT_VAR0;
-
 	nir_copy_var(&b, v_position, a_position);
-	nir_copy_var(&b, v_tex_position, a_tex_position);

 	return b.shader;
 }
@@ -79,22 +67,16 @@ build_nir_fs(void)
 {
 	const struct glsl_type *vec4 = glsl_vec4_type();
 	nir_builder b;
-	nir_variable *v_tex_position; /* vec4, varying texture coordinate */
 	nir_variable *f_color; /* vec4, fragment output color */

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_asprintf(b.shader,
-					      "meta_resolve_fs");
-
-	v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-					     "v_tex_position");
-	v_tex_position->data.location = VARYING_SLOT_VAR0;
+	b.shader->info->name = ralloc_asprintf(b.shader,
+					       "meta_resolve_fs");

 	f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
 				      "f_color");
 	f_color->data.location = FRAG_RESULT_DATA0;
-
-	nir_copy_var(&b, f_color, v_tex_position);
+	nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);

 	return b.shader;
 }
@@ -198,7 +180,7 @@ create_pipeline(struct radv_device *device,
 								       .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
 							       },
 						       },
-						       .vertexAttributeDescriptionCount = 2,
+						       .vertexAttributeDescriptionCount = 1,
 						       .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
 							       {
 								       /* Position */
@@ -207,13 +189,6 @@ create_pipeline(struct radv_device *device,
 								       .format = VK_FORMAT_R32G32_SFLOAT,
 								       .offset = offsetof(struct vertex_attrs, position),
 							       },
-							       {
-								       /* Texture Coordinate */
-								       .location = 1,
-								       .binding = 0,
-								       .format = VK_FORMAT_R32G32_SFLOAT,
-								       .offset = offsetof(struct vertex_attrs, tex_position),
-							       },
 						       },
 					       },
 					       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
@@ -333,7 +308,6 @@ cleanup:

 static void
 emit_resolve(struct radv_cmd_buffer *cmd_buffer,
-             const VkOffset2D *src_offset,
             const VkOffset2D *dest_offset,
             const VkExtent2D *resolve_extent)
 {
@@ -346,30 +320,18 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 				dest_offset->x,
 				dest_offset->y,
 			},
-			.tex_position = {
-				src_offset->x,
-				src_offset->y,
-			},
 		},
 		{
 			.position = {
 				dest_offset->x,
 				dest_offset->y + resolve_extent->height,
 			},
-			.tex_position = {
-				src_offset->x,
-				src_offset->y + resolve_extent->height,
-			},
 		},
 		{
 			.position = {
 				dest_offset->x + resolve_extent->width,
 				dest_offset->y,
 			},
-			.tex_position = {
-				src_offset->x + resolve_extent->width,
-				src_offset->y,
-			},
 		},
 	};

@@ -505,8 +467,6 @@ void radv_CmdResolveImage(
 		 */
 		const struct VkExtent3D extent =
 			radv_sanitize_image_extent(src_image->type, region->extent);
-		const struct VkOffset3D srcOffset =
-			radv_sanitize_image_offset(src_image->type, region->srcOffset);
 		const struct VkOffset3D dstOffset =
 			radv_sanitize_image_offset(dest_image->type, region->dstOffset);

@@ -587,10 +547,6 @@ void radv_CmdResolveImage(
 						      VK_SUBPASS_CONTENTS_INLINE);

 			emit_resolve(cmd_buffer,
-				     &(VkOffset2D) {
-					     .x = srcOffset.x,
-					     .y = srcOffset.y,
-				     },
 				     &(VkOffset2D) {
 					     .x = dstOffset.x,
 					     .y = dstOffset.y,
@@ -662,7 +618,6 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 		 * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST?
 		 */
 		emit_resolve(cmd_buffer,
-			     &(VkOffset2D) { 0, 0 },
 			     &(VkOffset2D) { 0, 0 },
 			     &(VkExtent2D) { fb->width, fb->height });
 	}
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -47,10 +47,10 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
 							     GLSL_TYPE_FLOAT);
 	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float");
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
-	b.shader->info.cs.local_size[0] = 16;
-	b.shader->info.cs.local_size[1] = 16;
-	b.shader->info.cs.local_size[2] = 1;
+	b.shader->info->name = ralloc_strdup(b.shader, name);
+	b.shader->info->cs.local_size[0] = 16;
+	b.shader->info->cs.local_size[1] = 16;
+	b.shader->info->cs.local_size[2] = 1;

 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
 						      sampler_type, "s_tex");
@@ -64,9 +64,9 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info.cs.local_size[0],
-						b.shader->info.cs.local_size[1],
-						b.shader->info.cs.local_size[2], 0);
+						b.shader->info->cs.local_size[0],
+						b.shader->info->cs.local_size[1],
+						b.shader->info->cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

@@ -405,7 +405,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 						  .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 						  .pImageInfo = (VkDescriptorImageInfo[]) {
 							  {
-								  .sampler = NULL,
+								  .sampler = VK_NULL_HANDLE,
 								  .imageView = radv_image_view_to_handle(&src_iview),
 								  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 							  },
@@ -420,7 +420,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 						  .pImageInfo = (VkDescriptorImageInfo[]) {
 							  {
-								  .sampler = NULL,
+								  .sampler = VK_NULL_HANDLE,
 								  .imageView = radv_image_view_to_handle(&dest_iview),
 								  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 							  },
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -144,6 +144,7 @@ radv_optimize_nir(struct nir_shader *shader)
                NIR_PASS(progress, shader, nir_opt_algebraic);
                NIR_PASS(progress, shader, nir_opt_constant_folding);
                NIR_PASS(progress, shader, nir_opt_undef);
+                NIR_PASS(progress, shader, nir_opt_conditional_discard);
        } while (progress);
 }

@@ -187,24 +188,30 @@ radv_shader_compile_to_nir(struct radv_device *device,
 				assert(data + entry.size <= spec_info->pData + spec_info->dataSize);

 				spec_entries[i].id = spec_info->pMapEntries[i].constantID;
-				spec_entries[i].data = *(const uint32_t *)data;
+				if (entry.size == 8) /* width of this entry, not of the whole data blob */
+					spec_entries[i].data64 = *(const uint64_t *)data;
+				else
+					spec_entries[i].data32 = *(const uint32_t *)data;
 			}
 		}
-
+		const struct nir_spirv_supported_extensions supported_ext = {
+		};
 		entry_point = spirv_to_nir(spirv, module->size / 4,
 					   spec_entries, num_spec_entries,
-					   stage, entrypoint_name, &nir_options);
+					   stage, entrypoint_name, &supported_ext, &nir_options);
 		nir = entry_point->shader;
 		assert(nir->stage == stage);
 		nir_validate_shader(nir);

 		free(spec_entries);

-		nir_lower_returns(nir);
-		nir_validate_shader(nir);
-
-		nir_inline_functions(nir);
-		nir_validate_shader(nir);
+		/* We have to lower away local constant initializers right before we
+		 * inline functions.  That way they get properly initialized at the top
+		 * of the function and not at the top of its caller.
+		 */
+		NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
+		NIR_PASS_V(nir, nir_lower_returns);
+		NIR_PASS_V(nir, nir_inline_functions);

 		/* Pick off the single entrypoint that we want */
 		foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
@@ -214,17 +221,18 @@ radv_shader_compile_to_nir(struct radv_device *device,
 		assert(exec_list_length(&nir->functions) == 1);
 		entry_point->name = ralloc_strdup(entry_point, "main");

-		nir_remove_dead_variables(nir, nir_var_shader_in);
-		nir_remove_dead_variables(nir, nir_var_shader_out);
-		nir_remove_dead_variables(nir, nir_var_system_value);
-		nir_validate_shader(nir);
+		NIR_PASS_V(nir, nir_remove_dead_variables,
+		           nir_var_shader_in | nir_var_shader_out | nir_var_system_value);

-		nir_lower_system_values(nir);
-		nir_validate_shader(nir);
+		/* Now that we've deleted all but the main function, we can go ahead and
+		 * lower the rest of the constant initializers.
+		 */
+		NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+		NIR_PASS_V(nir, nir_lower_system_values);
 	}

 	/* Vulkan uses the separate-shader linking model */
-	nir->info.separate_shader = true;
+	nir->info->separate_shader = true;

 	//   nir = brw_preprocess_nir(compiler, nir);

@@ -256,6 +264,81 @@ radv_shader_compile_to_nir(struct radv_device *device,
 	return nir;
 }

+/* Heading printed above a stage's statistics; "var" is not consulted. */
+static const char *radv_get_shader_name(struct radv_shader_variant *var,
+					gl_shader_stage stage)
+{
+	if (stage == MESA_SHADER_VERTEX)
+		return "Vertex Shader as VS";
+	if (stage == MESA_SHADER_FRAGMENT)
+		return "Pixel Shader";
+	if (stage == MESA_SHADER_COMPUTE)
+		return "Compute Shader";
+	return "Unknown shader";
+}
+/* Print register, spill, code size, LDS, scratch and max-waves figures to
+ * stderr for each shader stage present in the pipeline. */
+static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+	unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
+	FILE *file = stderr;
+	int i;
+
+	for (i = 0; i < MESA_SHADER_STAGES; i++) {
+		struct radv_shader_variant *var = pipeline->shaders[i];
+		struct ac_shader_config *conf;
+		unsigned max_simd_waves = 10; /* reset per stage so limits do not leak across shaders */
+		unsigned lds_per_wave = 0; /* only set for the fragment stage below */
+
+		if (!var)
+			continue;
+		conf = &var->config;
+
+		if (i == MESA_SHADER_FRAGMENT) {
+			lds_per_wave = conf->lds_size * lds_increment +
+				align(var->info.fs.num_interp * 48, lds_increment);
+		}
+
+		if (conf->num_sgprs) {
+			if (device->physical_device->rad_info.chip_class >= VI)
+				max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
+			else
+				max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
+		}
+
+		if (conf->num_vgprs)
+			max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+		/* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+		 * that PS can use.
+		 */
+		if (lds_per_wave)
+			max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
+		fprintf(file, "\n%s:\n", radv_get_shader_name(var, i));
+		if (i == MESA_SHADER_FRAGMENT) {
+			fprintf(file, "*** SHADER CONFIG ***\n"
+				"SPI_PS_INPUT_ADDR = 0x%04x\n"
+				"SPI_PS_INPUT_ENA  = 0x%04x\n",
+				conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+		}
+		fprintf(file, "*** SHADER STATS ***\n"
+			"SGPRS: %d\n"
+			"VGPRS: %d\n"
+			"Spilled SGPRs: %d\n"
+			"Spilled VGPRs: %d\n"
+			"Code Size: %d bytes\n"
+			"LDS: %d blocks\n"
+			"Scratch: %d bytes per wave\n"
+			"Max Waves: %d\n"
+			"********************\n\n\n",
+			conf->num_sgprs, conf->num_vgprs,
+			conf->spilled_sgprs, conf->spilled_vgprs, var->code_size,
+			conf->lds_size, conf->scratch_bytes_per_wave,
+			max_simd_waves);
+	}
+}
+
 void radv_shader_variant_destroy(struct radv_device *device,
                                 struct radv_shader_variant *variant)
 {
@@ -266,42 +349,19 @@ void radv_shader_variant_destroy(struct radv_device *device,
 	free(variant);
 }

-static
-struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
-                                                       struct nir_shader *shader,
-                                                       struct radv_pipeline_layout *layout,
-                                                       const union ac_shader_variant_key *key,
-						       void** code_out,
-						       unsigned *code_size_out,
-						       bool dump)
+static void radv_fill_shader_variant(struct radv_device *device,
+				     struct radv_shader_variant *variant,
+				     struct ac_shader_binary *binary,
+				     gl_shader_stage stage)
 {
-	struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
-	enum radeon_family chip_family = device->instance->physicalDevice.rad_info.family;
-	LLVMTargetMachineRef tm;
-	if (!variant)
-		return NULL;
-
-	struct ac_nir_compiler_options options = {0};
-	options.layout = layout;
-	if (key)
-		options.key = *key;
-
-	struct ac_shader_binary binary;
-
-	options.unsafe_math = env_var_as_boolean("RADV_UNSAFE_MATH", false);
-	options.family = chip_family;
-	options.chip_class = device->instance->physicalDevice.rad_info.chip_class;
-	tm = ac_create_target_machine(chip_family);
-	ac_compile_nir_shader(tm, &binary, &variant->config,
-			      &variant->info, shader, &options, dump);
-	LLVMDisposeTargetMachine(tm);
-
+	variant->code_size = binary->code_size;
 	bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
 	unsigned vgpr_comp_cnt = 0;

 	if (scratch_enabled)
 		radv_finishme("shader scratch space");
-	switch (shader->stage) {
+
+	switch (stage) {
 	case MESA_SHADER_VERTEX:
 		variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
 			S_00B12C_SCRATCH_EN(scratch_enabled);
@@ -330,13 +390,47 @@ struct radv_shader_variant *radv_shader_variant_create(struct radv_device *devic
 		S_00B848_DX10_CLAMP(1) |
 		S_00B848_FLOAT_MODE(variant->config.float_mode);

-	variant->bo = device->ws->buffer_create(device->ws, binary.code_size, 256,
+	variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 256,
 						RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);

 	void *ptr = device->ws->buffer_map(variant->bo);
-	memcpy(ptr, binary.code, binary.code_size);
+	memcpy(ptr, binary->code, binary->code_size);
 	device->ws->buffer_unmap(variant->bo);

+
+}
+
+static struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
+							      struct nir_shader *shader,
+							      struct radv_pipeline_layout *layout,
+							      const union ac_shader_variant_key *key,
+							      void** code_out,
+							      unsigned *code_size_out,
+							      bool dump)
+{
+	struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
+	enum radeon_family chip_family = device->physical_device->rad_info.family;
+	LLVMTargetMachineRef tm;
+	if (!variant)
+		return NULL;
+
+	struct ac_nir_compiler_options options = {0};
+	options.layout = layout;
+	if (key)
+		options.key = *key;
+
+	struct ac_shader_binary binary;
+
+	options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
+	options.family = chip_family;
+	options.chip_class = device->physical_device->rad_info.chip_class;
+	tm = ac_create_target_machine(chip_family);
+	ac_compile_nir_shader(tm, &binary, &variant->config,
+			      &variant->info, shader, &options, dump);
+	LLVMDisposeTargetMachine(tm);
+
+	radv_fill_shader_variant(device, variant, &binary, shader->stage);
+
 	if (code_out) {
 		*code_out = binary.code;
 		*code_size_out = binary.code_size;
@@ -360,18 +454,18 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
 		      gl_shader_stage stage,
 		      const VkSpecializationInfo *spec_info,
 		      struct radv_pipeline_layout *layout,
-		      const union ac_shader_variant_key *key,
-		      bool dump)
+		      const union ac_shader_variant_key *key)
 {
 	unsigned char sha1[20];
 	struct radv_shader_variant *variant;
 	nir_shader *nir;
 	void *code = NULL;
 	unsigned code_size = 0;
+	bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);

 	if (module->nir)
-		_mesa_sha1_compute(module->nir->info.name,
-				   strlen(module->nir->info.name),
+		_mesa_sha1_compute(module->nir->info->name,
+				   strlen(module->nir->info->name),
 				   module->sha1);

 	radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key);
@@ -642,7 +736,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 					const VkGraphicsPipelineCreateInfo *pCreateInfo,
 					uint32_t blend_enable,
 					uint32_t blend_need_alpha,
-					bool single_cb_enable)
+					bool single_cb_enable,
+					bool blend_mrt0_is_dual_src)
 {
 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
@@ -664,6 +759,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,

 	blend->cb_shader_mask = si_get_cb_shader_mask(col_format);

+	if (blend_mrt0_is_dual_src)
+		col_format |= (col_format & 0xf) << 4;
 	if (!col_format)
 		col_format |= V_028714_SPI_SHADER_32_R;
 	blend->spi_shader_col_format = col_format;
@@ -715,8 +812,13 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	unsigned mode = V_028808_CB_NORMAL;
 	uint32_t blend_enable = 0, blend_need_alpha = 0;
+	bool blend_mrt0_is_dual_src = false;
 	int i;
 	bool single_cb_enable = false;
+
+	if (!vkblend)
+		return;
+
 	if (extra && extra->custom_blend_mode) {
 		single_cb_enable = true;
 		mode = extra->custom_blend_mode;
@@ -755,7 +857,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		}

 		if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
-			radv_finishme("dual source blending");
+			if (i == 0)
+				blend_mrt0_is_dual_src = true;
+
 		if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
 			srcRGB = VK_BLEND_FACTOR_ONE;
 			dstRGB = VK_BLEND_FACTOR_ONE;
@@ -797,7 +901,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		blend->cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);

 	radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo,
-						blend_enable, blend_need_alpha, single_cb_enable);
+						blend_enable, blend_need_alpha, single_cb_enable, blend_mrt0_is_dual_src);
 }

 static uint32_t si_translate_stencil_op(enum VkStencilOp op)
@@ -930,11 +1034,16 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 	const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	struct radv_multisample_state *ms = &pipeline->graphics.ms;
-	unsigned num_tile_pipes = pipeline->device->instance->physicalDevice.rad_info.num_tile_pipes;
+	unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
 	int ps_iter_samples = 1;
 	uint32_t mask = 0xffff;

 	ms->num_samples = vkms->rasterizationSamples;
+
+	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.force_persample) {
+		ps_iter_samples = vkms->rasterizationSamples;
+	}
+
 	ms->pa_sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
 	ms->pa_sc_aa_config = 0;
 	ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
@@ -1069,18 +1178,27 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,

 	struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;

-	dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
-	if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
-		typed_memcpy(dynamic->viewport.viewports,
-			     pCreateInfo->pViewportState->pViewports,
-			     pCreateInfo->pViewportState->viewportCount);
-	}
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pViewportState is [...] NULL if the pipeline
+	 *    has rasterization disabled.
+	 */
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
+		assert(pCreateInfo->pViewportState);

-	dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
-	if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
-		typed_memcpy(dynamic->scissor.scissors,
-			     pCreateInfo->pViewportState->pScissors,
-			     pCreateInfo->pViewportState->scissorCount);
+		dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+		if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+			typed_memcpy(dynamic->viewport.viewports,
+				     pCreateInfo->pViewportState->pViewports,
+				     pCreateInfo->pViewportState->viewportCount);
+		}
+
+		dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+		if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+			typed_memcpy(dynamic->scissor.scissors,
+				     pCreateInfo->pViewportState->pScissors,
+				     pCreateInfo->pViewportState->scissorCount);
+		}
 	}

 	if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
@@ -1098,7 +1216,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 			pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
 	}

-	if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pColorBlendState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is
+	 *    created against does not use any color attachments.
+	 *
+	 * so only read it when rasterization is on and a color attachment is used.
+	 */
+	bool uses_color_att = false;
+	for (unsigned i = 0; i < subpass->color_count && !uses_color_att; ++i)
+		uses_color_att = subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED;
+
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+	    uses_color_att &&
+	    (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS))) {
 		assert(pCreateInfo->pColorBlendState);
 		typed_memcpy(dynamic->blend_constants,
 			     pCreateInfo->pColorBlendState->blendConstants, 4);
@@ -1110,14 +1242,17 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	 * no need to override the depthstencil defaults in
 	 * radv_pipeline::dynamic_state when there is no depthstencil attachment.
 	 *
-	 * From the Vulkan spec (20 Oct 2015, git-aa308cb):
+	 * Section 9.2 of the Vulkan 1.0.15 spec says:
 	 *
-	 *    pDepthStencilState [...] may only be NULL if renderPass and subpass
-	 *    specify a subpass that has no depth/stencil attachment.
+	 *    pDepthStencilState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is created
+	 *    against does not use a depth/stencil attachment.
 	 */
-	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+	    subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+		assert(pCreateInfo->pDepthStencilState);
+
 		if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->depth_bounds.min =
 				pCreateInfo->pDepthStencilState->minDepthBounds;
 			dynamic->depth_bounds.max =
@@ -1125,7 +1260,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_compare_mask.front =
 				pCreateInfo->pDepthStencilState->front.compareMask;
 			dynamic->stencil_compare_mask.back =
@@ -1133,7 +1267,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_write_mask.front =
 				pCreateInfo->pDepthStencilState->front.writeMask;
 			dynamic->stencil_write_mask.back =
@@ -1141,7 +1274,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_reference.front =
 				pCreateInfo->pDepthStencilState->front.reference;
 			dynamic->stencil_reference.back =
@@ -1181,7 +1313,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 {
 	struct radv_shader_module fs_m = {0};

-	bool dump = getenv("RADV_DUMP_SHADERS");
 	if (alloc == NULL)
 		alloc = &device->alloc;

@@ -1208,7 +1339,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 					       pStages[MESA_SHADER_VERTEX]->pName,
 					       MESA_SHADER_VERTEX,
 					       pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
-					       pipeline->layout, &key, dump);
+					       pipeline->layout, &key);

 		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
 	}
@@ -1216,7 +1347,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	if (!modules[MESA_SHADER_FRAGMENT]) {
 		nir_builder fs_b;
 		nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
-		fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
+		fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "noop_fs");
 		fs_m.nir = fs_b.shader;
 		modules[MESA_SHADER_FRAGMENT] = &fs_m;
 	}
@@ -1233,7 +1364,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 					       stage ? stage->pName : "main",
 					       MESA_SHADER_FRAGMENT,
 					       stage ? stage->pSpecializationInfo : NULL,
-					       pipeline->layout, &key, dump);
+					       pipeline->layout, &key);
 		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
 	}

@@ -1285,6 +1416,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		pipeline->binding_stride[desc->binding] = desc->stride;
 	}

+	if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
+		radv_dump_pipeline_stats(device, pipeline);
+	}
+
 	return VK_SUCCESS;
 }

@@ -1332,20 +1467,18 @@ VkResult radv_CreateGraphicsPipelines(
 	unsigned i = 0;

 	for (; i < count; i++) {
-		result = radv_graphics_pipeline_create(_device,
-						       pipelineCache,
-						       &pCreateInfos[i],
-						       NULL, pAllocator, &pPipelines[i]);
-		if (result != VK_SUCCESS) {
-			for (unsigned j = 0; j < i; j++) {
-				radv_DestroyPipeline(_device, pPipelines[j], pAllocator);
-			}
-
-			return result;
+		VkResult r;
+		r = radv_graphics_pipeline_create(_device,
+						  pipelineCache,
+						  &pCreateInfos[i],
+						  NULL, pAllocator, &pPipelines[i]);
+		if (r != VK_SUCCESS) {
+			result = r;
+			pPipelines[i] = VK_NULL_HANDLE;
 		}
 	}

-	return VK_SUCCESS;
+	return result;
 }

 static VkResult radv_compute_pipeline_create(
@@ -1359,7 +1492,6 @@ static VkResult radv_compute_pipeline_create(
 	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
 	RADV_FROM_HANDLE(radv_shader_module, module, pCreateInfo->stage.module);
 	struct radv_pipeline *pipeline;
-	bool dump = getenv("RADV_DUMP_SHADERS");

 	pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
 			       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -1375,9 +1507,13 @@ static VkResult radv_compute_pipeline_create(
 				       pCreateInfo->stage.pName,
 				       MESA_SHADER_COMPUTE,
 				       pCreateInfo->stage.pSpecializationInfo,
-				       pipeline->layout, NULL, dump);
+				       pipeline->layout, NULL);

 	*pPipeline = radv_pipeline_to_handle(pipeline);
+
+	if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
+		radv_dump_pipeline_stats(device, pipeline);
+	}
 	return VK_SUCCESS;
 }
 VkResult radv_CreateComputePipelines(
@@ -1392,17 +1528,15 @@ VkResult radv_CreateComputePipelines(

 	unsigned i = 0;
 	for (; i < count; i++) {
-		result = radv_compute_pipeline_create(_device, pipelineCache,
-						      &pCreateInfos[i],
-						      pAllocator, &pPipelines[i]);
-		if (result != VK_SUCCESS) {
-			for (unsigned j = 0; j < i; j++) {
-				radv_DestroyPipeline(_device, pPipelines[j], pAllocator);
-			}
-
-			return result;
+		VkResult r;
+		r = radv_compute_pipeline_create(_device, pipelineCache,
+						 &pCreateInfos[i],
+						 pAllocator, &pPipelines[i]);
+		if (r != VK_SUCCESS) {
+			result = r;
+			pPipelines[i] = VK_NULL_HANDLE;
 		}
 	}

-	return VK_SUCCESS;
+	return result;
 }
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -57,7 +57,7 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
 	/* We don't consider allocation failure fatal, we just start with a 0-sized
 	 * cache. */
 	if (cache->hash_table == NULL ||
-	    !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true))
+	    (device->debug_flags & RADV_DEBUG_NO_CACHE))
 		cache->table_size = 0;
 	else
 		memset(cache->hash_table, 0, byte_size);
@@ -309,7 +309,6 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
 {
 	struct radv_device *device = cache->device;
 	struct cache_header header;
-	uint8_t uuid[VK_UUID_SIZE];

 	if (size < sizeof(header))
 		return;
@@ -320,10 +319,9 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
 		return;
 	if (header.vendor_id != 0x1002)
 		return;
-	if (header.device_id != device->instance->physicalDevice.rad_info.pci_id)
+	if (header.device_id != device->physical_device->rad_info.pci_id)
 		return;
-	radv_device_get_cache_uuid(uuid);
-	if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
+	if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
 		return;

 	char *end = (void *) data + size;
@@ -421,8 +419,8 @@ VkResult radv_GetPipelineCacheData(
 	header->header_size = sizeof(*header);
 	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
 	header->vendor_id = 0x1002;
-	header->device_id = device->instance->physicalDevice.rad_info.pci_id;
-	radv_device_get_cache_uuid(header->uuid);
+	header->device_id = device->physical_device->rad_info.pci_id;
+	memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
 	p += header->header_size;

 	struct cache_entry *entry;
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -79,13 +79,39 @@ typedef uint32_t xcb_window_t;
 #define MAX_SCISSORS    16
 #define MAX_PUSH_CONSTANTS_SIZE 128
 #define MAX_DYNAMIC_BUFFERS 16
-#define MAX_IMAGES 8
-#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */
+#define MAX_SAMPLES_LOG2 4
 #define NUM_META_FS_KEYS 11
+#define RADV_MAX_DRM_DEVICES 8

 #define NUM_DEPTH_CLEAR_PIPELINES 3

-#define radv_noreturn __attribute__((__noreturn__))
+enum radv_mem_heap {
+	RADV_MEM_HEAP_VRAM,
+	RADV_MEM_HEAP_VRAM_CPU_ACCESS,
+	RADV_MEM_HEAP_GTT,
+	RADV_MEM_HEAP_COUNT
+};
+
+enum radv_mem_type {
+	RADV_MEM_TYPE_VRAM,
+	RADV_MEM_TYPE_GTT_WRITE_COMBINE,
+	RADV_MEM_TYPE_VRAM_CPU_ACCESS,
+	RADV_MEM_TYPE_GTT_CACHED,
+	RADV_MEM_TYPE_COUNT
+};
+
+
+enum {
+	RADV_DEBUG_FAST_CLEARS       =   0x1,
+	RADV_DEBUG_NO_DCC            =   0x2,
+	RADV_DEBUG_DUMP_SHADERS      =   0x4,
+	RADV_DEBUG_NO_CACHE          =   0x8,
+	RADV_DEBUG_DUMP_SHADER_STATS =  0x10,
+	RADV_DEBUG_NO_HIZ            =  0x20,
+	RADV_DEBUG_NO_COMPUTE_QUEUE  =  0x40,
+	RADV_DEBUG_UNSAFE_MATH       =  0x80,
+};
+
 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))

 static inline uint32_t
@@ -173,20 +199,12 @@ radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
 	     __dword &= ~(1 << (b)))

 #define typed_memcpy(dest, src, count) ({				\
-			static_assert(sizeof(*src) == sizeof(*dest), ""); \
+			STATIC_ASSERT(sizeof(*(src)) == sizeof(*(dest))); /* element sizes must match */ \
 			memcpy((dest), (src), (count) * sizeof(*(src))); \
 		})

 #define zero(x) (memset(&(x), 0, sizeof(x)))

-/* Define no kernel as 1, since that's an illegal offset for a kernel */
-#define NO_KERNEL 1
-
-struct radv_common {
-	VkStructureType                             sType;
-	const void*                                 pNext;
-};
-
 /* Whenever we generate an error, pass it through this function. Useful for
 * debugging, where we can break on it. Only call at error site, not when
 * propagating errors. Might be useful to plug in a stack trace here.
@@ -211,7 +229,13 @@ void radv_loge_v(const char *format, va_list va);
 * Print a FINISHME message, including its source location.
 */
 #define radv_finishme(format, ...)					\
-	__radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__);
+	do { \
+		static bool reported = false; \
+		if (!reported) { \
+			__radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
+			reported = true; \
+		} \
+	} while (0)

 /* A non-fatal assert.  Useful for debugging. */
 #ifdef DEBUG
@@ -223,9 +247,6 @@ void radv_loge_v(const char *format, va_list va);
 #define radv_assert(x)
 #endif

-void radv_abortf(const char *format, ...) radv_noreturn radv_printflike(1, 2);
-void radv_abortfv(const char *format, va_list va) radv_noreturn;
-
 #define stub_return(v)					\
 	do {						\
 		radv_finishme("stub %s", __func__);	\
@@ -243,6 +264,11 @@ void *radv_lookup_entrypoint(const char *name);

 extern struct radv_dispatch_table dtable;

+struct radv_extensions {
+	VkExtensionProperties       *ext_array;
+	uint32_t                    num_ext;
+};
+
 struct radv_physical_device {
 	VK_LOADER_DATA                              _loader_data;

@@ -250,15 +276,12 @@ struct radv_physical_device {

 	struct radeon_winsys *ws;
 	struct radeon_info rad_info;
-	uint32_t                                    chipset_id;
 	char                                        path[20];
 	const char *                                name;
-	uint64_t                                    aperture_size;
-	int                                         cmd_parser_version;
-	uint32_t                    pci_vendor_id;
-	uint32_t                    pci_device_id;
+	uint8_t                                     uuid[VK_UUID_SIZE];

 	struct wsi_device                       wsi_device;
+	struct radv_extensions                      extensions;
 };

 struct radv_instance {
@@ -268,7 +291,9 @@ struct radv_instance {

 	uint32_t                                    apiVersion;
 	int                                         physicalDeviceCount;
-	struct radv_physical_device                  physicalDevice;
+	struct radv_physical_device                 physicalDevices[RADV_MAX_DRM_DEVICES];
+
+	uint64_t debug_flags;
 };

 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -324,11 +349,9 @@ struct radv_meta_state {
 		VkRenderPass render_pass[NUM_META_FS_KEYS];
 		struct radv_pipeline *color_pipelines[NUM_META_FS_KEYS];

-		VkRenderPass depth_only_rp[NUM_DEPTH_CLEAR_PIPELINES];
+		VkRenderPass depthstencil_rp;
 		struct radv_pipeline *depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
-		VkRenderPass stencil_only_rp[NUM_DEPTH_CLEAR_PIPELINES];
 		struct radv_pipeline *stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
-		VkRenderPass depthstencil_rp[NUM_DEPTH_CLEAR_PIPELINES];
 		struct radv_pipeline *depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
 	} clear[1 + MAX_SAMPLES_LOG2];

@@ -382,6 +405,16 @@ struct radv_meta_state {
 		VkDescriptorSetLayout                     img_ds_layout;
 		VkPipeline pipeline;
 	} btoi;
+	struct {
+		VkPipelineLayout                          img_p_layout;
+		VkDescriptorSetLayout                     img_ds_layout;
+		VkPipeline pipeline;
+	} itoi;
+	struct {
+		VkPipelineLayout                          img_p_layout;
+		VkDescriptorSetLayout                     img_ds_layout;
+		VkPipeline pipeline;
+	} cleari;

 	struct {
 		VkPipeline                                pipeline;
@@ -419,12 +452,21 @@ struct radv_meta_state {
 	} buffer;
 };

+/* queue types */
+#define RADV_QUEUE_GENERAL 0
+#define RADV_QUEUE_COMPUTE 1
+#define RADV_QUEUE_TRANSFER 2
+
+#define RADV_MAX_QUEUE_FAMILIES 3
+
+enum ring_type radv_queue_family_to_ring(int f);
+
 struct radv_queue {
 	VK_LOADER_DATA                              _loader_data;
-
 	struct radv_device *                         device;
-
-	struct radv_state_pool *                     pool;
+	struct radeon_winsys_ctx                    *hw_ctx;
+	int queue_family_index;
+	int queue_idx;
 };

 struct radv_device {
@@ -434,14 +476,14 @@ struct radv_device {

 	struct radv_instance *                       instance;
 	struct radeon_winsys *ws;
-	struct radeon_winsys_ctx *hw_ctx;

 	struct radv_meta_state                       meta_state;
-	struct radv_queue                            queue;
-	struct radeon_winsys_cs *empty_cs;

-	bool allow_fast_clears;
-	bool allow_dcc;
+	struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
+	int queue_count[RADV_MAX_QUEUE_FAMILIES];
+	struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
+
+	uint64_t debug_flags;

 	/* MSAA sample locations.
 	 * The first index is the sample index.
@@ -451,9 +493,12 @@ struct radv_device {
 	float sample_locations_4x[4][2];
 	float sample_locations_8x[8][2];
 	float sample_locations_16x[16][2];
-};

-void radv_device_get_cache_uuid(void *uuid);
+	struct radeon_winsys_bo                      *trace_bo;
+	uint32_t                                     *trace_id_ptr;
+
+	struct radv_physical_device                  *physical_device;
+};

 struct radv_device_memory {
 	struct radeon_winsys_bo                      *bo;
@@ -642,10 +687,14 @@ struct radv_cmd_state {
 	enum radv_cmd_flush_bits                     flush_bits;
 	unsigned                                     active_occlusion_queries;
 	float					     offset_scale;
+	uint32_t                                      descriptors_dirty;
+	uint32_t                                      trace_id;
 };
+
 struct radv_cmd_pool {
 	VkAllocationCallbacks                        alloc;
 	struct list_head                             cmd_buffers;
+	uint32_t queue_family_index;
 };

 struct radv_cmd_buffer_upload {
@@ -668,6 +717,7 @@ struct radv_cmd_buffer {
 	VkCommandBufferLevel                         level;
 	struct radeon_winsys_cs *cs;
 	struct radv_cmd_state state;
+	uint32_t queue_family_index;

 	uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
 	uint32_t dynamic_buffers[16 * MAX_DYNAMIC_BUFFERS];
@@ -680,6 +730,10 @@ struct radv_cmd_buffer {

 struct radv_image;

+bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
+
+void si_init_compute(struct radv_physical_device *physical_device,
+		     struct radv_cmd_buffer *cmd_buffer);
 void si_init_config(struct radv_physical_device *physical_device,
 		    struct radv_cmd_buffer *cmd_buffer);
 void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
@@ -729,6 +783,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
 		      struct radeon_winsys_bo *bo,
 		      uint64_t offset, uint64_t size, uint32_t value);
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);

 /*
 * Takes x,y,z as exact numbers of invocations, instead of blocks.
@@ -794,6 +849,7 @@ struct radv_shader_variant {
 	struct ac_shader_variant_info info;
 	unsigned rsrc1;
 	unsigned rsrc2;
+	uint32_t code_size;
 };

 struct radv_depth_stencil_state {
@@ -933,10 +989,6 @@ struct radv_cmask_info {
 	uint64_t offset;
 	uint64_t size;
 	unsigned alignment;
-	unsigned pitch;
-	unsigned height;
-	unsigned xalign;
-	unsigned yalign;
 	unsigned slice_tile_max;
 	unsigned base_address_reg;
 };
@@ -967,6 +1019,9 @@ struct radv_image {
 	VkDeviceSize size;
 	uint32_t alignment;

+	bool exclusive;
+	unsigned queue_family_mask;
+
 	/* Set when bound */
 	struct radeon_winsys_bo *bo;
 	VkDeviceSize offset;
@@ -987,8 +1042,13 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout);
 bool radv_layout_can_expclear(const struct radv_image *image,
                              VkImageLayout layout);
-bool radv_layout_has_cmask(const struct radv_image *image,
-			   VkImageLayout layout);
+bool radv_layout_can_fast_clear(const struct radv_image *image,
+			        VkImageLayout layout,
+			        unsigned queue_mask);
+
+
+unsigned radv_image_queue_family_mask(const struct radv_image *image, int family);
+
 static inline uint32_t
 radv_get_layerCount(const struct radv_image *image,
 		    const VkImageSubresourceRange *range)
@@ -1206,6 +1266,13 @@ void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
 			   struct radv_image *image, uint32_t value);
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
 			 struct radv_image *image, uint32_t value);
+
+struct radv_fence {
+	struct radeon_winsys_fence *fence;
+	bool submitted;
+	bool signalled;
+};
+
 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)		\
 								\
 	static inline struct __radv_type *			\
@@ -1263,20 +1330,4 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass)
 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler)
 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule)

-#define RADV_DEFINE_STRUCT_CASTS(__radv_type, __VkType)			\
-									\
-	static inline const __VkType *					\
-	__radv_type ## _to_ ## __VkType(const struct __radv_type *__radv_obj) \
-	{								\
-		return (const __VkType *) __radv_obj;			\
-	}
-
-#define RADV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name)	\
-	const __VkType *__vk_name = radv_common_to_ ## __VkType(__common_name)
-
-RADV_DEFINE_STRUCT_CASTS(radv_common, VkMemoryBarrier)
-RADV_DEFINE_STRUCT_CASTS(radv_common, VkBufferMemoryBarrier)
-RADV_DEFINE_STRUCT_CASTS(radv_common, VkImageMemoryBarrier)
-
-
 #endif /* RADV_PRIVATE_H */
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -35,10 +35,10 @@

 static unsigned get_max_db(struct radv_device *device)
 {
-	unsigned num_db = device->instance->physicalDevice.rad_info.num_render_backends;
-	unsigned rb_mask = device->instance->physicalDevice.rad_info.enabled_rb_mask;
+	unsigned num_db = device->physical_device->rad_info.num_render_backends;
+	MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;

-	if (device->instance->physicalDevice.rad_info.chip_class == SI)
+	if (device->physical_device->rad_info.chip_class == SI)
 		num_db = 8;
 	else
 		num_db = MAX2(8, num_db);
@@ -217,7 +217,7 @@ void radv_CmdCopyQueryPoolResults(
 		uint64_t local_src_va = va  + query * pool->stride;
 		unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;

-		unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26);
+		MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26);

 		if (flags & VK_QUERY_RESULT_WAIT_BIT) {
 			/* TODO, not sure if there is any case where we won't always be ready yet */
@@ -387,6 +387,7 @@ void radv_CmdWriteTimestamp(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
 	struct radeon_winsys_cs *cs = cmd_buffer->cs;
 	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
 	uint64_t avail_va = va + pool->availability_offset + 4 * query;
@@ -394,17 +395,27 @@ void radv_CmdWriteTimestamp(

 	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);

-	unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 11);
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);

-	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-	radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
-	radeon_emit(cs, query_va);
-	radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
-	radeon_emit(cs, 0);
-	radeon_emit(cs, 0);
+	if (mec) {
+		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
+		radeon_emit(cs, 3 << 29);
+		radeon_emit(cs, query_va);
+		radeon_emit(cs, query_va >> 32);
+		radeon_emit(cs, 0);
+		radeon_emit(cs, 0);
+	} else {
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
+		radeon_emit(cs, query_va);
+		radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, 0);
+	}

 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+	radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
 		    S_370_WR_CONFIRM(1) |
 		    S_370_ENGINE_SEL(V_370_ME));
 	radeon_emit(cs, avail_va);
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -85,11 +85,13 @@ struct radeon_info {
 	uint32_t                    gart_page_size;
 	uint64_t                    gart_size;
 	uint64_t                    vram_size;
+	uint64_t                    visible_vram_size;
 	bool                        has_dedicated_vram;
 	bool                     has_virtual_memory;
 	bool                        gfx_ib_pad_with_type2;
-	bool                     has_sdma;
 	bool                     has_uvd;
+	uint32_t                    sdma_rings;
+	uint32_t                    compute_rings;
 	uint32_t                    vce_fw_version;
 	uint32_t                    vce_harvest_config;
 	uint32_t                    clock_crystal_freq;
@@ -251,6 +253,7 @@ struct radeon_bo_metadata {

 struct radeon_winsys_bo;
 struct radeon_winsys_fence;
+struct radeon_winsys_sem;

 struct radeon_winsys {
 	void (*destroy)(struct radeon_winsys *ws);
@@ -284,7 +287,8 @@ struct radeon_winsys {
 	struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
 	void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);

-	bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx);
+	bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
+	                      enum ring_type ring_type, int ring_index);

 	struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
 					      enum ring_type ring_type);
@@ -298,8 +302,13 @@ struct radeon_winsys {
 	void (*cs_grow)(struct radeon_winsys_cs * cs, size_t min_size);

 	int (*cs_submit)(struct radeon_winsys_ctx *ctx,
+			 int queue_index,
 			 struct radeon_winsys_cs **cs_array,
 			 unsigned cs_count,
+			 struct radeon_winsys_sem **wait_sem,
+			 unsigned wait_sem_count,
+			 struct radeon_winsys_sem **signal_sem,
+			 unsigned signal_sem_count,
 			 bool can_patch,
 			 struct radeon_winsys_fence *fence);

@@ -310,6 +319,8 @@ struct radeon_winsys {
 	void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
 				    struct radeon_winsys_cs *child);

+	void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
+
 	int (*surface_init)(struct radeon_winsys *ws,
 			    struct radeon_surf *surf);

@@ -322,6 +333,10 @@ struct radeon_winsys {
 			   struct radeon_winsys_fence *fence,
 			   bool absolute,
 			   uint64_t timeout);
+
+	struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
+	void (*destroy_sem)(struct radeon_winsys_sem *sem);
+
 };

 static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
--- a/src/amd/vulkan/radv_util.c
+++ b/src/amd/vulkan/radv_util.c
@@ -65,25 +65,6 @@ void radv_printflike(3, 4)
 	fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
 }

-void radv_noreturn radv_printflike(1, 2)
-	radv_abortf(const char *format, ...)
-{
-	va_list va;
-
-	va_start(va, format);
-	radv_abortfv(format, va);
-	va_end(va);
-}
-
-void radv_noreturn
-radv_abortfv(const char *format, va_list va)
-{
-	fprintf(stderr, "vk: error: ");
-	vfprintf(stderr, format, va);
-	fprintf(stderr, "\n");
-	abort();
-}
-
 VkResult
 __vk_errorf(VkResult error, const char *file, int line, const char *format, ...)
 {
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -75,7 +75,7 @@ void radv_DestroySurfaceKHR(
 	const VkAllocationCallbacks*                 pAllocator)
 {
 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);

 	vk_free2(&instance->alloc, pAllocator, surface);
 }
@@ -87,7 +87,7 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
 	VkBool32*                                   pSupported)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_support(surface, &device->wsi_device,
@@ -101,7 +101,7 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
 	VkSurfaceCapabilitiesKHR*                   pSurfaceCapabilities)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_capabilities(surface, pSurfaceCapabilities);
@@ -114,7 +114,7 @@ VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
 	VkSurfaceFormatKHR*                         pSurfaceFormats)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_formats(surface, &device->wsi_device, pSurfaceFormatCount,
@@ -128,7 +128,7 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
 	VkPresentModeKHR*                           pPresentModes)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_present_modes(surface, pPresentModeCount,
@@ -249,9 +249,9 @@ VkResult radv_CreateSwapchainKHR(
 	VkSwapchainKHR*                              pSwapchain)
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
 	struct wsi_interface *iface =
-		device->instance->physicalDevice.wsi_device.wsi[surface->platform];
+		device->physical_device->wsi_device.wsi[surface->platform];
 	struct wsi_swapchain *swapchain;
 	const VkAllocationCallbacks *alloc;
 	if (pAllocator)
@@ -259,7 +259,7 @@ VkResult radv_CreateSwapchainKHR(
 	else
 		alloc = &device->alloc;
 	VkResult result = iface->create_swapchain(surface, _device,
-						  &device->instance->physicalDevice.wsi_device,
+						  &device->physical_device->wsi_device,
 						  pCreateInfo,
 						  alloc, &radv_wsi_image_fns,
 						  &swapchain);
@@ -288,6 +288,9 @@ void radv_DestroySwapchainKHR(
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
 	const VkAllocationCallbacks *alloc;

+	if (!_swapchain)
+		return;
+
 	if (pAllocator)
 		alloc = pAllocator;
 	else
@@ -318,13 +321,21 @@ VkResult radv_AcquireNextImageKHR(
 	VkSwapchainKHR                               _swapchain,
 	uint64_t                                     timeout,
 	VkSemaphore                                  semaphore,
-	VkFence                                      fence,
+	VkFence                                      _fence,
 	uint32_t*                                    pImageIndex)
 {
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+	RADV_FROM_HANDLE(radv_fence, fence, _fence);

-	return swapchain->acquire_next_image(swapchain, timeout, semaphore,
-					     pImageIndex);
+	VkResult result = swapchain->acquire_next_image(swapchain, timeout, semaphore,
+	                                                pImageIndex);
+
+	if (fence && result == VK_SUCCESS) {
+		fence->submitted = true;
+		fence->signalled = true;
+	}
+
+	return result;
 }

 VkResult radv_QueuePresentKHR(
@@ -351,7 +362,15 @@ VkResult radv_QueuePresentKHR(
 					 1, &swapchain->fences[0]);
 		}

-		radv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);
+		RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
+		struct radeon_winsys_fence *base_fence = fence->fence;
+		struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+		queue->device->ws->cs_submit(ctx, queue->queue_idx,
+					     &queue->device->empty_cs[queue->queue_family_index],
+					     1,
+					     (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
+					     pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
+		fence->submitted = true;

 		result = swapchain->queue_present(swapchain,
 						  pPresentInfo->pImageIndices[i]);
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -170,10 +170,11 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
 				       S_030800_INSTANCE_BROADCAST_WRITES(1));
 }

-static void
+void
 si_init_compute(struct radv_physical_device *physical_device,
-                struct radeon_winsys_cs *cs)
+                struct radv_cmd_buffer *cmd_buffer)
 {
+	struct radeon_winsys_cs *cs = cmd_buffer->cs;
 	radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
 	radeon_emit(cs, 0);
 	radeon_emit(cs, 0);
@@ -419,7 +420,7 @@ void si_init_config(struct radv_physical_device *physical_device,
 	if (physical_device->rad_info.family == CHIP_STONEY)
 		radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);

-	si_init_compute(physical_device, cs);
+	si_init_compute(physical_device, cmd_buffer);
 }

 static void
@@ -479,11 +480,11 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
 		radeon_emit(cs, fui(translate[2]));
 	}

+	radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
+				   first_vp * 4 * 2, count * 2);
 	for (i = 0; i < count; i++) {
 		float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
 		float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
-		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
-					   first_vp * 4 * 2, count * 2);
 		radeon_emit(cs, fui(zmin));
 		radeon_emit(cs, fui(zmax));
 	}
@@ -510,8 +511,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first,
 uint32_t
 si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
 {
-	enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
-	struct radeon_info *info = &cmd_buffer->device->instance->physicalDevice.rad_info;
+	enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
+	struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
 	unsigned prim = cmd_buffer->state.pipeline->graphics.prim;
 	unsigned primgroup_size = 128; /* recommended without a GS */
 	unsigned max_primgroup_in_wave = 2;
@@ -598,8 +599,18 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
 void
 si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 {
-	enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
+	enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
 	unsigned cp_coher_cntl = 0;
+	bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+
+	if (is_compute)
+		cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+	                                          RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+	                                          RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+	                                          RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
+	                                          RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+	                                          RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
+	                                          RADV_CMD_FLAG_VGT_FLUSH);

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);

@@ -627,7 +638,7 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 			S_0085F0_CB7_DEST_BASE_ENA(1);

 		/* Necessary for DCC */
-		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI) {
+		if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
 			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
 			                            EVENT_INDEX(5));
@@ -678,7 +689,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	/* Make sure ME is idle (it executes most packets) before continuing.
 	 * This prevents read-after-write hazards between PFP and ME.
 	 */
-	if (cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+	if ((cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) &&
+	    !radv_cmd_buffer_uses_mec(cmd_buffer)) {
 		radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cmd_buffer->cs, 0);
 	}
@@ -687,14 +699,27 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	 * Therefore, it should be last. Done in PFP.
 	 */
 	if (cp_coher_cntl) {
-		/* ACQUIRE_MEM is only required on a compute ring. */
-		radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
-		radeon_emit(cmd_buffer->cs, cp_coher_cntl);   /* CP_COHER_CNTL */
-		radeon_emit(cmd_buffer->cs, 0xffffffff);      /* CP_COHER_SIZE */
-		radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE */
-		radeon_emit(cmd_buffer->cs, 0x0000000A);      /* POLL_INTERVAL */
+		if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+			radeon_emit(cmd_buffer->cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
+			                            PKT3_SHADER_TYPE_S(1));
+			radeon_emit(cmd_buffer->cs, cp_coher_cntl);   /* CP_COHER_CNTL */
+			radeon_emit(cmd_buffer->cs, 0xffffffff);      /* CP_COHER_SIZE */
+			radeon_emit(cmd_buffer->cs, 0xff);            /* CP_COHER_SIZE_HI */
+			radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE */
+			radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE_HI */
+			radeon_emit(cmd_buffer->cs, 0x0000000A);      /* POLL_INTERVAL */
+		} else {
+			/* ACQUIRE_MEM is only required on a compute ring. */
+			radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+			radeon_emit(cmd_buffer->cs, cp_coher_cntl);   /* CP_COHER_CNTL */
+			radeon_emit(cmd_buffer->cs, 0xffffffff);      /* CP_COHER_SIZE */
+			radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE */
+			radeon_emit(cmd_buffer->cs, 0x0000000A);      /* POLL_INTERVAL */
+		}
 	}

+	if (cmd_buffer->state.flush_bits)
+		radv_cmd_buffer_trace_emit(cmd_buffer);
 	cmd_buffer->state.flush_bits = 0;
 }

@@ -731,7 +756,7 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);

-	if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, sync_flag | sel);	/* CP_SYNC [31] */
 		radeon_emit(cs, src_va);		/* SRC_ADDR_LO [31:0] */
@@ -753,10 +778,12 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
 	 * indices. If we wanted to execute CP DMA in PFP, this packet
 	 * should precede it.
 	 */
-	if (sync_flag) {
+	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
+
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@@ -775,7 +802,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);

-	if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
 		radeon_emit(cs, clear_value);		/* DATA [31:0] */
@@ -793,10 +820,11 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
 	}

 	/* See "copy_buffer" for explanation. */
-	if (sync_flag) {
+	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
@@ -847,8 +875,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 	uint64_t skipped_size = 0, realign_size = 0;


-	if (cmd_buffer->device->instance->physicalDevice.rad_info.family <= CHIP_CARRIZO ||
-	    cmd_buffer->device->instance->physicalDevice.rad_info.family == CHIP_STONEY) {
+	if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
+	    cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
 		/* If the size is not aligned, we must add a dummy copy at the end
 		 * just to align the internal counter. Otherwise, the DMA engine
 		 * would slow down by an order of magnitude for following copies.
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -27,6 +27,7 @@
 #include <amdgpu_drm.h>
 #include <assert.h>

+#include "ac_debug.h"
 #include "amdgpu_id.h"
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_cs.h"
@@ -54,6 +55,7 @@ struct radv_amdgpu_cs {
 	bool                        is_chained;

 	int                         buffer_hash_table[1024];
+	unsigned                    hw_ip;
 };

 static inline struct radv_amdgpu_cs *
@@ -62,6 +64,30 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base)
 	return (struct radv_amdgpu_cs*)base;
 }

+static int ring_to_hw_ip(enum ring_type ring)
+{
+	switch (ring) {
+	case RING_GFX:
+		return AMDGPU_HW_IP_GFX;
+	case RING_DMA:
+		return AMDGPU_HW_IP_DMA;
+	case RING_COMPUTE:
+		return AMDGPU_HW_IP_COMPUTE;
+	default:
+		unreachable("unsupported ring");
+	}
+}
+
+static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
+					 struct amdgpu_cs_fence *fence,
+					 struct amdgpu_cs_request *req)
+{
+	fence->context = ctx->ctx;
+	fence->ip_type = req->ip_type;
+	fence->ip_instance = req->ip_instance;
+	fence->ring = req->ring;
+	fence->fence = req->seq_no;
+}

 static struct radeon_winsys_fence *radv_amdgpu_create_fence()
 {
@@ -126,6 +152,7 @@ static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
 	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
 		cs->buffer_hash_table[i] = -1;

+	cs->hw_ip = ring_to_hw_ip(ring_type);
 	return true;
 }

@@ -140,7 +167,7 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws,
 		return NULL;

 	cs->ws = radv_amdgpu_winsys(ws);
-	radv_amdgpu_init_cs(cs, RING_GFX);
+	radv_amdgpu_init_cs(cs, ring_type);

 	if (cs->ws->use_ib_bos) {
 		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
@@ -475,7 +502,16 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 	return r;
 }

+static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
+				    struct amdgpu_cs_request *request)
+{
+	radv_amdgpu_request_to_fence(ctx,
+	                             &ctx->last_submission[request->ip_type][request->ring],
+	                             request);
+}
+
 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
+						int queue_idx,
 						struct radeon_winsys_cs **cs_array,
 						unsigned cs_count,
 						struct radeon_winsys_fence *_fence)
@@ -515,7 +551,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 		return r;
 	}

-	request.ip_type = AMDGPU_HW_IP_GFX;
+	request.ip_type = cs0->hw_ip;
+	request.ring = queue_idx;
 	request.number_of_ibs = 1;
 	request.ibs = &cs0->ib;
 	request.resources = bo_list;
@@ -531,19 +568,16 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,

 	amdgpu_bo_list_destroy(bo_list);

-	if (fence) {
-		fence->context = ctx->ctx;
-		fence->ip_type = request.ip_type;
-		fence->ip_instance = request.ip_instance;
-		fence->ring = request.ring;
-		fence->fence = request.seq_no;
-	}
-	ctx->last_seq_no = request.seq_no;
+	if (fence)
+		radv_amdgpu_request_to_fence(ctx, fence, &request);
+
+	radv_assign_last_submit(ctx, &request);

 	return r;
 }

 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
+						 int queue_idx,
 						 struct radeon_winsys_cs **cs_array,
 						 unsigned cs_count,
 						 struct radeon_winsys_fence *_fence)
@@ -569,7 +603,8 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 			return r;
 		}

-		request.ip_type = AMDGPU_HW_IP_GFX;
+		request.ip_type = cs0->hw_ip;
+		request.ring = queue_idx;
 		request.resources = bo_list;
 		request.number_of_ibs = cnt;
 		request.ibs = ibs;
@@ -600,19 +635,16 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,

 		i += cnt;
 	}
-	if (fence) {
-		fence->context = ctx->ctx;
-		fence->ip_type = request.ip_type;
-		fence->ip_instance = request.ip_instance;
-		fence->ring = request.ring;
-		fence->fence = request.seq_no;
-	}
-	ctx->last_seq_no = request.seq_no;
+	if (fence)
+		radv_amdgpu_request_to_fence(ctx, fence, &request);
+
+	radv_assign_last_submit(ctx, &request);

 	return 0;
 }

 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
+					       int queue_idx,
 					       struct radeon_winsys_cs **cs_array,
 					       unsigned cs_count,
 					       struct radeon_winsys_fence *_fence)
@@ -673,7 +705,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 		ib.size = size;
 		ib.ib_mc_address = ws->buffer_get_va(bo);

-		request.ip_type = AMDGPU_HW_IP_GFX;
+		request.ip_type = cs0->hw_ip;
+		request.ring = queue_idx;
 		request.resources = bo_list;
 		request.number_of_ibs = 1;
 		request.ibs = &ib;
@@ -695,35 +728,82 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,

 		i += cnt;
 	}
-	if (fence) {
-		fence->context = ctx->ctx;
-		fence->ip_type = request.ip_type;
-		fence->ip_instance = request.ip_instance;
-		fence->ring = request.ring;
-		fence->fence = request.seq_no;
-	}
-	ctx->last_seq_no = request.seq_no;
+	if (fence)
+		radv_amdgpu_request_to_fence(ctx, fence, &request);
+
+	radv_assign_last_submit(ctx, &request);

 	return 0;
 }

 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
+					int queue_idx,
 					struct radeon_winsys_cs **cs_array,
 					unsigned cs_count,
+					struct radeon_winsys_sem **wait_sem,
+					unsigned wait_sem_count,
+					struct radeon_winsys_sem **signal_sem,
+					unsigned signal_sem_count,
 					bool can_patch,
 					struct radeon_winsys_fence *_fence)
 {
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
+	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+	int ret;
+	unsigned i;
+	/* Queue a wait on each wait semaphore for this ring ahead of the submission. */
+	for (i = 0; i < wait_sem_count; i++) {
+		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
+		amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
+					 sem);
+	}
 	if (!cs->ws->use_ib_bos) {
-		return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
 							   cs_count, _fence);
 	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
-		return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
 							    cs_count, _fence);
 	} else {
-		return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
 							     cs_count, _fence);
 	}
+
+	for (i = 0; i < signal_sem_count; i++) {
+		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
+		amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
+					   sem);
+	}
+	return ret;
+}
+
+
+/* Translate GPU address addr to a CPU pointer by searching this CS's IB buffers. */
+static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
+{
+	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+	void *ret = NULL;
+	/* Nothing to search when the CS has no IB buffer object. */
+	if (!cs->ib_buffer)
+		return NULL;
+	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
+		struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)
+		       (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
+		if (addr >= bo->va && addr - bo->va < bo->size && amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
+			return (char *)ret + (addr - bo->va);
+	}
+	return ret;
+}
+
+static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
+                                       FILE* file,
+                                       uint32_t trace_id)
+{
+	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+
+	ac_parse_ib(file,
+		    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
+		    cs->ib.size, trace_id,  "main IB", cs->ws->info.chip_class,
+		    radv_amdgpu_winsys_get_cpu_addr, cs);
 }

 static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
@@ -742,6 +822,7 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
 	ctx->ws = ws;
 	return (struct radeon_winsys_ctx *)ctx;
 error_create:
+	FREE(ctx);
 	return NULL;
 }

@@ -752,22 +833,16 @@ static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
 	FREE(ctx);
 }

-static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
+static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
+                                      enum ring_type ring_type, int ring_index)
 {
 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+	int ip_type = ring_to_hw_ip(ring_type);

-	if (ctx->last_seq_no) {
+	if (ctx->last_submission[ip_type][ring_index].fence) {
 		uint32_t expired;
-		struct amdgpu_cs_fence fence;
-
-		fence.context = ctx->ctx;
-		fence.ip_type = RING_GFX;
-		fence.ip_instance = 0;
-		fence.ring = 0;
-		fence.fence = ctx->last_seq_no;
-
-		int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
-						       &expired);
+		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
+		                                       1000000000ull, 0, &expired);

 		if (ret || !expired)
 			return false;
@@ -776,6 +851,23 @@ static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
 	return true;
 }

+static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
+{
+	int ret;
+	amdgpu_semaphore_handle sem;
+
+	ret = amdgpu_cs_create_semaphore(&sem);
+	if (ret)
+		return NULL;
+	return (struct radeon_winsys_sem *)sem;
+}
+
+static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
+{
+	amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)_sem;
+	amdgpu_cs_destroy_semaphore(sem);
+}
+
 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 {
 	ws->base.ctx_create = radv_amdgpu_ctx_create;
@@ -789,7 +881,10 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
 	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
 	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
+	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
 	ws->base.create_fence = radv_amdgpu_create_fence;
 	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
+	ws->base.create_sem = radv_amdgpu_create_sem;
+	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
 	ws->base.fence_wait = radv_amdgpu_fence_wait;
 }
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -38,10 +38,14 @@
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_winsys.h"

+enum {
+	MAX_RINGS_PER_TYPE = 8
+};
+
 struct radv_amdgpu_ctx {
 	struct radv_amdgpu_winsys *ws;
 	amdgpu_context_handle ctx;
-	uint64_t last_seq_no;
+	struct amdgpu_cs_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
 };

 static inline struct radv_amdgpu_ctx *
--- a/Show More
+++ b/Show More