util: import sha1 implementation from OpenBSD

At the moment we support 5+ different implementations, each with a varying number of bugs - from thread safety problems [1] to outright broken implementation(s) [2]. In order to accommodate these we have 150+ lines of configure script and two extra configure toggles, whilst an actual implementation is ~200loc and our current compat wrapping ~250. Let's not forget that different people use different code paths, which effectively makes it harder to test and debug since the default implementation is automatically detected. To minimise all these lovely experiences, import the "100% Public Domain" OpenBSD sha1 implementation. Clearly document any changes needed to get building correctly, since many/most of those can be upstreamed making future syncs easier. As an added bonus this will avoid all the 'fun' experiences trying to integrate it with the Android and SCons builds. v2: Manually expand __BEGIN_DECLS/__END_DECLS and document (Tapani). Furthermore it seems that some games (or surrounding runtime) static link against OpenSSL resulting in conflicts. For more information see the discussion thread [3]. Bugzilla [1]: https://bugs.freedesktop.org/show_bug.cgi?id=94904 Bugzilla [2]: https://bugs.freedesktop.org/show_bug.cgi?id=97967 [3] https://lists.freedesktop.org/archives/mesa-dev/2017-January/140748.html Cc: Mark Janes <mark.a.janes@intel.com> Cc: Vinson Lee <vlee@freedesktop.org> Cc: Tapani Pälli <tapani.palli@intel.com> Cc: Jonathan Gray <jsg@jsg.id.au> Tested-by: Jonathan Gray <jsg@jsg.id.au> Signed-off-by: Emil Velikov <emil.velikov@collabora.com> Acked-by: Tapani Pälli <tapani.palli@intel.com> (v1) Acked-by: Jason Ekstrand <jason@jlekstrand.net> (v1)
i965: Make brw_cache_item structure private to brw_program_cache.c.
2017-01-18 19:07:23 +00:00 · 2017-01-18 10:53:14 -08:00 · 2017-01-18 19:51:31 +01:00 · 2017-01-18 19:51:31 +01:00 · 2017-01-18 19:51:31 +01:00 · 2017-01-18 19:51:31 +01:00
1368 changed files with 96529 additions and 35846 deletions
--- a/.dir-locals.el
+++ b/.dir-locals.el
@@ -1,4 +1,5 @@
-((prog-mode
+((nil . ((show-trailing-whitespace . t)))
+ (prog-mode
  (indent-tabs-mode . nil)
  (tab-width . 8)
  (c-basic-offset . 3)
@@ -8,6 +9,10 @@
 	    (c-set-offset 'case-label '0)
 	    (c-set-offset 'innamespace '0)
 	    (c-set-offset 'inline-open '0)))
-  )
+  (whitespace-style face indentation)
+  (whitespace-line-column . 79)
+  (eval ignore-errors
+        (require 'whitespace)
+        (whitespace-mode 1)))
 (makefile-mode (indent-tabs-mode . t))
 )
--- a/.editorconfig
+++ b/.editorconfig
@@ -6,6 +6,7 @@ root = true
 [*]
 charset = utf-8
 insert_final_newline = true
+tab_width = 8

 [*.{c,h,cpp,hpp,cc,hh}]
 indent_style = space
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,7 +32,7 @@ env:
    - DRI3PROTO_VERSION=dri3proto-1.0
    - PRESENTPROTO_VERSION=presentproto-1.0
    - LIBPCIACCESS_VERSION=libpciaccess-0.13.4
-    - LIBDRM_VERSION=libdrm-2.4.65
+    - LIBDRM_VERSION=libdrm-2.4.74
    - XCBPROTO_VERSION=xcb-proto-1.11
    - LIBXCB_VERSION=libxcb-1.11
    - LIBXSHMFENCE_VERSION=libxshmfence-1.2
@@ -92,7 +92,7 @@ install:

  - wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
  - tar -jxvf $LIBDRM_VERSION.tar.bz2
-  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 && make install)
+  - (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-etnaviv-experimental-api && make install)

  - wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
  - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
@@ -103,7 +103,7 @@ script:
      ./autogen.sh --enable-debug
        --with-egl-platforms=x11,drm
        --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
-        --with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600
+        --with-gallium-drivers=svga,swrast,vc4,virgl,r300,r600,etnaviv,imx
        --disable-llvm-shared-libs
        ;
      make && make check;
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -51,6 +51,7 @@ LOCAL_CFLAGS += \
 	-DHAVE_FUNC_ATTRIBUTE_UNUSED \
 	-DHAVE_FUNC_ATTRIBUTE_FORMAT \
 	-DHAVE_FUNC_ATTRIBUTE_PACKED \
+	-DHAVE_FUNC_ATTRIBUTE_ALIAS \
 	-DHAVE___BUILTIN_CTZ \
 	-DHAVE___BUILTIN_POPCOUNT \
 	-DHAVE___BUILTIN_POPCOUNTLL \
@@ -82,11 +83,13 @@ LOCAL_CFLAGS += \
 	-D__STDC_LIMIT_MACROS
 endif

+ifneq ($(LOCAL_IS_HOST_MODULE),true)
 # add libdrm if there are hardware drivers
 ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DHAVE_LIBDRM
 LOCAL_SHARED_LIBRARIES += libdrm
 endif
+endif

 LOCAL_CPPFLAGS += \
 	$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-D_USING_LIBCXX) \
--- a/Makefile.am
+++ b/Makefile.am
@@ -40,10 +40,10 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-vdpau \
 	--enable-xa \
 	--enable-xvmc \
-	--disable-llvm-shared-libs \
+	--enable-llvm-shared-libs \
 	--with-egl-platforms=x11,wayland,drm,surfaceless \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
-	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr \
+	--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
 	--with-vulkan-drivers=intel,radeon

 ACLOCAL_AMFLAGS = -I m4
@@ -62,6 +62,7 @@ noinst_HEADERS = \
 	include/c99_math.h \
 	include/c11 \
 	include/D3D9 \
+	include/GL/wglext.h \
 	include/HaikuGL \
 	include/no_extern_c.h \
 	include/pci_ids
--- a/6
+++ b/6
@@ -92,10 +92,16 @@ F: */Makefile.sources

 ANDROID BUILD
 R: Emil Velikov <emil.l.velikov@gmail.com>
+R: Rob Herring <robh@kernel.org>
 F: CleanSpec.mk
 F: */Android.*mk
 F: */Makefile.sources

+ANDROID EGL SUPPORT
+R: Rob Herring <robh@kernel.org>
+R: Tomasz Figa <tfiga@chromium.org>
+F: src/egl/drivers/dri2/platform_android.c
+
 WAYLAND EGL SUPPORT
 R: Daniel Stone <daniels@collabora.com>
 F: src/egl/wayland/*
--- a/2
+++ b/2
@@ -1 +1 @@
-12.1.0-devel
+17.0.0-devel
--- a/bin/get-typod-pick-list.sh
+++ b/bin/get-typod-pick-list.sh
@@ -0,0 +1,39 @@
+#!/bin/sh

+# Script for generating a list of candidates which have typos in the nomination line
+#
+# Usage examples:
+#
+# $ bin/get-typod-pick-list.sh
+# $ bin/get-typod-pick-list.sh > picklist
+# $ bin/get-typod-pick-list.sh | tee picklist

+# NB:
+# This script intentionally _never_ checks for a specific version tag.
+# Should we consider folding it into the original get-pick-list.sh?

+# Grep for commits with "cherry picked from commit" in the commit message.
+git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
+	grep "cherry picked from commit" |\
+	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

+# Grep for commits whose CC: line names mesa-dev (presumably a typo'd stable nomination).
+git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' HEAD..origin/master |\
+while read -r sha
+do
+	# Check to see whether the patch is on the ignore list.
+	if [ -f bin/.cherry-ignore ] ; then
+		if grep -q "^$sha" bin/.cherry-ignore ; then
+			continue
+		fi
+	fi

+	# Check to see if it has already been picked over.
+	if grep -q "^$sha" already_picked ; then
+		continue
+	fi

+	git log -n1 --pretty=oneline "$sha" | cat
+done

+rm -f already_picked
--- a/common.py
+++ b/common.py
@@ -59,7 +59,7 @@ if target_platform == 'windows' and host_platform != 'windows':


 # find default_llvm value
-if 'LLVM' in os.environ:
+if 'LLVM' in os.environ or 'LLVM_CONFIG' in os.environ:
    default_llvm = 'yes'
 else:
    default_llvm = 'no'
@@ -110,5 +110,6 @@ def AddOptions(opts):
    opts.Add(BoolOption('texture_float',
                        'enable floating-point textures and renderbuffers',
                        'no'))
+    opts.Add(BoolOption('swr', 'Build OpenSWR', 'no'))
    if host_platform == 'windows':
        opts.Add('MSVC_VERSION', 'Microsoft Visual C/C++ version')
--- a/configure.ac
+++ b/configure.ac
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -55,7 +55,7 @@ to your preference, type:
 </pre>

 <p>
-This will produce libGL.so and several other libraries depending on the
+This will produce libGL.so and/or several other libraries depending on the
 options you have chosen. Later, if you want to rebuild for a different
 configuration run <code>make realclean</code> before rebuilding.
 </p>
@@ -133,9 +133,11 @@ There are also a few general options for altering the Mesa build:
 </p>
 <dl>
 <dt><code>--enable-debug</code></dt>
-<dd><p>This option will enable compiler
-options and macros to aid in debugging the Mesa libraries.</p>
-</dd>
+<dd><p>This option will set the compiler debug/optimisation levels (if the user
+hasn't already set them via the CFLAGS/CXXFLAGS) and macros to aid in
+debugging the Mesa libraries.</p>
+
+<p>Note that enabling this option can lead to noticeable loss of performance.</p>

 <dt><code>--disable-asm</code></dt>
 <dd><p>There are assembly routines
@@ -174,27 +176,22 @@ architecture, the following should be sufficient to configure multilib Mesa</p>
 </dl>


-<h2 id="driver">2. Driver Options</h2>
+<h2 id="driver">2. GL Driver Options</h2>

 <p>
 There are several different driver modes that Mesa can use. These are
 described in more detail in the <a href="install.html">basic
 installation instructions</a>. The Mesa driver is controlled through the
-configure options <code>--enable-xlib-glx</code>, <code>--enable-osmesa</code>,
-and <code>--enable-dri</code>.
+configure options <code>--enable-glx</code> and <code>--enable-osmesa</code>.
 </p>

 <h3 id="xlib">Xlib</h3><p>
 It uses Xlib as a software renderer to do all rendering. It corresponds
-to the option <code>--enable-xlib-glx</code>. The libX11 and libXext
-libraries, as well as the X11 development headers, will be need to
-support the Xlib driver.
+to the option <code>--enable-glx=xlib</code> or <code>--enable-glx=gallium-xlib</code>.

 <h3 id="dri">DRI</h3><p>This mode uses the DRI hardware drivers for
-accelerated OpenGL rendering. Enable the DRI drivers with the option
-<code>--enable-dri</code>. See the <a href="install.html">basic
-installation instructions</a> for details on prerequisites for the DRI
-drivers.
+accelerated OpenGL rendering. To enable use <code>--enable-glx=dri
+--enable-dri</code>.

 <!-- DRI specific options -->
 <dl>
@@ -252,10 +249,8 @@ will create the libOSMesa16 library with a 16-bit color channel.
 <h2 id="library">3. Library Options</h2>

 <p>
-The configure script provides more fine grained control over the GL
-libraries that will be built. More details on the specific GL libraries
-can be found in the <a href="install.html">basic installation
-instructions</a>.
+The configure script provides more fine grained control over the libraries
+that will be built.

 </div>
 </body>
--- a/docs/codingstyle.html
+++ b/docs/codingstyle.html
@@ -0,0 +1,142 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Coding Style</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Coding Style</h1>
+
+<p>
+Mesa is over 20 years old and the coding style has evolved over time.
+Some old parts use a style that's a bit out of date.
+
+Different sections of Mesa can use different coding styles, as set in the local
+EditorConfig (.editorconfig) and/or Emacs (.dir-locals.el) files.

+Otherwise, the following guidelines apply.
+
+If the guidelines below don't cover something, try following the format of
+existing, neighboring code.
+</p>
+
+<p>
+Basic formatting guidelines
+</p>
+
+<ul>
+<li>3-space indentation, no tabs.
+<li>Limit lines to 78 or fewer characters.  The idea is to prevent line
+wrapping in 80-column editors and terminals.  There are exceptions, such
+as if you're defining a large, static table of information.
+<li>Opening braces go on the same line as the if/for/while statement.
+For example:
+<pre>
+   if (condition) {
+      foo;
+   } else {
+      bar;
+   }
+</pre>
+
+<li>Put a space before/after operators.  For example, <tt>a = b + c;</tt>
+and not <tt>a=b+c;</tt>
+
+<li>This GNU indent command generally does the right thing for formatting:
+<pre>
+   indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
+</pre>
+
+<li>Use comments wherever you think it would be helpful for other developers.
+Several specific cases and style examples follow.  Note that we roughly
+follow <a href="http://www.stack.nl/~dimitri/doxygen/">Doxygen</a> conventions.
+<br>
+<br>
+Single-line comments:
+<pre>
+   /* null-out pointer to prevent dangling reference below */
+   bufferObj = NULL;
+</pre>
+Or,
+<pre>
+   bufferObj = NULL;  /* prevent dangling reference below */
+</pre>
+Multi-line comment:
+<pre>
+   /* If this is a new buffer object id, or one which was generated but
+    * never used before, allocate a buffer object now.
+    */
+</pre>
+We try to quote the OpenGL specification where prudent:
+<pre>
+   /* Page 38 of the PDF of the OpenGL ES 3.0 spec says:
+    *
+    *     "An INVALID_OPERATION error is generated for any of the following
+    *     conditions:
+    *
+    *     * <length> is zero."
+    *
+    * Additionally, page 94 of the PDF of the OpenGL 4.5 core spec
+    * (30.10.2014) also says this, so it's no longer allowed for desktop GL,
+    * either.
+    */
+</pre>
+Function comment example:
+<pre>
+   /**
+    * Create and initialize a new buffer object.  Called via the
+    * ctx->Driver.CreateObject() driver callback function.
+    * \param  name  integer name of the object
+    * \param  type  one of GL_FOO, GL_BAR, etc.
+    * \return  pointer to new object or NULL if error
+    */
+   struct gl_object *
+   _mesa_create_object(GLuint name, GLenum type)
+   {
+      /* function body */
+   }
+</pre>
+
+<li>Put the function return type and qualifiers on one line and the function
+name and parameters on the next, as seen above.  This makes it easy to use
+<code>grep ^function_name dir/*</code> to find function definitions.  Also,
+the opening brace goes on the next line by itself (see above.)
+
+<li>Function names follow various conventions depending on the type of function:
+<pre>
+   glFooBar()       - a public GL entry point (in glapi_dispatch.c)
+   _mesa_FooBar()   - the internal immediate mode function
+   save_FooBar()    - retained mode (display list) function in dlist.c
+   foo_bar()        - a static (private) function
+   _mesa_foo_bar()  - an internal non-static Mesa function
+</pre>
+
+<li>Constants, macros and enumerant names are ALL_UPPERCASE, with _ between
+words.
+<li>Mesa usually uses camel case for local variables (Ex: "localVarname")
+while gallium typically uses underscores (Ex: "local_var_name").
+<li>Global variables are almost never used because Mesa should be thread-safe.
+
+<li>Booleans.  Places that are not directly visible to the GL API
+should prefer the use of <tt>bool</tt>, <tt>true</tt>, and
+<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
+<tt>GL_FALSE</tt>.  In C code, this may mean that
+<tt>#include &lt;stdbool.h&gt;</tt> needs to be added.  The
+<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
+src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.
+
+</ul>
+</p>
+
+</div>
+</body>
+</html>
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -66,7 +66,7 @@
 <li><a href="debugging.html" target="_parent">Debugging Tips</a>
 <li><a href="perf.html" target="_parent">Performance Tips</a>
 <li><a href="extensions.html" target="_parent">Mesa Extensions</a>
-<li><a href="mangling.html" target="_parent">Function Name Mangling</a>
+<li><a href="mangling.html" target="_parent">GL Function Name Mangling</a>
 <li><a href="llvmpipe.html" target="_parent">Gallium llvmpipe driver</a>
 <li><a href="vmware-guest.html" target="_parent">VMware SVGA3D guest driver</a>
 <li><a href="postprocess.html" target="_parent">Gallium post-processing</a>
@@ -81,6 +81,8 @@
 <li><a href="utilities.html" target="_parent">Utilities</a>
 <li><a href="helpwanted.html" target="_parent">Help Wanted</a>
 <li><a href="devinfo.html" target="_parent">Development Notes</a>
+<li><a href="codingstyle.html" target="_parent">Coding Style</a>
+<li><a href="submittingpatches.html" target="_parent">Submitting patches</a>
 <li><a href="sourcedocs.html" target="_parent">Source Documentation</a>
 <li><a href="dispatch.html" target="_parent">GL Dispatch</a>
 </ul>
--- a/docs/devinfo.html
+++ b/docs/devinfo.html
@@ -18,650 +18,9 @@


 <ul>
-<li><a href="#style">Coding Style</a>
-<li><a href="#submitting">Submitting Patches</a>
-<li><a href="#release">Making a New Mesa Release</a>
 <li><a href="#extensions">Adding Extensions</a>
 </ul>

-
-<h2 id="style">Coding Style</h2>
-
-<p>
-Mesa is over 20 years old and the coding style has evolved over time.
-Some old parts use a style that's a bit out of date.
-If the guidelines below don't cover something, try following the format of
-existing, neighboring code.
-</p>
-
-<p>
-Basic formatting guidelines
-</p>
-
-<ul>
-<li>3-space indentation, no tabs.
-<li>Limit lines to 78 or fewer characters.  The idea is to prevent line
-wrapping in 80-column editors and terminals.  There are exceptions, such
-as if you're defining a large, static table of information.
-<li>Opening braces go on the same line as the if/for/while statement.
-For example:
-<pre>
-   if (condition) {
-      foo;
-   } else {
-      bar;
-   }
-</pre>
-
-<li>Put a space before/after operators.  For example, <tt>a = b + c;</tt>
-and not <tt>a=b+c;</tt>
-
-<li>This GNU indent command generally does the right thing for formatting:
-<pre>
-   indent -br -i3 -npcs --no-tabs infile.c -o outfile.c
-</pre>
-
-<li>Use comments wherever you think it would be helpful for other developers.
-Several specific cases and style examples follow.  Note that we roughly
-follow <a href="http://www.stack.nl/~dimitri/doxygen/">Doxygen</a> conventions.
-<br>
-<br>
-Single-line comments:
-<pre>
-   /* null-out pointer to prevent dangling reference below */
-   bufferObj = NULL;
-</pre>
-Or,
-<pre>
-   bufferObj = NULL;  /* prevent dangling reference below */
-</pre>
-Multi-line comment:
-<pre>
-   /* If this is a new buffer object id, or one which was generated but
-    * never used before, allocate a buffer object now.
-    */
-</pre>
-We try to quote the OpenGL specification where prudent:
-<pre>
-   /* Page 38 of the PDF of the OpenGL ES 3.0 spec says:
-    *
-    *     "An INVALID_OPERATION error is generated for any of the following
-    *     conditions:
-    *
-    *     * <length> is zero."
-    *
-    * Additionally, page 94 of the PDF of the OpenGL 4.5 core spec
-    * (30.10.2014) also says this, so it's no longer allowed for desktop GL,
-    * either.
-    */
-</pre>
-Function comment example:
-<pre>
-   /**
-    * Create and initialize a new buffer object.  Called via the
-    * ctx->Driver.CreateObject() driver callback function.
-    * \param  name  integer name of the object
-    * \param  type  one of GL_FOO, GL_BAR, etc.
-    * \return  pointer to new object or NULL if error
-    */
-   struct gl_object *
-   _mesa_create_object(GLuint name, GLenum type)
-   {
-      /* function body */
-   }
-</pre>
-
-<li>Put the function return type and qualifiers on one line and the function
-name and parameters on the next, as seen above.  This makes it easy to use
-<code>grep ^function_name dir/*</code> to find function definitions.  Also,
-the opening brace goes on the next line by itself (see above.)
-
-<li>Function names follow various conventions depending on the type of function:
-<pre>
-   glFooBar()       - a public GL entry point (in glapi_dispatch.c)
-   _mesa_FooBar()   - the internal immediate mode function
-   save_FooBar()    - retained mode (display list) function in dlist.c
-   foo_bar()        - a static (private) function
-   _mesa_foo_bar()  - an internal non-static Mesa function
-</pre>
-
-<li>Constants, macros and enumerant names are ALL_UPPERCASE, with _ between
-words.
-<li>Mesa usually uses camel case for local variables (Ex: "localVarname")
-while gallium typically uses underscores (Ex: "local_var_name").
-<li>Global variables are almost never used because Mesa should be thread-safe.
-
-<li>Booleans.  Places that are not directly visible to the GL API
-should prefer the use of <tt>bool</tt>, <tt>true</tt>, and
-<tt>false</tt> over <tt>GLboolean</tt>, <tt>GL_TRUE</tt>, and
-<tt>GL_FALSE</tt>.  In C code, this may mean that
-<tt>#include &lt;stdbool.h&gt;</tt> needs to be added.  The
-<tt>try_emit_</tt>* methods in src/mesa/program/ir_to_mesa.cpp and
-src/mesa/state_tracker/st_glsl_to_tgsi.cpp can serve as examples.
-
-</ul>
-
-
-<h2 id="submitting">Submitting patches</h2>
-
-<p>
-The basic guidelines for submitting patches are:
-</p>
-
-<ul>
-<li>Patches should be sufficiently tested before submitting.
-<li>Code patches should follow Mesa coding conventions.
-<li>Whenever possible, patches should only effect individual Mesa/Gallium
-components.
-<li>Patches should never introduce build breaks and should be bisectable (see
-<code>git bisect</code>.)
-<li>Patches should be properly formatted (see below).
-<li>Patches should be submitted to mesa-dev for review using
-<code>git send-email</code>.
-<li>Patches should not mix code changes with code formatting changes (except,
-perhaps, in very trivial cases.)
-</ul>
-
-<h3>Patch formatting</h3>
-
-<p>
-The basic rules for patch formatting are:
-</p>
-
-<ul>
-<li>Lines should be limited to 75 characters or less so that git logs
-displayed in 80-column terminals avoid line wrapping.  Note that git
-log uses 4 spaces of indentation (4 + 75 &lt; 80).
-<li>The first line should be a short, concise summary of the change prefixed
-with a module name.  Examples:
-<pre>
-    mesa: Add support for querying GL_VERTEX_ATTRIB_ARRAY_LONG
-
-    gallium: add PIPE_CAP_DEVICE_RESET_STATUS_QUERY
-
-    i965: Fix missing type in local variable declaration.
-</pre>
-<li>Subsequent patch comments should describe the change in more detail,
-if needed.  For example:
-<pre>
-    i965: Remove end-of-thread SEND alignment code.
-    
-    This was present in Eric's initial implementation of the compaction code
-    for Sandybridge (commit 077d01b6). There is no documentation saying this
-    is necessary, and removing it causes no regressions in piglit on any
-    platform.
-</pre>
-<li>A "Signed-off-by:" line is not required, but not discouraged either.
-<li>If a patch address a bugzilla issue, that should be noted in the
-patch comment.  For example:
-<pre>
-   Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
-</pre>
-<li>If there have been several revisions to a patch during the review
-process, they should be noted such as in this example:
-<pre>
-    st/mesa: add ARB_texture_stencil8 support (v4)
-    
-    if we support stencil texturing, enable texture_stencil8
-    there is no requirement to support native S8 for this,
-    the texture can be converted to x24s8 fine.
-    
-    v2: fold fixes from Marek in:
-       a) put S8 last in the list
-       b) fix renderable to always test for d/s renderable
-        fixup the texture case to use a stencil only format
-        for picking the format for the texture view.
-    v3: hit fallback for getteximage
-    v4: put s8 back in front, it shouldn't get picked now (Ilia)
-</pre>
-<li>If someone tested your patch, document it with a line like this:
-<pre>
-    Tested-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-<li>If the patch was reviewed (usually the case) or acked by someone,
-that should be documented with:
-<pre>
-    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
-    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-</ul>
-
-
-
-<h3>Testing Patches</h3>
-
-<p>
-It should go without saying that patches must be tested.  In general,
-do whatever testing is prudent.
-</p>
-
-<p>
-You should always run the Mesa test suite before submitting patches.
-The test suite can be run using the 'make check' command. All tests
-must pass before patches will be accepted, this may mean you have
-to update the tests themselves.
-</p>
-
-<p>
-Whenever possible and applicable, test the patch with
-<a href="http://piglit.freedesktop.org">Piglit</a> to
-check for regressions.
-</p>
-
-
-<h3>Mailing Patches</h3>
-
-<p>
-Patches should be sent to the mesa-dev mailing list for review:
-<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev">
-mesa-dev@lists.freedesktop.org<a/>.
-When submitting a patch make sure to use
-<a href="https://git-scm.com/docs/git-send-email">git send-email</a>
-rather than attaching patches to emails. Sending patches as
-attachments prevents people from being able to provide in-line review
-comments.
-</p>
-
-<p>
-When submitting follow-up patches you can use --in-reply-to to make v2, v3,
-etc patches show up as replies to the originals. This usually works well
-when you're sending out updates to individual patches (as opposed to
-re-sending the whole series). Using --in-reply-to makes
-it harder for reviewers to accidentally review old patches.
-</p>
-
-<p>
-When submitting follow-up patches you should also login to
-<a href="https://patchwork.freedesktop.org">patchwork</a> and change the
-state of your old patches to Superseded.
-</p>
-
-<h3>Reviewing Patches</h3>
-
-<p>
-When you've reviewed a patch on the mailing list, please be unambiguous
-about your review.  That is, state either
-<pre>
-    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-or
-<pre>
-    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-Rather than saying just "LGTM" or "Seems OK".
-</p>
-
-<p>
-If small changes are suggested, it's OK to say something like:
-<pre>
-   With the above fixes, Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
-</pre>
-which tells the patch author that the patch can be committed, as long
-as the issues are resolved first.
-</p>
-
-
-<h3>Marking a commit as a candidate for a stable branch</h3>
-
-<p>
-If you want a commit to be applied to a stable branch,
-you should add an appropriate note to the commit message.
-</p>
-
-<p>
-Here are some examples of such a note:
-</p>
-<ul>
-  <li>CC: &lt;mesa-stable@lists.freedesktop.org&gt;</li>
-  <li>CC: "9.2 10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
-  <li>CC: "10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
-</ul>
-
-Simply adding the CC to the mesa-stable list address is adequate to nominate
-the commit for the most-recently-created stable branch. It is only necessary
-to specify a specific branch name, (such as "9.2 10.0" or "10.0" in the
-examples above), if you want to nominate the commit for an older stable
-branch. And, as in these examples, you can nominate the commit for the older
-branch in addition to the more recent branch, or nominate the commit
-exclusively for the older branch.
-
-This "CC" syntax for patch nomination will cause patches to automatically be
-copied to the mesa-stable@ mailing list when you use "git send-email" to send
-patches to the mesa-dev@ mailing list. Also, if you realize that a commit
-should be nominated for the stable branch after it has already been committed,
-you can send a note directly to the mesa-stable@lists.freedesktop.org where
-the Mesa stable-branch maintainers will receive it. Be sure to mention the
-commit ID of the commit of interest (as it appears in the mesa master branch).
-
-The latest set of patches that have been nominated, accepted, or rejected for
-the upcoming stable release can always be seen on the
-<a href="http://cworth.org/~cworth/mesa-stable-queue/">Mesa Stable Queue</a>
-page.
-
-<h3>Criteria for accepting patches to the stable branch</h3>
-
-Mesa has a designated release manager for each stable branch, and the release
-manager is the only developer that should be pushing changes to these
-branches. Everyone else should simply nominate patches using the mechanism
-described above.
-
-The stable-release manager will work with the list of nominated patches, and
-for each patch that meets the crtieria below will cherry-pick the patch with:
-<code>git cherry-pick -x &lt;commit&gt;</code>. The <code>-x</code> option is
-important so that the picked patch references the comit ID of the original
-patch.
-
-The stable-release manager may at times need to force-push changes to the
-stable branches, for example, to drop a previously-picked patch that was later
-identified as causing a regression). These force-pushes may cause changes to
-be lost from the stable branch if developers push things directly. Consider
-yourself warned.
-
-The stable-release manager is also given broad discretion in rejecting patches
-that have been nominated for the stable branch. The most basic rule is that
-the stable branch is for bug fixes only, (no new features, no
-regressions). Here is a non-exhaustive list of some reasons that a patch may
-be rejected:
-
-<ul>
-  <li>Patch introduces a regression. Any reported build breakage or other
-  regression caused by a particular patch, (game no longer work, piglit test
-  changes from PASS to FAIL), is justification for rejecting a patch.</li>
-
-  <li>Patch is too large, (say, larger than 100 lines)</li>
-
-  <li>Patch is not a fix. For example, a commit that moves code around with no
-  functional change should be rejected.</li>
-
-  <li>Patch fix is not clearly described. For example, a commit message
-  of only a single line, no description of the bug, no mention of bugzilla,
-  etc.</li>
-
-  <li>Patch has not obviously been reviewed, For example, the commit message
-  has no Reviewed-by, Signed-off-by, nor Tested-by tags from anyone but the
-  author.</li>
-
-  <li>Patch has not already been merged to the master branch. As a rule, bug
-  fixes should never be applied first to a stable branch. Patches should land
-  first on the master branch and then be cherry-picked to a stable
-  branch. (This is to avoid future releases causing regressions if the patch
-  is not also applied to master.) The only things that might look like
-  exceptions would be backports of patches from master that happen to look
-  significantly different.</li>
-
-  <li>Patch depends on too many other patches. Ideally, all stable-branch
-  patches should be self-contained. It sometimes occurs that a single, logical
-  bug-fix occurs as two separate patches on master, (such as an original
-  patch, then a subsequent fix-up to that patch). In such a case, these two
-  patches should be squashed into a single, self-contained patch for the
-  stable branch. (Of course, if the squashing makes the patch too large, then
-  that could be a reason to reject the patch.)</li>
-
-  <li>Patch includes new feature development, not bug fixes. New OpenGL
-  features, extensions, etc. should be applied to Mesa master and included in
-  the next major release. Stable releases are intended only for bug fixes.
-
-  Note: As an exception to this rule, the stable-release manager may accept
-  hardware-enabling "features". For example, backports of new code to support
-  a newly-developed hardware product can be accepted if they can be reasonably
-  determined to not have effects on other hardware.</li>
-
-  <li>Patch is a performance optimization. As a rule, performance patches are
-  not candidates for the stable branch. The only exception might be a case
-  where an application's performance was recently severely impacted so as to
-  become unusable. The fix for this performance regression could then be
-  considered for a stable branch. The optimization must also be
-  non-controversial and the patches still need to meet the other criteria of
-  being simple and self-contained</li>
-
-  <li>Patch introduces a new failure mode (such as an assert). While the new
-  assert might technically be correct, for example to make Mesa more
-  conformant, this is not the kind of "bug fix" we want in a stable
-  release. The potential problem here is that an OpenGL program that was
-  previously working, (even if technically non-compliant with the
-  specification), could stop working after this patch. So that would be a
-  regression that is unaacceptable for the stable branch.</li>
-</ul>
-
-
-<h2 id="release">Making a New Mesa Release</h2>
-
-<p>
-These are the instructions for making a new Mesa release.
-</p>
-
-<h3>Get latest source files</h3>
-<p>
-Use git to get the latest Mesa files from the git repository, from whatever
-branch is relevant. This document uses the convention X.Y.Z for the release
-being created, which should be created from a branch named X.Y.
-</p>
-
-<h3>Perform basic testing</h3>
-<p>
-The release manager should, at the very least, test the code by compiling it,
-installing it, and running the latest piglit to ensure that no piglit tests
-have regressed since the previous release.
-</p>
-
-<p>
-The release manager should do this testing with at least one hardware driver,
-(say, whatever is contained in the local development machine), as well as on
-both Gallium and non-Gallium software drivers. The software testing can be
-performed by running piglit with the following environment-variable set:
-</p>
-
-<pre>
-LIBGL_ALWAYS_SOFTWARE=1
-</pre>
-
-And Gallium vs. non-Gallium software drivers can be obtained by using the
-following configure flags on separate builds:
-
-<pre>
--with-dri-drivers=swrast
--with-gallium-drivers=swrast
-</pre>
-
-<p>
-Note: If both options are given in one build, both swrast_dri.so drivers will
-be compiled, but only one will be installed. The following command can be used
-to ensure the correct driver is being tested:
-</p>
-
-<pre>
-LIBGL_ALWAYS_SOFTWARE=1 glxinfo | grep "renderer string"
-</pre>
-
-If any regressions are found in this testing with piglit, stop here, and do
-not perform a release until regressions are fixed.
-
-<h3>Update version in file VERSION</h3>
-
-<p>
-Increment the version contained in the file VERSION at Mesa's top-level, then
-commit this change.
-</p>
-
-<h3>Create release notes for the new release</h3>
-
-<p>
-Create a new file docs/relnotes/X.Y.Z.html, (follow the style of the previous
-release notes). Note that the sha256sums section of the release notes should
-be empty at this point.
-</p>
-
-<p>
-Two scripts are available to help generate portions of the release notes:
-
-<pre>
-	./bin/bugzilla_mesa.sh
-	./bin/shortlog_mesa.sh
-</pre>
-
-<p>
-The first script identifies commits that reference bugzilla bugs and obtains
-the descriptions of those bugs from bugzilla. The second script generates a
-log of all commits. In both cases, HTML-formatted lists are printed to stdout
-to be included in the release notes.
-</p>
-
-<p>
-Commit these changes
-</p>
-
-<h3>Make the release archives, signatures, and the release tag</h3>
-<p>
-From inside the Mesa directory:
-<pre>
-	./autogen.sh
-	make -j1 tarballs
-</pre>
-
-<p>
-After the tarballs are created, the sha256 checksums for the files will
-be computed and printed. These will be used in a step below.
-</p>
-
-<p>
-It's important at this point to also verify that the constructed tar file
-actually builds:
-</p>
-
-<pre>
-	tar xjf MesaLib-X.Y.Z.tar.bz2
-	cd Mesa-X.Y.Z
-	./configure --enable-gallium-llvm
-	make -j6
-	make install
-</pre>
-
-<p>
-Some touch testing should also be performed at this point, (run glxgears or
-more involved OpenGL programs against the installed Mesa).
-</p>
-
-<p>
-Create detached GPG signatures for each of the archive files created above:
-</p>
-
-<pre>
-	gpg --sign --detach MesaLib-X.Y.Z.tar.gz
-	gpg --sign --detach MesaLib-X.Y.Z.tar.bz2
-	gpg --sign --detach MesaLib-X.Y.Z.zip
-</pre>
-
-<p>
-Tag the commit used for the build:
-</p>
-
-<pre>
-	git tag -s mesa-X.Y.X -m "Mesa X.Y.Z release"
-</pre>
-
-<p>
-Note: It would be nice to investigate and fix the issue that causes the
-tarballs target to fail with multiple build process, such as with "-j4". It
-would also be nice to incorporate all of the above commands into a single
-makefile target. And instead of a custom "tarballs" target, we should
-incorporate things into the standard "make dist" and "make distcheck" targets.
-</p>
-
-<h3>Add the sha256sums to the release notes</h3>
-
-<p>
-Edit docs/relnotes/X.Y.Z.html to add the sha256sums printed as part of "make
-tarballs" in the previous step. Commit this change.
-</p>
-
-<h3>Push all commits and the tag created above</h3>
-
-<p>
-This is the first step that cannot easily be undone. The release is going
-forward from this point:
-</p>
-
-<pre>
-	git push origin X.Y --tags
-</pre>
-
-<h3>Install the release files and signatures on the distribution server</h3>
-
-<p>
-The following commands can be used to copy the release archive files and
-signatures to the freedesktop.org server:
-</p>
-
-<pre>
-	scp MesaLib-X.Y.Z* people.freedesktop.org:
-	ssh people.freedesktop.org
-	cd /srv/ftp.freedesktop.org/pub/mesa
-	mkdir X.Y.Z
-	cd X.Y.Z
-	mv ~/MesaLib-X.Y.Z* .
-</pre>
-
-<h3>Back on mesa master, add the new release notes into the tree</h3>
-
-<p>
-Something like the following steps will do the trick:
-</p>
-
-<pre>
-	cp docs/relnotes/X.Y.Z.html /tmp
-        git checkout master
-        cp /tmp/X.Y.Z.html docs/relnotes
-        git add docs/relnotes/X.Y.Z.html
-</pre>
-
-<p>
-Also, edit docs/relnotes.html to add a link to the new release notes, and edit
-docs/index.html to add a news entry. Then commit and push:
-</p>
-
-<pre>
-	git commit -a -m "docs: Import X.Y.Z release notes, add news item."
-        git push origin
-</pre>
-
-<h3>Update the mesa3d.org website</h3>
-
-<p>
-NOTE: The recent release managers have not been performing this step
-themselves, but leaving this to Brian Paul, (who has access to the
-sourceforge.net hosting for mesa3d.org). Brian is more than willing to grant
-the permission necessary to future release managers to do this step on their
-own.
-</p>
-
-<p>
-Update the web site by copying the docs/ directory's files to 
-/home/users/b/br/brianp/mesa-www/htdocs/ with:
-<br>
-<code>
-sftp USERNAME,mesa3d@web.sourceforge.net
-</code>
-</p>
-
-
-<h3>Announce the release</h3>
-<p>
-Make an announcement on the mailing lists:
-
-<em>mesa-dev@lists.freedesktop.org</em>,
-and
-<em>mesa-announce@lists.freedesktop.org</em>
-
-Follow the template of previously-sent release announcements. The following
-command can be used to generate the log of changes to be included in the
-release announcement:
-
-<pre>
-	git shortlog mesa-X.Y.Z-1..mesa-X.Y.Z
-</pre>
-</p>
-
-
 <h2 id="extensions">Adding Extensions</h2>

 <p>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -60,6 +60,8 @@ sometimes be useful for debugging end-user issues.
     <li>flush - flush after each drawing command</li>
     <li>incomplete_tex - extra debug messages when a texture is incomplete</li>
     <li>incomplete_fbo - extra debug messages when a fbo is incomplete</li>
+     <li>context - create a debug context (see GLX_CONTEXT_DEBUG_BIT_ARB) and
+         print error and performance messages to stderr (or MESA_LOG_FILE).</li>
   </ul>
 <li>MESA_LOG_FILE - specifies a file name for logging all errors, warnings,
 etc., rather than stderr
@@ -185,6 +187,8 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
   <li>do32 - generate compute shader SIMD32 programs even if workgroup size doesn't exceed the SIMD16 limit</li>
   <li>norbc - disable single sampled render buffer compression</li>
 </ul>
+<li>INTEL_PRECISE_TRIG - if set to 1, true or yes, then the driver prefers
+   accuracy over performance in trig functions.</li>
 </ul>


@@ -217,6 +221,8 @@ Mesa EGL supports different sets of environment variables.  See the
    disable for unencumbered viewing the rest of the time. For example, set
    GALLIUM_HUD_VISIBLE to false and GALLIUM_HUD_TOGGLE_SIGNAL to 10 (SIGUSR1).
    Use kill -10 <pid> to toggle the hud as desired.
+<li>GALLIUM_HUD_DUMP_DIR - specifies a directory for writing the displayed
+    hud values into files.
 <li>GALLIUM_DRIVER - useful in combination with LIBGL_ALWAYS_SOFTWARE=1 for
    choosing one of the software renderers "softpipe", "llvmpipe" or "swr".
 <li>GALLIUM_LOG_FILE - specifies a file for logging all errors, warnings, etc.
@@ -235,6 +241,21 @@ Setting to "tgsi", for example, will print all the TGSI shaders.
 See src/mesa/state_tracker/st_debug.c for other options.
 </ul>

+<h3>Clover state tracker environment variables</h3>
+
+<ul>
+<li>CLOVER_EXTRA_BUILD_OPTIONS - allows specifying additional compiler and linker
+    options. Specified options are appended after the options set by the OpenCL
+    program in clBuildProgram.
+<li>CLOVER_EXTRA_COMPILE_OPTIONS - allows specifying additional compiler
+    options. Specified options are appended after the options set by the OpenCL
+    program in clCompileProgram.
+<li>CLOVER_EXTRA_LINK_OPTIONS - allows specifying additional linker
+    options. Specified options are appended after the options set by the OpenCL
+    program in clLinkProgram.
+</ul>
+
+
 <h3>Softpipe driver environment variables</h3>
 <ul>
 <li>SOFTPIPE_DUMP_FS - if set, the softpipe driver will print fragment shaders
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -33,7 +33,7 @@ are exposed in the 3.0 context as extensions.
 Feature                                                 Status
 ------------------------------------------------------- ------------------------

-GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr
+GL 3.0, GLSL 1.30 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr

  glBindFragDataLocation, glGetFragDataLocation         DONE
  GL_NV_conditional_render (Conditional rendering)      DONE ()
@@ -60,12 +60,12 @@ GL 3.0, GLSL 1.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft
  glVertexAttribI commands                              DONE
  Depth format cube textures                            DONE ()
  GLX_ARB_create_context (GLX 1.4 is required)          DONE
-  Multisample anti-aliasing                             DONE (llvmpipe (*), softpipe (*), swr (*))
+  Multisample anti-aliasing                             DONE (freedreno (*), llvmpipe (*), softpipe (*), swr (*))

-(*) llvmpipe, softpipe, and swr have fake Multisample anti-aliasing support
+(*) freedreno, llvmpipe, softpipe, and swr have fake Multisample anti-aliasing support


-GL 3.1, GLSL 1.40 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr
+GL 3.1, GLSL 1.40 --- all DONE: freedreno, i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe, swr

  Forward compatible context support/deprecations       DONE ()
  GL_ARB_draw_instanced (Instanced drawing)             DONE ()
@@ -82,34 +82,34 @@ GL 3.2, GLSL 1.50 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, soft

  Core/compatibility profiles                           DONE
  Geometry shaders                                      DONE ()
-  GL_ARB_vertex_array_bgra (BGRA vertex order)          DONE (swr)
-  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (swr)
-  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (swr)
-  GL_ARB_provoking_vertex (Provoking vertex)            DONE (swr)
-  GL_ARB_seamless_cube_map (Seamless cubemaps)          DONE (swr)
+  GL_ARB_vertex_array_bgra (BGRA vertex order)          DONE (freedreno, swr)
+  GL_ARB_draw_elements_base_vertex (Base vertex offset) DONE (freedreno, swr)
+  GL_ARB_fragment_coord_conventions (Frag shader coord) DONE (freedreno, swr)
+  GL_ARB_provoking_vertex (Provoking vertex)            DONE (freedreno, swr)
+  GL_ARB_seamless_cube_map (Seamless cubemaps)          DONE (freedreno, swr)
  GL_ARB_texture_multisample (Multisample textures)     DONE (swr)
-  GL_ARB_depth_clamp (Frag depth clamp)                 DONE (swr)
-  GL_ARB_sync (Fence objects)                           DONE (swr)
+  GL_ARB_depth_clamp (Frag depth clamp)                 DONE (freedreno, swr)
+  GL_ARB_sync (Fence objects)                           DONE (freedreno, swr)
  GLX_ARB_create_context_profile                        DONE


 GL 3.3, GLSL 3.30 --- all DONE: i965, nv50, nvc0, r600, radeonsi, llvmpipe, softpipe

-  GL_ARB_blend_func_extended                            DONE (swr)
+  GL_ARB_blend_func_extended                            DONE (freedreno/a3xx, swr)
  GL_ARB_explicit_attrib_location                       DONE (all drivers that support GLSL)
-  GL_ARB_occlusion_query2                               DONE (swr)
+  GL_ARB_occlusion_query2                               DONE (freedreno, swr)
  GL_ARB_sampler_objects                                DONE (all drivers)
-  GL_ARB_shader_bit_encoding                            DONE (swr)
-  GL_ARB_texture_rgb10_a2ui                             DONE (swr)
-  GL_ARB_texture_swizzle                                DONE (swr)
+  GL_ARB_shader_bit_encoding                            DONE (freedreno, swr)
+  GL_ARB_texture_rgb10_a2ui                             DONE (freedreno, swr)
+  GL_ARB_texture_swizzle                                DONE (freedreno, swr)
  GL_ARB_timer_query                                    DONE (swr)
-  GL_ARB_instanced_arrays                               DONE (swr)
-  GL_ARB_vertex_type_2_10_10_10_rev                     DONE (swr)
+  GL_ARB_instanced_arrays                               DONE (freedreno, swr)
+  GL_ARB_vertex_type_2_10_10_10_rev                     DONE (freedreno, swr)


-GL 4.0, GLSL 4.00 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
+GL 4.0, GLSL 4.00 --- all DONE: i965/hsw+, nvc0, r600, radeonsi

-  GL_ARB_draw_buffers_blend                             DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
+  GL_ARB_draw_buffers_blend                             DONE (freedreno, i965/gen6+, nv50, llvmpipe, softpipe, swr)
  GL_ARB_draw_indirect                                  DONE (i965/gen7+, llvmpipe, softpipe, swr)
  GL_ARB_gpu_shader5                                    DONE (i965/gen7+)
  - 'precise' qualifier                                 DONE
@@ -124,7 +124,7 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
  - Enhanced per-sample shading                         DONE ()
  - Interpolation functions                             DONE ()
  - New overload resolution rules                       DONE
-  GL_ARB_gpu_shader_fp64                                DONE (llvmpipe, softpipe)
+  GL_ARB_gpu_shader_fp64                                DONE (i965/hsw+, llvmpipe, softpipe)
  GL_ARB_sample_shading                                 DONE (i965/gen6+, nv50)
  GL_ARB_shader_subroutine                              DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
  GL_ARB_tessellation_shader                            DONE (i965/gen7+)
@@ -133,20 +133,20 @@ GL 4.0, GLSL 4.00 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
  GL_ARB_texture_gather                                 DONE (i965/gen6+, nv50, llvmpipe, softpipe, swr)
  GL_ARB_texture_query_lod                              DONE (i965, nv50, softpipe)
  GL_ARB_transform_feedback2                            DONE (i965/gen7+, nv50, llvmpipe, softpipe, swr)
-  GL_ARB_transform_feedback3                            DONE (i965/gen7+, nv50, llvmpipe, softpipe, swr)
+  GL_ARB_transform_feedback3                            DONE (i965/gen7+, llvmpipe, softpipe, swr)


-GL 4.1, GLSL 4.10 --- all DONE: i965/gen8+, nvc0, r600, radeonsi
+GL 4.1, GLSL 4.10 --- all DONE: i965/hsw+, nvc0, r600, radeonsi

  GL_ARB_ES2_compatibility                              DONE (i965, nv50, llvmpipe, softpipe, swr)
  GL_ARB_get_program_binary                             DONE (0 binary formats)
  GL_ARB_separate_shader_objects                        DONE (all drivers)
-  GL_ARB_shader_precision                               DONE (all drivers that support GLSL 4.10)
-  GL_ARB_vertex_attrib_64bit                            DONE (llvmpipe, softpipe)
+  GL_ARB_shader_precision                               DONE (i965/hsw+, all drivers that support GLSL 4.10)
+  GL_ARB_vertex_attrib_64bit                            DONE (i965/hsw+, llvmpipe, softpipe)
  GL_ARB_viewport_array                                 DONE (i965, nv50, llvmpipe, softpipe)


-GL 4.2, GLSL 4.20 -- all DONE: i965/gen8+, nvc0, radeonsi
+GL 4.2, GLSL 4.20 -- all DONE: i965/hsw+, nvc0, radeonsi

  GL_ARB_texture_compression_bptc                       DONE (i965, r600)
  GL_ARB_compressed_texture_pixel_storage               DONE (all drivers)
@@ -253,25 +253,25 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, radeonsi
 GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+

  GL_EXT_color_buffer_float                             DONE (all drivers)
-  GL_KHR_blend_equation_advanced                        DONE (i965)
+  GL_KHR_blend_equation_advanced                        DONE (i965, nvc0)
  GL_KHR_debug                                          DONE (all drivers)
  GL_KHR_robustness                                     DONE (i965, nvc0, radeonsi)
  GL_KHR_texture_compression_astc_ldr                   DONE (i965/gen9+)
  GL_OES_copy_image                                     DONE (all drivers)
  GL_OES_draw_buffers_indexed                           DONE (all drivers that support GL_ARB_draw_buffers_blend)
  GL_OES_draw_elements_base_vertex                      DONE (all drivers)
-  GL_OES_geometry_shader                                DONE (i965/gen8+, nvc0, radeonsi)
+  GL_OES_geometry_shader                                DONE (i965/hsw+, nvc0, radeonsi)
  GL_OES_gpu_shader5                                    DONE (all drivers that support GL_ARB_gpu_shader5)
  GL_OES_primitive_bounding_box                         DONE (i965/gen7+, nvc0, radeonsi)
  GL_OES_sample_shading                                 DONE (i965, nvc0, r600, radeonsi)
  GL_OES_sample_variables                               DONE (i965, nvc0, r600, radeonsi)
  GL_OES_shader_image_atomic                            DONE (all drivers that support GL_ARB_shader_image_load_store)
-  GL_OES_shader_io_blocks                               DONE (i965/gen8+, nvc0, radeonsi)
+  GL_OES_shader_io_blocks                               DONE (all drivers that support GLES 3.1)
  GL_OES_shader_multisample_interpolation               DONE (i965, nvc0, r600, radeonsi)
  GL_OES_tessellation_shader                            DONE (all drivers that support GL_ARB_tessellation_shader)
  GL_OES_texture_border_clamp                           DONE (all drivers)
  GL_OES_texture_buffer                                 DONE (i965, nvc0, radeonsi)
-  GL_OES_texture_cube_map_array                         DONE (i965/gen8+, nvc0, radeonsi)
+  GL_OES_texture_cube_map_array                         DONE (i965/hsw+, nvc0, radeonsi)
  GL_OES_texture_stencil8                               DONE (all drivers that support GL_ARB_texture_stencil8)
  GL_OES_texture_storage_multisample_2d_array           DONE (all drivers that support GL_ARB_texture_multisample)

@@ -287,7 +287,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
  GL_ARB_indirect_parameters                            DONE (nvc0, radeonsi)
  GL_ARB_parallel_shader_compile                        not started, but Chia-I Wu did some related work in 2014
  GL_ARB_pipeline_statistics_query                      DONE (i965, nvc0, radeonsi, softpipe, swr)
-  GL_ARB_post_depth_coverage                            not started
+  GL_ARB_post_depth_coverage                            DONE (i965)
  GL_ARB_robustness_isolation                           not started
  GL_ARB_sample_locations                               not started
  GL_ARB_seamless_cubemap_per_texture                   DONE (i965, nvc0, radeonsi, r600, softpipe, swr)
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,47 @@

 <h1>News</h1>

+<h2>January 5, 2017</h2>
+<p>
+<a href="relnotes/13.0.3.html">Mesa 13.0.3</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>December 5, 2016</h2>
+<p>
+<a href="relnotes/12.0.5.html">Mesa 12.0.5</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 12.0.5 will be the final release in the 12.0
+series. Users of 12.0 are encouraged to migrate to the 13.0 series in order
+to obtain future fixes.
+</p>
+
+<h2>November 28, 2016</h2>
+<p>
+<a href="relnotes/13.0.2.html">Mesa 13.0.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>November 14, 2016</h2>
+<p>
+<a href="relnotes/13.0.1.html">Mesa 13.0.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>November 10, 2016</h2>
+<p>
+<a href="relnotes/12.0.4.html">Mesa 12.0.4</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>November 1, 2016</h2>
+<p>
+<a href="relnotes/13.0.0.html">Mesa 13.0.0</a> is released.  This is a
+new development release.  See the release notes for more information
+about the release.
+</p>
+
 <h2>September 15, 2016</h2>
 <p>
 <a href="relnotes/12.0.3.html">Mesa 12.0.3</a> is released.
--- a/docs/install.html
+++ b/docs/install.html
@@ -24,7 +24,7 @@
  </ul>
 <li><a href="#autoconf">Building with autoconf (Linux/Unix/X11)</a>
 <li><a href="#scons">Building with SCons (Windows/Linux)</a>
-<li><a href="#other">Building for other systems</a>
+<li><a href="#android">Building with AOSP (Android)</a>
 <li><a href="#libs">Library Information</a>
 <li><a href="#pkg-config">Building OpenGL programs with pkg-config</a>
 </ol>
@@ -33,62 +33,85 @@
 <h1 id="prereq-general">1. Prerequisites for building</h1>

 <h2>1.1 General</h2>
+
+<p>
+Build system.
+</p>
+
+<ul>
+<li>Autoconf is required when building on *nix platforms.
+<li><a href="http://www.scons.org/">SCons</a> is required for building on
+Windows and optional for Linux (it's an alternative to autoconf/automake.)
+</li>
+<li>Android Build system when building as native Android component. Autoconf
+is used when building ARC.
+</li>
+</ul>
+
+
+<p>
+The following compilers are known to work. If you know of others, or you're
+willing to maintain support for other compilers, get in touch.
+</p>
+
+<ul>
+<li>GCC 4.2.0 or later (some parts of Mesa may require later versions)
+<li>clang - exact minimum requirement is currently unknown.
+<li>Microsoft Visual Studio 2013 Update 4 or later is required, for building on Windows.
+</ul>
+
+
+<p>
+Third party/extra tools.
+<br>
+<strong>Note</strong>: These should not be required, when building from a release tarball. If
+you think you've spotted a bug let developers know by filing a
+<a href="bugs.html">bug report</a>.
+</p>
+
+
 <ul>
 <li><a href="http://www.python.org/">Python</a> - Python is required.
 Version 2.6.4 or later should work.
 </li>
-<br>
 <li><a href="http://www.makotemplates.org/">Python Mako module</a> -
 Python Mako module is required. Version 0.3.4 or later should work.
 </li>
-</br>
-<li><a href="http://www.scons.org/">SCons</a> is required for building on
-Windows and optional for Linux (it's an alternative to autoconf/automake.)
-</li>
-<br>
-<li>lex / yacc - for building the GLSL compiler.
-<br>
-<br>
-On Linux systems, flex and bison are used.
-Versions 2.5.35 and 2.4.1, respectively, (or later) should work.
-<br>
-<br>
+<li>lex / yacc - for building the Mesa IR and GLSL compiler.
+<div>
+On Linux systems, flex and bison versions 2.5.35 and 2.4.1, respectively,
+(or later) should work.
 On Windows with MinGW, install flex and bison with:
 <pre>mingw-get install msys-flex msys-bison</pre>
 For MSVC on Windows, install
 <a href="http://winflexbison.sourceforge.net/">Win flex-bison</a>.
-</li>
-<br>
-<li>For building on Windows, Microsoft Visual Studio 2013 or later is required.
-</li>
+</div>
 </ul>
+<p><strong>Note</strong>: Some versions can be buggy (eg. flex 2.6.2) so do try others if things fail.</p>


-<h3 id="prereq-dri">1.2 For DRI and hardware acceleration</h3>
+<h3 id="prereq-dri">1.2 Requirements</h3>

 <p>
-The following are required for DRI-based hardware acceleration with Mesa:
+The requirements depend on the features selected at configure stage.
+Check/install the respective -devel package as prompted by the configure error
+message.
 </p>

-<ul>
-<li><a href="http://xorg.freedesktop.org/releases/individual/proto/">
-dri2proto</a> version 2.6 or later
-<li><a href="http://dri.freedesktop.org/libdrm/">libDRM</a> latest version
-<li>Xorg server version 1.5 or later
-<li>Linux 2.6.28 or later
-</ul>
 <p>
-If you're using a fedora distro the following command should install all
-the needed dependencies:
+Here are some common ways to retrieve most/all of the dependencies based on
+the packaging tool used by your distro.
 </p>
+
 <pre>
-  sudo yum install flex bison imake libtool xorg-x11-proto-devel libdrm-devel \
-  gcc-c++ xorg-x11-server-devel libXi-devel libXmu-devel libXdamage-devel git \
-  expat-devel llvm-devel python-mako
+  zypper source-install --build-deps-only Mesa # openSUSE/SLED/SLES
+  yum-builddep mesa # yum Fedora, OpenSuse(?)
+  dnf builddep mesa # dnf Fedora
+  apt-get build-dep mesa # Debian and derivatives
+  ... # others
 </pre>


-
 <h1 id="autoconf">2. Building with autoconf (Linux/Unix/X11)</h1>

 <p>
@@ -139,22 +162,30 @@ This will create:
 </ul>
 <p>
 Put them all in the same directory to test them.
+
+Additional information is available in <a href="README.WIN32">README.WIN32</a>.
+
 </p>



-<h1 id="other">4. Building for other systems</h1>
+<h1 id="android">4. Building with AOSP (Android)</h1>

 <p>
-Documentation for other environments (some may be very out of date):
+Currently one can build Mesa for Android as part of the AOSP project, yet
+your experience might vary.
 </p>

-<ul>
-<li><a href="README.VMS">README.VMS</a> - VMS
-<li><a href="README.CYGWIN">README.CYGWIN</a> - Cygwin
-<li><a href="README.WIN32">README.WIN32</a> - Win32
-</ul>
+<p>
+In order to achieve that one should update their local manifest to point to the
+upstream repo, set the appropriate BOARD_GPU_DRIVERS and build the
+libGLES_mesa library.
+</p>

+<p>
+FINISHME: Improve on the instructions, add references to Rob H repos/Jenkins,
+Android-x86 and/or other resources.
+</p>


 <h1 id="libs">5. Library Information</h1>
--- a/docs/mangling.html
+++ b/docs/mangling.html
@@ -2,7 +2,7 @@
 <html lang="en">
 <head>
  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Function Name Mangling</title>
+  <title>GL Function Name Mangling</title>
  <link rel="stylesheet" type="text/css" href="mesa.css">
 </head>
 <body>
@@ -14,7 +14,7 @@
 <iframe src="contents.html"></iframe>
 <div class="content">

-<h1>Function Name Mangling</h1>
+<h1>GL Function Name Mangling</h1>

 <p>
 If you want to use both Mesa and another OpenGL library in the same
@@ -25,12 +25,11 @@ This results in all the Mesa functions being prefixed with
 </p>

 <p>
-To do this, recompile Mesa with the compiler flag -DUSE_MGL_NAMESPACE.
-Add the flag to CFLAGS in the configuration file which you want to use.
-For example:
+This option is supported only with the autoconf build. To use it add
+--enable-mangling to your configure line.
 </p>
 <pre>
-CFLAGS += -DUSE_MGL_NAMESPACE
+<code>./configure --enable-mangling ...</code>
 </pre>

 </div>
--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -0,0 +1,509 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Releasing process</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Releasing process</h1>
+
+<ul>
+<li><a href="#overview">Overview</a>
+<li><a href="#schedule">Release schedule</a>
+<li><a href="#pickntest">Cherry-pick and test</a>
+<li><a href="#branch">Making a branchpoint</a>
+<li><a href="#prerelease">Pre-release announcement</a>
+<li><a href="#release">Making a new release</a>
+<li><a href="#announce">Announce the release</a>
+<li><a href="#website">Update the mesa3d.org website</a>
+<li><a href="#bugzilla">Update Bugzilla</a>
+</ul>
+
+<h1 id="overview">Overview</h1>
+
+<p>
+This document uses the convention X.Y.Z for the release number with X.Y being
+the stable branch name.
+<br>
+Mesa provides feature and bugfix releases. The former use zero as patch version (Z),
+while the latter have a non-zero one.
+</p>
+
+<p>
+For example:
+</p>
+<pre>
+	Mesa 10.1.0 - 10.1 branch, feature
+	Mesa 10.1.4 - 10.1 branch, bugfix
+	Mesa 12.0.0 - 12.0 branch, feature
+	Mesa 12.0.2 - 12.0 branch, bugfix
+</pre>
+
+<h1 id="schedule">Release schedule</h1>
+
+<p>
+Releases should happen on Fridays. Delays can occur although those should be kept
+to a minimum.
+</p>
+
+<h2>Feature releases</h2>
+<ul>
+<li>Available approximately every three months.
+<li>Initial timeplan available 2-4 weeks before the planned branchpoint (rc1)
+on the mesa-announce@ mailing list.
+<li>A <a href="#prerelease">pre-release</a> announcement should be available
+approximately 24 hours before the final (non-rc) release.
+</ul>
+
+<h2>Stable releases</h2>
+<ul>
+<li>Normally available once every two weeks.
+<li>Only the latest branch has releases. See note below.
+<li>A <a href="#prerelease">pre-release</a> announcement should be available
+approximately 48 hours before the actual release.
+</ul>
+
+<p>
+Note: There is a one or two release overlap when changing branches. For example:
+<br>
+The final release from the 12.0 series Mesa 12.0.5 will be out around the same
+time (or shortly after) 13.0.1 is out.
+</p>
+
+<h1 id="pickntest">Cherry-picking and testing</h1>
+
+<p>
+Commits nominated for the active branch are picked based on the
+<a href="submittingpatches.html#criteria" target="_parent">criteria</a>
+described in the same section.
+
+<p>
+Maintainer is responsible for testing in various possible permutations of
+the autoconf and scons build.
+</p>
+
+<h2>Cherry-picking and build/check testing</h2>
+
+<p>Done continuously up-to the <a href="#prerelease">pre-release</a> announcement.</p>
+
+<p>
+As an exception, patches can be applied up-to the last ~1h before the actual
+release. This is made <strong>only</strong> with explicit permission/request,
+and the patch <strong>must</strong> be very well contained. Thus it cannot
+affect more than one driver/subsystem.
+</p>
+<p>
+Currently Ilia Mirkin and AMD devs have requested "permanent" exception.
+</p>
+
+
+<ul>
+<li>make distcheck, scons and scons check must pass
+<li>Testing with different versions of system components - LLVM and others is also
+performed where possible.
+</ul>
+<p>
+Achieved by combination of local ad-hoc scripts and AppVeyor plus Travis-CI,
+the latter as part of their Github integration.
+</p>
+
+<h2>Regression/functionality testing</h2>
+
+<p>
+Less often (once or twice), shortly before the pre-release announcement.
+Ensure that testing is redone if Intel devs have requested an exception, as per above.
+</p>
+<ul>
+<li><em>no regressions should be observed for Piglit/dEQP/CTS/Vulkan on Intel platforms</em>
+<li><em>no regressions should be observed for Piglit using the swrast, softpipe
+and llvmpipe drivers</em>
+</ul>
+<p>
+Currently testing is performed courtesy of the Intel OTC team and their Jenkins CI setup. Check with the Intel team over IRC how to get things setup.
+</p>
+
+
+<h1 id="branch">Making a branchpoint</h1>
+
+<p>
+A branchpoint is made such that new development can continue in parallel to
+stabilisation and bugfixing.
+</p>
+
+<p>
+Note: Before doing a branch ensure that basic build and <code>make check</code>
+testing is done and there are little to no issues.
+<br>
+Ideally all of those should be tackled already.
+</p>
+
+<p>
+Check if the version number is going to remain as is, alternatively
+<code> git mv docs/relnotes/{current,new}.html </code> as appropriate.
+</p>
+
+<p>
+To setup the branchpoint:
+</p>
+<pre>
+	git checkout master # make sure we're in master first
+	git tag -s X.Y-branchpoint -m "Mesa X.Y branchpoint"
+	git checkout -b X.Y
+	git push origin X.Y-branchpoint X.Y
+</pre>
+
+<p>
+Now go to
+<a href="https://bugs.freedesktop.org/editversions.cgi?action=add&amp;product=Mesa" target="_parent">Bugzilla</a> and add the new Mesa version X.Y.
+</p>
+<p>
+Check that there are no distribution breaking changes and revert them
+if needed. Extremely rare - we had only one case so far (see
+commit 2ced8eb136528914e1bf4e000dea06a9d53c7e04).
+</p>
+<p>
+Proceed to <a href="#release">release</a> -rc1.
+</p>
+
+<h1 id="prerelease">Pre-release announcement</h1>
+
+<p>
+It comes shortly after outstanding patches in the respective branch are pushed.
+Developers can check, in brief, what's the status of their patches. They,
+alongside very early testers, are strongly encouraged to test the branch and
+report any regressions.
+<br>
+It is followed by a brief period (normally 24 or 48 hours) before the actual
+release is made.
+</p>
+
+<h2>Terminology used</h2>
+<ul><li>Nominated</ul>
+<p>
+Patch that is nominated but yet to be merged in the patch queue/branch.
+</p>
+
+<ul><li>Queued</ul>
+<p>
+Patch is in the queue/branch and will feature in the next release.
+Barring reported regressions or objections from developers.
+</p>
+
+<ul><li>Rejected</ul>
+<p>
+Patch does not fit the
+<a href="submittingpatches.html#criteria" target="_parent">criteria</a> and
+is followed by a brief information.
+<br>
+The release maintainer is human so if you believe you've spotted a mistake do
+let them know.
+</p>
+
+<h2>Format/template</h2>
+<pre>
+Subject: [ANNOUNCE] Mesa X.Y.Z release candidate
+To: mesa-announce@...
+Cc: mesa-dev@...
+
+Hello list,
+
+The candidate for the Mesa X.Y.Z is now available. Currently we have:
+ - NUMBER queued
+ - NUMBER nominated (outstanding)
+ - and NUMBER rejected patches
+
+BRIEF SUMMARY OF CHANGES
+
+Take a look at section "Mesa stable queue" for more information.
+
+
+Testing reports/general approval
+--------------------------------
+Any testing reports (or general approval of the state of the branch) will be
+greatly appreciated.
+
+The plan is to have X.Y.Z this DAY (DATE), around or shortly after TIME.
+
+If you have any questions or suggestions - be that about the current patch
+queue or otherwise, please go ahead.
+
+
+Trivial merge conflicts
+-----------------------
+List of commits where manual intervention was required.
+Keep the authors in the CC list.
+
+commit SHA
+Author: AUTHOR
+
+    COMMIT SUMMARY
+
+    CHERRY PICKED FROM
+
+
+For example:
+
+commit 990f395e007c3204639daa34efc3049f350ee819
+Author: Emil Velikov &lt;emil.velikov@collabora.com&gt;
+
+    anv: automake: cleanup the generated json file during make clean
+
+    (cherry picked from commit 8df581520a823564be0ab5af7dbb7d501b1c9670)
+
+
+Cheers,
+Emil
+
+
+Mesa stable queue
+-----------------
+
+Nominated (NUMBER)
+==================
+
+AUTHOR (NUMBER):
+      SHA     COMMIT SUMMARY
+
+For example:
+
+Dave Airlie (1):
+      2de85eb radv: fix texturesamples to handle single sample case
+
+
+Queued (NUMBER)
+===============
+
+AUTHOR (NUMBER):
+      COMMIT SUMMARY
+
+
+Rejected (NUMBER)
+=================
+
+Rejected (11)
+=============
+
+AUTHOR (NUMBER):
+      SHA     COMMIT SUMMARY
+
+Reason: ...
+</pre>
+
+<h1 id="release">Making a new release</h1>
+
+<p>
+These are the instructions for making a new Mesa release.
+</p>
+
+<h3>Get latest source files</h3>
+<p>
+Ensure the latest code is available - both in your local master and the
+relevant branch.
+</p>
+
+<h3>Perform basic testing</h3>
+<p>
+Most of the testing should already be done during the
+<a href="#pickntest">cherry-pick</a> and
+<a href="#prerelease">pre-announce</a> stages.
+
+So we do a quick 'touch test'
+<ul>
+<li>make distcheck (you can omit this if you're not using --dist below)
+<li>scons (from release tarball)
+<li>the produced binaries work
+</ul>
+
+<p>
+Here is one solution that I've been using.
+</p>
+
+<pre>
+	git clean -fXd; git clean -nxd
+	read # quick cross check any outstanding files
+	export __version=`cat VERSION`
+	export __mesa_root=../
+	export __build_root=./foo
+	chmod 755 -fR $__build_root; rm -rf $__build_root
+	mkdir -p $__build_root &amp;&amp; cd $__build_root
+
+	$__mesa_root/autogen.sh --enable-llvm-shared-libs &amp;&amp; make -j2 distcheck
+
+	# Build check the tarballs (scons)
+	tar -xaf mesa-$__version.tar.xz &amp;&amp; cd mesa-$__version &amp;&amp; scons &amp;&amp; cd ..
+
+	# Test the automake binaries
+	rm -rf mesa-$__version
+	tar -xaf mesa-$__version.tar.xz &amp;&amp; cd mesa-$__version
+	./configure \
+		--with-dri-drivers=i965,swrast \
+		--with-gallium-drivers=swrast \
+		--with-vulkan-drivers=intel \
+		--enable-llvm-shared-libs \
+		--enable-gallium-llvm \
+		--enable-glx-tls \
+		--enable-gbm \
+		--enable-egl \
+		--with-egl-platforms=x11,drm,wayland
+	make -j2 &amp;&amp; DESTDIR=`pwd`/test make -j6 install
+	export LD_LIBRARY_PATH=`pwd`/test/usr/local/lib/
+	export LIBGL_DRIVERS_PATH=`pwd`/test/usr/local/lib/dri/
+	export LIBGL_DEBUG=verbose
+	glxinfo | egrep -o "Mesa.*"
+	glxgears
+	es2_info | egrep "GL_VERSION|GL_RENDERER"
+	es2gears_x11
+	export LIBGL_ALWAYS_SOFTWARE=1
+	glxinfo | egrep -o "Mesa.*|Gallium.*"
+	glxgears
+	es2_info | egrep "GL_VERSION|GL_RENDERER"
+	es2gears_x11
+	export LIBGL_ALWAYS_SOFTWARE=1
+	export GALLIUM_DRIVER=softpipe
+	glxinfo | egrep -o "Mesa.*|Gallium.*"
+	glxgears
+	es2_info | egrep "GL_VERSION|GL_RENDERER"
+	es2gears_x11
+	# Smoke test DOTA2
+	unset LD_LIBRARY_PATH
+	unset LIBGL_DRIVERS_PATH
+	unset LIBGL_DEBUG
+	unset LIBGL_ALWAYS_SOFTWARE
+	export VK_ICD_FILENAMES=`pwd`/src/intel/vulkan/dev_icd.json
+	steam steam://rungameid/570  -vconsole -vulkan
+</pre>
+
+<h3>Update version in file VERSION</h3>
+
+<p>
+Increment the version contained in the file VERSION at Mesa's top-level, then
+commit this change.
+</p>
+
+<h3>Create release notes for the new release</h3>
+
+<p>
+Create a new file docs/relnotes/X.Y.Z.html, (follow the style of the previous
+release notes). Note that the sha256sums section of the release notes should
+be empty (TBD) at this point.
+</p>
+
+<p>
+Two scripts are available to help generate portions of the release notes:
+
+<pre>
+	./bin/bugzilla_mesa.sh
+	./bin/shortlog_mesa.sh
+</pre>
+
+<p>
+The first script identifies commits that reference bugzilla bugs and obtains
+the descriptions of those bugs from bugzilla. The second script generates a
+log of all commits. In both cases, HTML-formatted lists are printed to stdout
+to be included in the release notes.
+</p>
+
+<p>
+Commit these changes and push the branch.
+</p>
+<pre>
+	git push origin HEAD
+</pre>
+
+
+<h3>Use the release.sh script from xorg util-macros</h3>
+
+<p>
+Ensure that the mesa git tree is clean via <code>git clean -fXd</code> and
+start the release process.
+</p>
+<pre>
+	../relative/path/to/release.sh . # append --dist if you've already done distcheck above
+</pre>
+
+<p>
+Pay close attention to the prompts as you might be required to enter your GPG
+and SSH passphrase(s) to sign and upload the files, respectively.
+</p>
+
+<h3>Add the sha256sums to the release notes</h3>
+
+<p>
+Edit docs/relnotes/X.Y.Z.html to add the sha256sums as available in the mesa-X.Y.Z.announce template. Commit this change.
+</p>
+
+<h3>Back on mesa master, add the new release notes into the tree</h3>
+
+<p>
+Something like the following steps will do the trick:
+</p>
+
+<pre>
+	git cherry-pick -x X.Y~1
+	git cherry-pick -x X.Y
+</pre>
+
+<p>
+Also, edit docs/relnotes.html to add a link to the new release notes, and edit
+docs/index.html to add a news entry. Then commit and push:
+</p>
+
+<pre>
+	git commit -as -m "docs: add news item and link release notes for X.Y.Z"
+	git push origin master X.Y
+</pre>
+
+
+<h1 id="announce">Announce the release</h1>
+<p>
+Use the generated template during the releasing process.
+</p>
+
+
+<h1 id="website">Update the mesa3d.org website</h1>
+
+<p>
+NOTE: The recent release managers have not been performing this step
+themselves, but leaving this to Brian Paul, (who has access to the
+sourceforge.net hosting for mesa3d.org). Brian is more than willing to grant
+the permission necessary to future release managers to do this step on their
+own.
+</p>
+
+<p>
+Update the web site by copying the docs/ directory's files to
+/home/users/b/br/brianp/mesa-www/htdocs/ with:
+<br>
+<code>
+sftp USERNAME,mesa3d@web.sourceforge.net
+</code>
+</p>
+
+
+<h1 id="bugzilla">Update Bugzilla</h1>
+
+<p>
+Parse through the bugreports as listed in the docs/relnotes/X.Y.Z.html
+document.
+<br>
+If there's outstanding action, close the bug referencing the commit ID which
+addresses the bug and mention the Mesa version that has the fix.
+</p>
+
+<p>
+Note: the above is not applicable to all the reports, so use common sense.
+</p>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,12 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/13.0.3.html">13.0.3 release notes</a>
+<li><a href="relnotes/12.0.5.html">12.0.5 release notes</a>
+<li><a href="relnotes/13.0.2.html">13.0.2 release notes</a>
+<li><a href="relnotes/13.0.1.html">13.0.1 release notes</a>
+<li><a href="relnotes/12.0.4.html">12.0.4 release notes</a>
+<li><a href="relnotes/13.0.0.html">13.0.0 release notes</a>
 <li><a href="relnotes/12.0.3.html">12.0.3 release notes</a>
 <li><a href="relnotes/12.0.2.html">12.0.2 release notes</a>
 <li><a href="relnotes/12.0.1.html">12.0.1 release notes</a>
--- a/docs/relnotes/12.0.4.html
+++ b/docs/relnotes/12.0.4.html
@@ -0,0 +1,321 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.4 Release Notes / November 10, 2016</h1>
+
+<p>
+Mesa 12.0.4 is a bug fix release which fixes bugs found since the 12.0.3 release.
+</p>
+<p>
+Mesa 12.0.4 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+22026ce4f1c6a7908b0d10ff057decec0a5633afe7f38a0cef5c08d0689f02a6 mesa-12.0.4.tar.gz
+5d6003da867d3f54e5000b4acdfc37e6cce5b6a4459274fdad73e24bd2f0065e mesa-12.0.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with &quot;intel_do_flush_locked failed: No such file or directory&quot; if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from &quot;loader/dri3: Overhaul dri3_update_num_back&quot; commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Axel Davy (4):</p>
+<ul>
+  <li>gallium/util: Really allow aliasing of dst for u_box_union_*</li>
+  <li>st/nine: Fix the calculation of the number of vs inputs</li>
+  <li>st/nine: Fix mistake in Volume9 UnlockBox</li>
+  <li>st/nine: Fix locking CubeTexture surfaces.</li>
+</ul>
+
+<p>Brendan King (1):</p>
+<ul>
+  <li>configure.ac: fix the name of the Wayland Scanner pc file</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: fix swizzle issue in st_create_sampler_view_from_stobj()</li>
+</ul>
+
+<p>Chad Versace (3):</p>
+<ul>
+  <li>egl: Fix truncation error in _eglParseSyncAttribList64</li>
+  <li>i965/sync: Fix uninitalized usage and leak of mutex</li>
+  <li>egl: Don't advertise unsupported platform extensions</li>
+</ul>
+
+<p>Chuanbo Weng (1):</p>
+<ul>
+  <li>gbm: fix potential NULL deref of mapImage/unmapImage.</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>autoconf: Make header install distinct for various APIs (v2)</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>anv: initialise and increment send_sbc</li>
+  <li>anv/wsi: fix apps that acquire multiple images up front</li>
+  <li>Revert "st/vdpau: use linear layout for output surfaces"</li>
+</ul>
+
+<p>Emil Velikov (12):</p>
+<ul>
+  <li>docs: add sha256 checksums for 12.0.3</li>
+  <li>cherry-ignore: add non-applicable i965 commit</li>
+  <li>cherry-ignore: add vaapi encode fix</li>
+  <li>cherry-ignore: add EGL_KHR_debug fix</li>
+  <li>cherry-ignore: add update_renderbuffer_read_surfaces()</li>
+  <li>isl/gen6: correctly check msaa layout samples count</li>
+  <li>egl/x11: don't crash if dri2_dpy-&gt;conn is NULL</li>
+  <li>get-pick-list.sh: Require explicit "12.0" for nominating stable patches</li>
+  <li>automake: don't forget to pick wglext.h in the tarball</li>
+  <li>cherry-ignore: add N/A EGL revert</li>
+  <li>cherry-ignore: add ClientWaitSync fixes</li>
+  <li>Update version to 12.0.4</li>
+</ul>
+
+<p>Eric Anholt (5):</p>
+<ul>
+  <li>travis: Parse configure.ac to pick an updated LIBDRM_VERSION.</li>
+  <li>travis: Update to the Ubuntu Trusty image.</li>
+  <li>travis: Enable vc4 in libdrm to satisfy vc4 test build dependency.</li>
+  <li>travis: Upgrade LLVM dependency to 3.5 and enable LLVM drivers.</li>
+  <li>gallium: Fix install-gallium-links.mk on non-bash /bin/sh</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>pipe_loader_sw: Fix fd leak when instantiated via pipe_loader_sw_probe_kms</li>
+</ul>
+
+<p>Ian Romanick (1):</p>
+<ul>
+  <li>glsl: Fix cut-and-paste bug in hierarchical visitor ir_expression::accept</li>
+</ul>
+
+<p>Ilia Mirkin (16):</p>
+<ul>
+  <li>nv30: set usage to staging so that the buffer is allocated in GART</li>
+  <li>a3xx: make sure to actually clamp depth as requested</li>
+  <li>a3xx: make use of software clipping when hw can't handle it</li>
+  <li>a3xx: use window scissor to simulate viewport xy clip</li>
+  <li>main: GL_RGB10_A2UI does not come with GL 3.0/EXT_texture_integer</li>
+  <li>mesa/formatquery: limit ES target support, fix core context support</li>
+  <li>nir: fix definition of pack_uvec2_to_uint</li>
+  <li>gm107/ir: AL2P writes to a predicate register</li>
+  <li>st/mesa: fix is_scissor_enabled when X/Y are negative</li>
+  <li>nvc0/ir: fix overwriting of value backing non-constant gather offset</li>
+  <li>nv50/ir: copy over value's register id when resolving merge of a phi</li>
+  <li>nvc0/ir: fix textureGather with a single offset</li>
+  <li>gm107/ir: fix texturing with indirect samplers</li>
+  <li>gm107/ir: fix bit offset of tex lod setting for indirect texturing</li>
+  <li>nv50,nvc0: avoid reading out of bounds when getting bogus so info</li>
+  <li>nv50/ir: process texture offset sources as regular sources</li>
+</ul>
+
+<p>James Legg (1):</p>
+<ul>
+  <li>radeonsi: Fix primitive restart when index changes</li>
+</ul>
+
+<p>Jason Ekstrand (9):</p>
+<ul>
+  <li>nir/spirv: Swap the argument order for AtomicCompareExchange</li>
+  <li>nir/spirv: Use the correct sources for CompareExchange on images</li>
+  <li>nir/spirv: Break variable decoration handling into a helper</li>
+  <li>nir/spirv: Refactor variable deocration handling</li>
+  <li>nir/spirv/cfg: Handle switches whose break block is a loop continue</li>
+  <li>nir/spirv/cfg: Detect switch_break after loop_break/continue</li>
+  <li>nir: Add a nop intrinsic</li>
+  <li>nir/spirv/cfg: Use a nop intrinsic for tagging the ends of blocks</li>
+  <li>intel/blorp: Rework our usage of ralloc when compiling shaders</li>
+</ul>
+
+<p>Jonathan Gray (3):</p>
+<ul>
+  <li>genxml: add generated headers to EXTRA_DIST</li>
+  <li>mapi: automake: set VISIBILITY_CFLAGS for shared glapi</li>
+  <li>mesa: automake: include mesa_glinterop.h in distfile</li>
+</ul>
+
+<p>Julien Isorce (1):</p>
+<ul>
+  <li>st/va: also honors interlaced preference when providing a video format</li>
+</ul>
+
+<p>Kenneth Graunke (8):</p>
+<ul>
+  <li>nir: Call nir_metadata_preserve from nir_lower_alu_to_scalar().</li>
+  <li>mesa: Expose RESET_NOTIFICATION_STRATEGY with KHR_robustness.</li>
+  <li>i965: Fix missing _NEW_TRANSFORM in Gen8+ 3DSTATE_DS atom.</li>
+  <li>i965: Add missing BRW_NEW_VS_PROG_DATA to 3DSTATE_CLIP.</li>
+  <li>i965: Move BRW_NEW_FRAGMENT_PROGRAM from 3DSTATE_PS to PS_EXTRA.</li>
+  <li>i965: Add missing BRW_NEW_CS_PROG_DATA to compute constant atom.</li>
+  <li>i965: Add missing BRW_CS_PROG_DATA to CS work group surface atom.</li>
+  <li>i965: Fix gl_InvocationID in dual object GS where invocations == 1.</li>
+</ul>
+
+<p>Marek Olšák (12):</p>
+<ul>
+  <li>radeonsi: fix cubemaps viewed as 2D</li>
+  <li>radeonsi: take compute shader and dispatch indirect memory usage into account</li>
+  <li>radeonsi: fix FP64 UBO loads with indirect uniform block indexing</li>
+  <li>mesa: fix glGetFramebufferAttachmentParameteriv w/ on-demand FRONT_BACK alloc</li>
+  <li>radeonsi: fix interpolateAt opcodes for .zw components</li>
+  <li>radeonsi: fix texture border colors for compute shaders</li>
+  <li>radeonsi: disable ReZ</li>
+  <li>gallium/radeon: make sure the address of separate CMASK is aligned properly</li>
+  <li>winsys/amdgpu: fix radeon_surf::macro_tile_index for imported textures</li>
+  <li>egl: use util/macros.h</li>
+  <li>egl: make interop ABI visible again</li>
+  <li>glx: make interop ABI visible again</li>
+</ul>
+
+<p>Mario Kleiner (1):</p>
+<ul>
+  <li>glx: Perform check for valid fbconfig against proper X-Screen.</li>
+</ul>
+
+<p>Martin Peres (2):</p>
+<ul>
+  <li>loader/dri3: add get_dri_screen() to the vtable</li>
+  <li>loader/dri3: import prime buffers in the currently-bound screen</li>
+</ul>
+
+<p>Matt Whitlock (5):</p>
+<ul>
+  <li>egl/android: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>gallium/auxiliary: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>st/dri: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>st/xa: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+  <li>gallium/winsys: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
+</ul>
+
+<p>Max Staudt (1):</p>
+<ul>
+  <li>r300g: Set R300_VAP_CNTL on RSxxx to avoid triangle flickering</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>loader/dri3: Overhaul dri3_update_num_back</li>
+</ul>
+
+<p>Nicholas Bishop (2):</p>
+<ul>
+  <li>gbm: return appropriate error when queryImage() fails</li>
+  <li>st/dri: check pipe_screen-&gt;resource_get_handle() return value</li>
+</ul>
+
+<p>Nicolai Hähnle (10):</p>
+<ul>
+  <li>gallium/radeon: cleanup and fix branch emits</li>
+  <li>st/glsl_to_tgsi: disable on-the-fly peephole for 64-bit operations</li>
+  <li>st/glsl_to_tgsi: simplify translate_tex_offset</li>
+  <li>st/glsl_to_tgsi: fix textureGatherOffset with indirectly loaded offsets</li>
+  <li>st/mesa: fix vertex elements setup for doubles</li>
+  <li>radeonsi: fix indirect loads of 64 bit constants</li>
+  <li>st/glsl_to_tgsi: fix atomic counter addressing</li>
+  <li>st/glsl_to_tgsi: fix block copies of arrays of doubles</li>
+  <li>st/mesa: only set primitive_restart when the restart index is in range</li>
+  <li>radeonsi: fix 64-bit loads from LDS</li>
+</ul>
+
+<p>Samuel Pitoiset (4):</p>
+<ul>
+  <li>nvc0/ir: fix subops for IMAD</li>
+  <li>gk110/ir: fix wrong emission of OP_NOT</li>
+  <li>nvc0: use correct bufctx when invalidating CP textures</li>
+  <li>nvc0/ir: fix emission of IMAD with NEG modifiers</li>
+</ul>
+
+<p>Stencel, Joanna (1):</p>
+<ul>
+  <li>egl/wayland: add missing destroy_window callback</li>
+</ul>
+
+<p>Tapani Pälli (5):</p>
+<ul>
+  <li>egl: stop claiming support for pbuffer + msaa</li>
+  <li>egl/dri2: set max values for pbuffer width and height</li>
+  <li>egl: add check that eglCreateContext gets a valid config</li>
+  <li>mesa: fix error handling in DrawBuffers</li>
+  <li>egl: set preserved behavior for surface only if config supports it</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>configure.ac: add llvm inteljitevents component if enabled</li>
+</ul>
+
+<p>Vedran Miletić (1):</p>
+<ul>
+  <li>clover: Fix build against clang SVN &gt;= r273191</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>Revert "mesa_glinterop: remove inclusion of GLX header"</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/12.0.5.html
+++ b/docs/relnotes/12.0.5.html
@@ -0,0 +1,138 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 12.0.5 Release Notes / December 5, 2016</h1>
+
+<p>
+Mesa 12.0.5 is a bug fix release which fixes bugs found since the 12.0.4 release.
+</p>
+<p>
+Mesa 12.0.5 implements the OpenGL 4.3 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.3.  OpenGL
+4.3 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+44d08a27d98bfeacd864381189e434d98afbf451689d01f80380dc1d66450e5b  mesa-12.0.5.tar.gz
+2b0a972d8282860a11291c09c3ef01ac45171405951eb21a83c45ed2b4321924  mesa-12.0.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<p>This list is likely incomplete.</p>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (2):</p>
+<ul>
+  <li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
+  <li>glx/glvnd: Fix dispatch function names and indices</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add release notes for 12.0.4</li>
+  <li>docs: add sha256 checksums for 12.0.4</li>
+  <li>cherry-ignore: add reverted LLVM_LIBDIR patch</li>
+  <li>Update version to 12.0.5</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>mesa: change state query return value for RGB565</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>i965/fs/generator: Don't use the address immediate for MOV_INDIRECT</li>
+  <li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
+  <li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>intel: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
+</ul>
+
+<p>Marek Olšák (13):</p>
+<ul>
+  <li>gallium/radeon: fix behavior of GLSL findLSB(0)</li>
+  <li>gallium/radeon: make sure HTILE address is aligned properly</li>
+  <li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
+  <li>gallium/radeon: unify viewport emission code</li>
+  <li>gallium/radeon: set VPORT_ZMIN/MAX registers correctly</li>
+  <li>radeonsi: fix gl_PatchVerticesIn for tessellation evaluation shader</li>
+  <li>radeonsi: fix a crash in imageSize for cubemap arrays</li>
+  <li>radeonsi: emit TA_CS_BC_BASE_ADDR on SI only if the kernel allows it</li>
+  <li>gallium/radeon: add support for sharing textures with DCC between processes</li>
+  <li>radeonsi: always set all blend registers</li>
+  <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+  <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+  <li>radeonsi: silence runtime warnings with LLVM 3.9</li>
+</ul>
+
+<p>Matt Turner (1):</p>
+<ul>
+  <li>anv: Replace "abi_versions" with correct "api_version".</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Steinar H. Gunderson (1):</p>
+<ul>
+  <li>Fix races during _mesa_HashWalk().</li>
+</ul>
+
+<p>Tim Rowley (3):</p>
+<ul>
+  <li>swr: [rasterizer jitter] cleanup supporting different llvm versions</li>
+  <li>swr: [rasterizer jitter] fix llvm-3.7 compile</li>
+  <li>swr: [rasterizer] add support for llvm-3.9</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.0.html
+++ b/docs/relnotes/13.0.0.html
@@ -14,7 +14,7 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 13.0.0 Release Notes / TBD</h1>
+<h1>Mesa 13.0.0 Release Notes / November 1, 2016</h1>

 <p>
 Mesa 13.0.0 is a new development release.
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+4a54d7cdc1a94a8dae05a75ccff48356406d51b0d6a64cbdc641c266e3e008eb  mesa-13.0.0.tar.gz
+94edb4ebff82066a68be79d9c2627f15995e1fe10f67ab3fc63deb842027d727  mesa-13.0.0.tar.xz
 </pre>


@@ -74,11 +75,236 @@ Note: some of the new features are only available with certain drivers.

 <h2>Bug fixes</h2>

-TBD.
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=61907">Bug 61907</a> - Indirect rendering of multi-texture vertex arrays broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=69622">Bug 69622</a> - eglTerminate then eglMakeCurrent crashes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with &quot;intel_do_flush_locked failed: No such file or directory&quot; if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83036">Bug 83036</a> - [ILK]Piglit spec_ARB_copy_image_arb_copy_image-formats fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89599">Bug 89599</a> - symbol 'x86_64_entry_start' is already defined when building with LLVM/clang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90513">Bug 90513</a> - Odd gray and red flicker in The Talos Principle on GK104</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92306">Bug 92306</a> - GL Excess demo renders incorrectly on nv43</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94148">Bug 94148</a> - Framebuffer considered invalid when a draw call is done before glCheckFramebufferStatus</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94561">Bug 94561</a> - [llvmpipe] PIPE_CAP_VIDEO_MEMORY reports negative value on 32 bits (with 16GB ram)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94627">Bug 94627</a> - Game Risen on wine black grass</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94681">Bug 94681</a> - dEQP-GLES31.functional.ssbo.layout.random.all_shared_buffer.23 takes 25 minutes to compile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95000">Bug 95000</a> - deqp: assert in dEQP-GLES3.functional.vertex_arrays.single_attribute.strides.fixed.user_ptr_stride17_components2_quads1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95246">Bug 95246</a> - Segfault in glBindFramebuffer()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95419">Bug 95419</a> - [HSW][regression][bisect] RPG Maker game gives &quot;invalid floating point operation&quot; at startup</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95462">Bug 95462</a> - [BXT,BSW] arb_gpu_shader_fp64 causes gpu hang</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95529">Bug 95529</a> - [regression, bisected] Image corruption in Chrome</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96235">Bug 96235</a> - st_nir.h:34: error: redefinition of typedef ‘nir_shader’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96274">Bug 96274</a> - [NVC0] Failure when compiling compute shader: Assertion `bb-&gt;getFirst()-&gt;serial &lt;= bb-&gt;getExit()-&gt;serial' failed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96285">Bug 96285</a> - Mesa build broken</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96299">Bug 96299</a> - [vulkan] 64 regressions due to mesa d5f2f32</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96343">Bug 96343</a> - oom since st/mesa: implement PBO downloads for ReadPixels</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96346">Bug 96346</a> - [SNB,CTS] es2-cts.gtf.gl.atan regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96349">Bug 96349</a> - [CTS,SKL,BSW,BDW,KBL,BXT] es31-cts.arrays_of_arrays.interactionuniformbuffers3</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96351">Bug 96351</a> - [CTS,SKL,KBL,BXT] es2-cts.gtf.gl2extensiontests.egl_image.egl_image</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96358">Bug 96358</a> - SSO: wrong interface validation between GS and VS (regression due to latest gles 3.1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96425">Bug 96425</a> - [bisected] occasional dark render in The Talos Principle</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96484">Bug 96484</a> - [vulkan] deqp-vk.glsl.builtin.precision.sin / cos regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96504">Bug 96504</a> - [vulkancts] compute tests crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96516">Bug 96516</a> - [bisected: 482526] &quot;clover: Update OpenCL version string to match OpenGL&quot;: clover's build fails because of missing git_sha1.h</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96528">Bug 96528</a> - Location qualifier segfaults during shader compilation</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96541">Bug 96541</a> - Tonga Unreal elemental bad rendering since radeonsi: Decompress DCC textures in a render feedback loop</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96565">Bug 96565</a> - Clive Barker's Jericho displays strange,vivid colors when motion blur enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96607">Bug 96607</a> - [bisected] texture misrender / flicker in The Talos Principle on SKL</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96617">Bug 96617</a> - gl_SecondaryFragDataEXT doesn't work for extended blend func</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96629">Bug 96629</a> - dEQP-GLES2.functional.texture.completeness.cube.not_positive_level_0: Assertion `width &gt;= 1' failed.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96639">Bug 96639</a> - st/mesa: transfer_map with too-high level with dEQP-GLES2.functional.texture.completeness.cube.extra_level</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96674">Bug 96674</a> - [SNB, ILK] spec.ext_image_dma_buf_import.ext_image_dma_buf_import-sample_nv1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96729">Bug 96729</a> - Wrong shader compilation error message</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96762">Bug 96762</a> - [radeonsi,apitrace] Firewatch: nothing rendered in scrollable (text) areas</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96765">Bug 96765</a> - BindFragDataLocationIndexed on array fragment shader output.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96782">Bug 96782</a> - [regression bisected] R600 fp64 and glsl-4.00 piglit failures</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96791">Bug 96791</a> - Cannot use image from swapchains for sampling</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96825">Bug 96825</a> - anv_device.c:31:27: fatal error: anv_timestamp.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96835">Bug 96835</a> - &quot;gallium: Force blend color to 16-byte alignment&quot; crash with &quot;-march=native -O3&quot; causes some 32bit games to crash</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96850">Bug 96850</a> - Crucible tests fail for 32bit mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96878">Bug 96878</a> - [Bisected: cc2d0e6][HSW] &quot;GPU HANG&quot; msg after autologin to gnome-session</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96908">Bug 96908</a> - [radeonsi] MSAA causes graphical artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96911">Bug 96911</a> - webgl2 conformance2/textures/misc/tex-mipmap-levels.html crashes 12.1 Intel driver</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96949">Bug 96949</a> - [regression] Piglit numSamples assertion failures with 9a23a177b90</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96950">Bug 96950</a> - Another regression from bc4e0c486: vbo: Use a bitmask to track the active arrays in vbo_exec*.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96971">Bug 96971</a> - invariant qualifier is not valid for shader inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97019">Bug 97019</a> - [clover] build failure in llvm/codegen/native.cpp:129:52</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97032">Bug 97032</a> - [BDW,SKL] piglit.spec.arb_gpu_shader5.arb_gpu_shader5-interpolateatcentroid-flat</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97033">Bug 97033</a> - [BDW,SKL] piglit.spec.arb_gpu_shader_fp64.varying-packing.simple regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97039">Bug 97039</a> - The Talos Principle and Serious Sam 3 GPU faults</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97083">Bug 97083</a> - [IVB,BYT] GPU hang on deqp-gles31.functional.separate.shader.random</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97140">Bug 97140</a> - dd_draw.c:949:11: error: implicit declaration of function 'fmemopen' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97207">Bug 97207</a> - [IVY BRIDGE] Fragment shader discard writing to depth</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97214">Bug 97214</a> - X not running with error &quot;Failed to make EGL context current&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97225">Bug 97225</a> - [i965 on HD4600 Haswell] xcom switch to ingame cinematics cause segmentation fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97267">Bug 97267</a> - [BDW] GL45-CTS.texture_cube_map_array.sampling asserts inside brw_fs.cpp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97278">Bug 97278</a> - [vulkancts,HSW] all vulkancts tests assert on HSW</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97285">Bug 97285</a> - Darkness in Dota 2 after Patch &quot;Make Gallium's BlitFramebuffer follow the GL 4.4 sRGB rules&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97286">Bug 97286</a> - `make check` fails uniform-initializer-test</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97305">Bug 97305</a> - Gallium: TBOs and images set the offset in elements, not bytes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97307">Bug 97307</a> - glsl/glcpp/tests/glcpp-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97309">Bug 97309</a> - piglit.spec.glsl-1_30.compiler.switch-statement.switch-case-duplicated.vert regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97322">Bug 97322</a> - GenerateMipmap creates wrong mipmap for sRGB texture</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97331">Bug 97331</a> - glDrawElementsBaseVertex doesn't work in display list on i915</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97351">Bug 97351</a> - DrawElementsBaseVertex with VBO ignores base vertex on Intel GMA 9xx in some cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97413">Bug 97413</a> - BioShock Infinite crashes on startup with Mesa Git version, R7 370</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97426">Bug 97426</a> - glScissor gives vertically inverted result</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97448">Bug 97448</a> - [HSW] deqp-vk.api_.copy_and_blit.image_to_image_stencil regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97476">Bug 97476</a> - Shader binaries should not be stored in the PipelineCache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97477">Bug 97477</a> - i915g: gl_FragCoord is always (0.0, max_y)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97513">Bug 97513</a> - clover reports wrong device pointer size</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from &quot;loader/dri3: Overhaul dri3_update_num_back&quot; commit</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97587">Bug 97587</a> - make check nir/tests/control_flow_tests regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97761">Bug 97761</a> - es2-cts.gtf.gl2extensiontests.egl_image_external.testsimpleunassociated crashes</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97773">Bug 97773</a> - New Mesa master now results in warnings in glrender (and subsurfaces and simple-egl), black screen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97790">Bug 97790</a> - Vulkan cts regressions due to 24be63066</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97804">Bug 97804</a> - Later precision statement isn't overriding earlier one</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97808">Bug 97808</a> - &quot;tgsi/scan: don't set interp flags for inputs only used by INTERP instructions&quot; causes glitches in wine with gallium nine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97894">Bug 97894</a> - Crash in u_transfer_unmap_vtbl when unmapping a buffer mapped in different context</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97952">Bug 97952</a> - /usr/include/string.h:518:12: error: exception specification in declaration does not match previous declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97969">Bug 97969</a> - [radeonsi, bisected: fb827c0] Video decoding shows green artifacts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97976">Bug 97976</a> - VCE regression BO too small for addr since winsys/amdgpu: enable buffer allocation from slabs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98005">Bug 98005</a> - VCE dual instance encoding inconsistent since st/va: enable dual instances encode by sync surface</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98128">Bug 98128</a> - nir/tests/control_flow_tests.cpp:79:73: error: ‘nir_loop_first_cf_node’ was not declared in this scope</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98131">Bug 98131</a> - Compiler should reject lowp/mediump qualifiers on atomic_uints</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98133">Bug 98133</a> - GetSynciv should raise an error if bufSize &lt; 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98135">Bug 98135</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.shader.transform_feedback_varyings wants a different GL error code</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98167">Bug 98167</a> - [vulkan, radv] missing libgcrypt and openssl devel results in linker error in libvulkan_common</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98172">Bug 98172</a> - Concurrent call to glClientWaitSync results in segfault in one of the waiters.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98244">Bug 98244</a> - dEQP: textureOffset(sampler2DArrayShadow, ...) should not exist.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98264">Bug 98264</a> - Build broken for i965 due to multiple definitions of intelFenceExtension</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98307">Bug 98307</a> - &quot;st/glsl_to_tgsi: explicitly track all input and output declaration&quot; broke flightgear colors on rs780</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98431">Bug 98431</a> - UnrealEngine v4 demos startup fails to blorp blit assert</li>
+
+</ul>
+

 <h2>Changes</h2>

-TBD.
+Mesa no longer depends on libudev.

 </div>
 </body>
--- a/docs/relnotes/13.0.1.html
+++ b/docs/relnotes/13.0.1.html
@@ -0,0 +1,188 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.1 Release Notes / November 14, 2016</h1>
+
+<p>
+Mesa 13.0.1 is a bug fix release which fixes bugs found since the 13.0.0 release.
+</p>
+<p>
+Mesa 13.0.1 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+7cbb91dead05cde279ee95f86e8321c8e1c8fc9deb88f12e0f587672a10d88c5  mesa-13.0.1.tar.gz
+71962fb2bf77d33b0ad4a565b490dbbeaf4619099c6d9722f04a73187957a731  mesa-13.0.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97715">Bug 97715</a> - [ILK,G45,G965] piglit.spec.arb_separate_shader_objects.misc api error checks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98012">Bug 98012</a> - [IVB] Segfault when running Dolphin twice with Vulkan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98512">Bug 98512</a> - radeon r600 vdpau: Invalid command stream: texture bo too small</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (2):</p>
+<ul>
+  <li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
+  <li>glx/glvnd: Fix dispatch function names and indices</li>
+</ul>
+
+<p>Andreas Boll (1):</p>
+<ul>
+  <li>glx/windows: Add wgl.h to the sources list</li>
+</ul>
+
+<p>Anuj Phogat (1):</p>
+<ul>
+  <li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
+</ul>
+
+<p>Chih-Wei Huang (1):</p>
+<ul>
+  <li>android: avoid using libdrm with host modules</li>
+</ul>
+
+<p>Darren Salt (1):</p>
+<ul>
+  <li>radv/pipeline: Don't dereference NULL dynamic state pointers</li>
+</ul>
+
+<p>Dave Airlie (8):</p>
+<ul>
+  <li>radv: expose xlib platform extension</li>
+  <li>radv: fix dual source blending</li>
+  <li>Revert "st/vdpau: use linear layout for output surfaces"</li>
+  <li>radv: emit correct last export when Z/stencil export is enabled</li>
+  <li>ac/nir: add support for discard_if intrinsic (v2)</li>
+  <li>nir: add conditional discard optimisation (v4)</li>
+  <li>radv: enable conditional discard optimisation on radv.</li>
+  <li>radv: fix GetFenceStatus for signaled fences</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.0</li>
+  <li>amd/addrlib: limit fastcall/regparm to GCC i386</li>
+  <li>anv: use correct .specVersion for extensions</li>
+  <li>radv: use correct .specVersion for extensions</li>
+  <li>radv: Suffix the radeon_icd file with the host CPU</li>
+  <li>Update version to 13.0.1</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Use Newton-Raphson on the 1/W write to fix glmark2 terrain.</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().</li>
+</ul>
+
+<p>Fredrik Höglund (1):</p>
+<ul>
+  <li>radv: add support for anisotropic filtering on VI+</li>
+</ul>
+
+<p>Jason Ekstrand (21):</p>
+<ul>
+  <li>anv/device: Return DEVICE_LOST if execbuf2 fails</li>
+  <li>vulkan/wsi/x11: Better handle wsi_x11_connection_create failure</li>
+  <li>vulkan/wsi/x11: Clean up connections in finish_wsi</li>
+  <li>anv: Better handle return codes from anv_physical_device_init</li>
+  <li>intel/blorp: Use wm_prog_data instead of hand-rolling our own</li>
+  <li>intel/blorp: Pass a brw_stage_prog_data to upload_shader</li>
+  <li>anv/pipeline: Put actual pointers in anv_shader_bin</li>
+  <li>anv/pipeline: Properly cache prog_data::param</li>
+  <li>intel/blorp: Emit all the binding tables</li>
+  <li>anv/device: Add an execbuf wrapper</li>
+  <li>anv: Add a cmd_buffer_execbuf helper</li>
+  <li>anv: Don't presume to know what address is in a surface relocation</li>
+  <li>anv: Add a new bo_pool_init helper</li>
+  <li>anv/allocator: Simplify anv_scratch_pool</li>
+  <li>anv: Initialize anv_bo::offset to -1</li>
+  <li>anv/batch_chain: Improve write_reloc</li>
+  <li>anv: Add an anv_execbuf helper struct</li>
+  <li>anv/batch: Move last_ss_pool_bo_offset to the command buffer</li>
+  <li>anv: Move relocation handling from EndCommandBuffer to QueueSubmit</li>
+  <li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
+  <li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
+</ul>
+
+<p>Kenneth Graunke (2):</p>
+<ul>
+  <li>glsl: Update deref types when resizing implicitly sized arrays.</li>
+  <li>mesa: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
+</ul>
+
+<p>Kristian Høgsberg (1):</p>
+<ul>
+  <li>anv: Do relocations in userspace before execbuf ioctl</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>egl: use util/macros.h</li>
+  <li>egl: make interop ABI visible again</li>
+  <li>glx: make interop ABI visible again</li>
+  <li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix BFE/BFI lowering for GLSL semantics</li>
+  <li>glsl: fix lowering of UBO references of named blocks</li>
+  <li>st/glsl_to_tgsi: fix dvec[34] loads from SSBO</li>
+  <li>st/mesa: fix the layer of VDPAU surface samplers</li>
+</ul>
+
+<p>Steven Toth (3):</p>
+<ul>
+  <li>gallium/hud: fix a problem where objects are free'd while in use.</li>
+  <li>gallium/hud: close a previously opened handle</li>
+  <li>gallium/hud: protect against an initialization race</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>mesa/glsl: delete previously linked shaders earlier when linking</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.2.html
+++ b/docs/relnotes/13.0.2.html
@@ -0,0 +1,189 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.2 Release Notes / November 28, 2016</h1>
+
+<p>
+Mesa 13.0.2 is a bug fix release which fixes bugs found since the 13.0.1 release.
+</p>
+<p>
+Mesa 13.0.2 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6014233a5db6032ab8de4881384871bbe029de684502707794ce7b3e6beec308  mesa-13.0.2.tar.gz
+a6ed622645f4ed61da418bf65adde5bcc4bb79023c36ba7d6b45b389da4416d5  mesa-13.0.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97321">Bug 97321</a> - Query INFO_LOG_LENGTH for empty info log should return 0</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97420">Bug 97420</a> - &quot;#version 0&quot; crashes glsl_compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98632">Bug 98632</a> - Fix build on Hurd without PATH_MAX</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Ben Widawsky (3):</p>
+<ul>
+  <li>i965: Add some APL and KBL SKU strings</li>
+  <li>i965: Reorder PCI ID list to match release order</li>
+  <li>i965/glk: Add basic Geminilake support</li>
+</ul>
+
+<p>Dave Airlie (14):</p>
+<ul>
+  <li>radv: fix texturesamples to handle single sample case</li>
+  <li>wsi: fix VK_INCOMPLETE for vkGetSwapchainImagesKHR</li>
+  <li>radv: don't crash on null swapchain destroy.</li>
+  <li>ac/nir/llvm: fix channel in texture gather lowering code.</li>
+  <li>radv: make sure to flush input attachments correctly.</li>
+  <li>radv: fix image view creation for depth and stencil only</li>
+  <li>radv: spir-v allows texture size query with and without lod.</li>
+  <li>vulkan/wsi/x11: handle timeouts properly in next image acquire (v1.1)</li>
+  <li>vulkan/wsi: store present mode in swapchain base class</li>
+  <li>vulkan/wsi/x11: add support for IMMEDIATE present mode</li>
+  <li>radv: fix texel fetch offset with 2d arrays.</li>
+  <li>radv/si: fix optimal micro tile selection</li>
+  <li>radv/ac/llvm: shadow samplers only return one value.</li>
+  <li>radv: fix 3D clears with baseMiplevel</li>
+</ul>
+
+<p>Eduardo Lima Mitev (2):</p>
+<ul>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfaceFormatsKHR</li>
+  <li>vulkan/wsi/x11: Fix behavior of vkGetPhysicalDeviceSurfacePresentModesKHR</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.1</li>
+  <li>cherry-ignore: add reverted LLVM_LIBDIR patch</li>
+  <li>anv: fix enumeration of properties</li>
+  <li>radv: honour the number of properties available</li>
+  <li>Update version to 13.0.2</li>
+</ul>
+
+<p>Eric Anholt (3):</p>
+<ul>
+  <li>vc4: Don't abort when a shader compile fails.</li>
+  <li>vc4: Clamp the shadow comparison value.</li>
+  <li>vc4: Fix register class handling of DDX/DDY arguments.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (2):</p>
+<ul>
+  <li>util/disk_cache: close a previously opened handle in disk_cache_put (v2)</li>
+  <li>anv: Fix unintentional integer overflow in anv_CreateDmaBufImageINTEL</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>anv/format: handle unsupported formats properly</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>glcpp: Handle '#version 0' and other invalid values</li>
+  <li>glsl: Parse 0 as a preprocessor INTCONSTANT</li>
+</ul>
+
+<p>Jason Ekstrand (15):</p>
+<ul>
+  <li>anv/gen8: Stall when needed in Cmd(Set|Reset)Event</li>
+  <li>anv/wsi: Set the fence to signaled in AcquireNextImageKHR</li>
+  <li>anv: Rework fences</li>
+  <li>vulkan/wsi/wayland: Include pthread.h</li>
+  <li>vulkan/wsi/wayland: Clean up some error handling paths</li>
+  <li>vulkan/wsi: Report the correct min/maxImageCount</li>
+  <li>i965/gs: Allow primitive id to be a system value</li>
+  <li>anv: Handle null in all destructors</li>
+  <li>anv/fence: Handle ANV_FENCE_CREATE_SIGNALED_BIT</li>
+  <li>nir/spirv: Fix handling of gl_PrimitiveId</li>
+  <li>anv/blorp: Ignore clears for attachments first used as resolve destinations</li>
+  <li>anv: Implement a depth stall restriction on gen7</li>
+  <li>anv/cmd_buffer: Handle running out of binding tables in compute shaders</li>
+  <li>anv/cmd_buffer: Emit a CS stall before setting a CS pipeline</li>
+  <li>vulkan/wsi/x11: Implement FIFO mode.</li>
+</ul>
+
+<p>Jordan Justen (2):</p>
+<ul>
+  <li>isl: Fix height calculation in isl_msaa_interleaved_scale_px_to_sa</li>
+  <li>i965/hsw: Set integer mode in sampling state for stencil texturing</li>
+</ul>
+
+<p>Kenneth Graunke (4):</p>
+<ul>
+  <li>intel: Set min_ds_entries on Broxton.</li>
+  <li>i965: Fix compute shader crash.</li>
+  <li>mesa: Drop PATH_MAX usage.</li>
+  <li>i965: Fix GS push inputs with enhanced layouts.</li>
+</ul>
+
+<p>Kevin Strasser (1):</p>
+<ul>
+  <li>vulkan/wsi: Add a thread-safe queue implementation</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: fix multi level clears with VK_REMAINING_MIP_LEVELS</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>gbm: request correct version of the DRI2_FENCE extension</li>
+</ul>
+
+<p>Nicolai Hähnle (2):</p>
+<ul>
+  <li>radeonsi: store group_size_variable in struct si_compute</li>
+  <li>glsl/lower_output_reads: fix geometry shader output handling with conditional emit</li>
+</ul>
+
+<p>Steinar H. Gunderson (1):</p>
+<ul>
+  <li>Fix races during _mesa_HashWalk().</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>mesa: fix empty program log length</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/13.0.3.html
+++ b/docs/relnotes/13.0.3.html
@@ -0,0 +1,177 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 13.0.3 Release Notes / January 5, 2017</h1>
+
+<p>
+Mesa 13.0.3 is a bug fix release which fixes bugs found since the 13.0.2 release.
+</p>
+<p>
+Mesa 13.0.3 implements the OpenGL 4.4 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.4.  OpenGL
+4.4 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+55b07d056f9b855ba9d7c8b2ddc7d3b220a61c6ab1bdc73cbfc2f607721094c2  mesa-13.0.3.tar.gz
+d9aa8be5c176d00d0cd503cb2f64a5a403ea471ec819c022581414860d7ba40e  mesa-13.0.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99038">Bug 99038</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.negative_api.create_pixmap_surface crashes</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Chad Versace (2):</p>
+<ul>
+  <li>i965/mt: Disable aux surfaces after making miptree shareable</li>
+  <li>egl: Fix crashes in eglCreate*Surface()</li>
+</ul>
+
+<p>Dave Airlie (4):</p>
+<ul>
+  <li>anv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: set maxFragmentDualSrcAttachments to 1</li>
+  <li>radv: fix another regression since shadow fixes.</li>
+  <li>radv: add missing license file to radv_meta_bufimage.</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 13.0.2</li>
+  <li>anv: don't double-close the same fd</li>
+  <li>anv: don't leak memory if anv_init_wsi() fails</li>
+  <li>radv: don't leak the fd if radv_physical_device_init() succeeds</li>
+  <li>Update version to 13.0.3</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: In a loop break/continue, jump if everyone has taken the path.</li>
+</ul>
+
+<p>Gwan-gyeong Mun (3):</p>
+<ul>
+  <li>anv: Add missing error-checking to anv_block_pool_init (v2)</li>
+  <li>anv: Update the teardown in reverse order of the anv_CreateDevice</li>
+  <li>vulkan/wsi: Fix resource leak in success path of wsi_queue_init()</li>
+</ul>
+
+<p>Haixia Shi (1):</p>
+<ul>
+  <li>compiler/glsl: fix precision problem of tanh</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>mesa: only verify that enabled arrays have backing buffers</li>
+</ul>
+
+<p>Jason Ekstrand (8):</p>
+<ul>
+  <li>anv/cmd_buffer: Re-emit MEDIA_CURBE_LOAD when CS push constants are dirty</li>
+  <li>anv/image: Rename hiz_surface to aux_surface</li>
+  <li>anv/cmd_buffer: Remove the 1-D case from the HiZ QPitch calculation</li>
+  <li>genxml/gen9: Change the default of MI_SEMAPHORE_WAIT::RegisterPoleMode</li>
+  <li>anv/device: Return the right error for failed maps</li>
+  <li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
+  <li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
+  <li>spirv: Use a simpler and more correct implementaiton of tanh()</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965: Allocate at least some URB space even when max_vertices = 0.</li>
+</ul>
+
+<p>Marek Olšák (17):</p>
+<ul>
+  <li>radeonsi: always set all blend registers</li>
+  <li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
+  <li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
+  <li>radeonsi: consolidate max-work-group-size computation</li>
+  <li>radeonsi: apply a multi-wave workgroup SPI bug workaround to affected CIK chips</li>
+  <li>radeonsi: apply a TC L1 write corruption workaround for SI</li>
+  <li>radeonsi: apply a tessellation bug workaround for SI</li>
+  <li>radeonsi: add a tess+GS hang workaround for VI dGPUs</li>
+  <li>radeonsi: apply the double EVENT_WRITE_EOP workaround to VI as well</li>
+  <li>cso: don't release sampler states that are bound</li>
+  <li>radeonsi: always restore sampler states when unbinding sampler views</li>
+  <li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
+  <li>radeonsi: allow specifying simm16 of emit_waitcnt at call sites</li>
+  <li>radeonsi: wait for outstanding memory instructions in TCS barriers</li>
+  <li>tgsi: fix the src type of TGSI_OPCODE_MEMBAR</li>
+  <li>radeonsi: wait for outstanding LDS instructions in memory barriers if needed</li>
+  <li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
+</ul>
+
+<p>Matt Turner (3):</p>
+<ul>
+  <li>i965/fs: Rename opt_copy_propagate -&gt; opt_copy_propagation.</li>
+  <li>i965/fs: Add unit tests for copy propagation pass.</li>
+  <li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>radeonsi: fix isolines tess factor writes to control ring</li>
+  <li>radeonsi: update all GSVS ring descriptors for new buffer allocations</li>
+  <li>radeonsi: do not kill GS with memory writes</li>
+  <li>radeonsi: fix an off-by-one error in the bounds check for max_vertices</li>
+</ul>
+
+<p>Rhys Kidd (1):</p>
+<ul>
+  <li>glsl: Add pthread libs to cache_test</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>mesa: fix active subroutine uniforms properly</li>
+  <li>Revert "nir: Turn imov/fmov of undef into undef."</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.0.0.html
+++ b/docs/relnotes/17.0.0.html
@@ -0,0 +1,75 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.0.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 17.0.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 17.0.1.
+</p>
+<p>
+Mesa 17.0.0 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_ARB_post_depth_coverage on i965/gen9+</li>
+<li>GL_KHR_blend_equation_advanced on nvc0</li>
+<li>GL_INTEL_conservative_rasterization on i965/gen9+</li>
+<li>GL_NV_image_formats on any driver supporting GL_ARB_shader_image_load_store (i965, nvc0, radeonsi, softpipe)</li>
+<li>GL_ARB_gpu_shader_fp64 in i965/haswell</li>
+<li>GL_ARB_vertex_attrib_64bit in i965/haswell</li>
+<li>GL_ARB_shader_precision in i965/haswell</li>
+<li>Intel Haswell now supports OpenGL 4.2</li>
+<li>GL_OES_geometry_shader on i965/haswell</li>
+<li>GL_OES_texture_cube_map_array on i965/haswell</li>
+<li>GL_OES_viewport_array on i965/haswell</li>
+<li>Vulkan Float64 capability support on Intel's ANV driver</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98480">Bug 98480</a> - Support R8 image texture in ES 3.1</li>
+
+</ul>
+
+<h2>Changes</h2>
+
+TBD.
+
+</div>
+</body>
+</html>
--- a/docs/repository.html
+++ b/docs/repository.html
@@ -75,7 +75,8 @@ follow this procedure:
 <li>Subscribe to the
 <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">mesa-dev</a>
 mailing list.
-<li>Start contributing to the project by posting patches / review requests to
+<li>Start contributing to the project by
+<a href="submittingpatches.html" target="_parent">submitting patches</a> to
 the mesa-dev list.  Specifically,
 <ul>
 <li>Use <code>git send-mail</code> to post your patches to mesa-dev.
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -172,7 +172,7 @@ This tool is useful for:
 </ul>

 <p>
-After building Mesa, the compiler can be found at src/glsl/glsl_compiler
+After building Mesa, the compiler can be found at src/compiler/glsl/glsl_compiler
 </p>

 <p>
@@ -180,7 +180,7 @@ Here's an example of using the compiler to compile a vertex shader and
 emit GL_ARB_vertex_program-style instructions:
 </p>
 <pre>
-    src/glsl/glsl_compiler --dump-ast myshader.vert
+    src/compiler/glsl/glsl_compiler --version XXX --dump-ast myshader.vert
 </pre>

 Options include
@@ -188,7 +188,11 @@ Options include
 <li><b>--dump-ast</b> - dump GPU code
 <li><b>--dump-hir</b> - dump high-level IR code
 <li><b>--dump-lir</b> - dump low-level IR code
-<li><b>--link</b> - ???
+<li><b>--dump-builder</b> - dump GLSL IR code
+<li><b>--link</b> - link shaders
+<li><b>--just-log</b> - display only shader / linker info if it exists,
+without any header or separator
+<li><b>--version</b> - [Mandatory] define the GLSL version to use
 </ul>


@@ -196,7 +200,7 @@ Options include

 <p>
 The source code for Mesa's shading language compiler is in the
-<code>src/glsl/</code> directory.
+<code>src/compiler/glsl/</code> directory.
 </p>

 <p>
--- a/docs/sourcetree.html
+++ b/docs/sourcetree.html
@@ -27,14 +27,18 @@ each directory.
 <li><b>include</b> - Public OpenGL header files
 <li><b>src</b>
  <ul>
+  <li><b>compiler</b> - Common utility sources for different compilers.
+    <ul>
+    <li><b>glsl</b> - the GLSL IR and compiler
+    <li><b>nir</b> - the NIR IR and compiler
+    <li><b>spirv</b> - the SPIR-V compiler
+    </ul>
  <li><b>egl</b> - EGL library sources
    <ul>
-    <li><b>docs</b> - EGL documentation
    <li><b>drivers</b> - EGL drivers
    <li><b>main</b> - main EGL library implementation.  This is where all
        the EGL API functions are implemented, like eglCreateContext().
    </ul>
-  <li><b>glsl</b> - the GLSL compiler
  <li><b>mapi</b> - Mesa APIs
    <li><b>glapi</b> - OpenGL API dispatch layer.  This is where all the
        GL entrypoints like glClear, glBegin, etc. are generated, as well as
@@ -94,7 +98,8 @@ each directory.
      <ul>
      <li><b>i915</b> - Driver for Intel i915/i945.
      <li><b>llvmpipe</b> - Software driver using LLVM for runtime code generation.
-      <li><b>nv*</b> - Drivers for NVIDIA GPUs.
+      <li><b>nouveau</b> - Driver for NVIDIA GPUs.
+      <li><b>radeon</b> - Shared module for the r600 and radeonsi drivers.
      <li><b>radeonsi</b> - Driver for AMD Southern Island.
      <li><b>r300</b> - Driver for ATI R300 - R500.
      <li><b>r600</b> - Driver for ATI/AMD R600 - Northern Island.
@@ -128,16 +133,19 @@ each directory.
          to another.
      <li><b>util</b> - assorted utilities for arithmetic, hashing, surface
          creation, memory management, 2D blitting, simple rendering, etc.
+      <li>XXX more
      </ul>
    <li><b>state_trackers</b> -
       <ul>
       <li><b>clover</b> - OpenCL state tracker
       <li><b>dri</b> - Meta state tracker for DRI drivers
       <li><b>glx</b> - Meta state tracker for GLX
-       <li><b>vdpau</b> - VDPAU state tracker
-       <li><b>wgl</b> -
-       <li><b>xorg</b> - Meta state tracker for Xorg video drivers
+       <li><b>wgl</b> - Windows WGL state tracker
+       <li><b>xa</b> - XA state tracker
       <li><b>xvmc</b> - XvMC state tracker
+       <li><b>vdpau</b> - VDPAU state tracker
+       <li><b>va</b> - VA-API state tracker
+       <li><b>omx</b> - OpenMAX state tracker
       </ul>
    <li><b>winsys</b> -
       <ul>
@@ -148,11 +156,11 @@ each directory.
    </ul>
  </ul>
  <ul>
-  <li><b>glx</b> - The GLX library code for building libGL.  This is used for
-         direct rendering drivers.  It will dynamically load one of the 
-         xxx_dri.so drivers.
+  <li><b>glx</b> - The GLX library code for building libGL using DRI drivers.
  </ul>
-<li><b>lib</b> - where the GL libraries are placed
+<li><b>lib</b> - hardlinks to most binaries as produced by <strong>make</strong>.
+        These (shortcuts) are used for development purposes in conjunction with
+        LD_LIBRARY_PATH and/or LIBGL_DRIVERS_PATH.
 </ul>

 </div>
--- a/docs/specs/WL_bind_wayland_display.spec
+++ b/docs/specs/WL_bind_wayland_display.spec
@@ -75,6 +75,7 @@ New Tokens
        EGL_TEXTURE_Y_U_V_WL                    0x31D7
        EGL_TEXTURE_Y_UV_WL                     0x31D8
        EGL_TEXTURE_Y_XUXV_WL                   0x31D9
+        EGL_TEXTURE_EXTERNAL_WL                 0x31DA

    Accepted in the <attribute> parameter of eglQueryWaylandBufferWL:

@@ -148,6 +149,10 @@ Additions to the EGL 1.4 Specification:
                Two planes, samples Y from the first plane to r in
                the shader, U and V from the second plane to g and a.

+        EGL_TEXTURE_EXTERNAL_WL
+                Treated as a single plane texture, but sampled with
+                samplerExternalOES according to OES_EGL_image_external
+
    After querying the wl_buffer layout, create EGLImages for the
    planes by calling eglCreateImageKHR with wl_buffer as
    EGLClientBuffer, EGL_WAYLAND_BUFFER_WL as the target, NULL
--- a/docs/submittingpatches.html
+++ b/docs/submittingpatches.html
@@ -0,0 +1,378 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Submitting patches</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Submitting patches</h1>
+
+
+<ul>
+<li><a href="#guidelines">Basic guidelines</a>
+<li><a href="#formatting">Patch formatting</a>
+<li><a href="#testing">Testing Patches</a>
+<li><a href="#mailing">Mailing Patches</a>
+<li><a href="#reviewing">Reviewing Patches</a>
+<li><a href="#nominations">Nominating a commit for a stable branch</a>
+<li><a href="#criteria">Criteria for accepting patches to the stable branch</a>
+<li><a href="#gittips">Git tips</a>
+</ul>
+
+<h2 id="guidelines">Basic guidelines</h2>
+
+<ul>
+<li>Patches should not mix code changes with code formatting changes (except,
+perhaps, in very trivial cases.)
+<li>Code patches should follow Mesa
+<a href="codingstyle.html" target="_parent">coding conventions</a>.
+<li>Whenever possible, patches should only affect individual Mesa/Gallium
+components.
+<li>Patches should never introduce build breaks and should be bisectable (see
+<code>git bisect</code>.)
+<li>Patches should be properly <a href="#formatting">formatted</a>.
+<li>Patches should be sufficiently <a href="#testing">tested</a> before submitting.
+<li>Patches should be submitted to <a href="#mailing">mesa-dev</a>
+for <a href="#reviewing">review</a> using <code>git send-email</code>.
+
+</ul>
+
+<h2 id="formatting">Patch formatting</h2>
+
+<ul>
+<li>Lines should be limited to 75 characters or less so that git logs
+displayed in 80-column terminals avoid line wrapping.  Note that git
+log uses 4 spaces of indentation (4 + 75 &lt; 80).
+<li>The first line should be a short, concise summary of the change prefixed
+with a module name.  Examples:
+<pre>
+    mesa: Add support for querying GL_VERTEX_ATTRIB_ARRAY_LONG
+
+    gallium: add PIPE_CAP_DEVICE_RESET_STATUS_QUERY
+
+    i965: Fix missing type in local variable declaration.
+</pre>
+<li>Subsequent patch comments should describe the change in more detail,
+if needed.  For example:
+<pre>
+    i965: Remove end-of-thread SEND alignment code.
+    
+    This was present in Eric's initial implementation of the compaction code
+    for Sandybridge (commit 077d01b6). There is no documentation saying this
+    is necessary, and removing it causes no regressions in piglit on any
+    platform.
+</pre>
+<li>A "Signed-off-by:" line is not required, but not discouraged either.
+<li>If a patch addresses a bugzilla issue, that should be noted in the
+patch comment.  For example:
+<pre>
+   Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=89689
+</pre>
+<li>If there have been several revisions to a patch during the review
+process, they should be noted such as in this example:
+<pre>
+    st/mesa: add ARB_texture_stencil8 support (v4)
+    
+    if we support stencil texturing, enable texture_stencil8
+    there is no requirement to support native S8 for this,
+    the texture can be converted to x24s8 fine.
+    
+    v2: fold fixes from Marek in:
+       a) put S8 last in the list
+       b) fix renderable to always test for d/s renderable
+        fixup the texture case to use a stencil only format
+        for picking the format for the texture view.
+    v3: hit fallback for getteximage
+    v4: put s8 back in front, it shouldn't get picked now (Ilia)
+</pre>
+<li>If someone tested your patch, document it with a line like this:
+<pre>
+    Tested-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<li>If the patch was reviewed (usually the case) or acked by someone,
+that should be documented with:
+<pre>
+    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<li>If sending a later revision of a patch, add all the tags - ack, r-b,
+Cc: mesa-stable and/or other. This provides reviewers with quick feedback if the
+patch has already been reviewed.
+<li>In order for your patch to reach the prospective reviewer easier/faster,
+use the script scripts/get_reviewer.pl to get a list of individuals and include
+them in the CC list.
+<br>
+Please use common sense and do <strong>not</strong> blindly add everyone.
+<br>
+<pre>
+    $ scripts/get_reviewer.pl --help # to get the help screen
+    $ scripts/get_reviewer.pl -f src/egl/drivers/dri2/platform_android.c
+    Rob Herring &lt;robh@kernel.org&gt; (reviewer:ANDROID EGL SUPPORT,added_lines:188/700=27%,removed_lines:58/283=20%)
+    Tomasz Figa &lt;tfiga@chromium.org&gt; (reviewer:ANDROID EGL SUPPORT,authored:12/41=29%,added_lines:308/700=44%,removed_lines:115/283=41%)
+    Emil Velikov &lt;emil.l.velikov@gmail.com&gt; (authored:13/41=32%,removed_lines:76/283=27%)
+</pre>
+</ul>
+
+
+
+<h2 id="testing">Testing Patches</h2>
+
+<p>
+It should go without saying that patches must be tested.  In general,
+do whatever testing is prudent.
+</p>
+
+<p>
+You should always run the Mesa test suite before submitting patches.
+The test suite can be run using the 'make check' command. All tests
+must pass before patches will be accepted; this may mean you have
+to update the tests themselves.
+</p>
+
+<p>
+Whenever possible and applicable, test the patch with
+<a href="http://piglit.freedesktop.org">Piglit</a> and/or
+<a href="https://android.googlesource.com/platform/external/deqp/">dEQP</a>
+to check for regressions.
+</p>
+
+
+<h2 id="mailing">Mailing Patches</h2>
+
+<p>
+Patches should be sent to the mesa-dev mailing list for review:
+<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev">
+mesa-dev@lists.freedesktop.org</a>.
+When submitting a patch make sure to use
+<a href="https://git-scm.com/docs/git-send-email">git send-email</a>
+rather than attaching patches to emails. Sending patches as
+attachments prevents people from being able to provide in-line review
+comments.
+</p>
+
+<p>
+When submitting follow-up patches you can use --in-reply-to to make v2, v3,
+etc patches show up as replies to the originals. This usually works well
+when you're sending out updates to individual patches (as opposed to
+re-sending the whole series). Using --in-reply-to makes
+it harder for reviewers to accidentally review old patches.
+</p>
+
+<p>
+When submitting follow-up patches you should also login to
+<a href="https://patchwork.freedesktop.org">patchwork</a> and change the
+state of your old patches to Superseded.
+</p>
+
+<h2 id="reviewing">Reviewing Patches</h2>
+
+<p>
+When you've reviewed a patch on the mailing list, please be unambiguous
+about your review.  That is, state either
+</p>
+<pre>
+    Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+or
+<pre>
+    Acked-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<p>
+Rather than saying just "LGTM" or "Seems OK".
+</p>
+
+<p>
+If small changes are suggested, it's OK to say something like:
+</p>
+<pre>
+   With the above fixes, Reviewed-by: Joe Hacker &lt;jhacker@foo.com&gt;
+</pre>
+<p>
+which tells the patch author that the patch can be committed, as long
+as the issues are resolved first.
+</p>
+
+
+<h2 id="nominations">Nominating a commit for a stable branch</h2>
+
+<p>
+There are three ways to nominate a patch for inclusion in the stable branch and
+release.
+</p>
+<ul>
+<li> By adding the Cc: mesa-stable@ tag as described below.
+<li> Sending the commit ID (as seen in master branch) to the mesa-stable@ mailing list.
+<li> Forwarding the patch from the mesa-dev@ mailing list.
+</li>
+</ul>
+<p>
+Note: resending a patch identical to one on mesa-dev@ or one that differs only
+by the extra mesa-stable@ tag is <strong>not</strong> recommended.
+</p>
+
+
+<h3 id="thetag">The stable tag</h3>
+
+<p>
+If you want a commit to be applied to a stable branch,
+you should add an appropriate note to the commit message.
+</p>
+
+<p>
+Here are some examples of such a note:
+</p>
+<ul>
+  <li>CC: &lt;mesa-stable@lists.freedesktop.org&gt;</li>
+  <li>CC: "9.2 10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
+  <li>CC: "10.0" &lt;mesa-stable@lists.freedesktop.org&gt;</li>
+</ul>
+
+Simply adding the CC to the mesa-stable list address is adequate to nominate
+the commit for the most-recently-created stable branch. It is only necessary
+to specify a specific branch name, (such as "9.2 10.0" or "10.0" in the
+examples above), if you want to nominate the commit for an older stable
+branch. And, as in these examples, you can nominate the commit for the older
+branch in addition to the more recent branch, or nominate the commit
+exclusively for the older branch.
+
+This "CC" syntax for patch nomination will cause patches to automatically be
+copied to the mesa-stable@ mailing list when you use "git send-email" to send
+patches to the mesa-dev@ mailing list. If you prefer using --suppress-cc that
+won't have any negative effect on the patch nomination.
+
+<p>
+Note: by removing the tag [as the commit is pushed] the patch is
+<strong>explicitly</strong> rejected from inclusion in the stable branch(es).
+<br>
+Thus, drop the line <strong>only</strong> if you want to cancel the nomination.
+</p>
+
+<h2 id="criteria">Criteria for accepting patches to the stable branch</h2>
+
+Mesa has a designated release manager for each stable branch, and the release
+manager is the only developer that should be pushing changes to these
+branches. Everyone else should simply nominate patches using the mechanism
+described above.
+
+The stable-release manager will work with the list of nominated patches, and
+for each patch that meets the criteria below will cherry-pick the patch with:
+<code>git cherry-pick -x &lt;commit&gt;</code>. The <code>-x</code> option is
+important so that the picked patch references the commit ID of the original
+patch.
+
+The stable-release manager may at times need to force-push changes to the
+stable branches (for example, to drop a previously-picked patch that was later
+identified as causing a regression). These force-pushes may cause changes to
+be lost from the stable branch if developers push things directly. Consider
+yourself warned.
+
+The stable-release manager is also given broad discretion in rejecting patches
+that have been nominated for the stable branch. The most basic rule is that
+the stable branch is for bug fixes only, (no new features, no
+regressions). Here is a non-exhaustive list of some reasons that a patch may
+be rejected:
+
+<ul>
+  <li>Patch introduces a regression. Any reported build breakage or other
+  regression caused by a particular patch, (game no longer works, piglit test
+  changes from PASS to FAIL), is justification for rejecting a patch.</li>
+
+  <li>Patch is too large, (say, larger than 100 lines)</li>
+
+  <li>Patch is not a fix. For example, a commit that moves code around with no
+  functional change should be rejected.</li>
+
+  <li>Patch fix is not clearly described. For example, a commit message
+  of only a single line, no description of the bug, no mention of bugzilla,
+  etc.</li>
+
+  <li>Patch has not obviously been reviewed. For example, the commit message
+  has no Reviewed-by, Signed-off-by, nor Tested-by tags from anyone but the
+  author.</li>
+
+  <li>Patch has not already been merged to the master branch. As a rule, bug
+  fixes should never be applied first to a stable branch. Patches should land
+  first on the master branch and then be cherry-picked to a stable
+  branch. (This is to avoid future releases causing regressions if the patch
+  is not also applied to master.) The only things that might look like
+  exceptions would be backports of patches from master that happen to look
+  significantly different.</li>
+
+  <li>Patch depends on too many other patches. Ideally, all stable-branch
+  patches should be self-contained. It sometimes occurs that a single, logical
+  bug-fix occurs as two separate patches on master, (such as an original
+  patch, then a subsequent fix-up to that patch). In such a case, these two
+  patches should be squashed into a single, self-contained patch for the
+  stable branch. (Of course, if the squashing makes the patch too large, then
+  that could be a reason to reject the patch.)</li>
+
+  <li>Patch includes new feature development, not bug fixes. New OpenGL
+  features, extensions, etc. should be applied to Mesa master and included in
+  the next major release. Stable releases are intended only for bug fixes.
+
+  Note: As an exception to this rule, the stable-release manager may accept
+  hardware-enabling "features". For example, backports of new code to support
+  a newly-developed hardware product can be accepted if they can be reasonably
+  determined to not have effects on other hardware.</li>
+
+  <li>Patch is a performance optimization. As a rule, performance patches are
+  not candidates for the stable branch. The only exception might be a case
+  where an application's performance was recently severely impacted so as to
+  become unusable. The fix for this performance regression could then be
+  considered for a stable branch. The optimization must also be
+  non-controversial and the patches still need to meet the other criteria of
+  being simple and self-contained.</li>
+
+  <li>Patch introduces a new failure mode (such as an assert). While the new
+  assert might technically be correct, for example to make Mesa more
+  conformant, this is not the kind of "bug fix" we want in a stable
+  release. The potential problem here is that an OpenGL program that was
+  previously working, (even if technically non-compliant with the
+  specification), could stop working after this patch. So that would be a
+  regression that is unacceptable for the stable branch.</li>
+</ul>
+
+<h2 id="gittips">Git tips</h2>
+
+<ul>
+<li><code>git rebase -i ...</code> is your friend. Don't be afraid to use it.
+<li>Apply a fixup to commit FOO.
+<pre>
+    git add ...
+    git commit --fixup=FOO
+    git rebase -i --autosquash ...
+</pre>
+<li>Test for build breakage between patches, e.g. for the last 8 commits.
+<pre>
+    git rebase -i --exec="make -j4" HEAD~8
+</pre>
+<li>Set the default mailing address for your repo.
+<pre>
+    git config --local sendemail.to mesa-dev@lists.freedesktop.org
+</pre>
+<li> Add a version to the subject line of a patch series (in this case for the
+last 8 commits) before sending.
+<pre>
+    git send-email --subject-prefix="PATCH v4" HEAD~8
+    git send-email -v4 @~8 # shorter version, inherited from git format-patch
+</pre>
+<li> Configure git to use the get_reviewer.pl script interactively. Thus you
+can avoid adding the world to the CC list.
+<pre>
+    git config sendemail.cccmd "./scripts/get_reviewer.pl -i"
+</pre>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/include/EGL/eglmesaext.h
+++ b/include/EGL/eglmesaext.h
@@ -52,6 +52,7 @@ extern "C" {
 #define EGL_TEXTURE_Y_U_V_WL            0x31D7
 #define EGL_TEXTURE_Y_UV_WL             0x31D8
 #define EGL_TEXTURE_Y_XUXV_WL           0x31D9
+#define EGL_TEXTURE_EXTERNAL_WL         0x31DA

 struct wl_display;
 struct wl_resource;
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -340,12 +340,19 @@ struct __DRI2throttleExtensionRec {
 */

 #define __DRI2_FENCE "DRI2_Fence"
-#define __DRI2_FENCE_VERSION 1
+#define __DRI2_FENCE_VERSION 2

 #define __DRI2_FENCE_TIMEOUT_INFINITE     0xffffffffffffffffllu

 #define __DRI2_FENCE_FLAG_FLUSH_COMMANDS  (1 << 0)

+/**
+ * \name Capabilities that might be returned by __DRI2fenceExtensionRec::get_capabilities
+ */
+/*@{*/
+#define __DRI_FENCE_CAP_NATIVE_FD 1
+/*@}*/
+
 struct __DRI2fenceExtensionRec {
   __DRIextension base;

@@ -390,6 +397,41 @@ struct __DRI2fenceExtensionRec {
    *                sense with this function (right now there are none)
    */
   void (*server_wait_sync)(__DRIcontext *ctx, void *fence, unsigned flags);
+
+   /**
+    * Query for general capabilities of the driver that concern fences.
+    * Returns a bitmask of __DRI_FENCE_CAP_x
+    *
+    * \since 2
+    */
+   unsigned (*get_capabilities)(__DRIscreen *screen);
+
+   /**
+    * Create an fd (file descriptor) associated fence.  If the fence fd
+    * is -1, this behaves similarly to create_fence() except that when
+    * rendering is flushed the driver creates a fence fd.  Otherwise,
+    * the driver wraps an existing fence fd.
+    *
+    * This is used to implement the EGL_ANDROID_native_fence_sync extension.
+    *
+    * \since 2
+    *
+    * \param ctx     the context associated with the fence
+    * \param fd      the fence fd or -1
+    */
+   void *(*create_fence_fd)(__DRIcontext *ctx, int fd);
+
+   /**
+    * For fences created with create_fence_fd(), after rendering is flushed,
+    * this retrieves the native fence fd.  Caller takes ownership of the
+    * fd and will close() it when it is no longer needed.
+    *
+    * \since 2
+    *
+    * \param screen  the screen associated with the fence
+    * \param fence   the fence
+    */
+   int (*get_fence_fd)(__DRIscreen *screen, void *fence);
 };


@@ -1121,6 +1163,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FORMAT_XRGB2101010  0x1009
 #define __DRI_IMAGE_FORMAT_ARGB2101010  0x100a
 #define __DRI_IMAGE_FORMAT_SARGB8       0x100b
+#define __DRI_IMAGE_FORMAT_ARGB1555     0x100c

 #define __DRI_IMAGE_USE_SHARE		0x0001
 #define __DRI_IMAGE_USE_SCANOUT		0x0002
@@ -1148,6 +1191,7 @@ struct __DRIdri2ExtensionRec {

 #define __DRI_IMAGE_FOURCC_R8		0x20203852
 #define __DRI_IMAGE_FOURCC_GR88		0x38385247
+#define __DRI_IMAGE_FOURCC_ARGB1555	0x35315241
 #define __DRI_IMAGE_FOURCC_RGB565	0x36314752
 #define __DRI_IMAGE_FOURCC_ARGB8888	0x34325241
 #define __DRI_IMAGE_FOURCC_XRGB8888	0x34325258
--- a/include/c99_compat.h
+++ b/include/c99_compat.h
@@ -163,6 +163,7 @@ test_c99_compat_h(const void * restrict a,
 #    define HAVE_FUNC_ATTRIBUTE_UNUSED 1
 #    define HAVE_FUNC_ATTRIBUTE_FORMAT 1
 #    define HAVE_FUNC_ATTRIBUTE_PACKED 1
+#    define HAVE_FUNC_ATTRIBUTE_ALIAS 1

 #    if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)
       /* https://gcc.gnu.org/onlinedocs/gcc-4.3.6/gcc/Other-Builtins.html */
--- a/include/d3dadapter/present.h
+++ b/include/d3dadapter/present.h
@@ -35,6 +35,22 @@ typedef struct ID3DPresentGroup ID3DPresentGroup;
 typedef struct ID3DAdapter9 ID3DAdapter9;
 typedef struct D3DWindowBuffer D3DWindowBuffer;

+/* Available since version 1.3 */
+typedef struct _D3DPRESENT_PARAMETERS2_ {
+    /* Whether D3DSWAPEFFECT_DISCARD is allowed to release the
+     * D3DWindowBuffers in any order, and possibly with a delay.
+     * FALSE (Default): buffers should be released as soon as possible.
+     * TRUE: it is allowed to release some buffers with a delay, and in
+     * a random order. */
+    BOOL AllowDISCARDDelayedRelease;
+    /* User preference for D3DSWAPEFFECT_DISCARD with D3DPRESENT_INTERVAL_IMMEDIATE.
+     * FALSE (Default): User prefers presentation to occur as soon as possible,
+     * with potential tearing.
+     * TRUE: User prefers presentation to be tear free. Requires
+     * AllowDISCARDDelayedRelease to have any effect. */
+    BOOL TearFreeDISCARD;
+} D3DPRESENT_PARAMETERS2, *PD3DPRESENT_PARAMETERS2, *LPD3DPRESENT_PARAMETERS2;
+
 /* Presentation backend for drivers to display their brilliant work */
 typedef struct ID3DPresentVtbl
 {
@@ -54,7 +70,10 @@ typedef struct ID3DPresentVtbl
    HRESULT (WINAPI *DestroyD3DWindowBuffer)(ID3DPresent *This, D3DWindowBuffer *buffer);
    /* After presenting a buffer to the window system, the buffer
     * may be used as is (no copy of the content) by the window system.
-     * You must not use a non-released buffer, else the user may see undefined content. */
+     * You must not use a non-released buffer, else the user may see undefined content.
+     * Note: This function also waits until the buffer content has been displayed (this
+     * can be after the release of the buffer if the window system decided to make
+     * an internal copy and release early). */
    HRESULT (WINAPI *WaitBufferReleased)(ID3DPresent *This, D3DWindowBuffer *buffer);
    HRESULT (WINAPI *FrontBufferCopy)(ID3DPresent *This, D3DWindowBuffer *buffer);
    /* It is possible to do partial copy, but impossible to do resizing, which must
@@ -75,6 +94,11 @@ typedef struct ID3DPresentVtbl
    BOOL (WINAPI *ResolutionMismatch)(ID3DPresent *This);
    HANDLE (WINAPI *CreateThread)(ID3DPresent *This, void *pThreadfunc, void *pParam);
    BOOL (WINAPI *WaitForThread)(ID3DPresent *This, HANDLE thread);
+    /* Available since version 1.3 */
+    HRESULT (WINAPI *SetPresentParameters2)(ID3DPresent *This, D3DPRESENT_PARAMETERS2 *pParameters);
+    BOOL (WINAPI *IsBufferReleased)(ID3DPresent *This, D3DWindowBuffer *buffer);
+    /* Wait until a buffer gets released. */
+    HRESULT (WINAPI *WaitBufferReleaseEvent)(ID3DPresent *This);
 } ID3DPresentVtbl;

 struct ID3DPresent
@@ -106,6 +130,9 @@ struct ID3DPresent
 #define ID3DPresent_ResolutionMismatch(p) (p)->lpVtbl->ResolutionMismatch(p)
 #define ID3DPresent_CreateThread(p,a,b) (p)->lpVtbl->CreateThread(p,a,b)
 #define ID3DPresent_WaitForThread(p,a) (p)->lpVtbl->WaitForThread(p,a)
+#define ID3DPresent_SetPresentParameters2(p,a) (p)->lpVtbl->SetPresentParameters2(p,a)
+#define ID3DPresent_IsBufferReleased(p,a) (p)->lpVtbl->IsBufferReleased(p,a)
+#define ID3DPresent_WaitBufferReleaseEvent(p) (p)->lpVtbl->WaitBufferReleaseEvent(p)

 typedef struct ID3DPresentGroupVtbl
 {
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -109,6 +109,10 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
+CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
+CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
+CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
@@ -134,6 +138,11 @@ CHIPSET(0x1932, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193A, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
 CHIPSET(0x193B, skl_gt4, "Intel(R) Iris Pro Graphics 580 (Skylake GT4e)")
 CHIPSET(0x193D, skl_gt4, "Intel(R) Iris Pro Graphics P580 (Skylake GT4e)")
+CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
+CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics 505 (Broxton)")
+CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics 500 (Broxton 2x6)")
 CHIPSET(0x5902, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x5906, kbl_gt1, "Intel(R) Kabylake GT1")
 CHIPSET(0x590A, kbl_gt1, "Intel(R) Kabylake GT1")
@@ -144,22 +153,15 @@ CHIPSET(0x5913, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5915, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5917, kbl_gt1_5, "Intel(R) Kabylake GT1.5")
 CHIPSET(0x5912, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x5916, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x5916, kbl_gt2, "Intel(R) HD Graphics 620 (Kabylake GT2)")
 CHIPSET(0x591A, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591B, kbl_gt2, "Intel(R) Kabylake GT2")
 CHIPSET(0x591D, kbl_gt2, "Intel(R) Kabylake GT2")
-CHIPSET(0x591E, kbl_gt2, "Intel(R) Kabylake GT2")
+CHIPSET(0x591E, kbl_gt2, "Intel(R) HD Graphics 615 (Kabylake GT2)")
 CHIPSET(0x5921, kbl_gt2, "Intel(R) Kabylake GT2F")
 CHIPSET(0x5923, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5926, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x5927, kbl_gt3, "Intel(R) Kabylake GT3")
 CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
-CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
-CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
-CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x0A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x1A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
-CHIPSET(0x5A84, bxt,     "Intel(R) HD Graphics (Broxton)")
-CHIPSET(0x5A85, bxt_2x6, "Intel(R) HD Graphics (Broxton 2x6)")
+CHIPSET(0x3184, glk,     "Intel(R) HD Graphics (Geminilake)")
+CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -205,3 +205,10 @@ CHIPSET(0x67CF, POLARIS10_, POLARIS10)
 CHIPSET(0x67DF, POLARIS10_, POLARIS10)

 CHIPSET(0x98E4, STONEY_, STONEY)
+
+CHIPSET(0x6980, POLARIS12_, POLARIS12)
+CHIPSET(0x6981, POLARIS12_, POLARIS12)
+CHIPSET(0x6985, POLARIS12_, POLARIS12)
+CHIPSET(0x6986, POLARIS12_, POLARIS12)
+CHIPSET(0x6987, POLARIS12_, POLARIS12)
+CHIPSET(0x699F, POLARIS12_, POLARIS12)
--- a/include/vulkan/vk_icd.h
+++ b/include/vulkan/vk_icd.h
@@ -1,28 +1,56 @@
+//
+// File: vk_icd.h
+//
+/*
+ * Copyright (c) 2015-2016 The Khronos Group Inc.
+ * Copyright (c) 2015-2016 Valve Corporation
+ * Copyright (c) 2015-2016 LunarG, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
 #ifndef VKICD_H
 #define VKICD_H

-#include "vk_platform.h"
+#include "vulkan.h"

+/*
+ * Loader-ICD version negotiation API
+ */
+#define CURRENT_LOADER_ICD_INTERFACE_VERSION 3
+#define MIN_SUPPORTED_LOADER_ICD_INTERFACE_VERSION 0
+typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderICDInterfaceVersion)(uint32_t *pVersion);
 /*
 * The ICD must reserve space for a pointer for the loader's dispatch
 * table, at the start of <each object>.
 * The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro.
 */

-#define ICD_LOADER_MAGIC   0x01CDC0DE
+#define ICD_LOADER_MAGIC 0x01CDC0DE

-typedef union _VK_LOADER_DATA {
-  uintptr_t loaderMagic;
-  void *loaderData;
+typedef union {
+    uintptr_t loaderMagic;
+    void *loaderData;
 } VK_LOADER_DATA;

-static inline void set_loader_magic_value(void* pNewObject) {
-    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline void set_loader_magic_value(void *pNewObject) {
+    VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    loader_info->loaderMagic = ICD_LOADER_MAGIC;
 }

-static inline bool valid_loader_magic_value(void* pNewObject) {
-    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
+static inline bool valid_loader_magic_value(void *pNewObject) {
+    const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
    return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC;
 }

@@ -30,56 +58,74 @@ static inline bool valid_loader_magic_value(void* pNewObject) {
 * Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that
 * contains the platform-specific connection and surface information.
 */
-typedef enum _VkIcdWsiPlatform {
+typedef enum {
    VK_ICD_WSI_PLATFORM_MIR,
    VK_ICD_WSI_PLATFORM_WAYLAND,
    VK_ICD_WSI_PLATFORM_WIN32,
    VK_ICD_WSI_PLATFORM_XCB,
    VK_ICD_WSI_PLATFORM_XLIB,
+    VK_ICD_WSI_PLATFORM_DISPLAY
 } VkIcdWsiPlatform;

-typedef struct _VkIcdSurfaceBase {
-    VkIcdWsiPlatform   platform;
+typedef struct {
+    VkIcdWsiPlatform platform;
 } VkIcdSurfaceBase;

 #ifdef VK_USE_PLATFORM_MIR_KHR
-typedef struct _VkIcdSurfaceMir {
-    VkIcdSurfaceBase   base;
-    MirConnection*     connection;
-    MirSurface*        mirSurface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    MirConnection *connection;
+    MirSurface *mirSurface;
 } VkIcdSurfaceMir;
 #endif // VK_USE_PLATFORM_MIR_KHR

 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
-typedef struct _VkIcdSurfaceWayland {
-    VkIcdSurfaceBase   base;
-    struct wl_display* display;
-    struct wl_surface* surface;
+typedef struct {
+    VkIcdSurfaceBase base;
+    struct wl_display *display;
+    struct wl_surface *surface;
 } VkIcdSurfaceWayland;
 #endif // VK_USE_PLATFORM_WAYLAND_KHR

 #ifdef VK_USE_PLATFORM_WIN32_KHR
-typedef struct _VkIcdSurfaceWin32 {
-    VkIcdSurfaceBase   base;
-    HINSTANCE          hinstance;
-    HWND               hwnd;
+typedef struct {
+    VkIcdSurfaceBase base;
+    HINSTANCE hinstance;
+    HWND hwnd;
 } VkIcdSurfaceWin32;
 #endif // VK_USE_PLATFORM_WIN32_KHR

 #ifdef VK_USE_PLATFORM_XCB_KHR
-typedef struct _VkIcdSurfaceXcb {
-    VkIcdSurfaceBase   base;
-    xcb_connection_t*  connection;
-    xcb_window_t       window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    xcb_connection_t *connection;
+    xcb_window_t window;
 } VkIcdSurfaceXcb;
 #endif // VK_USE_PLATFORM_XCB_KHR

 #ifdef VK_USE_PLATFORM_XLIB_KHR
-typedef struct _VkIcdSurfaceXlib {
-    VkIcdSurfaceBase   base;
-    Display*           dpy;
-    Window             window;
+typedef struct {
+    VkIcdSurfaceBase base;
+    Display *dpy;
+    Window window;
 } VkIcdSurfaceXlib;
 #endif // VK_USE_PLATFORM_XLIB_KHR

+#ifdef VK_USE_PLATFORM_ANDROID_KHR
+typedef struct {
+    ANativeWindow* window;
+} VkIcdSurfaceAndroid;
+#endif //VK_USE_PLATFORM_ANDROID_KHR
+
+typedef struct {
+    VkIcdSurfaceBase base;
+    VkDisplayModeKHR displayMode;
+    uint32_t planeIndex;
+    uint32_t planeStackIndex;
+    VkSurfaceTransformFlagBitsKHR transform;
+    float globalAlpha;
+    VkDisplayPlaneAlphaFlagBitsKHR alphaMode;
+    VkExtent2D imageExtent;
+} VkIcdSurfaceDisplay;
+
 #endif // VKICD_H
--- a/include/vulkan/vk_platform.h
+++ b/include/vulkan/vk_platform.h
@@ -2,26 +2,19 @@
 // File: vk_platform.h
 //
 /*
-** Copyright (c) 2014-2015 The Khronos Group Inc.
+** Copyright (c) 2014-2017 The Khronos Group Inc.
 **
-** Permission is hereby granted, free of charge, to any person obtaining a
-** copy of this software and/or associated documentation files (the
-** "Materials"), to deal in the Materials without restriction, including
-** without limitation the rights to use, copy, modify, merge, publish,
-** distribute, sublicense, and/or sell copies of the Materials, and to
-** permit persons to whom the Materials are furnished to do so, subject to
-** the following conditions:
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
 **
-** The above copyright notice and this permission notice shall be included
-** in all copies or substantial portions of the Materials.
+**     http://www.apache.org/licenses/LICENSE-2.0
 **
-** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
 */


@@ -58,13 +51,13 @@ extern "C"
    #define VKAPI_ATTR
    #define VKAPI_CALL __stdcall
    #define VKAPI_PTR  VKAPI_CALL
-#elif defined(__ANDROID__) && defined(__ARM_EABI__) && !defined(__ARM_ARCH_7A__)
-    // Android does not support Vulkan in native code using the "armeabi" ABI.
-    #error "Vulkan requires the 'armeabi-v7a' or 'armeabi-v7a-hard' ABI on 32-bit ARM CPUs"
-#elif defined(__ANDROID__) && defined(__ARM_ARCH_7A__)
-    // On Android/ARMv7a, Vulkan functions use the armeabi-v7a-hard calling
-    // convention, even if the application's native code is compiled with the
-    // armeabi-v7a calling convention.
+#elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH < 7
+    #error "Vulkan isn't supported for the 'armeabi' NDK ABI"
+#elif defined(__ANDROID__) && defined(__ARM_ARCH) && __ARM_ARCH >= 7 && defined(__ARM_32BIT_STATE)
+    // On Android 32-bit ARM targets, Vulkan functions use the "hardfloat"
+    // calling convention, i.e. float parameters are passed in registers. This
+    // is true even if the rest of the application passes floats on the stack,
+    // as it does by default when compiling for the armeabi-v7a NDK ABI.
    #define VKAPI_ATTR __attribute__((pcs("aapcs-vfp")))
    #define VKAPI_CALL
    #define VKAPI_PTR  VKAPI_ATTR
--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
@@ -8,24 +8,17 @@ extern "C" {
 /*
 ** Copyright (c) 2015-2016 The Khronos Group Inc.
 **
-** Permission is hereby granted, free of charge, to any person obtaining a
-** copy of this software and/or associated documentation files (the
-** "Materials"), to deal in the Materials without restriction, including
-** without limitation the rights to use, copy, modify, merge, publish,
-** distribute, sublicense, and/or sell copies of the Materials, and to
-** permit persons to whom the Materials are furnished to do so, subject to
-** the following conditions:
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
 **
-** The above copyright notice and this permission notice shall be included
-** in all copies or substantial portions of the Materials.
+**     http://www.apache.org/licenses/LICENSE-2.0
 **
-** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
 */

 /*
@@ -50,7 +43,7 @@ extern "C" {
 #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
 #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
 // Version of this file
-#define VK_HEADER_VERSION 6
+#define VK_HEADER_VERSION 38


 #define VK_NULL_HANDLE 0
@@ -60,11 +53,13 @@ extern "C" {
 #define VK_DEFINE_HANDLE(object) typedef struct object##_T* object;


-#if defined(__LP64__) || defined(_WIN64) || defined(__x86_64__) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
+#if !defined(VK_DEFINE_NON_DISPATCHABLE_HANDLE)
+#if defined(__LP64__) || defined(_WIN64) || (defined(__x86_64__) && !defined(__ILP32__) ) || defined(_M_X64) || defined(__ia64) || defined (_M_IA64) || defined(__aarch64__) || defined(__powerpc64__)
        #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef struct object##_T *object;
 #else
        #define VK_DEFINE_NON_DISPATCHABLE_HANDLE(object) typedef uint64_t object;
 #endif
+#endif
        


@@ -142,6 +137,7 @@ typedef enum VkResult {
    VK_ERROR_INCOMPATIBLE_DRIVER = -9,
    VK_ERROR_TOO_MANY_OBJECTS = -10,
    VK_ERROR_FORMAT_NOT_SUPPORTED = -11,
+    VK_ERROR_FRAGMENTED_POOL = -12,
    VK_ERROR_SURFACE_LOST_KHR = -1000000000,
    VK_ERROR_NATIVE_WINDOW_IN_USE_KHR = -1000000001,
    VK_SUBOPTIMAL_KHR = 1000001003,
@@ -149,9 +145,9 @@ typedef enum VkResult {
    VK_ERROR_INCOMPATIBLE_DISPLAY_KHR = -1000003001,
    VK_ERROR_VALIDATION_FAILED_EXT = -1000011001,
    VK_ERROR_INVALID_SHADER_NV = -1000012000,
-    VK_RESULT_BEGIN_RANGE = VK_ERROR_FORMAT_NOT_SUPPORTED,
+    VK_RESULT_BEGIN_RANGE = VK_ERROR_FRAGMENTED_POOL,
    VK_RESULT_END_RANGE = VK_INCOMPLETE,
-    VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FORMAT_NOT_SUPPORTED + 1),
+    VK_RESULT_RANGE_SIZE = (VK_INCOMPLETE - VK_ERROR_FRAGMENTED_POOL + 1),
    VK_RESULT_MAX_ENUM = 0x7FFFFFFF
 } VkResult;

@@ -217,6 +213,25 @@ typedef enum VkStructureType {
    VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR = 1000008000,
    VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR = 1000009000,
    VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT = 1000011000,
+    VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD = 1000018000,
+    VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT = 1000022000,
+    VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_TAG_INFO_EXT = 1000022001,
+    VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT = 1000022002,
+    VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_IMAGE_CREATE_INFO_NV = 1000026000,
+    VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_BUFFER_CREATE_INFO_NV = 1000026001,
+    VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV = 1000026002,
+    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_NV = 1000056000,
+    VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_NV = 1000056001,
+    VK_STRUCTURE_TYPE_IMPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057000,
+    VK_STRUCTURE_TYPE_EXPORT_MEMORY_WIN32_HANDLE_INFO_NV = 1000057001,
+    VK_STRUCTURE_TYPE_WIN32_KEYED_MUTEX_ACQUIRE_RELEASE_INFO_NV = 1000058000,
+    VK_STRUCTURE_TYPE_VALIDATION_FLAGS_EXT = 1000061000,
+    VK_STRUCTURE_TYPE_OBJECT_TABLE_CREATE_INFO_NVX = 1000086000,
+    VK_STRUCTURE_TYPE_INDIRECT_COMMANDS_LAYOUT_CREATE_INFO_NVX = 1000086001,
+    VK_STRUCTURE_TYPE_CMD_PROCESS_COMMANDS_INFO_NVX = 1000086002,
+    VK_STRUCTURE_TYPE_CMD_RESERVE_SPACE_FOR_COMMANDS_INFO_NVX = 1000086003,
+    VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_LIMITS_NVX = 1000086004,
+    VK_STRUCTURE_TYPE_DEVICE_GENERATED_COMMANDS_FEATURES_NVX = 1000086005,
    VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO,
    VK_STRUCTURE_TYPE_END_RANGE = VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO,
    VK_STRUCTURE_TYPE_RANGE_SIZE = (VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO - VK_STRUCTURE_TYPE_APPLICATION_INFO + 1),
@@ -429,6 +444,14 @@ typedef enum VkFormat {
    VK_FORMAT_ASTC_12x10_SRGB_BLOCK = 182,
    VK_FORMAT_ASTC_12x12_UNORM_BLOCK = 183,
    VK_FORMAT_ASTC_12x12_SRGB_BLOCK = 184,
+    VK_FORMAT_PVRTC1_2BPP_UNORM_BLOCK_IMG = 1000054000,
+    VK_FORMAT_PVRTC1_4BPP_UNORM_BLOCK_IMG = 1000054001,
+    VK_FORMAT_PVRTC2_2BPP_UNORM_BLOCK_IMG = 1000054002,
+    VK_FORMAT_PVRTC2_4BPP_UNORM_BLOCK_IMG = 1000054003,
+    VK_FORMAT_PVRTC1_2BPP_SRGB_BLOCK_IMG = 1000054004,
+    VK_FORMAT_PVRTC1_4BPP_SRGB_BLOCK_IMG = 1000054005,
+    VK_FORMAT_PVRTC2_2BPP_SRGB_BLOCK_IMG = 1000054006,
+    VK_FORMAT_PVRTC2_4BPP_SRGB_BLOCK_IMG = 1000054007,
    VK_FORMAT_BEGIN_RANGE = VK_FORMAT_UNDEFINED,
    VK_FORMAT_END_RANGE = VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
    VK_FORMAT_RANGE_SIZE = (VK_FORMAT_ASTC_12x12_SRGB_BLOCK - VK_FORMAT_UNDEFINED + 1),
@@ -817,6 +840,7 @@ typedef enum VkFormatFeatureFlagBits {
    VK_FORMAT_FEATURE_BLIT_DST_BIT = 0x00000800,
    VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT = 0x00001000,
    VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG = 0x00002000,
+    VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkFormatFeatureFlagBits;
 typedef VkFlags VkFormatFeatureFlags;

@@ -829,6 +853,7 @@ typedef enum VkImageUsageFlagBits {
    VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT = 0x00000020,
    VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT = 0x00000040,
    VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT = 0x00000080,
+    VK_IMAGE_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkImageUsageFlagBits;
 typedef VkFlags VkImageUsageFlags;

@@ -838,6 +863,7 @@ typedef enum VkImageCreateFlagBits {
    VK_IMAGE_CREATE_SPARSE_ALIASED_BIT = 0x00000004,
    VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT = 0x00000008,
    VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT = 0x00000010,
+    VK_IMAGE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkImageCreateFlagBits;
 typedef VkFlags VkImageCreateFlags;

@@ -849,6 +875,7 @@ typedef enum VkSampleCountFlagBits {
    VK_SAMPLE_COUNT_16_BIT = 0x00000010,
    VK_SAMPLE_COUNT_32_BIT = 0x00000020,
    VK_SAMPLE_COUNT_64_BIT = 0x00000040,
+    VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkSampleCountFlagBits;
 typedef VkFlags VkSampleCountFlags;

@@ -857,6 +884,7 @@ typedef enum VkQueueFlagBits {
    VK_QUEUE_COMPUTE_BIT = 0x00000002,
    VK_QUEUE_TRANSFER_BIT = 0x00000004,
    VK_QUEUE_SPARSE_BINDING_BIT = 0x00000008,
+    VK_QUEUE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkQueueFlagBits;
 typedef VkFlags VkQueueFlags;

@@ -866,11 +894,13 @@ typedef enum VkMemoryPropertyFlagBits {
    VK_MEMORY_PROPERTY_HOST_COHERENT_BIT = 0x00000004,
    VK_MEMORY_PROPERTY_HOST_CACHED_BIT = 0x00000008,
    VK_MEMORY_PROPERTY_LAZILY_ALLOCATED_BIT = 0x00000010,
+    VK_MEMORY_PROPERTY_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkMemoryPropertyFlagBits;
 typedef VkFlags VkMemoryPropertyFlags;

 typedef enum VkMemoryHeapFlagBits {
    VK_MEMORY_HEAP_DEVICE_LOCAL_BIT = 0x00000001,
+    VK_MEMORY_HEAP_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkMemoryHeapFlagBits;
 typedef VkFlags VkMemoryHeapFlags;
 typedef VkFlags VkDeviceCreateFlags;
@@ -894,6 +924,8 @@ typedef enum VkPipelineStageFlagBits {
    VK_PIPELINE_STAGE_HOST_BIT = 0x00004000,
    VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT = 0x00008000,
    VK_PIPELINE_STAGE_ALL_COMMANDS_BIT = 0x00010000,
+    VK_PIPELINE_STAGE_COMMAND_PROCESS_BIT_NVX = 0x00020000,
+    VK_PIPELINE_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkPipelineStageFlagBits;
 typedef VkFlags VkPipelineStageFlags;
 typedef VkFlags VkMemoryMapFlags;
@@ -903,6 +935,7 @@ typedef enum VkImageAspectFlagBits {
    VK_IMAGE_ASPECT_DEPTH_BIT = 0x00000002,
    VK_IMAGE_ASPECT_STENCIL_BIT = 0x00000004,
    VK_IMAGE_ASPECT_METADATA_BIT = 0x00000008,
+    VK_IMAGE_ASPECT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkImageAspectFlagBits;
 typedef VkFlags VkImageAspectFlags;

@@ -910,16 +943,19 @@ typedef enum VkSparseImageFormatFlagBits {
    VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT = 0x00000001,
    VK_SPARSE_IMAGE_FORMAT_ALIGNED_MIP_SIZE_BIT = 0x00000002,
    VK_SPARSE_IMAGE_FORMAT_NONSTANDARD_BLOCK_SIZE_BIT = 0x00000004,
+    VK_SPARSE_IMAGE_FORMAT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkSparseImageFormatFlagBits;
 typedef VkFlags VkSparseImageFormatFlags;

 typedef enum VkSparseMemoryBindFlagBits {
    VK_SPARSE_MEMORY_BIND_METADATA_BIT = 0x00000001,
+    VK_SPARSE_MEMORY_BIND_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkSparseMemoryBindFlagBits;
 typedef VkFlags VkSparseMemoryBindFlags;

 typedef enum VkFenceCreateFlagBits {
    VK_FENCE_CREATE_SIGNALED_BIT = 0x00000001,
+    VK_FENCE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkFenceCreateFlagBits;
 typedef VkFlags VkFenceCreateFlags;
 typedef VkFlags VkSemaphoreCreateFlags;
@@ -938,6 +974,7 @@ typedef enum VkQueryPipelineStatisticFlagBits {
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_CONTROL_SHADER_PATCHES_BIT = 0x00000100,
    VK_QUERY_PIPELINE_STATISTIC_TESSELLATION_EVALUATION_SHADER_INVOCATIONS_BIT = 0x00000200,
    VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT = 0x00000400,
+    VK_QUERY_PIPELINE_STATISTIC_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkQueryPipelineStatisticFlagBits;
 typedef VkFlags VkQueryPipelineStatisticFlags;

@@ -946,6 +983,7 @@ typedef enum VkQueryResultFlagBits {
    VK_QUERY_RESULT_WAIT_BIT = 0x00000002,
    VK_QUERY_RESULT_WITH_AVAILABILITY_BIT = 0x00000004,
    VK_QUERY_RESULT_PARTIAL_BIT = 0x00000008,
+    VK_QUERY_RESULT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkQueryResultFlagBits;
 typedef VkFlags VkQueryResultFlags;

@@ -953,6 +991,7 @@ typedef enum VkBufferCreateFlagBits {
    VK_BUFFER_CREATE_SPARSE_BINDING_BIT = 0x00000001,
    VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT = 0x00000002,
    VK_BUFFER_CREATE_SPARSE_ALIASED_BIT = 0x00000004,
+    VK_BUFFER_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkBufferCreateFlagBits;
 typedef VkFlags VkBufferCreateFlags;

@@ -966,6 +1005,7 @@ typedef enum VkBufferUsageFlagBits {
    VK_BUFFER_USAGE_INDEX_BUFFER_BIT = 0x00000040,
    VK_BUFFER_USAGE_VERTEX_BUFFER_BIT = 0x00000080,
    VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT = 0x00000100,
+    VK_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkBufferUsageFlagBits;
 typedef VkFlags VkBufferUsageFlags;
 typedef VkFlags VkBufferViewCreateFlags;
@@ -977,6 +1017,7 @@ typedef enum VkPipelineCreateFlagBits {
    VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT = 0x00000001,
    VK_PIPELINE_CREATE_ALLOW_DERIVATIVES_BIT = 0x00000002,
    VK_PIPELINE_CREATE_DERIVATIVE_BIT = 0x00000004,
+    VK_PIPELINE_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkPipelineCreateFlagBits;
 typedef VkFlags VkPipelineCreateFlags;
 typedef VkFlags VkPipelineShaderStageCreateFlags;
@@ -990,6 +1031,7 @@ typedef enum VkShaderStageFlagBits {
    VK_SHADER_STAGE_COMPUTE_BIT = 0x00000020,
    VK_SHADER_STAGE_ALL_GRAPHICS = 0x0000001F,
    VK_SHADER_STAGE_ALL = 0x7FFFFFFF,
+    VK_SHADER_STAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkShaderStageFlagBits;
 typedef VkFlags VkPipelineVertexInputStateCreateFlags;
 typedef VkFlags VkPipelineInputAssemblyStateCreateFlags;
@@ -1002,6 +1044,7 @@ typedef enum VkCullModeFlagBits {
    VK_CULL_MODE_FRONT_BIT = 0x00000001,
    VK_CULL_MODE_BACK_BIT = 0x00000002,
    VK_CULL_MODE_FRONT_AND_BACK = 0x00000003,
+    VK_CULL_MODE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkCullModeFlagBits;
 typedef VkFlags VkCullModeFlags;
 typedef VkFlags VkPipelineMultisampleStateCreateFlags;
@@ -1013,6 +1056,7 @@ typedef enum VkColorComponentFlagBits {
    VK_COLOR_COMPONENT_G_BIT = 0x00000002,
    VK_COLOR_COMPONENT_B_BIT = 0x00000004,
    VK_COLOR_COMPONENT_A_BIT = 0x00000008,
+    VK_COLOR_COMPONENT_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkColorComponentFlagBits;
 typedef VkFlags VkColorComponentFlags;
 typedef VkFlags VkPipelineDynamicStateCreateFlags;
@@ -1023,6 +1067,7 @@ typedef VkFlags VkDescriptorSetLayoutCreateFlags;

 typedef enum VkDescriptorPoolCreateFlagBits {
    VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT = 0x00000001,
+    VK_DESCRIPTOR_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkDescriptorPoolCreateFlagBits;
 typedef VkFlags VkDescriptorPoolCreateFlags;
 typedef VkFlags VkDescriptorPoolResetFlags;
@@ -1031,6 +1076,7 @@ typedef VkFlags VkRenderPassCreateFlags;

 typedef enum VkAttachmentDescriptionFlagBits {
    VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT = 0x00000001,
+    VK_ATTACHMENT_DESCRIPTION_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkAttachmentDescriptionFlagBits;
 typedef VkFlags VkAttachmentDescriptionFlags;
 typedef VkFlags VkSubpassDescriptionFlags;
@@ -1053,22 +1099,28 @@ typedef enum VkAccessFlagBits {
    VK_ACCESS_HOST_WRITE_BIT = 0x00004000,
    VK_ACCESS_MEMORY_READ_BIT = 0x00008000,
    VK_ACCESS_MEMORY_WRITE_BIT = 0x00010000,
+    VK_ACCESS_COMMAND_PROCESS_READ_BIT_NVX = 0x00020000,
+    VK_ACCESS_COMMAND_PROCESS_WRITE_BIT_NVX = 0x00040000,
+    VK_ACCESS_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkAccessFlagBits;
 typedef VkFlags VkAccessFlags;

 typedef enum VkDependencyFlagBits {
    VK_DEPENDENCY_BY_REGION_BIT = 0x00000001,
+    VK_DEPENDENCY_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkDependencyFlagBits;
 typedef VkFlags VkDependencyFlags;

 typedef enum VkCommandPoolCreateFlagBits {
    VK_COMMAND_POOL_CREATE_TRANSIENT_BIT = 0x00000001,
    VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT = 0x00000002,
+    VK_COMMAND_POOL_CREATE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkCommandPoolCreateFlagBits;
 typedef VkFlags VkCommandPoolCreateFlags;

 typedef enum VkCommandPoolResetFlagBits {
    VK_COMMAND_POOL_RESET_RELEASE_RESOURCES_BIT = 0x00000001,
+    VK_COMMAND_POOL_RESET_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkCommandPoolResetFlagBits;
 typedef VkFlags VkCommandPoolResetFlags;

@@ -1076,16 +1128,19 @@ typedef enum VkCommandBufferUsageFlagBits {
    VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT = 0x00000001,
    VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT = 0x00000002,
    VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT = 0x00000004,
+    VK_COMMAND_BUFFER_USAGE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkCommandBufferUsageFlagBits;
 typedef VkFlags VkCommandBufferUsageFlags;

 typedef enum VkQueryControlFlagBits {
    VK_QUERY_CONTROL_PRECISE_BIT = 0x00000001,
+    VK_QUERY_CONTROL_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkQueryControlFlagBits;
 typedef VkFlags VkQueryControlFlags;

 typedef enum VkCommandBufferResetFlagBits {
    VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT = 0x00000001,
+    VK_COMMAND_BUFFER_RESET_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkCommandBufferResetFlagBits;
 typedef VkFlags VkCommandBufferResetFlags;

@@ -1093,6 +1148,7 @@ typedef enum VkStencilFaceFlagBits {
    VK_STENCIL_FACE_FRONT_BIT = 0x00000001,
    VK_STENCIL_FACE_BACK_BIT = 0x00000002,
    VK_STENCIL_FRONT_AND_BACK = 0x00000003,
+    VK_STENCIL_FACE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkStencilFaceFlagBits;
 typedef VkFlags VkStencilFaceFlags;

@@ -2320,7 +2376,7 @@ typedef void (VKAPI_PTR *PFN_vkCmdCopyImage)(VkCommandBuffer commandBuffer, VkIm
 typedef void (VKAPI_PTR *PFN_vkCmdBlitImage)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkImageBlit* pRegions, VkFilter filter);
 typedef void (VKAPI_PTR *PFN_vkCmdCopyBufferToImage)(VkCommandBuffer commandBuffer, VkBuffer srcBuffer, VkImage dstImage, VkImageLayout dstImageLayout, uint32_t regionCount, const VkBufferImageCopy* pRegions);
 typedef void (VKAPI_PTR *PFN_vkCmdCopyImageToBuffer)(VkCommandBuffer commandBuffer, VkImage srcImage, VkImageLayout srcImageLayout, VkBuffer dstBuffer, uint32_t regionCount, const VkBufferImageCopy* pRegions);
-typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const uint32_t* pData);
+typedef void (VKAPI_PTR *PFN_vkCmdUpdateBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize dataSize, const void* pData);
 typedef void (VKAPI_PTR *PFN_vkCmdFillBuffer)(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset, VkDeviceSize size, uint32_t data);
 typedef void (VKAPI_PTR *PFN_vkCmdClearColorImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearColorValue* pColor, uint32_t rangeCount, const VkImageSubresourceRange* pRanges);
 typedef void (VKAPI_PTR *PFN_vkCmdClearDepthStencilImage)(VkCommandBuffer commandBuffer, VkImage image, VkImageLayout imageLayout, const VkClearDepthStencilValue* pDepthStencil, uint32_t rangeCount, const VkImageSubresourceRange* pRanges);
@@ -3005,7 +3061,7 @@ VKAPI_ATTR void VKAPI_CALL vkCmdUpdateBuffer(
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
-    const uint32_t*                             pData);
+    const void*                                 pData);

 VKAPI_ATTR void VKAPI_CALL vkCmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
@@ -3145,14 +3201,15 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSurfaceKHR)

 #define VK_KHR_SURFACE_SPEC_VERSION       25
 #define VK_KHR_SURFACE_EXTENSION_NAME     "VK_KHR_surface"
+#define VK_COLORSPACE_SRGB_NONLINEAR_KHR  VK_COLOR_SPACE_SRGB_NONLINEAR_KHR


 typedef enum VkColorSpaceKHR {
-    VK_COLORSPACE_SRGB_NONLINEAR_KHR = 0,
-    VK_COLORSPACE_BEGIN_RANGE = VK_COLORSPACE_SRGB_NONLINEAR_KHR,
-    VK_COLORSPACE_END_RANGE = VK_COLORSPACE_SRGB_NONLINEAR_KHR,
-    VK_COLORSPACE_RANGE_SIZE = (VK_COLORSPACE_SRGB_NONLINEAR_KHR - VK_COLORSPACE_SRGB_NONLINEAR_KHR + 1),
-    VK_COLORSPACE_MAX_ENUM = 0x7FFFFFFF
+    VK_COLOR_SPACE_SRGB_NONLINEAR_KHR = 0,
+    VK_COLOR_SPACE_BEGIN_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
+    VK_COLOR_SPACE_END_RANGE_KHR = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR,
+    VK_COLOR_SPACE_RANGE_SIZE_KHR = (VK_COLOR_SPACE_SRGB_NONLINEAR_KHR - VK_COLOR_SPACE_SRGB_NONLINEAR_KHR + 1),
+    VK_COLOR_SPACE_MAX_ENUM_KHR = 0x7FFFFFFF
 } VkColorSpaceKHR;

 typedef enum VkPresentModeKHR {
@@ -3160,10 +3217,10 @@ typedef enum VkPresentModeKHR {
    VK_PRESENT_MODE_MAILBOX_KHR = 1,
    VK_PRESENT_MODE_FIFO_KHR = 2,
    VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3,
-    VK_PRESENT_MODE_BEGIN_RANGE = VK_PRESENT_MODE_IMMEDIATE_KHR,
-    VK_PRESENT_MODE_END_RANGE = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
-    VK_PRESENT_MODE_RANGE_SIZE = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1),
-    VK_PRESENT_MODE_MAX_ENUM = 0x7FFFFFFF
+    VK_PRESENT_MODE_BEGIN_RANGE_KHR = VK_PRESENT_MODE_IMMEDIATE_KHR,
+    VK_PRESENT_MODE_END_RANGE_KHR = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
+    VK_PRESENT_MODE_RANGE_SIZE_KHR = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1),
+    VK_PRESENT_MODE_MAX_ENUM_KHR = 0x7FFFFFFF
 } VkPresentModeKHR;


@@ -3177,6 +3234,7 @@ typedef enum VkSurfaceTransformFlagBitsKHR {
    VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_180_BIT_KHR = 0x00000040,
    VK_SURFACE_TRANSFORM_HORIZONTAL_MIRROR_ROTATE_270_BIT_KHR = 0x00000080,
    VK_SURFACE_TRANSFORM_INHERIT_BIT_KHR = 0x00000100,
+    VK_SURFACE_TRANSFORM_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF
 } VkSurfaceTransformFlagBitsKHR;
 typedef VkFlags VkSurfaceTransformFlagsKHR;

@@ -3185,6 +3243,7 @@ typedef enum VkCompositeAlphaFlagBitsKHR {
    VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR = 0x00000002,
    VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR = 0x00000004,
    VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR = 0x00000008,
+    VK_COMPOSITE_ALPHA_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF
 } VkCompositeAlphaFlagBitsKHR;
 typedef VkFlags VkCompositeAlphaFlagsKHR;

@@ -3246,7 +3305,7 @@ VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfacePresentModesKHR(
 #define VK_KHR_swapchain 1
 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkSwapchainKHR)

-#define VK_KHR_SWAPCHAIN_SPEC_VERSION     67
+#define VK_KHR_SWAPCHAIN_SPEC_VERSION     68
 #define VK_KHR_SWAPCHAIN_EXTENSION_NAME   "VK_KHR_swapchain"

 typedef VkFlags VkSwapchainCreateFlagsKHR;
@@ -3334,6 +3393,7 @@ typedef enum VkDisplayPlaneAlphaFlagBitsKHR {
    VK_DISPLAY_PLANE_ALPHA_GLOBAL_BIT_KHR = 0x00000002,
    VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_BIT_KHR = 0x00000004,
    VK_DISPLAY_PLANE_ALPHA_PER_PIXEL_PREMULTIPLIED_BIT_KHR = 0x00000008,
+    VK_DISPLAY_PLANE_ALPHA_FLAG_BITS_MAX_ENUM_KHR = 0x7FFFFFFF
 } VkDisplayPlaneAlphaFlagBitsKHR;
 typedef VkFlags VkDisplayPlaneAlphaFlagsKHR;
 typedef VkFlags VkDisplayModeCreateFlagsKHR;
@@ -3401,7 +3461,7 @@ typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPropertiesKHR)(VkPhys
 typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceDisplayPlanePropertiesKHR)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkDisplayPlanePropertiesKHR* pProperties);
 typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneSupportedDisplaysKHR)(VkPhysicalDevice physicalDevice, uint32_t planeIndex, uint32_t* pDisplayCount, VkDisplayKHR* pDisplays);
 typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayModePropertiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, uint32_t* pPropertyCount, VkDisplayModePropertiesKHR* pProperties);
-typedef VkResult (VKAPI_PTR *PFN_vkCreateDisplayModeKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR*pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode);
+typedef VkResult (VKAPI_PTR *PFN_vkCreateDisplayModeKHR)(VkPhysicalDevice physicalDevice, VkDisplayKHR display, const VkDisplayModeCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkDisplayModeKHR* pMode);
 typedef VkResult (VKAPI_PTR *PFN_vkGetDisplayPlaneCapabilitiesKHR)(VkPhysicalDevice physicalDevice, VkDisplayModeKHR mode, uint32_t planeIndex, VkDisplayPlaneCapabilitiesKHR* pCapabilities);
 typedef VkResult (VKAPI_PTR *PFN_vkCreateDisplayPlaneSurfaceKHR)(VkInstance instance, const VkDisplaySurfaceCreateInfoKHR* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);

@@ -3684,7 +3744,7 @@ VKAPI_ATTR VkBool32 VKAPI_CALL vkGetPhysicalDeviceWin32PresentationSupportKHR(
 #define VK_EXT_debug_report 1
 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)

-#define VK_EXT_DEBUG_REPORT_SPEC_VERSION  2
+#define VK_EXT_DEBUG_REPORT_SPEC_VERSION  4
 #define VK_EXT_DEBUG_REPORT_EXTENSION_NAME "VK_EXT_debug_report"
 #define VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT

@@ -3719,11 +3779,23 @@ typedef enum VkDebugReportObjectTypeEXT {
    VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT = 26,
    VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT = 27,
    VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_EXT = 28,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT = 29,
+    VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT = 30,
+    VK_DEBUG_REPORT_OBJECT_TYPE_OBJECT_TABLE_NVX_EXT = 31,
+    VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT = 32,
+    VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,
+    VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT,
+    VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT - VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),
+    VK_DEBUG_REPORT_OBJECT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
 } VkDebugReportObjectTypeEXT;

 typedef enum VkDebugReportErrorEXT {
    VK_DEBUG_REPORT_ERROR_NONE_EXT = 0,
    VK_DEBUG_REPORT_ERROR_CALLBACK_REF_EXT = 1,
+    VK_DEBUG_REPORT_ERROR_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_ERROR_NONE_EXT,
+    VK_DEBUG_REPORT_ERROR_END_RANGE_EXT = VK_DEBUG_REPORT_ERROR_CALLBACK_REF_EXT,
+    VK_DEBUG_REPORT_ERROR_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_ERROR_CALLBACK_REF_EXT - VK_DEBUG_REPORT_ERROR_NONE_EXT + 1),
+    VK_DEBUG_REPORT_ERROR_MAX_ENUM_EXT = 0x7FFFFFFF
 } VkDebugReportErrorEXT;


@@ -3733,6 +3805,7 @@ typedef enum VkDebugReportFlagBitsEXT {
    VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT = 0x00000004,
    VK_DEBUG_REPORT_ERROR_BIT_EXT = 0x00000008,
    VK_DEBUG_REPORT_DEBUG_BIT_EXT = 0x00000010,
+    VK_DEBUG_REPORT_FLAG_BITS_MAX_ENUM_EXT = 0x7FFFFFFF
 } VkDebugReportFlagBitsEXT;
 typedef VkFlags VkDebugReportFlagsEXT;

@@ -3793,6 +3866,533 @@ VKAPI_ATTR void VKAPI_CALL vkDebugReportMessageEXT(
 #define VK_IMG_FILTER_CUBIC_EXTENSION_NAME "VK_IMG_filter_cubic"


+#define VK_AMD_rasterization_order 1
+#define VK_AMD_RASTERIZATION_ORDER_SPEC_VERSION 1
+#define VK_AMD_RASTERIZATION_ORDER_EXTENSION_NAME "VK_AMD_rasterization_order"
+
+
+typedef enum VkRasterizationOrderAMD {
+    VK_RASTERIZATION_ORDER_STRICT_AMD = 0,
+    VK_RASTERIZATION_ORDER_RELAXED_AMD = 1,
+    VK_RASTERIZATION_ORDER_BEGIN_RANGE_AMD = VK_RASTERIZATION_ORDER_STRICT_AMD,
+    VK_RASTERIZATION_ORDER_END_RANGE_AMD = VK_RASTERIZATION_ORDER_RELAXED_AMD,
+    VK_RASTERIZATION_ORDER_RANGE_SIZE_AMD = (VK_RASTERIZATION_ORDER_RELAXED_AMD - VK_RASTERIZATION_ORDER_STRICT_AMD + 1),
+    VK_RASTERIZATION_ORDER_MAX_ENUM_AMD = 0x7FFFFFFF
+} VkRasterizationOrderAMD;
+
+typedef struct VkPipelineRasterizationStateRasterizationOrderAMD {
+    VkStructureType            sType;
+    const void*                pNext;
+    VkRasterizationOrderAMD    rasterizationOrder;
+} VkPipelineRasterizationStateRasterizationOrderAMD;
+
+
+
+#define VK_AMD_shader_trinary_minmax 1
+#define VK_AMD_SHADER_TRINARY_MINMAX_SPEC_VERSION 1
+#define VK_AMD_SHADER_TRINARY_MINMAX_EXTENSION_NAME "VK_AMD_shader_trinary_minmax"
+
+
+#define VK_AMD_shader_explicit_vertex_parameter 1
+#define VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_SPEC_VERSION 1
+#define VK_AMD_SHADER_EXPLICIT_VERTEX_PARAMETER_EXTENSION_NAME "VK_AMD_shader_explicit_vertex_parameter"
+
+
+#define VK_EXT_debug_marker 1
+#define VK_EXT_DEBUG_MARKER_SPEC_VERSION  3
+#define VK_EXT_DEBUG_MARKER_EXTENSION_NAME "VK_EXT_debug_marker"
+
+typedef struct VkDebugMarkerObjectNameInfoEXT {
+    VkStructureType               sType;
+    const void*                   pNext;
+    VkDebugReportObjectTypeEXT    objectType;
+    uint64_t                      object;
+    const char*                   pObjectName;
+} VkDebugMarkerObjectNameInfoEXT;
+
+typedef struct VkDebugMarkerObjectTagInfoEXT {
+    VkStructureType               sType;
+    const void*                   pNext;
+    VkDebugReportObjectTypeEXT    objectType;
+    uint64_t                      object;
+    uint64_t                      tagName;
+    size_t                        tagSize;
+    const void*                   pTag;
+} VkDebugMarkerObjectTagInfoEXT;
+
+typedef struct VkDebugMarkerMarkerInfoEXT {
+    VkStructureType    sType;
+    const void*        pNext;
+    const char*        pMarkerName;
+    float              color[4];
+} VkDebugMarkerMarkerInfoEXT;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectTagEXT)(VkDevice device, VkDebugMarkerObjectTagInfoEXT* pTagInfo);
+typedef VkResult (VKAPI_PTR *PFN_vkDebugMarkerSetObjectNameEXT)(VkDevice device, VkDebugMarkerObjectNameInfoEXT* pNameInfo);
+typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerBeginEXT)(VkCommandBuffer commandBuffer, VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerEndEXT)(VkCommandBuffer commandBuffer);
+typedef void (VKAPI_PTR *PFN_vkCmdDebugMarkerInsertEXT)(VkCommandBuffer commandBuffer, VkDebugMarkerMarkerInfoEXT* pMarkerInfo);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkDebugMarkerSetObjectTagEXT(
+    VkDevice                                    device,
+    VkDebugMarkerObjectTagInfoEXT*              pTagInfo);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkDebugMarkerSetObjectNameEXT(
+    VkDevice                                    device,
+    VkDebugMarkerObjectNameInfoEXT*             pNameInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerBeginEXT(
+    VkCommandBuffer                             commandBuffer,
+    VkDebugMarkerMarkerInfoEXT*                 pMarkerInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerEndEXT(
+    VkCommandBuffer                             commandBuffer);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDebugMarkerInsertEXT(
+    VkCommandBuffer                             commandBuffer,
+    VkDebugMarkerMarkerInfoEXT*                 pMarkerInfo);
+#endif
+
+#define VK_AMD_gcn_shader 1
+#define VK_AMD_GCN_SHADER_SPEC_VERSION    1
+#define VK_AMD_GCN_SHADER_EXTENSION_NAME  "VK_AMD_gcn_shader"
+
+
+#define VK_NV_dedicated_allocation 1
+#define VK_NV_DEDICATED_ALLOCATION_SPEC_VERSION 1
+#define VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME "VK_NV_dedicated_allocation"
+
+typedef struct VkDedicatedAllocationImageCreateInfoNV {
+    VkStructureType    sType;
+    const void*        pNext;
+    VkBool32           dedicatedAllocation;
+} VkDedicatedAllocationImageCreateInfoNV;
+
+typedef struct VkDedicatedAllocationBufferCreateInfoNV {
+    VkStructureType    sType;
+    const void*        pNext;
+    VkBool32           dedicatedAllocation;
+} VkDedicatedAllocationBufferCreateInfoNV;
+
+typedef struct VkDedicatedAllocationMemoryAllocateInfoNV {
+    VkStructureType    sType;
+    const void*        pNext;
+    VkImage            image;
+    VkBuffer           buffer;
+} VkDedicatedAllocationMemoryAllocateInfoNV;
+
+
+
+#define VK_AMD_draw_indirect_count 1
+#define VK_AMD_DRAW_INDIRECT_COUNT_SPEC_VERSION 1
+#define VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME "VK_AMD_draw_indirect_count"
+
+typedef void (VKAPI_PTR *PFN_vkCmdDrawIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride);
+typedef void (VKAPI_PTR *PFN_vkCmdDrawIndexedIndirectCountAMD)(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, VkBuffer countBuffer, VkDeviceSize countBufferOffset, uint32_t maxDrawCount, uint32_t stride);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndirectCountAMD(
+    VkCommandBuffer                             commandBuffer,
+    VkBuffer                                    buffer,
+    VkDeviceSize                                offset,
+    VkBuffer                                    countBuffer,
+    VkDeviceSize                                countBufferOffset,
+    uint32_t                                    maxDrawCount,
+    uint32_t                                    stride);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdDrawIndexedIndirectCountAMD(
+    VkCommandBuffer                             commandBuffer,
+    VkBuffer                                    buffer,
+    VkDeviceSize                                offset,
+    VkBuffer                                    countBuffer,
+    VkDeviceSize                                countBufferOffset,
+    uint32_t                                    maxDrawCount,
+    uint32_t                                    stride);
+#endif
+
+#define VK_AMD_negative_viewport_height 1
+#define VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_SPEC_VERSION 1
+#define VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_EXTENSION_NAME "VK_AMD_negative_viewport_height"
+
+
+#define VK_AMD_gpu_shader_half_float 1
+#define VK_AMD_GPU_SHADER_HALF_FLOAT_SPEC_VERSION 1
+#define VK_AMD_GPU_SHADER_HALF_FLOAT_EXTENSION_NAME "VK_AMD_gpu_shader_half_float"
+
+
+#define VK_AMD_shader_ballot 1
+#define VK_AMD_SHADER_BALLOT_SPEC_VERSION 1
+#define VK_AMD_SHADER_BALLOT_EXTENSION_NAME "VK_AMD_shader_ballot"
+
+
+#define VK_IMG_format_pvrtc 1
+#define VK_IMG_FORMAT_PVRTC_SPEC_VERSION  1
+#define VK_IMG_FORMAT_PVRTC_EXTENSION_NAME "VK_IMG_format_pvrtc"
+
+
+#define VK_NV_external_memory_capabilities 1
+#define VK_NV_EXTERNAL_MEMORY_CAPABILITIES_SPEC_VERSION 1
+#define VK_NV_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME "VK_NV_external_memory_capabilities"
+
+
+typedef enum VkExternalMemoryHandleTypeFlagBitsNV {
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_NV = 0x00000001,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT_NV = 0x00000002,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_IMAGE_BIT_NV = 0x00000004,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_IMAGE_KMT_BIT_NV = 0x00000008,
+    VK_EXTERNAL_MEMORY_HANDLE_TYPE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF
+} VkExternalMemoryHandleTypeFlagBitsNV;
+typedef VkFlags VkExternalMemoryHandleTypeFlagsNV;
+
+typedef enum VkExternalMemoryFeatureFlagBitsNV {
+    VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_NV = 0x00000001,
+    VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_NV = 0x00000002,
+    VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_NV = 0x00000004,
+    VK_EXTERNAL_MEMORY_FEATURE_FLAG_BITS_MAX_ENUM_NV = 0x7FFFFFFF
+} VkExternalMemoryFeatureFlagBitsNV;
+typedef VkFlags VkExternalMemoryFeatureFlagsNV;
+
+typedef struct VkExternalImageFormatPropertiesNV {
+    VkImageFormatProperties              imageFormatProperties;
+    VkExternalMemoryFeatureFlagsNV       externalMemoryFeatures;
+    VkExternalMemoryHandleTypeFlagsNV    exportFromImportedHandleTypes;
+    VkExternalMemoryHandleTypeFlagsNV    compatibleHandleTypes;
+} VkExternalImageFormatPropertiesNV;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceExternalImageFormatPropertiesNV)(VkPhysicalDevice physicalDevice, VkFormat format, VkImageType type, VkImageTiling tiling, VkImageUsageFlags usage, VkImageCreateFlags flags, VkExternalMemoryHandleTypeFlagsNV externalHandleType, VkExternalImageFormatPropertiesNV* pExternalImageFormatProperties);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceExternalImageFormatPropertiesNV(
+    VkPhysicalDevice                            physicalDevice,
+    VkFormat                                    format,
+    VkImageType                                 type,
+    VkImageTiling                               tiling,
+    VkImageUsageFlags                           usage,
+    VkImageCreateFlags                          flags,
+    VkExternalMemoryHandleTypeFlagsNV           externalHandleType,
+    VkExternalImageFormatPropertiesNV*          pExternalImageFormatProperties);
+#endif
+
+#define VK_NV_external_memory 1
+#define VK_NV_EXTERNAL_MEMORY_SPEC_VERSION 1
+#define VK_NV_EXTERNAL_MEMORY_EXTENSION_NAME "VK_NV_external_memory"
+
+typedef struct VkExternalMemoryImageCreateInfoNV {
+    VkStructureType                      sType;
+    const void*                          pNext;
+    VkExternalMemoryHandleTypeFlagsNV    handleTypes;
+} VkExternalMemoryImageCreateInfoNV;
+
+typedef struct VkExportMemoryAllocateInfoNV {
+    VkStructureType                      sType;
+    const void*                          pNext;
+    VkExternalMemoryHandleTypeFlagsNV    handleTypes;
+} VkExportMemoryAllocateInfoNV;
+
+
+
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+#define VK_NV_external_memory_win32 1
+#define VK_NV_EXTERNAL_MEMORY_WIN32_SPEC_VERSION 1
+#define VK_NV_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME "VK_NV_external_memory_win32"
+
+typedef struct VkImportMemoryWin32HandleInfoNV {
+    VkStructureType                      sType;
+    const void*                          pNext;
+    VkExternalMemoryHandleTypeFlagsNV    handleType;
+    HANDLE                               handle;
+} VkImportMemoryWin32HandleInfoNV;
+
+typedef struct VkExportMemoryWin32HandleInfoNV {
+    VkStructureType               sType;
+    const void*                   pNext;
+    const SECURITY_ATTRIBUTES*    pAttributes;
+    DWORD                         dwAccess;
+} VkExportMemoryWin32HandleInfoNV;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkGetMemoryWin32HandleNV)(VkDevice device, VkDeviceMemory memory, VkExternalMemoryHandleTypeFlagsNV handleType, HANDLE* pHandle);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetMemoryWin32HandleNV(
+    VkDevice                                    device,
+    VkDeviceMemory                              memory,
+    VkExternalMemoryHandleTypeFlagsNV           handleType,
+    HANDLE*                                     pHandle);
+#endif
+#endif /* VK_USE_PLATFORM_WIN32_KHR */
+
+#ifdef VK_USE_PLATFORM_WIN32_KHR
+#define VK_NV_win32_keyed_mutex 1
+#define VK_NV_WIN32_KEYED_MUTEX_SPEC_VERSION 1
+#define VK_NV_WIN32_KEYED_MUTEX_EXTENSION_NAME "VK_NV_win32_keyed_mutex"
+
+typedef struct VkWin32KeyedMutexAcquireReleaseInfoNV {
+    VkStructureType          sType;
+    const void*              pNext;
+    uint32_t                 acquireCount;
+    const VkDeviceMemory*    pAcquireSyncs;
+    const uint64_t*          pAcquireKeys;
+    const uint32_t*          pAcquireTimeoutMilliseconds;
+    uint32_t                 releaseCount;
+    const VkDeviceMemory*    pReleaseSyncs;
+    const uint64_t*          pReleaseKeys;
+} VkWin32KeyedMutexAcquireReleaseInfoNV;
+
+
+#endif /* VK_USE_PLATFORM_WIN32_KHR */
+
+#define VK_EXT_validation_flags 1
+#define VK_EXT_VALIDATION_FLAGS_SPEC_VERSION 1
+#define VK_EXT_VALIDATION_FLAGS_EXTENSION_NAME "VK_EXT_validation_flags"
+
+
+typedef enum VkValidationCheckEXT {
+    VK_VALIDATION_CHECK_ALL_EXT = 0,
+    VK_VALIDATION_CHECK_BEGIN_RANGE_EXT = VK_VALIDATION_CHECK_ALL_EXT,
+    VK_VALIDATION_CHECK_END_RANGE_EXT = VK_VALIDATION_CHECK_ALL_EXT,
+    VK_VALIDATION_CHECK_RANGE_SIZE_EXT = (VK_VALIDATION_CHECK_ALL_EXT - VK_VALIDATION_CHECK_ALL_EXT + 1),
+    VK_VALIDATION_CHECK_MAX_ENUM_EXT = 0x7FFFFFFF
+} VkValidationCheckEXT;
+
+typedef struct VkValidationFlagsEXT {
+    VkStructureType          sType;
+    const void*              pNext;
+    uint32_t                 disabledValidationCheckCount;
+    VkValidationCheckEXT*    pDisabledValidationChecks;
+} VkValidationFlagsEXT;
+
+
+
+#define VK_NVX_device_generated_commands 1
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkObjectTableNVX)
+VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkIndirectCommandsLayoutNVX)
+
+#define VK_NVX_DEVICE_GENERATED_COMMANDS_SPEC_VERSION 1
+#define VK_NVX_DEVICE_GENERATED_COMMANDS_EXTENSION_NAME "VK_NVX_device_generated_commands"
+
+
+typedef enum VkIndirectCommandsTokenTypeNVX {
+    VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX = 0,
+    VK_INDIRECT_COMMANDS_TOKEN_DESCRIPTOR_SET_NVX = 1,
+    VK_INDIRECT_COMMANDS_TOKEN_INDEX_BUFFER_NVX = 2,
+    VK_INDIRECT_COMMANDS_TOKEN_VERTEX_BUFFER_NVX = 3,
+    VK_INDIRECT_COMMANDS_TOKEN_PUSH_CONSTANT_NVX = 4,
+    VK_INDIRECT_COMMANDS_TOKEN_DRAW_INDEXED_NVX = 5,
+    VK_INDIRECT_COMMANDS_TOKEN_DRAW_NVX = 6,
+    VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX = 7,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_BEGIN_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_END_RANGE_NVX = VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX,
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_RANGE_SIZE_NVX = (VK_INDIRECT_COMMANDS_TOKEN_DISPATCH_NVX - VK_INDIRECT_COMMANDS_TOKEN_PIPELINE_NVX + 1),
+    VK_INDIRECT_COMMANDS_TOKEN_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkIndirectCommandsTokenTypeNVX;
+
+typedef enum VkObjectEntryTypeNVX {
+    VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX = 0,
+    VK_OBJECT_ENTRY_PIPELINE_NVX = 1,
+    VK_OBJECT_ENTRY_INDEX_BUFFER_NVX = 2,
+    VK_OBJECT_ENTRY_VERTEX_BUFFER_NVX = 3,
+    VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX = 4,
+    VK_OBJECT_ENTRY_TYPE_BEGIN_RANGE_NVX = VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX,
+    VK_OBJECT_ENTRY_TYPE_END_RANGE_NVX = VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX,
+    VK_OBJECT_ENTRY_TYPE_RANGE_SIZE_NVX = (VK_OBJECT_ENTRY_PUSH_CONSTANT_NVX - VK_OBJECT_ENTRY_DESCRIPTOR_SET_NVX + 1),
+    VK_OBJECT_ENTRY_TYPE_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkObjectEntryTypeNVX;
+
+
+typedef enum VkIndirectCommandsLayoutUsageFlagBitsNVX {
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_UNORDERED_SEQUENCES_BIT_NVX = 0x00000001,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_SPARSE_SEQUENCES_BIT_NVX = 0x00000002,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_EMPTY_EXECUTIONS_BIT_NVX = 0x00000004,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_INDEXED_SEQUENCES_BIT_NVX = 0x00000008,
+    VK_INDIRECT_COMMANDS_LAYOUT_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkIndirectCommandsLayoutUsageFlagBitsNVX;
+typedef VkFlags VkIndirectCommandsLayoutUsageFlagsNVX;
+
+typedef enum VkObjectEntryUsageFlagBitsNVX {
+    VK_OBJECT_ENTRY_USAGE_GRAPHICS_BIT_NVX = 0x00000001,
+    VK_OBJECT_ENTRY_USAGE_COMPUTE_BIT_NVX = 0x00000002,
+    VK_OBJECT_ENTRY_USAGE_FLAG_BITS_MAX_ENUM_NVX = 0x7FFFFFFF
+} VkObjectEntryUsageFlagBitsNVX;
+typedef VkFlags VkObjectEntryUsageFlagsNVX;
+
+typedef struct VkDeviceGeneratedCommandsFeaturesNVX {
+    VkStructureType    sType;
+    const void*        pNext;
+    VkBool32           computeBindingPointSupport;
+} VkDeviceGeneratedCommandsFeaturesNVX;
+
+typedef struct VkDeviceGeneratedCommandsLimitsNVX {
+    VkStructureType    sType;
+    const void*        pNext;
+    uint32_t           maxIndirectCommandsLayoutTokenCount;
+    uint32_t           maxObjectEntryCounts;
+    uint32_t           minSequenceCountBufferOffsetAlignment;
+    uint32_t           minSequenceIndexBufferOffsetAlignment;
+    uint32_t           minCommandsTokenBufferOffsetAlignment;
+} VkDeviceGeneratedCommandsLimitsNVX;
+
+typedef struct VkIndirectCommandsTokenNVX {
+    VkIndirectCommandsTokenTypeNVX    tokenType;
+    VkBuffer                          buffer;
+    VkDeviceSize                      offset;
+} VkIndirectCommandsTokenNVX;
+
+typedef struct VkIndirectCommandsLayoutTokenNVX {
+    VkIndirectCommandsTokenTypeNVX    tokenType;
+    uint32_t                          bindingUnit;
+    uint32_t                          dynamicCount;
+    uint32_t                          divisor;
+} VkIndirectCommandsLayoutTokenNVX;
+
+typedef struct VkIndirectCommandsLayoutCreateInfoNVX {
+    VkStructureType                            sType;
+    const void*                                pNext;
+    VkPipelineBindPoint                        pipelineBindPoint;
+    VkIndirectCommandsLayoutUsageFlagsNVX      flags;
+    uint32_t                                   tokenCount;
+    const VkIndirectCommandsLayoutTokenNVX*    pTokens;
+} VkIndirectCommandsLayoutCreateInfoNVX;
+
+typedef struct VkCmdProcessCommandsInfoNVX {
+    VkStructureType                      sType;
+    const void*                          pNext;
+    VkObjectTableNVX                     objectTable;
+    VkIndirectCommandsLayoutNVX          indirectCommandsLayout;
+    uint32_t                             indirectCommandsTokenCount;
+    const VkIndirectCommandsTokenNVX*    pIndirectCommandsTokens;
+    uint32_t                             maxSequencesCount;
+    VkCommandBuffer                      targetCommandBuffer;
+    VkBuffer                             sequencesCountBuffer;
+    VkDeviceSize                         sequencesCountOffset;
+    VkBuffer                             sequencesIndexBuffer;
+    VkDeviceSize                         sequencesIndexOffset;
+} VkCmdProcessCommandsInfoNVX;
+
+typedef struct VkCmdReserveSpaceForCommandsInfoNVX {
+    VkStructureType                sType;
+    const void*                    pNext;
+    VkObjectTableNVX               objectTable;
+    VkIndirectCommandsLayoutNVX    indirectCommandsLayout;
+    uint32_t                       maxSequencesCount;
+} VkCmdReserveSpaceForCommandsInfoNVX;
+
+typedef struct VkObjectTableCreateInfoNVX {
+    VkStructureType                      sType;
+    const void*                          pNext;
+    uint32_t                             objectCount;
+    const VkObjectEntryTypeNVX*          pObjectEntryTypes;
+    const uint32_t*                      pObjectEntryCounts;
+    const VkObjectEntryUsageFlagsNVX*    pObjectEntryUsageFlags;
+    uint32_t                             maxUniformBuffersPerDescriptor;
+    uint32_t                             maxStorageBuffersPerDescriptor;
+    uint32_t                             maxStorageImagesPerDescriptor;
+    uint32_t                             maxSampledImagesPerDescriptor;
+    uint32_t                             maxPipelineLayouts;
+} VkObjectTableCreateInfoNVX;
+
+typedef struct VkObjectTableEntryNVX {
+    VkObjectEntryTypeNVX          type;
+    VkObjectEntryUsageFlagsNVX    flags;
+} VkObjectTableEntryNVX;
+
+typedef struct VkObjectTablePipelineEntryNVX {
+    VkObjectEntryTypeNVX          type;
+    VkObjectEntryUsageFlagsNVX    flags;
+    VkPipeline                    pipeline;
+} VkObjectTablePipelineEntryNVX;
+
+typedef struct VkObjectTableDescriptorSetEntryNVX {
+    VkObjectEntryTypeNVX          type;
+    VkObjectEntryUsageFlagsNVX    flags;
+    VkPipelineLayout              pipelineLayout;
+    VkDescriptorSet               descriptorSet;
+} VkObjectTableDescriptorSetEntryNVX;
+
+typedef struct VkObjectTableVertexBufferEntryNVX {
+    VkObjectEntryTypeNVX          type;
+    VkObjectEntryUsageFlagsNVX    flags;
+    VkBuffer                      buffer;
+} VkObjectTableVertexBufferEntryNVX;
+
+typedef struct VkObjectTableIndexBufferEntryNVX {
+    VkObjectEntryTypeNVX          type;
+    VkObjectEntryUsageFlagsNVX    flags;
+    VkBuffer                      buffer;
+} VkObjectTableIndexBufferEntryNVX;
+
+typedef struct VkObjectTablePushConstantEntryNVX {
+    VkObjectEntryTypeNVX          type;
+    VkObjectEntryUsageFlagsNVX    flags;
+    VkPipelineLayout              pipelineLayout;
+    VkShaderStageFlags            stageFlags;
+} VkObjectTablePushConstantEntryNVX;
+
+
+typedef void (VKAPI_PTR *PFN_vkCmdProcessCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdProcessCommandsInfoNVX* pProcessCommandsInfo);
+typedef void (VKAPI_PTR *PFN_vkCmdReserveSpaceForCommandsNVX)(VkCommandBuffer commandBuffer, const VkCmdReserveSpaceForCommandsInfoNVX* pReserveSpaceInfo);
+typedef VkResult (VKAPI_PTR *PFN_vkCreateIndirectCommandsLayoutNVX)(VkDevice device, const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkIndirectCommandsLayoutNVX* pIndirectCommandsLayout);
+typedef void (VKAPI_PTR *PFN_vkDestroyIndirectCommandsLayoutNVX)(VkDevice device, VkIndirectCommandsLayoutNVX indirectCommandsLayout, const VkAllocationCallbacks* pAllocator);
+typedef VkResult (VKAPI_PTR *PFN_vkCreateObjectTableNVX)(VkDevice device, const VkObjectTableCreateInfoNVX* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkObjectTableNVX* pObjectTable);
+typedef void (VKAPI_PTR *PFN_vkDestroyObjectTableNVX)(VkDevice device, VkObjectTableNVX objectTable, const VkAllocationCallbacks* pAllocator);
+typedef VkResult (VKAPI_PTR *PFN_vkRegisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectTableEntryNVX* const*    ppObjectTableEntries, const uint32_t* pObjectIndices);
+typedef VkResult (VKAPI_PTR *PFN_vkUnregisterObjectsNVX)(VkDevice device, VkObjectTableNVX objectTable, uint32_t objectCount, const VkObjectEntryTypeNVX* pObjectEntryTypes, const uint32_t* pObjectIndices);
+typedef void (VKAPI_PTR *PFN_vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX)(VkPhysicalDevice physicalDevice, VkDeviceGeneratedCommandsFeaturesNVX* pFeatures, VkDeviceGeneratedCommandsLimitsNVX* pLimits);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR void VKAPI_CALL vkCmdProcessCommandsNVX(
+    VkCommandBuffer                             commandBuffer,
+    const VkCmdProcessCommandsInfoNVX*          pProcessCommandsInfo);
+
+VKAPI_ATTR void VKAPI_CALL vkCmdReserveSpaceForCommandsNVX(
+    VkCommandBuffer                             commandBuffer,
+    const VkCmdReserveSpaceForCommandsInfoNVX*  pReserveSpaceInfo);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateIndirectCommandsLayoutNVX(
+    VkDevice                                    device,
+    const VkIndirectCommandsLayoutCreateInfoNVX* pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkIndirectCommandsLayoutNVX*                pIndirectCommandsLayout);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyIndirectCommandsLayoutNVX(
+    VkDevice                                    device,
+    VkIndirectCommandsLayoutNVX                 indirectCommandsLayout,
+    const VkAllocationCallbacks*                pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateObjectTableNVX(
+    VkDevice                                    device,
+    const VkObjectTableCreateInfoNVX*           pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkObjectTableNVX*                           pObjectTable);
+
+VKAPI_ATTR void VKAPI_CALL vkDestroyObjectTableNVX(
+    VkDevice                                    device,
+    VkObjectTableNVX                            objectTable,
+    const VkAllocationCallbacks*                pAllocator);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkRegisterObjectsNVX(
+    VkDevice                                    device,
+    VkObjectTableNVX                            objectTable,
+    uint32_t                                    objectCount,
+    const VkObjectTableEntryNVX* const*         ppObjectTableEntries,
+    const uint32_t*                             pObjectIndices);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkUnregisterObjectsNVX(
+    VkDevice                                    device,
+    VkObjectTableNVX                            objectTable,
+    uint32_t                                    objectCount,
+    const VkObjectEntryTypeNVX*                 pObjectEntryTypes,
+    const uint32_t*                             pObjectIndices);
+
+VKAPI_ATTR void VKAPI_CALL vkGetPhysicalDeviceGeneratedCommandsPropertiesNVX(
+    VkPhysicalDevice                            physicalDevice,
+    VkDeviceGeneratedCommandsFeaturesNVX*       pFeatures,
+    VkDeviceGeneratedCommandsLimitsNVX*         pLimits);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -281,7 +281,7 @@ def parse_source_list(env, filename, names=None):
                    # cause duplicate actions.
                    f = f[len(cur_srcdir + '/'):]
                # do not include any headers
-                if f.endswith('.h'):
+                if f.endswith(tuple(['.h','.hpp'])):
                    continue
                srcs.append(f)

--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -323,10 +323,6 @@ def generate(env):
                'GLX_DIRECT_RENDERING',
                'GLX_INDIRECT_RENDERING',
            ]
-        if env['platform'] in ('linux', 'freebsd'):
-            cppdefines += ['HAVE_ALIAS']
-        else:
-            cppdefines += ['GLX_ALIAS_UNSUPPORTED']

        if env['platform'] in ('linux', 'darwin'):
            cppdefines += ['HAVE_XLOCALE_H']
@@ -651,7 +647,7 @@ def generate(env):
    env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
    env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
    env.PkgCheckModules('XF86VIDMODE', ['xxf86vm'])
-    env.PkgCheckModules('DRM', ['libdrm >= 2.4.38'])
+    env.PkgCheckModules('DRM', ['libdrm >= 2.4.66'])

    if env['x11']:
        env.Append(CPPPATH = env['X11_CPPPATH'])
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -106,7 +106,24 @@ def generate(env):
        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
-        if llvm_version >= distutils.version.LooseVersion('3.7'):
+        if llvm_version >= distutils.version.LooseVersion('3.9'):
+            env.Prepend(LIBS = [
+                'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
+                'LLVMDebugInfoCodeView', 'LLVMCodeGen',
+                'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMInstrumentation', 'LLVMTransformUtils',
+                'LLVMBitWriter', 'LLVMX86Desc',
+                'LLVMMCDisassembler', 'LLVMX86Info',
+                'LLVMX86AsmPrinter', 'LLVMX86Utils',
+                'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
+                'LLVMAnalysis', 'LLVMProfileData',
+                'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
+                'LLVMBitReader', 'LLVMMC', 'LLVMCore',
+                'LLVMSupport',
+                'LLVMIRReader', 'LLVMAsmParser'  # exact case matters on case-sensitive filesystems
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('3.7'):
            env.Prepend(LIBS = [
                'LLVMBitWriter', 'LLVMX86Disassembler', 'LLVMX86AsmParser',
                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
@@ -177,11 +194,12 @@ def generate(env):
                # that.
                env.Append(LINKFLAGS = ['/nodefaultlib:LIBCMT'])
    else:
-        if not env.Detect('llvm-config'):
-            print 'scons: llvm-config script not found'
+        llvm_config = os.environ.get('LLVM_CONFIG', 'llvm-config')
+        if not env.Detect(llvm_config):
+            print 'scons: %s script not found' % llvm_config
            return

-        llvm_version = env.backtick('llvm-config --version').rstrip()
+        llvm_version = env.backtick('%s --version' % llvm_config).rstrip()
        llvm_version = distutils.version.LooseVersion(llvm_version)

        if llvm_version < distutils.version.LooseVersion(required_llvm_version):
@@ -191,7 +209,7 @@ def generate(env):
        try:
            # Treat --cppflags specially to prevent NDEBUG from disabling
            # assertion failures in debug builds.
-            cppflags = env.ParseFlags('!llvm-config --cppflags')
+            cppflags = env.ParseFlags('!%s --cppflags' % llvm_config)
            try:
                cppflags['CPPDEFINES'].remove('NDEBUG')
            except ValueError:
@@ -199,16 +217,16 @@ def generate(env):
            env.MergeFlags(cppflags)

            # Match llvm --fno-rtti flag
-            cxxflags = env.backtick('llvm-config --cxxflags').split()
+            cxxflags = env.backtick('%s --cxxflags' % llvm_config).split()
            if '-fno-rtti' in cxxflags:
                env.Append(CXXFLAGS = ['-fno-rtti'])

-            components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 'mcdisassembler']
+            components = ['engine', 'mcjit', 'bitwriter', 'x86asmprinter', 'mcdisassembler', 'irreader']

-            env.ParseConfig('llvm-config --libs ' + ' '.join(components))
-            env.ParseConfig('llvm-config --ldflags')
+            env.ParseConfig('%s --libs ' % llvm_config + ' '.join(components))
+            env.ParseConfig('%s --ldflags' % llvm_config)
            if llvm_version >= distutils.version.LooseVersion('3.5'):
-                env.ParseConfig('llvm-config --system-libs')
+                env.ParseConfig('%s --system-libs' % llvm_config)
                env.Append(CXXFLAGS = ['-std=c++11'])
        except OSError:
            print 'scons: llvm-config version %s failed' % llvm_version
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -44,6 +44,7 @@ git_sha1.h: git_sha1.h.tmp

 BUILT_SOURCES = git_sha1.h
 CLEANFILES = $(BUILT_SOURCES)
+EXTRA_DIST =

 SUBDIRS = . gtest util mapi/glapi/gen mapi

@@ -118,6 +119,7 @@ endif
 if HAVE_VULKAN_COMMON
 SUBDIRS += vulkan/wsi
 endif
+EXTRA_DIST += vulkan/registry/vk.xml

 ## Requires the i965 compiler (part of mesa) and wayland-drm
 if HAVE_INTEL_VULKAN
@@ -126,7 +128,6 @@ endif

 # Requires wayland-drm
 if HAVE_RADEON_VULKAN
-SUBDIRS += amd/common
 SUBDIRS += amd/vulkan
 endif

@@ -134,7 +135,7 @@ if HAVE_GALLIUM
 SUBDIRS += gallium
 endif

-EXTRA_DIST = \
+EXTRA_DIST += \
 	getopt hgl SConscript \
 	$(top_srcdir)/include/GL/mesa_glinterop.h

--- a/src/amd/Android.common.mk
+++ b/src/amd/Android.common.mk
@@ -0,0 +1,63 @@
+# Copyright © 2016 Red Hat.
+# Copyright © 2016 Mauro Rossi <issor.oruam@gmail.com>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+
+# ---------------------------------------
+# Build libmesa_amd_common
+# ---------------------------------------
+
+include $(CLEAR_VARS)
+
+LOCAL_MODULE := libmesa_amd_common
+
+LOCAL_SRC_FILES := \
+	$(AMD_COMPILER_FILES) \
+	$(AMD_DEBUG_FILES)
+
+LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU   # instructs LLVM to declare LLVMInitializeAMDGPU* functions
+
+# generate sources
+LOCAL_MODULE_CLASS := STATIC_LIBRARIES
+intermediates := $(call local-generated-sources-dir)
+LOCAL_GENERATED_SOURCES := $(addprefix $(intermediates)/, $(AMD_GENERATED_FILES))
+
+$(LOCAL_GENERATED_SOURCES): PRIVATE_PYTHON := $(MESA_PYTHON2)
+$(LOCAL_GENERATED_SOURCES): PRIVATE_CUSTOM_TOOL = $(PRIVATE_PYTHON) $^ > $@
+
+$(intermediates)/common/sid_tables.h: $(LOCAL_PATH)/common/sid_tables.py $(MESA_TOP)/src/amd/common/sid.h
+	$(transform-generated-source)
+
+LOCAL_C_INCLUDES := \
+	$(MESA_TOP)/include \
+	$(MESA_TOP)/src \
+	$(MESA_TOP)/src/amd/common \
+	$(MESA_TOP)/src/gallium/include \
+	$(MESA_TOP)/src/gallium/auxiliary \
+	$(intermediates)/common \
+	external/llvm/include \
+	external/llvm/device/include \
+	external/libcxx/include \
+	external/elfutils/$(if $(filter 5,$(MESA_ANDROID_MAJOR_VERSION)),0.153/,$(if $(filter 6,$(MESA_ANDROID_MAJOR_VERSION)),src/))libelf
+
+LOCAL_STATIC_LIBRARIES := libLLVMCore
+
+include $(MESA_COMMON_MK)
+include $(BUILD_STATIC_LIBRARY)
--- a/src/amd/Android.mk
+++ b/src/amd/Android.mk
@@ -26,3 +26,4 @@ LOCAL_PATH := $(call my-dir)
 include $(LOCAL_PATH)/Makefile.sources

 include $(LOCAL_PATH)/Android.addrlib.mk
+include $(LOCAL_PATH)/Android.common.mk
--- a/src/amd/Makefile.am
+++ b/src/amd/Makefile.am
@@ -23,5 +23,6 @@ include Makefile.sources

 noinst_LTLIBRARIES =

-EXTRA_DIST = $(COMMON_HEADER_FILES)
+EXTRA_DIST = $(COMMON_HEADER_FILES) common/sid_tables.py
 include Makefile.addrlib.am
+include Makefile.common.am
--- a/src/amd/Makefile.common.am
+++ b/src/amd/Makefile.common.am
@@ -19,15 +19,19 @@
 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 # IN THE SOFTWARE.

-include Makefile.sources
+if NEED_RADEON_LLVM
+
+COMMON_LIBS = common/libamd_common.la

 # TODO cleanup these
-AM_CPPFLAGS = \
+common_libamd_common_la_CPPFLAGS = \
 	$(VALGRIND_CFLAGS) \
 	$(DEFINES) \
 	-I$(top_srcdir)/include \
 	-I$(top_builddir)/src \
 	-I$(top_srcdir)/src \
+	-I$(top_builddir)/src/amd/common \
+	-I$(top_srcdir)/src/amd/common \
 	-I$(top_builddir)/src/compiler \
 	-I$(top_builddir)/src/compiler/nir \
 	-I$(top_srcdir)/src/compiler \
@@ -37,15 +41,32 @@ AM_CPPFLAGS = \
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	-I$(top_srcdir)/src/gallium/include

-AM_CFLAGS = $(VISIBILITY_CFLAGS) \
+common_libamd_common_la_CFLAGS = \
+	$(VISIBILITY_CFLAGS) \
 	$(PTHREAD_CFLAGS) \
 	$(LLVM_CFLAGS) \
 	$(LIBELF_CFLAGS)

-AM_CXXFLAGS = \
+common_libamd_common_la_CXXFLAGS = \
 	$(VISIBILITY_CXXFLAGS) \
 	$(LLVM_CXXFLAGS)

-noinst_LTLIBRARIES = libamd_common.la
+noinst_LTLIBRARIES += $(COMMON_LIBS)

-libamd_common_la_SOURCES = $(AMD_COMPILER_SOURCES)
+common_libamd_common_la_SOURCES = \
+	$(AMD_COMPILER_FILES) \
+	$(AMD_DEBUG_FILES) \
+	$(AMD_GENERATED_FILES)
+
+# nir_to_llvm requires LLVM 3.9, which is only required as a minimum when
+# radv is built.
+if HAVE_RADEON_VULKAN
+common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
+endif
+endif
+
+common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h
+	$(AM_V_at)$(MKDIR_P) $(@D)
+	$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h > $@
+
+BUILT_SOURCES = $(AMD_GENERATED_FILES)
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources
@@ -25,3 +25,21 @@ ADDRLIB_FILES = \
 	addrlib/r800/egbaddrlib.h \
 	addrlib/r800/siaddrlib.cpp \
 	addrlib/r800/siaddrlib.h
+
+AMD_COMPILER_FILES = \
+	common/ac_binary.c \
+	common/ac_binary.h \
+	common/ac_llvm_helper.cpp \
+	common/ac_llvm_util.c \
+	common/ac_llvm_util.h
+
+AMD_NIR_FILES = \
+	common/ac_nir_to_llvm.c \
+	common/ac_nir_to_llvm.h
+
+AMD_DEBUG_FILES = \
+	common/ac_debug.c \
+	common/ac_debug.h
+
+AMD_GENERATED_FILES = \
+	common/sid_tables.h
--- a/src/amd/addrlib/addrtypes.h
+++ b/src/amd/addrlib/addrtypes.h
@@ -88,7 +88,11 @@ typedef int            INT;

 #ifndef ADDR_FASTCALL
    #if defined(__GNUC__)
-        #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #if defined(__i386__)
+            #define ADDR_FASTCALL __attribute__((regparm(0)))
+        #else
+            #define ADDR_FASTCALL
+        #endif
    #else
        #define ADDR_FASTCALL __fastcall
    #endif
--- a/src/amd/addrlib/r800/ciaddrlib.cpp
+++ b/src/amd/addrlib/r800/ciaddrlib.cpp
@@ -353,6 +353,7 @@ AddrChipFamily CIAddrLib::HwlConvertChipFamily(
            m_settings.isFiji            = ASICREV_IS_FIJI_P(uChipRevision);
            m_settings.isPolaris10       = ASICREV_IS_POLARIS10_P(uChipRevision);
            m_settings.isPolaris11       = ASICREV_IS_POLARIS11_M(uChipRevision);
+            m_settings.isPolaris12       = ASICREV_IS_POLARIS12_V(uChipRevision);
            break;
        case FAMILY_CZ:
            m_settings.isCarrizo         = 1;
@@ -417,7 +418,7 @@ BOOL_32 CIAddrLib::HwlInitGlobalParams(
    {
        m_pipes = 16;
    }
-    else if (m_settings.isPolaris11)
+    else if (m_settings.isPolaris11 || m_settings.isPolaris12)
    {
        m_pipes = 4;
    }
--- a/src/amd/addrlib/r800/ciaddrlib.h
+++ b/src/amd/addrlib/r800/ciaddrlib.h
@@ -62,6 +62,7 @@ struct CIChipSettings
        UINT_32 isFiji            : 1;
        UINT_32 isPolaris10       : 1;
        UINT_32 isPolaris11       : 1;
+        UINT_32 isPolaris12       : 1;
        // VI fusion (Carrizo)
        UINT_32 isCarrizo         : 1;
    };
--- a/src/gallium/drivers/radeonsi/.gitignore
+++ b/src/gallium/drivers/radeonsi/.gitignore
--- a/src/amd/common/Makefile.sources
+++ b/src/amd/common/Makefile.sources
@@ -1,29 +0,0 @@
-# Copyright © 2016 Bas Nieuwenhuizen
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-
-AMD_COMPILER_SOURCES := \
-	ac_binary.c \
-	ac_binary.h \
-	ac_llvm_helper.cpp \
-	ac_llvm_util.c \
-	ac_llvm_util.h \
-	ac_nir_to_llvm.c \
-	ac_nir_to_llvm.h
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+
+#include "ac_debug.h"
+
+#include "sid.h"
+#include "sid_tables.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+/* Parsed IBs are difficult to read without colors. Use "less -R file" to
+ * read them, or use "aha -b -f file" to convert them to html.
+ */
+#define COLOR_RESET	"\033[0m"
+#define COLOR_RED	"\033[31m"
+#define COLOR_GREEN	"\033[1;32m"
+#define COLOR_YELLOW	"\033[1;33m"
+#define COLOR_CYAN	"\033[1;36m"
+
+#define INDENT_PKT 8
+
+static void print_spaces(FILE *f, unsigned num)
+{
+	fprintf(f, "%*s", num, ""); /* empty string padded to a width of num: writes num blanks */
+}
+
+static void print_value(FILE *file, uint32_t value, int bits)
+{
+	/* Guess if it's int or float: anything up to 1 << 15 is shown as an integer. */
+	if (value <= (1 << 15)) {
+		if (value <= 9) /* single digit: decimal only, no hex form */
+			fprintf(file, "%u\n", value);
+		else
+			fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value); /* bits / 4 hex digits */
+	} else {
+		float f = uif(value); /* presumably reinterprets the raw bits as a float; uif() is defined elsewhere */
+
+		if (fabs(f) < 100000 && f*10 == floor(f*10)) /* small magnitude, at most one decimal digit */
+			fprintf(file, "%.1ff (0x%0*x)\n", f, bits / 4, value);
+		else
+			/* Don't print more leading zeros than there are bits. */
+			fprintf(file, "0x%0*x\n", bits / 4, value);
+	}
+}
+
+static void print_named_value(FILE *file, const char *name, uint32_t value,
+			      int bits)
+{
+	print_spaces(file, INDENT_PKT); /* indent by INDENT_PKT columns */
+	fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name); /* yellow name, then " <- " */
+	print_value(file, value, bits); /* prints the value and the terminating newline */
+}
+
+void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
+		 uint32_t field_mask)
+{
+	int r, f;
+	/* Print the register at "offset" and decode the fields of "value" selected by field_mask. */
+	for (r = 0; r < ARRAY_SIZE(sid_reg_table); r++) { /* linear search for a matching offset */
+		const struct si_reg *reg = &sid_reg_table[r];
+		const char *reg_name = sid_strings + reg->name_offset;
+
+		if (reg->offset == offset) {
+			bool first_field = true;
+
+			print_spaces(file, INDENT_PKT);
+			fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ",
+				reg_name);
+
+			if (!reg->num_fields) { /* no field table: print the whole dword */
+				print_value(file, value, 32);
+				return;
+			}
+
+			for (f = 0; f < reg->num_fields; f++) {
+				const struct si_field *field = sid_fields_table + reg->fields_offset + f;
+				const int *values_offsets = sid_strings_offsets + field->values_offset;
+				uint32_t val = (value & field->mask) >>
+					       (ffs(field->mask) - 1); /* shift the field down to bit 0 */
+
+				if (!(field->mask & field_mask)) /* field not selected by the caller */
+					continue;
+
+				/* Indent the field so it lines up after "<reg_name> <- " (4 extra chars). */
+				if (!first_field)
+					print_spaces(file,
+						     INDENT_PKT + strlen(reg_name) + 4);
+
+				/* Print the field. */
+				fprintf(file, "%s = ", sid_strings + field->name_offset);
+
+				if (val < field->num_values && values_offsets[val] >= 0) /* value has a name string */
+					fprintf(file, "%s\n", sid_strings + values_offsets[val]);
+				else
+					print_value(file, val,
+						    util_bitcount(field->mask));
+
+				first_field = false;
+			}
+			return;
+		}
+	}
+	/* No table entry matched: print the raw offset and value. */
+	print_spaces(file, INDENT_PKT);
+	fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value);
+}
+
+static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
+				    unsigned reg_offset)
+{
+	unsigned reg = (ib[1] << 2) + reg_offset; /* presumably a dword index scaled to a byte offset -- TODO confirm */
+	int i;
+
+	for (i = 0; i < count; i++) /* one register per payload dword, 4 bytes apart */
+		ac_dump_reg(f, reg + i*4, ib[2+i], ~0); /* ~0: show every field */
+}
+
+static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
+				  int trace_id, enum chip_class chip_class,
+				  ac_debug_addr_callback addr_callback,
+				  void *addr_callback_data)
+{
+	unsigned count = PKT_COUNT_G(ib[0]);
+	unsigned op = PKT3_IT_OPCODE_G(ib[0]);
+	const char *predicate = PKT3_PREDICATE(ib[0]) ? "(predicate)" : "";
+	int i;
+	/* Prints the type-3 packet at ib[0] to f, advances *num_dw, returns the next packet. */
+	/* Print the name first. */
+	for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
+		if (packet3_table[i].op == op)
+			break;
+
+	if (i < ARRAY_SIZE(packet3_table)) {
+		const char *name = sid_strings + packet3_table[i].name_offset;
+
+		if (op == PKT3_SET_CONTEXT_REG ||
+		    op == PKT3_SET_CONFIG_REG ||
+		    op == PKT3_SET_UCONFIG_REG ||
+		    op == PKT3_SET_SH_REG)
+			fprintf(f, COLOR_CYAN "%s%s" COLOR_RESET ":\n",
+				name, predicate);
+		else
+			fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n",
+				name, predicate);
+	} else
+		fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n",
+			op, predicate);
+
+	/* Print the contents. */
+	switch (op) {
+	case PKT3_SET_CONTEXT_REG:
+		ac_parse_set_reg_packet(f, ib, count, SI_CONTEXT_REG_OFFSET);
+		break;
+	case PKT3_SET_CONFIG_REG:
+		ac_parse_set_reg_packet(f, ib, count, SI_CONFIG_REG_OFFSET);
+		break;
+	case PKT3_SET_UCONFIG_REG:
+		ac_parse_set_reg_packet(f, ib, count, CIK_UCONFIG_REG_OFFSET);
+		break;
+	case PKT3_SET_SH_REG:
+		ac_parse_set_reg_packet(f, ib, count, SI_SH_REG_OFFSET);
+		break;
+	case PKT3_ACQUIRE_MEM:
+		ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0);
+		ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0);
+		ac_dump_reg(f, R_030230_CP_COHER_SIZE_HI, ib[3], ~0);
+		ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[4], ~0);
+		ac_dump_reg(f, R_0301E4_CP_COHER_BASE_HI, ib[5], ~0);
+		print_named_value(f, "POLL_INTERVAL", ib[6], 16);
+		break;
+	case PKT3_SURFACE_SYNC:
+		if (chip_class >= CIK) {
+			ac_dump_reg(f, R_0301F0_CP_COHER_CNTL, ib[1], ~0);
+			ac_dump_reg(f, R_0301F4_CP_COHER_SIZE, ib[2], ~0);
+			ac_dump_reg(f, R_0301F8_CP_COHER_BASE, ib[3], ~0);
+		} else {
+			ac_dump_reg(f, R_0085F0_CP_COHER_CNTL, ib[1], ~0);
+			ac_dump_reg(f, R_0085F4_CP_COHER_SIZE, ib[2], ~0);
+			ac_dump_reg(f, R_0085F8_CP_COHER_BASE, ib[3], ~0);
+		}
+		print_named_value(f, "POLL_INTERVAL", ib[4], 16);
+		break;
+	case PKT3_EVENT_WRITE:
+		ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
+			    S_028A90_EVENT_TYPE(~0));
+		print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
+		print_named_value(f, "INV_L2", (ib[1] >> 20) & 0x1, 1);
+		if (count > 0) {
+			print_named_value(f, "ADDRESS_LO", ib[2], 32);
+			print_named_value(f, "ADDRESS_HI", ib[3], 16);
+		}
+		break;
+	case PKT3_DRAW_INDEX_AUTO:
+		ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[1], ~0);
+		ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[2], ~0);
+		break;
+	case PKT3_DRAW_INDEX_2:
+		ac_dump_reg(f, R_028A78_VGT_DMA_MAX_SIZE, ib[1], ~0);
+		ac_dump_reg(f, R_0287E8_VGT_DMA_BASE, ib[2], ~0);
+		ac_dump_reg(f, R_0287E4_VGT_DMA_BASE_HI, ib[3], ~0);
+		ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[4], ~0);
+		ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[5], ~0);
+		break;
+	case PKT3_INDEX_TYPE:
+		ac_dump_reg(f, R_028A7C_VGT_DMA_INDEX_TYPE, ib[1], ~0);
+		break;
+	case PKT3_NUM_INSTANCES:
+		ac_dump_reg(f, R_030934_VGT_NUM_INSTANCES, ib[1], ~0);
+		break;
+	case PKT3_WRITE_DATA:
+		ac_dump_reg(f, R_370_CONTROL, ib[1], ~0);
+		ac_dump_reg(f, R_371_DST_ADDR_LO, ib[2], ~0);
+		ac_dump_reg(f, R_372_DST_ADDR_HI, ib[3], ~0);
+		for (i = 2; i < count; i++) {
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, "0x%08x\n", ib[2+i]);
+		}
+		break;
+	case PKT3_CP_DMA:
+		ac_dump_reg(f, R_410_CP_DMA_WORD0, ib[1], ~0);
+		ac_dump_reg(f, R_411_CP_DMA_WORD1, ib[2], ~0);
+		ac_dump_reg(f, R_412_CP_DMA_WORD2, ib[3], ~0);
+		ac_dump_reg(f, R_413_CP_DMA_WORD3, ib[4], ~0);
+		ac_dump_reg(f, R_414_COMMAND, ib[5], ~0);
+		break;
+	case PKT3_DMA_DATA:
+		ac_dump_reg(f, R_500_DMA_DATA_WORD0, ib[1], ~0);
+		ac_dump_reg(f, R_501_SRC_ADDR_LO, ib[2], ~0);
+		ac_dump_reg(f, R_502_SRC_ADDR_HI, ib[3], ~0);
+		ac_dump_reg(f, R_503_DST_ADDR_LO, ib[4], ~0);
+		ac_dump_reg(f, R_504_DST_ADDR_HI, ib[5], ~0);
+		ac_dump_reg(f, R_414_COMMAND, ib[6], ~0);
+		break;
+	case PKT3_INDIRECT_BUFFER_SI:
+	case PKT3_INDIRECT_BUFFER_CONST:
+	case PKT3_INDIRECT_BUFFER_CIK:
+		ac_dump_reg(f, R_3F0_IB_BASE_LO, ib[1], ~0);
+		ac_dump_reg(f, R_3F1_IB_BASE_HI, ib[2], ~0);
+		ac_dump_reg(f, R_3F2_CONTROL, ib[3], ~0);
+
+		if (addr_callback) {
+			uint64_t addr = ((uint64_t)ib[2] << 32) | ib[1];
+			void *data = addr_callback(addr_callback_data, addr);
+			const char *name = G_3F2_CHAIN(ib[3]) ? "chained" : "nested";
+
+			if (data) /* recurse into the referenced IB when the callback could map it */
+				ac_parse_ib(f, data,  G_3F2_IB_SIZE(ib[3]),
+					    trace_id, name, chip_class,
+					    addr_callback, addr_callback_data);
+		}
+		break;
+	case PKT3_CLEAR_STATE:
+	case PKT3_INCREMENT_DE_COUNTER:
+	case PKT3_PFP_SYNC_ME:
+		break;
+	case PKT3_NOP:
+		if (ib[0] == 0xffff1000) {
+			count = -1; /* One dword NOP: count + 2 below wraps around to 1. */
+			break;
+		} else if (count == 0 && AC_IS_TRACE_POINT(ib[1])) {
+			unsigned packet_id = AC_GET_TRACE_POINT_ID(ib[1]);
+
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, COLOR_RED "Trace point ID: %u" COLOR_RESET "\n", packet_id);
+
+			if (trace_id == -1)
+				break; /* tracing was disabled */
+
+			print_spaces(f, INDENT_PKT);
+			if (packet_id < trace_id)
+				fprintf(f, COLOR_RED
+					"This trace point was reached by the CP."
+					COLOR_RESET "\n");
+			else if (packet_id == trace_id)
+				fprintf(f, COLOR_RED
+					"!!!!! This is the last trace point that "
+					"was reached by the CP !!!!!"
+					COLOR_RESET "\n");
+			else if (packet_id == (unsigned)trace_id + 1) /* the ID right after the last one reached */
+				fprintf(f, COLOR_RED
+					"!!!!! This is the first trace point that "
+					"was NOT reached by the CP !!!!!"
+					COLOR_RESET "\n");
+			else
+				fprintf(f, COLOR_RED
+					"!!!!! This trace point was NOT reached "
+					"by the CP !!!!!"
+					COLOR_RESET "\n");
+			break;
+		}
+		/* fall through, print all dwords */
+	default:
+		for (i = 0; i < count+1; i++) {
+			print_spaces(f, INDENT_PKT);
+			fprintf(f, "0x%08x\n", ib[1+i]);
+		}
+	}
+
+	ib += count + 2; /* header dword plus count + 1 payload dwords */
+	*num_dw -= count + 2;
+	return ib;
+}
+
+/**
+ * Parse and print an IB into a file.
+ *
+ * \param f		file
+ * \param ib		IB
+ * \param num_dw	size of the IB in dwords
+ * \param trace_id	the last trace ID that is known to have been reached
+ *			and executed by the CP, typically read from a buffer
+ * \param name		label printed in the "begin" and "end" banner lines
+ * \param chip_class	chip class
+ * \param addr_callback	get a mapped pointer of the IB at an address; can be NULL
+ * \param addr_callback_data user data for addr_callback
+ */
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
+		 const char *name, enum chip_class chip_class,
+		 ac_debug_addr_callback addr_callback, void *addr_callback_data)
+{
+	fprintf(f, "------------------ %s begin ------------------\n", name);
+
+	while (num_dw > 0) { /* one packet per iteration */
+		unsigned type = PKT_TYPE_G(ib[0]);
+
+		switch (type) {
+		case 3:
+			ib = ac_parse_packet3(f, ib, &num_dw, trace_id,
+					      chip_class, addr_callback,
+					      addr_callback_data); /* advances ib and decrements num_dw */
+			break;
+		case 2:
+			/* type-2 nop */
+			if (ib[0] == 0x80000000) {
+				fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n");
+				ib++;
+				num_dw--;
+				break;
+			}
+			/* fall through */
+		default:
+			fprintf(f, "Unknown packet type %i\n", type);
+			return; /* stops parsing; the "end" banner is not printed on this path */
+		}
+	}
+
+	fprintf(f, "------------------- %s end -------------------\n", name);
+	if (num_dw < 0) { /* the last packet claimed more dwords than the IB had left */
+		printf("Packet ends after the end of IB.\n"); /* NOTE(review): goes to stdout, not f */
+		exit(0); /* NOTE(review): ends the whole process with status 0 -- confirm this is intended */
+	}
+	fprintf(f, "\n");
+}
--- a/src/amd/common/ac_debug.h
+++ b/src/amd/common/ac_debug.h
@@ -0,0 +1,46 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Marek Olšák <maraeo@gmail.com>
+ */
+#ifndef AC_DEBUG_H
+#define AC_DEBUG_H
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "amd_family.h"
+
+#define AC_ENCODE_TRACE_POINT(id)       (0xcafe0000 | ((id) & 0xffff)) /* 0xcafe tag in the upper 16 bits, id in the lower 16 */
+#define AC_IS_TRACE_POINT(x)            (((x) & 0xffff0000) == 0xcafe0000) /* upper 16 bits must equal the tag exactly */
+#define AC_GET_TRACE_POINT_ID(x)        ((x) & 0xffff)
+
+typedef void *(*ac_debug_addr_callback)(void *data, uint64_t addr);
+
+void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
+		 uint32_t field_mask);
+void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, int trace_id,
+		 const char *name, enum chip_class chip_class,
+		 ac_debug_addr_callback addr_callback, void *addr_callback_data);
+
+#endif
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -31,16 +31,25 @@
 #  undef DEBUG
 #endif

-#include "ac_nir_to_llvm.h"
+#include "ac_llvm_util.h"
 #include <llvm-c/Core.h>
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
+#include <llvm/IR/Attributes.h>

-extern "C" void
-ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
+void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
 {
   llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
   llvm::AttrBuilder B;
   B.addDereferenceableAttr(bytes);
   A->addAttr(llvm::AttributeSet::get(A->getContext(), A->getArgNo() + 1,  B));
 }
+
+bool ac_is_sgpr_param(LLVMValueRef arg)
+{
+	llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg); /* arg is unwrapped as a function argument */
+	llvm::AttributeSet AS = A->getParent()->getAttributes(); /* attribute set of the owning function */
+	unsigned ArgNo = A->getArgNo();
+	return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) || /* parameter attributes are looked up at ArgNo + 1 */
+	       AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg); /* true when the argument is byval or inreg */
+}
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -32,6 +32,9 @@
 #include <assert.h>
 #include <stdio.h>

+#include "util/bitscan.h"
+#include "util/macros.h"
+
 static void ac_init_llvm_target()
 {
 #if HAVE_LLVM < 0x0307
@@ -140,3 +143,364 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family)

 	return tm;
 }
+
+/* Initialize module-independent parts of the context.
+ *
+ * The caller is responsible for initializing ctx::module and ctx::builder.
+ */
+void
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
+{
+	LLVMValueRef ulp_limit;
+
+	/* Module and builder are cleared here; the caller sets them afterwards. */
+	ctx->builder = NULL;
+	ctx->module = NULL;
+	ctx->context = context;
+	ctx->f32 = LLVMFloatTypeInContext(context);
+	ctx->i32 = LLVMIntTypeInContext(context, 32);
+
+	/* "fpmath" metadata kind plus a node holding the f32 constant 2.5. */
+	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(context, "fpmath", 6);
+	ulp_limit = LLVMConstReal(ctx->f32, 2.5);
+	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(context, &ulp_limit, 1);
+}
+
+#if HAVE_LLVM < 0x0400
+/* Translate one AC_FUNC_ATTR_* value into the LLVMAttribute used before LLVM 4.0. */
+static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
+{
+   switch (attr) {
+   case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
+   case AC_FUNC_ATTR_BYVAL:        return LLVMByValAttribute;
+   case AC_FUNC_ATTR_INREG:        return LLVMInRegAttribute;
+   case AC_FUNC_ATTR_NOALIAS:      return LLVMNoAliasAttribute;
+   case AC_FUNC_ATTR_NOUNWIND:     return LLVMNoUnwindAttribute;
+   case AC_FUNC_ATTR_READNONE:     return LLVMReadNoneAttribute;
+   case AC_FUNC_ATTR_READONLY:     return LLVMReadOnlyAttribute;
+   default:                        break;
+   }
+   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
+   return 0;
+}
+#else
+/* Translate one AC_FUNC_ATTR_* value into the attribute name used on LLVM 4.0 and later. */
+static const char *attr_to_str(enum ac_func_attr attr)
+{
+   switch (attr) {
+   case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
+   case AC_FUNC_ATTR_BYVAL:        return "byval";
+   case AC_FUNC_ATTR_INREG:        return "inreg";
+   case AC_FUNC_ATTR_NOALIAS:      return "noalias";
+   case AC_FUNC_ATTR_NOUNWIND:     return "nounwind";
+   case AC_FUNC_ATTR_READNONE:     return "readnone";
+   case AC_FUNC_ATTR_READONLY:     return "readonly";
+   default:                        break;
+   }
+   fprintf(stderr, "Unhandled function attribute: %x\n", attr);
+   return NULL;
+}
+
+#endif
+
+void
+ac_add_function_attr(LLVMValueRef function,
+                     int attr_idx,
+                     enum ac_func_attr attr)
+{
+   /* attr_idx == -1 targets the function itself; N >= 1 targets parameter N-1 (0-based). */
+#if HAVE_LLVM < 0x0400
+   LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
+   if (attr_idx == -1) {
+      LLVMAddFunctionAttr(function, llvm_attr);
+   } else {
+      LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
+   }
+#else
+   const char *attr_name = attr_to_str(attr);
+   if (!attr_name)
+      return; /* unknown attribute (already reported): strlen(NULL) would be undefined */
+   LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
+   unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));
+   LLVMAddAttributeAtIndex(function, attr_idx, LLVMCreateEnumAttribute(context, kind_id, 0));
+#endif
+}
+
+LLVMValueRef
+ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
+		       LLVMTypeRef return_type, LLVMValueRef *params,
+		       unsigned param_count, unsigned attrib_mask)
+{
+	/* Reuse the declaration if the module already has one under this name. */
+	LLVMValueRef callee = LLVMGetNamedFunction(ctx->module, name);
+
+	if (callee == NULL) {
+		LLVMTypeRef arg_types[32];
+		LLVMTypeRef signature;
+		unsigned pending = attrib_mask | AC_FUNC_ATTR_NOUNWIND;
+
+		/* Derive the prototype from the actual call operands. */
+		assert(param_count <= 32);
+		for (unsigned idx = 0; idx < param_count; ++idx) {
+			assert(params[idx]);
+			arg_types[idx] = LLVMTypeOf(params[idx]);
+		}
+		signature = LLVMFunctionType(return_type, arg_types, param_count, 0);
+		callee = LLVMAddFunction(ctx->module, name, signature);
+		LLVMSetFunctionCallConv(callee, LLVMCCallConv);
+		LLVMSetLinkage(callee, LLVMExternalLinkage);
+
+		/* nounwind is always applied; the remaining bits come from attrib_mask. */
+		while (pending) {
+			int bit = u_bit_scan(&pending);
+			ac_add_function_attr(callee, -1, (enum ac_func_attr)(1u << bit));
+		}
+	}
+
+	return LLVMBuildCall(ctx->builder, callee, params, param_count, "");
+}
+
+LLVMValueRef
+ac_build_gather_values_extended(struct ac_llvm_context *ctx,
+				LLVMValueRef *values,
+				unsigned value_count,
+				unsigned value_stride,
+				bool load)
+{
+	LLVMValueRef result = NULL;
+
+	if (value_count == 0)
+		unreachable("value_count is 0");
+
+	/* A single element is returned as a scalar rather than a vector. */
+	if (value_count == 1)
+		return load ? LLVMBuildLoad(ctx->builder, values[0], "") : values[0];
+
+	for (unsigned slot = 0; slot < value_count; ++slot) {
+		LLVMValueRef elem = values[slot * value_stride];
+		LLVMValueRef lane = LLVMConstInt(ctx->i32, slot, false);
+
+		if (load)
+			elem = LLVMBuildLoad(ctx->builder, elem, "");
+
+		/* The vector type is only known once the first element is. */
+		if (slot == 0)
+			result = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(elem), value_count));
+		result = LLVMBuildInsertElement(ctx->builder, result, elem, lane, "");
+	}
+
+	return result;
+}
+
+LLVMValueRef
+ac_build_gather_values(struct ac_llvm_context *ctx,
+		       LLVMValueRef *values,
+		       unsigned value_count)
+{
+	return ac_build_gather_values_extended(ctx, values, value_count, 1, false); /* stride 1, values used as-is (no loads) */
+}
+
+LLVMValueRef
+ac_emit_fdiv(struct ac_llvm_context *ctx,
+	     LLVMValueRef num,
+	     LLVMValueRef den)
+{
+	LLVMValueRef quotient = LLVMBuildFDiv(ctx->builder, num, den, "");
+	/* The 2.5 ULP fpmath metadata is attached only when the result is not a constant. */
+	if (!LLVMIsConstant(quotient))
+		LLVMSetMetadata(quotient, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
+	return quotient;
+}
+
+/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
+ * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
+ * already multiplied by two. id is the cube face number.
+ */
+struct cube_selection_coords {
+	LLVMValueRef stc[2]; /* [0] = sc, [1] = tc */
+	LLVMValueRef ma; /* major axis, already multiplied by two */
+	LLVMValueRef id; /* cube face number */
+};
+
+static void
+build_cube_intrinsic(struct ac_llvm_context *ctx,
+		     LLVMValueRef in[3],
+		     struct cube_selection_coords *out)
+{
+	if (HAVE_LLVM >= 0x0309) {
+		/* One scalar intrinsic per output, emitted in tc, sc, ma, id order. */
+		const unsigned attrs = AC_FUNC_ATTR_READNONE;
+
+		out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
+					ctx->f32, in, 3, attrs);
+		out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
+					ctx->f32, in, 3, attrs);
+		out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
+					ctx->f32, in, 3, attrs);
+		out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
+					ctx->f32, in, 3, attrs);
+	} else {
+		/* A single vec4 intrinsic; lanes 0..3 of its result are tc, sc, ma and id. */
+		LLVMBuilderRef builder = ctx->builder;
+		LLVMValueRef padded[4] = {
+			in[0],
+			in[1],
+			in[2],
+			LLVMGetUndef(LLVMTypeOf(in[0]))
+		};
+		LLVMValueRef packed = ac_build_gather_values(ctx, padded, 4);
+		LLVMValueRef result =
+			ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube",
+					  LLVMTypeOf(packed), &packed, 1,
+					  AC_FUNC_ATTR_READNONE);
+
+		out->stc[1] = LLVMBuildExtractElement(builder, result,
+				LLVMConstInt(ctx->i32, 0, 0), "");
+		out->stc[0] = LLVMBuildExtractElement(builder, result,
+				LLVMConstInt(ctx->i32, 1, 0), "");
+		out->ma = LLVMBuildExtractElement(builder, result,
+				LLVMConstInt(ctx->i32, 2, 0), "");
+		out->id = LLVMBuildExtractElement(builder, result,
+				LLVMConstInt(ctx->i32, 3, 0), "");
+	}
+}
+
+/**
+ * Build a manual selection sequence for cube face sc/tc coordinates and
+ * major axis vector (multiplied by 2 for consistency) for the given
+ * vec3 \p coords, for the face implied by \p selcoords.
+ *
+ * For the major axis, we always adjust the sign to be in the direction of
+ * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
+ * the selcoords major axis.
+ */
+static void build_cube_select(LLVMBuilderRef builder,
+			      const struct cube_selection_coords *selcoords,
+			      const LLVMValueRef *coords,
+			      LLVMValueRef *out_st,
+			      LLVMValueRef *out_ma)
+{
+	LLVMTypeRef f32 = LLVMTypeOf(coords[0]); /* constants below use the type of the first input */
+	LLVMValueRef is_ma_positive;
+	LLVMValueRef sgn_ma; /* 1.0 or -1.0, chosen by is_ma_positive */
+	LLVMValueRef is_ma_z, is_not_ma_z; /* face id >= 4.0, and its negation */
+	LLVMValueRef is_ma_y; /* face id >= 2.0 and not a z face */
+	LLVMValueRef is_ma_x; /* neither a z face nor a y face */
+	LLVMValueRef sgn;
+	LLVMValueRef tmp;
+
+	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
+		selcoords->ma, LLVMConstReal(f32, 0.0), "");
+	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
+		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
+
+	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
+	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
+	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
+		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
+	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
+
+	/* Select sc: coords[2] on z faces, else coords[0]; sign is 1.0 on y, sgn_ma on x, -sgn_ma on z faces */
+	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
+	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
+		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
+			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
+	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+	/* Select tc: coords[2] with sign -sgn_ma on y faces, else coords[1] with sign -1.0 */
+	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
+	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
+		LLVMConstReal(f32, -1.0), "");
+	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
+
+	/* Select ma: the component on the major axis, scaled by 2.0 or -2.0 according to is_ma_positive */
+	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
+		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
+	sgn = LLVMBuildSelect(builder, is_ma_positive,
+		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
+	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
+}
+
+void
+ac_prepare_cube_coords(struct ac_llvm_context *ctx,
+		       bool is_deriv, bool is_array,
+		       LLVMValueRef *coords_arg,
+		       LLVMValueRef *derivs_arg)
+{
+	/* Overwrites coords_arg[0..2] with the 2D face coordinates and face id, and (optionally) derivs_arg[0..3] with 2D derivatives. */
+	LLVMBuilderRef builder = ctx->builder;
+	struct cube_selection_coords selcoords;
+	LLVMValueRef coords[3];
+	LLVMValueRef invma;
+
+	build_cube_intrinsic(ctx, coords_arg, &selcoords);
+
+	invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
+			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
+	invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma); /* invma = 1 / |ma| */
+
+	for (int i = 0; i < 2; ++i)
+		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
+
+	coords[2] = selcoords.id;
+
+	if (is_deriv && derivs_arg) {
+		LLVMValueRef derivs[4];
+		int axis;
+
+		/* Convert cube derivatives to 2D derivatives. */
+		for (axis = 0; axis < 2; axis++) {
+			LLVMValueRef deriv_st[2];
+			LLVMValueRef deriv_ma;
+
+			/* Transform the derivative alongside the texture
+			 * coordinate. Mathematically, the correct formula is
+			 * as follows. Assume we're projecting onto the +Z face
+			 * and denote by dx/dh the derivative of the (original)
+			 * X texture coordinate with respect to horizontal
+			 * window coordinates. The projection onto the +Z face
+			 * plane is:
+			 *
+			 *   f(x,z) = x/z
+			 *
+			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
+			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
+			 *
+			 * This motivates the implementation below.
+			 *
+			 * Whether this actually gives the expected results for
+			 * apps that might feed in derivatives obtained via
+			 * finite differences is anyone's guess. The OpenGL spec
+			 * seems awfully quiet about how textureGrad for cube
+			 * maps should be handled.
+			 */
+			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
+					  deriv_st, &deriv_ma);
+
+			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
+
+			for (int i = 0; i < 2; ++i)
+				derivs[axis * 2 + i] =
+					LLVMBuildFSub(builder,
+						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
+						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
+		}
+
+		memcpy(derivs_arg, derivs, sizeof(derivs)); /* two 3-component inputs were read; four values are written back */
+	}
+
+	/* Shift the texture coordinate. This must be applied after the
+	 * derivative calculation.
+	 */
+	for (int i = 0; i < 2; ++i)
+		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
+
+	if (is_array) {
+		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
+		/* coords_arg.w component - array_index for cube arrays */
+		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
+		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
+	}
+
+	memcpy(coords_arg, coords, sizeof(coords));
+}
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -24,8 +24,77 @@
 */
 #pragma once

+#include <stdbool.h>
 #include <llvm-c/TargetMachine.h>

 #include "amd_family.h"

+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum ac_func_attr { /* one bit per attribute, so values can be OR'ed into an attribute mask */
+	AC_FUNC_ATTR_ALWAYSINLINE = (1 << 0),
+	AC_FUNC_ATTR_BYVAL        = (1 << 1),
+	AC_FUNC_ATTR_INREG        = (1 << 2),
+	AC_FUNC_ATTR_NOALIAS      = (1 << 3),
+	AC_FUNC_ATTR_NOUNWIND     = (1 << 4),
+	AC_FUNC_ATTR_READNONE     = (1 << 5),
+	AC_FUNC_ATTR_READONLY     = (1 << 6),
+	AC_FUNC_ATTR_LAST         = (1 << 7) /* NOTE(review): looks like an end marker, not a real attribute - confirm */
+};
+
+struct ac_llvm_context {
+	LLVMContextRef context; /* set by ac_llvm_context_init() */
+	LLVMModuleRef module; /* cleared by ac_llvm_context_init(); the caller must set it */
+	LLVMBuilderRef builder; /* cleared by ac_llvm_context_init(); the caller must set it */
+
+	LLVMTypeRef i32; /* 32-bit integer type created in `context` */
+	LLVMTypeRef f32; /* float type created in `context` */
+
+	unsigned fpmath_md_kind; /* metadata kind ID for "fpmath" */
+	LLVMValueRef fpmath_md_2p5_ulp; /* metadata node holding the f32 constant 2.5 */
+};
+
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family);
+
+void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
+bool ac_is_sgpr_param(LLVMValueRef param);
+
+void
+ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context);
+
+void
+ac_add_function_attr(LLVMValueRef function,
+                     int attr_idx,
+                     enum ac_func_attr attr);
+LLVMValueRef
+ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
+		       LLVMTypeRef return_type, LLVMValueRef *params,
+		       unsigned param_count, unsigned attrib_mask);
+
+LLVMValueRef
+ac_build_gather_values_extended(struct ac_llvm_context *ctx,
+				LLVMValueRef *values,
+				unsigned value_count,
+				unsigned value_stride,
+				bool load);
+LLVMValueRef
+ac_build_gather_values(struct ac_llvm_context *ctx,
+		       LLVMValueRef *values,
+		       unsigned value_count);
+
+LLVMValueRef
+ac_emit_fdiv(struct ac_llvm_context *ctx,
+	     LLVMValueRef num,
+	     LLVMValueRef den);
+
+void
+ac_prepare_cube_coords(struct ac_llvm_context *ctx,
+		       bool is_deriv, bool is_array,
+		       LLVMValueRef *coords_arg,
+		       LLVMValueRef *derivs_arg);
+
+#ifdef __cplusplus
+}
+#endif
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -56,7 +56,35 @@ struct ac_nir_compiler_options {
 	enum chip_class chip_class;
 };

+struct ac_userdata_info { /* NOTE(review): field meanings below are inferred from names only - confirm against the users */
+	int8_t sgpr_idx; /* presumably the first user SGPR index; signed, so a negative value may mean "unused" - TODO confirm */
+	uint8_t num_sgprs; /* presumably the number of consecutive SGPRs used - TODO confirm */
+	bool indirect;
+	uint32_t indirect_offset; /* presumably only meaningful when `indirect` is set - TODO confirm */
+};
+
+enum ac_ud_index {
+	AC_UD_PUSH_CONSTANTS = 0,
+	AC_UD_SHADER_START = 1, /* the VS, PS and CS entries below each restart from this value */
+	AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
+	AC_UD_VS_BASE_VERTEX_START_INSTANCE,
+	AC_UD_VS_MAX_UD,
+	AC_UD_PS_SAMPLE_POS = AC_UD_SHADER_START,
+	AC_UD_PS_MAX_UD,
+	AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
+	AC_UD_CS_MAX_UD,
+	AC_UD_MAX_UD = AC_UD_VS_MAX_UD, /* VS has the largest *_MAX_UD (3, versus 2 for PS and CS) */
+};
+
+#define AC_UD_MAX_SETS 4
+
+struct ac_userdata_locations {
+	struct ac_userdata_info descriptor_sets[AC_UD_MAX_SETS]; /* AC_UD_MAX_SETS (4) entries */
+	struct ac_userdata_info shader_data[AC_UD_MAX_UD]; /* presumably indexed by enum ac_ud_index - TODO confirm */
+};
+
 struct ac_shader_variant_info {
+	struct ac_userdata_locations user_sgprs_locs;
 	unsigned num_user_sgprs;
 	unsigned num_input_sgprs;
 	unsigned num_input_vgprs;
@@ -67,6 +95,8 @@ struct ac_shader_variant_info {
 			unsigned vgpr_comp_cnt;
 			uint32_t export_mask;
 			bool writes_pointsize;
+			bool writes_layer;
+			bool writes_viewport_index;
 			uint8_t clip_dist_mask;
 			uint8_t cull_dist_mask;
 		} vs;
@@ -81,6 +111,7 @@ struct ac_shader_variant_info {
 			bool writes_stencil;
 			bool early_fragment_test;
 			bool writes_memory;
+			bool force_persample;
 		} fs;
 		struct {
 			unsigned block_size[3];
@@ -96,24 +127,4 @@ void ac_compile_nir_shader(LLVMTargetMachineRef tm,
                           const struct ac_nir_compiler_options *options,
 			   bool dump_shader);

-/* SHADER ABI defines */

-/* offset in dwords */
-#define AC_USERDATA_DESCRIPTOR_SET_0 0
-#define AC_USERDATA_DESCRIPTOR_SET_1 2
-#define AC_USERDATA_DESCRIPTOR_SET_2 4
-#define AC_USERDATA_DESCRIPTOR_SET_3 6
-#define AC_USERDATA_PUSH_CONST_DYN 8
-
-#define AC_USERDATA_VS_VERTEX_BUFFERS 10
-#define AC_USERDATA_VS_BASE_VERTEX 12
-#define AC_USERDATA_VS_START_INSTANCE 13
-
-#define AC_USERDATA_PS_SAMPLE_POS 10
-
-#define AC_USERDATA_CS_GRID_SIZE 10
-
-#ifdef __cplusplus
-extern "C"
-#endif
-void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes);
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -91,6 +91,7 @@ enum radeon_family {
    CHIP_STONEY,
    CHIP_POLARIS10,
    CHIP_POLARIS11,
+    CHIP_POLARIS12,
    CHIP_LAST,
 };

--- a/src/amd/common/amdgpu_id.h
+++ b/src/amd/common/amdgpu_id.h
@@ -142,6 +142,8 @@ enum {

 	VI_POLARIS11_M_A0 = 90,

+	VI_POLARIS12_V_A0 = 100,
+
 	VI_UNKNOWN        = 0xFF
 };

@@ -156,6 +158,8 @@ enum {
 	((eChipRev >= VI_POLARIS10_P_A0) && (eChipRev < VI_POLARIS11_M_A0))
 #define ASICREV_IS_POLARIS11_M(eChipRev)   \
-	(eChipRev >= VI_POLARIS11_M_A0)
+	((eChipRev >= VI_POLARIS11_M_A0) && (eChipRev < VI_POLARIS12_V_A0)) /* stop below the Polaris12 range */
+#define ASICREV_IS_POLARIS12_V(eChipRev)\
+	(eChipRev >= VI_POLARIS12_V_A0)

 /* CZ specific rev IDs */
 enum {
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -133,7 +133,9 @@
 #define   R_3F1_IB_BASE_HI                     0x3F1
 #define   R_3F2_CONTROL                        0x3F2
 #define     S_3F2_IB_SIZE(x)                   (((unsigned)(x) & 0xfffff) << 0)
+#define     G_3F2_IB_SIZE(x)                   (((unsigned)(x) >> 0) & 0xfffff)
 #define     S_3F2_CHAIN(x)                     (((unsigned)(x) & 0x1) << 20)
+#define     G_3F2_CHAIN(x)                     (((unsigned)(x) >> 20) & 0x1)
 #define     S_3F2_VALID(x)                     (((unsigned)(x) & 0x1) << 23)

 #define PKT3_COPY_DATA			       0x40
@@ -151,7 +153,12 @@
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47
-#define PKT3_EVENT_WRITE_EOS                   0x48
+/* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
+ * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
+ * DST_SEL=MC. Only CIK chips are affected.
+ */
+/*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* fix CP DMA before uncommenting */
+#define PKT3_RELEASE_MEM                       0x49
 #define PKT3_ONE_REG_WRITE                     0x57 /* not on CIK */
 #define PKT3_ACQUIRE_MEM                       0x58 /* new for CIK */
 #define PKT3_SET_CONFIG_REG                    0x68
@@ -7761,7 +7768,7 @@
 #define     V_028A90_FLUSH_HS_OUTPUT                                0x11
 #define     V_028A90_FLUSH_LS_OUTPUT                                0x12
 #define     V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT                   0x14
-#define     V_028A90_ZPASS_DONE                                     0x15 /* not on CIK */
+#define     V_028A90_ZPASS_DONE                                     0x15
 #define     V_028A90_CACHE_FLUSH_AND_INV_EVENT                      0x16
 #define     V_028A90_PERFCOUNTER_START                              0x17
 #define     V_028A90_PERFCOUNTER_STOP                               0x18
@@ -7795,7 +7802,7 @@
 /* CIK */
 #define     V_028A90_PIXEL_PIPE_STAT_CONTROL                        0x38
 #define     V_028A90_PIXEL_PIPE_STAT_DUMP                           0x39
-#define     V_028A90_PIXEL_PIPE_STAT_RESET                          0x40
+#define     V_028A90_PIXEL_PIPE_STAT_RESET                          0x3A
 /*     */
 #define   S_028A90_ADDRESS_HI(x)                                      (((unsigned)(x) & 0x1FF) << 18)
 #define   G_028A90_ADDRESS_HI(x)                                      (((x) >> 18) & 0x1FF)
@@ -9016,8 +9023,10 @@
 /* SI async DMA Packet types */
 #define    SI_DMA_PACKET_WRITE                     0x2
 #define    SI_DMA_PACKET_COPY                      0x3
-#define    SI_DMA_COPY_MAX_SIZE                    0xfffe0
-#define    SI_DMA_COPY_MAX_SIZE_DW                 0xffff8
+#define    SI_DMA_COPY_MAX_BYTE_ALIGNED_SIZE       0xfffe0
+/* The documentation says 0xffff8 is the maximum size in dwords, which is
+ * 0x3fffe0 in bytes. */
+#define    SI_DMA_COPY_MAX_DWORD_ALIGNED_SIZE      0x3fffe0
 #define    SI_DMA_COPY_DWORD_ALIGNED               0x00
 #define    SI_DMA_COPY_BYTE_ALIGNED                0x40
 #define    SI_DMA_COPY_TILED                       0x8
--- a/src/gallium/drivers/radeonsi/sid_tables.py
+++ b/src/gallium/drivers/radeonsi/sid_tables.py
--- a/src/amd/vulkan/.gitignore
+++ b/src/amd/vulkan/.gitignore
@@ -4,3 +4,4 @@
 /radv_timestamp.h
 /dev_icd.json
 /vk_format_table.c
+/radeon_icd.*.json
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -111,31 +111,27 @@ VULKAN_LIB_DEPS += \
 nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
 libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

-radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
-	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
+
+radv_entrypoints.h : radv_entrypoints_gen.py $(vulkan_api_xml)
+	$(AM_V_GEN) cat $(vulkan_api_xml) |\
 	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py header > $@

-radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_include_HEADERS)
-	$(AM_V_GEN) cat $(vulkan_include_HEADERS) |\
+radv_entrypoints.c : radv_entrypoints_gen.py $(vulkan_api_xml)
+	$(AM_V_GEN) cat $(vulkan_api_xml) |\
 	$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py code > $@

-.PHONY: radv_timestamp.h
-
-radv_timestamp.h:
-	@echo "Updating radv_timestamp.h"
-	$(AM_V_GEN) echo "#define RADV_TIMESTAMP \"$(TIMESTAMP_CMD)\"" > $@
-
 vk_format_table.c: vk_format_table.py \
 		   vk_format_parse.py \
                   vk_format_layout.csv
 	$(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@

 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
-CLEANFILES = $(BUILT_SOURCES) dev_icd.json radv_timestamp.h
+CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json
 EXTRA_DIST = \
 	$(top_srcdir)/include/vulkan/vk_icd.h \
 	dev_icd.json.in \
-	radeon_icd.json \
+	radeon_icd.json.in \
 	radv_entrypoints_gen.py \
 	vk_format_layout.csv \
 	vk_format_parse.py \
@@ -155,7 +151,7 @@ libvulkan_radeon_la_LDFLAGS = \


 icdconfdir = @VULKAN_ICD_INSTALL_DIR@
-icdconf_DATA = radeon_icd.json
+icdconf_DATA = radeon_icd.@host_cpu@.json
 # The following is used for development purposes, by setting VK_ICD_FILENAMES.
 noinst_DATA = dev_icd.json

@@ -164,4 +160,9 @@ dev_icd.json : dev_icd.json.in
 		-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
 		< $(srcdir)/dev_icd.json.in > $@

+radeon_icd.@host_cpu@.json : radeon_icd.json.in
+	$(AM_V_GEN) $(SED) \
+		-e "s#@install_libdir@#${libdir}#" \
+		< $(srcdir)/radeon_icd.json.in > $@
+
 include $(top_srcdir)/install-lib-links.mk
--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -72,6 +72,5 @@ VULKAN_WSI_X11_FILES := \

 VULKAN_GENERATED_FILES := \
 	radv_entrypoints.c \
-	radv_entrypoints.h \
-	radv_timestamp.h
+	radv_entrypoints.h

--- a/src/amd/vulkan/dev_icd.json.in
+++ b/src/amd/vulkan/dev_icd.json.in
@@ -2,6 +2,6 @@
    "file_format_version": "1.0.0",
    "ICD": {
        "library_path": "@build_libdir@/libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
+        "api_version": "1.0.3"
    }
 }
--- a/src/amd/vulkan/radeon_icd.json
+++ b/src/amd/vulkan/radeon_icd.json
@@ -1,7 +0,0 @@
-{
-    "file_format_version": "1.0.0",
-    "ICD": {
-        "library_path": "libvulkan_radeon.so",
-        "abi_versions": "1.0.3"
-    }
-}
--- a/src/amd/vulkan/radeon_icd.json.in
+++ b/src/amd/vulkan/radeon_icd.json.in
@@ -0,0 +1,7 @@
+{
+    "file_format_version": "1.0.0",
+    "ICD": {
+        "library_path": "@install_libdir@/libvulkan_radeon.so",
+        "api_version": "1.0.3"
+    }
+}
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
--- a/src/amd/vulkan/radv_entrypoints_gen.py
+++ b/src/amd/vulkan/radv_entrypoints_gen.py
@@ -22,14 +22,8 @@
 # IN THE SOFTWARE.
 #

-import fileinput, re, sys
-
-# Each function typedef in the vulkan.h header is all on one line and matches
-# this regepx. We hope that won't change.
-
-p = re.compile('typedef ([^ ]*) *\((?:VKAPI_PTR)? *\*PFN_vk([^(]*)\)(.*);')
-
-entrypoints = []
+import sys
+import xml.etree.ElementTree as ET

 # We generate a static hash table for entry point lookup
 # (vkGetProcAddress). We use a linear congruential generator for our hash
@@ -51,29 +45,11 @@ def hash(name):

    return h

-def get_platform_guard_macro(name):
-    if "Xlib" in name:
-        return "VK_USE_PLATFORM_XLIB_KHR"
-    elif "Xcb" in name:
-        return "VK_USE_PLATFORM_XCB_KHR"
-    elif "Wayland" in name:
-        return "VK_USE_PLATFORM_WAYLAND_KHR"
-    elif "Mir" in name:
-        return "VK_USE_PLATFORM_MIR_KHR"
-    elif "Android" in name:
-        return "VK_USE_PLATFORM_ANDROID_KHR"
-    elif "Win32" in name:
-        return "VK_USE_PLATFORM_WIN32_KHR"
-    else:
-        return None
-
-def print_guard_start(name):
-    guard = get_platform_guard_macro(name)
+def print_guard_start(guard):
    if guard is not None:
        print "#ifdef {0}".format(guard)

-def print_guard_end(name):
-    guard = get_platform_guard_macro(name)
+def print_guard_end(guard):
    if guard is not None:
        print "#endif // {0}".format(guard)

@@ -87,18 +63,37 @@ elif (sys.argv[1] == "code"):
    opt_code = True
    sys.argv.pop()

-# Parse the entry points in the header
+# Extract the entry points from the registry
+def get_entrypoints(doc, entrypoints_to_defines):
+    """Return one (return type, name minus its 2-char prefix, parameter
+    list, index, hash, guard) tuple per <command> element in doc."""
+    result = []
+    for index, cmd in enumerate(doc.findall('./commands/command')):
+        ret_type = cmd.find('./proto/type').text
+        fullname = cmd.find('./proto/name').text
+        # Each <param> is flattened to its text content, then comma-joined.
+        params = ', '.join(''.join(p.itertext())
+                           for p in cmd.findall('./param'))
+        # Commands that no extension lists get no guard (None).
+        guard = entrypoints_to_defines.get(fullname)
+        result.append((ret_type, fullname[2:], params, index,
+                       hash(fullname), guard))
+    return result

-i = 0
-for line in fileinput.input():
-    m  = p.match(line)
-    if (m):
-        if m.group(2) == 'VoidFunction':
-            continue
-        fullname = "vk" + m.group(2)
-        h = hash(fullname)
-        entrypoints.append((m.group(1), m.group(2), m.group(3), i, h))
-        i = i + 1
+# Maps entry points to extension defines
+def get_entrypoints_defines(doc):
+    """Map each command name required by an extension to that extension's
+    'protect' attribute (None when the attribute is absent)."""
+    guards = {}
+    for ext in doc.findall('./extensions/extension'):
+        protect = ext.get('protect')
+        # A command named by several extensions keeps the last one seen.
+        for cmd in ext.findall('./require/command'):
+            guards[cmd.get('name')] = protect
+    return guards
+
+doc = ET.parse(sys.stdin)
+entrypoints = get_entrypoints(doc, get_entrypoints_defines(doc))

 # For outputting entrypoints.h we generate a radv_EntryPoint() prototype
 # per entry point.
@@ -111,8 +106,7 @@ if opt_header:
    print "      void *entrypoints[%d];" % len(entrypoints)
    print "      struct {"

-    for type, name, args, num, h in entrypoints:
-        guard = get_platform_guard_macro(name)
+    for type, name, args, num, h, guard in entrypoints:
        if guard is not None:
            print "#ifdef {0}".format(guard)
            print "         PFN_vk{0} {0};".format(name)
@@ -125,10 +119,10 @@ if opt_header:
    print "   };\n"
    print "};\n"

-    for type, name, args, num, h in entrypoints:
-        print_guard_start(name)
-        print "%s radv_%s%s;" % (type, name, args)
-        print_guard_end(name)
+    for type, name, args, num, h, guard in entrypoints:
+        print_guard_start(guard)
+        print "%s radv_%s(%s);" % (type, name, args)
+        print_guard_end(guard)
    exit()


@@ -174,7 +168,7 @@ static const char strings[] ="""

 offsets = []
 i = 0;
-for type, name, args, num, h in entrypoints:
+for type, name, args, num, h, guard in entrypoints:
    print "   \"vk%s\\0\"" % name
    offsets.append(i)
    i += 2 + len(name) + 1
@@ -183,7 +177,7 @@ print "   ;"
 # Now generate the table of all entry points

 print "\nstatic const struct radv_entrypoint entrypoints[] = {"
-for type, name, args, num, h in entrypoints:
+for type, name, args, num, h, guard in entrypoints:
    print "   { %5d, 0x%08x }," % (offsets[num], h)
 print "};\n"

@@ -196,15 +190,15 @@ print """
 """

 for layer in [ "radv" ]:
-    for type, name, args, num, h in entrypoints:
-        print_guard_start(name)
-        print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
-        print_guard_end(name)
+    for type, name, args, num, h, guard in entrypoints:
+        print_guard_start(guard)
+        print "%s %s_%s(%s) __attribute__ ((weak));" % (type, layer, name, args)
+        print_guard_end(guard)
    print "\nconst struct radv_dispatch_table %s_layer = {" % layer
-    for type, name, args, num, h in entrypoints:
-        print_guard_start(name)
+    for type, name, args, num, h, guard in entrypoints:
+        print_guard_start(guard)
        print "   .%s = %s_%s," % (name, layer, name)
-        print_guard_end(name)
+        print_guard_end(guard)
    print "};\n"

 print """
@@ -222,7 +216,7 @@ radv_resolve_entrypoint(uint32_t index)

 map = [none for f in xrange(hash_size)]
 collisions = [0 for f in xrange(10)]
-for type, name, args, num, h in entrypoints:
+for type, name, args, num, h, guard in entrypoints:
    level = 0
    while map[h & hash_mask] != none:
        h = h + prime_step
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -154,6 +154,7 @@ uint32_t radv_translate_tex_dataformat(VkFormat format,
 		case VK_FORMAT_D16_UNORM:
 			return V_008F14_IMG_DATA_FORMAT_16;
 		case VK_FORMAT_D24_UNORM_S8_UINT:
+		case VK_FORMAT_X8_D24_UNORM_PACK32:
 			return V_008F14_IMG_DATA_FORMAT_8_24;
 		case VK_FORMAT_S8_UINT:
 			return V_008F14_IMG_DATA_FORMAT_8;
@@ -393,7 +394,7 @@ uint32_t radv_translate_color_numformat(VkFormat format,
 					int first_non_void)
 {
 	unsigned ntype;
-	if (first_non_void == 4 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT)
+	if (first_non_void == -1 || desc->channel[first_non_void].type == VK_FORMAT_TYPE_FLOAT)
 		ntype = V_028C70_NUMBER_FLOAT;
 	else {
 		ntype = V_028C70_NUMBER_UNORM;
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -112,8 +112,8 @@ radv_init_surface(struct radv_device *device,
 	                           VK_IMAGE_USAGE_STORAGE_BIT)) ||
 	    (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
            (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
-            device->instance->physicalDevice.rad_info.chip_class < VI ||
-            create_info->scanout || !device->allow_dcc ||
+            device->physical_device->rad_info.chip_class < VI ||
+            create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
            !radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
 		surface->flags |= RADEON_SURF_DISABLE_DCC;
 	if (create_info->scanout)
@@ -123,7 +123,7 @@ radv_init_surface(struct radv_device *device,
 #define ATI_VENDOR_ID 0x1002
 static uint32_t si_get_bo_metadata_word1(struct radv_device *device)
 {
-	return (ATI_VENDOR_ID << 16) | device->instance->physicalDevice.rad_info.pci_id;
+	return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
 }

 static inline unsigned
@@ -267,17 +267,7 @@ si_make_texture_descriptor(struct radv_device *device,

 	if (desc->colorspace == VK_FORMAT_COLORSPACE_ZS) {
 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
-		const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
-
-		switch (vk_format) {
-		case VK_FORMAT_X8_D24_UNORM_PACK32:
-		case VK_FORMAT_D24_UNORM_S8_UINT:
-		case VK_FORMAT_D32_SFLOAT_S8_UINT:
-			vk_format_compose_swizzles(mapping, swizzle_yyyy, swizzle);
-			break;
-		default:
-			vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
-		}
+		vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
 	} else {
 		vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
 	}
@@ -336,7 +326,7 @@ si_make_texture_descriptor(struct radv_device *device,
 		/* The last dword is unused by hw. The shader uses it to clear
 		 * bits in the first dword of sampler state.
 		 */
-		if (device->instance->physicalDevice.rad_info.chip_class <= CIK && image->samples <= 1) {
+		if (device->physical_device->rad_info.chip_class <= CIK && image->samples <= 1) {
 			if (first_level == last_level)
 				state[7] = C_008F30_MAX_ANISO_RATIO;
 			else
@@ -527,8 +517,8 @@ radv_image_get_cmask_info(struct radv_device *device,
 			  struct radv_image *image,
 			  struct radv_cmask_info *out)
 {
-	unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
-	unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes;
+	unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;
+	unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
 	unsigned cl_width, cl_height;

 	switch (num_pipes) {
@@ -562,10 +552,6 @@ radv_image_get_cmask_info(struct radv_device *device,
 	/* Each element of CMASK is a nibble. */
 	unsigned slice_bytes = slice_elements / 2;

-	out->pitch = width;
-	out->height = height;
-	out->xalign = cl_width * 8;
-	out->yalign = cl_height * 8;
 	out->slice_tile_max = (width * height) / (128*128);
 	if (out->slice_tile_max)
 		out->slice_tile_max -= 1;
@@ -603,8 +589,8 @@ radv_image_get_htile_size(struct radv_device *device,
 {
 	unsigned cl_width, cl_height, width, height;
 	unsigned slice_elements, slice_bytes, base_align;
-	unsigned num_pipes = device->instance->physicalDevice.rad_info.num_tile_pipes;
-	unsigned pipe_interleave_bytes = device->instance->physicalDevice.rad_info.pipe_interleave_bytes;
+	unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
+	unsigned pipe_interleave_bytes = device->physical_device->rad_info.pipe_interleave_bytes;

 	/* Overalign HTILE on P2 configs to work around GPU hangs in
 	 * piglit/depthstencil-render-miplevels 585.
@@ -613,7 +599,7 @@ radv_image_get_htile_size(struct radv_device *device,
 	 * are always reproducible. I think I have seen the test hang
 	 * on Carrizo too, though it was very rare there.
 	 */
-	if (device->instance->physicalDevice.rad_info.chip_class >= CIK && num_pipes < 4)
+	if (device->physical_device->rad_info.chip_class >= CIK && num_pipes < 4)
 		num_pipes = 4;

 	switch (num_pipes) {
@@ -663,7 +649,7 @@ static void
 radv_image_alloc_htile(struct radv_device *device,
 		       struct radv_image *image)
 {
-	if (env_var_as_boolean("RADV_HIZ_DISABLE", false))
+	if (device->debug_flags & RADV_DEBUG_NO_HIZ)
 		return;

 	image->htile.size = radv_image_get_htile_size(device, image);
@@ -688,7 +674,7 @@ radv_image_create(VkDevice _device,
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
 	struct radv_image *image = NULL;
-
+	bool can_cmask_dcc = false;
 	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

 	radv_assert(pCreateInfo->mipLevels > 0);
@@ -712,6 +698,13 @@ radv_image_create(VkDevice _device,
 	image->samples = pCreateInfo->samples;
 	image->tiling = pCreateInfo->tiling;
 	image->usage = pCreateInfo->usage;
+
+	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
+	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
+		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
+			image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
+	}
+
 	radv_init_surface(device, &image->surface, create_info);

 	device->ws->surface_init(device->ws, &image->surface);
@@ -719,15 +712,18 @@ radv_image_create(VkDevice _device,
 	image->size = image->surface.bo_size;
 	image->alignment = image->surface.bo_alignment;

+	if (image->exclusive || image->queue_family_mask == 1)
+		can_cmask_dcc = true;
+
 	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
-	    image->surface.dcc_size)
+	    image->surface.dcc_size && can_cmask_dcc)
 		radv_image_alloc_dcc(device, image);
 	else
 		image->surface.dcc_size = 0;

 	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
 	    pCreateInfo->mipLevels == 1 &&
-	    !image->surface.dcc_size && image->extent.depth == 1)
+	    !image->surface.dcc_size && image->extent.depth == 1 && can_cmask_dcc)
 		radv_image_alloc_cmask(device, image);
 	if (image->samples > 1 && vk_format_is_color(pCreateInfo->format)) {
 		radv_image_alloc_fmask(device, image);
@@ -756,6 +752,7 @@ radv_image_view_init(struct radv_image_view *iview,
 {
 	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
 	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
+	uint32_t blk_w;
 	bool is_stencil = false;
 	switch (image->type) {
 	case VK_IMAGE_TYPE_1D:
@@ -775,8 +772,13 @@ radv_image_view_init(struct radv_image_view *iview,
 	iview->vk_format = pCreateInfo->format;
 	iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;

-	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
+	if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
 		is_stencil = true;
+		iview->vk_format = vk_format_stencil_only(iview->vk_format);
+	} else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
+		iview->vk_format = vk_format_depth_only(iview->vk_format);
+	}
+
 	iview->extent = (VkExtent3D) {
 		.width  = radv_minify(image->extent.width , range->baseMipLevel),
 		.height = radv_minify(image->extent.height, range->baseMipLevel),
@@ -788,13 +790,15 @@ radv_image_view_init(struct radv_image_view *iview,
 	iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format),
 					    vk_format_get_blockheight(image->vk_format));

+	assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
+	blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);
 	iview->base_layer = range->baseArrayLayer;
 	iview->layer_count = radv_get_layerCount(image, range);
 	iview->base_mip = range->baseMipLevel;

 	si_make_texture_descriptor(device, image, false,
 				   iview->type,
-				   pCreateInfo->format,
+				   iview->vk_format,
 				   &pCreateInfo->components,
 				   0, radv_get_levelCount(image, range) - 1,
 				   range->baseArrayLayer,
@@ -807,7 +811,7 @@ radv_image_view_init(struct radv_image_view *iview,
 	si_set_mutable_tex_desc_fields(device, image,
 				       is_stencil ? &image->surface.stencil_level[range->baseMipLevel] : &image->surface.level[range->baseMipLevel], range->baseMipLevel,
 				       range->baseMipLevel,
-				       image->surface.blk_w, is_stencil, iview->descriptor);
+				       blk_w, is_stencil, iview->descriptor);
 }

 void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
@@ -817,7 +821,7 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
 	 * definitions for them either. They are all 2D_TILED_THIN1 modes with
 	 * different bpp and micro tile mode.
 	 */
-	if (device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+	if (device->physical_device->rad_info.chip_class >= CIK) {
 		switch (micro_tile_mode) {
 		case 0: /* displayable */
 			image->surface.tiling_index[0] = 10;
@@ -836,29 +840,29 @@ void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
 		switch (micro_tile_mode) {
 		case 0: /* displayable */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                            image->surface.tiling_index[0] = 10;
                            break;
-			case 16:
+			case 2:
                            image->surface.tiling_index[0] = 11;
                            break;
-			default: /* 32, 64 */
+			default: /* 4, 8 */
                            image->surface.tiling_index[0] = 12;
                            break;
 			}
 			break;
 		case 1: /* thin */
 			switch (image->surface.bpe) {
-			case 8:
+			case 1:
                                image->surface.tiling_index[0] = 14;
                                break;
-			case 16:
+			case 2:
                                image->surface.tiling_index[0] = 15;
                                break;
-			case 32:
+			case 4:
                                image->surface.tiling_index[0] = 16;
                                break;
-			default: /* 64, 128 */
+			default: /* 8, 16 */
                                image->surface.tiling_index[0] = 17;
                                break;
 			}
@@ -892,11 +896,19 @@ bool radv_layout_can_expclear(const struct radv_image *image,
 		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
 }

-bool radv_layout_has_cmask(const struct radv_image *image,
-			   VkImageLayout layout)
+bool radv_layout_can_fast_clear(const struct radv_image *image,
+			        VkImageLayout layout,
+			        unsigned queue_mask)
 {
-	return (layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ||
-		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+	return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
+		queue_mask == (1u << RADV_QUEUE_GENERAL);
+}
+
+
+/* Queue-family bitmask for the image: bit 'family' alone when the image is
+ * exclusive, otherwise the image's stored queue_family_mask. */
+unsigned radv_image_queue_family_mask(const struct radv_image *image, int family) {
+	return image->exclusive ? 1u << family : image->queue_family_mask;
 }

 VkResult
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -159,13 +159,34 @@ void radv_meta_begin_bufimage(struct radv_cmd_buffer *cmd_buffer,
 			      struct radv_meta_saved_compute_state *save);
 void radv_meta_end_bufimage(struct radv_cmd_buffer *cmd_buffer,
 			    struct radv_meta_saved_compute_state *save);
-
+void radv_meta_begin_itoi(struct radv_cmd_buffer *cmd_buffer,
+			  struct radv_meta_saved_compute_state *save);
+void radv_meta_end_itoi(struct radv_cmd_buffer *cmd_buffer,
+			struct radv_meta_saved_compute_state *save);
+void radv_meta_begin_cleari(struct radv_cmd_buffer *cmd_buffer,
+			    struct radv_meta_saved_compute_state *save);
+void radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
+			  struct radv_meta_saved_compute_state *save);
 void radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 			       struct radv_meta_blit2d_surf *src,
 			       struct radv_meta_blit2d_buffer *dst,
 			       unsigned num_rects,
 			       struct radv_meta_blit2d_rect *rects);

+void radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
+				  struct radv_meta_blit2d_buffer *src,
+				  struct radv_meta_blit2d_surf *dst,
+				  unsigned num_rects,
+				  struct radv_meta_blit2d_rect *rects);
+void radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
+				 struct radv_meta_blit2d_surf *src,
+				 struct radv_meta_blit2d_surf *dst,
+				 unsigned num_rects,
+				 struct radv_meta_blit2d_rect *rects);
+void radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
+			      struct radv_meta_blit2d_surf *dst,
+			      const VkClearColorValue *clear_color);
+
 void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageSubresourceRange *subresourceRange);
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -38,7 +38,7 @@ build_nir_vertex_shader(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");

 	nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						   vec4, "a_pos");
@@ -70,7 +70,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_fs.%d", tex_dim);
-	b.shader->info.name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info->name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -124,7 +124,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_depth_fs.%d", tex_dim);
-	b.shader->info.name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info->name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -178,7 +178,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_stencil_fs.%d", tex_dim);
-	b.shader->info.name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info->name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -226,12 +226,13 @@ static void
 meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               struct radv_image *src_image,
               struct radv_image_view *src_iview,
-               VkOffset3D src_offset,
-               VkExtent3D src_extent,
+               VkOffset3D src_offset_0,
+               VkOffset3D src_offset_1,
               struct radv_image *dest_image,
               struct radv_image_view *dest_iview,
-               VkOffset3D dest_offset,
-               VkExtent3D dest_extent,
+               VkOffset3D dest_offset_0,
+               VkOffset3D dest_offset_1,
+               VkRect2D dest_box,
               VkFilter blit_filter)
 {
 	struct radv_device *device = cmd_buffer->device;
@@ -245,38 +246,37 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 	unsigned vb_size = 3 * sizeof(*vb_data);
 	vb_data[0] = (struct blit_vb_data) {
 		.pos = {
-			dest_offset.x,
-			dest_offset.y,
+			dest_offset_0.x,
+			dest_offset_0.y,
 		},
 		.tex_coord = {
-			(float)(src_offset.x) / (float)src_iview->extent.width,
-			(float)(src_offset.y) / (float)src_iview->extent.height,
-			(float)src_offset.z / (float)src_iview->extent.depth,
+			(float)src_offset_0.x / (float)src_iview->extent.width,
+			(float)src_offset_0.y / (float)src_iview->extent.height,
+			(float)src_offset_0.z / (float)src_iview->extent.depth,
 		},
 	};

 	vb_data[1] = (struct blit_vb_data) {
 		.pos = {
-			dest_offset.x,
-			dest_offset.y + dest_extent.height,
+			dest_offset_0.x,
+			dest_offset_1.y,
 		},
 		.tex_coord = {
-			(float)src_offset.x / (float)src_iview->extent.width,
-			(float)(src_offset.y + src_extent.height) /
-			(float)src_iview->extent.height,
-			(float)src_offset.z / (float)src_iview->extent.depth,
+			(float)src_offset_0.x / (float)src_iview->extent.width,
+			(float)src_offset_1.y / (float)src_iview->extent.height,
+			(float)src_offset_0.z / (float)src_iview->extent.depth,
 		},
 	};

 	vb_data[2] = (struct blit_vb_data) {
 		.pos = {
-			dest_offset.x + dest_extent.width,
-			dest_offset.y,
+			dest_offset_1.x,
+			dest_offset_0.y,
 		},
 		.tex_coord = {
-			(float)(src_offset.x + src_extent.width) / (float)src_iview->extent.width,
-			(float)src_offset.y / (float)src_iview->extent.height,
-			(float)src_offset.z / (float)src_iview->extent.depth,
+			(float)src_offset_1.x / (float)src_iview->extent.width,
+			(float)src_offset_0.y / (float)src_iview->extent.height,
+			(float)src_offset_0.z / (float)src_iview->extent.depth,
 		},
 	};
 	radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
@@ -355,8 +355,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 							      .renderPass = device->meta_state.blit.render_pass[fs_key],
 							      .framebuffer = fb,
 							      .renderArea = {
-							      .offset = { dest_offset.x, dest_offset.y },
-							      .extent = { dest_extent.width, dest_extent.height },
+							      .offset = { dest_box.offset.x, dest_box.offset.y },
+							      .extent = { dest_box.extent.width, dest_box.extent.height },
 						      },
 							      .clearValueCount = 0,
 								       .pClearValues = NULL,
@@ -383,8 +383,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 							      .renderPass = device->meta_state.blit.depth_only_rp,
 							      .framebuffer = fb,
 							      .renderArea = {
-							      .offset = { dest_offset.x, dest_offset.y },
-							      .extent = { dest_extent.width, dest_extent.height },
+							      .offset = { dest_box.offset.x, dest_box.offset.y },
+							      .extent = { dest_box.extent.width, dest_box.extent.height },
 						      },
 							      .clearValueCount = 0,
 								       .pClearValues = NULL,
@@ -410,9 +410,9 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 							      .renderPass = device->meta_state.blit.stencil_only_rp,
 							      .framebuffer = fb,
 							      .renderArea = {
-							      .offset = { dest_offset.x, dest_offset.y },
-							      .extent = { dest_extent.width, dest_extent.height },
-						      },
+							      .offset = { dest_box.offset.x, dest_box.offset.y },
+							      .extent = { dest_box.extent.width, dest_box.extent.height },
+						              },
 							      .clearValueCount = 0,
 								       .pClearValues = NULL,
 						       }, VK_SUBPASS_CONTENTS_INLINE);
@@ -461,6 +461,26 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 				&cmd_buffer->pool->alloc);
 }

+/* Sort each (start, end) pair ascending; returns true when exactly one pair was swapped. */
+static bool
+flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
+{
+	unsigned swaps = 0;
+	if (*src1 < *src0) {
+		const unsigned lo = *src1;
+		*src1 = *src0;
+		*src0 = lo;
+		swaps++;
+	}
+	if (*dst1 < *dst0) {
+		const unsigned lo = *dst1;
+		*dst1 = *dst0;
+		*dst0 = lo;
+		swaps++;
+	}
+	return swaps == 1;
+}
+
 void radv_CmdBlitImage(
 	VkCommandBuffer                             commandBuffer,
 	VkImage                                     srcImage,
@@ -488,6 +508,8 @@ void radv_CmdBlitImage(
 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {
+		const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
+		const VkImageSubresourceLayers *dst_res = &pRegions[r].dstSubresource;
 		struct radv_image_view src_iview;
 		radv_image_view_init(&src_iview, cmd_buffer->device,
 				     &(VkImageViewCreateInfo) {
@@ -496,59 +518,92 @@ void radv_CmdBlitImage(
 						     .viewType = radv_meta_get_view_type(src_image),
 						     .format = src_image->vk_format,
 						     .subresourceRange = {
-						     .aspectMask = pRegions[r].srcSubresource.aspectMask,
-						     .baseMipLevel = pRegions[r].srcSubresource.mipLevel,
+						     .aspectMask = src_res->aspectMask,
+						     .baseMipLevel = src_res->mipLevel,
 						     .levelCount = 1,
-						     .baseArrayLayer = pRegions[r].srcSubresource.baseArrayLayer,
+						     .baseArrayLayer = src_res->baseArrayLayer,
 						     .layerCount = 1
 					     },
 						     },
 				     cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);

-		if (pRegions[r].dstOffsets[1].x < pRegions[r].dstOffsets[0].x ||
-		    pRegions[r].dstOffsets[1].y < pRegions[r].dstOffsets[0].y ||
-		    pRegions[r].srcOffsets[1].x < pRegions[r].srcOffsets[0].x ||
-		    pRegions[r].srcOffsets[1].y < pRegions[r].srcOffsets[0].y)
-			radv_finishme("FINISHME: Allow flipping in blits");
+		unsigned dst_start, dst_end;
+		if (dest_image->type == VK_IMAGE_TYPE_3D) {
+			assert(dst_res->baseArrayLayer == 0);
+			dst_start = pRegions[r].dstOffsets[0].z;
+			dst_end = pRegions[r].dstOffsets[1].z;
+		} else {
+			dst_start = dst_res->baseArrayLayer;
+			dst_end = dst_start + dst_res->layerCount;
+		}

-		const VkExtent3D dest_extent = {
-			.width = pRegions[r].dstOffsets[1].x - pRegions[r].dstOffsets[0].x,
-			.height = pRegions[r].dstOffsets[1].y - pRegions[r].dstOffsets[0].y,
-			.depth = 1,
-		};
+		unsigned src_start, src_end;
+		if (src_image->type == VK_IMAGE_TYPE_3D) {
+			assert(src_res->baseArrayLayer == 0);
+			src_start = pRegions[r].srcOffsets[0].z;
+			src_end = pRegions[r].srcOffsets[1].z;
+		} else {
+			src_start = src_res->baseArrayLayer;
+			src_end = src_start + src_res->layerCount;
+		}

-		const VkExtent3D src_extent = {
-			.width = pRegions[r].srcOffsets[1].x - pRegions[r].srcOffsets[0].x,
-			.height = pRegions[r].srcOffsets[1].y - pRegions[r].srcOffsets[0].y,
-			.depth = pRegions[r].srcOffsets[1].z - pRegions[r].srcOffsets[0].z,
-		};
+		bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
+		float src_z_step = (float)(src_end + 1 - src_start) /
+			(float)(dst_end + 1 - dst_start);

+		if (flip_z) {
+			src_start = src_end;
+			src_z_step *= -1;
+		}

-		if (pRegions[r].srcSubresource.layerCount > 1)
-			radv_finishme("FINISHME: copy multiple array layers");
+		unsigned src_x0 = pRegions[r].srcOffsets[0].x;
+		unsigned src_x1 = pRegions[r].srcOffsets[1].x;
+		unsigned dst_x0 = pRegions[r].dstOffsets[0].x;
+		unsigned dst_x1 = pRegions[r].dstOffsets[1].x;
+
+		unsigned src_y0 = pRegions[r].srcOffsets[0].y;
+		unsigned src_y1 = pRegions[r].srcOffsets[1].y;
+		unsigned dst_y0 = pRegions[r].dstOffsets[0].y;
+		unsigned dst_y1 = pRegions[r].dstOffsets[1].y;
+
+		VkRect2D dest_box;
+		dest_box.offset.x = MIN2(dst_x0, dst_x1);
+		dest_box.offset.y = MIN2(dst_y0, dst_y1);
+		dest_box.extent.width = abs(dst_x1 - dst_x0);
+		dest_box.extent.height = abs(dst_y1 - dst_y0);

 		struct radv_image_view dest_iview;
 		unsigned usage;
-		if (pRegions[r].dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
+		if (dst_res->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
 			usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
 		else
 			usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;

-		for (unsigned i = pRegions[r].dstOffsets[0].z; i < pRegions[r].dstOffsets[1].z; i++) {
-
-			const VkOffset3D dest_offset = {
-				.x = pRegions[r].dstOffsets[0].x,
-				.y = pRegions[r].dstOffsets[0].y,
-				.z = i,
+		const unsigned num_layers = dst_end - dst_start;
+		for (unsigned i = 0; i < num_layers; i++) {
+			const VkOffset3D dest_offset_0 = {
+				.x = dst_x0,
+				.y = dst_y0,
+				.z = dst_start + i ,
 			};
-			VkOffset3D src_offset = {
-				.x = pRegions[r].srcOffsets[0].x,
-				.y = pRegions[r].srcOffsets[0].y,
-				.z = i,
+			const VkOffset3D dest_offset_1 = {
+				.x = dst_x1,
+				.y = dst_y1,
+				.z = dst_start + i ,
+			};
+			VkOffset3D src_offset_0 = {
+				.x = src_x0,
+				.y = src_y0,
+				.z = src_start + i * src_z_step,
+			};
+			VkOffset3D src_offset_1 = {
+				.x = src_x1,
+				.y = src_y1,
+				.z = src_start + i * src_z_step,
 			};
 			const uint32_t dest_array_slice =
-				radv_meta_get_iview_layer(dest_image, &pRegions[r].dstSubresource,
-							  &dest_offset);
+				radv_meta_get_iview_layer(dest_image, dst_res,
+							  &dest_offset_0);

 			radv_image_view_init(&dest_iview, cmd_buffer->device,
 					     &(VkImageViewCreateInfo) {
@@ -557,8 +612,8 @@ void radv_CmdBlitImage(
 							     .viewType = radv_meta_get_view_type(dest_image),
 							     .format = dest_image->vk_format,
 							     .subresourceRange = {
-							     .aspectMask = pRegions[r].dstSubresource.aspectMask,
-							     .baseMipLevel = pRegions[r].dstSubresource.mipLevel,
+							     .aspectMask = dst_res->aspectMask,
+							     .baseMipLevel = dst_res->mipLevel,
 							     .levelCount = 1,
 							     .baseArrayLayer = dest_array_slice,
 							     .layerCount = 1
@@ -567,9 +622,10 @@ void radv_CmdBlitImage(
 					     cmd_buffer, usage);
 			meta_emit_blit(cmd_buffer,
 				       src_image, &src_iview,
-				       src_offset, src_extent,
+				       src_offset_0, src_offset_1,
 				       dest_image, &dest_iview,
-				       dest_offset, dest_extent,
+				       dest_offset_0, dest_offset_1,
+				       dest_box,
 				       filter);
 		}
 	}
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -112,7 +112,6 @@ static void
 blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
                struct radv_meta_blit2d_surf *src_img,
                struct radv_meta_blit2d_buffer *src_buf,
-                struct radv_meta_blit2d_rect *rect,
                struct blit2d_src_temps *tmp,
                enum blit2d_src_type src_type, VkFormat depth_format)
 {
@@ -164,7 +163,7 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
 								  .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 								  .pImageInfo = (VkDescriptorImageInfo[]) {
 								  {
-									  .sampler = NULL,
+									  .sampler = VK_NULL_HANDLE,
 									  .imageView = radv_image_view_to_handle(&tmp->iview),
 									  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 								  },
@@ -287,7 +286,7 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 		if (dst->aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT)
 			depth_format = dst->image->vk_format;
 		struct blit2d_src_temps src_temps;
-		blit2d_bind_src(cmd_buffer, src_img, src_buf, &rects[r], &src_temps, src_type, depth_format);
+		blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);

 		uint32_t offset = 0;
 		struct blit2d_dst_temps dst_temps;
@@ -439,7 +438,7 @@ build_nir_vertex_shader(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");

 	nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						   vec4, "a_pos");
@@ -574,7 +573,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info->name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
@@ -603,7 +602,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info->name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
@@ -632,7 +631,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info->name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
-	b.shader->info.cs.local_size[0] = 64;
-	b.shader->info.cs.local_size[1] = 1;
-	b.shader->info.cs.local_size[2] = 1;
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
+	b.shader->info->cs.local_size[0] = 64;
+	b.shader->info->cs.local_size[1] = 1;
+	b.shader->info->cs.local_size[2] = 1;

 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info.cs.local_size[0],
-						b.shader->info.cs.local_size[1],
-						b.shader->info.cs.local_size[2], 0);
+						b.shader->info->cs.local_size[0],
+						b.shader->info->cs.local_size[1],
+						b.shader->info->cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

@@ -60,17 +60,17 @@ build_buffer_copy_shader(struct radv_device *dev)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
-	b.shader->info.cs.local_size[0] = 64;
-	b.shader->info.cs.local_size[1] = 1;
-	b.shader->info.cs.local_size[2] = 1;
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
+	b.shader->info->cs.local_size[0] = 64;
+	b.shader->info->cs.local_size[1] = 1;
+	b.shader->info->cs.local_size[2] = 1;

 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info.cs.local_size[0],
-						b.shader->info.cs.local_size[1],
-						b.shader->info.cs.local_size[2], 0);
+						b.shader->info->cs.local_size[0],
+						b.shader->info->cs.local_size[1],
+						b.shader->info->cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

@@ -511,10 +511,11 @@ void radv_CmdUpdateBuffer(
 	VkBuffer                                    dstBuffer,
 	VkDeviceSize                                dstOffset,
 	VkDeviceSize                                dataSize,
-	const uint32_t*                             pData)
+	const void*                                 pData)
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
+	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
 	uint64_t words = dataSize / 4;
 	uint64_t va = cmd_buffer->device->ws->buffer_get_va(dst_buffer->bo);
 	va += dstOffset + dst_buffer->offset;
@@ -528,7 +529,8 @@ void radv_CmdUpdateBuffer(
 		radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);

 		radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
-		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+		radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ?
+		                                V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
 		                            S_370_WR_CONFIRM(1) |
 		                            S_370_ENGINE_SEL(V_370_ME));
 		radeon_emit(cmd_buffer->cs, va);
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -56,8 +56,8 @@ build_color_shaders(struct nir_shader **out_vs,
 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

-	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
-	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
+	vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
+	fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");

 	const struct glsl_type *position_type = glsl_vec4_type();
 	const struct glsl_type *color_type = glsl_vec4_type();
@@ -98,6 +98,16 @@ build_color_shaders(struct nir_shader **out_vs,
 	nir_copy_var(&vs_b, vs_out_color, vs_in_color);
 	nir_copy_var(&fs_b, fs_out_color, fs_in_color);

+	const struct glsl_type *layer_type = glsl_int_type();
+	nir_variable *vs_out_layer =
+		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
+				    "v_layer");
+	vs_out_layer->data.location = VARYING_SLOT_LAYER;
+	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+	nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
+
+	nir_store_var(&vs_b, vs_out_layer, inst_id, 0x1);
+
 	*out_vs = vs_b.shader;
 	*out_fs = fs_b.shader;
 }
@@ -204,13 +214,51 @@ create_pipeline(struct radv_device *device,
 	return result;
 }

+/* Build the single-subpass render pass used by the meta color-clear pipelines. */
+static VkResult
+create_color_renderpass(struct radv_device *device, VkFormat vk_format,
+			uint32_t samples, VkRenderPass *pass)
+{
+	const VkAttachmentDescription color_att = {
+		.format = vk_format,
+		.samples = samples,
+		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+		.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	const VkAttachmentReference color_ref =
+		{ .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL };
+	/* Color clears bind no depth/stencil attachment. */
+	const VkAttachmentReference unused_ds_ref =
+		{ .attachment = VK_ATTACHMENT_UNUSED, .layout = VK_IMAGE_LAYOUT_GENERAL };
+	const VkSubpassDescription subpass = {
+		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+		.inputAttachmentCount = 0,
+		.colorAttachmentCount = 1, .pColorAttachments = &color_ref,
+		.pResolveAttachments = NULL,
+		.pDepthStencilAttachment = &unused_ds_ref,
+		.preserveAttachmentCount = 1, .pPreserveAttachments = (uint32_t[]) { 0 },
+	};
+	const VkRenderPassCreateInfo create_info = {
+		.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+		.attachmentCount = 1,
+		.pAttachments = &color_att,
+		.subpassCount = 1,
+		.pSubpasses = &subpass,
+		.dependencyCount = 0,
+	};
+
+	return radv_CreateRenderPass(radv_device_to_handle(device), &create_info,
+				     &device->meta_state.alloc, pass);
+}
+
 static VkResult
 create_color_pipeline(struct radv_device *device,
-                      VkFormat vk_format,
 		      uint32_t samples,
                      uint32_t frag_output,
                      struct radv_pipeline **pipeline,
-		      VkRenderPass *pass)
+		      VkRenderPass pass)
 {
 	struct nir_shader *vs_nir;
 	struct nir_shader *fs_nir;
@@ -270,44 +318,11 @@ create_color_pipeline(struct radv_device *device,
 		.pAttachments = blend_attachment_state
 	};

-	result = radv_CreateRenderPass(radv_device_to_handle(device),
-				       &(VkRenderPassCreateInfo) {
-					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-						       .attachmentCount = 1,
-						       .pAttachments = &(VkAttachmentDescription) {
-						       .format = vk_format,
-						       .samples = samples,
-						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-					       },
-						       .subpassCount = 1,
-								.pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 1,
-						       .pColorAttachments = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
-						       },
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = VK_ATTACHMENT_UNUSED,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
-						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-								.dependencyCount = 0,
-									 }, &device->meta_state.alloc, pass);

-	if (result != VK_SUCCESS)
-		return result;
 	struct radv_graphics_pipeline_create_info extra = {
 		.use_rectlist = true,
 	};
-	result = create_pipeline(device, radv_render_pass_from_handle(*pass),
+	result = create_pipeline(device, radv_render_pass_from_handle(pass),
 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
 				 &extra, &device->meta_state.alloc, pipeline);

@@ -346,12 +361,10 @@ radv_device_finish_meta_clear_state(struct radv_device *device)

 		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
 			destroy_pipeline(device, state->clear[i].depth_only_pipeline[j]);
-			destroy_render_pass(device, state->clear[i].depth_only_rp[j]);
 			destroy_pipeline(device, state->clear[i].stencil_only_pipeline[j]);
-			destroy_render_pass(device, state->clear[i].stencil_only_rp[j]);
 			destroy_pipeline(device, state->clear[i].depthstencil_pipeline[j]);
-			destroy_render_pass(device, state->clear[i].depthstencil_rp[j]);
 		}
+		destroy_render_pass(device, state->clear[i].depthstencil_rp);
 	}

 }
@@ -444,7 +457,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 					   pipeline_h);
 	}

-	radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+	radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);

 	radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
 }
@@ -458,8 +471,8 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

-	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
-	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
+	vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
+	fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
 	const struct glsl_type *position_type = glsl_vec4_type();

 	nir_variable *vs_in_pos =
@@ -474,17 +487,61 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs

 	nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);

+	const struct glsl_type *layer_type = glsl_int_type();
+	nir_variable *vs_out_layer =
+		nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type,
+				    "v_layer");
+	vs_out_layer->data.location = VARYING_SLOT_LAYER;
+	vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
+	nir_ssa_def *inst_id = nir_load_system_value(&vs_b, nir_intrinsic_load_instance_id, 0);
+	nir_store_var(&vs_b, vs_out_layer, inst_id, 0x1);
+
 	*out_vs = vs_b.shader;
 	*out_fs = fs_b.shader;
 }

+/* Build the depth/stencil-only render pass shared by the meta clear pipelines. */
+static VkResult
+create_depthstencil_renderpass(struct radv_device *device, uint32_t samples,
+			       VkRenderPass *render_pass)
+{
+	const VkAttachmentDescription ds_att = {
+		.format = VK_FORMAT_UNDEFINED,
+		.samples = samples,
+		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+		.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	const VkAttachmentReference ds_ref =
+		{ .attachment = 0, .layout = VK_IMAGE_LAYOUT_GENERAL };
+	const VkSubpassDescription subpass = {
+		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+		.inputAttachmentCount = 0,
+		.colorAttachmentCount = 0, .pColorAttachments = NULL,
+		.pResolveAttachments = NULL,
+		.pDepthStencilAttachment = &ds_ref,
+		.preserveAttachmentCount = 1, .pPreserveAttachments = (uint32_t[]) { 0 },
+	};
+	const VkRenderPassCreateInfo create_info = {
+		.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+		.attachmentCount = 1,
+		.pAttachments = &ds_att,
+		.subpassCount = 1,
+		.pSubpasses = &subpass,
+		.dependencyCount = 0,
+	};
+	return radv_CreateRenderPass(radv_device_to_handle(device), &create_info,
+				     &device->meta_state.alloc, render_pass);
+}
+
 static VkResult
 create_depthstencil_pipeline(struct radv_device *device,
                             VkImageAspectFlags aspects,
 			     uint32_t samples,
 			     int index,
                             struct radv_pipeline **pipeline,
-			     VkRenderPass *render_pass)
+			     VkRenderPass render_pass)
 {
 	struct nir_shader *vs_nir, *fs_nir;
 	VkResult result;
@@ -535,36 +592,6 @@ create_depthstencil_pipeline(struct radv_device *device,
 		.pAttachments = NULL,
 	};

-	result = radv_CreateRenderPass(radv_device_to_handle(device),
-				       &(VkRenderPassCreateInfo) {
-					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-						       .attachmentCount = 1,
-						       .pAttachments = &(VkAttachmentDescription) {
-						       .format = VK_FORMAT_UNDEFINED,
-						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
-					       },
-						       .subpassCount = 1,
-								.pSubpasses = &(VkSubpassDescription) {
-						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-						       .inputAttachmentCount = 0,
-						       .colorAttachmentCount = 0,
-						       .pColorAttachments = NULL,
-						       .pResolveAttachments = NULL,
-						       .pDepthStencilAttachment = &(VkAttachmentReference) {
-							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
-						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
-					       },
-								.dependencyCount = 0,
-									 }, &device->meta_state.alloc, render_pass);
-	if (result != VK_SUCCESS)
-		return result;
-
 	struct radv_graphics_pipeline_create_info extra = {
 		.use_rectlist = true,
 	};
@@ -577,7 +604,7 @@ create_depthstencil_pipeline(struct radv_device *device,
 		extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
 		extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;
 	}
-	result = create_pipeline(device, radv_render_pass_from_handle(*render_pass),
+	result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
 				 &extra, &device->meta_state.alloc, pipeline);
 	return result;
@@ -709,7 +736,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 	if (depth_view_can_fast_clear(iview, subpass->depth_stencil_attachment.layout, clear_rect))
 		radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);

-	radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+	radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);
 }


@@ -740,20 +767,32 @@ radv_device_init_meta_clear_state(struct radv_device *device)
 			VkFormat format = pipeline_formats[j];
 			unsigned fs_key = radv_format_meta_fs_key(format);
 			assert(!state->clear[i].color_pipelines[fs_key]);
-			res = create_color_pipeline(device, format, samples, 0, &state->clear[i].color_pipelines[fs_key],
-						    &state->clear[i].render_pass[fs_key]);
+
+			res = create_color_renderpass(device, format, samples,
+						      &state->clear[i].render_pass[fs_key]);
+			if (res != VK_SUCCESS)
+				goto fail;
+
+			res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],
+						    state->clear[i].render_pass[fs_key]);
 			if (res != VK_SUCCESS)
 				goto fail;

 		}

+		res = create_depthstencil_renderpass(device,
+						     samples,
+						     &state->clear[i].depthstencil_rp);
+		if (res != VK_SUCCESS)
+			goto fail;
+
 		for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
 			res = create_depthstencil_pipeline(device,
 							   VK_IMAGE_ASPECT_DEPTH_BIT,
 							   samples,
 							   j,
 							   &state->clear[i].depth_only_pipeline[j],
-							   &state->clear[i].depth_only_rp[j]);
+							   state->clear[i].depthstencil_rp);
 			if (res != VK_SUCCESS)
 				goto fail;

@@ -762,7 +801,7 @@ radv_device_init_meta_clear_state(struct radv_device *device)
 							   samples,
 							   j,
 							   &state->clear[i].stencil_only_pipeline[j],
-							   &state->clear[i].stencil_only_rp[j]);
+							   state->clear[i].depthstencil_rp);
 			if (res != VK_SUCCESS)
 				goto fail;

@@ -772,7 +811,7 @@ radv_device_init_meta_clear_state(struct radv_device *device)
 							   samples,
 							   j,
 							   &state->clear[i].depthstencil_pipeline[j],
-							   &state->clear[i].depthstencil_rp[j]);
+							   state->clear[i].depthstencil_rp);
 			if (res != VK_SUCCESS)
 				goto fail;
 		}
@@ -802,10 +841,10 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
 	if (!iview->image->cmask.size && !iview->image->surface.dcc_size)
 		return false;

-	if (!cmd_buffer->device->allow_fast_clears)
+	if (!(cmd_buffer->device->debug_flags & RADV_DEBUG_FAST_CLEARS))
 		return false;

-	if (!radv_layout_has_cmask(iview->image, image_layout))
+	if (!radv_layout_can_fast_clear(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index)))
 		goto fail;
 	if (vk_format_get_blocksizebits(iview->image->vk_format) > 64)
 		goto fail;
@@ -928,13 +967,10 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)

 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

-	if (cmd_state->framebuffer->layers > 1)
-		radv_finishme("clearing multi-layer framebuffer");
-
 	VkClearRect clear_rect = {
 		.rect = cmd_state->render_area,
 		.baseArrayLayer = 0,
-		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+		.layerCount = cmd_state->framebuffer->layers,
 	};

 	for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
@@ -975,15 +1011,141 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
 	radv_meta_restore(&saved_state, cmd_buffer);
 }

+/* Clear a single mip level / array layer of an image: wrap it in a temporary
+ * one-layer framebuffer and render pass, record the clear through emit_clear(),
+ * then destroy both temporary objects. `level` and `layer` are range-relative. */
+static void
+radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
+		       struct radv_image *image,
+		       VkImageLayout image_layout,
+		       const VkImageSubresourceRange *range,
+		       VkFormat format, int level, int layer,
+		       const VkClearValue *clear_val)
+{
+	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
+	struct radv_image_view iview;
+	radv_image_view_init(&iview, cmd_buffer->device,
+			     &(VkImageViewCreateInfo) {
+				     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+					     .image = radv_image_to_handle(image),
+					     .viewType = radv_meta_get_view_type(image),
+					     .format = format,
+					     .subresourceRange = {
+					     .aspectMask = range->aspectMask,
+					     .baseMipLevel = range->baseMipLevel + level,
+					     .levelCount = 1,
+					     .baseArrayLayer = range->baseArrayLayer + layer,
+					     .layerCount = 1
+				     },
+			     },
+			     cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+	/* NOTE(review): the return value of radv_CreateFramebuffer() is discarded. */
+	VkFramebuffer fb;
+	radv_CreateFramebuffer(device_h,
+			       &(VkFramebufferCreateInfo) {
+				       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+					       .attachmentCount = 1,
+					       .pAttachments = (VkImageView[]) {
+					       radv_image_view_to_handle(&iview),
+				       },
+					       .width = iview.extent.width,
+							.height = iview.extent.height,
+							.layers = 1
+			       },
+			       &cmd_buffer->pool->alloc,
+			       &fb);
+
+	VkAttachmentDescription att_desc = {
+		.format = iview.vk_format,
+		.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+		.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
+		.initialLayout = image_layout,
+		.finalLayout = image_layout,
+	};
+
+	VkSubpassDescription subpass_desc = {
+		.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+		.inputAttachmentCount = 0,
+		.colorAttachmentCount = 0,
+		.pColorAttachments = NULL,
+		.pResolveAttachments = NULL,
+		.pDepthStencilAttachment = NULL,
+		.preserveAttachmentCount = 0,
+		.pPreserveAttachments = NULL,
+	};
+	/* Sole attachment; hooked up as color or depth/stencil below by aspect. */
+	const VkAttachmentReference att_ref = {
+		.attachment = 0,
+		.layout = image_layout,
+	};
+	if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
+		subpass_desc.colorAttachmentCount = 1;
+		subpass_desc.pColorAttachments = &att_ref;
+	} else {
+		subpass_desc.pDepthStencilAttachment = &att_ref;
+	}
+	/* NOTE(review): the return value of radv_CreateRenderPass() is discarded. */
+	VkRenderPass pass;
+	radv_CreateRenderPass(device_h,
+			      &(VkRenderPassCreateInfo) {
+				      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+					      .attachmentCount = 1,
+					      .pAttachments = &att_desc,
+					      .subpassCount = 1,
+					      .pSubpasses = &subpass_desc,
+					      },
+			      &cmd_buffer->pool->alloc,
+			      &pass);
+
+	radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+				&(VkRenderPassBeginInfo) {
+					.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+						.renderArea = {
+						.offset = { 0, 0, },
+						.extent = {
+							.width = iview.extent.width,
+							.height = iview.extent.height,
+						},
+					},
+						.renderPass = pass,
+						.framebuffer = fb,
+						.clearValueCount = 0,
+						.pClearValues = NULL,
+						},
+				VK_SUBPASS_CONTENTS_INLINE);
+
+	VkClearAttachment clear_att = {
+		.aspectMask = range->aspectMask,
+		.colorAttachment = 0,
+		.clearValue = *clear_val,
+	};
+	VkClearRect clear_rect = {
+		.rect = {
+			.offset = { 0, 0 },
+			.extent = { iview.extent.width, iview.extent.height },
+		},
+		.baseArrayLayer = range->baseArrayLayer,
+		.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
+	};
+
+	emit_clear(cmd_buffer, &clear_att, &clear_rect);
+	radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+	radv_DestroyRenderPass(device_h, pass,
+			       &cmd_buffer->pool->alloc);
+	radv_DestroyFramebuffer(device_h, fb,
+				&cmd_buffer->pool->alloc);
+}
 static void
 radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
 		     struct radv_image *image,
 		     VkImageLayout image_layout,
 		     const VkClearValue *clear_value,
 		     uint32_t range_count,
-		     const VkImageSubresourceRange *ranges)
+		     const VkImageSubresourceRange *ranges,
+		     bool cs)
 {
-	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
 	VkFormat format = image->vk_format;
 	VkClearValue internal_clear_value = *clear_value;

@@ -998,130 +1160,33 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
 		const VkImageSubresourceRange *range = &ranges[r];
 		for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
 			const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
-				radv_minify(image->extent.depth, l) :
+				radv_minify(image->extent.depth, range->baseMipLevel + l) :
 				radv_get_layerCount(image, range);
 			for (uint32_t s = 0; s < layer_count; ++s) {
-				struct radv_image_view iview;
-				radv_image_view_init(&iview, cmd_buffer->device,
-						     &(VkImageViewCreateInfo) {
-							     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
-								     .image = radv_image_to_handle(image),
-								     .viewType = radv_meta_get_view_type(image),
-								     .format = format,
-								     .subresourceRange = {
-								     .aspectMask = range->aspectMask,
-								     .baseMipLevel = range->baseMipLevel + l,
-								     .levelCount = 1,
-								     .baseArrayLayer = range->baseArrayLayer + s,
-								     .layerCount = 1
-							     },
-								     },
-						     cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);

-				VkFramebuffer fb;
-				radv_CreateFramebuffer(device_h,
-						       &(VkFramebufferCreateInfo) {
-							       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
-								       .attachmentCount = 1,
-								       .pAttachments = (VkImageView[]) {
-								       radv_image_view_to_handle(&iview),
-							       },
-								       .width = iview.extent.width,
-										.height = iview.extent.height,
-										.layers = 1
-										},
-						       &cmd_buffer->pool->alloc,
-						       &fb);
-
-				VkAttachmentDescription att_desc = {
-					.format = iview.vk_format,
-					.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-					.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-					.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
-					.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,
-					.initialLayout = image_layout,
-					.finalLayout = image_layout,
-				};
-
-				VkSubpassDescription subpass_desc = {
-					.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
-					.inputAttachmentCount = 0,
-					.colorAttachmentCount = 0,
-					.pColorAttachments = NULL,
-					.pResolveAttachments = NULL,
-					.pDepthStencilAttachment = NULL,
-					.preserveAttachmentCount = 0,
-					.pPreserveAttachments = NULL,
-				};
-
-				const VkAttachmentReference att_ref = {
-					.attachment = 0,
-					.layout = image_layout,
-				};
-
-				if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
-					subpass_desc.colorAttachmentCount = 1;
-					subpass_desc.pColorAttachments = &att_ref;
+				if (cs) {
+					struct radv_meta_blit2d_surf surf;
+					surf.format = format;
+					surf.image = image;
+					surf.level = range->baseMipLevel + l;
+					surf.layer = range->baseArrayLayer + s;
+					surf.aspect_mask = range->aspectMask;
+					radv_meta_clear_image_cs(cmd_buffer, &surf,
+								 &internal_clear_value.color);
 				} else {
-					subpass_desc.pDepthStencilAttachment = &att_ref;
+					radv_clear_image_layer(cmd_buffer, image, image_layout,
+							       range, format, l, s, &internal_clear_value);
 				}
-
-				VkRenderPass pass;
-				radv_CreateRenderPass(device_h,
-						      &(VkRenderPassCreateInfo) {
-							      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
-								      .attachmentCount = 1,
-								      .pAttachments = &att_desc,
-								      .subpassCount = 1,
-								      .pSubpasses = &subpass_desc,
-								      },
-						      &cmd_buffer->pool->alloc,
-						      &pass);
-
-				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
-							      &(VkRenderPassBeginInfo) {
-								      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-									      .renderArea = {
-									      .offset = { 0, 0, },
-									      .extent = {
-										      .width = iview.extent.width,
-										      .height = iview.extent.height,
-									      },
-								      },
-									      .renderPass = pass,
-										       .framebuffer = fb,
-										       .clearValueCount = 0,
-										       .pClearValues = NULL,
-										       },
-							      VK_SUBPASS_CONTENTS_INLINE);
-
-				VkClearAttachment clear_att = {
-					.aspectMask = range->aspectMask,
-					.colorAttachment = 0,
-					.clearValue = internal_clear_value,
-				};
-
-				VkClearRect clear_rect = {
-					.rect = {
-						.offset = { 0, 0 },
-						.extent = { iview.extent.width, iview.extent.height },
-					},
-					.baseArrayLayer = range->baseArrayLayer,
-					.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
-				};
-
-				emit_clear(cmd_buffer, &clear_att, &clear_rect);
-
-				radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
-				radv_DestroyRenderPass(device_h, pass,
-							     &cmd_buffer->pool->alloc);
-				radv_DestroyFramebuffer(device_h, fb,
-							      &cmd_buffer->pool->alloc);
 			}
 		}
 	}
 }

+union meta_saved_state {
+	struct radv_meta_saved_state gfx;             /* filled on the graphics path */
+	struct radv_meta_saved_compute_state compute; /* filled on the compute-queue path */
+};
+
 void radv_CmdClearColorImage(
 	VkCommandBuffer                             commandBuffer,
 	VkImage                                     image_h,
@@ -1132,15 +1197,22 @@ void radv_CmdClearColorImage(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_image, image, image_h);
-	struct radv_meta_saved_state saved_state;
+	union meta_saved_state saved_state;
+	bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_begin_cleari(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);

 	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
 			     (const VkClearValue *) pColor,
-			     rangeCount, pRanges);
+			     rangeCount, pRanges, cs);

-	radv_meta_restore(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_end_cleari(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_restore(&saved_state.gfx, cmd_buffer);
 }

 void radv_CmdClearDepthStencilImage(
@@ -1159,7 +1231,7 @@ void radv_CmdClearDepthStencilImage(

 	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
 			     (const VkClearValue *) pDepthStencil,
-			     rangeCount, pRanges);
+			     rangeCount, pRanges, false);

 	radv_meta_restore(&saved_state, cmd_buffer);
 }
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -78,13 +78,13 @@ vk_format_for_size(int bs)
 }

 static struct radv_meta_blit2d_surf
-blit_surf_for_image_level_layer(struct radv_image* image, VkImageAspectFlags aspectMask,
-				int level, int layer)
+blit_surf_for_image_level_layer(struct radv_image *image,
+				const VkImageSubresourceLayers *subres)
 {
 	VkFormat format = image->vk_format;
-	if (aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
+	if (subres->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
 		format = vk_format_depth_only(format);
-	else if (aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
+	else if (subres->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
 		format = vk_format_stencil_only(format);

 	if (!image->surface.dcc_size)
@@ -93,13 +93,18 @@ blit_surf_for_image_level_layer(struct radv_image* image, VkImageAspectFlags asp
 	return (struct radv_meta_blit2d_surf) {
 		.format = format,
 		.bs = vk_format_get_blocksize(format),
-		.level = level,
-		.layer = layer,
+		.level = subres->mipLevel,
+		.layer = subres->baseArrayLayer,
 		.image = image,
-		.aspect_mask = aspectMask,
+		.aspect_mask = subres->aspectMask,
 	};
 }

+union meta_saved_state {
+	struct radv_meta_saved_state gfx;             /* filled on the graphics path */
+	struct radv_meta_saved_compute_state compute; /* filled on the compute-queue path */
+};
+
 static void
 meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
                          struct radv_buffer* buffer,
@@ -107,14 +112,18 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
                          uint32_t regionCount,
                          const VkBufferImageCopy* pRegions)
 {
-	struct radv_meta_saved_state saved_state;
+	bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+	union meta_saved_state saved_state;

 	/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
 	 * VK_SAMPLE_COUNT_1_BIT."
 	 */
 	assert(image->samples == 1);

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {

@@ -150,9 +159,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf img_bsurf =
 			blit_surf_for_image_level_layer(image,
-							pRegions[r].imageSubresource.aspectMask,
-							pRegions[r].imageSubresource.mipLevel,
-							pRegions[r].imageSubresource.baseArrayLayer);
+							&pRegions[r].imageSubresource);

 		struct radv_meta_blit2d_buffer buf_bsurf = {
 			.bs = img_bsurf.bs,
@@ -174,7 +181,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,


 			/* Perform Blit */
-			radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);
+			if (cs)
+				radv_meta_buffer_to_image_cs(cmd_buffer, &buf_bsurf, &img_bsurf, 1, &rect);
+			else
+				radv_meta_blit2d(cmd_buffer, NULL, &buf_bsurf, &img_bsurf, 1, &rect);

 			/* Once we've done the blit, all of the actual information about
 			 * the image is embedded in the command buffer so we can just
@@ -190,7 +200,10 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 				slice_array++;
 		}
 	}
-	radv_meta_restore(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_end_bufimage(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_restore(&saved_state.gfx, cmd_buffer);
 }

 void radv_CmdCopyBufferToImage(
@@ -253,9 +266,8 @@ meta_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf img_info =
 			blit_surf_for_image_level_layer(image,
-							pRegions[r].imageSubresource.aspectMask,
-							pRegions[r].imageSubresource.mipLevel,
-							pRegions[r].imageSubresource.baseArrayLayer);
+							&pRegions[r].imageSubresource);
+
 		struct radv_meta_blit2d_buffer buf_info = {
 			.bs = img_info.bs,
 			.format = img_info.format,
@@ -306,19 +318,15 @@ void radv_CmdCopyImageToBuffer(
 				  regionCount, pRegions);
 }

-void radv_CmdCopyImage(
-	VkCommandBuffer                             commandBuffer,
-	VkImage                                     srcImage,
-	VkImageLayout                               srcImageLayout,
-	VkImage                                     destImage,
-	VkImageLayout                               destImageLayout,
-	uint32_t                                    regionCount,
-	const VkImageCopy*                          pRegions)
+static void
+meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
+		struct radv_image *src_image,
+		struct radv_image *dest_image,
+		uint32_t regionCount,
+		const VkImageCopy *pRegions)
 {
-	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
-	RADV_FROM_HANDLE(radv_image, src_image, srcImage);
-	RADV_FROM_HANDLE(radv_image, dest_image, destImage);
-	struct radv_meta_saved_state saved_state;
+	bool cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+	union meta_saved_state saved_state;

 	/* From the Vulkan 1.0 spec:
 	 *
@@ -326,8 +334,10 @@ void radv_CmdCopyImage(
 	 *    images, but both images must have the same number of samples.
 	 */
 	assert(src_image->samples == dest_image->samples);
-
-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_begin_itoi(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {
 		assert(pRegions[r].srcSubresource.aspectMask ==
@@ -336,14 +346,11 @@ void radv_CmdCopyImage(
 		/* Create blit surfaces */
 		struct radv_meta_blit2d_surf b_src =
 			blit_surf_for_image_level_layer(src_image,
-							pRegions[r].srcSubresource.aspectMask,
-							pRegions[r].srcSubresource.mipLevel,
-							pRegions[r].srcSubresource.baseArrayLayer);
+							&pRegions[r].srcSubresource);
+
 		struct radv_meta_blit2d_surf b_dst =
 			blit_surf_for_image_level_layer(dest_image,
-							pRegions[r].dstSubresource.aspectMask,
-							pRegions[r].dstSubresource.mipLevel,
-							pRegions[r].dstSubresource.baseArrayLayer);
+							&pRegions[r].dstSubresource);

 		/* for DCC */
 		b_src.format = b_dst.format;
@@ -384,7 +391,10 @@ void radv_CmdCopyImage(
 			rect.src_y = src_offset_el.y;

 			/* Perform Blit */
-			radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);
+			if (cs)
+				radv_meta_image_to_image_cs(cmd_buffer, &b_src, &b_dst, 1, &rect);
+			else
+				radv_meta_blit2d(cmd_buffer, &b_src, NULL, &b_dst, 1, &rect);

 			b_src.layer++;
 			b_dst.layer++;
@@ -395,5 +405,25 @@ void radv_CmdCopyImage(
 		}
 	}

-	radv_meta_restore(&saved_state, cmd_buffer);
+	if (cs)
+		radv_meta_end_itoi(cmd_buffer, &saved_state.compute);
+	else
+		radv_meta_restore(&saved_state.gfx, cmd_buffer);
+}
+
+void radv_CmdCopyImage(
+	VkCommandBuffer                             commandBuffer,
+	VkImage                                     srcImage,
+	VkImageLayout                               srcImageLayout,
+	VkImage                                     destImage,
+	VkImageLayout                               destImageLayout,
+	uint32_t                                    regionCount,
+	const VkImageCopy*                          pRegions)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_image, src_image, srcImage);
+	RADV_FROM_HANDLE(radv_image, dest_image, destImage);
+	/* The image layouts are not consulted; meta_copy_image() does the work. */
+	meta_copy_image(cmd_buffer, src_image, dest_image,
+			regionCount, pRegions);
+}
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -46,7 +46,7 @@ build_nir_vs(void)
 	nir_variable *v_position;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");

 	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 					 "a_position");
@@ -68,8 +68,8 @@ build_nir_fs(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_asprintf(b.shader,
-					      "meta_depth_decomp_noop_fs");
+	b.shader->info->name = ralloc_asprintf(b.shader,
+					       "meta_depth_decomp_noop_fs");

 	return b.shader;
 }
@@ -382,7 +382,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,

 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

-	for (uint32_t layer = 0; layer < subresourceRange->layerCount; layer++) {
+	for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
 		struct radv_image_view iview;

 		radv_image_view_init(&iview, cmd_buffer->device,
@@ -450,6 +450,7 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageSubresourceRange *subresourceRange)
 {
+	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
 					 cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline);
 }
@@ -458,6 +459,7 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 VkImageSubresourceRange *subresourceRange)
 {
+	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
 					 cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline);
 }
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -46,7 +46,7 @@ build_nir_vs(void)
 	nir_variable *v_position;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_fast_clear_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_fast_clear_vs");

 	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 					 "a_position");
@@ -68,7 +68,7 @@ build_nir_fs(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_asprintf(b.shader,
+	b.shader->info->name = ralloc_asprintf(b.shader,
 					      "meta_fast_clear_noop_fs");

 	return b.shader;
@@ -419,6 +419,7 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);

+	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 	radv_meta_save_pass(&saved_pass_state, cmd_buffer);
 	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);

--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -33,7 +33,6 @@
 */
 struct vertex_attrs {
 	float position[2]; /**< 3DPRIM_RECTLIST */
-	float tex_position[2];
 };

 /* passthrough vertex shader */
@@ -45,11 +44,9 @@ build_nir_vs(void)
 	nir_builder b;
 	nir_variable *a_position;
 	nir_variable *v_position;
-	nir_variable *a_tex_position;
-	nir_variable *v_tex_position;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
+	b.shader->info->name = ralloc_strdup(b.shader, "meta_resolve_vs");

 	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
 					 "a_position");
@@ -59,16 +56,7 @@ build_nir_vs(void)
 					 "gl_Position");
 	v_position->data.location = VARYING_SLOT_POS;

-	a_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-					     "a_tex_position");
-	a_tex_position->data.location = VERT_ATTRIB_GENERIC1;
-
-	v_tex_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
-					     "v_tex_position");
-	v_tex_position->data.location = VARYING_SLOT_VAR0;
-
 	nir_copy_var(&b, v_position, a_position);
-	nir_copy_var(&b, v_tex_position, a_tex_position);

 	return b.shader;
 }
@@ -79,22 +67,16 @@ build_nir_fs(void)
 {
 	const struct glsl_type *vec4 = glsl_vec4_type();
 	nir_builder b;
-	nir_variable *v_tex_position; /* vec4, varying texture coordinate */
 	nir_variable *f_color; /* vec4, fragment output color */

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info.name = ralloc_asprintf(b.shader,
-					      "meta_resolve_fs");
-
-	v_tex_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-					     "v_tex_position");
-	v_tex_position->data.location = VARYING_SLOT_VAR0;
+	b.shader->info->name = ralloc_asprintf(b.shader,
+					       "meta_resolve_fs");

 	f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
 				      "f_color");
 	f_color->data.location = FRAG_RESULT_DATA0;
-
-	nir_copy_var(&b, f_color, v_tex_position);
+	nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);

 	return b.shader;
 }
@@ -198,7 +180,7 @@ create_pipeline(struct radv_device *device,
 								       .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
 							       },
 						       },
-						       .vertexAttributeDescriptionCount = 2,
+						       .vertexAttributeDescriptionCount = 1,
 						       .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
 							       {
 								       /* Position */
@@ -207,13 +189,6 @@ create_pipeline(struct radv_device *device,
 								       .format = VK_FORMAT_R32G32_SFLOAT,
 								       .offset = offsetof(struct vertex_attrs, position),
 							       },
-							       {
-								       /* Texture Coordinate */
-								       .location = 1,
-								       .binding = 0,
-								       .format = VK_FORMAT_R32G32_SFLOAT,
-								       .offset = offsetof(struct vertex_attrs, tex_position),
-							       },
 						       },
 					       },
 					       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
@@ -333,7 +308,6 @@ cleanup:

 static void
 emit_resolve(struct radv_cmd_buffer *cmd_buffer,
-             const VkOffset2D *src_offset,
             const VkOffset2D *dest_offset,
             const VkExtent2D *resolve_extent)
 {
@@ -346,30 +320,18 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 				dest_offset->x,
 				dest_offset->y,
 			},
-			.tex_position = {
-				src_offset->x,
-				src_offset->y,
-			},
 		},
 		{
 			.position = {
 				dest_offset->x,
 				dest_offset->y + resolve_extent->height,
 			},
-			.tex_position = {
-				src_offset->x,
-				src_offset->y + resolve_extent->height,
-			},
 		},
 		{
 			.position = {
 				dest_offset->x + resolve_extent->width,
 				dest_offset->y,
 			},
-			.tex_position = {
-				src_offset->x + resolve_extent->width,
-				src_offset->y,
-			},
 		},
 	};

@@ -505,8 +467,6 @@ void radv_CmdResolveImage(
 		 */
 		const struct VkExtent3D extent =
 			radv_sanitize_image_extent(src_image->type, region->extent);
-		const struct VkOffset3D srcOffset =
-			radv_sanitize_image_offset(src_image->type, region->srcOffset);
 		const struct VkOffset3D dstOffset =
 			radv_sanitize_image_offset(dest_image->type, region->dstOffset);

@@ -587,10 +547,6 @@ void radv_CmdResolveImage(
 						      VK_SUBPASS_CONTENTS_INLINE);

 			emit_resolve(cmd_buffer,
-				     &(VkOffset2D) {
-					     .x = srcOffset.x,
-					     .y = srcOffset.y,
-				     },
 				     &(VkOffset2D) {
 					     .x = dstOffset.x,
 					     .y = dstOffset.y,
@@ -662,7 +618,6 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 		 * 3DSTATE_DRAWING_RECTANGLE when draing a 3DPRIM_RECTLIST?
 		 */
 		emit_resolve(cmd_buffer,
-			     &(VkOffset2D) { 0, 0 },
 			     &(VkOffset2D) { 0, 0 },
 			     &(VkExtent2D) { fb->width, fb->height });
 	}
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -47,10 +47,10 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
 							     GLSL_TYPE_FLOAT);
 	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float");
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info.name = ralloc_strdup(b.shader, name);
-	b.shader->info.cs.local_size[0] = 16;
-	b.shader->info.cs.local_size[1] = 16;
-	b.shader->info.cs.local_size[2] = 1;
+	b.shader->info->name = ralloc_strdup(b.shader, name);
+	b.shader->info->cs.local_size[0] = 16;
+	b.shader->info->cs.local_size[1] = 16;
+	b.shader->info->cs.local_size[2] = 1;

 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
 						      sampler_type, "s_tex");
@@ -64,9 +64,9 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info.cs.local_size[0],
-						b.shader->info.cs.local_size[1],
-						b.shader->info.cs.local_size[2], 0);
+						b.shader->info->cs.local_size[0],
+						b.shader->info->cs.local_size[1],
+						b.shader->info->cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

@@ -405,7 +405,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 						  .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
 						  .pImageInfo = (VkDescriptorImageInfo[]) {
 							  {
-								  .sampler = NULL,
+								  .sampler = VK_NULL_HANDLE,
 								  .imageView = radv_image_view_to_handle(&src_iview),
 								  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 							  },
@@ -420,7 +420,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 						  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 						  .pImageInfo = (VkDescriptorImageInfo[]) {
 							  {
-								  .sampler = NULL,
+								  .sampler = VK_NULL_HANDLE,
 								  .imageView = radv_image_view_to_handle(&dest_iview),
 								  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
 							  },
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -144,6 +144,7 @@ radv_optimize_nir(struct nir_shader *shader)
                NIR_PASS(progress, shader, nir_opt_algebraic);
                NIR_PASS(progress, shader, nir_opt_constant_folding);
                NIR_PASS(progress, shader, nir_opt_undef);
+                NIR_PASS(progress, shader, nir_opt_conditional_discard);
        } while (progress);
 }

@@ -187,24 +188,30 @@ radv_shader_compile_to_nir(struct radv_device *device,
 				assert(data + entry.size <= spec_info->pData + spec_info->dataSize);

 				spec_entries[i].id = spec_info->pMapEntries[i].constantID;
-				spec_entries[i].data = *(const uint32_t *)data;
+				if (spec_info->dataSize == 8)
+					spec_entries[i].data64 = *(const uint64_t *)data;
+				else
+					spec_entries[i].data32 = *(const uint32_t *)data;
 			}
 		}
-
+		const struct nir_spirv_supported_extensions supported_ext = {
+		};
 		entry_point = spirv_to_nir(spirv, module->size / 4,
 					   spec_entries, num_spec_entries,
-					   stage, entrypoint_name, &nir_options);
+					   stage, entrypoint_name, &supported_ext, &nir_options);
 		nir = entry_point->shader;
 		assert(nir->stage == stage);
 		nir_validate_shader(nir);

 		free(spec_entries);

-		nir_lower_returns(nir);
-		nir_validate_shader(nir);
-
-		nir_inline_functions(nir);
-		nir_validate_shader(nir);
+		/* We have to lower away local constant initializers right before we
+		 * inline functions.  That way they get properly initialized at the top
+		 * of the function and not at the top of its caller.
+		 */
+		NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local);
+		NIR_PASS_V(nir, nir_lower_returns);
+		NIR_PASS_V(nir, nir_inline_functions);

 		/* Pick off the single entrypoint that we want */
 		foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
@@ -214,17 +221,18 @@ radv_shader_compile_to_nir(struct radv_device *device,
 		assert(exec_list_length(&nir->functions) == 1);
 		entry_point->name = ralloc_strdup(entry_point, "main");

-		nir_remove_dead_variables(nir, nir_var_shader_in);
-		nir_remove_dead_variables(nir, nir_var_shader_out);
-		nir_remove_dead_variables(nir, nir_var_system_value);
-		nir_validate_shader(nir);
+		NIR_PASS_V(nir, nir_remove_dead_variables,
+		           nir_var_shader_in | nir_var_shader_out | nir_var_system_value);

-		nir_lower_system_values(nir);
-		nir_validate_shader(nir);
+		/* Now that we've deleted all but the main function, we can go ahead and
+		 * lower the rest of the constant initializers.
+		 */
+		NIR_PASS_V(nir, nir_lower_constant_initializers, ~0);
+		NIR_PASS_V(nir, nir_lower_system_values);
 	}

 	/* Vulkan uses the separate-shader linking model */
-	nir->info.separate_shader = true;
+	nir->info->separate_shader = true;

 	//   nir = brw_preprocess_nir(compiler, nir);

@@ -256,6 +264,81 @@ radv_shader_compile_to_nir(struct radv_device *device,
 	return nir;
 }

+/* Return the heading printed above a stage's statistics dump; stages other
+ * than vertex, fragment and compute get a generic label.  The variant
+ * argument is accepted but not consulted. */
+static const char *radv_get_shader_name(struct radv_shader_variant *var,
+					gl_shader_stage stage)
+{
+	if (stage == MESA_SHADER_VERTEX)
+		return "Vertex Shader as VS";
+	if (stage == MESA_SHADER_FRAGMENT)
+		return "Pixel Shader";
+	return stage == MESA_SHADER_COMPUTE ? "Compute Shader" : "Unknown shader";
+}
+/* Dump, for each shader stage present in the pipeline, its register, LDS and
+ * scratch usage plus a "Max Waves" figure to stderr.  The wave limit is
+ * recomputed per stage so one stage's usage cannot cap another's number. */
+static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pipeline *pipeline)
+{
+	unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
+	FILE *file = stderr;
+	int i;
+
+	for (i = 0; i < MESA_SHADER_STAGES; i++) {
+		struct radv_shader_variant *var = pipeline->shaders[i];
+		struct ac_shader_config *conf;
+		unsigned max_simd_waves = 10; /* per-stage cap, lowered by the limits below */
+		unsigned lds_per_wave = 0; /* set only for the fragment stage */
+
+		if (!var)
+			continue;
+		conf = &var->config;
+
+		if (i == MESA_SHADER_FRAGMENT) {
+			lds_per_wave = conf->lds_size * lds_increment +
+				align(var->info.fs.num_interp * 48, lds_increment);
+		}
+
+		if (conf->num_sgprs) {
+			if (device->physical_device->rad_info.chip_class >= VI)
+				max_simd_waves = MIN2(max_simd_waves, 800 / conf->num_sgprs);
+			else
+				max_simd_waves = MIN2(max_simd_waves, 512 / conf->num_sgprs);
+		}
+
+		if (conf->num_vgprs)
+			max_simd_waves = MIN2(max_simd_waves, 256 / conf->num_vgprs);
+
+		/* LDS is 64KB per CU (4 SIMDs), divided into 16KB blocks per SIMD
+		 * that PS can use.
+		 */
+		if (lds_per_wave)
+			max_simd_waves = MIN2(max_simd_waves, 16384 / lds_per_wave);
+
+		fprintf(file, "\n%s:\n", radv_get_shader_name(var, i));
+		if (i == MESA_SHADER_FRAGMENT) {
+			fprintf(file, "*** SHADER CONFIG ***\n"
+				"SPI_PS_INPUT_ADDR = 0x%04x\n"
+				"SPI_PS_INPUT_ENA  = 0x%04x\n",
+				conf->spi_ps_input_addr, conf->spi_ps_input_ena);
+		}
+		fprintf(file, "*** SHADER STATS ***\n"
+			"SGPRS: %d\n"
+			"VGPRS: %d\n"
+			"Spilled SGPRs: %d\n"
+			"Spilled VGPRs: %d\n"
+			"Code Size: %d bytes\n"
+			"LDS: %d blocks\n"
+			"Scratch: %d bytes per wave\n"
+			"Max Waves: %d\n"
+			"********************\n\n\n",
+			conf->num_sgprs, conf->num_vgprs, conf->spilled_sgprs,
+			conf->spilled_vgprs, var->code_size, conf->lds_size,
+			conf->scratch_bytes_per_wave, max_simd_waves);
+	}
+}
+
 void radv_shader_variant_destroy(struct radv_device *device,
                                 struct radv_shader_variant *variant)
 {
@@ -266,42 +349,19 @@ void radv_shader_variant_destroy(struct radv_device *device,
 	free(variant);
 }

-static
-struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
-                                                       struct nir_shader *shader,
-                                                       struct radv_pipeline_layout *layout,
-                                                       const union ac_shader_variant_key *key,
-						       void** code_out,
-						       unsigned *code_size_out,
-						       bool dump)
+static void radv_fill_shader_variant(struct radv_device *device,
+				     struct radv_shader_variant *variant,
+				     struct ac_shader_binary *binary,
+				     gl_shader_stage stage)
 {
-	struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
-	enum radeon_family chip_family = device->instance->physicalDevice.rad_info.family;
-	LLVMTargetMachineRef tm;
-	if (!variant)
-		return NULL;
-
-	struct ac_nir_compiler_options options = {0};
-	options.layout = layout;
-	if (key)
-		options.key = *key;
-
-	struct ac_shader_binary binary;
-
-	options.unsafe_math = env_var_as_boolean("RADV_UNSAFE_MATH", false);
-	options.family = chip_family;
-	options.chip_class = device->instance->physicalDevice.rad_info.chip_class;
-	tm = ac_create_target_machine(chip_family);
-	ac_compile_nir_shader(tm, &binary, &variant->config,
-			      &variant->info, shader, &options, dump);
-	LLVMDisposeTargetMachine(tm);
-
+	variant->code_size = binary->code_size;
 	bool scratch_enabled = variant->config.scratch_bytes_per_wave > 0;
 	unsigned vgpr_comp_cnt = 0;

 	if (scratch_enabled)
 		radv_finishme("shader scratch space");
-	switch (shader->stage) {
+
+	switch (stage) {
 	case MESA_SHADER_VERTEX:
 		variant->rsrc2 = S_00B12C_USER_SGPR(variant->info.num_user_sgprs) |
 			S_00B12C_SCRATCH_EN(scratch_enabled);
@@ -330,13 +390,47 @@ struct radv_shader_variant *radv_shader_variant_create(struct radv_device *devic
 		S_00B848_DX10_CLAMP(1) |
 		S_00B848_FLOAT_MODE(variant->config.float_mode);

-	variant->bo = device->ws->buffer_create(device->ws, binary.code_size, 256,
+	variant->bo = device->ws->buffer_create(device->ws, binary->code_size, 256,
 						RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);

 	void *ptr = device->ws->buffer_map(variant->bo);
-	memcpy(ptr, binary.code, binary.code_size);
+	memcpy(ptr, binary->code, binary->code_size);
 	device->ws->buffer_unmap(variant->bo);

+
+}
+
+static struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
+							      struct nir_shader *shader,
+							      struct radv_pipeline_layout *layout,
+							      const union ac_shader_variant_key *key,
+							      void** code_out,
+							      unsigned *code_size_out,
+							      bool dump)
+{
+	struct radv_shader_variant *variant = calloc(1, sizeof(struct radv_shader_variant));
+	enum radeon_family chip_family = device->physical_device->rad_info.family;
+	LLVMTargetMachineRef tm;
+	if (!variant)
+		return NULL;
+
+	struct ac_nir_compiler_options options = {0};
+	options.layout = layout;
+	if (key)
+		options.key = *key;
+
+	struct ac_shader_binary binary;
+
+	options.unsafe_math = !!(device->debug_flags & RADV_DEBUG_UNSAFE_MATH);
+	options.family = chip_family;
+	options.chip_class = device->physical_device->rad_info.chip_class;
+	tm = ac_create_target_machine(chip_family);
+	ac_compile_nir_shader(tm, &binary, &variant->config,
+			      &variant->info, shader, &options, dump);
+	LLVMDisposeTargetMachine(tm);
+
+	radv_fill_shader_variant(device, variant, &binary, shader->stage);
+
 	if (code_out) {
 		*code_out = binary.code;
 		*code_size_out = binary.code_size;
@@ -360,18 +454,18 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
 		      gl_shader_stage stage,
 		      const VkSpecializationInfo *spec_info,
 		      struct radv_pipeline_layout *layout,
-		      const union ac_shader_variant_key *key,
-		      bool dump)
+		      const union ac_shader_variant_key *key)
 {
 	unsigned char sha1[20];
 	struct radv_shader_variant *variant;
 	nir_shader *nir;
 	void *code = NULL;
 	unsigned code_size = 0;
+	bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);

 	if (module->nir)
-		_mesa_sha1_compute(module->nir->info.name,
-				   strlen(module->nir->info.name),
+		_mesa_sha1_compute(module->nir->info->name,
+				   strlen(module->nir->info->name),
 				   module->sha1);

 	radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key);
@@ -642,7 +736,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
 					const VkGraphicsPipelineCreateInfo *pCreateInfo,
 					uint32_t blend_enable,
 					uint32_t blend_need_alpha,
-					bool single_cb_enable)
+					bool single_cb_enable,
+					bool blend_mrt0_is_dual_src)
 {
 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
@@ -664,6 +759,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,

 	blend->cb_shader_mask = si_get_cb_shader_mask(col_format);

+	if (blend_mrt0_is_dual_src)
+		col_format |= (col_format & 0xf) << 4;
 	if (!col_format)
 		col_format |= V_028714_SPI_SHADER_32_R;
 	blend->spi_shader_col_format = col_format;
@@ -715,8 +812,13 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	unsigned mode = V_028808_CB_NORMAL;
 	uint32_t blend_enable = 0, blend_need_alpha = 0;
+	bool blend_mrt0_is_dual_src = false;
 	int i;
 	bool single_cb_enable = false;
+
+	if (!vkblend)
+		return;
+
 	if (extra && extra->custom_blend_mode) {
 		single_cb_enable = true;
 		mode = extra->custom_blend_mode;
@@ -755,7 +857,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		}

 		if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
-			radv_finishme("dual source blending");
+			if (i == 0)
+				blend_mrt0_is_dual_src = true;
+
 		if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
 			srcRGB = VK_BLEND_FACTOR_ONE;
 			dstRGB = VK_BLEND_FACTOR_ONE;
@@ -797,7 +901,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
 		blend->cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);

 	radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo,
-						blend_enable, blend_need_alpha, single_cb_enable);
+						blend_enable, blend_need_alpha, single_cb_enable, blend_mrt0_is_dual_src);
 }

 static uint32_t si_translate_stencil_op(enum VkStencilOp op)
@@ -930,11 +1034,16 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 	const VkPipelineMultisampleStateCreateInfo *vkms = pCreateInfo->pMultisampleState;
 	struct radv_blend_state *blend = &pipeline->graphics.blend;
 	struct radv_multisample_state *ms = &pipeline->graphics.ms;
-	unsigned num_tile_pipes = pipeline->device->instance->physicalDevice.rad_info.num_tile_pipes;
+	unsigned num_tile_pipes = pipeline->device->physical_device->rad_info.num_tile_pipes;
 	int ps_iter_samples = 1;
 	uint32_t mask = 0xffff;

 	ms->num_samples = vkms->rasterizationSamples;
+
+	if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.force_persample) {
+		ps_iter_samples = vkms->rasterizationSamples;
+	}
+
 	ms->pa_sc_line_cntl = S_028BDC_DX10_DIAMOND_TEST_ENA(1);
 	ms->pa_sc_aa_config = 0;
 	ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
@@ -1069,18 +1178,27 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,

 	struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;

-	dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
-	if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
-		typed_memcpy(dynamic->viewport.viewports,
-			     pCreateInfo->pViewportState->pViewports,
-			     pCreateInfo->pViewportState->viewportCount);
-	}
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pViewportState is [...] NULL if the pipeline
+	 *    has rasterization disabled.
+	 */
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
+		assert(pCreateInfo->pViewportState);

-	dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
-	if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
-		typed_memcpy(dynamic->scissor.scissors,
-			     pCreateInfo->pViewportState->pScissors,
-			     pCreateInfo->pViewportState->scissorCount);
+		dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
+		if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
+			typed_memcpy(dynamic->viewport.viewports,
+				     pCreateInfo->pViewportState->pViewports,
+				     pCreateInfo->pViewportState->viewportCount);
+		}
+
+		dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
+		if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
+			typed_memcpy(dynamic->scissor.scissors,
+				     pCreateInfo->pViewportState->pScissors,
+				     pCreateInfo->pViewportState->scissorCount);
+		}
 	}

 	if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
@@ -1098,7 +1216,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 			pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
 	}

-	if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
+	/* Section 9.2 of the Vulkan 1.0.15 spec says:
+	 *
+	 *    pColorBlendState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is
+	 *    created against does not use any color attachments.
+	 */
+	bool uses_color_att = false;
+	for (unsigned i = 0; i < subpass->color_count; ++i) {
+		if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
+			uses_color_att = true;
+			break;
+		}
+	}
+
+	if (uses_color_att && states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
 		assert(pCreateInfo->pColorBlendState);
 		typed_memcpy(dynamic->blend_constants,
 			     pCreateInfo->pColorBlendState->blendConstants, 4);
@@ -1110,14 +1242,17 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	 * no need to override the depthstencil defaults in
 	 * radv_pipeline::dynamic_state when there is no depthstencil attachment.
 	 *
-	 * From the Vulkan spec (20 Oct 2015, git-aa308cb):
+	 * Section 9.2 of the Vulkan 1.0.15 spec says:
 	 *
-	 *    pDepthStencilState [...] may only be NULL if renderPass and subpass
-	 *    specify a subpass that has no depth/stencil attachment.
+	 *    pDepthStencilState is [...] NULL if the pipeline has rasterization
+	 *    disabled or if the subpass of the render pass the pipeline is created
+	 *    against does not use a depth/stencil attachment.
 	 */
-	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+	if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
+	    subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+		assert(pCreateInfo->pDepthStencilState);
+
 		if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->depth_bounds.min =
 				pCreateInfo->pDepthStencilState->minDepthBounds;
 			dynamic->depth_bounds.max =
@@ -1125,7 +1260,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_compare_mask.front =
 				pCreateInfo->pDepthStencilState->front.compareMask;
 			dynamic->stencil_compare_mask.back =
@@ -1133,7 +1267,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_write_mask.front =
 				pCreateInfo->pDepthStencilState->front.writeMask;
 			dynamic->stencil_write_mask.back =
@@ -1141,7 +1274,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 		}

 		if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
-			assert(pCreateInfo->pDepthStencilState);
 			dynamic->stencil_reference.front =
 				pCreateInfo->pDepthStencilState->front.reference;
 			dynamic->stencil_reference.back =
@@ -1181,7 +1313,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 {
 	struct radv_shader_module fs_m = {0};

-	bool dump = getenv("RADV_DUMP_SHADERS");
 	if (alloc == NULL)
 		alloc = &device->alloc;

@@ -1208,7 +1339,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 					       pStages[MESA_SHADER_VERTEX]->pName,
 					       MESA_SHADER_VERTEX,
 					       pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
-					       pipeline->layout, &key, dump);
+					       pipeline->layout, &key);

 		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
 	}
@@ -1216,7 +1347,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	if (!modules[MESA_SHADER_FRAGMENT]) {
 		nir_builder fs_b;
 		nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
-		fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
+		fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "noop_fs");
 		fs_m.nir = fs_b.shader;
 		modules[MESA_SHADER_FRAGMENT] = &fs_m;
 	}
@@ -1233,7 +1364,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 					       stage ? stage->pName : "main",
 					       MESA_SHADER_FRAGMENT,
 					       stage ? stage->pSpecializationInfo : NULL,
-					       pipeline->layout, &key, dump);
+					       pipeline->layout, &key);
 		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_FRAGMENT);
 	}

@@ -1285,6 +1416,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		pipeline->binding_stride[desc->binding] = desc->stride;
 	}

+	if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
+		radv_dump_pipeline_stats(device, pipeline);
+	}
+
 	return VK_SUCCESS;
 }

@@ -1332,20 +1467,18 @@ VkResult radv_CreateGraphicsPipelines(
 	unsigned i = 0;

 	for (; i < count; i++) {
-		result = radv_graphics_pipeline_create(_device,
-						       pipelineCache,
-						       &pCreateInfos[i],
-						       NULL, pAllocator, &pPipelines[i]);
-		if (result != VK_SUCCESS) {
-			for (unsigned j = 0; j < i; j++) {
-				radv_DestroyPipeline(_device, pPipelines[j], pAllocator);
-			}
-
-			return result;
+		VkResult r;
+		r = radv_graphics_pipeline_create(_device,
+						  pipelineCache,
+						  &pCreateInfos[i],
+						  NULL, pAllocator, &pPipelines[i]);
+		if (r != VK_SUCCESS) {
+			result = r;
+			pPipelines[i] = VK_NULL_HANDLE;
 		}
 	}

-	return VK_SUCCESS;
+	return result;
 }

 static VkResult radv_compute_pipeline_create(
@@ -1359,7 +1492,6 @@ static VkResult radv_compute_pipeline_create(
 	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
 	RADV_FROM_HANDLE(radv_shader_module, module, pCreateInfo->stage.module);
 	struct radv_pipeline *pipeline;
-	bool dump = getenv("RADV_DUMP_SHADERS");

 	pipeline = vk_alloc2(&device->alloc, pAllocator, sizeof(*pipeline), 8,
 			       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -1375,9 +1507,13 @@ static VkResult radv_compute_pipeline_create(
 				       pCreateInfo->stage.pName,
 				       MESA_SHADER_COMPUTE,
 				       pCreateInfo->stage.pSpecializationInfo,
-				       pipeline->layout, NULL, dump);
+				       pipeline->layout, NULL);

 	*pPipeline = radv_pipeline_to_handle(pipeline);
+
+	if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
+		radv_dump_pipeline_stats(device, pipeline);
+	}
 	return VK_SUCCESS;
 }
 VkResult radv_CreateComputePipelines(
@@ -1392,17 +1528,15 @@ VkResult radv_CreateComputePipelines(

 	unsigned i = 0;
 	for (; i < count; i++) {
-		result = radv_compute_pipeline_create(_device, pipelineCache,
-						      &pCreateInfos[i],
-						      pAllocator, &pPipelines[i]);
-		if (result != VK_SUCCESS) {
-			for (unsigned j = 0; j < i; j++) {
-				radv_DestroyPipeline(_device, pPipelines[j], pAllocator);
-			}
-
-			return result;
+		VkResult r;
+		r = radv_compute_pipeline_create(_device, pipelineCache,
+						 &pCreateInfos[i],
+						 pAllocator, &pPipelines[i]);
+		if (r != VK_SUCCESS) {
+			result = r;
+			pPipelines[i] = VK_NULL_HANDLE;
 		}
 	}

-	return VK_SUCCESS;
+	return result;
 }
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -57,7 +57,7 @@ radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
 	/* We don't consider allocation failure fatal, we just start with a 0-sized
 	 * cache. */
 	if (cache->hash_table == NULL ||
-	    !env_var_as_boolean("RADV_ENABLE_PIPELINE_CACHE", true))
+	    (device->debug_flags & RADV_DEBUG_NO_CACHE))
 		cache->table_size = 0;
 	else
 		memset(cache->hash_table, 0, byte_size);
@@ -309,7 +309,6 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
 {
 	struct radv_device *device = cache->device;
 	struct cache_header header;
-	uint8_t uuid[VK_UUID_SIZE];

 	if (size < sizeof(header))
 		return;
@@ -320,10 +319,9 @@ radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
 		return;
 	if (header.vendor_id != 0x1002)
 		return;
-	if (header.device_id != device->instance->physicalDevice.rad_info.pci_id)
+	if (header.device_id != device->physical_device->rad_info.pci_id)
 		return;
-	radv_device_get_cache_uuid(uuid);
-	if (memcmp(header.uuid, uuid, VK_UUID_SIZE) != 0)
+	if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
 		return;

 	char *end = (void *) data + size;
@@ -421,8 +419,8 @@ VkResult radv_GetPipelineCacheData(
 	header->header_size = sizeof(*header);
 	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
 	header->vendor_id = 0x1002;
-	header->device_id = device->instance->physicalDevice.rad_info.pci_id;
-	radv_device_get_cache_uuid(header->uuid);
+	header->device_id = device->physical_device->rad_info.pci_id;
+	memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
 	p += header->header_size;

 	struct cache_entry *entry;
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -79,13 +79,39 @@ typedef uint32_t xcb_window_t;
 #define MAX_SCISSORS    16
 #define MAX_PUSH_CONSTANTS_SIZE 128
 #define MAX_DYNAMIC_BUFFERS 16
-#define MAX_IMAGES 8
-#define MAX_SAMPLES_LOG2 4 /* SKL supports 16 samples */
+#define MAX_SAMPLES_LOG2 4
 #define NUM_META_FS_KEYS 11
+#define RADV_MAX_DRM_DEVICES 8

 #define NUM_DEPTH_CLEAR_PIPELINES 3

-#define radv_noreturn __attribute__((__noreturn__))
+enum radv_mem_heap { /* memory heap indices; enumerators count up from 0 */
+	RADV_MEM_HEAP_VRAM,
+	RADV_MEM_HEAP_VRAM_CPU_ACCESS,
+	RADV_MEM_HEAP_GTT,
+	RADV_MEM_HEAP_COUNT /* sentinel: equals the number of heaps listed above */
+};
+
+enum radv_mem_type { /* memory type indices; enumerators count up from 0 */
+	RADV_MEM_TYPE_VRAM,
+	RADV_MEM_TYPE_GTT_WRITE_COMBINE,
+	RADV_MEM_TYPE_VRAM_CPU_ACCESS,
+	RADV_MEM_TYPE_GTT_CACHED,
+	RADV_MEM_TYPE_COUNT /* sentinel: equals the number of types listed above */
+};
+
+
+enum { /* single-bit flags, tested with & against device->debug_flags */
+	RADV_DEBUG_FAST_CLEARS       =   0x1,
+	RADV_DEBUG_NO_DCC            =   0x2,
+	RADV_DEBUG_DUMP_SHADERS      =   0x4,
+	RADV_DEBUG_NO_CACHE          =   0x8, /* radv_pipeline_cache_init() sets table_size to 0 when set */
+	RADV_DEBUG_DUMP_SHADER_STATS =  0x10, /* compute pipeline creation calls radv_dump_pipeline_stats() */
+	RADV_DEBUG_NO_HIZ            =  0x20,
+	RADV_DEBUG_NO_COMPUTE_QUEUE  =  0x40,
+	RADV_DEBUG_UNSAFE_MATH       =  0x80,
+};
+
 #define radv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))

 static inline uint32_t
@@ -173,20 +199,12 @@ radv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
 	     __dword &= ~(1 << (b)))

 #define typed_memcpy(dest, src, count) ({				\
-			static_assert(sizeof(*src) == sizeof(*dest), ""); \
+			STATIC_ASSERT(sizeof(*src) == sizeof(*dest)); \
 			memcpy((dest), (src), (count) * sizeof(*(src))); \
 		})

 #define zero(x) (memset(&(x), 0, sizeof(x)))

-/* Define no kernel as 1, since that's an illegal offset for a kernel */
-#define NO_KERNEL 1
-
-struct radv_common {
-	VkStructureType                             sType;
-	const void*                                 pNext;
-};
-
 /* Whenever we generate an error, pass it through this function. Useful for
 * debugging, where we can break on it. Only call at error site, not when
 * propagating errors. Might be useful to plug in a stack trace here.
@@ -211,7 +229,13 @@ void radv_loge_v(const char *format, va_list va);
 * Print a FINISHME message, including its source location.
 */
 #define radv_finishme(format, ...)					\
-	__radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__);
+	do { \
+		static bool reported = false; \
+		if (!reported) { \
+			__radv_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
+			reported = true; \
+		} \
+	} while (0)

 /* A non-fatal assert.  Useful for debugging. */
 #ifdef DEBUG
@@ -223,9 +247,6 @@ void radv_loge_v(const char *format, va_list va);
 #define radv_assert(x)
 #endif

-void radv_abortf(const char *format, ...) radv_noreturn radv_printflike(1, 2);
-void radv_abortfv(const char *format, va_list va) radv_noreturn;
-
 #define stub_return(v)					\
 	do {						\
 		radv_finishme("stub %s", __func__);	\
@@ -243,6 +264,11 @@ void *radv_lookup_entrypoint(const char *name);

 extern struct radv_dispatch_table dtable;

+struct radv_extensions { /* extension list; embedded in struct radv_physical_device */
+	VkExtensionProperties       *ext_array;
+	uint32_t                    num_ext; /* presumably the entry count of ext_array -- TODO confirm */
+};
+
 struct radv_physical_device {
 	VK_LOADER_DATA                              _loader_data;

@@ -250,15 +276,12 @@ struct radv_physical_device {

 	struct radeon_winsys *ws;
 	struct radeon_info rad_info;
-	uint32_t                                    chipset_id;
 	char                                        path[20];
 	const char *                                name;
-	uint64_t                                    aperture_size;
-	int                                         cmd_parser_version;
-	uint32_t                    pci_vendor_id;
-	uint32_t                    pci_device_id;
+	uint8_t                                     uuid[VK_UUID_SIZE];

 	struct wsi_device                       wsi_device;
+	struct radv_extensions                      extensions;
 };

 struct radv_instance {
@@ -268,7 +291,9 @@ struct radv_instance {

 	uint32_t                                    apiVersion;
 	int                                         physicalDeviceCount;
-	struct radv_physical_device                  physicalDevice;
+	struct radv_physical_device                 physicalDevices[RADV_MAX_DRM_DEVICES];
+
+	uint64_t debug_flags;
 };

 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -324,11 +349,9 @@ struct radv_meta_state {
 		VkRenderPass render_pass[NUM_META_FS_KEYS];
 		struct radv_pipeline *color_pipelines[NUM_META_FS_KEYS];

-		VkRenderPass depth_only_rp[NUM_DEPTH_CLEAR_PIPELINES];
+		VkRenderPass depthstencil_rp;
 		struct radv_pipeline *depth_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
-		VkRenderPass stencil_only_rp[NUM_DEPTH_CLEAR_PIPELINES];
 		struct radv_pipeline *stencil_only_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
-		VkRenderPass depthstencil_rp[NUM_DEPTH_CLEAR_PIPELINES];
 		struct radv_pipeline *depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
 	} clear[1 + MAX_SAMPLES_LOG2];

@@ -382,6 +405,16 @@ struct radv_meta_state {
 		VkDescriptorSetLayout                     img_ds_layout;
 		VkPipeline pipeline;
 	} btoi;
+	struct {
+		VkPipelineLayout                          img_p_layout;
+		VkDescriptorSetLayout                     img_ds_layout;
+		VkPipeline pipeline;
+	} itoi;
+	struct {
+		VkPipelineLayout                          img_p_layout;
+		VkDescriptorSetLayout                     img_ds_layout;
+		VkPipeline pipeline;
+	} cleari;

 	struct {
 		VkPipeline                                pipeline;
@@ -419,12 +452,21 @@ struct radv_meta_state {
 	} buffer;
 };

+/* queue types; compared against queue_family_index values */
+#define RADV_QUEUE_GENERAL 0
+#define RADV_QUEUE_COMPUTE 1
+#define RADV_QUEUE_TRANSFER 2
+
+#define RADV_MAX_QUEUE_FAMILIES 3 /* sizes the per-family arrays in struct radv_device */
+
+enum ring_type radv_queue_family_to_ring(int f);
+
 struct radv_queue {
 	VK_LOADER_DATA                              _loader_data;
-
 	struct radv_device *                         device;
-
-	struct radv_state_pool *                     pool;
+	struct radeon_winsys_ctx                    *hw_ctx;
+	int queue_family_index;
+	int queue_idx;
 };

 struct radv_device {
@@ -434,14 +476,14 @@ struct radv_device {

 	struct radv_instance *                       instance;
 	struct radeon_winsys *ws;
-	struct radeon_winsys_ctx *hw_ctx;

 	struct radv_meta_state                       meta_state;
-	struct radv_queue                            queue;
-	struct radeon_winsys_cs *empty_cs;

-	bool allow_fast_clears;
-	bool allow_dcc;
+	struct radv_queue *queues[RADV_MAX_QUEUE_FAMILIES];
+	int queue_count[RADV_MAX_QUEUE_FAMILIES];
+	struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
+
+	uint64_t debug_flags;

 	/* MSAA sample locations.
 	 * The first index is the sample index.
@@ -451,9 +493,12 @@ struct radv_device {
 	float sample_locations_4x[4][2];
 	float sample_locations_8x[8][2];
 	float sample_locations_16x[16][2];
-};

-void radv_device_get_cache_uuid(void *uuid);
+	struct radeon_winsys_bo                      *trace_bo;
+	uint32_t                                     *trace_id_ptr;
+
+	struct radv_physical_device                  *physical_device;
+};

 struct radv_device_memory {
 	struct radeon_winsys_bo                      *bo;
@@ -642,10 +687,14 @@ struct radv_cmd_state {
 	enum radv_cmd_flush_bits                     flush_bits;
 	unsigned                                     active_occlusion_queries;
 	float					     offset_scale;
+	uint32_t                                      descriptors_dirty;
+	uint32_t                                      trace_id;
 };
+
 struct radv_cmd_pool {
 	VkAllocationCallbacks                        alloc;
 	struct list_head                             cmd_buffers;
+	uint32_t queue_family_index;
 };

 struct radv_cmd_buffer_upload {
@@ -668,6 +717,7 @@ struct radv_cmd_buffer {
 	VkCommandBufferLevel                         level;
 	struct radeon_winsys_cs *cs;
 	struct radv_cmd_state state;
+	uint32_t queue_family_index;

 	uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
 	uint32_t dynamic_buffers[16 * MAX_DYNAMIC_BUFFERS];
@@ -680,6 +730,10 @@ struct radv_cmd_buffer {

 struct radv_image;

+bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer);
+
+void si_init_compute(struct radv_physical_device *physical_device,
+		     struct radv_cmd_buffer *cmd_buffer);
 void si_init_config(struct radv_physical_device *physical_device,
 		    struct radv_cmd_buffer *cmd_buffer);
 void si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
@@ -729,6 +783,7 @@ void radv_set_color_clear_regs(struct radv_cmd_buffer *cmd_buffer,
 void radv_fill_buffer(struct radv_cmd_buffer *cmd_buffer,
 		      struct radeon_winsys_bo *bo,
 		      uint64_t offset, uint64_t size, uint32_t value);
+void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer);

 /*
 * Takes x,y,z as exact numbers of invocations, instead of blocks.
@@ -794,6 +849,7 @@ struct radv_shader_variant {
 	struct ac_shader_variant_info info;
 	unsigned rsrc1;
 	unsigned rsrc2;
+	uint32_t code_size;
 };

 struct radv_depth_stencil_state {
@@ -933,10 +989,6 @@ struct radv_cmask_info {
 	uint64_t offset;
 	uint64_t size;
 	unsigned alignment;
-	unsigned pitch;
-	unsigned height;
-	unsigned xalign;
-	unsigned yalign;
 	unsigned slice_tile_max;
 	unsigned base_address_reg;
 };
@@ -967,6 +1019,9 @@ struct radv_image {
 	VkDeviceSize size;
 	uint32_t alignment;

+	bool exclusive;
+	unsigned queue_family_mask;
+
 	/* Set when bound */
 	struct radeon_winsys_bo *bo;
 	VkDeviceSize offset;
@@ -987,8 +1042,13 @@ bool radv_layout_is_htile_compressed(const struct radv_image *image,
                                     VkImageLayout layout);
 bool radv_layout_can_expclear(const struct radv_image *image,
                              VkImageLayout layout);
-bool radv_layout_has_cmask(const struct radv_image *image,
-			   VkImageLayout layout);
+bool radv_layout_can_fast_clear(const struct radv_image *image,
+			        VkImageLayout layout,
+			        unsigned queue_mask);
+
+
+unsigned radv_image_queue_family_mask(const struct radv_image *image, int family);
+
 static inline uint32_t
 radv_get_layerCount(const struct radv_image *image,
 		    const VkImageSubresourceRange *range)
@@ -1206,6 +1266,13 @@ void radv_initialise_cmask(struct radv_cmd_buffer *cmd_buffer,
 			   struct radv_image *image, uint32_t value);
 void radv_initialize_dcc(struct radv_cmd_buffer *cmd_buffer,
 			 struct radv_image *image, uint32_t value);
+
+struct radv_fence { /* driver object obtained from a VkFence via RADV_FROM_HANDLE */
+	struct radeon_winsys_fence *fence; /* winsys fence; passed as the last argument of ws->cs_submit() */
+	bool submitted; /* set e.g. by radv_AcquireNextImageKHR and radv_QueuePresentKHR */
+	bool signalled; /* set together with submitted after a successful image acquire */
+};
+
 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)		\
 								\
 	static inline struct __radv_type *			\
@@ -1263,20 +1330,4 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass)
 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler)
 RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule)

-#define RADV_DEFINE_STRUCT_CASTS(__radv_type, __VkType)			\
-									\
-	static inline const __VkType *					\
-	__radv_type ## _to_ ## __VkType(const struct __radv_type *__radv_obj) \
-	{								\
-		return (const __VkType *) __radv_obj;			\
-	}
-
-#define RADV_COMMON_TO_STRUCT(__VkType, __vk_name, __common_name)	\
-	const __VkType *__vk_name = radv_common_to_ ## __VkType(__common_name)
-
-RADV_DEFINE_STRUCT_CASTS(radv_common, VkMemoryBarrier)
-RADV_DEFINE_STRUCT_CASTS(radv_common, VkBufferMemoryBarrier)
-RADV_DEFINE_STRUCT_CASTS(radv_common, VkImageMemoryBarrier)
-
-
 #endif /* RADV_PRIVATE_H */
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -35,10 +35,10 @@

 static unsigned get_max_db(struct radv_device *device)
 {
-	unsigned num_db = device->instance->physicalDevice.rad_info.num_render_backends;
-	unsigned rb_mask = device->instance->physicalDevice.rad_info.enabled_rb_mask;
+	unsigned num_db = device->physical_device->rad_info.num_render_backends;
+	MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;

-	if (device->instance->physicalDevice.rad_info.chip_class == SI)
+	if (device->physical_device->rad_info.chip_class == SI)
 		num_db = 8;
 	else
 		num_db = MAX2(8, num_db);
@@ -217,7 +217,7 @@ void radv_CmdCopyQueryPoolResults(
 		uint64_t local_src_va = va  + query * pool->stride;
 		unsigned elem_size = (flags & VK_QUERY_RESULT_64_BIT) ? 8 : 4;

-		unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26);
+		MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 26);

 		if (flags & VK_QUERY_RESULT_WAIT_BIT) {
 			/* TODO, not sure if there is any case where we won't always be ready yet */
@@ -387,6 +387,7 @@ void radv_CmdWriteTimestamp(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_query_pool, pool, queryPool);
+	bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
 	struct radeon_winsys_cs *cs = cmd_buffer->cs;
 	uint64_t va = cmd_buffer->device->ws->buffer_get_va(pool->bo);
 	uint64_t avail_va = va + pool->availability_offset + 4 * query;
@@ -394,17 +395,27 @@ void radv_CmdWriteTimestamp(

 	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);

-	unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 11);
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);

-	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-	radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
-	radeon_emit(cs, query_va);
-	radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
-	radeon_emit(cs, 0);
-	radeon_emit(cs, 0);
+	if (mec) {
+		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
+		radeon_emit(cs, 3 << 29);
+		radeon_emit(cs, query_va);
+		radeon_emit(cs, query_va >> 32);
+		radeon_emit(cs, 0);
+		radeon_emit(cs, 0);
+	} else {
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
+		radeon_emit(cs, query_va);
+		radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, 0);
+	}

 	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, S_370_DST_SEL(V_370_MEMORY_SYNC) |
+	radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
 		    S_370_WR_CONFIRM(1) |
 		    S_370_ENGINE_SEL(V_370_ME));
 	radeon_emit(cs, avail_va);
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -85,11 +85,13 @@ struct radeon_info {
 	uint32_t                    gart_page_size;
 	uint64_t                    gart_size;
 	uint64_t                    vram_size;
+	uint64_t                    visible_vram_size;
 	bool                        has_dedicated_vram;
 	bool                     has_virtual_memory;
 	bool                        gfx_ib_pad_with_type2;
-	bool                     has_sdma;
 	bool                     has_uvd;
+	uint32_t                    sdma_rings;
+	uint32_t                    compute_rings;
 	uint32_t                    vce_fw_version;
 	uint32_t                    vce_harvest_config;
 	uint32_t                    clock_crystal_freq;
@@ -251,6 +253,7 @@ struct radeon_bo_metadata {

 struct radeon_winsys_bo;
 struct radeon_winsys_fence;
+struct radeon_winsys_sem;

 struct radeon_winsys {
 	void (*destroy)(struct radeon_winsys *ws);
@@ -284,7 +287,8 @@ struct radeon_winsys {
 	struct radeon_winsys_ctx *(*ctx_create)(struct radeon_winsys *ws);
 	void (*ctx_destroy)(struct radeon_winsys_ctx *ctx);

-	bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx);
+	bool (*ctx_wait_idle)(struct radeon_winsys_ctx *ctx,
+	                      enum ring_type ring_type, int ring_index);

 	struct radeon_winsys_cs *(*cs_create)(struct radeon_winsys *ws,
 					      enum ring_type ring_type);
@@ -298,8 +302,13 @@ struct radeon_winsys {
 	void (*cs_grow)(struct radeon_winsys_cs * cs, size_t min_size);

 	int (*cs_submit)(struct radeon_winsys_ctx *ctx,
+			 int queue_index,
 			 struct radeon_winsys_cs **cs_array,
 			 unsigned cs_count,
+			 struct radeon_winsys_sem **wait_sem,
+			 unsigned wait_sem_count,
+			 struct radeon_winsys_sem **signal_sem,
+			 unsigned signal_sem_count,
 			 bool can_patch,
 			 struct radeon_winsys_fence *fence);

@@ -310,6 +319,8 @@ struct radeon_winsys {
 	void (*cs_execute_secondary)(struct radeon_winsys_cs *parent,
 				    struct radeon_winsys_cs *child);

+	void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
+
 	int (*surface_init)(struct radeon_winsys *ws,
 			    struct radeon_surf *surf);

@@ -322,6 +333,10 @@ struct radeon_winsys {
 			   struct radeon_winsys_fence *fence,
 			   bool absolute,
 			   uint64_t timeout);
+
+	struct radeon_winsys_sem *(*create_sem)(struct radeon_winsys *ws);
+	void (*destroy_sem)(struct radeon_winsys_sem *sem);
+
 };

 static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)
--- a/src/amd/vulkan/radv_util.c
+++ b/src/amd/vulkan/radv_util.c
@@ -65,25 +65,6 @@ void radv_printflike(3, 4)
 	fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
 }

-void radv_noreturn radv_printflike(1, 2)
-	radv_abortf(const char *format, ...)
-{
-	va_list va;
-
-	va_start(va, format);
-	radv_abortfv(format, va);
-	va_end(va);
-}
-
-void radv_noreturn
-radv_abortfv(const char *format, va_list va)
-{
-	fprintf(stderr, "vk: error: ");
-	vfprintf(stderr, format, va);
-	fprintf(stderr, "\n");
-	abort();
-}
-
 VkResult
 __vk_errorf(VkResult error, const char *file, int line, const char *format, ...)
 {
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -75,7 +75,7 @@ void radv_DestroySurfaceKHR(
 	const VkAllocationCallbacks*                 pAllocator)
 {
 	RADV_FROM_HANDLE(radv_instance, instance, _instance);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);

 	vk_free2(&instance->alloc, pAllocator, surface);
 }
@@ -87,7 +87,7 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
 	VkBool32*                                   pSupported)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_support(surface, &device->wsi_device,
@@ -101,7 +101,7 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
 	VkSurfaceCapabilitiesKHR*                   pSurfaceCapabilities)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_capabilities(surface, pSurfaceCapabilities);
@@ -114,7 +114,7 @@ VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
 	VkSurfaceFormatKHR*                         pSurfaceFormats)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_formats(surface, &device->wsi_device, pSurfaceFormatCount,
@@ -128,7 +128,7 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
 	VkPresentModeKHR*                           pPresentModes)
 {
 	RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
 	struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];

 	return iface->get_present_modes(surface, pPresentModeCount,
@@ -249,9 +249,9 @@ VkResult radv_CreateSwapchainKHR(
 	VkSwapchainKHR*                              pSwapchain)
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
-	RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
+	ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
 	struct wsi_interface *iface =
-		device->instance->physicalDevice.wsi_device.wsi[surface->platform];
+		device->physical_device->wsi_device.wsi[surface->platform];
 	struct wsi_swapchain *swapchain;
 	const VkAllocationCallbacks *alloc;
 	if (pAllocator)
@@ -259,7 +259,7 @@ VkResult radv_CreateSwapchainKHR(
 	else
 		alloc = &device->alloc;
 	VkResult result = iface->create_swapchain(surface, _device,
-						  &device->instance->physicalDevice.wsi_device,
+						  &device->physical_device->wsi_device,
 						  pCreateInfo,
 						  alloc, &radv_wsi_image_fns,
 						  &swapchain);
@@ -288,6 +288,9 @@ void radv_DestroySwapchainKHR(
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
 	const VkAllocationCallbacks *alloc;

+	if (!_swapchain)
+		return;
+
 	if (pAllocator)
 		alloc = pAllocator;
 	else
@@ -318,13 +321,21 @@ VkResult radv_AcquireNextImageKHR(
 	VkSwapchainKHR                               _swapchain,
 	uint64_t                                     timeout,
 	VkSemaphore                                  semaphore,
-	VkFence                                      fence,
+	VkFence                                      _fence,
 	uint32_t*                                    pImageIndex)
 {
 	RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
+	RADV_FROM_HANDLE(radv_fence, fence, _fence);

-	return swapchain->acquire_next_image(swapchain, timeout, semaphore,
-					     pImageIndex);
+	VkResult result = swapchain->acquire_next_image(swapchain, timeout, semaphore,
+	                                                pImageIndex);
+
+	if (fence && result == VK_SUCCESS) {
+		fence->submitted = true;
+		fence->signalled = true;
+	}
+
+	return result;
 }

 VkResult radv_QueuePresentKHR(
@@ -351,7 +362,15 @@ VkResult radv_QueuePresentKHR(
 					 1, &swapchain->fences[0]);
 		}

-		radv_QueueSubmit(_queue, 0, NULL, swapchain->fences[0]);
+		RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
+		struct radeon_winsys_fence *base_fence = fence->fence;
+		struct radeon_winsys_ctx *ctx = queue->hw_ctx;
+		queue->device->ws->cs_submit(ctx, queue->queue_idx,
+					     &queue->device->empty_cs[queue->queue_family_index],
+					     1,
+					     (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
+					     pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
+		fence->submitted = true;

 		result = swapchain->queue_present(swapchain,
 						  pPresentInfo->pImageIndices[i]);
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -170,10 +170,11 @@ si_write_harvested_raster_configs(struct radv_physical_device *physical_device,
 				       S_030800_INSTANCE_BROADCAST_WRITES(1));
 }

-static void
+void
 si_init_compute(struct radv_physical_device *physical_device,
-                struct radeon_winsys_cs *cs)
+                struct radv_cmd_buffer *cmd_buffer)
 {
+	struct radeon_winsys_cs *cs = cmd_buffer->cs;
 	radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
 	radeon_emit(cs, 0);
 	radeon_emit(cs, 0);
@@ -419,7 +420,7 @@ void si_init_config(struct radv_physical_device *physical_device,
 	if (physical_device->rad_info.family == CHIP_STONEY)
 		radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);

-	si_init_compute(physical_device, cs);
+	si_init_compute(physical_device, cmd_buffer);
 }

 static void
@@ -479,11 +480,11 @@ si_write_viewport(struct radeon_winsys_cs *cs, int first_vp,
 		radeon_emit(cs, fui(translate[2]));
 	}

+	radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
+				   first_vp * 4 * 2, count * 2);
 	for (i = 0; i < count; i++) {
 		float zmin = MIN2(viewports[i].minDepth, viewports[i].maxDepth);
 		float zmax = MAX2(viewports[i].minDepth, viewports[i].maxDepth);
-		radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
-					   first_vp * 4 * 2, count * 2);
 		radeon_emit(cs, fui(zmin));
 		radeon_emit(cs, fui(zmax));
 	}
@@ -510,8 +511,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first,
 uint32_t
 si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
 {
-	enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
-	struct radeon_info *info = &cmd_buffer->device->instance->physicalDevice.rad_info;
+	enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
+	struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
 	unsigned prim = cmd_buffer->state.pipeline->graphics.prim;
 	unsigned primgroup_size = 128; /* recommended without a GS */
 	unsigned max_primgroup_in_wave = 2;
@@ -598,8 +599,18 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer)
 void
 si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 {
-	enum chip_class chip_class = cmd_buffer->device->instance->physicalDevice.rad_info.chip_class;
+	enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
 	unsigned cp_coher_cntl = 0;
+	bool is_compute = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE;
+
+	if (is_compute)
+		cmd_buffer->state.flush_bits &= ~(RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+	                                          RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
+	                                          RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+	                                          RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
+	                                          RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
+	                                          RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
+	                                          RADV_CMD_FLAG_VGT_FLUSH);

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);

@@ -627,7 +638,7 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 			S_0085F0_CB7_DEST_BASE_ENA(1);

 		/* Necessary for DCC */
-		if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= VI) {
+		if (cmd_buffer->device->physical_device->rad_info.chip_class >= VI) {
 			radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 			radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
 			                            EVENT_INDEX(5));
@@ -678,7 +689,8 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	/* Make sure ME is idle (it executes most packets) before continuing.
 	 * This prevents read-after-write hazards between PFP and ME.
 	 */
-	if (cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
+	if ((cp_coher_cntl || (cmd_buffer->state.flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) &&
+	    !radv_cmd_buffer_uses_mec(cmd_buffer)) {
 		radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cmd_buffer->cs, 0);
 	}
@@ -687,14 +699,27 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	 * Therefore, it should be last. Done in PFP.
 	 */
 	if (cp_coher_cntl) {
-		/* ACQUIRE_MEM is only required on a compute ring. */
-		radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
-		radeon_emit(cmd_buffer->cs, cp_coher_cntl);   /* CP_COHER_CNTL */
-		radeon_emit(cmd_buffer->cs, 0xffffffff);      /* CP_COHER_SIZE */
-		radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE */
-		radeon_emit(cmd_buffer->cs, 0x0000000A);      /* POLL_INTERVAL */
+		if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
+			radeon_emit(cmd_buffer->cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
+			                            PKT3_SHADER_TYPE_S(1));
+			radeon_emit(cmd_buffer->cs, cp_coher_cntl);   /* CP_COHER_CNTL */
+			radeon_emit(cmd_buffer->cs, 0xffffffff);      /* CP_COHER_SIZE */
+			radeon_emit(cmd_buffer->cs, 0xff);            /* CP_COHER_SIZE_HI */
+			radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE */
+			radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE_HI */
+			radeon_emit(cmd_buffer->cs, 0x0000000A);      /* POLL_INTERVAL */
+		} else {
+			/* ACQUIRE_MEM is only required on a compute ring. */
+			radeon_emit(cmd_buffer->cs, PKT3(PKT3_SURFACE_SYNC, 3, 0));
+			radeon_emit(cmd_buffer->cs, cp_coher_cntl);   /* CP_COHER_CNTL */
+			radeon_emit(cmd_buffer->cs, 0xffffffff);      /* CP_COHER_SIZE */
+			radeon_emit(cmd_buffer->cs, 0);               /* CP_COHER_BASE */
+			radeon_emit(cmd_buffer->cs, 0x0000000A);      /* POLL_INTERVAL */
+		}
 	}

+	if (cmd_buffer->state.flush_bits)
+		radv_cmd_buffer_trace_emit(cmd_buffer);
 	cmd_buffer->state.flush_bits = 0;
 }

@@ -731,7 +756,7 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);

-	if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, sync_flag | sel);	/* CP_SYNC [31] */
 		radeon_emit(cs, src_va);		/* SRC_ADDR_LO [31:0] */
@@ -753,10 +778,12 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
 	 * indices. If we wanted to execute CP DMA in PFP, this packet
 	 * should precede it.
 	 */
-	if (sync_flag) {
+	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
+
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

 /* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
@@ -775,7 +802,7 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);

-	if (cmd_buffer->device->instance->physicalDevice.rad_info.chip_class >= CIK) {
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
 		radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
 		radeon_emit(cs, clear_value);		/* DATA [31:0] */
@@ -793,10 +820,11 @@ static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
 	}

 	/* See "copy_buffer" for explanation. */
-	if (sync_flag) {
+	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
@@ -847,8 +875,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 	uint64_t skipped_size = 0, realign_size = 0;


-	if (cmd_buffer->device->instance->physicalDevice.rad_info.family <= CHIP_CARRIZO ||
-	    cmd_buffer->device->instance->physicalDevice.rad_info.family == CHIP_STONEY) {
+	if (cmd_buffer->device->physical_device->rad_info.family <= CHIP_CARRIZO ||
+	    cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY) {
 		/* If the size is not aligned, we must add a dummy copy at the end
 		 * just to align the internal counter. Otherwise, the DMA engine
 		 * would slow down by an order of magnitude for following copies.
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -27,6 +27,7 @@
 #include <amdgpu_drm.h>
 #include <assert.h>

+#include "ac_debug.h"
 #include "amdgpu_id.h"
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_cs.h"
@@ -54,6 +55,7 @@ struct radv_amdgpu_cs {
 	bool                        is_chained;

 	int                         buffer_hash_table[1024];
+	unsigned                    hw_ip;
 };

 static inline struct radv_amdgpu_cs *
@@ -62,6 +64,30 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base)
 	return (struct radv_amdgpu_cs*)base;
 }

+/* Translate a winsys ring type into the matching AMDGPU_HW_IP_* value. */
+static int ring_to_hw_ip(enum ring_type ring)
+{
+	if (ring == RING_GFX)
+		return AMDGPU_HW_IP_GFX;
+	if (ring == RING_DMA)
+		return AMDGPU_HW_IP_DMA;
+	if (ring == RING_COMPUTE)
+		return AMDGPU_HW_IP_COMPUTE;
+
+	/* Only the three ring types above are handled. */
+	unreachable("unsupported ring");
+}
+
+/* Fill *fence so that it identifies the submission described by *req on ctx. */
+static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
+		struct amdgpu_cs_fence *fence, struct amdgpu_cs_request *req)
+{
+	fence->fence = req->seq_no;
+	fence->ring = req->ring;
+	fence->ip_instance = req->ip_instance;
+	fence->ip_type = req->ip_type;
+	fence->context = ctx->ctx;
+}

 static struct radeon_winsys_fence *radv_amdgpu_create_fence()
 {
@@ -126,6 +152,7 @@ static boolean radv_amdgpu_init_cs(struct radv_amdgpu_cs *cs,
 	for (int i = 0; i < ARRAY_SIZE(cs->buffer_hash_table); ++i)
 		cs->buffer_hash_table[i] = -1;

+	cs->hw_ip = ring_to_hw_ip(ring_type);
 	return true;
 }

@@ -140,7 +167,7 @@ radv_amdgpu_cs_create(struct radeon_winsys *ws,
 		return NULL;

 	cs->ws = radv_amdgpu_winsys(ws);
-	radv_amdgpu_init_cs(cs, RING_GFX);
+	radv_amdgpu_init_cs(cs, ring_type);

 	if (cs->ws->use_ib_bos) {
 		cs->ib_buffer = ws->buffer_create(ws, ib_size, 0,
@@ -475,7 +502,16 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 	return r;
 }

+/* Record request as the latest submission of its (ip_type, ring) slot. */
+static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
+				    struct amdgpu_cs_request *request)
+{
+	struct amdgpu_cs_fence *last = &ctx->last_submission[request->ip_type][request->ring];
+	radv_amdgpu_request_to_fence(ctx, last, request);
+}
+
 static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
+						int queue_idx,
 						struct radeon_winsys_cs **cs_array,
 						unsigned cs_count,
 						struct radeon_winsys_fence *_fence)
@@ -515,7 +551,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 		return r;
 	}

-	request.ip_type = AMDGPU_HW_IP_GFX;
+	request.ip_type = cs0->hw_ip;
+	request.ring = queue_idx;
 	request.number_of_ibs = 1;
 	request.ibs = &cs0->ib;
 	request.resources = bo_list;
@@ -531,19 +568,16 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,

 	amdgpu_bo_list_destroy(bo_list);

-	if (fence) {
-		fence->context = ctx->ctx;
-		fence->ip_type = request.ip_type;
-		fence->ip_instance = request.ip_instance;
-		fence->ring = request.ring;
-		fence->fence = request.seq_no;
-	}
-	ctx->last_seq_no = request.seq_no;
+	if (fence)
+		radv_amdgpu_request_to_fence(ctx, fence, &request);
+
+	radv_assign_last_submit(ctx, &request);

 	return r;
 }

 static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
+						 int queue_idx,
 						 struct radeon_winsys_cs **cs_array,
 						 unsigned cs_count,
 						 struct radeon_winsys_fence *_fence)
@@ -569,7 +603,8 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 			return r;
 		}

-		request.ip_type = AMDGPU_HW_IP_GFX;
+		request.ip_type = cs0->hw_ip;
+		request.ring = queue_idx;
 		request.resources = bo_list;
 		request.number_of_ibs = cnt;
 		request.ibs = ibs;
@@ -600,19 +635,16 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,

 		i += cnt;
 	}
-	if (fence) {
-		fence->context = ctx->ctx;
-		fence->ip_type = request.ip_type;
-		fence->ip_instance = request.ip_instance;
-		fence->ring = request.ring;
-		fence->fence = request.seq_no;
-	}
-	ctx->last_seq_no = request.seq_no;
+	if (fence)
+		radv_amdgpu_request_to_fence(ctx, fence, &request);
+
+	radv_assign_last_submit(ctx, &request);

 	return 0;
 }

 static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
+					       int queue_idx,
 					       struct radeon_winsys_cs **cs_array,
 					       unsigned cs_count,
 					       struct radeon_winsys_fence *_fence)
@@ -673,7 +705,8 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 		ib.size = size;
 		ib.ib_mc_address = ws->buffer_get_va(bo);

-		request.ip_type = AMDGPU_HW_IP_GFX;
+		request.ip_type = cs0->hw_ip;
+		request.ring = queue_idx;
 		request.resources = bo_list;
 		request.number_of_ibs = 1;
 		request.ibs = &ib;
@@ -695,35 +728,82 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,

 		i += cnt;
 	}
-	if (fence) {
-		fence->context = ctx->ctx;
-		fence->ip_type = request.ip_type;
-		fence->ip_instance = request.ip_instance;
-		fence->ring = request.ring;
-		fence->fence = request.seq_no;
-	}
-	ctx->last_seq_no = request.seq_no;
+	if (fence)
+		radv_amdgpu_request_to_fence(ctx, fence, &request);
+
+	radv_assign_last_submit(ctx, &request);

 	return 0;
 }

 static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
+					int queue_idx,
 					struct radeon_winsys_cs **cs_array,
 					unsigned cs_count,
+					struct radeon_winsys_sem **wait_sem,
+					unsigned wait_sem_count,
+					struct radeon_winsys_sem **signal_sem,
+					unsigned signal_sem_count,
 					bool can_patch,
 					struct radeon_winsys_fence *_fence)
 {
 	struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
+	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
+	int ret;
+	int i;
+	
+	for (i = 0; i < wait_sem_count; i++) {
+		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)wait_sem[i];
+		amdgpu_cs_wait_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
+					 sem);
+	}
 	if (!cs->ws->use_ib_bos) {
-		return radv_amdgpu_winsys_cs_submit_sysmem(_ctx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
 							   cs_count, _fence);
 	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
-		return radv_amdgpu_winsys_cs_submit_chained(_ctx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
 							    cs_count, _fence);
 	} else {
-		return radv_amdgpu_winsys_cs_submit_fallback(_ctx, cs_array,
+		ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, cs_array,
 							     cs_count, _fence);
 	}
+
+	for (i = 0; i < signal_sem_count; i++) {
+		amdgpu_semaphore_handle sem = (amdgpu_semaphore_handle)signal_sem[i];
+		amdgpu_cs_signal_semaphore(ctx->ctx, cs->hw_ip, 0, queue_idx,
+					   sem);
+	}
+	return ret;
+}
+
+
+/* CPU pointer for GPU address addr within this CS's IB buffers, or NULL. */
+static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
+{
+	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+	void *ret = NULL;
+	if (!cs->ib_buffer) /* no IB buffer to search; avoids a NULL bo below */
+		return NULL;
+	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
+		struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo*)
+		       (i == cs->num_old_ib_buffers ? cs->ib_buffer : cs->old_ib_buffers[i]);
+		if (addr >= bo->va && addr - bo->va < bo->size &&
+		    amdgpu_bo_cpu_map(bo->bo, &ret) == 0)
+			return (char *)ret + (addr - bo->va);
+	}
+	return NULL; /* not found, or the containing buffer could not be mapped */
+}
+
+/* Parse and print the main IB of _cs to file; does nothing if unmapped. */
+static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
+                                       FILE* file, uint32_t trace_id)
+{
+	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+	void *ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
+
+	if (ib) /* the lookup yields NULL on failure; never hand that to the parser */
+		ac_parse_ib(file, ib, cs->ib.size, trace_id,  "main IB",
+			    cs->ws->info.chip_class, radv_amdgpu_winsys_get_cpu_addr, cs);
+}

 static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_ws)
@@ -742,6 +822,7 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
 	ctx->ws = ws;
 	return (struct radeon_winsys_ctx *)ctx;
 error_create:
+	FREE(ctx);
 	return NULL;
 }

@@ -752,22 +833,16 @@ static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
 	FREE(ctx);
 }

-static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
+static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
+                                      enum ring_type ring_type, int ring_index)
 {
 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+	int ip_type = ring_to_hw_ip(ring_type);

-	if (ctx->last_seq_no) {
+	if (ctx->last_submission[ip_type][ring_index].fence) {
 		uint32_t expired;
-		struct amdgpu_cs_fence fence;
-
-		fence.context = ctx->ctx;
-		fence.ip_type = RING_GFX;
-		fence.ip_instance = 0;
-		fence.ring = 0;
-		fence.fence = ctx->last_seq_no;
-
-		int ret = amdgpu_cs_query_fence_status(&fence, 1000000000ull, 0,
-						       &expired);
+		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
+		                                       1000000000ull, 0, &expired);

 		if (ret || !expired)
 			return false;
@@ -776,6 +851,23 @@ static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx)
 	return true;
 }

+/* Create an amdgpu semaphore and return it as an opaque winsys handle,
+ * or NULL if amdgpu_cs_create_semaphore() reports an error. */
+static struct radeon_winsys_sem *radv_amdgpu_create_sem(struct radeon_winsys *_ws)
+{
+	amdgpu_semaphore_handle handle;
+
+	if (amdgpu_cs_create_semaphore(&handle) != 0)
+		return NULL;
+	return (struct radeon_winsys_sem *)handle;
+}
+
+/* Destroy a semaphore handle; _sem is cast back to amdgpu_semaphore_handle. */
+static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
+{
+	amdgpu_cs_destroy_semaphore((amdgpu_semaphore_handle)_sem);
+}
+
 void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 {
 	ws->base.ctx_create = radv_amdgpu_ctx_create;
@@ -789,7 +881,10 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
 	ws->base.cs_add_buffer = radv_amdgpu_cs_add_buffer;
 	ws->base.cs_execute_secondary = radv_amdgpu_cs_execute_secondary;
 	ws->base.cs_submit = radv_amdgpu_winsys_cs_submit;
+	ws->base.cs_dump = radv_amdgpu_winsys_cs_dump;
 	ws->base.create_fence = radv_amdgpu_create_fence;
 	ws->base.destroy_fence = radv_amdgpu_destroy_fence;
+	ws->base.create_sem = radv_amdgpu_create_sem;
+	ws->base.destroy_sem = radv_amdgpu_destroy_sem;
 	ws->base.fence_wait = radv_amdgpu_fence_wait;
 }
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -38,10 +38,14 @@
 #include "radv_radeon_winsys.h"
 #include "radv_amdgpu_winsys.h"

+enum {
+	MAX_RINGS_PER_TYPE = 8 /* rings tracked per HW IP type */
+};
+
 struct radv_amdgpu_ctx {
 	struct radv_amdgpu_winsys *ws;
 	amdgpu_context_handle ctx;
-	uint64_t last_seq_no;
+	struct amdgpu_cs_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE]; /* [ip_type][ring] */
 };

 static inline struct radv_amdgpu_ctx *
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -116,8 +116,9 @@ static bool
 do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 {
 	struct amdgpu_buffer_size_alignments alignment_info = {};
-	struct amdgpu_heap_info vram, gtt;
+	struct amdgpu_heap_info vram, visible_vram, gtt;
 	struct drm_amdgpu_info_hw_ip dma = {};
+	struct drm_amdgpu_info_hw_ip compute = {};
 	drmDevicePtr devinfo;
 	int r;
 	int i, j;
@@ -152,6 +153,13 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		goto fail;
 	}

+	r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
+	                           AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &visible_vram);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_heap_info(visible_vram) failed.\n");
+		goto fail;
+	}
+
 	r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
 	if (r) {
 		fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
@@ -163,6 +171,12 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
 		goto fail;
 	}
+
+	r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
+		goto fail;
+	}
 	ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */
 	ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config;

@@ -266,10 +280,15 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
 		goto fail;
 	}
+
+	assert(util_is_power_of_two(dma.available_rings + 1));
+	assert(util_is_power_of_two(compute.available_rings + 1));
+
 	/* Set hardware information. */
 	ws->info.name = get_chip_name(ws->info.family);
 	ws->info.gart_size = gtt.heap_size;
 	ws->info.vram_size = vram.heap_size;
+	ws->info.visible_vram_size = visible_vram.heap_size;
 	/* convert the shader clock from KHz to MHz */
 	ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
 	ws->info.max_se = ws->amdinfo.num_shader_engines;
@@ -282,7 +301,10 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 	ws->info.num_tile_pipes = radv_cik_get_num_tile_pipes(&ws->amdinfo);
 	ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
 	ws->info.has_virtual_memory = TRUE;
-	ws->info.has_sdma = dma.available_rings != 0;
+	ws->info.sdma_rings = MIN2(util_bitcount(dma.available_rings),
+	                           MAX_RINGS_PER_TYPE);
+	ws->info.compute_rings = MIN2(util_bitcount(compute.available_rings),
+	                              MAX_RINGS_PER_TYPE);

 	/* Get the number of good compute units. */
 	ws->info.num_good_compute_units = 0;
--- a/Show More
+++ b/Show More