swr: Remove need to allocate vertex buffer scratch space all in one go

Deferred deletion (via "fence_work") has obsoleted the need to allocate all client vertex buffer scratch space in a single chunk. Scratch allocations are now valid until the referenced fence is complete. Reviewed-by: Tim Rowley <timothy.o.rowley@intel.com>
swr: conditionally validate vertex buffer state
2017-06-29 13:23:33 -05:00 · 2017-06-29 13:23:33 -05:00 · 2017-06-29 13:23:33 -05:00 · 2017-06-29 17:38:26 +01:00 · 2017-06-29 16:24:58 +01:00 · 2017-06-29 16:19:35 +02:00
1466 changed files with 181085 additions and 47541 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,24 +1,11 @@
 language: c

-sudo: required
+sudo: false
 dist: trusty

 cache:
-  directories:
-    - $HOME/.ccache
-
-addons:
-  apt:
-    packages:
-      - libdrm-dev
-      - x11proto-xf86vidmode-dev
-      - libexpat1-dev
-      - libxcb-dri2-0-dev
-      - libx11-xcb-dev
-      # LLVM packaging is broken and misses these dependencies
-      - libedit-dev
-      - libelf-dev
-      - scons
+  apt: true
+  ccache: true

 env:
  global:
@@ -32,17 +19,259 @@ env:
    - XCBPROTO_VERSION=xcb-proto-1.11
    - LIBXCB_VERSION=libxcb-1.11
    - LIBXSHMFENCE_VERSION=libxshmfence-1.2
-    - LLVM_VERSION=3.9
-    - LLVM_PACKAGE="llvm-${LLVM_VERSION} llvm-${LLVM_VERSION}-dev"
-    - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+    - LIBTXC_DXTN_VERSION=libtxc_dxtn-1.0.1
+    - LIBVDPAU_VERSION=libvdpau-1.1
+    - LIBVA_VERSION=libva-1.6.2
+    - LIBWAYLAND_VERSION=wayland-1.11.1
    - PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig
-    - MAKEFLAGS=-j2
-  matrix:
-    - BUILD=make
-    - BUILD=scons
+    - LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH"
+
+matrix:
+  include:
+    - env:
+        - LABEL="make loaders/classic DRI"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="make check"
+        - DRI_LOADERS="--enable-glx --enable-gbm --enable-egl --with-platforms=x11,drm,surfaceless,wayland --enable-osmesa"
+        - DRI_DRIVERS="i915,i965,radeon,r200,swrast,nouveau"
+        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS=""
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          packages:
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libxdamage-dev
+            - libxfixes-dev
+    - env:
+        # NOTE: Building SWR is 2x (yes two) times slower than all the other
+        # gallium drivers combined.
+        # Start this early so that it doesn't hinder the run time.
+        - LABEL="make Gallium Drivers SWR"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - OVERRIDE_CC="gcc-5"
+        - OVERRIDE_CXX="g++-5"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS="swr"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - llvm-toolchain-trusty-3.9
+          packages:
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - g++-5
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="make Gallium Drivers Other"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS="i915,nouveau,pl111,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-3.9
+          packages:
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        # NOTE: Analogous to SWR above, building Clover is quite slow.
+        - LABEL="make Gallium ST Clover"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.6
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - OVERRIDE_CC=gcc-4.7
+        - OVERRIDE_CXX=g++-4.7
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        # i915 most likely doesn't work with OpenCL.
+        # Regardless - we're doing a quick build test here.
+        - GALLIUM_DRIVERS="i915"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-3.6
+          packages:
+            - libclc-dev
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            - g++-4.7
+            # From sources above
+            - llvm-3.6-dev
+            - clang-3.6
+            - libclang-3.6-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="make Gallium ST Other"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--enable-dri --disable-opencl --enable-xa --enable-nine --enable-xvmc --enable-vdpau --enable-va --enable-omx --enable-gallium-osmesa"
+        # We need swrast for osmesa and nine.
+        # i915 most likely doesn't work with most ST.
+        # Regardless - we're doing a quick build test here.
+        - GALLIUM_DRIVERS="i915,swrast"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          packages:
+            # Nine requires gcc 4.6... which is the one we have right ?
+            - libxvmc-dev
+            # Build locally, for now.
+            #- libvdpau-dev
+            #- libva-dev
+            - libomxil-bellagio-dev
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="make Vulkan"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--enable-dri --enable-dri3 --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS=""
+        - VULKAN_DRIVERS="intel,radeon"
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-3.9
+          packages:
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="scons"
+        - BUILD=scons
+        - SCONSFLAGS="-j4"
+        # Explicitly disable.
+        - SCONS_TARGET="llvm=0"
+        # Keep it symmetrical to the make build.
+        - SCONS_CHECK_COMMAND="scons llvm=0 check"
+      addons:
+        apt:
+          packages:
+            - scons
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="scons LLVM"
+        - BUILD=scons
+        - SCONSFLAGS="-j4"
+        - SCONS_TARGET="llvm=1"
+        # Keep it symmetrical to the make build.
+        - SCONS_CHECK_COMMAND="scons llvm=1 check"
+        - LLVM_VERSION=3.3
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+      addons:
+        apt:
+          packages:
+            - scons
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            - llvm-3.3-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="scons SWR"
+        - BUILD=scons
+        - SCONSFLAGS="-j4"
+        - SCONS_TARGET="swr=1"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        # Keep it symmetrical to the make build. There's no actual SWR, yet.
+        - SCONS_CHECK_COMMAND="true"
+        - OVERRIDE_CC="gcc-5"
+        - OVERRIDE_CXX="g++-5"
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - llvm-toolchain-trusty-3.9
+          packages:
+            - scons
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - g++-5
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev

 install:
-  - export PATH="/usr/lib/ccache:$PATH"
  - pip install --user mako

  # Since libdrm gets updated in configure.ac regularly, try to pick up the
@@ -90,25 +319,64 @@ install:
  - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
  - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)

-  # Install LLVM directly via apt-get (not Travis-CI's apt addon)
-  # See https://github.com/travis-ci/apt-source-whitelist/pull/205#issuecomment-216054237
+  # libtxc-dxtn uses the patented S3 Texture Compression
+  # algorithm. Therefore, we don't want to use this library but it is
+  # still possible through setting the USE_TXC_DXTN variable to yes in
+  # the travis web UI.
+  #
+  # According to Wikipedia, the patent expires on October 2, 2017:
+  # https://en.wikipedia.org/wiki/S3_Texture_Compression#Patent
+  - if test "x$USE_TXC_DXTN" = xyes; then
+      wget https://people.freedesktop.org/~cbrill/libtxc_dxtn/$LIBTXC_DXTN_VERSION.tar.bz2;
+      tar -jxvf $LIBTXC_DXTN_VERSION.tar.bz2;
+      (cd $LIBTXC_DXTN_VERSION && ./configure --prefix=$HOME/prefix && make install);
+    fi

-  - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-  - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-  - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty main'
-  - sudo apt-get update -qq
-  - sudo apt-get install -qq -y $LLVM_PACKAGE
+  - wget http://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
+  - tar -jxvf $LIBVDPAU_VERSION.tar.bz2
+  - (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)
+
+  - wget http://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
+  - tar -jxvf $LIBVA_VERSION.tar.bz2
+  - (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)
+
+  - wget http://wayland.freedesktop.org/releases/$LIBWAYLAND_VERSION.tar.xz
+  - tar -axvf $LIBWAYLAND_VERSION.tar.xz
+  - (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)
+
+  # Generate the header since one is missing on the Travis instance
+  - mkdir -p linux
+  - printf "%s\n" \
+           "#ifndef _LINUX_MEMFD_H" \
+           "#define _LINUX_MEMFD_H" \
+           "" \
+           "#define __NR_memfd_create 319" \
+           "#define SYS_memfd_create __NR_memfd_create" \
+           "" \
+           "#define MFD_CLOEXEC             0x0001U" \
+           "#define MFD_ALLOW_SEALING       0x0002U" \
+           "" \
+           "#endif /* _LINUX_MEMFD_H */" > linux/memfd.h

 script:
  - if test "x$BUILD" = xmake; then
+      test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
+      test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
+      export CC="$CC -isystem`pwd`";
+
      ./autogen.sh --enable-debug
-        --with-platforms=x11,drm
-        --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
-        --with-gallium-drivers=i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx
-        --with-vulkan-drivers=radeon
+        $DRI_LOADERS
+        --with-dri-drivers=$DRI_DRIVERS
+        $GALLIUM_ST
+        --with-gallium-drivers=$GALLIUM_DRIVERS
+        --with-vulkan-drivers=$VULKAN_DRIVERS
        --disable-llvm-shared-libs
-        ;
-      make && make check;
-    elif test x$BUILD = xscons; then
-      scons llvm=1 && scons llvm=1 check;
+        &&
+      make && eval $MAKE_CHECK_COMMAND;
+    fi
+
+  - if test "x$BUILD" = xscons; then
+      test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
+      test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
+      scons $SCONS_TARGET && eval $SCONS_CHECK_COMMAND;
    fi
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -37,11 +37,18 @@ LOCAL_CFLAGS += \
 	-Wno-missing-field-initializers \
 	-Wno-initializer-overrides \
 	-Wno-mismatched-tags \
+	-DVERSION=\"$(MESA_VERSION)\" \
 	-DPACKAGE_VERSION=\"$(MESA_VERSION)\" \
 	-DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\"

+# XXX: The following __STDC_*_MACROS defines should not be needed.
+# It's likely due to a bug elsewhere, but let's temporarily add them
+# here to fix the radeonsi build.
 LOCAL_CFLAGS += \
+	-DANDROID_API_LEVEL=$(PLATFORM_SDK_VERSION) \
 	-DENABLE_SHADER_CACHE \
+	-D__STDC_CONSTANT_MACROS \
+	-D__STDC_LIMIT_MACROS \
 	-DHAVE___BUILTIN_EXPECT \
 	-DHAVE___BUILTIN_FFS \
 	-DHAVE___BUILTIN_FFSLL \
@@ -59,6 +66,7 @@ LOCAL_CFLAGS += \
 	-DHAVE_PTHREAD=1 \
 	-DHAVE_DLOPEN \
 	-DHAVE_DL_ITERATE_PHDR \
+	-DMAJOR_IN_SYSMACROS \
 	-fvisibility=hidden \
 	-Wno-sign-compare

@@ -81,28 +89,10 @@ LOCAL_CFLAGS += \
 endif
 endif

-ifeq ($(MESA_ENABLE_LLVM),true)
-  ifeq ($(MESA_ANDROID_MAJOR_VERSION),5)
-    LOCAL_CFLAGS += -DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2
-    ELF_INCLUDES := external/elfutils/0.153/libelf
-  endif
-  ifeq ($(MESA_ANDROID_MAJOR_VERSION),6)
-    LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0
-    ELF_INCLUDES := external/elfutils/src/libelf
-  endif
-  ifeq ($(MESA_ANDROID_MAJOR_VERSION),7)
-    LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0
-    ELF_INCLUDES := external/elfutils/libelf
-  endif
-endif
-
 ifneq ($(LOCAL_IS_HOST_MODULE),true)
-# add libdrm if there are hardware drivers
-ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
 LOCAL_CFLAGS += -DHAVE_LIBDRM
 LOCAL_SHARED_LIBRARIES += libdrm
 endif
-endif

 LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/system/lib/$(MESA_DRI_MODULE_REL_PATH)\"
 LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/system/lib64/$(MESA_DRI_MODULE_REL_PATH)\"
@@ -116,7 +106,3 @@ endif

 # Quiet down the build system and remove any .h files from the sources
 LOCAL_SRC_FILES := $(patsubst %.h, , $(LOCAL_SRC_FILES))
-
-ifneq ($(LOCAL_IS_HOST_MODULE),true)
-LOCAL_SHARED_LIBRARIES += libz
-endif
--- a/Android.mk
+++ b/Android.mk
@@ -24,7 +24,7 @@
 # BOARD_GPU_DRIVERS should be defined.  The valid values are
 #
 #   classic drivers: i915 i965
-#   gallium drivers: swrast freedreno i915g nouveau r300g r600g radeonsi vc4 virgl vmwgfx
+#   gallium drivers: swrast freedreno i915g nouveau pl111 r300g r600g radeonsi vc4 virgl vmwgfx
 #
 # The main target is libGLES_mesa.  For each classic driver enabled, a DRI
 # module will also be built.  DRI modules will be loaded by libGLES_mesa.
@@ -32,6 +32,9 @@
 MESA_TOP := $(call my-dir)

 MESA_ANDROID_MAJOR_VERSION := $(word 1, $(subst ., , $(PLATFORM_VERSION)))
+ifneq ($(filter 2 4, $(MESA_ANDROID_MAJOR_VERSION)),)
+$(error "Android 4.4 and earlier not supported")
+endif

 MESA_DRI_MODULE_REL_PATH := dri
 MESA_DRI_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/$(MESA_DRI_MODULE_REL_PATH)
@@ -40,19 +43,37 @@ MESA_DRI_MODULE_UNSTRIPPED_PATH := $(TARGET_OUT_SHARED_LIBRARIES_UNSTRIPPED)/$(M
 MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk
 MESA_PYTHON2 := python

-classic_drivers := i915 i965
-gallium_drivers := swrast freedreno i915g nouveau r300g r600g radeonsi vmwgfx vc4 virgl
+# Lists to convert driver names to boolean variables
+# in form of <driver name>.<boolean make variable>
+classic_drivers := i915.HAVE_I915_DRI i965.HAVE_I965_DRI
+gallium_drivers := \
+	swrast.HAVE_GALLIUM_SOFTPIPE \
+	freedreno.HAVE_GALLIUM_FREEDRENO \
+	i915g.HAVE_GALLIUM_I915 \
+	nouveau.HAVE_GALLIUM_NOUVEAU \
+	pl111.HAVE_GALLIUM_PL111 \
+	r300g.HAVE_GALLIUM_R300 \
+	r600g.HAVE_GALLIUM_R600 \
+	radeonsi.HAVE_GALLIUM_RADEONSI \
+	vmwgfx.HAVE_GALLIUM_VMWGFX \
+	vc4.HAVE_GALLIUM_VC4 \
+	virgl.HAVE_GALLIUM_VIRGL

-MESA_GPU_DRIVERS := $(strip $(BOARD_GPU_DRIVERS))
-
-# warn about invalid drivers
-invalid_drivers := $(filter-out \
-	$(classic_drivers) $(gallium_drivers), $(MESA_GPU_DRIVERS))
-ifneq ($(invalid_drivers),)
-$(warning invalid GPU drivers: $(invalid_drivers))
-# tidy up
-MESA_GPU_DRIVERS := $(filter-out $(invalid_drivers), $(MESA_GPU_DRIVERS))
+ifeq ($(BOARD_GPU_DRIVERS),all)
+MESA_BUILD_CLASSIC := $(filter HAVE_%, $(subst ., , $(classic_drivers)))
+MESA_BUILD_GALLIUM := $(filter HAVE_%, $(subst ., , $(gallium_drivers)))
+else
+# Warn if we have any invalid driver names
+$(foreach d, $(BOARD_GPU_DRIVERS), \
+	$(if $(findstring $(d).,$(classic_drivers) $(gallium_drivers)), \
+		, \
+		$(warning invalid GPU driver: $(d)) \
+	) \
+)
+MESA_BUILD_CLASSIC := $(strip $(foreach d, $(BOARD_GPU_DRIVERS), $(patsubst $(d).%,%, $(filter $(d).%, $(classic_drivers)))))
+MESA_BUILD_GALLIUM := $(strip $(foreach d, $(BOARD_GPU_DRIVERS), $(patsubst $(d).%,%, $(filter $(d).%, $(gallium_drivers)))))
 endif
+$(foreach d, $(MESA_BUILD_CLASSIC) $(MESA_BUILD_GALLIUM), $(eval $(d) := true))

 # host and target must be the same arch to generate matypes.h
 ifeq ($(TARGET_ARCH),$(HOST_ARCH))
@@ -61,23 +82,27 @@ else
 MESA_ENABLE_ASM := false
 endif

-ifneq ($(filter $(classic_drivers), $(MESA_GPU_DRIVERS)),)
-MESA_BUILD_CLASSIC := true
-else
-MESA_BUILD_CLASSIC := false
+ifneq ($(filter true, $(HAVE_GALLIUM_RADEONSI)),)
+MESA_ENABLE_LLVM := true
 endif

-ifneq ($(filter $(gallium_drivers), $(MESA_GPU_DRIVERS)),)
-MESA_BUILD_GALLIUM := true
-else
-MESA_BUILD_GALLIUM := false
-endif
-
-MESA_ENABLE_LLVM := $(if $(filter radeonsi,$(MESA_GPU_DRIVERS)),true,false)
+define mesa-build-with-llvm
+  $(if $(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5), \
+    $(warning Unsupported LLVM version in Android $(MESA_ANDROID_MAJOR_VERSION)),) \
+  $(if $(filter 6,$(MESA_ANDROID_MAJOR_VERSION)), \
+    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0) \
+    $(eval LOCAL_STATIC_LIBRARIES += libLLVMCore) \
+    $(eval LOCAL_C_INCLUDES += external/llvm/include external/llvm/device/include),) \
+  $(if $(filter 7,$(MESA_ANDROID_MAJOR_VERSION)), \
+    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0) \
+    $(eval LOCAL_STATIC_LIBRARIES += libLLVMCore) \
+    $(eval LOCAL_C_INCLUDES += external/llvm/include external/llvm/device/include),) \
+  $(if $(filter O,$(MESA_ANDROID_MAJOR_VERSION)), \
+    $(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0309 -DMESA_LLVM_VERSION_PATCH=0) \
+    $(eval LOCAL_HEADER_LIBRARIES += llvm-headers),)
+endef

 # add subdirectories
-ifneq ($(strip $(MESA_GPU_DRIVERS)),)
-
 SUBDIRS := \
 	src/gbm \
 	src/loader \
@@ -92,11 +117,5 @@ SUBDIRS := \
 	src/vulkan

 INC_DIRS := $(call all-named-subdir-makefiles,$(SUBDIRS))
-
-ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
 INC_DIRS += $(call all-named-subdir-makefiles,src/gallium)
-endif
-
 include $(INC_DIRS)
-
-endif
--- a/Makefile.am
+++ b/Makefile.am
@@ -43,7 +43,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
 	--enable-llvm-shared-libs \
 	--with-platforms=x11,wayland,drm,surfaceless \
 	--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
-	--with-gallium-drivers=i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
+	--with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
 	--with-vulkan-drivers=intel,radeon

 ACLOCAL_AMFLAGS = -I m4
--- a/2
+++ b/2
@@ -1 +1 @@
-17.1.0-devel
+17.2.0-devel
--- a/bin/.editorconfig
+++ b/bin/.editorconfig
@@ -1,3 +1,2 @@
 [*.sh]
-indent_style = space
-indent_size = 2
+indent_style = tab
--- a/bin/get-extra-pick-list.sh
+++ b/bin/get-extra-pick-list.sh
@@ -30,7 +30,15 @@ do
 		if grep -q ^$candidate already_picked ; then
 			continue
 		fi
-		echo Commit $candidate references $sha
+		# Or skip it if it is on the ignore list.
+		if [ -f bin/.cherry-ignore ] ; then
+			if grep -q ^$candidate bin/.cherry-ignore ; then
+				continue
+			fi
+		fi
+		printf "Commit \"%s\" references %s\n" \
+		       "`git log -n1 --pretty=oneline $candidate`" \
+		       "$sha"
 	done
 done

--- a/bin/get-fixes-pick-list.sh
+++ b/bin/get-fixes-pick-list.sh
@@ -24,35 +24,55 @@ git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |
 git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
 while read sha
 do
-	# For each one try to extract the tag
-	fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
-	if [ "x$fixes_count" != x1 ] ; then
-		echo WARNING: Commit $sha has more than one Fixes tag
+	# Check to see whether the patch is on the ignore list ...
+	if [ -f bin/.cherry-ignore ] ; then
+		if grep -q ^$sha bin/.cherry-ignore ; then
+			continue
+		fi
 	fi
-	fixes=`git show $sha | grep -i "fixes:" | head -n 1`
-	# The following sed/cut combination is borrowed from GregKH
-	id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`

-	# Bail out if we cannot find suitable id.
-	# Any specific validation the $id is valid and not some junk, is
-	# implied with the follow up code
-	if [ "x$id" = x ] ; then
+	# Skip if it has been already cherry-picked.
+	if grep -q ^$sha already_picked ; then
 		continue
 	fi

-	# Check if the offending commit is in branch.
+	# Place every "fixes:" tag on its own line and join with the next word
+	# on its line or a later one.
+	fixes=`git show -s $sha | tr -d "\n" | sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'`

-	# Be that cherry-picked ...
-	# ... or landed before the branchpoint.
-	if grep -q ^$id already_picked ||
-	   grep -q ^$id already_landed ; then
+	# For each one try to extract the tag
+	fixes_count=`echo "$fixes" | wc -l`
+	warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0`
+	while [ $fixes_count -gt 0 ] ; do
+		# Treat only the current line
+		id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
+		fixes_count=$(($fixes_count-1))

-		# Finally nominate the fix if it hasn't landed yet.
-		if grep -q ^$sha already_picked ; then
+		# Bail out if we cannot find suitable id.
+		# Any specific validation that $id is valid, and not some junk,
+		# is implied by the follow-up code.
+		if [ "x$id" = x ] ; then
 			continue
 		fi

-		echo Commit $sha fixes $id
+		# Check if the offending commit is in branch.
+
+		# Be that cherry-picked ...
+		# ... or landed before the branchpoint.
+		if grep -q ^$id already_picked ||
+		   grep -q ^$id already_landed ; then
+
+			printf "Commit \"%s\" fixes %s\n" \
+			       "`git log -n1 --pretty=oneline $sha`" \
+			       "$id"
+			warn=$(($warn-1))
+		fi
+
+	done
+
+	if [ $warn -gt 0 ] ; then
+		printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
+		       "`git log -n1 --pretty=oneline $sha`"
 	fi

 done
--- a/bin/perf-annotate-jit.py
+++ b/bin/perf-annotate-jit.py
@@ -133,7 +133,7 @@ class PerfParser(LineParser):

    def __init__(self, infile, symbol):
        LineParser.__init__(self, infile)
-	self.symbol = symbol
+        self.symbol = symbol

    def readline(self):
        # Override LineParser.readline to ignore comment lines
@@ -155,7 +155,7 @@ class PerfParser(LineParser):
        addresses.sort()
        total_samples = 0

-	sys.stdout.write('%s:\n' % self.symbol)
+        sys.stdout.write('%s:\n' % self.symbol)
        for address, instr in asm:
            try:
                sample = samples.pop(address)
--- a/configure.ac
+++ b/configure.ac
@@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION])
 # in the first entry.
 LIBDRM_REQUIRED=2.4.75
 LIBDRM_RADEON_REQUIRED=2.4.71
-LIBDRM_AMDGPU_REQUIRED=2.4.79
+LIBDRM_AMDGPU_REQUIRED=2.4.81
 LIBDRM_INTEL_REQUIRED=2.4.75
 LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
@@ -97,13 +97,13 @@ XSHMFENCE_REQUIRED=1.1
 XVMC_REQUIRED=1.0.6
 PYTHON_MAKO_REQUIRED=0.8.0
 LIBSENSORS_REQUIRED=4.0.0
-ZLIB_REQUIRED=1.2.8
+ZLIB_REQUIRED=1.2.3

 dnl LLVM versions
 LLVM_REQUIRED_GALLIUM=3.3.0
 LLVM_REQUIRED_OPENCL=3.6.0
-LLVM_REQUIRED_R600=3.8.0
-LLVM_REQUIRED_RADEONSI=3.8.0
+LLVM_REQUIRED_R600=3.9.0
+LLVM_REQUIRED_RADEONSI=3.9.0
 LLVM_REQUIRED_RADV=3.9.0
 LLVM_REQUIRED_SWR=3.9.0

@@ -269,7 +269,7 @@ DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
 AC_SUBST([DEFINES])
 android=no
 case "$host_os" in
-*-android)
+*-android*)
    android=yes
    ;;
 linux*|*-gnu*|gnu*|cygwin*)
@@ -455,7 +455,7 @@ int main () {
 CFLAGS=$save_CFLAGS

 AC_ARG_ENABLE(pwr8,
-   [AS_HELP_STRING([--disable-pwr8-inst],
+   [AS_HELP_STRING([--disable-pwr8],
                   [disable POWER8-specific instructions])],
   [enable_pwr8=$enableval], [enable_pwr8=auto])

@@ -724,7 +724,7 @@ dnl Arch/platform-specific settings
 dnl
 AC_ARG_ENABLE([asm],
    [AS_HELP_STRING([--disable-asm],
-        [disable assembly usage @<:@default=enabled on supported plaforms@:>@])],
+        [disable assembly usage @<:@default=enabled on supported platforms@:>@])],
    [enable_asm="$enableval"],
    [enable_asm=yes]
 )
@@ -766,6 +766,13 @@ if test "x$enable_asm" = xyes; then
            ;;
        esac
        ;;
+    powerpc64le)
+        case "$host_os" in
+        linux*)
+            asm_arch=ppc64le
+            ;;
+        esac
+        ;;
    esac

    case "$asm_arch" in
@@ -781,6 +788,10 @@ if test "x$enable_asm" = xyes; then
        DEFINES="$DEFINES -DUSE_SPARC_ASM"
        AC_MSG_RESULT([yes, sparc])
        ;;
+    ppc64le)
+        DEFINES="$DEFINES -DUSE_PPC64LE_ASM"
+        AC_MSG_RESULT([yes, ppc64le])
+        ;;
    *)
        AC_MSG_RESULT([no, platform not supported])
        ;;
@@ -837,6 +848,11 @@ dnl is not valid for that platform.
 if test "x$android" = xno; then
    test -z "$PTHREAD_LIBS" && PTHREAD_LIBS="-lpthread"
 fi
+dnl According to the manual when using pthreads, one should add -pthread to
+dnl both compile and link-time arguments.
+dnl In practice that should be sufficient for all platforms, since any
+dnl platform built with GCC or Clang supports the flag.
+PTHREAD_LIBS="$PTHREAD_LIBS -pthread"

 dnl pthread-stubs is mandatory on BSD platforms, due to the nature of the
 dnl project. Even then there's a notable issue as described in the project README
@@ -851,8 +867,6 @@ esac

 if test "x$pthread_stubs_possible" = xyes; then
    PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4)
-    AC_SUBST(PTHREADSTUBS_CFLAGS)
-    AC_SUBST(PTHREADSTUBS_LIBS)
 fi

 dnl SELinux awareness.
@@ -1066,27 +1080,18 @@ AC_SUBST([LLVM_INCLUDEDIR])
 dnl
 dnl libunwind
 dnl
+PKG_CHECK_EXISTS(libunwind, [HAVE_LIBUNWIND=yes], [HAVE_LIBUNWIND=no])
 AC_ARG_ENABLE([libunwind],
    [AS_HELP_STRING([--enable-libunwind],
            [Use libunwind for backtracing (default: auto)])],
        [LIBUNWIND="$enableval"],
-        [LIBUNWIND="auto"])
-
-PKG_CHECK_EXISTS(libunwind, [HAVE_LIBUNWIND=yes], [HAVE_LIBUNWIND=no])
-if test "x$LIBUNWIND" = "xauto"; then
-    LIBUNWIND="$HAVE_LIBUNWIND"
-fi
+        [LIBUNWIND="$HAVE_LIBUNWIND"])

 if test "x$LIBUNWIND" = "xyes"; then
    PKG_CHECK_MODULES(LIBUNWIND, libunwind)
-    if test "x$HAVE_LIBUNWIND" != "xyes"; then
-        AC_MSG_ERROR([libunwind requested but not installed.])
-    fi
    AC_DEFINE(HAVE_LIBUNWIND, 1, [Have libunwind support])
 fi

-AM_CONDITIONAL(HAVE_LIBUNWIND, [test "x$LIBUNWIND" = xyes])
-

 dnl Options for APIs
 AC_ARG_ENABLE([opengl],
@@ -1245,7 +1250,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
 AC_ARG_WITH([gallium-drivers],
    [AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
        [comma delimited Gallium drivers list, e.g.
-        "i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,swr,vc4,virgl,etnaviv,imx"
+        "i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,virgl,etnaviv,imx"
        @<:@default=r300,r600,svga,swrast@:>@])],
    [with_gallium_drivers="$withval"],
    [with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -1367,7 +1372,7 @@ if test "x$enable_libglvnd" = xyes ; then
    esac

    PKG_CHECK_MODULES([GLVND], libglvnd >= 0.2.0)
-    PKG_CHECK_VAR(LIBGLVND_DATADIR, libglvnd, datadir)
+    LIBGLVND_DATADIR=`$PKG_CONFIG --variable=datadir libglvnd`
    AC_SUBST([LIBGLVND_DATADIR])

    DEFINES="${DEFINES} -DUSE_LIBGLVND=1"
@@ -1541,15 +1546,10 @@ xdri)

            PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED])
            GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV libdrm >= $LIBDRM_REQUIRED"
+
            if test x"$enable_dri" = xyes; then
               dri_modules="$dri_modules xcb-dri2 >= $XCBDRI2_REQUIRED"
            fi
-
-            if test x"$enable_dri3" = xyes; then
-               PKG_CHECK_EXISTS([xcb >= $XCB_REQUIRED], [], AC_MSG_ERROR([DRI3 requires xcb >= $XCB_REQUIRED]))
-               dri3_modules="xcb xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
-               PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
-            fi
        fi
        if test x"$dri_platform" = xapple ; then
            DEFINES="$DEFINES -DGLX_USE_APPLEGL"
@@ -1638,6 +1638,111 @@ if test "x$enable_glx_read_only_text" = xyes; then
    DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
 fi

+dnl
+dnl DEPRECATED: EGL Platforms configuration
+dnl
+AC_ARG_WITH([egl-platforms],
+    [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
+        [DEPRECATED: use --with-platforms instead@<:@default=auto@:>@])],
+    [with_egl_platforms="$withval"],
+    [with_egl_platforms=auto])
+
+if test "x$with_egl_platforms" = xauto; then
+    with_egl_platforms="x11,surfaceless"
+    if test "x$enable_gbm" = xyes; then
+        with_egl_platforms="$with_egl_platforms,drm"
+    fi
+else
+    AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-platforms instead.])
+fi
+
+dnl
+dnl Platforms configuration
+dnl
+AC_ARG_WITH([platforms],
+    [AS_HELP_STRING([--with-platforms@<:@=DIRS...@:>@],
+        [comma delimited native platforms libEGL/Vulkan/other supports, e.g.
+        "x11,drm,wayland,surfaceless..." @<:@default=auto@:>@])],
+    [with_platforms="$withval"],
+    [with_platforms=auto])
+
+# Reuse the autodetection rather than duplicating it.
+if test "x$with_platforms" = xauto; then
+    with_platforms=$with_egl_platforms
+fi
+
+PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
+        WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
+        WAYLAND_SCANNER='')
+if test "x$WAYLAND_SCANNER" = x; then
+    AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner], [:])
+fi
+
+# Do per-platform setups and checks
+platforms=`IFS=', '; echo $with_platforms`
+for plat in $platforms; do
+	case "$plat" in
+	wayland)
+
+		PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
+
+		if test "x$WAYLAND_SCANNER" = "x:"; then
+			AC_MSG_ERROR([wayland-scanner is needed to compile the wayland platform])
+		fi
+		DEFINES="$DEFINES -DHAVE_WAYLAND_PLATFORM"
+		;;
+
+	x11)
+		PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED xcb-xfixes])
+		DEFINES="$DEFINES -DHAVE_X11_PLATFORM"
+		;;
+
+	drm)
+		test "x$enable_gbm" = "xno" &&
+			AC_MSG_ERROR([EGL platform drm needs gbm])
+		DEFINES="$DEFINES -DHAVE_DRM_PLATFORM"
+		;;
+
+	surfaceless)
+		DEFINES="$DEFINES -DHAVE_SURFACELESS_PLATFORM"
+		;;
+
+	android)
+		PKG_CHECK_MODULES([ANDROID], [cutils hardware sync])
+		DEFINES="$DEFINES -DHAVE_ANDROID_PLATFORM"
+		;;
+
+	*)
+		AC_MSG_ERROR([platform '$plat' does not exist])
+		;;
+	esac
+
+	case "$plat" in
+	wayland|drm|surfaceless)
+		require_libdrm "Platform $plat"
+		;;
+	esac
+done
+
+if test "x$enable_glx" != xno; then
+    if ! echo "$platforms" | grep -q 'x11'; then
+        AC_MSG_ERROR([Building GLX without the x11 platform is not supported])
+    fi
+fi
+
+if test x"$enable_dri3" = xyes; then
+    DEFINES="$DEFINES -DHAVE_DRI3"
+
+    dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 xcb-xfixes xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
+    PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
+fi
+
+AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11')
+AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 'wayland')
+AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
+AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 'surfaceless')
+AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android')
+
 dnl
 dnl More DRI setup
 dnl
@@ -1680,10 +1785,6 @@ if test "x$enable_dri" = xyes; then
    # Platform specific settings and drivers to build
    case "$host_os" in
    linux*)
-        if test "x$enable_dri3" = xyes; then
-            DEFINES="$DEFINES -DHAVE_DRI3"
-        fi
-
        case "$host_cpu" in
        powerpc* | sparc*)
            # Build only the drivers for cards that exist on PowerPC/sparc
@@ -1740,12 +1841,11 @@ if test -n "$with_dri_drivers"; then
        xi915)
            require_libdrm "i915"
            HAVE_I915_DRI=yes
-            PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
+            PKG_CHECK_MODULES([I915], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
            ;;
        xi965)
            require_libdrm "i965"
            HAVE_I965_DRI=yes
-            PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
            ;;
        xnouveau)
            require_libdrm "nouveau"
@@ -1839,6 +1939,14 @@ AC_ARG_WITH([vulkan-icddir],
    [VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
 AC_SUBST([VULKAN_ICD_INSTALL_DIR])

+require_x11_dri3() {
+    if echo "$platforms" | grep -q 'x11'; then
+        if test "x$enable_dri3" != xyes; then
+            AC_MSG_ERROR([$1 Vulkan driver requires DRI3 when built with X11])
+        fi
+    fi
+}
+
 if test -n "$with_vulkan_drivers"; then
    if test "x$ac_cv_func_dl_iterate_phdr" = xno; then
        AC_MSG_ERROR([Vulkan drivers require the dl_iterate_phdr function])
@@ -1849,13 +1957,14 @@ if test -n "$with_vulkan_drivers"; then
        case "x$driver" in
        xintel)
            require_libdrm "ANV"
-            PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
+            require_x11_dri3 "ANV"
            HAVE_INTEL_VULKAN=yes
            ;;
        xradeon)
            require_libdrm "radv"
            PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
            radeon_llvm_check $LLVM_REQUIRED_RADV "radv"
+            require_x11_dri3 "radv"
            HAVE_RADEON_VULKAN=yes
            ;;
        *)
@@ -1961,23 +2070,47 @@ if test "x$enable_xa" = xyes; then
 fi
 AM_CONDITIONAL(HAVE_ST_XA, test "x$enable_xa" = xyes)

+if echo $platforms | grep -q "x11"; then
+    have_xvmc_platform=yes
+else
+    have_xvmc_platform=no
+fi
+
+if echo $platforms | grep -q "x11"; then
+    have_vdpau_platform=yes
+else
+    have_vdpau_platform=no
+fi
+
+if echo $platforms | grep -q "x11\|drm"; then
+    have_omx_platform=yes
+else
+    have_omx_platform=no
+fi
+
+if echo $platforms | grep -q "x11\|drm\|wayland"; then
+    have_va_platform=yes
+else
+    have_va_platform=no
+fi
+
 dnl
 dnl Gallium G3DVL configuration
 dnl
 if test -n "$with_gallium_drivers" -a "x$with_gallium_drivers" != xswrast; then
-    if test "x$enable_xvmc" = xauto; then
-	PKG_CHECK_EXISTS([xvmc >= $XVMC_REQUIRED], [enable_xvmc=yes], [enable_xvmc=no])
+    if test "x$enable_xvmc" = xauto -a "x$have_xvmc_platform" = xyes; then
+        PKG_CHECK_EXISTS([xvmc >= $XVMC_REQUIRED], [enable_xvmc=yes], [enable_xvmc=no])
    fi

-    if test "x$enable_vdpau" = xauto; then
+    if test "x$enable_vdpau" = xauto -a "x$have_vdpau_platform" = xyes; then
 	PKG_CHECK_EXISTS([vdpau >= $VDPAU_REQUIRED], [enable_vdpau=yes], [enable_vdpau=no])
    fi

-    if test "x$enable_omx" = xauto; then
-	PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], [enable_omx=yes], [enable_omx=no])
+    if test "x$enable_omx" = xauto -a "x$have_omx_platform" = xyes; then
+        PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], [enable_omx=yes], [enable_omx=no])
    fi

-    if test "x$enable_va" = xauto; then
+    if test "x$enable_va" = xauto -a "x$have_va_platform" = xyes; then
        PKG_CHECK_EXISTS([libva >= $LIBVA_REQUIRED], [enable_va=yes], [enable_va=no])
    fi
 fi
@@ -1995,23 +2128,24 @@ if test "x$enable_xvmc" = xyes -o \
        "x$enable_vdpau" = xyes -o \
        "x$enable_omx" = xyes -o \
        "x$enable_va" = xyes; then
-    if test x"$enable_dri3" = xyes; then
-        PKG_CHECK_MODULES([VL], [xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED
-                                 xcb-xfixes x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
-    else
-        PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
-    fi
+    PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
    need_gallium_vl_winsys=yes
 fi
 AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)

 if test "x$enable_xvmc" = xyes; then
+    if test "x$have_xvmc_platform" != xyes; then
+        AC_MSG_ERROR([XVMC requires the x11 platforms])
+    fi
    PKG_CHECK_MODULES([XVMC], [xvmc >= $XVMC_REQUIRED])
    gallium_st="$gallium_st xvmc"
 fi
 AM_CONDITIONAL(HAVE_ST_XVMC, test "x$enable_xvmc" = xyes)

 if test "x$enable_vdpau" = xyes; then
+    if test "x$have_vdpau_platform" != xyes; then
+        AC_MSG_ERROR([VDPAU requires the x11 platforms])
+    fi
    PKG_CHECK_MODULES([VDPAU], [vdpau >= $VDPAU_REQUIRED])
    gallium_st="$gallium_st vdpau"
    DEFINES="$DEFINES -DHAVE_ST_VDPAU"
@@ -2019,12 +2153,18 @@ fi
 AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes)

 if test "x$enable_omx" = xyes; then
+    if test "x$have_omx_platform" != xyes; then
+        AC_MSG_ERROR([OMX requires at least one of the x11 or drm platforms])
+    fi
    PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED])
    gallium_st="$gallium_st omx"
 fi
 AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes)

 if test "x$enable_va" = xyes; then
+    if test "x$have_va_platform" != xyes; then
+        AC_MSG_ERROR([VA requires at least one of the x11 drm or wayland platforms])
+    fi
    PKG_CHECK_MODULES([VA], [libva >= $LIBVA_REQUIRED])
    gallium_st="$gallium_st va"
 fi
@@ -2141,112 +2281,21 @@ dnl Gallium configuration
 dnl
 AM_CONDITIONAL(HAVE_GALLIUM, test -n "$with_gallium_drivers")

-dnl
-dnl DEPRECATED: EGL Platforms configuration
-dnl
-AC_ARG_WITH([egl-platforms],
-    [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
-        [DEPRECATED: use --with-plaforms instead@<:@default=auto@:>@])],
-    [with_egl_platforms="$withval"],
-    [with_egl_platforms=auto])
-
-if test "x$with_egl_platforms" = xauto; then
-    AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-plaforms instead.])
-    if test "x$enable_egl" = xyes; then
-        if test "x$enable_gbm" = xyes; then
-           with_egl_platforms="x11,drm"
-        else
-           with_egl_platforms="x11"
-        fi
-    else
-        with_egl_platforms=""
-    fi
-fi
-
-dnl
-dnl Platforms configuration
-dnl
-AC_ARG_WITH([platforms],
-    [AS_HELP_STRING([--with-platforms@<:@=DIRS...@:>@],
-        [comma delimited native platforms libEGL/Vulkan/other supports, e.g.
-        "x11,drm,wayland,surfaceless..." @<:@default=auto@:>@])],
-    [with_platforms="$withval"],
-    [with_platforms=auto])
-
-# For the time being, we still reuse the EGL named variables/defines.
-if test "x$with_platforms" != xauto; then
-    with_egl_platforms=$with_platforms
-fi
-
-PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
-        WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
-        WAYLAND_SCANNER='')
-if test "x$WAYLAND_SCANNER" = x; then
-    AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner], [:])
-fi
-
-# Do per-EGL platform setups and checks
-egl_platforms=`IFS=', '; echo $with_egl_platforms`
-for plat in $egl_platforms; do
-	case "$plat" in
-	wayland)
-
-		PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
-
-		if test "x$WAYLAND_SCANNER" = "x:"; then
-			AC_MSG_ERROR([wayland-scanner is needed to compile the wayland egl platform])
-		fi
-		;;
-
-	x11)
-		PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED xcb-xfixes])
-		;;
-
-	drm)
-		test "x$enable_gbm" = "xno" &&
-			AC_MSG_ERROR([EGL platform drm needs gbm])
-		;;
-
-	surfaceless)
-		;;
-
-	android)
-		PKG_CHECK_MODULES([ANDROID], [cutils hardware sync])
-		;;
-
-	*)
-		AC_MSG_ERROR([EGL platform '$plat' does not exist])
-		;;
-	esac
-
-	case "$plat" in
-	wayland|drm|surfaceless)
-		require_libdrm "Platform $plat"
-		;;
-	esac
-done
-
 # libEGL wants to default to the first platform specified in
 # ./configure.  parse that here.
-if test "x$egl_platforms" != "x"; then
-    FIRST_PLATFORM_CAPS=`echo $egl_platforms | sed 's| .*||' | tr '[[a-z]]' '[[A-Z]]'`
+if test "x$platforms" != "x"; then
+    FIRST_PLATFORM_CAPS=`echo $platforms | sed 's| .*||' | tr '[[a-z]]' '[[A-Z]]'`
    EGL_NATIVE_PLATFORM="_EGL_PLATFORM_$FIRST_PLATFORM_CAPS"
 else
    EGL_NATIVE_PLATFORM="_EGL_INVALID_PLATFORM"
 fi

-AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$egl_platforms" | grep -q 'x11')
-AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$egl_platforms" | grep -q 'wayland')
-AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep -q 'drm')
-AM_CONDITIONAL(HAVE_EGL_PLATFORM_SURFACELESS, echo "$egl_platforms" | grep -q 'surfaceless')
-AM_CONDITIONAL(HAVE_EGL_PLATFORM_ANDROID, echo "$egl_platforms" | grep -q 'android')
-
 AC_SUBST([EGL_NATIVE_PLATFORM])
 AC_SUBST([EGL_CFLAGS])

 # If we don't have the X11 platform, set this define so we don't try to include
 # the X11 headers.
-if ! echo "$egl_platforms" | grep -q 'x11'; then
+if ! echo "$platforms" | grep -q 'x11'; then
    DEFINES="$DEFINES -DMESA_EGL_NO_X11_HEADERS"
    GL_PC_CFLAGS="$GL_PC_CFLAGS -DMESA_EGL_NO_X11_HEADERS"
 fi
@@ -2316,7 +2365,7 @@ dnl DRM is needed by X, Wayland, and offscreen rendering.
 dnl Surfaceless is an alternative for the last one.
 dnl
 require_basic_egl() {
-    case "$with_egl_platforms" in
+    case "$with_platforms" in
        *drm*|*surfaceless*)
            ;;
        *)
@@ -2378,7 +2427,7 @@ if test -n "$with_gallium_drivers"; then
            ;;
        xi915)
            HAVE_GALLIUM_I915=yes
-            PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
+            PKG_CHECK_MODULES([I915], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
            require_libdrm "Gallium i915"
            ;;
        xr300)
@@ -2435,10 +2484,10 @@ if test -n "$with_gallium_drivers"; then
        xswr)
            llvm_require_version $LLVM_REQUIRED_SWR "swr"

-            swr_require_cxx_feature_flags "C++14" "__cplusplus >= 201402L" \
-                "-std=c++14" \
-                SWR_CXX14_CXXFLAGS
-            AC_SUBST([SWR_CXX14_CXXFLAGS])
+            swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
+                ",-std=c++11" \
+                SWR_CXX11_CXXFLAGS
+            AC_SUBST([SWR_CXX11_CXXFLAGS])

            swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
                ",-mavx,-march=core-avx" \
@@ -2462,10 +2511,15 @@ if test -n "$with_gallium_drivers"; then
                               DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"],
                              [USE_VC4_SIMULATOR=no])
            ;;
+        xpl111)
+            HAVE_GALLIUM_PL111=yes
+            ;;
        xvirgl)
            HAVE_GALLIUM_VIRGL=yes
            require_libdrm "virgl"
-            require_basic_egl "virgl"
+            if test "x$enable_egl" = xyes; then
+                require_basic_egl "virgl"
+            fi
            ;;
        *)
            AC_MSG_ERROR([Unknown Gallium driver: $driver])
@@ -2474,6 +2528,10 @@ if test -n "$with_gallium_drivers"; then
    done
 fi

+# XXX: Keep in sync with LLVM_REQUIRED_SWR
+AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x3.9.0 -a \
+                                              "x$LLVM_VERSION" != x3.9.1)
+
 if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
    llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
    llvm_add_default_components "gallium"
@@ -2485,6 +2543,10 @@ if test "x$HAVE_GALLIUM_ETNAVIV" != xyes -a "x$HAVE_GALLIUM_IMX" = xyes  ; then
    AC_MSG_ERROR([Building with imx requires etnaviv])
 fi

+if test "x$HAVE_GALLIUM_VC4" != xyes -a "x$HAVE_GALLIUM_PL111" = xyes  ; then
+    AC_MSG_ERROR([Building with pl111 requires vc4])
+fi
+
 dnl
 dnl Set defines and buildtime variables only when using LLVM.
 dnl
@@ -2549,6 +2611,7 @@ fi

 AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes)
+AM_CONDITIONAL(HAVE_GALLIUM_PL111, test "x$HAVE_GALLIUM_PL111" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
 AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
@@ -2588,8 +2651,7 @@ AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes)
 AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
 AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)

-AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_R600" = xyes -o \
-                                      "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
+AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
                                      "x$HAVE_RADEON_VULKAN" = xyes)

 AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
@@ -2612,6 +2674,7 @@ AM_CONDITIONAL(HAVE_COMMON_OSMESA, test "x$enable_osmesa" = xyes -o \
 AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64)
 AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64)
 AM_CONDITIONAL(HAVE_SPARC_ASM, test "x$asm_arch" = xsparc)
+AM_CONDITIONAL(HAVE_PPC64LE_ASM, test "x$asm_arch" = xppc64le)

 AC_SUBST([NINE_MAJOR], 1)
 AC_SUBST([NINE_MINOR], 0)
@@ -2698,6 +2761,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/drivers/llvmpipe/Makefile
 		src/gallium/drivers/noop/Makefile
 		src/gallium/drivers/nouveau/Makefile
+		src/gallium/drivers/pl111/Makefile
 		src/gallium/drivers/r300/Makefile
 		src/gallium/drivers/r600/Makefile
 		src/gallium/drivers/radeon/Makefile
@@ -2743,6 +2807,7 @@ AC_CONFIG_FILES([Makefile
 		src/gallium/winsys/freedreno/drm/Makefile
 		src/gallium/winsys/i915/drm/Makefile
 		src/gallium/winsys/nouveau/drm/Makefile
+		src/gallium/winsys/pl111/drm/Makefile
 		src/gallium/winsys/radeon/drm/Makefile
 		src/gallium/winsys/amdgpu/drm/Makefile
 		src/gallium/winsys/svga/drm/Makefile
@@ -2869,7 +2934,7 @@ else
    echo "        GBM:             no"
 fi

-    echo "        EGL/Vulkan/VL platforms:   $egl_platforms"
+    echo "        EGL/Vulkan/VL platforms:   $platforms"

 # Vulkan
 echo ""
@@ -2917,15 +2982,17 @@ echo "        Static libs:     $enable_static"
 echo "        Shared-glapi:    $enable_shared_glapi"

 dnl Compiler options
-# cleanup the CFLAGS/CXXFLAGS/DEFINES vars
+# cleanup the CFLAGS/CXXFLAGS/LDFLAGS/DEFINES vars
 cflags=`echo $CFLAGS | \
    $SED 's/^ *//;s/  */ /;s/ *$//'`
 cxxflags=`echo $CXXFLAGS | \
    $SED 's/^ *//;s/  */ /;s/ *$//'`
+ldflags=`echo $LDFLAGS | $SED 's/^ *//;s/  */ /;s/ *$//'`
 defines=`echo $DEFINES | $SED 's/^ *//;s/  */ /;s/ *$//'`
 echo ""
 echo "        CFLAGS:          $cflags"
 echo "        CXXFLAGS:        $cxxflags"
+echo "        LDFLAGS:         $ldflags"
 echo "        Macros:          $defines"
 echo ""
 if test "x$enable_llvm" = xyes; then
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -84,6 +84,7 @@
 <li><a href="codingstyle.html" target="_parent">Coding Style</a>
 <li><a href="submittingpatches.html" target="_parent">Submitting patches</a>
 <li><a href="releasing.html" target="_parent">Releasing process</a>
+<li><a href="release-calendar.html" target="_parent">Release calendar</a>
 <li><a href="sourcedocs.html" target="_parent">Source Documentation</a>
 <li><a href="dispatch.html" target="_parent">GL Dispatch</a>
 </ul>
--- a/docs/egl.html
+++ b/docs/egl.html
@@ -77,15 +77,13 @@ drivers will be installed to <code>${libdir}/egl</code>.</p>

 </dd>

-<dt><code>--with-egl-platforms</code></dt>
+<dt><code>--with-platforms</code></dt>
 <dd>

 <p>List the platforms (window systems) to support.  Its argument is a comma
-separated string such as <code>--with-egl-platforms=x11,drm</code>.  It decides
+separated string such as <code>--with-platforms=x11,drm</code>.  It decides
 the platforms a driver may support.  The first listed platform is also used by
-the main library to decide the native platform: this defines EGL native
-types such as <code>EGLNativeDisplayType</code> or
-<code>EGLNativeWindowType</code>.</p>
+the main library to decide the native platform.</p>

 <p>The available platforms are <code>x11</code>, <code>drm</code>,
 <code>wayland</code>, <code>surfaceless</code>, <code>android</code>,
@@ -167,9 +165,9 @@ binaries.</p>
 <dd>

 <p>This variable specifies the native platform.  The valid values are the same
-as those for <code>--with-egl-platforms</code>.  When the variable is not set,
+as those for <code>--with-platforms</code>.  When the variable is not set,
 the main library uses the first platform listed in
-<code>--with-egl-platforms</code> as the native platform.</p>
+<code>--with-platforms</code> as the native platform.</p>

 <p>Extensions like <code>EGL_MESA_drm_display</code> define new functions to
 create displays for non-native platforms.  These extensions are usually used by
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -46,6 +46,9 @@ sometimes be useful for debugging end-user issues.
 <li>MESA_NO_MMX - if set, disables Intel MMX optimizations
 <li>MESA_NO_3DNOW - if set, disables AMD 3DNow! optimizations
 <li>MESA_NO_SSE - if set, disables Intel SSE optimizations
+<li>MESA_NO_ERROR - if set, error checking is disabled as per KHR_no_error.
+   This will result in undefined behaviour for invalid use of the API, but
+   can reduce CPU use for apps that are known to be error free.</li>
 <li>MESA_DEBUG - if set, error messages are printed to stderr.  For example,
   if the application generates a GL_INVALID_ENUM error, a corresponding error
   message indicating where the error occurred, and possibly why, will be
@@ -160,48 +163,47 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
   This is useful for debugging hangs, etc.</li>
 <li>INTEL_DEBUG - a comma-separated list of named flags, which do various things:
 <ul>
-   <li>color - use color in output</li>
-   <li>tex - emit messages about textures.</li>
-   <li>state - emit messages about state flag tracking</li>
-   <li>blit - emit messages about blit operations</li>
-   <li>miptree - emit messages about miptrees</li>
-   <li>perf - emit messages about performance issues</li>
-   <li>perfmon - emit messages about AMD_performance_monitor</li>
+   <li>ann - annotate IR in assembly dumps</li>
+   <li>aub - dump batches into an AUB trace for use with simulation tools</li>
   <li>bat - emit batch information</li>
-   <li>pix - emit messages about pixel operations</li>
+   <li>blit - emit messages about blit operations</li>
+   <li>blorp - emit messages about the blorp operations (blits &amp; clears)</li>
   <li>buf - emit messages about buffer objects</li>
+   <li>clip - emit messages about the clip unit (for old gens, includes the CLIP program)</li>
+   <li>color - use color in output</li>
+   <li>cs - dump shader assembly for compute shaders</li>
+   <li>do32 - generate compute shader SIMD32 programs even if workgroup size doesn't exceed the SIMD16 limit</li>
+   <li>dri - emit messages about the DRI interface</li>
   <li>fbo - emit messages about framebuffers</li>
   <li>fs - dump shader assembly for fragment shaders</li>
   <li>gs - dump shader assembly for geometry shaders</li>
-   <li>sync - after sending each batch, emit a message and wait for that batch to finish rendering</li>
-   <li>prim - emit messages about drawing primitives</li>
-   <li>vert - emit messages about vertex assembly</li>
-   <li>dri - emit messages about the DRI interface</li>
-   <li>sf - emit messages about the strips &amp; fans unit (for old gens, includes the SF program)</li>
-   <li>stats - enable statistics counters. you probably actually want perfmon or intel_gpu_top instead.</li>
-   <li>urb - emit messages about URB setup</li>
-   <li>vs - dump shader assembly for vertex shaders</li>
-   <li>clip - emit messages about the clip unit (for old gens, includes the CLIP program)</li>
-   <li>aub - dump batches into an AUB trace for use with simulation tools</li>
-   <li>shader_time - record how much GPU time is spent in each shader</li>
-   <li>no16 - suppress generation of 16-wide fragment shaders. useful for debugging broken shaders</li>
-   <li>blorp - emit messages about the blorp operations (blits &amp; clears)</li>
-   <li>nodualobj - suppress generation of dual-object geometry shader code</li>
-   <li>optimizer - dump shader assembly to files at each optimization pass and iteration that make progress</li>
-   <li>ann - annotate IR in assembly dumps</li>
+   <li>hex - print instruction hex dump with the disassembly</li>
+   <li>l3 - emit messages about the new L3 state during transitions</li>
+   <li>miptree - emit messages about miptrees</li>
   <li>no8 - don't generate SIMD8 fragment shader</li>
-   <li>vec4 - force vec4 mode in vertex shader</li>
+   <li>no16 - suppress generation of 16-wide fragment shaders. useful for debugging broken shaders</li>
+   <li>nocompact - disable instruction compaction</li>
+   <li>nodualobj - suppress generation of dual-object geometry shader code</li>
+   <li>norbc - disable single sampled render buffer compression</li>
+   <li>optimizer - dump shader assembly to files at each optimization pass and iteration that make progress</li>
+   <li>perf - emit messages about performance issues</li>
+   <li>perfmon - emit messages about AMD_performance_monitor</li>
+   <li>pix - emit messages about pixel operations</li>
+   <li>prim - emit messages about drawing primitives</li>
+   <li>sf - emit messages about the strips &amp; fans unit (for old gens, includes the SF program)</li>
+   <li>shader_time - record how much GPU time is spent in each shader</li>
   <li>spill_fs - force spilling of all registers in the scalar backend (useful to debug spilling code)</li>
   <li>spill_vec4 - force spilling of all registers in the vec4 backend (useful to debug spilling code)</li>
-   <li>cs - dump shader assembly for compute shaders</li>
-   <li>hex - print instruction hex dump with the disassembly</li>
-   <li>nocompact - disable instruction compaction</li>
+   <li>state - emit messages about state flag tracking</li>
+   <li>sync - after sending each batch, emit a message and wait for that batch to finish rendering</li>
   <li>tcs - dump shader assembly for tessellation control shaders</li>
   <li>tes - dump shader assembly for tessellation evaluation shaders</li>
-   <li>l3 - emit messages about the new L3 state during transitions</li>
-   <li>do32 - generate compute shader SIMD32 programs even if workgroup size doesn't exceed the SIMD16 limit</li>
-   <li>norbc - disable single sampled render buffer compression</li>
+   <li>tex - emit messages about textures.</li>
+   <li>urb - emit messages about URB setup</li>
+   <li>vert - emit messages about vertex assembly</li>
+   <li>vs - dump shader assembly for vertex shaders</li>
 </ul>
+<li>INTEL_SCALAR_VS (or TCS, TES, GS) - force scalar/vec4 mode for a shader stage (Gen8-9 only)</li>
 <li>INTEL_PRECISE_TRIG - if set to 1, true or yes, then the driver prefers
   accuracy over performance in trig functions.</li>
 </ul>
@@ -302,6 +304,8 @@ See src/mesa/state_tracker/st_debug.c for other options.
 (will often result in incorrect rendering).
 <li>SVGA_DEBUG - for dumping shaders, constant buffers, etc.  See the code
 for details.
+<li>SVGA_EXTRA_LOGGING - if set, enables extra logging to the vmware.log file,
+such as the OpenGL program's name and command line arguments.
 <li>See the driver code for other, lesser-used variables.
 </ul>

--- a/docs/features.txt
+++ b/docs/features.txt
@@ -277,7 +277,7 @@ GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+

 Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES version:

-  GL_ARB_bindless_texture                               started (airlied)
+  GL_ARB_bindless_texture                               DONE (radeonsi)
  GL_ARB_cl_event                                       not started
  GL_ARB_compute_variable_group_size                    DONE (nvc0, radeonsi)
  GL_ARB_ES3_2_compatibility                            DONE (i965/gen8+)
@@ -297,7 +297,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
  GL_ARB_shader_draw_parameters                         DONE (i965, nvc0, radeonsi)
  GL_ARB_shader_group_vote                              DONE (nvc0, radeonsi)
  GL_ARB_shader_stencil_export                          DONE (i965/gen9+, radeonsi, softpipe, llvmpipe, swr)
-  GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+, radeonsi)
+  GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+, nvc0, radeonsi)
  GL_ARB_sparse_buffer                                  DONE (radeonsi/CIK+)
  GL_ARB_sparse_texture                                 not started
  GL_ARB_sparse_texture2                                not started
@@ -305,9 +305,9 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
  GL_ARB_texture_filter_minmax                          not started
  GL_ARB_transform_feedback_overflow_query              DONE (i965/gen6+)
  GL_KHR_blend_equation_advanced_coherent               DONE (i965/gen9+)
-  GL_KHR_no_error                                       not started
-  GL_KHR_texture_compression_astc_hdr                   DONE (core only)
-  GL_KHR_texture_compression_astc_sliced_3d             not started
+  GL_KHR_no_error                                       started (Timothy Arceri)
+  GL_KHR_texture_compression_astc_hdr                   DONE (i965/bxt)
+  GL_KHR_texture_compression_astc_sliced_3d             DONE (i965/gen9+)
  GL_OES_depth_texture_cube_map                         DONE (all drivers that support GLSL 1.30+)
  GL_OES_EGL_image                                      DONE (all drivers)
  GL_OES_EGL_image_external_essl3                       not started
--- a/docs/index.html
+++ b/docs/index.html
@@ -16,6 +16,59 @@

 <h1>News</h1>

+<h2>June 19, 2017</h2>
+<p>
+<a href="relnotes/17.1.3.html">Mesa 17.1.3</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>June 5, 2017</h2>
+<p>
+<a href="relnotes/17.1.2.html">Mesa 17.1.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>June 1, 2017</h2>
+<p>
+<a href="relnotes/17.0.7.html">Mesa 17.0.7</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 17.0.7 will be the final release in the 17.0
+series. Users of 17.0 are encouraged to migrate to the 17.1 series in order
+to obtain future fixes.
+</p>
+
+<h2>May 25, 2017</h2>
+<p>
+<a href="relnotes/17.1.1.html">Mesa 17.1.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>May 12, 2017</h2>
+<p>
+<a href="relnotes/17.0.6.html">Mesa 17.0.6</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>May 10, 2017</h2>
+<p>
+<a href="relnotes/17.1.0.html">Mesa 17.1.0</a> is released.  This is a
+new development release.  See the release notes for more information
+about the release.
+</p>
+
+<h2>April 28, 2017</h2>
+<p>
+<a href="relnotes/17.0.5.html">Mesa 17.0.5</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>April 17, 2017</h2>
+<p>
+<a href="relnotes/17.0.4.html">Mesa 17.0.4</a> is released.
+This is a bug-fix release.
+</p>
+
 <h2>April 1, 2017</h2>
 <p>
 <a href="relnotes/17.0.3.html">Mesa 17.0.3</a> is released.
--- a/docs/release-calendar.html
+++ b/docs/release-calendar.html
@@ -0,0 +1,106 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Release calendar</title>
+  <link rel="stylesheet" type="text/css" href="mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="contents.html"></iframe>
+<div class="content">
+
+<h1>Overview</h1>
+
+<p>
+Mesa provides feature/development and stable releases.
+</p>
+<p>
+The table below lists the date and release manager that is expected to do the
+specific release.
+<br>
+Take a look <a href="submittingpatches.html#criteria" target="_parent">here</a>
+if you'd like to nominate a patch in the next stable release.
+</p>
+
+<h1 id="calendar">Calendar</h1>
+
+<table border="1">
+
+<tr>
+<th>Branch</th>
+<th>Expected date</th>
+<th>Release</th>
+<th>Release manager</th>
+<th>Notes</th>
+</tr>
+<tr>
+<td rowspan="5">17.1</td>
+<td>2017-06-30</td>
+<td>17.1.4</td>
+<td>Andres Gomez</td>
+<td></td>
+</tr>
+<tr>
+<td>2017-07-14</td>
+<td>17.1.5</td>
+<td>Andres Gomez</td>
+<td></td>
+</tr>
+<tr>
+<td>2017-07-28</td>
+<td>17.1.6</td>
+<td>Emil Velikov</td>
+<td></td>
+</tr>
+<tr>
+<td>2017-08-11</td>
+<td>17.1.7</td>
+<td>Juan A. Suarez Romero</td>
+<td></td>
+</tr>
+<tr>
+<td>2017-08-25</td>
+<td>17.1.8</td>
+<td>Andres Gomez</td>
+<td>Final planned release for the 17.1 series</td>
+</tr>
+<tr>
+<td rowspan="5">17.2</td>
+<td>2017-07-21</td>
+<td>17.2.0-rc1</td>
+<td>Emil Velikov</td>
+<td></td>
+</tr>
+<tr>
+<td>2017-07-28</td>
+<td>17.2.0-rc2</td>
+<td>Emil Velikov</td>
+<td></td>
+</tr>
+<tr>
+<td>2017-08-04</td>
+<td>17.2.0-rc3</td>
+<td>Emil Velikov</td>
+<td></td>
+</tr>
+<tr>
+<td>2017-08-11</td>
+<td>17.2.0-rc4</td>
+<td>Emil Velikov</td>
+<td>May be promoted to 17.2.0 final</td>
+</tr>
+<tr>
+<td>2017-08-25</td>
+<td>17.2.1</td>
+<td>Emil Velikov</td>
+<td></td>
+</table>
+
+</div>
+</body>
+</html>
--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -14,6 +14,7 @@
 <iframe src="contents.html"></iframe>
 <div class="content">

+
 <h1>Releasing process</h1>

 <ul>
@@ -23,11 +24,13 @@
 <li><a href="#branch">Making a branchpoint</a>
 <li><a href="#prerelease">Pre-release announcement</a>
 <li><a href="#release">Making a new release</a>
+<li><a href="#calendar">Update the calendar</a>
 <li><a href="#announce">Announce the release</a>
 <li><a href="#website">Update the mesa3d.org website</a>
 <li><a href="#bugzilla">Update Bugzilla</a>
 </ul>

+
 <h1 id="overview">Overview</h1>

 <p>
@@ -48,11 +51,15 @@ For example:
 	Mesa 12.0.2 - 12.0 branch, bugfix
 </pre>

+
 <h1 id="schedule">Release schedule</h1>

 <p>
 Releases should happen on Fridays. Delays can occur although those should be keep
 to a minimum.
+<br>
+See our <a href="release-calendar.html" target="_parent">calendar</a> for the
+date and other details for individual releases.
 </p>

 <h2>Feature releases</h2>
@@ -79,15 +86,24 @@ The final release from the 12.0 series Mesa 12.0.5 will be out around the same
 time (or shortly after) 13.0.1 is out.
 </p>

+
 <h1 id="pickntest">Cherry-picking and testing</h1>

 <p>
 Commits nominated for the active branch are picked as based on the
 <a href="submittingpatches.html#criteria" target="_parent">criteria</a> as
 described in the same section.
+</p>

 <p>
-Maintainer is responsible for testing in various possible permutations of
+Nomination happens on the mesa-stable@ mailing list. However, the
+maintainer is responsible for checking for forgotten candidates in the
+master branch. This is achieved by a combination of ad-hoc scripts and
+a casual search for terms such as regression, fix, broken and similar.
+</p>
+
+<p>
+Maintainer is also responsible for testing in various possible permutations of
 the autoconf and scons build.
 </p>

@@ -101,33 +117,57 @@ release. This is made <strong>only</strong> with explicit permission/request,
 and the patch <strong>must</strong> be very well contained. Thus it cannot
 affect more than one driver/subsystem.
 </p>
+
 <p>
 Currently Ilia Mirkin and AMD devs have requested "permanent" exception.
 </p>

-
 <ul>
 <li>make distcheck, scons and scons check must pass
 <li>Testing with different version of system components - LLVM and others is also
 performed where possible.
+<li>As a general rule, testing with various combinations of configure
+switches, depending on the specific patchset.
 </ul>
+
 <p>
-Achieved by combination of local ad-hoc scripts and AppVeyor plus Travis-CI,
-the latter as part of their Github integration.
+Achieved by combination of local ad-hoc scripts, mingw-w64 cross
+compilation and AppVeyor plus Travis-CI, the latter as part of their
+Github integration.
 </p>
+
+<p>
+For Windows related changes, the main contact point is Brian
+Paul. Jose Fonseca can also help as a fallback contact.
+</p>
+
+<p>
+For Android related changes, the main contact is Tapani
+P&auml;lli. Mauro Rossi is collaborating with android-x86 and may
+provide feedback about the build status in that project.
+</p>
+
+<p>
+For MacOSX related changes, Jeremy Huddleston Sequoia is currently a
+good contact point.
+</p>
+
 <p>
 <strong>Note:</strong> If a patch in the current queue needs any additional
 fix(es), then they should be squashed together.
 <br>
 The commit messages and the <code>cherry picked from</code> tags must be preserved.
 </p>
+
 <p>
 This should be noted in the <a href="#prerelease">pre-announce</a> email.
+</p>
+
 <pre>
    git show b10859ec41d09c57663a258f43fe57c12332698e

    commit b10859ec41d09c57663a258f43fe57c12332698e
-    Author: Jonas Pfeil &ltpfeiljonas@gmx.de&gt
+    Author: Jonas Pfeil &lt;pfeiljonas@gmx.de&gt;
    Date:   Wed Mar 1 18:11:10 2017 +0100

        ralloc: Make sure ralloc() allocations match malloc()'s alignment.
@@ -146,7 +186,6 @@ This should be noted in the <a href="#prerelease">pre-announce</a> email.

        (cherry picked from commit ff494fe999510ea40e3ed5827e7818550b6de126)
 </pre>
-</p>

 <h2>Regression/functionality testing</h2>

@@ -154,15 +193,23 @@ This should be noted in the <a href="#prerelease">pre-announce</a> email.
 Less often (once or twice), shortly before the pre-release announcement.
 Ensure that testing is redone if Intel devs have requested an exception, as per above.
 </p>
+
 <ul>
 <li><em>no regressions should be observed for Piglit/dEQP/CTS/Vulkan on Intel platforms</em>
 <li><em>no regressions should be observed for Piglit using the swrast, softpipe
 and llvmpipe drivers</em>
 </ul>
+
 <p>
 Currently testing is performed courtesy of the Intel OTC team and their Jenkins CI setup. Check with the Intel team over IRC how to get things setup.
 </p>

+<p>
+Installing the built driver from the pre-announced RC branch in the
+system and using it day to day until the release may be a good
+idea too.
+</p>
+

 <h1 id="branch">Making a branchpoint</h1>

@@ -202,15 +249,18 @@ To setup the branchpoint:
 Now go to
 <a href="https://bugs.freedesktop.org/editversions.cgi?action=add&amp;product=Mesa" target="_parent">Bugzilla</a> and add the new Mesa version X.Y.
 </p>
+
 <p>
 Check that there are no distribution breaking changes and revert them if needed.
 For example: files being overwritten on install, etc. Happens extremely rarely -
 we had only one case so far (see commit 2ced8eb136528914e1bf4e000dea06a9d53c7e04).
 </p>
+
 <p>
 Proceed to <a href="#release">release</a> -rc1.
 </p>

+
 <h1 id="prerelease">Pre-release announcement</h1>

 <p>
@@ -224,18 +274,22 @@ release is made.
 </p>

 <h2>Terminology used</h2>
+
 <ul><li>Nominated</ul>
+
 <p>
 Patch that is nominated but yet to to merged in the patch queue/branch.
 </p>

 <ul><li>Queued</ul>
+
 <p>
 Patch is in the queue/branch and will feature in the next release.
 Barring reported regressions or objections from developers.
 </p>

 <ul><li>Rejected</ul>
+
 <p>
 Patch does not fit the
 <a href="submittingpatches.html#criteria" target="_parent">criteria</a> and
@@ -341,6 +395,7 @@ AUTHOR (NUMBER):
 Reason: ...
 </pre>

+
 <h1 id="release">Making a new release</h1>

 <p>
@@ -348,18 +403,21 @@ These are the instructions for making a new Mesa release.
 </p>

 <h3>Get latest source files</h3>
+
 <p>
 Ensure the latest code is available - both in your local master and the
 relevant branch.
 </p>

 <h3>Perform basic testing</h3>
+
 <p>
 Most of the testing should already be done during the
 <a href="#pickntest">cherry-pick</a> and
 <a href="#prerelease">pre-announce</a> stages.
-
 So we do a quick 'touch test'
+</p>
+
 <ul>
 <li>make distcheck (you can omit this if you're not using --dist below)
 <li>scons (from release tarball)
@@ -402,7 +460,7 @@ Here is one solution that I've been using.
 		--enable-glx-tls \
 		--enable-gbm \
 		--enable-egl \
-		--with-egl-platforms=x11,drm,wayland
+		--with-platforms=x11,drm,wayland,surfaceless
 	make -j2 &amp;&amp; DESTDIR=`pwd`/test make -j6 install
 	__glxinfo_cmd='glxinfo 2>&amp;1 | egrep -o "Mesa.*|Gallium.*|.*dri\.so"'
 	__glxgears_cmd='glxgears 2>&amp;1 | grep -v "configuration file"'
@@ -452,6 +510,7 @@ be empty (TBD) at this point.

 <p>
 Two scripts are available to help generate portions of the release notes:
+</p>

 <pre>
 	./bin/bugzilla_mesa.sh
@@ -468,6 +527,7 @@ to be included in the release notes.
 <p>
 Commit these changes and push the branch.
 </p>
+
 <pre>
 	git push origin HEAD
 </pre>
@@ -478,6 +538,7 @@ Commit these changes and push the branch.
 <p>
 Start the release process.
 </p>
+
 <pre>
 	../relative/path/to/release.sh . # append --dist if you've already done distcheck above
 </pre>
@@ -515,7 +576,15 @@ docs/index.html to add a news entry. Then commit and push:
 </pre>


+<h1 id="calendar">Update the calendar</h1>
+
+<p>
+Remove the version from the <a href="release-calendar.html" target="_parent">calendar</a>.
+</p>
+
+
 <h1 id="announce">Announce the release</h1>
+
 <p>
 Use the generated template during the releasing process.
 </p>
@@ -528,6 +597,7 @@ As the hosting was moved to freedesktop, git hooks are deployed to update the
 website. Manually check that it is updated 5-10 minutes after the final <code>git push</code>
 </p>

+
 <h1 id="bugzilla">Update Bugzilla</h1>

 <p>
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,14 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/17.1.3.html">17.1.3 release notes</a>
+<li><a href="relnotes/17.1.2.html">17.1.2 release notes</a>
+<li><a href="relnotes/17.0.7.html">17.0.7 release notes</a>
+<li><a href="relnotes/17.1.1.html">17.1.1 release notes</a>
+<li><a href="relnotes/17.0.6.html">17.0.6 release notes</a>
+<li><a href="relnotes/17.1.0.html">17.1.0 release notes</a>
+<li><a href="relnotes/17.0.5.html">17.0.5 release notes</a>
+<li><a href="relnotes/17.0.4.html">17.0.4 release notes</a>
 <li><a href="relnotes/17.0.3.html">17.0.3 release notes</a>
 <li><a href="relnotes/17.0.2.html">17.0.2 release notes</a>
 <li><a href="relnotes/13.0.6.html">13.0.6 release notes</a>
--- a/docs/relnotes/17.0.4.html
+++ b/docs/relnotes/17.0.4.html
@@ -0,0 +1,156 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.0.4 Release Notes / April 17, 2017</h1>
+
+<p>
+Mesa 17.0.4 is a bug fix release which fixes bugs found since the 17.0.3 release.
+</p>
+<p>
+Mesa 17.0.4 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+c4c34ba05d48f76b45bc05bc4b6e9242077f403d63c4f0c355c7b07786de233e  mesa-17.0.4.tar.gz
+1269dc8545a193932a0779b2db5bce9be4a5f6813b98c38b93b372be8362a346  mesa-17.0.4.tar.xz
+</pre>
+
+
+<h2>Next release</h2>
+<p>
+Mesa 17.0.5 is expected in approximately two weeks. See the release
+<a href="../release-calendar.html#calendar" target="_parent">calendar</a>
+for details.
+</p>
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99515">Bug 99515</a> - SIGSEGV MAPERR on Android nougat-x86 with mesa 17.0.0rc</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100391">Bug 100391</a> - SachaWillems deferredmultisampling asserts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100452">Bug 100452</a> - push_constants host memory leak when resetting command buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100582">Bug 100582</a> - [GEN8+] piglit.spec.arb_stencil_texturing.glblitframebuffer corrupts state.gl_texture* assertions</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: add new polaris10 pci id</li>
+</ul>
+
+<p>Alex Smith (1):</p>
+<ul>
+  <li>radv: Invalidate L2 for TRANSFER_WRITE barriers</li>
+</ul>
+
+<p>Andres Gomez (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.0.3</li>
+</ul>
+
+<p>Craig Stout (1):</p>
+<ul>
+  <li>anv/cmd_buffer: fix host memory leak</li>
+</ul>
+
+<p>Emil Velikov (3):</p>
+<ul>
+  <li>Revert "cherry-ignore: add the Flush after unmap in gbm/dri fix"</li>
+  <li>Revert "freedreno: fix memory leak"</li>
+  <li>Update version to 17.0.4</li>
+</ul>
+
+<p>Fabio Estevam (1):</p>
+<ul>
+  <li>loader: Move non-error message to debug level</li>
+</ul>
+
+<p>Ilia Mirkin (4):</p>
+<ul>
+  <li>nvc0/ir: fix LSB/BFE/BFI implementations</li>
+  <li>nvc0/ir: fix overwriting of offset register with interpolateAtOffset</li>
+  <li>nvc0: increase texture buffer object alignment to 256 for pre-GM107</li>
+  <li>nouveau: when mapping a persistent buffer, synchronize on former xfers</li>
+</ul>
+
+<p>Jason Ekstrand (5):</p>
+<ul>
+  <li>i965/fs: Always provide a default LOD of 0 for TXS and TXL</li>
+  <li>anv/pipeline: Properly handle unset gl_Layer and gl_ViewportIndex</li>
+  <li>anv/blorp: Align vertex buffers to 64B</li>
+  <li>i965/blorp: Align vertex buffers to 64B</li>
+  <li>i965/blorp: Bump the batch space estimate</li>
+</ul>
+
+<p>Jerome Duval (2):</p>
+<ul>
+  <li>haiku: build fixes around debug defines</li>
+  <li>haiku/winsys: fix dt prototype args</li>
+</ul>
+
+<p>Julien Isorce (4):</p>
+<ul>
+  <li>winsys/radeon: check null in radeon_cs_create_fence</li>
+  <li>winsys/radeon: check null return from radeon_cs_create_fence in cs_flush</li>
+  <li>radeon: initialize hole variable before calling container_of</li>
+  <li>radeon_drm_bo: explicitly check return value of drmCommandWriteRead</li>
+</ul>
+
+<p>Kenneth Graunke (4):</p>
+<ul>
+  <li>i965: Document the sad story of the kernel command parser.</li>
+  <li>i965: Set screen-&gt;cmd_parser_version to 0 if we can't write registers.</li>
+  <li>i965: Skip register write detection when possible.</li>
+  <li>i965: Set kernel features before computing max GL version.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>targets: export radeon winsys_create functions to silence LLVM warning</li>
+</ul>
+
+<p>Michal Srb (1):</p>
+<ul>
+  <li>st: Add cubeMapFace parameter to st_finalize_texture.</li>
+</ul>
+
+<p>Thomas Hellstrom (1):</p>
+<ul>
+  <li>gbm/dri: Flush after unmap</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.0.5.html
+++ b/docs/relnotes/17.0.5.html
@@ -0,0 +1,144 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.0.5 Release Notes / April 28, 2017</h1>
+
+<p>
+Mesa 17.0.5 is a bug fix release which fixes bugs found since the 17.0.4 release.
+</p>
+<p>
+Mesa 17.0.5 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+7510eee0d0077860b250d30d73305048c2df4ba09ea8fc04e4f3eec7beece301  mesa-17.0.5.tar.gz
+668efa445d2f57a26e5c096b1965a685733a3b57d9c736f9d6460263847f9bfe  mesa-17.0.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (16):</p>
+<ul>
+  <li>cherry-ignore: Add the pci_id into the shader cache UUID</li>
+  <li>cherry-ignore: fix crash if ctx torn down with no rendering</li>
+  <li>cherry-ignore: Fix typos.</li>
+  <li>cherry-ignore: Revert "etnaviv: Cannot render to rb-swapped formats"</li>
+  <li>cherry-ignore: Revert "i965/fs: Don't emit SEL instructions for type-converting MOVs."</li>
+  <li>cherry-ignore: fix typo in a2b10g10r10 fast clear calculation</li>
+  <li>cherry-ignore: remove unused anv_dispatch_table dtable</li>
+  <li>cherry-ignore: remove unused radv_dispatch_table dtable</li>
+  <li>cherry-ignore: make radv_resolve_entrypoint static</li>
+  <li>cherry-ignore: vulkan: add support for libmesa_vulkan_util</li>
+  <li>cherry-ignore: r600: fix libmesa_amd_common dependency</li>
+  <li>cherry-ignore: remove dead brw_new_shader() declaration</li>
+  <li>cherry-ignore: remove i965_symbols_test reference from .gitignore</li>
+  <li>cherry-ignore: automake: ensure that the destination directory is created</li>
+  <li>cherry-ignore: provide required gem stubs for the tests</li>
+  <li>Update version to 17.0.5</li>
+</ul>
+
+<p>Boyan Ding (2):</p>
+<ul>
+  <li>nvc0/ir: Properly handle a "split form" of predicate destination</li>
+  <li>nir: Destination component count of shader_clock intrinsic is 2</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.0.4</li>
+  <li>winsys/sw/dri: don't use GNU void pointer arithmetic</li>
+  <li>st/clover: add space between &lt; and ::</li>
+  <li>configure.ac: check require_basic_egl only if egl enabled</li>
+  <li>st/mesa: automake: honour the vdpau header install location</li>
+</ul>
+
+<p>Francisco Jerez (2):</p>
+<ul>
+  <li>intel/fs: Use regs_written() in spilling cost heuristic for improved accuracy.</li>
+  <li>intel/fs: Take into account amount of data read in spilling cost heuristic.</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>radv: report timestampPeriod correctly</li>
+</ul>
+
+<p>Jason Ekstrand (5):</p>
+<ul>
+  <li>anv/blorp: Flush the texture cache in UpdateBuffer</li>
+  <li>anv/cmd_buffer: Flush the VF cache at the top of all primaries</li>
+  <li>anv/cmd_buffer: Always set up a null surface state</li>
+  <li>anv/cmd_buffer: Use the null surface state for ATTACHMENT_UNUSED</li>
+  <li>anv/blorp: Properly handle VK_ATTACHMENT_UNUSED</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>i965/vec4: Avoid reswizzling MACH instructions in opt_register_coalesce().</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: invalidate the readpix cache in st_indirect_draw_vbo</li>
+</ul>
+
+<p>Nanley Chery (1):</p>
+<ul>
+  <li>anv/cmd_buffer: Disable CCS on BDW input attachments</li>
+</ul>
+
+<p>Nicolai Hähnle (4):</p>
+<ul>
+  <li>mesa: fix remaining xfb prims check for GLES with multiple instances</li>
+  <li>mesa: extract need_xfb_remaining_prims_check</li>
+  <li>mesa: move glMultiDrawArrays to vbo and fix error handling</li>
+  <li>vbo: fix gl_DrawID handling in glMultiDrawArrays</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>util/queue: don't hang at exit</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>mesa: validate sampler type across the whole program</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.0.6.html
+++ b/docs/relnotes/17.0.6.html
@@ -0,0 +1,186 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.0.6 Release Notes / May 12, 2017</h1>
+
+<p>
+Mesa 17.0.6 is a bug fix release which fixes bugs found since the 17.0.5 release.
+</p>
+<p>
+Mesa 17.0.6 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+f1b2497d553e9a584f0caa3a2d9d310e27ead15fb0af170da69f6e70fb5031cd  mesa-17.0.6.tar.gz
+89ecf3bcd0f18dcca5aaa42bf36bb52a2df33be89889f94aaaad91f7a504a69d  mesa-17.0.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Adam Jackson (1):</p>
+<ul>
+  <li>egl/platform/drm: Don't take display ownership until gbm is initialized</li>
+</ul>
+
+<p>Andres Gomez (7):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.0.5</li>
+  <li>travis: replace Trusty-based LLVM toolchain apt-get with apt addon</li>
+  <li>travis: add the possibility of using the txc-dxtn library</li>
+  <li>cherry-ignore: 17.1 nominations only</li>
+  <li>cherry-ignore: fix regression in descriptor set freeing.</li>
+  <li>cherry-ignore: rejected commits</li>
+  <li>Update version to 17.0.6</li>
+</ul>
+
+<p>Ben Boeckel (1):</p>
+<ul>
+  <li>scons: update for LLVM 4.0</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>st/mesa: move duplicated st_ws_framebuffer() function into header file</li>
+</ul>
+
+<p>Chad Versace (3):</p>
+<ul>
+  <li>egl: Emit error when EGLSurface is lost</li>
+  <li>egl/android: Cancel any outstanding ANativeBuffer in surface destructor</li>
+  <li>egl/android: Mark surface as lost when dequeueBuffer fails</li>
+</ul>
+
+<p>Christian Gmeiner (1):</p>
+<ul>
+  <li>etnaviv: add L8A8_UNORM texture format</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+  <li>radv/wsi: report presentation error per image request</li>
+  <li>radv: enable POLARIS12 support.</li>
+</ul>
+
+<p>Emil Velikov (21):</p>
+<ul>
+  <li>travis: correct libdrm required regex to also track libdrm itself</li>
+  <li>travis: add nearly all gallium drivers to the list</li>
+  <li>travis: use both cores for make/make check</li>
+  <li>travis: bring the scons build on par with AppVeyor</li>
+  <li>travis: explicitly LD_LIBRARY_PATH the local libraries</li>
+  <li>travis: enable apt cache</li>
+  <li>travis: automatically manage ccache caching</li>
+  <li>travis: remove unused -dev packages</li>
+  <li>travis: rework "if test" blocks in the script section</li>
+  <li>travis: split out matrix from env</li>
+  <li>travis: add separate "scons" and "scons llvm" targets</li>
+  <li>travis: add "scons swr" to the build matrix</li>
+  <li>travis: add "make swr" to the build matrix</li>
+  <li>travis: split the make target to three separate ones</li>
+  <li>travis: model scons check target like the make one</li>
+  <li>travis: add Gallium state-tracker targets</li>
+  <li>travis: enable wayland support</li>
+  <li>travis: bump MAKEFLAGS to -j4</li>
+  <li>gallium/dri: always link against shared glapi</li>
+  <li>mesa/dri: always link against shared glapi</li>
+  <li>glx: glX_proto_send.py: use correct compile guard GLX_INDIRECT_RENDERING</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>nir: Pick just the channels we want for bitmap and drawpixels lowering.</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>gallium/targets: fix bool setting on BE architectures</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>anv/cmd_buffer: Use the device allocator for QueueSubmit</li>
+</ul>
+
+<p>Johnson Lin (1):</p>
+<ul>
+  <li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>radeonsi: adjust ESGS ring buffer size computation on VI</li>
+  <li>radeonsi: apply the tess+GS hang workaround to Polaris12 as well</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
+</ul>
+
+<p>Philipp Zabel (3):</p>
+<ul>
+  <li>renderonly: close transfer prime_fd</li>
+  <li>renderonly: drop resources on destroy</li>
+  <li>renderonly: use drmIoctl</li>
+</ul>
+
+<p>Rhys Kidd (3):</p>
+<ul>
+  <li>travis: Support LLVM 3.8+ on Trusty-based Travis-CI via apt-get not apt addon</li>
+  <li>travis: Add radv vulkan driver to continuous integration</li>
+  <li>travis: Add radeonsi to continuous integration</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno/a3xx: fix hang w/ large render targets and small gmem</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (5):</p>
+<ul>
+  <li>i965/vec4: fix vertical stride to avoid breaking region parameter rule</li>
+  <li>i965/vec4: fix register width for DF VGRF and UNIFORM</li>
+  <li>i965/vec4: don't modify regioning parameters to the sources of DF align1 instructions</li>
+  <li>anv: anv_gem_mmap() returns MAP_FAILED as mapping error</li>
+  <li>anv: vkBindImageMemory() should return VK_ERROR_OUT_OF_{HOST,DEVICE}_MEMORY on failure</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.0.7.html
+++ b/docs/relnotes/17.0.7.html
@@ -0,0 +1,145 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.0.7 Release Notes / June 1, 2017</h1>
+
+<p>
+Mesa 17.0.7 is a bug fix release which fixes bugs found since the 17.0.6 release.
+</p>
+<p>
+Mesa 17.0.7 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+bc68d13c6b1a053b855ac453ebf7e62bd89511adf44bad6c613e09f7fa13390a  mesa-17.0.7.tar.gz
+f6d75304a229c8d10443e219d6b6c0c342567dbab5a879ebe7cfa3c9139c4492  mesa-17.0.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.0.6</li>
+</ul>
+
+<p>Bartosz Tomczyk (1):</p>
+<ul>
+  <li>mesa: Avoid leaking surface in st_renderbuffer_delete</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
+</ul>
+
+<p>Daniel Stone (7):</p>
+<ul>
+  <li>vulkan: Fix Wayland uninitialised registry</li>
+  <li>vulkan/wsi/wayland: Remove roundtrip when creating image</li>
+  <li>vulkan/wsi/wayland: Use per-display event queue</li>
+  <li>vulkan/wsi/wayland: Use proxy wrappers for swapchain</li>
+  <li>egl/wayland: Don't open-code roundtrip</li>
+  <li>egl/wayland: Use per-surface event queues</li>
+  <li>egl/wayland: Ensure we get a back buffer</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>st/va: fix misplaced closing bracket</li>
+  <li>anv: automake: list shared libraries after the static ones</li>
+  <li>radv: automake: list shared libraries after the static ones</li>
+  <li>egl/wayland: select the format based on the interface used</li>
+  <li>Update version to 17.0.7</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>renderonly: Initialize fields of struct winsys_handle.</li>
+  <li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0/ir: SHLADD's middle source must be an immediate</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>i965/blorp: Do and end-of-pipe sync on both sides of fast-clear ops</li>
+  <li>i965: Round copy size to the nearest block in intel_miptree_copy</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>etnaviv: stop oversizing buffer resources</li>
+</ul>
+
+<p>Nanley Chery (2):</p>
+<ul>
+  <li>anv/formats: Update the three-channel BC1 mappings</li>
+  <li>i965/formats: Update the three-channel DXT1 mappings</li>
+</ul>
+
+<p>Pohjolainen, Topi (1):</p>
+<ul>
+  <li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (3):</p>
+<ul>
+  <li>i965/vec4/gs: restore the uniform values which was overwritten by failed vec4_gs_visitor execution</li>
+  <li>i965/vec4: fix swizzle and writemask when loading an uniform with constant offset</li>
+  <li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.1.0.html
+++ b/docs/relnotes/17.1.0.html
@@ -14,12 +14,13 @@
 <iframe src="../contents.html"></iframe>
 <div class="content">

-<h1>Mesa 17.1.0 Release Notes / TBD</h1>
+<h1>Mesa 17.1.0 Release Notes / May 10, 2017</h1>

 <p>
 Mesa 17.1.0 is a new development release.
 People who are concerned with stability and reliability should stick
-with a previous release or wait for Mesa 17.1.1.
+with a previous release or wait for
+<a href="../release-calendar.html#calendar" target="_parent">Mesa 17.1.1</a>.
 </p>
 <p>
 Mesa 17.1.0 implements the OpenGL 4.5 API, but the version reported by
@@ -33,7 +34,8 @@ because compatibility contexts are not supported.

 <h2>SHA256 checksums</h2>
 <pre>
-TBD.
+c388069581a72853161657ac365f2c083afabd7cffd53f80513dacfa1cfa58a8  mesa-17.1.0.tar.gz
+cf234a6ed4764673886b6661553b54675776ef0898f774716173cec890ac3b17  mesa-17.1.0.tar.xz
 </pre>


@@ -63,6 +65,147 @@ Note: some of the new features are only available with certain drivers.
 <h2>Bug fixes</h2>

 <ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68504">Bug 68504</a> - 9.2-rc1 workaround for clover build failure on ppc/altivec: cannot convert 'bool' to '__vector(4) __bool int' in return</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84325">Bug 84325</a> - X.Org segfaults when starting DE on an Intel+Radeon laptop, caused by libpciaccess cleanup, patch attached</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93089">Bug 93089</a> - mesa fails to check for gcc atomic primitives before using them</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95460">Bug 95460</a> - Please add more drivers (freedreno, virgl) to features.txt status document</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96743">Bug 96743</a> - [BYT, HSW, SKL, BXT, KBL] GPU hangs with GfxBench 4.0 CarChase</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97102">Bug 97102</a> - [dri][swr] stack overflow / infinite loop with GALLIUM_DRIVER=swr</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97338">Bug 97338</a> - Black squares in the Spec Ops: The Line chapter select screen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97967">Bug 97967</a> - glsl/tests/cache-test regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97988">Bug 97988</a> - [radeonsi] playing back videos with VDPAU exhibits deinterlacing/anti-aliasing issues not visible with VA-API</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with &quot;Fatal error: Cannot set display mode.&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98502">Bug 98502</a> - Delay when starting firefox, thunderbird or chromium and dmesg spam</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98869">Bug 98869</a> - Electronic Super Joy graphic artefacts (regression,bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99010">Bug 99010</a> - --disable-gallium-llvm no longer recognized</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99246">Bug 99246</a> - [d3dadapter+radeonsi &amp; bisect] EVE-Online : hang on wormhole sight</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99265">Bug 99265</a> - i965: Piglit egl_khr_gl_renderbuffer_image-clear-shared-image fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99339">Bug 99339</a> - Blender line rendering broken after removing XY clipping of lines</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99401">Bug 99401</a> - [g33] regression: piglit.spec.!opengl 1_0.gl-1_0-beginend-coverage</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99456">Bug 99456</a> - Firefox crashing when opening about:support with WebGL2 enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99465">Bug 99465</a> - vtn_vector_construct writing out of bounds when given multiple non-zero length sources</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99484">Bug 99484</a> - Crusader Kings 2 - Loading bars, siege bars, morale bars, etc. do not render correctly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99532">Bug 99532</a> - Compute shader doesn't give right result under some circumstances</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99542">Bug 99542</a> - vdpau  logging errors since gallium/radeon: adjust the rule for using the LINEAR_ALIGNED layout</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99631">Bug 99631</a> - segfault with OSVRTrackerView and openscenegraph git master</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99633">Bug 99633</a> - rasterizer/core/clip.h:279:49: error: ‘const struct API_STATE’ has no member named ‘linkageCount’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99660">Bug 99660</a> - Not all of the int64 conversion opcodes got implemented</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99677">Bug 99677</a> - heap-use-after-free in glsl</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99692">Bug 99692</a> - [radv] Mostly broken on Hawaii PRO/CIK ASICs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99701">Bug 99701</a> - loader.c:353:8: error: implicit declaration of function 'geteuid' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99715">Bug 99715</a> - Don't print: &quot;Note: Buggy applications may crash, if they do please report to vendor&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99789">Bug 99789</a> - Memory leak on failure to create an ir_constant in calculate_iterations in loop_controls.cpp</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99817">Bug 99817</a> - [softpipe] piglit glsl-fs-tan-1 regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99842">Bug 99842</a> - GL_ARB_transform_feedback2 on i965 gen6</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99850">Bug 99850</a> - Tessellation bug on Carrizo</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99918">Bug 99918</a> - disk_cache.h:57:20: error: no member named 'st_mtim' in 'struct stat'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99953">Bug 99953</a> - device9.c:122:49: error: ‘PIPE_CAP_USER_INDEX_BUFFERS’ undeclared (first use in this function)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99955">Bug 99955</a> - [r600g] GPU load always displayed at 100% with GALLIUM_HUD=GPU-load</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100026">Bug 100026</a> - piglit.spec.arb_shader_subroutine.compiler.direct-call_vert regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100049">Bug 100049</a> - &quot;ralloc: Make sure ralloc() allocations match malloc()'s alignment.&quot; causes seg fault in 32bit build</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100060">Bug 100060</a> - wsi/wsi_common_wayland.c:25:41: fatal error: wayland-drm-client-protocol.h: No such file or directory</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100061">Bug 100061</a> - LODQ instruction generated with invalid dst mask</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100068">Bug 100068</a> - LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.buffer.load.format</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100088">Bug 100088</a> - piglit.spec.arb_get_texture_sub_image.arb_get_texture_sub_image regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100091">Bug 100091</a> - Failure to create folder for on-disk shader cache</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100133">Bug 100133</a> - swr_context.cpp:336:44: error: invalid conversion from ‘uint {aka unsigned int}’ to ‘pipe_render_cond_flag’ [-fpermissive]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100154">Bug 100154</a> - test_eu_compact regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100180">Bug 100180</a> - Build failure in GNOME Continuous</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100182">Bug 100182</a> - Flickering in The Talos Principle on Sky Lake GT4.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100201">Bug 100201</a> - Windows scons build with MSVC toolchain and LLVM 4.0 fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100223">Bug 100223</a> - marshal_generated.c:38:10: fatal error: 'X11/Xlib-xcb.h' file not found</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100236">Bug 100236</a> - Undefined symbols for architecture x86_64: &quot;typeinfo for llvm::RTDyldMemoryManager&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100259">Bug 100259</a> - [EGL] [GBM] undefined reference to `gbm_bo_create_with_modifiers'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100288">Bug 100288</a> - clover unable to run OpenCL kernels since 03127bb radeonsi: compile all TGSI compute shaders asynchronously</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100303">Bug 100303</a> - Adding a single, meaningless if-else to a shader source leads to different image</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100391">Bug 100391</a> - SachaWillems deferredmultisampling asserts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100452">Bug 100452</a> - push_constants host memory leak when resetting command buffer</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100531">Bug 100531</a> - [regression] Broken graphics in several games</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100562">Bug 100562</a> - u_debug_stack.c:59: undefined reference to `_Ux86_64_getcontext'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100569">Bug 100569</a> - core/resource.cpp:36:33: error: non-constant-expression cannot be narrowed from type 'int' to 'int16_t' (aka 'short') in initializer list [-Wc++11-narrowing]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100574">Bug 100574</a> - anv_device.c:189: undefined reference to `anv_gem_supports_48b_addresses'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100582">Bug 100582</a> - [GEN8+] piglit.spec.arb_stencil_texturing.glblitframebuffer corrupts state.gl_texture* assertions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100600">Bug 100600</a> - anv_device.c:1337: undefined reference to `anv_gem_busy'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100620">Bug 100620</a> - [SKL] 48-bit addresses break DOOM</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100663">Bug 100663</a> - commit 61e47d92c5196 breaks RS780</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100690">Bug 100690</a> - [Regression, bisected] TotalWar: Warhammer corrupted graphics</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100892">Bug 100892</a> - Polaris 12: winsys init bad switch (missing break) initializing addrlib</li>
+
 </ul>

 <h2>Changes</h2>
--- a/docs/relnotes/17.1.1.html
+++ b/docs/relnotes/17.1.1.html
@@ -0,0 +1,188 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.1.1 Release Notes / May 25, 2017</h1>
+
+<p>
+Mesa 17.1.1 is a bug fix release which fixes bugs found since the 17.1.0 release.
+</p>
+<p>
+Mesa 17.1.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+652315af87f2bb015ce99ee3b90d9d115d53cbf9e052493bd13d521a753b1930  mesa-17.1.1.tar.gz
+aed503f94c0c1630a162a3e276f4ee12a86764cee4cb92338ea2dea99a04e7ef  mesa-17.1.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (1):</p>
+<ul>
+  <li>radeonsi: add new vega10 pci ids</li>
+</ul>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>bin/get-fixes-pick-list.sh: don't warn if more than one, go over them</li>
+  <li>bin/get-fixes-pick-list.sh: bring back the warning</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>swr: move msaa resolve to generalized StoreTile</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
+</ul>
+
+<p>Chih-Wei Huang (1):</p>
+<ul>
+  <li>Android: correct libz dependency</li>
+</ul>
+
+<p>Daniel Stone (1):</p>
+<ul>
+  <li>gbm/dri: Fix sign-extension in modifier query</li>
+</ul>
+
+<p>Emil Velikov (6):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.1.0</li>
+  <li>radeon: automake: remove unneeded elf Cflags/Libs</li>
+  <li>configure: remove unneeded bits around libunwind handling</li>
+  <li>egl: add g_egldispatchstubs.h to the release tarball</li>
+  <li>automake: add SWR LLVM gen_builder.hpp workaround</li>
+  <li>Update version to 17.1.1</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>renderonly: Initialize fields of struct winsys_handle.</li>
+  <li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
+</ul>
+
+<p>Grazvydas Ignotas (2):</p>
+<ul>
+  <li>anv: fix possible stack corruption</li>
+  <li>anv: don't leak DRM devices</li>
+</ul>
+
+<p>Hans de Goede (1):</p>
+<ul>
+  <li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0/ir: SHLADD's middle source must be an immediate</li>
+</ul>
+
+<p>Johnson Lin (1):</p>
+<ul>
+  <li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>bin/get-{extra,fixes}-pick-list.sh: add support for ignore list</li>
+  <li>bin/get-{extra,fixes}-pick-list.sh: improve output</li>
+</ul>
+
+<p>Lucas Stach (2):</p>
+<ul>
+  <li>etnaviv: stop oversizing buffer resources</li>
+  <li>etnaviv: allow R/B swapped surfaces to be cleared</li>
+</ul>
+
+<p>Marek Olšák (2):</p>
+<ul>
+  <li>amd/addrlib: import Raven support</li>
+  <li>radeonsi/gfx9: add support for Raven</li>
+</ul>
+
+<p>Nanley Chery (2):</p>
+<ul>
+  <li>anv/formats: Update the three-channel BC1 mappings</li>
+  <li>i965/formats: Update the three-channel DXT1 mappings</li>
+</ul>
+
+<p>Nicolai Hähnle (5):</p>
+<ul>
+  <li>radeonsi: mark fast-cleared textures as compressed when dirtying</li>
+  <li>radeonsi: fix primitive ID in fragment shader when using tessellation</li>
+  <li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
+  <li>radeonsi: fix gl_PrimitiveIDIn in geometry shader when using tessellation</li>
+  <li>st/mesa: remove an incorrect assertion</li>
+</ul>
+
+<p>Pohjolainen, Topi (1):</p>
+<ul>
+  <li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>mesa/st: fix yuv EGLImage's</li>
+  <li>freedreno: fix crash when flush() but no rendering</li>
+</ul>
+
+<p>Rob Herring (1):</p>
+<ul>
+  <li>virgl: fix virgl_bo_transfer_{put, get} box struct copy</li>
+</ul>
+
+<p>Samuel Iglesias Gonsálvez (3):</p>
+<ul>
+  <li>i965/vec4/gs: restore the uniform values which was overwritten by failed vec4_gs_visitor execution</li>
+  <li>i965/vec4: fix swizzle and writemask when loading an uniform with constant offset</li>
+  <li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
+</ul>
+
+<p>Tom Stellard (1):</p>
+<ul>
+  <li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.1.2.html
+++ b/docs/relnotes/17.1.2.html
@@ -0,0 +1,187 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.1.2 Release Notes / June 5, 2017</h1>
+
+<p>
+Mesa 17.1.2 is a bug fix release which fixes bugs found since the 17.1.1 release.
+</p>
+<p>
+Mesa 17.1.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+0d2020c2115db0d13a5be0075abf0da143290f69f5817a2f277861e89166a3e1  mesa-17.1.2.tar.gz
+0937804f43746339b1f9540d8f9c8b4a1bb3d3eec0e4020eac283b8799798239  mesa-17.1.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100877">Bug 100877</a> - vulkan/tests/block_pool_no_free regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101110">Bug 101110</a> - Build failure in GNOME Continuous</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bartosz Tomczyk (1):</p>
+<ul>
+  <li>mesa: Avoid leaking surface in st_renderbuffer_delete</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Reserve space for descriptor and push constant user SGPR setting.</li>
+</ul>
+
+<p>Daniel Stone (7):</p>
+<ul>
+  <li>vulkan: Fix Wayland uninitialised registry</li>
+  <li>vulkan/wsi/wayland: Remove roundtrip when creating image</li>
+  <li>vulkan/wsi/wayland: Use per-display event queue</li>
+  <li>vulkan/wsi/wayland: Use proxy wrappers for swapchain</li>
+  <li>egl/wayland: Don't open-code roundtrip</li>
+  <li>egl/wayland: Use per-surface event queues</li>
+  <li>egl/wayland: Ensure we get a back buffer</li>
+</ul>
+
+<p>Emil Velikov (24):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.1.1</li>
+  <li>configure: move platform handling further up</li>
+  <li>configure: rename remaining HAVE_EGL_PLATFORM_* guards</li>
+  <li>configure: update remaining --with-egl-platforms references</li>
+  <li>configure: loosen --with-platforms heuristics</li>
+  <li>configure: enable the surfaceless platform by default</li>
+  <li>configure: set HAVE_foo_PLATFORM as applicable</li>
+  <li>configure: error out when building GLX w/o the X11 platform</li>
+  <li>configure: check once for DRI3 dependencies</li>
+  <li>loader: build libloader_dri3_helper.la only with HAVE_PLATFORM_X11</li>
+  <li>configure: error out when building X11 Vulkan without DRI3</li>
+  <li>auxiliary/vl: use vl_*_screen_create stubs when building w/o platform</li>
+  <li>st/va: fix misplaced closing bracket</li>
+  <li>st/omx: remove unneeded X11 include</li>
+  <li>st/omx: fix building against X11-less setups</li>
+  <li>gallium/targets: link against XCB only as needed</li>
+  <li>configure: error out if building VA w/o supported platform</li>
+  <li>configure: error out if building OMX w/o supported platform</li>
+  <li>configure: error out if building VDPAU w/o supported platform</li>
+  <li>configure: error out if building XVMC w/o supported platform</li>
+  <li>travis: remove workarounds for the Vulkan target</li>
+  <li>anv: automake: list shared libraries after the static ones</li>
+  <li>radv: automake: list shared libraries after the static ones</li>
+  <li>egl/wayland: select the format based on the interface used</li>
+</ul>
+
+<p>Ian Romanick (3):</p>
+<ul>
+  <li>r100: Don't assume that the base mipmap of a texture exists</li>
+  <li>r100,r200: Don't assume glVisual is non-NULL during context creation</li>
+  <li>r100: Use _mesa_get_format_base_format in radeon_update_wrapper</li>
+</ul>
+
+<p>Jason Ekstrand (17):</p>
+<ul>
+  <li>anv: Handle color layout transitions from the UNINITIALIZED layout</li>
+  <li>anv: Handle transitioning depth from UNDEFINED to other layouts</li>
+  <li>anv/image: Get rid of the memset(aux, 0, sizeof(aux)) hack</li>
+  <li>anv: Predicate 48bit support on gen &gt;= 8</li>
+  <li>anv: Set up memory types and heaps during physical device init</li>
+  <li>anv: Set image memory types based on the type count</li>
+  <li>i965/blorp: Do and end-of-pipe sync on both sides of fast-clear ops</li>
+  <li>i965: Round copy size to the nearest block in intel_miptree_copy</li>
+  <li>anv: Set EXEC_OBJECT_ASYNC when available</li>
+  <li>anv: Determine the type of mapping based on type metadata</li>
+  <li>anv: Add valid_bufer_usage to the memory type metadata</li>
+  <li>anv: Stop setting BO flags in bo_init_new</li>
+  <li>anv: Make supports_48bit_addresses a heap property</li>
+  <li>anv: Refactor memory type setup</li>
+  <li>anv: Advertise both 32-bit and 48-bit heaps when we have enough memory</li>
+  <li>i965: Rework Sandy Bridge HiZ and stencil layouts</li>
+  <li>anv: Require vertex buffers to come from a 32-bit heap</li>
+</ul>
+
+<p>Juan A. Suarez Romero (13):</p>
+<ul>
+  <li>Revert "android: fix segfault within swap_buffers"</li>
+  <li>cherry-ignore: radeonsi: load patch_id for TES-as-ES when exporting for PS</li>
+  <li>cherry-ignore: anv: Determine the type of mapping based on type metadata</li>
+  <li>cherry-ignore: anv: Stop setting BO flags in bo_init_new</li>
+  <li>cherry-ignore: anv: Make supports_48bit_addresses a heap property</li>
+  <li>cherry-ignore: anv: Advertise both 32-bit and 48-bit heaps when we have enough memory</li>
+  <li>cherry-ignore: anv: Require vertex buffers to come from a 32-bit heap</li>
+  <li>cherry-ignore: radv: fix regression in descriptor set freeing</li>
+  <li>cherry-ignore: anv: Add valid_bufer_usage to the memory type metadata</li>
+  <li>cherry-ignore: anv: Refactor memory type setup</li>
+  <li>Revert "cherry-ignore: anv: [...]"</li>
+  <li>Revert "cherry-ignore: anv: Require vertex buffers to come from a 32-bit heap"</li>
+  <li>Update version to 17.1.2</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi/gfx9: compile shaders with +xnack</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>st/mesa: remove redundant stfb-&gt;iface checks</li>
+</ul>
+
+<p>Nicolas Boichat (1):</p>
+<ul>
+  <li>configure.ac: Also match -androideabi tuple</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: fix fence creation fail if no rendering</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>egl/android: fix segfault within swap_buffers</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>st/mesa: don't mark the program as in cache_fallback when there is cache miss</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.1.3.html
+++ b/docs/relnotes/17.1.3.html
@@ -0,0 +1,156 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.1.3 Release Notes / June 19, 2017</h1>
+
+<p>
+Mesa 17.1.3 is a bug fix release which fixes bugs found since the 17.1.2 release.
+</p>
+<p>
+Mesa 17.1.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+81ae9127286ff8d631e466d258608d6dea9854fe7bee2e8521da44c7544f01e5  mesa-17.1.3.tar.gz
+5f1ee9a8aea2880f887884df2dea0c16dd1b13eb42fd2e52265db0dc1b380e8c  mesa-17.1.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100988">Bug 100988</a> - glXGetCurrentDisplay() no longer works for FakeGLX contexts?</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bas Nieuwenhuizen (3):</p>
+<ul>
+  <li>radv: Set both compute and graphics SGPRS on descriptor set flush.</li>
+  <li>radv: Dirty all descriptors sets when changing the pipeline.</li>
+  <li>radv: Remove SI num RB override for occlusion queries.</li>
+</ul>
+
+<p>Brian Paul (1):</p>
+<ul>
+  <li>xlib: fix glXGetCurrentDisplay() failure</li>
+</ul>
+
+<p>Chad Versace (1):</p>
+<ul>
+  <li>i965/dri: Fix bad GL error in intel_create_winsys_renderbuffer()</li>
+</ul>
+
+<p>Chuck Atkins (1):</p>
+<ul>
+  <li>configure.ac: Reduce zlib requirement from 1.2.8 to 1.2.3.</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>radv: expose integrated device type for APUs.</li>
+  <li>radv: set fmask state to all 0s when no fmask. (v2)</li>
+  <li>glsl/lower_distance: only set max_array_access for 1D clip dist arrays</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>Update version to 17.1.3</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>radv: fix trace dumping for !use_ib_bos</li>
+</ul>
+
+<p>Jason Ekstrand (4):</p>
+<ul>
+  <li>i965/blorp: Take a layer range in intel_hiz_exec</li>
+  <li>i965: Move the pre-depth-clear flush/stalls to intel_hiz_exec</li>
+  <li>i965: Perform HiZ flush/stall prior to HiZ resolves</li>
+  <li>i965: Mark depth surfaces as needing a HiZ resolve after blitting</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>automake: Link all libGL.so variants with -Bsymbolic.</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>docs: add sha256 checksums for 17.1.2</li>
+</ul>
+
+<p>Lucas Stach (1):</p>
+<ul>
+  <li>etnaviv: always do cpu_fini in transfer_unmap</li>
+</ul>
+
+<p>Lyude (1):</p>
+<ul>
+  <li>nvc0: disable BGRA8 images on Fermi</li>
+</ul>
+
+<p>Marek Olšák (3):</p>
+<ul>
+  <li>st/mesa: don't load cached TGSI shaders on demand</li>
+  <li>radeonsi: fix a GPU hang with tessellation on 2-CU configs</li>
+  <li>radeonsi: disable the patch ID workaround on SI when the patch ID isn't used (v2)</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>radv: fewer than 8 RBs are possible</li>
+</ul>
+
+<p>Nicolas Dechesne (1):</p>
+<ul>
+  <li>util/rand_xor: add missing include statements</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>egl: fix _eglQuerySurface in EGL_BUFFER_AGE_EXT case</li>
+</ul>
+
+<p>Thomas Hellstrom (1):</p>
+<ul>
+  <li>dri3/GLX: Fix drawable invalidation v2</li>
+</ul>
+
+<p>Tim Rowley (1):</p>
+<ul>
+  <li>swr: relax c++ requirement from c++14 to c++11</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/17.2.0.html
+++ b/docs/relnotes/17.2.0.html
@@ -0,0 +1,68 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 17.2.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 17.2.0 is a new development release.
+People who are concerned with stability and reliability should stick
+with a previous release or wait for Mesa 17.2.1.
+</p>
+<p>
+Mesa 17.2.0 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation
+because compatibility contexts are not supported.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_ARB_bindless_texture on radeonsi</li>
+<li>GL_ARB_post_depth_coverage on nvc0 (GM200+)</li>
+<li>GL_ARB_shader_viewport_layer_array on nvc0 (GM200+)</li>
+<li>GL_AMD_vertex_shader_layer on nvc0 (GM200+)</li>
+<li>GL_AMD_vertex_shader_viewport_index on nvc0 (GM200+)</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+<ul>
+TBD
+</ul>
+
+<h2>Changes</h2>
+
+<ul>
+<li>GL_APPLE_vertex_array_object support removed.</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -50,6 +50,8 @@ execution.  These are generally used for debugging.
    The filenames will be "shader_X.vert" or "shader_X.frag" where X
    the shader ID.
 <li><b>cache_info</b> - print debug information about shader cache
+<li><b>cache_fb</b> - force cached shaders to be ignored and do a full
+    recompile via the fallback path</li>
 <li><b>uniform</b> - print message to stdout when glUniform is called
 <li><b>nopvert</b> - force vertex shaders to be a simple shader that just transforms
    the vertex position with ftransform() and passes through the color and
--- a/git_sha1_gen.sh
+++ b/git_sha1_gen.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# run git from the sources directory
+cd "$(dirname "$0")"
+
+# don't print anything if git fails
+if ! git_sha1=$(git --git-dir=.git rev-parse --short=10 HEAD 2>/dev/null)
+then
+  exit
+fi
+
+printf '#define MESA_GIT_SHA1 "git-%s"\n' "$git_sha1"
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -702,6 +702,7 @@ struct __DRIuseInvalidateExtensionRec {
 #define __DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS	46
 #define __DRI_ATTRIB_YINVERTED			47
 #define __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE	48
+#define __DRI_ATTRIB_MAX			(__DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE + 1)

 /* __DRI_ATTRIB_RENDER_TYPE */
 #define __DRI_ATTRIB_RGBA_BIT			0x01	
@@ -1136,7 +1137,7 @@ struct __DRIdri2ExtensionRec {
 * extensions.
 */
 #define __DRI_IMAGE "DRI_IMAGE"
-#define __DRI_IMAGE_VERSION 14
+#define __DRI_IMAGE_VERSION 15

 /**
 * These formats correspond to the similarly named MESA_FORMAT_*
@@ -1493,6 +1494,67 @@ struct __DRIimageExtensionRec {
                                           const uint64_t *modifiers,
                                           const unsigned int modifier_count,
                                           void *loaderPrivate);
+
+   /*
+    * Like createImageFromDmaBufs, but also takes format modifiers.
+    *
+    * For EGL_EXT_image_dma_buf_import_modifiers.
+    *
+    * \since 15
+    */
+   __DRIimage *(*createImageFromDmaBufs2)(__DRIscreen *screen,
+                                          int width, int height, int fourcc,
+                                          uint64_t modifier,
+                                          int *fds, int num_fds,
+                                          int *strides, int *offsets,
+                                          enum __DRIYUVColorSpace color_space,
+                                          enum __DRISampleRange sample_range,
+                                          enum __DRIChromaSiting horiz_siting,
+                                          enum __DRIChromaSiting vert_siting,
+                                          unsigned *error,
+                                          void *loaderPrivate);
+
+   /*
+    * dmabuf format query to support EGL_EXT_image_dma_buf_import_modifiers.
+    *
+    * \param max      Maximum number of formats that can be accommodated into
+    *                 \param formats. If zero, no formats are returned -
+    *                 instead, the driver returns the total number of
+    *                 supported dmabuf formats in \param count.
+    * \param formats  Buffer to fill formats into.
+    * \param count    Count of formats returned, or, total number of
+    *                 supported formats in case \param max is zero.
+    *
+    * Returns true on success.
+    *
+    * \since 15
+    */
+   GLboolean (*queryDmaBufFormats)(__DRIscreen *screen, int max,
+                                   int *formats, int *count);
+
+   /*
+    * dmabuf format modifier query for a given format to support
+    * EGL_EXT_image_dma_buf_import_modifiers.
+    *
+    * \param fourcc    The format to query modifiers for. If this format
+    *                  is not supported by the driver, return false.
+    * \param max       Maximum number of modifiers that can be accommodated in
+    *                  \param modifiers. If zero, no modifiers are returned -
+    *                  instead, the driver returns the total number of
+    *                  modifiers for \param fourcc in \param count.
+    * \param modifiers Buffer to fill modifiers into.
+    * \param count     Count of the modifiers returned, or, total number of
+    *                  supported modifiers for \param fourcc in case
+    *                  \param max is zero.
+    *
+    * Returns true upon success.
+    *
+    * \since 15
+    */
+   GLboolean (*queryDmaBufModifiers)(__DRIscreen *screen, int fourcc,
+                                     int max, uint64_t *modifiers,
+                                     unsigned int *external_only,
+                                     int *count);
 };


@@ -1720,6 +1782,19 @@ struct __DRIbackgroundCallableExtensionRec {
    * operations (e.g. it should just set a thread-local variable).
    */
   void (*setBackgroundContext)(void *loaderPrivate);
+
+   /**
+    * Indicate that it is multithread safe to use glthread.  For GLX/EGL
+    * platforms using Xlib, that involves calling XInitThreads, before
+    * opening an X display.
+    *
+    * Note: only supported if extension version is at least 2.
+    *
+    * \param loaderPrivate is the value that was passed to the driver when
+    * the context was created.  This can be used by the loader to identify
+    * which context any callbacks are associated with.
+    */
+   GLboolean (*isThreadSafe)(void *loaderPrivate);
 };

 #endif
--- a/include/c11/threads_win32.h
+++ b/include/c11/threads_win32.h
@@ -502,9 +502,13 @@ thrd_current(void)
    HANDLE hCurrentThread;
    BOOL bRet;

-    /* GetCurrentThread() returns a pseudo-handle, which is useless.  We need
-     * to call DuplicateHandle to get a real handle.  However the handle value
-     * will not match the one returned by thread_create.
+    /* GetCurrentThread() returns a pseudo-handle, which we need
+     * to pass to DuplicateHandle(). Only the resulting handle can be used
+     * from other threads.
+     *
+     * Note that neither handle can be compared to the one returned by
+     * thread_create. Only the thread IDs - as returned by GetThreadId() and
+     * GetCurrentThreadId() - can be compared directly.
     *
     * Other potential solutions would be:
     * - define thrd_t as a thread Ids, but this would mean we'd need to OpenThread for many operations
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -165,3 +165,26 @@ CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3)")
 CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
 CHIPSET(0x3184, glk,     "Intel(R) HD Graphics (Geminilake)")
 CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
+CHIPSET(0x3E90, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
+CHIPSET(0x3E93, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
+CHIPSET(0x3E91, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
+CHIPSET(0x3E92, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
+CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
+CHIPSET(0x3E9B, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
+CHIPSET(0x3E94, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
+CHIPSET(0x3EA6, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
+CHIPSET(0x3EA7, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
+CHIPSET(0x3EA8, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
+CHIPSET(0x3EA5, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
+CHIPSET(0x5A49, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
+CHIPSET(0x5A4A, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
+CHIPSET(0x5A41, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
+CHIPSET(0x5A42, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
+CHIPSET(0x5A44, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
+CHIPSET(0x5A59, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
+CHIPSET(0x5A5A, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
+CHIPSET(0x5A5C, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
+CHIPSET(0x5A50, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
+CHIPSET(0x5A51, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
+CHIPSET(0x5A52, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
+CHIPSET(0x5A54, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -213,12 +213,17 @@ CHIPSET(0x6985, POLARIS12_, POLARIS12)
 CHIPSET(0x6986, POLARIS12_, POLARIS12)
 CHIPSET(0x6987, POLARIS12_, POLARIS12)
 CHIPSET(0x6995, POLARIS12_, POLARIS12)
+CHIPSET(0x6997, POLARIS12_, POLARIS12)
 CHIPSET(0x699F, POLARIS12_, POLARIS12)

 CHIPSET(0x6860, VEGA10_, VEGA10)
 CHIPSET(0x6861, VEGA10_, VEGA10)
 CHIPSET(0x6862, VEGA10_, VEGA10)
 CHIPSET(0x6863, VEGA10_, VEGA10)
+CHIPSET(0x6864, VEGA10_, VEGA10)
 CHIPSET(0x6867, VEGA10_, VEGA10)
+CHIPSET(0x6868, VEGA10_, VEGA10)
 CHIPSET(0x687F, VEGA10_, VEGA10)
 CHIPSET(0x686C, VEGA10_, VEGA10)
+
+CHIPSET(0x15DD, RAVEN_, RAVEN)
--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
@@ -43,7 +43,7 @@ extern "C" {
 #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
 #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
 // Version of this file
-#define VK_HEADER_VERSION 46
+#define VK_HEADER_VERSION 49


 #define VK_NULL_HANDLE 0
@@ -261,9 +261,6 @@ typedef enum VkStructureType {
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO_KHX = 1000071002,
    VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES_KHX = 1000071003,
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX = 1000071004,
-    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHX = 1000071005,
-    VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHX = 1000071006,
-    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHX = 1000071007,
    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHX = 1000072000,
    VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHX = 1000072001,
    VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHX = 1000072002,
@@ -301,6 +298,10 @@ typedef enum VkStructureType {
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT = 1000099000,
    VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001,
    VK_STRUCTURE_TYPE_HDR_METADATA_EXT = 1000105000,
+    VK_STRUCTURE_TYPE_SHARED_PRESENT_SURFACE_CAPABILITIES_KHR = 1000111000,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR = 1000119000,
+    VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR = 1000119001,
+    VK_STRUCTURE_TYPE_SURFACE_FORMAT_2_KHR = 1000119002,
    VK_STRUCTURE_TYPE_IOS_SURFACE_CREATE_INFO_MVK = 1000122000,
    VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK = 1000123000,
    VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO,
@@ -590,6 +591,7 @@ typedef enum VkImageLayout {
    VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7,
    VK_IMAGE_LAYOUT_PREINITIALIZED = 8,
    VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002,
+    VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000,
    VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED,
    VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED,
    VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1),
@@ -896,6 +898,47 @@ typedef enum VkSubpassContents {
    VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF
 } VkSubpassContents;

+typedef enum VkObjectType {
+    VK_OBJECT_TYPE_UNKNOWN = 0,
+    VK_OBJECT_TYPE_INSTANCE = 1,
+    VK_OBJECT_TYPE_PHYSICAL_DEVICE = 2,
+    VK_OBJECT_TYPE_DEVICE = 3,
+    VK_OBJECT_TYPE_QUEUE = 4,
+    VK_OBJECT_TYPE_SEMAPHORE = 5,
+    VK_OBJECT_TYPE_COMMAND_BUFFER = 6,
+    VK_OBJECT_TYPE_FENCE = 7,
+    VK_OBJECT_TYPE_DEVICE_MEMORY = 8,
+    VK_OBJECT_TYPE_BUFFER = 9,
+    VK_OBJECT_TYPE_IMAGE = 10,
+    VK_OBJECT_TYPE_EVENT = 11,
+    VK_OBJECT_TYPE_QUERY_POOL = 12,
+    VK_OBJECT_TYPE_BUFFER_VIEW = 13,
+    VK_OBJECT_TYPE_IMAGE_VIEW = 14,
+    VK_OBJECT_TYPE_SHADER_MODULE = 15,
+    VK_OBJECT_TYPE_PIPELINE_CACHE = 16,
+    VK_OBJECT_TYPE_PIPELINE_LAYOUT = 17,
+    VK_OBJECT_TYPE_RENDER_PASS = 18,
+    VK_OBJECT_TYPE_PIPELINE = 19,
+    VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 20,
+    VK_OBJECT_TYPE_SAMPLER = 21,
+    VK_OBJECT_TYPE_DESCRIPTOR_POOL = 22,
+    VK_OBJECT_TYPE_DESCRIPTOR_SET = 23,
+    VK_OBJECT_TYPE_FRAMEBUFFER = 24,
+    VK_OBJECT_TYPE_COMMAND_POOL = 25,
+    VK_OBJECT_TYPE_SURFACE_KHR = 1000000000,
+    VK_OBJECT_TYPE_SWAPCHAIN_KHR = 1000001000,
+    VK_OBJECT_TYPE_DISPLAY_KHR = 1000002000,
+    VK_OBJECT_TYPE_DISPLAY_MODE_KHR = 1000002001,
+    VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT = 1000011000,
+    VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = 1000085000,
+    VK_OBJECT_TYPE_OBJECT_TABLE_NVX = 1000086000,
+    VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX = 1000086001,
+    VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_UNKNOWN,
+    VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_COMMAND_POOL,
+    VK_OBJECT_TYPE_RANGE_SIZE = (VK_OBJECT_TYPE_COMMAND_POOL - VK_OBJECT_TYPE_UNKNOWN + 1),
+    VK_OBJECT_TYPE_MAX_ENUM = 0x7FFFFFFF
+} VkObjectType;
+
 typedef VkFlags VkInstanceCreateFlags;

 typedef enum VkFormatFeatureFlagBits {
@@ -3323,6 +3366,8 @@ typedef enum VkPresentModeKHR {
    VK_PRESENT_MODE_MAILBOX_KHR = 1,
    VK_PRESENT_MODE_FIFO_KHR = 2,
    VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3,
+    VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR = 1000111000,
+    VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR = 1000111001,
    VK_PRESENT_MODE_BEGIN_RANGE_KHR = VK_PRESENT_MODE_IMMEDIATE_KHR,
    VK_PRESENT_MODE_END_RANGE_KHR = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
    VK_PRESENT_MODE_RANGE_SIZE_KHR = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1),
@@ -4101,6 +4146,64 @@ VKAPI_ATTR void VKAPI_CALL vkCmdPushDescriptorSetWithTemplateKHR(
    const void*                                 pData);
 #endif

+#define VK_KHR_shared_presentable_image 1
+#define VK_KHR_SHARED_PRESENTABLE_IMAGE_SPEC_VERSION 1
+#define VK_KHR_SHARED_PRESENTABLE_IMAGE_EXTENSION_NAME "VK_KHR_shared_presentable_image"
+
+typedef struct VkSharedPresentSurfaceCapabilitiesKHR {
+    VkStructureType      sType;
+    void*                pNext;
+    VkImageUsageFlags    sharedPresentSupportedUsageFlags;
+} VkSharedPresentSurfaceCapabilitiesKHR;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainStatusKHR)(VkDevice device, VkSwapchainKHR swapchain);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainStatusKHR(
+    VkDevice                                    device,
+    VkSwapchainKHR                              swapchain);
+#endif
+
+#define VK_KHR_get_surface_capabilities2 1
+#define VK_KHR_GET_SURFACE_CAPABILITIES_2_SPEC_VERSION 1
+#define VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME "VK_KHR_get_surface_capabilities2"
+
+typedef struct VkPhysicalDeviceSurfaceInfo2KHR {
+    VkStructureType    sType;
+    const void*        pNext;
+    VkSurfaceKHR       surface;
+} VkPhysicalDeviceSurfaceInfo2KHR;
+
+typedef struct VkSurfaceCapabilities2KHR {
+    VkStructureType             sType;
+    void*                       pNext;
+    VkSurfaceCapabilitiesKHR    surfaceCapabilities;
+} VkSurfaceCapabilities2KHR;
+
+typedef struct VkSurfaceFormat2KHR {
+    VkStructureType       sType;
+    void*                 pNext;
+    VkSurfaceFormatKHR    surfaceFormat;
+} VkSurfaceFormat2KHR;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, VkSurfaceCapabilities2KHR* pSurfaceCapabilities);
+typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormats2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, uint32_t* pSurfaceFormatCount, VkSurfaceFormat2KHR* pSurfaceFormats);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilities2KHR(
+    VkPhysicalDevice                            physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR*      pSurfaceInfo,
+    VkSurfaceCapabilities2KHR*                  pSurfaceCapabilities);
+
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormats2KHR(
+    VkPhysicalDevice                            physicalDevice,
+    const VkPhysicalDeviceSurfaceInfo2KHR*      pSurfaceInfo,
+    uint32_t*                                   pSurfaceFormatCount,
+    VkSurfaceFormat2KHR*                        pSurfaceFormats);
+#endif
+
 #define VK_EXT_debug_report 1
 VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)

--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -103,8 +103,26 @@ def generate(env):
            'HAVE_STDINT_H',
        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
-        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
-        if llvm_version >= distutils.version.LooseVersion('3.9'):
+        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
+        if llvm_version >= distutils.version.LooseVersion('4.0'):
+            env.Prepend(LIBS = [
+                'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
+                'LLVMDebugInfoCodeView', 'LLVMCodeGen',
+                'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMTransformUtils',
+                'LLVMBitWriter', 'LLVMX86Desc',
+                'LLVMMCDisassembler', 'LLVMX86Info',
+                'LLVMX86AsmPrinter', 'LLVMX86Utils',
+                'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
+                'LLVMAnalysis', 'LLVMProfileData',
+                'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
+                'LLVMBitReader', 'LLVMMC', 'LLVMCore',
+                'LLVMSupport',
+                'LLVMIRReader', 'LLVMAsmParser',
+                'LLVMDemangle', 'LLVMGlobalISel', 'LLVMDebugInfoMSF',
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('3.9'):
            env.Prepend(LIBS = [
                'LLVMX86Disassembler', 'LLVMX86AsmParser',
                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -21,18 +21,7 @@

 .PHONY: git_sha1.h.tmp
 git_sha1.h.tmp:
-	@# Don't assume that $(top_srcdir)/.git is a directory. It may be
-	@# a gitlink file if $(top_srcdir) is a submodule checkout or a linked
-	@# worktree.
-	@# If we are building from a release tarball copy the bundled header.
-	@touch git_sha1.h.tmp
-	@if test -e $(top_srcdir)/.git; then \
-		if which git > /dev/null; then \
-		    git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \
-			sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
-			> git_sha1.h.tmp ; \
-		fi \
-	fi
+	@sh $(top_srcdir)/git_sha1_gen.sh > $@

 git_sha1.h: git_sha1.h.tmp
 	@echo "updating git_sha1.h"
--- a/src/SConscript
+++ b/src/SConscript
@@ -22,27 +22,15 @@ def write_git_sha1_h_file(filename):
    to retrieve the git hashid and write the header file.  An empty file
    will be created if anything goes wrong."""

-    args = [ 'git', 'rev-parse', '--short=10', 'HEAD' ]
-    try:
-        (commit, foo) = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()
-    except:
-        print "Warning: exception in write_git_sha1_h_file()"
-        # git log command didn't work
-        if not os.path.exists(filename):
-            dirname = os.path.dirname(filename)
-            if dirname and not os.path.exists(dirname):
-                os.makedirs(dirname)
-            # create an empty file if none already exists
-            f = open(filename, "w")
-            f.close()
-        return
-
-    # note that commit[:-1] removes the trailing newline character
-    commit = '#define MESA_GIT_SHA1 "git-%s"\n' % commit[:-1]
    tempfile = "git_sha1.h.tmp"
-    f = open(tempfile, "w")
-    f.write(commit)
-    f.close()
+    with open(tempfile, "w") as f:
+        args = [ 'sh', Dir('#').abspath + '/git_sha1_gen.sh' ]
+        try:
+            subprocess.Popen(args, stdout=f).wait()
+        except:
+            print "Warning: exception in write_git_sha1_h_file()"
+            return
+
    if not os.path.exists(filename) or not filecmp.cmp(tempfile, filename):
        # The filename does not exist or it's different from the new file,
        # so replace old file with new.
--- a/src/amd/Android.addrlib.mk
+++ b/src/amd/Android.addrlib.mk
@@ -42,5 +42,11 @@ LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/src/amd/addrlib/gfx9/chip \
 	$(MESA_TOP)/src/amd/addrlib/r800/chip

+LOCAL_EXPORT_C_INCLUDE_DIRS := \
+	$(LOCAL_PATH) \
+	$(LOCAL_PATH)/addrlib/core \
+	$(LOCAL_PATH)/addrlib/inc/chip/r800 \
+	$(LOCAL_PATH)/addrlib/r800/chip
+
 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
--- a/src/amd/Android.common.mk
+++ b/src/amd/Android.common.mk
@@ -29,6 +29,7 @@ include $(CLEAR_VARS)
 LOCAL_MODULE := libmesa_amd_common

 LOCAL_SRC_FILES := \
+	$(AMD_COMMON_FILES) \
 	$(AMD_COMPILER_FILES) \
 	$(AMD_DEBUG_FILES)

@@ -49,15 +50,27 @@ LOCAL_C_INCLUDES := \
 	$(MESA_TOP)/include \
 	$(MESA_TOP)/src \
 	$(MESA_TOP)/src/amd/common \
+	$(MESA_TOP)/src/compiler \
+	$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_nir,,)/nir \
 	$(MESA_TOP)/src/gallium/include \
 	$(MESA_TOP)/src/gallium/auxiliary \
 	$(intermediates)/common \
 	external/llvm/include \
-	external/llvm/device/include \
-	external/libcxx/include \
-	$(ELF_INCLUDES)
+	external/llvm/device/include

-LOCAL_STATIC_LIBRARIES := libLLVMCore
+LOCAL_EXPORT_C_INCLUDE_DIRS := \
+	$(LOCAL_PATH)/common
+
+LOCAL_SHARED_LIBRARIES := \
+	libdrm_amdgpu
+
+LOCAL_STATIC_LIBRARIES := \
+	libmesa_nir
+
+LOCAL_WHOLE_STATIC_LIBRARIES := \
+	libelf
+
+$(call mesa-build-with-llvm)

 include $(MESA_COMMON_MK)
 include $(BUILD_STATIC_LIBRARY)
--- a/src/amd/Makefile.common.am
+++ b/src/amd/Makefile.common.am
@@ -25,6 +25,7 @@ COMMON_LIBS = common/libamd_common.la

 # TODO cleanup these
 common_libamd_common_la_CPPFLAGS = \
+	$(AMDGPU_CFLAGS) \
 	$(VALGRIND_CFLAGS) \
 	$(DEFINES) \
 	-I$(top_srcdir)/include \
@@ -54,6 +55,7 @@ common_libamd_common_la_CXXFLAGS = \
 noinst_LTLIBRARIES += $(COMMON_LIBS)

 common_libamd_common_la_SOURCES = \
+	$(AMD_COMMON_FILES) \
 	$(AMD_COMPILER_FILES) \
 	$(AMD_DEBUG_FILES) \
 	$(AMD_GENERATED_FILES)
@@ -65,6 +67,8 @@ common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
 endif
 endif

+common_libamd_common_la_LIBADD = $(LIBELF_LIBS)
+
 common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h
 	$(AM_V_at)$(MKDIR_P) $(@D)
 	$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h > $@
--- a/src/amd/Makefile.sources
+++ b/src/amd/Makefile.sources
@@ -42,16 +42,25 @@ ADDRLIB_FILES = \
 AMD_COMPILER_FILES = \
 	common/ac_binary.c \
 	common/ac_binary.h \
+	common/ac_exp_param.h \
 	common/ac_llvm_build.c \
 	common/ac_llvm_build.h \
 	common/ac_llvm_helper.cpp \
 	common/ac_llvm_util.c \
-	common/ac_llvm_util.h
+	common/ac_llvm_util.h \
+	common/ac_shader_info.c \
+	common/ac_shader_info.h

 AMD_NIR_FILES = \
 	common/ac_nir_to_llvm.c \
 	common/ac_nir_to_llvm.h

+AMD_COMMON_FILES = \
+	common/ac_gpu_info.c \
+	common/ac_gpu_info.h \
+	common/ac_surface.c \
+	common/ac_surface.h
+
 AMD_DEBUG_FILES = \
 	common/ac_debug.c \
 	common/ac_debug.h
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -1193,6 +1193,20 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
            m_settings.depthPipeXorDisable = 1;
            break;

+        case FAMILY_RV:
+            m_settings.isArcticIsland = 1;
+            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
+
+            if (m_settings.isRaven)
+            {
+                m_settings.isDcn1   = 1;
+            }
+
+            m_settings.metaBaseAlignFix = 1;
+
+            m_settings.depthPipeXorDisable = 1;
+            break;
+
        default:
            ADDR_ASSERT(!"This should be a Fusion");
            break;
@@ -2734,6 +2748,35 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
                break;
        }
    }
+    else if (m_settings.isDcn1)
+    {
+        switch (swizzleMode)
+        {
+            case ADDR_SW_4KB_D:
+            case ADDR_SW_64KB_D:
+            case ADDR_SW_VAR_D:
+            case ADDR_SW_64KB_D_T:
+            case ADDR_SW_4KB_D_X:
+            case ADDR_SW_64KB_D_X:
+            case ADDR_SW_VAR_D_X:
+                support = (pIn->bpp == 64);
+                break;
+
+            case ADDR_SW_LINEAR:
+            case ADDR_SW_4KB_S:
+            case ADDR_SW_64KB_S:
+            case ADDR_SW_VAR_S:
+            case ADDR_SW_64KB_S_T:
+            case ADDR_SW_4KB_S_X:
+            case ADDR_SW_64KB_S_X:
+            case ADDR_SW_VAR_S_X:
+                support = (pIn->bpp <= 64);
+                break;
+
+            default:
+                break;
+        }
+    }
    else
    {
        ADDR_NOT_IMPLEMENTED();
@@ -3195,6 +3238,20 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
                        // DCE12 does not support display surface to be _T swizzle mode
                        prtXor = FALSE;
                    }
+                    else if (m_settings.isDcn1)
+                    {
+                        // _R is not supported by Dcn1
+                        if (pIn->bpp == 64)
+                        {
+                            swType = ADDR_SW_D;
+                        }
+                        else
+                        {
+                            swType = ADDR_SW_S;
+                        }
+
+                        blockSet.micro = FALSE;
+                    }
                    else
                    {
                        ADDR_NOT_IMPLEMENTED();
--- a/src/amd/addrlib/gfx9/gfx9addrlib.h
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.h
@@ -54,11 +54,13 @@ struct Gfx9ChipSettings
        // Asic/Generation name
        UINT_32 isArcticIsland      : 1;
        UINT_32 isVega10            : 1;
-        UINT_32 reserved0           : 30;
+        UINT_32 isRaven             : 1;
+        UINT_32 reserved0           : 29;

        // Display engine IP version name
        UINT_32 isDce12             : 1;
-        UINT_32 reserved1           : 31;
+        UINT_32 isDcn1              : 1;
+        UINT_32 reserved1           : 29;

        // Misc configuration bits
        UINT_32 metaBaseAlignFix    : 1;
@@ -201,7 +203,7 @@ protected:

        if (IsXor(swizzleMode))
        {
-            if (m_settings.isVega10)
+            if (m_settings.isVega10 || m_settings.isRaven)
            {
                baseAlign = GetBlockSize(swizzleMode);
            }
--- a/src/amd/common/ac_debug.c
+++ b/src/amd/common/ac_debug.c
@@ -132,9 +132,15 @@ void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
 static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
 				    unsigned reg_offset)
 {
-	unsigned reg = (ib[1] << 2) + reg_offset;
+	unsigned reg = ((ib[1] & 0xFFFF) << 2) + reg_offset;
+	unsigned index = ib[1] >> 28;
 	int i;

+	if (index != 0) {
+		print_spaces(f, INDENT_PKT);
+		fprintf(f, "INDEX = %u\n", index);
+	}
+
 	for (i = 0; i < count; i++)
 		ac_dump_reg(f, reg + i*4, ib[2+i], ~0);
 }
@@ -214,6 +220,52 @@ static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
 			print_named_value(f, "ADDRESS_HI", ib[3], 16);
 		}
 		break;
+	case PKT3_EVENT_WRITE_EOP:
+		ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
+			    S_028A90_EVENT_TYPE(~0));
+		print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
+		print_named_value(f, "TCL1_VOL_ACTION_ENA", (ib[1] >> 12) & 0x1, 1);
+		print_named_value(f, "TC_VOL_ACTION_ENA", (ib[1] >> 13) & 0x1, 1);
+		print_named_value(f, "TC_WB_ACTION_ENA", (ib[1] >> 15) & 0x1, 1);
+		print_named_value(f, "TCL1_ACTION_ENA", (ib[1] >> 16) & 0x1, 1);
+		print_named_value(f, "TC_ACTION_ENA", (ib[1] >> 17) & 0x1, 1);
+		print_named_value(f, "ADDRESS_LO", ib[2], 32);
+		print_named_value(f, "ADDRESS_HI", ib[3], 16);
+		print_named_value(f, "DST_SEL", (ib[3] >> 16) & 0x3, 2);
+		print_named_value(f, "INT_SEL", (ib[3] >> 24) & 0x7, 3);
+		print_named_value(f, "DATA_SEL", ib[3] >> 29, 3);
+		print_named_value(f, "DATA_LO", ib[4], 32);
+		print_named_value(f, "DATA_HI", ib[5], 32);
+		break;
+	case PKT3_RELEASE_MEM:
+		ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
+			    S_028A90_EVENT_TYPE(~0));
+		print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
+		print_named_value(f, "TCL1_VOL_ACTION_ENA", (ib[1] >> 12) & 0x1, 1);
+		print_named_value(f, "TC_VOL_ACTION_ENA", (ib[1] >> 13) & 0x1, 1);
+		print_named_value(f, "TC_WB_ACTION_ENA", (ib[1] >> 15) & 0x1, 1);
+		print_named_value(f, "TCL1_ACTION_ENA", (ib[1] >> 16) & 0x1, 1);
+		print_named_value(f, "TC_ACTION_ENA", (ib[1] >> 17) & 0x1, 1);
+		print_named_value(f, "TC_NC_ACTION_ENA", (ib[1] >> 19) & 0x1, 1);
+		print_named_value(f, "TC_WC_ACTION_ENA", (ib[1] >> 20) & 0x1, 1);
+		print_named_value(f, "TC_MD_ACTION_ENA", (ib[1] >> 21) & 0x1, 1);
+		print_named_value(f, "DST_SEL", (ib[2] >> 16) & 0x3, 2);
+		print_named_value(f, "INT_SEL", (ib[2] >> 24) & 0x7, 3);
+		print_named_value(f, "DATA_SEL", ib[2] >> 29, 3);
+		print_named_value(f, "ADDRESS_LO", ib[3], 32);
+		print_named_value(f, "ADDRESS_HI", ib[4], 32);
+		print_named_value(f, "DATA_LO", ib[5], 32);
+		print_named_value(f, "DATA_HI", ib[6], 32);
+		print_named_value(f, "CTXID", ib[7], 32);
+		break;
+	case PKT3_WAIT_REG_MEM:
+		print_named_value(f, "OP", ib[1], 32);
+		print_named_value(f, "ADDRESS_LO", ib[2], 32);
+		print_named_value(f, "ADDRESS_HI", ib[3], 32);
+		print_named_value(f, "REF", ib[4], 32);
+		print_named_value(f, "MASK", ib[5], 32);
+		print_named_value(f, "POLL_INTERVAL", ib[6], 16);
+		break;
 	case PKT3_DRAW_INDEX_AUTO:
 		ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[1], ~0);
 		ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[2], ~0);
--- a/src/amd/common/ac_exp_param.h
+++ b/src/amd/common/ac_exp_param.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ */
+#ifndef AC_EXP_PARAM_H
+#define AC_EXP_PARAM_H
+
+enum {
+	/* SPI_PS_INPUT_CNTL_i.OFFSET[0:4] */
+	AC_EXP_PARAM_OFFSET_0 = 0,
+	AC_EXP_PARAM_OFFSET_31 = 31,
+	/* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL[0:1] */
+	AC_EXP_PARAM_DEFAULT_VAL_0000 = 64,
+	AC_EXP_PARAM_DEFAULT_VAL_0001,
+	AC_EXP_PARAM_DEFAULT_VAL_1110,
+	AC_EXP_PARAM_DEFAULT_VAL_1111,
+	AC_EXP_PARAM_UNDEFINED = 255,
+};
+
+#endif
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright © 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#include "ac_gpu_info.h"
+#include "sid.h"
+#include "gfx9d.h"
+
+#include "util/u_math.h"
+
+#include <stdio.h>
+
+#include <xf86drm.h>
+#include <amdgpu_drm.h>
+
+#include <amdgpu.h>
+
+#define CIK_TILE_MODE_COLOR_2D			14
+
+#define CIK__GB_TILE_MODE__PIPE_CONFIG(x)        (((x) >> 6) & 0x1f)
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P2               0
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16          4
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16         5
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32         6
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32         7
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16    8
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16    9
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16    10
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16   11
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16   12
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32   13
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32   14
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16   16
+#define     CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16  17
+
+static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
+{ /* Map the PIPE_CONFIG field of the 2D color tile mode to a pipe count (2, 4, 8 or 16). */
+   unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D];
+
+   switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
+   case CIK__PIPE_CONFIG__ADDR_SURF_P2:
+       return 2;
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32:
+       return 4;
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32:
+       return 8;
+   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
+   case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
+       return 16;
+   default: /* any PIPE_CONFIG value without a case above */
+       fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
+       assert(!"this should never occur");
+       return 2; /* fallback used when asserts are compiled out */
+   }
+}
+
+bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
+		       struct radeon_info *info,
+		       struct amdgpu_gpu_info *amdinfo)
+{ /* Fill *info (and *amdinfo) from libdrm/amdgpu queries; logs to stderr and returns false as soon as any query or chip lookup fails. */
+	struct amdgpu_buffer_size_alignments alignment_info = {};
+	struct amdgpu_heap_info vram, vram_vis, gtt;
+	struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {};
+	uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
+	uint32_t unused_feature;
+	int r, i, j;
+	drmDevicePtr devinfo;
+
+	/* Get PCI info. */
+	r = drmGetDevice2(fd, 0, &devinfo);
+	if (r) {
+		fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
+		return false;
+	}
+	info->pci_domain = devinfo->businfo.pci->domain;
+	info->pci_bus = devinfo->businfo.pci->bus;
+	info->pci_dev = devinfo->businfo.pci->dev;
+	info->pci_func = devinfo->businfo.pci->func;
+	drmFreeDevice(&devinfo);
+
+	/* Query hardware and driver information. */
+	r = amdgpu_query_gpu_info(dev, amdinfo);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_buffer_size_alignment(dev, &alignment_info);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
+				AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
+				&vram_vis);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, &dma);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD, 0, &uvd);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
+		return false;
+	}
+
+	if (info->drm_major == 3 && info->drm_minor >= 17) { /* NOTE(review): drm_major/drm_minor are read but never written here; presumably the caller sets them first - confirm */
+		r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
+		if (r) {
+			fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
+			return false;
+		}
+	}
+
+	r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0,
+					&info->me_fw_version, &unused_feature);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(me) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0,
+					&info->pfp_fw_version, &unused_feature);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_CE, 0, 0,
+					&info->ce_fw_version, &unused_feature);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(ce) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0,
+					&uvd_version, &uvd_feature);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(uvd) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCE, 0, &vce);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
+		return false;
+	}
+
+	r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0,
+					&vce_version, &vce_feature);
+	if (r) {
+		fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
+		return false;
+	}
+
+	/* Set chip identification. */
+	info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
+	info->vce_harvest_config = amdinfo->vce_harvest_config;
+
+	switch (info->pci_id) { /* CHIPSET() turns each entry of the included PCI ID table into a case that sets info->family */
+#define CHIPSET(pci_id, name, cfamily) case pci_id: info->family = CHIP_##cfamily; break;
+#include "pci_ids/radeonsi_pci_ids.h"
+#undef CHIPSET
+
+	default:
+		fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
+		return false;
+	}
+
+	if (info->family >= CHIP_VEGA10) /* relies on the numeric ordering of the family enum, newest first */
+		info->chip_class = GFX9;
+	else if (info->family >= CHIP_TONGA)
+		info->chip_class = VI;
+	else if (info->family >= CHIP_BONAIRE)
+		info->chip_class = CIK;
+	else if (info->family >= CHIP_TAHITI)
+		info->chip_class = SI;
+	else {
+		fprintf(stderr, "amdgpu: Unknown family.\n");
+		return false;
+	}
+
+	/* Set which chips have dedicated VRAM. */
+	info->has_dedicated_vram =
+		!(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
+
+	/* Set hardware information. */
+	info->gart_size = gtt.heap_size;
+	info->vram_size = vram.heap_size;
+	info->vram_vis_size = vram_vis.heap_size;
+	/* The kernel can split large buffers in VRAM but not in GTT, so large
+	 * allocations can fail or cause buffer movement failures in the kernel.
+	 */
+	info->max_alloc_size = MIN2(info->vram_size * 0.9, info->gart_size * 0.7);
+	/* convert the shader clock from KHz to MHz */
+	info->max_shader_clock = amdinfo->max_engine_clk / 1000;
+	info->max_se = amdinfo->num_shader_engines;
+	info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine;
+	info->has_hw_decode =
+		(uvd.available_rings != 0) || (vcn_dec.available_rings != 0);
+	info->uvd_fw_version =
+		uvd.available_rings ? uvd_version : 0;
+	info->vce_fw_version =
+		vce.available_rings ? vce_version : 0;
+	info->has_userptr = true;
+	info->num_render_backends = amdinfo->rb_pipes;
+	info->clock_crystal_freq = amdinfo->gpu_counter_freq;
+	info->tcc_cache_line_size = 64; /* TC L2 line size on GCN */
+	if (info->chip_class == GFX9) { /* GFX9 decodes the pipe count from gb_addr_cfg; older chips use the tile mode table */
+		info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
+		info->pipe_interleave_bytes =
+			256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(amdinfo->gb_addr_cfg);
+	} else {
+		info->num_tile_pipes = cik_get_num_tile_pipes(amdinfo);
+		info->pipe_interleave_bytes =
+			256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(amdinfo->gb_addr_cfg);
+	}
+	info->has_virtual_memory = true;
+
+	assert(util_is_power_of_two(dma.available_rings + 1)); /* ring masks are expected to be of the form 2^n - 1 */
+	assert(util_is_power_of_two(compute.available_rings + 1));
+
+	info->num_sdma_rings = util_bitcount(dma.available_rings);
+	info->num_compute_rings = util_bitcount(compute.available_rings);
+
+	/* Get the number of good compute units. */
+	info->num_good_compute_units = 0;
+	for (i = 0; i < info->max_se; i++)
+		for (j = 0; j < info->max_sh_per_se; j++)
+			info->num_good_compute_units +=
+				util_bitcount(amdinfo->cu_bitmap[i][j]);
+
+	memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
+		sizeof(amdinfo->gb_tile_mode));
+	info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask;
+
+	memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode,
+		sizeof(amdinfo->gb_macro_tile_mode));
+
+	info->pte_fragment_size = alignment_info.size_local;
+	info->gart_page_size = alignment_info.size_remote;
+
+	if (info->chip_class == SI)
+		info->gfx_ib_pad_with_type2 = true; /* field is a C99 bool; use the <stdbool.h> literal like the rest of this function */
+
+	return true;
+}
+
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright © 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#ifndef AC_GPU_INFO_H
+#define AC_GPU_INFO_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "amd_family.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Prior to C11 the following may trigger a typedef redeclaration warning */
+typedef struct amdgpu_device *amdgpu_device_handle;
+struct amdgpu_gpu_info;
+
+struct radeon_info { /* GPU description; the field notes below describe how ac_query_gpu_info() fills them. */
+	/* PCI info: domain:bus:dev:func */
+	uint32_t                    pci_domain;
+	uint32_t                    pci_bus;
+	uint32_t                    pci_dev;
+	uint32_t                    pci_func;
+
+	/* Device info. */
+	uint32_t                    pci_id; /* copied from the kernel-reported asic_id */
+	enum radeon_family          family;
+	enum chip_class             chip_class;
+	uint32_t                    pte_fragment_size; /* kernel buffer alignment: size_local */
+	uint32_t                    gart_page_size; /* kernel buffer alignment: size_remote */
+	uint64_t                    gart_size; /* GTT heap size */
+	uint64_t                    vram_size; /* VRAM heap size */
+	uint64_t                    vram_vis_size; /* size of the VRAM heap queried with CPU_ACCESS_REQUIRED */
+	uint64_t                    max_alloc_size; /* min(90% of vram_size, 70% of gart_size) */
+	uint32_t                    min_alloc_size;
+	bool                        has_dedicated_vram; /* false when the FUSION id flag is set */
+	bool                        has_virtual_memory;
+	bool                        gfx_ib_pad_with_type2; /* set to true for SI chips */
+	bool                        has_hw_decode; /* a UVD or VCN decode ring is available */
+	uint32_t                    num_sdma_rings;
+	uint32_t                    num_compute_rings;
+	uint32_t                    uvd_fw_version; /* 0 when no UVD ring is available */
+	uint32_t                    vce_fw_version; /* 0 when no VCE ring is available */
+	uint32_t                    me_fw_version;
+	uint32_t                    pfp_fw_version;
+	uint32_t                    ce_fw_version;
+	uint32_t                    vce_harvest_config;
+	uint32_t                    clock_crystal_freq; /* copied from gpu_counter_freq */
+	uint32_t                    tcc_cache_line_size; /* TC L2 cache line size */
+
+	/* Kernel info. */
+	uint32_t                    drm_major; /* version */
+	uint32_t                    drm_minor;
+	uint32_t                    drm_patchlevel;
+	bool                        has_userptr;
+
+	/* Shader cores. */
+	uint32_t                    r600_max_quad_pipes; /* wave size / 16 */
+	uint32_t                    max_shader_clock; /* MHz */
+	uint32_t                    num_good_compute_units; /* bits set in the CU bitmap across all SEs and SHs */
+	uint32_t                    max_se; /* shader engines */
+	uint32_t                    max_sh_per_se; /* shader arrays per shader engine */
+
+	/* Render backends (color + depth blocks). */
+	uint32_t                    r300_num_gb_pipes;
+	uint32_t                    r300_num_z_pipes;
+	uint32_t                    r600_gb_backend_map; /* R600 harvest config */
+	bool                        r600_gb_backend_map_valid;
+	uint32_t                    r600_num_banks;
+	uint32_t                    num_render_backends;
+	uint32_t                    num_tile_pipes; /* pipe count from PIPE_CONFIG */
+	uint32_t                    pipe_interleave_bytes;
+	uint32_t                    enabled_rb_mask; /* GCN harvest config */
+
+	/* Tile modes. */
+	uint32_t                    si_tile_mode_array[32]; /* copy of the kernel's gb_tile_mode table */
+	uint32_t                    cik_macrotile_mode_array[16]; /* copy of the kernel's gb_macro_tile_mode table */
+};
+
+bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
+		       struct radeon_info *info,
+		       struct amdgpu_gpu_info *amdinfo); /* fills *info and *amdinfo; returns false on failure */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* AC_GPU_INFO_H */
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -33,11 +33,13 @@
 #include <stdio.h>

 #include "ac_llvm_util.h"
-
+#include "ac_exp_param.h"
 #include "util/bitscan.h"
 #include "util/macros.h"
 #include "sid.h"

+#include "shader_enums.h"
+
 /* Initialize module-independent parts of the context.
 *
 * The caller is responsible for initializing ctx::module and ctx::builder.
@@ -54,11 +56,20 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
 	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
 	ctx->i1 = LLVMInt1TypeInContext(ctx->context);
 	ctx->i8 = LLVMInt8TypeInContext(ctx->context);
+	ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
 	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
+	ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
+	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
 	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
+	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
 	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
-	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
+	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
+
+	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
+	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
+	ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
+	ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);

 	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
 						     "range", 5);
@@ -231,42 +242,16 @@ build_cube_intrinsic(struct ac_llvm_context *ctx,
 		     LLVMValueRef in[3],
 		     struct cube_selection_coords *out)
 {
-	LLVMBuilderRef builder = ctx->builder;
+	LLVMTypeRef f32 = ctx->f32;

-	if (HAVE_LLVM >= 0x0309) {
-		LLVMTypeRef f32 = ctx->f32;
-
-		out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
-					f32, in, 3, AC_FUNC_ATTR_READNONE);
-		out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
-					f32, in, 3, AC_FUNC_ATTR_READNONE);
-		out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
-					f32, in, 3, AC_FUNC_ATTR_READNONE);
-		out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
-					f32, in, 3, AC_FUNC_ATTR_READNONE);
-	} else {
-		LLVMValueRef c[4] = {
-			in[0],
-			in[1],
-			in[2],
-			LLVMGetUndef(LLVMTypeOf(in[0]))
-		};
-		LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);
-
-		LLVMValueRef tmp =
-			ac_build_intrinsic(ctx, "llvm.AMDGPU.cube",
-					   LLVMTypeOf(vec), &vec, 1,
-					   AC_FUNC_ATTR_READNONE);
-
-		out->stc[1] = LLVMBuildExtractElement(builder, tmp,
-				LLVMConstInt(ctx->i32, 0, 0), "");
-		out->stc[0] = LLVMBuildExtractElement(builder, tmp,
-				LLVMConstInt(ctx->i32, 1, 0), "");
-		out->ma = LLVMBuildExtractElement(builder, tmp,
-				LLVMConstInt(ctx->i32, 2, 0), "");
-		out->id = LLVMBuildExtractElement(builder, tmp,
-				LLVMConstInt(ctx->i32, 3, 0), "");
-	}
+	out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
+					 f32, in, 3, AC_FUNC_ATTR_READNONE);
+	out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
+					 f32, in, 3, AC_FUNC_ATTR_READNONE);
+	out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
+				     f32, in, 3, AC_FUNC_ATTR_READNONE);
+	out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
+				     f32, in, 3, AC_FUNC_ATTR_READNONE);
 }

 /**
@@ -556,7 +541,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    bool has_add_tid)
 {
 	/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
-	if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
+	if (!has_add_tid) {
 		/* Split 3 channel stores, becase LLVM doesn't support 3-channel
 		 * intrinsics. */
 		if (num_channels == 3) {
@@ -657,114 +642,89 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
 		     unsigned inst_offset,
 		     unsigned glc,
 		     unsigned slc,
-		     bool readonly_memory)
+		     bool can_speculate,
+		     bool allow_smem)
 {
+	LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
+	if (voffset)
+		offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
+	if (soffset)
+		offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
+
+	/* TODO: VI and later generations can use SMEM with GLC=1. */
+	if (allow_smem && !glc && !slc) {
+		assert(vindex == NULL);
+
+		LLVMValueRef result[4];
+
+		for (int i = 0; i < num_channels; i++) {
+			if (i) {
+				offset = LLVMBuildAdd(ctx->builder, offset,
+						      LLVMConstInt(ctx->i32, 4, 0), "");
+			}
+			LLVMValueRef args[2] = {rsrc, offset};
+			result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
+						       ctx->f32, args, 2,
+						       AC_FUNC_ATTR_READNONE |
+						       AC_FUNC_ATTR_LEGACY);
+		}
+		if (num_channels == 1)
+			return result[0];
+
+		if (num_channels == 3)
+			result[num_channels++] = LLVMGetUndef(ctx->f32);
+		return ac_build_gather_values(ctx, result, num_channels);
+	}
+
 	unsigned func = CLAMP(num_channels, 1, 3) - 1;

-	if (HAVE_LLVM >= 0x309) {
-		LLVMValueRef args[] = {
-			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-			vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
-			LLVMConstInt(ctx->i32, inst_offset, 0),
-			LLVMConstInt(ctx->i1, glc, 0),
-			LLVMConstInt(ctx->i1, slc, 0)
-		};
+	LLVMValueRef args[] = {
+		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
+		vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
+		offset,
+		LLVMConstInt(ctx->i1, glc, 0),
+		LLVMConstInt(ctx->i1, slc, 0)
+	};

-		LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
-		                       ctx->v4f32};
-		const char *type_names[] = {"f32", "v2f32", "v4f32"};
-		char name[256];
+	LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
+			       ctx->v4f32};
+	const char *type_names[] = {"f32", "v2f32", "v4f32"};
+	char name[256];

-		if (voffset) {
-			args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
-			                       "");
-		}
+	snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
+		 type_names[func]);

-		if (soffset) {
-			args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
-			                       "");
-		}
-
-		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
-		         type_names[func]);
-
-		return ac_build_intrinsic(ctx, name, types[func], args,
-					  ARRAY_SIZE(args),
-					  /* READNONE means writes can't
-					   * affect it, while READONLY means
-					   * that writes can affect it. */
-					  readonly_memory && HAVE_LLVM >= 0x0400 ?
-						  AC_FUNC_ATTR_READNONE :
-						  AC_FUNC_ATTR_READONLY);
-	} else {
-		LLVMValueRef args[] = {
-			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
-			voffset ? voffset : vindex,
-			soffset,
-			LLVMConstInt(ctx->i32, inst_offset, 0),
-			LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
-			LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
-			LLVMConstInt(ctx->i32, glc, 0),
-			LLVMConstInt(ctx->i32, slc, 0),
-			LLVMConstInt(ctx->i32, 0, 0), // TFE
-		};
-
-		LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
-		                       ctx->v4i32};
-		const char *type_names[] = {"i32", "v2i32", "v4i32"};
-		const char *arg_type = "i32";
-		char name[256];
-
-		if (voffset && vindex) {
-			LLVMValueRef vaddr[] = {vindex, voffset};
-
-			arg_type = "v2i32";
-			args[1] = ac_build_gather_values(ctx, vaddr, 2);
-		}
-
-		snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
-		         type_names[func], arg_type);
-
-		return ac_build_intrinsic(ctx, name, types[func], args,
-					  ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
-	}
+	return ac_build_intrinsic(ctx, name, types[func], args,
+				  ARRAY_SIZE(args),
+				  /* READNONE means writes can't affect it, while
+				   * READONLY means that writes can affect it. */
+				  can_speculate && HAVE_LLVM >= 0x0400 ?
+					  AC_FUNC_ATTR_READNONE :
+					  AC_FUNC_ATTR_READONLY);
 }

 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 					 LLVMValueRef rsrc,
 					 LLVMValueRef vindex,
 					 LLVMValueRef voffset,
-					 bool readonly_memory)
+					 bool can_speculate)
 {
-	if (HAVE_LLVM >= 0x0309) {
-		LLVMValueRef args [] = {
-			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-			vindex,
-			voffset,
-			LLVMConstInt(ctx->i1, 0, 0), /* glc */
-			LLVMConstInt(ctx->i1, 0, 0), /* slc */
-		};
-
-		return ac_build_intrinsic(ctx,
-					  "llvm.amdgcn.buffer.load.format.v4f32",
-					  ctx->v4f32, args, ARRAY_SIZE(args),
-					  /* READNONE means writes can't
-					   * affect it, while READONLY means
-					   * that writes can affect it. */
-					  readonly_memory && HAVE_LLVM >= 0x0400 ?
-						  AC_FUNC_ATTR_READNONE :
-						  AC_FUNC_ATTR_READONLY);
-	}
-
-	LLVMValueRef args[] = {
-		rsrc,
-		voffset,
+	LLVMValueRef args [] = {
+		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
 		vindex,
+		voffset,
+		LLVMConstInt(ctx->i1, 0, 0), /* glc */
+		LLVMConstInt(ctx->i1, 0, 0), /* slc */
 	};
-	return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input",
-				  ctx->v4f32, args, 3,
-				  AC_FUNC_ATTR_READNONE |
-				  AC_FUNC_ATTR_LEGACY);
+
+	return ac_build_intrinsic(ctx,
+				  "llvm.amdgcn.buffer.load.format.v4f32",
+				  ctx->v4f32, args, ARRAY_SIZE(args),
+				  /* READNONE means writes can't affect it, while
+				   * READONLY means that writes can affect it. */
+				  can_speculate && HAVE_LLVM >= 0x0400 ?
+					  AC_FUNC_ATTR_READNONE :
+					  AC_FUNC_ATTR_READONLY);
 }

 /**
@@ -1244,3 +1204,265 @@ void ac_get_image_intr_name(const char *base_name,
                         data_type_name, coords_type_name, rsrc_type_name);
        }
 }
+
+#define AC_EXP_TARGET (HAVE_LLVM >= 0x0500 ? 0 : 3) /* operand index of the export target in the export call */
+#define AC_EXP_OUT0 (HAVE_LLVM >= 0x0500 ? 2 : 5) /* operand index of the first of the four exported channels */
+
+enum ac_ir_type { /* classification of one export channel operand */
+	AC_IR_UNDEF, /* LLVM undef */
+	AC_IR_CONST, /* floating-point constant */
+	AC_IR_VALUE, /* anything else */
+};
+
+struct ac_vs_exp_chan
+{ /* one of the four data channels of a parsed export */
+	LLVMValueRef value; /* the operand exactly as found in the call */
+	float const_float; /* constant value; only filled in for AC_IR_CONST channels */
+	enum ac_ir_type type;
+};
+
+struct ac_vs_exp_inst { /* a parsed PARAM export call */
+	unsigned offset; /* export target minus V_008DFC_SQ_EXP_PARAM */
+	LLVMValueRef inst; /* the call instruction itself */
+	struct ac_vs_exp_chan chan[4];
+};
+
+struct ac_vs_exports { /* list of the PARAM exports kept so far */
+	unsigned num; /* number of valid entries in exp[] */
+	struct ac_vs_exp_inst exp[VARYING_SLOT_MAX];
+};
+
+/* Return true if the PARAM export has been eliminated: all four channels are undef or 0/1 constants in a pattern DEFAULT_VAL can express, so the call is erased and its offset entry becomes AC_EXP_PARAM_DEFAULT_VAL_*. */
+static bool ac_eliminate_const_output(uint8_t *vs_output_param_offset,
+				      uint32_t num_outputs,
+				      struct ac_vs_exp_inst *exp)
+{
+	unsigned i, default_val; /* SPI_PS_INPUT_CNTL_i.DEFAULT_VAL */
+	bool is_zero[4] = {}, is_one[4] = {};
+
+	for (i = 0; i < 4; i++) {
+		/* It's a constant expression. Undef outputs are eliminated too. */
+		if (exp->chan[i].type == AC_IR_UNDEF) {
+			is_zero[i] = true;
+			is_one[i] = true; /* undef may stand in for either constant */
+		} else if (exp->chan[i].type == AC_IR_CONST) {
+			if (exp->chan[i].const_float == 0)
+				is_zero[i] = true;
+			else if (exp->chan[i].const_float == 1)
+				is_one[i] = true;
+			else
+				return false; /* other constant */
+		} else
+			return false; /* a non-constant value cannot be replaced by DEFAULT_VAL */
+	}
+
+	/* Only certain combinations of 0 and 1 can be eliminated. */
+	if (is_zero[0] && is_zero[1] && is_zero[2])
+		default_val = is_zero[3] ? 0 : 1; /* (0,0,0,0) or (0,0,0,1) */
+	else if (is_one[0] && is_one[1] && is_one[2])
+		default_val = is_zero[3] ? 2 : 3; /* (1,1,1,0) or (1,1,1,1) */
+	else
+		return false;
+
+	/* The PARAM export can be represented as DEFAULT_VAL. Kill it. */
+	LLVMInstructionEraseFromParent(exp->inst);
+
+	/* Change OFFSET to DEFAULT_VAL. */
+	for (i = 0; i < num_outputs; i++) {
+		if (vs_output_param_offset[i] == exp->offset) {
+			vs_output_param_offset[i] =
+				AC_EXP_PARAM_DEFAULT_VAL_0000 + default_val;
+			break; /* only the first output using this offset is rewritten */
+		}
+	}
+	return true;
+}
+
+static bool ac_eliminate_duplicated_output(uint8_t *vs_output_param_offset,
+					   uint32_t num_outputs,
+					   struct ac_vs_exports *processed,
+				           struct ac_vs_exp_inst *exp)
+{ /* If exp matches an export already in processed, erase exp, redirect its offset entry to the match and return true; otherwise return false. */
+	unsigned p, copy_back_channels = 0;
+
+	/* See if the output is already in the list of processed outputs.
+	 * The LLVMValueRef comparison relies on SSA.
+	 */
+	for (p = 0; p < processed->num; p++) {
+		bool different = false;
+
+		for (unsigned j = 0; j < 4; j++) {
+			struct ac_vs_exp_chan *c1 = &processed->exp[p].chan[j];
+			struct ac_vs_exp_chan *c2 = &exp->chan[j];
+
+			/* Treat undef as a match. */
+			if (c2->type == AC_IR_UNDEF)
+				continue;
+
+			/* If c1 is undef but c2 isn't, we can copy c2 to c1
+			 * and consider the instruction duplicated.
+			 */
+			if (c1->type == AC_IR_UNDEF) {
+				copy_back_channels |= 1 << j;
+				continue;
+			}
+
+			/* Test whether the channels are not equal. */
+			if (c1->type != c2->type ||
+			    (c1->type == AC_IR_CONST &&
+			     c1->const_float != c2->const_float) ||
+			    (c1->type == AC_IR_VALUE &&
+			     c1->value != c2->value)) {
+				different = true;
+				break;
+			}
+		}
+		if (!different)
+			break; /* p is the matching export */
+
+		copy_back_channels = 0; /* candidate p did not match; discard the copies recorded for it */
+	}
+	if (p == processed->num)
+		return false; /* no earlier export matches */
+
+	/* If a match was found, but the matching export has undef where the new
+	 * one has a normal value, copy the normal value to the undef channel.
+	 */
+	struct ac_vs_exp_inst *match = &processed->exp[p];
+
+	while (copy_back_channels) {
+		unsigned chan = u_bit_scan(&copy_back_channels);
+
+		assert(match->chan[chan].type == AC_IR_UNDEF);
+		LLVMSetOperand(match->inst, AC_EXP_OUT0 + chan,
+			       exp->chan[chan].value);
+		match->chan[chan] = exp->chan[chan]; /* keep the parsed record in sync with the patched call */
+	}
+
+	/* The PARAM export is duplicated. Kill it. */
+	LLVMInstructionEraseFromParent(exp->inst);
+
+	/* Change OFFSET to the matching export. */
+	for (unsigned i = 0; i < num_outputs; i++) {
+		if (vs_output_param_offset[i] == exp->offset) {
+			vs_output_param_offset[i] = match->offset;
+			break; /* only the first output using this offset is rewritten */
+		}
+	}
+	return true;
+}
+
+void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
+			    LLVMValueRef main_fn,
+			    uint8_t *vs_output_param_offset,
+			    uint32_t num_outputs,
+			    uint8_t *num_param_exports)
+{ /* Remove constant (0/1) and duplicated PARAM exports from main_fn, then renumber the remaining ones; updates vs_output_param_offset[] and, only if something was removed, *num_param_exports. */
+	LLVMBasicBlockRef bb;
+	bool removed_any = false;
+	struct ac_vs_exports exports;
+
+	exports.num = 0;
+
+	/* Process all LLVM instructions. */
+	bb = LLVMGetFirstBasicBlock(main_fn);
+	while (bb) {
+		LLVMValueRef inst = LLVMGetFirstInstruction(bb);
+
+		while (inst) {
+			LLVMValueRef cur = inst;
+			inst = LLVMGetNextInstruction(inst); /* advance first: cur may be erased below */
+			struct ac_vs_exp_inst exp;
+
+			if (LLVMGetInstructionOpcode(cur) != LLVMCall)
+				continue;
+
+			LLVMValueRef callee = ac_llvm_get_called_value(cur);
+
+			if (!ac_llvm_is_function(callee))
+				continue;
+
+			const char *name = LLVMGetValueName(callee);
+			unsigned num_args = LLVMCountParams(callee);
+
+			/* Check if this is an export instruction. */
+			if ((num_args != 9 && num_args != 8) ||
+			    (strcmp(name, "llvm.SI.export") &&
+			     strcmp(name, "llvm.amdgcn.exp.f32")))
+				continue;
+
+			LLVMValueRef arg = LLVMGetOperand(cur, AC_EXP_TARGET);
+			unsigned target = LLVMConstIntGetZExtValue(arg);
+
+			if (target < V_008DFC_SQ_EXP_PARAM)
+				continue; /* export target below the PARAM range: not a PARAM export */
+
+			target -= V_008DFC_SQ_EXP_PARAM;
+
+			/* Parse the instruction. */
+			memset(&exp, 0, sizeof(exp));
+			exp.offset = target;
+			exp.inst = cur;
+
+			for (unsigned i = 0; i < 4; i++) {
+				LLVMValueRef v = LLVMGetOperand(cur, AC_EXP_OUT0 + i);
+
+				exp.chan[i].value = v;
+
+				if (LLVMIsUndef(v)) {
+					exp.chan[i].type = AC_IR_UNDEF;
+				} else if (LLVMIsAConstantFP(v)) {
+					LLVMBool loses_info;
+					exp.chan[i].type = AC_IR_CONST;
+					exp.chan[i].const_float = /* the double result is narrowed to float here */
+						LLVMConstRealGetDouble(v, &loses_info);
+				} else {
+					exp.chan[i].type = AC_IR_VALUE;
+				}
+			}
+
+			/* Eliminate constant and duplicated PARAM exports. */
+			if (ac_eliminate_const_output(vs_output_param_offset,
+						      num_outputs, &exp) ||
+			    ac_eliminate_duplicated_output(vs_output_param_offset,
+							   num_outputs, &exports,
+							   &exp)) {
+				removed_any = true;
+			} else {
+				exports.exp[exports.num++] = exp; /* kept: later exports are compared against it */
+			}
+		}
+		bb = LLVMGetNextBasicBlock(bb);
+	}
+
+	/* Remove holes in export memory due to removed PARAM exports.
+	 * This is done by renumbering all PARAM exports.
+	 */
+	if (removed_any) {
+		uint8_t old_offset[VARYING_SLOT_MAX];
+		unsigned out, i;
+
+		/* Make a copy of the offsets. We need the old version while
+		 * we are modifying some of them. NOTE(review): this copies VARYING_SLOT_MAX bytes regardless of num_outputs; presumably every caller's array is at least that large - confirm. */
+		memcpy(old_offset, vs_output_param_offset,
+		       sizeof(old_offset));
+
+		for (i = 0; i < exports.num; i++) {
+			unsigned offset = exports.exp[i].offset;
+
+			/* Update vs_output_param_offset. Multiple outputs can
+			 * have the same offset.
+			 */
+			for (out = 0; out < num_outputs; out++) {
+				if (old_offset[out] == offset)
+					vs_output_param_offset[out] = i;
+			}
+
+			/* Change the PARAM offset in the instruction. */
+			LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
+				       LLVMConstInt(ctx->i32,
+						    V_008DFC_SQ_EXP_PARAM + i, 0));
+		}
+		*num_param_exports = exports.num;
+	}
+}
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -40,11 +40,20 @@ struct ac_llvm_context {
 	LLVMTypeRef voidt;
 	LLVMTypeRef i1;
 	LLVMTypeRef i8;
+	LLVMTypeRef i16;
 	LLVMTypeRef i32;
+	LLVMTypeRef i64;
+	LLVMTypeRef f16;
 	LLVMTypeRef f32;
+	LLVMTypeRef f64;
 	LLVMTypeRef v4i32;
 	LLVMTypeRef v4f32;
-	LLVMTypeRef v16i8;
+	LLVMTypeRef v8i32;
+
+	LLVMValueRef i32_0;
+	LLVMValueRef i32_1;
+	LLVMValueRef f32_0;
+	LLVMValueRef f32_1;

 	unsigned range_md_kind;
 	unsigned invariant_load_md_kind;
@@ -143,13 +152,14 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
 		     unsigned inst_offset,
 		     unsigned glc,
 		     unsigned slc,
-		     bool readonly_memory);
+		     bool can_speculate,
+		     bool allow_smem);

 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
 					 LLVMValueRef rsrc,
 					 LLVMValueRef vindex,
 					 LLVMValueRef voffset,
-					 bool readonly_memory);
+					 bool can_speculate);

 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
@@ -239,6 +249,12 @@ void ac_get_image_intr_name(const char *base_name,
 			    LLVMTypeRef coords_type,
 			    LLVMTypeRef rsrc_type,
 			    char *out_name, unsigned out_len);
+
+void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
+			    LLVMValueRef main_fn,
+			    uint8_t *vs_output_param_offset,
+			    uint32_t num_outputs,
+			    uint8_t *num_param_exports);
 #ifdef __cplusplus
 }
 #endif
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -34,6 +34,7 @@
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
 #include <llvm/IR/Attributes.h>
+#include <llvm/IR/CallSite.h>

 #if HAVE_LLVM < 0x0500
 namespace llvm {
@@ -44,9 +45,13 @@ typedef AttributeSet AttributeList;
 void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
 {
   llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
+#if HAVE_LLVM < 0x0500
   llvm::AttrBuilder B;
   B.addDereferenceableAttr(bytes);
   A->addAttr(llvm::AttributeList::get(A->getContext(), A->getArgNo() + 1,  B));
+#else
+   A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
+#endif
 }

 bool ac_is_sgpr_param(LLVMValueRef arg)
@@ -57,3 +62,21 @@ bool ac_is_sgpr_param(LLVMValueRef arg)
 	return AS.hasAttribute(ArgNo + 1, llvm::Attribute::ByVal) ||
 	       AS.hasAttribute(ArgNo + 1, llvm::Attribute::InReg);
 }
+
+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call)
+{ /* Return the callee operand of the call instruction 'call'. */
+#if HAVE_LLVM >= 0x0309
+	return LLVMGetCalledValue(call); /* C API entry point, used from LLVM 3.9 on */
+#else
+	return llvm::wrap(llvm::CallSite(llvm::unwrap<llvm::Instruction>(call)).getCalledValue()); /* older LLVM: go through the C++ CallSite helper */
+#endif
+}
+
+bool ac_llvm_is_function(LLVMValueRef v)
+{ /* True when the value 'v' is an llvm::Function. */
+#if HAVE_LLVM >= 0x0309
+	return LLVMGetValueKind(v) == LLVMFunctionValueKind; /* C API value-kind query, used from LLVM 3.9 on */
+#else
+	return llvm::isa<llvm::Function>(llvm::unwrap(v)); /* older LLVM: C++ isa<> check */
+#endif
+}
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -105,17 +105,14 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
 		return "fiji";
 	case CHIP_STONEY:
 		return "stoney";
-#if HAVE_LLVM == 0x0308
-	case CHIP_POLARIS10:
-		return "tonga";
-	case CHIP_POLARIS11:
-		return "tonga";
-#else
 	case CHIP_POLARIS10:
 		return "polaris10";
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		return "polaris11";
-#endif
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
+		return "gfx900";
 	default:
 		return "";
 	}
@@ -131,7 +128,7 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool su
 	                             target,
 	                             triple,
 	                             ac_get_llvm_processor_name(family),
-	                             "+DumpCode,+vgpr-spilling",
+	                             "+DumpCode,+vgpr-spilling,-fp32-denormals,-xnack",
 	                             LLVMCodeGenLevelDefault,
 	                             LLVMRelocDefault,
 	                             LLVMCodeModelDefault);
@@ -223,3 +220,13 @@ ac_dump_module(LLVMModuleRef module)
 	fprintf(stderr, "%s", str);
 	LLVMDisposeMessage(str);
 }
+
+void
+ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
+				     const char *name, int value)
+{ /* Attach the target-dependent attribute 'name' to F, with 'value' formatted as a decimal string. */
+	char str[16]; /* enough for "%i" of a 32-bit int (11 chars + NUL); assumes int is 32 bits */
+
+	snprintf(str, sizeof(str), "%i", value);
+	LLVMAddTargetDependentFunctionAttr(F, name, str);
+}
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -64,6 +64,13 @@ void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
 			    unsigned attrib_mask);
 void ac_dump_module(LLVMModuleRef module);

+LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
+bool ac_llvm_is_function(LLVMValueRef v);
+
+void
+ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
+				     const char *name, int value);
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -29,7 +29,7 @@
 #include "llvm-c/TargetMachine.h"
 #include "amd_family.h"
 #include "../vulkan/radv_descriptor_set.h"
-
+#include "ac_shader_info.h"
 #include "shader_enums.h"
 struct ac_shader_binary;
 struct ac_shader_config;
@@ -41,10 +41,12 @@ struct ac_vs_variant_key {
 	uint32_t instance_rate_inputs;
 	uint32_t as_es:1;
 	uint32_t as_ls:1;
+	uint32_t export_prim_id:1;
 };

 struct ac_tes_variant_key {
 	uint32_t as_es:1;
+	uint32_t export_prim_id:1;
 };

 struct ac_tcs_variant_key {
@@ -83,7 +85,8 @@ struct ac_userdata_info {
 enum ac_ud_index {
 	AC_UD_SCRATCH_RING_OFFSETS = 0,
 	AC_UD_PUSH_CONSTANTS = 1,
-	AC_UD_SHADER_START = 2,
+	AC_UD_INDIRECT_DESCRIPTOR_SETS = 2,
+	AC_UD_SHADER_START = 3,
 	AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
 	AC_UD_VS_BASE_VERTEX_START_INSTANCE,
 	AC_UD_VS_LS_TCS_IN_LAYOUT,
@@ -120,15 +123,15 @@ struct ac_userdata_locations {
 };

 struct ac_vs_output_info {
+	uint8_t	vs_output_param_offset[VARYING_SLOT_MAX];
 	uint8_t clip_dist_mask;
 	uint8_t cull_dist_mask;
+	uint8_t param_exports;
 	bool writes_pointsize;
 	bool writes_layer;
 	bool writes_viewport_index;
-	uint32_t prim_id_output;
-	uint32_t layer_output;
+	bool export_prim_id;
 	uint32_t export_mask;
-	unsigned param_exports;
 	unsigned pos_exports;
 };

@@ -138,10 +141,11 @@ struct ac_es_output_info {

 struct ac_shader_variant_info {
 	struct ac_userdata_locations user_sgprs_locs;
+	struct ac_shader_info info;
 	unsigned num_user_sgprs;
 	unsigned num_input_sgprs;
 	unsigned num_input_vgprs;
-
+	bool need_indirect_descriptor_sets;
 	union {
 		struct {
 			struct ac_vs_output_info outinfo;
@@ -166,7 +170,6 @@ struct ac_shader_variant_info {
 			bool force_persample;
 			bool prim_id_input;
 			bool layer_input;
-			bool uses_sample_positions;
 		} fs;
 		struct {
 			unsigned block_size[3];
@@ -178,6 +181,7 @@ struct ac_shader_variant_info {
 			unsigned invocations;
 			unsigned gsvs_vertex_size;
 			unsigned max_gsvs_emit_size;
+			bool uses_prim_id;
 		} gs;
 		struct {
 			bool uses_prim_id;
--- a/src/amd/common/ac_shader_info.c
+++ b/src/amd/common/ac_shader_info.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright © 2017 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+#include "nir/nir.h"
+#include "ac_shader_info.h"
+#include "ac_nir_to_llvm.h"
+
+static void mark_sampler_desc(nir_variable *var, struct ac_shader_info *info)
+{ /* Flag the descriptor set that 'var' belongs to as used by the shader. */
+	info->desc_set_used_mask |= (1 << var->data.descriptor_set); /* OR the bit in: plain assignment dropped sets recorded earlier */
+}
+
+static void
+gather_intrinsic_info(nir_intrinsic_instr *instr, struct ac_shader_info *info)
+{ /* Update 'info' from one intrinsic instruction; intrinsics not listed below are ignored. */
+	switch (instr->intrinsic) {
+	case nir_intrinsic_interp_var_at_sample:
+		info->ps.needs_sample_positions = true;
+		break;
+	case nir_intrinsic_load_draw_id:
+		info->vs.needs_draw_id = true;
+		break;
+	case nir_intrinsic_load_num_work_groups:
+		info->cs.grid_components_used = instr->num_components; /* overwritten by each such intrinsic, not accumulated */
+		break;
+	case nir_intrinsic_vulkan_resource_index:
+		info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr)); /* one bit per descriptor set index */
+		break;
+	case nir_intrinsic_image_load:
+	case nir_intrinsic_image_store:
+	case nir_intrinsic_image_atomic_add:
+	case nir_intrinsic_image_atomic_min:
+	case nir_intrinsic_image_atomic_max:
+	case nir_intrinsic_image_atomic_and:
+	case nir_intrinsic_image_atomic_or:
+	case nir_intrinsic_image_atomic_xor:
+	case nir_intrinsic_image_atomic_exchange:
+	case nir_intrinsic_image_atomic_comp_swap:
+	case nir_intrinsic_image_size:
+		mark_sampler_desc(instr->variables[0]->var, info); /* all image intrinsics: record the set of the image variable */
+		break;
+	default:
+		break;
+	}
+}
+
+static void
+gather_tex_info(nir_tex_instr *instr, struct ac_shader_info *info)
+{ /* Record the descriptor sets of the sampler and texture variables of a texture instruction; either may be absent. */
+	if (instr->sampler)
+		mark_sampler_desc(instr->sampler->var, info);
+	if (instr->texture)
+		mark_sampler_desc(instr->texture->var, info);
+}
+
+static void
+gather_info_block(nir_block *block, struct ac_shader_info *info)
+{ /* Walk every instruction in 'block' and hand intrinsics and texture ops to their gatherers. */
+	nir_foreach_instr(instr, block) {
+		switch (instr->type) {
+		case nir_instr_type_intrinsic:
+			gather_intrinsic_info(nir_instr_as_intrinsic(instr), info);
+			break;
+		case nir_instr_type_tex:
+			gather_tex_info(nir_instr_as_tex(instr), info);
+			break;
+		default: /* other instruction types contribute nothing to 'info' */
+			break;
+		}
+	}
+}
+
+static void
+gather_info_input_decl(nir_shader *nir,
+		       const struct ac_nir_compiler_options *options,
+		       nir_variable *var,
+		       struct ac_shader_info *info)
+{ /* Called once per shader input variable; 'options' and 'var' are not read here, only the stage is. */
+	switch (nir->stage) {
+	case MESA_SHADER_VERTEX:
+		info->vs.has_vertex_buffers = true; /* any declared input on a vertex shader sets the flag */
+		break;
+	default:
+		break;
+	}
+}
+
+void
+ac_nir_shader_info_pass(struct nir_shader *nir,
+			const struct ac_nir_compiler_options *options,
+			struct ac_shader_info *info)
+{ /* Fill 'info' from 'nir' and 'options'; apart from needs_push_constants, fields are only set, never cleared, here. */
+	struct nir_function *func = (struct nir_function *)exec_list_get_head(&nir->functions); /* only the first function in the list is scanned */
+
+	info->needs_push_constants = true;
+	if (!options->layout) /* no layout at all */
+		info->needs_push_constants = false;
+	else if (!options->layout->push_constant_size &&
+		 !options->layout->dynamic_offset_count) /* layout has neither push constants nor dynamic offsets */
+		info->needs_push_constants = false;
+
+	nir_foreach_variable(variable, &nir->inputs)
+		gather_info_input_decl(nir, options, variable, info);
+
+	nir_foreach_block(block, func->impl) { /* assumes func->impl is non-NULL; TODO confirm with callers */
+		gather_info_block(block, info);
+	}
+}
--- a/src/amd/common/ac_shader_info.h
+++ b/src/amd/common/ac_shader_info.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright © 2017 Red Hat
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef AC_SHADER_INFO_H
+#define AC_SHADER_INFO_H
+
+struct nir_shader;
+struct ac_nir_compiler_options;
+
+struct ac_shader_info { /* Usage summary of one shader, filled in by ac_nir_shader_info_pass(). */
+	bool needs_push_constants; /* false when the options carry no layout, or the layout has no push constants and no dynamic offsets */
+	uint32_t desc_set_used_mask; /* bitmask indexed by descriptor set number (1 << set) */
+	struct {
+		bool has_vertex_buffers; /* needs vertex buffers and base/start */
+		bool needs_draw_id; /* shader contains a load_draw_id intrinsic */
+	} vs;
+	struct {
+		bool needs_sample_positions; /* shader contains an interp_var_at_sample intrinsic */
+	} ps;
+	struct {
+		uint8_t grid_components_used; /* num_components of the load_num_work_groups intrinsic */
+	} cs;
+};
+
+/* A NIR pass to gather all the info needed to optimise the allocation patterns
+ * for the RADV user sgprs
+ */
+void
+ac_nir_shader_info_pass(struct nir_shader *nir,
+			const struct ac_nir_compiler_options *options,
+			struct ac_shader_info *info);
+
+#endif
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -0,0 +1,220 @@
+/*
+ * Copyright © 2017 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
+ * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ */
+
+#ifndef AC_SURFACE_H
+#define AC_SURFACE_H
+
+#include <stdint.h>
+
+#include "amd_family.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Forward declarations. */
+typedef void* ADDR_HANDLE;
+
+struct amdgpu_gpu_info;
+struct radeon_info;
+
+#define RADEON_SURF_MAX_LEVELS                  15
+
+enum radeon_surf_mode {
+    RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
+    RADEON_SURF_MODE_1D = 2,
+    RADEON_SURF_MODE_2D = 3,
+};
+
+/* These are defined exactly like GB_TILE_MODEn.MICRO_TILE_MODE_NEW. */
+enum radeon_micro_mode {
+    RADEON_MICRO_MODE_DISPLAY = 0,
+    RADEON_MICRO_MODE_THIN = 1,
+    RADEON_MICRO_MODE_DEPTH = 2,
+    RADEON_MICRO_MODE_ROTATED = 3,
+};
+
+/* the first 16 bits are reserved for libdrm_radeon, don't use them */
+#define RADEON_SURF_SCANOUT                     (1 << 16)
+#define RADEON_SURF_ZBUFFER                     (1 << 17)
+#define RADEON_SURF_SBUFFER                     (1 << 18)
+#define RADEON_SURF_Z_OR_SBUFFER                (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
+/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
+#define RADEON_SURF_HAS_TILE_MODE_INDEX         (1 << 20)
+#define RADEON_SURF_FMASK                       (1 << 21)
+#define RADEON_SURF_DISABLE_DCC                 (1 << 22)
+#define RADEON_SURF_TC_COMPATIBLE_HTILE         (1 << 23)
+#define RADEON_SURF_IMPORTED                    (1 << 24)
+#define RADEON_SURF_OPTIMIZE_FOR_SPACE          (1 << 25)
+
+struct legacy_surf_level {
+    uint64_t                    offset;
+    uint64_t                    slice_size;
+    uint64_t                    dcc_offset;
+    uint64_t                    dcc_fast_clear_size;
+    uint16_t                    nblk_x;
+    uint16_t                    nblk_y;
+    enum radeon_surf_mode       mode;
+};
+
+struct legacy_surf_layout {
+    unsigned                    bankw:4;  /* max 8 */
+    unsigned                    bankh:4;  /* max 8 */
+    unsigned                    mtilea:4; /* max 8 */
+    unsigned                    tile_split:13;         /* max 4K */
+    unsigned                    stencil_tile_split:13; /* max 4K */
+    unsigned                    pipe_config:5;      /* max 17 */
+    unsigned                    num_banks:5;        /* max 16 */
+    unsigned                    macro_tile_index:4; /* max 15 */
+
+    /* Whether the depth miptree or stencil miptree as used by the DB are
+     * adjusted from their TC compatible form to ensure depth/stencil
+     * compatibility. If either is true, the corresponding plane cannot be
+     * sampled from.
+     */
+    unsigned                    depth_adjusted:1;
+    unsigned                    stencil_adjusted:1;
+
+    struct legacy_surf_level    level[RADEON_SURF_MAX_LEVELS];
+    struct legacy_surf_level    stencil_level[RADEON_SURF_MAX_LEVELS];
+    uint8_t                     tiling_index[RADEON_SURF_MAX_LEVELS];
+    uint8_t                     stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
+};
+
+/* Same as addrlib - AddrResourceType. */
+enum gfx9_resource_type {
+    RADEON_RESOURCE_1D = 0,
+    RADEON_RESOURCE_2D,
+    RADEON_RESOURCE_3D,
+};
+
+struct gfx9_surf_flags {
+    uint16_t                    swizzle_mode; /* tile mode */
+    uint16_t                    epitch; /* (pitch - 1) or (height - 1) */
+};
+
+struct gfx9_surf_meta_flags {
+    unsigned                    rb_aligned:1;   /* optimal for RBs */
+    unsigned                    pipe_aligned:1; /* optimal for TC */
+};
+
+struct gfx9_surf_layout {
+    struct gfx9_surf_flags      surf;    /* color or depth surface */
+    struct gfx9_surf_flags      fmask;   /* not added to surf_size */
+    struct gfx9_surf_flags      stencil; /* added to surf_size, use stencil_offset */
+
+    struct gfx9_surf_meta_flags dcc;   /* metadata of color */
+    struct gfx9_surf_meta_flags htile; /* metadata of depth and stencil */
+    struct gfx9_surf_meta_flags cmask; /* metadata of fmask */
+
+    enum gfx9_resource_type     resource_type; /* 1D, 2D or 3D */
+    uint64_t                    surf_offset; /* 0 unless imported with an offset */
+    /* The size of the 2D plane containing all mipmap levels. */
+    uint64_t                    surf_slice_size;
+    uint16_t                    surf_pitch; /* in blocks */
+    uint16_t                    surf_height;
+    /* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
+    uint32_t                    offset[RADEON_SURF_MAX_LEVELS];
+
+    uint16_t                    dcc_pitch_max;  /* (mip chain pitch - 1) */
+
+    uint64_t                    stencil_offset; /* separate stencil */
+    uint64_t                    fmask_size;
+    uint64_t                    cmask_size;
+
+    uint32_t                    fmask_alignment;
+    uint32_t                    cmask_alignment;
+};
+
+struct radeon_surf {
+    /* Format properties. */
+    unsigned                    blk_w:4;
+    unsigned                    blk_h:4;
+    unsigned                    bpe:5;
+    /* Number of mipmap levels where DCC is enabled starting from level 0.
+     * Non-zero levels may be disabled due to alignment constraints, but not
+     * the first level.
+     */
+    unsigned                    num_dcc_levels:4;
+    unsigned                    is_linear:1;
+    /* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
+    unsigned                    micro_tile_mode:3;
+    uint32_t                    flags;
+
+    /* These are return values. Some of them can be set by the caller, but
+     * they will be treated as hints (e.g. bankw, bankh) and might be
+     * changed by the calculator.
+     */
+    uint64_t                    surf_size;
+    uint64_t                    dcc_size;
+    uint64_t                    htile_size;
+
+    uint32_t                    htile_slice_size;
+
+    uint32_t                    surf_alignment;
+    uint32_t                    dcc_alignment;
+    uint32_t                    htile_alignment;
+
+    union {
+        /* R600-VI return values.
+         *
+         * Some of them can be set by the caller if certain parameters are
+         * desirable. The allocator will try to obey them.
+         */
+        struct legacy_surf_layout legacy;
+
+        /* GFX9+ return values. */
+        struct gfx9_surf_layout gfx9;
+    } u;
+};
+
+struct ac_surf_info {
+	uint32_t width;
+	uint32_t height;
+	uint32_t depth;
+	uint8_t samples;
+	uint8_t levels;
+	uint16_t array_size;
+};
+
+struct ac_surf_config {
+	struct ac_surf_info info;
+	unsigned is_3d : 1;
+	unsigned is_cube : 1;
+};
+
+ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,
+			       const struct amdgpu_gpu_info *amdinfo);
+
+int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
+		       const struct ac_surf_config * config,
+		       enum radeon_surf_mode mode,
+		       struct radeon_surf *surf);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* AC_SURFACE_H */
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -93,6 +93,7 @@ enum radeon_family {
    CHIP_POLARIS11,
    CHIP_POLARIS12,
    CHIP_VEGA10,
+    CHIP_RAVEN,
    CHIP_LAST,
 };

--- a/src/amd/common/amd_kernel_code_t.h
+++ b/src/amd/common/amd_kernel_code_t.h
@@ -36,7 +36,7 @@

 // Gets bits for specified mask from specified src packed instance.
 #define AMD_HSA_BITS_GET(src, mask)                                            \
-  ((src & mask) >> mask ## _SHIFT)                                             \
+  ((src & mask) >> mask ## _SHIFT)

 /* Every amd_*_code_t has the following properties, which are composed of
 * a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),
--- a/src/amd/common/amdgpu_id.h
+++ b/src/amd/common/amdgpu_id.h
@@ -49,6 +49,7 @@ enum {
 	FAMILY_CZ,
 	FAMILY_PI,
 	FAMILY_AI,
+	FAMILY_RV,
 	FAMILY_LAST,
 };

@@ -185,4 +186,13 @@ enum {
 #define ASICREV_IS_VEGA10_P(eChipRev) \
   ((eChipRev) >= AI_VEGA10_P_A0 && (eChipRev) < AI_UNKNOWN)

+/* RV specific rev IDs */
+enum {
+   RAVEN_A0      = 0x01,
+   RAVEN_UNKNOWN = 0xFF
+};
+
+#define ASICREV_IS_RAVEN(eChipRev) \
+   ((eChipRev) >= RAVEN_A0 && (eChipRev) < RAVEN_UNKNOWN)
+
 #endif /* AMDGPU_ID_H */
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -1345,8 +1345,8 @@
 #define     V_008F14_IMG_DATA_FORMAT_RESERVED_56                    0x38
 #define     V_008F14_IMG_DATA_FORMAT_4_4                            0x39
 #define     V_008F14_IMG_DATA_FORMAT_6_5_5                          0x3A
-#define     V_008F14_IMG_DATA_S8_16                                 0x3B
-#define     V_008F14_IMG_DATA_S8_32                                 0x3C
+#define     V_008F14_IMG_DATA_FORMAT_S8_16                          0x3B
+#define     V_008F14_IMG_DATA_FORMAT_S8_32                          0x3C
 #define     V_008F14_IMG_DATA_FORMAT_8_AS_32                        0x3D
 #define     V_008F14_IMG_DATA_FORMAT_8_AS_32_32                     0x3E
 #define     V_008F14_IMG_DATA_FORMAT_32_AS_32_32_32_32              0x3F
@@ -4074,6 +4074,10 @@
 #define   S_028060_PUNCHOUT_MODE(x)                                   (((unsigned)(x) & 0x03) << 0)
 #define   G_028060_PUNCHOUT_MODE(x)                                   (((x) >> 0) & 0x03)
 #define   C_028060_PUNCHOUT_MODE                                      0xFFFFFFFC
+#define     V_028060_AUTO						0
+#define     V_028060_FORCE_ON						1
+#define     V_028060_FORCE_OFF						2
+#define     V_028060_RESERVED						3
 #define   S_028060_POPS_DRAIN_PS_ON_OVERLAP(x)                        (((unsigned)(x) & 0x1) << 2)
 #define   G_028060_POPS_DRAIN_PS_ON_OVERLAP(x)                        (((x) >> 2) & 0x1)
 #define   C_028060_POPS_DRAIN_PS_ON_OVERLAP                           0xFFFFFFFB
--- a/src/amd/common/r600d_common.h
+++ b/src/amd/common/r600d_common.h
@@ -54,6 +54,17 @@
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define		WAIT_REG_MEM_EQUAL		3
 #define         WAIT_REG_MEM_MEM_SPACE(x)       (((unsigned)(x) & 0x3) << 4)
+#define PKT3_COPY_DATA			       0x40
+#define		COPY_DATA_SRC_SEL(x)		((x) & 0xf)
+#define			COPY_DATA_REG		0
+#define			COPY_DATA_MEM		1
+#define                 COPY_DATA_PERF          4
+#define                 COPY_DATA_IMM           5
+#define                 COPY_DATA_TIMESTAMP     9
+#define		COPY_DATA_DST_SEL(x)		(((unsigned)(x) & 0xf) << 8)
+#define                 COPY_DATA_MEM_ASYNC     5
+#define		COPY_DATA_COUNT_SEL		(1 << 16)
+#define		COPY_DATA_WR_CONFIRM		(1 << 20)
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47
 #define         EOP_DATA_SEL(x)                         ((x) << 29)
--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -154,6 +154,7 @@
 #define			COPY_DATA_MEM		1
 #define                 COPY_DATA_PERF          4
 #define                 COPY_DATA_IMM           5
+#define                 COPY_DATA_TIMESTAMP     9
 #define		COPY_DATA_DST_SEL(x)		(((unsigned)(x) & 0xf) << 8)
 #define		COPY_DATA_COUNT_SEL		(1 << 16)
 #define		COPY_DATA_WR_CONFIRM		(1 << 20)
@@ -169,7 +170,7 @@
 */
 /* fix CP DMA before uncommenting: */
 /*#define PKT3_EVENT_WRITE_EOS                   0x48*/ /* not on GFX9 */
-#define PKT3_RELEASE_MEM                       0x49 /* GFX9+ (any ring) or GFX8 (compute ring only) */
+#define PKT3_RELEASE_MEM                       0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
 #define PKT3_ONE_REG_WRITE                     0x57 /* not on CIK */
 #define PKT3_ACQUIRE_MEM                       0x58 /* new for CIK */
 #define PKT3_SET_CONFIG_REG                    0x68
@@ -279,6 +280,7 @@
 #define     S_500_DSL_SEL(x)		(((unsigned)(x) & 0x3) << 20)
 #define       V_500_DST_ADDR		0
 #define       V_500_GDS			1 /* program DAS to 1 as well */
+#define       V_500_NOWHERE		2 /* new for GFX9 */
 #define       V_500_DST_ADDR_TC_L2	3 /* new for CIK */
 #define     S_500_ENGINE(x)		((x) & 0x1)
 #define       V_500_ME			0
@@ -9094,5 +9096,18 @@
 #define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
 #define    CIK_SDMA_COPY_MAX_SIZE                  0x3fffe0

+enum amd_cmp_class_flags {
+	S_NAN = 1 << 0,        // Signaling NaN
+	Q_NAN = 1 << 1,        // Quiet NaN
+	N_INFINITY = 1 << 2,   // Negative infinity
+	N_NORMAL = 1 << 3,     // Negative normal
+	N_SUBNORMAL = 1 << 4,  // Negative subnormal
+	N_ZERO = 1 << 5,       // Negative zero
+	P_ZERO = 1 << 6,       // Positive zero
+	P_SUBNORMAL = 1 << 7,  // Positive subnormal
+	P_NORMAL = 1 << 8,     // Positive normal
+	P_INFINITY = 1 << 9    // Positive infinity
+};
+
 #endif /* _SID_H */

--- a/src/amd/common/sid_tables.py
+++ b/src/amd/common/sid_tables.py
@@ -110,7 +110,7 @@ class IntTable:
        [static] const typename name[] = { ... };
        to filp.
        """
-        idxs = sorted(self.idxs) + [-1]
+        idxs = sorted(self.idxs) + [len(self.table)]

        fragments = [
            ('\t/* %s */ %s' % (
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -59,8 +59,22 @@ VULKAN_SOURCES = \
 	$(VULKAN_GENERATED_FILES) \
 	$(VULKAN_FILES)

-VULKAN_LIB_DEPS =
-
+VULKAN_LIB_DEPS = \
+	libvulkan_common.la \
+	$(top_builddir)/src/vulkan/libvulkan_util.la \
+	$(top_builddir)/src/vulkan/libvulkan_wsi.la \
+	$(top_builddir)/src/amd/common/libamd_common.la \
+	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
+	$(top_builddir)/src/compiler/nir/libnir.la \
+	$(top_builddir)/src/util/libmesautil.la \
+	$(LLVM_LIBS) \
+	$(LIBELF_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(AMDGPU_LIBS) \
+	$(LIBDRM_LIBS) \
+	$(PTHREAD_LIBS) \
+	$(DLOPEN_LIBS) \
+	-lm

 if HAVE_PLATFORM_X11
 AM_CPPFLAGS += \
@@ -70,8 +84,7 @@ AM_CPPFLAGS += \

 VULKAN_SOURCES += $(VULKAN_WSI_X11_FILES)

-# FIXME: Use pkg-config for X11-xcb ldflags.
-VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS) -lX11-xcb
+VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS)
 endif


@@ -89,23 +102,6 @@ endif
 noinst_LTLIBRARIES = libvulkan_common.la
 libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)

-VULKAN_LIB_DEPS += \
-	libvulkan_common.la \
-	$(top_builddir)/src/vulkan/libvulkan_util.la \
-	$(top_builddir)/src/vulkan/libvulkan_wsi.la \
-	$(top_builddir)/src/amd/common/libamd_common.la \
-	$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
-	$(top_builddir)/src/compiler/nir/libnir.la \
-	$(top_builddir)/src/util/libmesautil.la \
-	$(LLVM_LIBS) \
-	$(LIBELF_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(AMDGPU_LIBS) \
-	$(LIBDRM_LIBS) \
-	$(PTHREAD_LIBS) \
-	$(DLOPEN_LIBS) \
-	-lm
-
 nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
 libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

--- a/src/amd/vulkan/Makefile.sources
+++ b/src/amd/vulkan/Makefile.sources
@@ -51,6 +51,7 @@ VULKAN_FILES := \
 	radv_meta_fast_clear.c \
 	radv_meta_resolve.c \
 	radv_meta_resolve_cs.c \
+	radv_meta_resolve_fs.c \
 	radv_pass.c \
 	radv_pipeline.c \
 	radv_pipeline_cache.c \
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -37,4 +37,7 @@ enum {
 	RADV_DEBUG_NO_IBS            = 0x200,
 };

+enum {
+	RADV_PERFTEST_BATCHCHAIN     =   0x1,
+};
 #endif
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -77,6 +77,7 @@ VkResult radv_CreateDescriptorSetLayout(
 		const VkDescriptorSetLayoutBinding *binding = &pCreateInfo->pBindings[j];
 		uint32_t b = binding->binding;
 		uint32_t alignment;
+		unsigned binding_buffer_count = 0;

 		switch (binding->descriptorType) {
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -85,7 +86,7 @@ VkResult radv_CreateDescriptorSetLayout(
 			set_layout->binding[b].dynamic_offset_count = 1;
 			set_layout->dynamic_shader_stages |= binding->stageFlags;
 			set_layout->binding[b].size = 0;
-			set_layout->binding[b].buffer_count = 1;
+			binding_buffer_count = 1;
 			alignment = 1;
 			break;
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
@@ -93,7 +94,7 @@ VkResult radv_CreateDescriptorSetLayout(
 		case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
 		case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
 			set_layout->binding[b].size = 16;
-			set_layout->binding[b].buffer_count = 1;
+			binding_buffer_count = 1;
 			alignment = 16;
 			break;
 		case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
@@ -101,13 +102,13 @@ VkResult radv_CreateDescriptorSetLayout(
 		case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
 			/* main descriptor + fmask descriptor */
 			set_layout->binding[b].size = 64;
-			set_layout->binding[b].buffer_count = 1;
+			binding_buffer_count = 1;
 			alignment = 32;
 			break;
 		case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
 			/* main descriptor + fmask descriptor + sampler */
 			set_layout->binding[b].size = 96;
-			set_layout->binding[b].buffer_count = 1;
+			binding_buffer_count = 1;
 			alignment = 32;
 			break;
 		case VK_DESCRIPTOR_TYPE_SAMPLER:
@@ -150,7 +151,7 @@ VkResult radv_CreateDescriptorSetLayout(
 		}

 		set_layout->size += binding->descriptorCount * set_layout->binding[b].size;
-		buffer_count += binding->descriptorCount * set_layout->binding[b].buffer_count;
+		buffer_count += binding->descriptorCount * binding_buffer_count;
 		dynamic_offset_count += binding->descriptorCount *
 			set_layout->binding[b].dynamic_offset_count;
 		set_layout->shader_stages |= binding->stageFlags;
@@ -261,26 +262,29 @@ radv_descriptor_set_create(struct radv_device *device,
 			   struct radv_descriptor_set **out_set)
 {
 	struct radv_descriptor_set *set;
-	unsigned mem_size = sizeof(struct radv_descriptor_set) +
+	unsigned range_offset = sizeof(struct radv_descriptor_set) +
 		sizeof(struct radeon_winsys_bo *) * layout->buffer_count;
-	set = vk_alloc2(&device->alloc, NULL, mem_size, 8,
-			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+	unsigned mem_size = range_offset +
+		sizeof(struct radv_descriptor_range) * layout->dynamic_offset_count;

-	if (!set)
-		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+	if (pool->host_memory_base) {
+		if (pool->host_memory_end - pool->host_memory_ptr < mem_size)
+			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
+
+		set = (struct radv_descriptor_set*)pool->host_memory_ptr;
+		pool->host_memory_ptr += mem_size;
+	} else {
+		set = vk_alloc2(&device->alloc, NULL, mem_size, 8,
+		                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+
+		if (!set)
+			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+	}

 	memset(set, 0, mem_size);

 	if (layout->dynamic_offset_count) {
-		unsigned size = sizeof(struct radv_descriptor_range) *
-		                layout->dynamic_offset_count;
-		set->dynamic_descriptors = vk_alloc2(&device->alloc, NULL, size, 8,
-			                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-
-		if (!set->dynamic_descriptors) {
-			vk_free2(&device->alloc, NULL, set);
-			return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
-		}
+		set->dynamic_descriptors = (struct radv_descriptor_range*)((uint8_t*)set + range_offset);
 	}

 	set->layout = layout;
@@ -297,10 +301,12 @@ radv_descriptor_set_create(struct radv_device *device,
 			set->va = device->ws->buffer_get_va(set->bo) + pool->current_offset;
 			pool->current_offset += layout_size;
 			list_addtail(&set->vram_list, &pool->vram_list);
-		} else {
+		} else if (!pool->host_memory_base) {
 			uint64_t offset = 0;
 			struct list_head *prev = &pool->vram_list;
 			struct radv_descriptor_set *cur;
+
+			assert(!pool->host_memory_base);
 			LIST_FOR_EACH_ENTRY(cur, &pool->vram_list, vram_list) {
 				uint64_t start = (uint8_t*)cur->mapped_ptr - pool->mapped_ptr;
 				if (start - offset >= layout_size)
@@ -319,7 +325,8 @@ radv_descriptor_set_create(struct radv_device *device,
 			set->mapped_ptr = (uint32_t*)(pool->mapped_ptr + offset);
 			set->va = device->ws->buffer_get_va(set->bo) + offset;
 			list_add(&set->vram_list, prev);
-		}
+		} else
+			return vk_error(VK_ERROR_OUT_OF_POOL_MEMORY_KHR);
 	}

 	for (unsigned i = 0; i < layout->binding_count; ++i) {
@@ -348,10 +355,10 @@ radv_descriptor_set_destroy(struct radv_device *device,
 			    struct radv_descriptor_set *set,
 			    bool free_bo)
 {
+	assert(!pool->host_memory_base);
+
 	if (free_bo && set->size)
 		list_del(&set->vram_list);
-	if (set->dynamic_descriptors)
-		vk_free2(&device->alloc, NULL, set->dynamic_descriptors);
 	vk_free2(&device->alloc, NULL, set);
 }

@@ -364,18 +371,17 @@ VkResult radv_CreateDescriptorPool(
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	struct radv_descriptor_pool *pool;
 	int size = sizeof(struct radv_descriptor_pool);
-	uint64_t bo_size = 0;
-	pool = vk_alloc2(&device->alloc, pAllocator, size, 8,
-			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
-	if (!pool)
-		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+	uint64_t bo_size = 0, bo_count = 0, range_count = 0;

-	memset(pool, 0, sizeof(*pool));

 	for (unsigned i = 0; i < pCreateInfo->poolSizeCount; ++i) {
+		if (pCreateInfo->pPoolSizes[i].type != VK_DESCRIPTOR_TYPE_SAMPLER)
+			bo_count += pCreateInfo->pPoolSizes[i].descriptorCount;
+
 		switch(pCreateInfo->pPoolSizes[i].type) {
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+			range_count += pCreateInfo->pPoolSizes[i].descriptorCount;
 			break;
 		case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
 		case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
@@ -399,6 +405,26 @@ VkResult radv_CreateDescriptorPool(
 		}
 	}

+	if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
+		uint64_t host_size = pCreateInfo->maxSets * sizeof(struct radv_descriptor_set);
+		host_size += sizeof(struct radeon_winsys_bo*) * bo_count;
+		host_size += sizeof(struct radv_descriptor_range) * range_count;
+		size += host_size;
+	}
+
+	pool = vk_alloc2(&device->alloc, pAllocator, size, 8,
+	                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
+	if (!pool)
+		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+	memset(pool, 0, sizeof(*pool));
+
+	if (!(pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT)) {
+		pool->host_memory_base = (uint8_t*)pool + sizeof(struct radv_descriptor_pool);
+		pool->host_memory_ptr = pool->host_memory_base;
+		pool->host_memory_end = (uint8_t*)pool + size;
+	}
+
 	if (bo_size) {
 		pool->bo = device->ws->buffer_create(device->ws, bo_size,
 							32, RADEON_DOMAIN_VRAM, 0);
@@ -422,9 +448,11 @@ void radv_DestroyDescriptorPool(
 	if (!pool)
 		return;

-	list_for_each_entry_safe(struct radv_descriptor_set, set,
-				 &pool->vram_list, vram_list) {
-		radv_descriptor_set_destroy(device, pool, set, false);
+	if (!pool->host_memory_base) {
+		list_for_each_entry_safe(struct radv_descriptor_set, set,
+		                         &pool->vram_list, vram_list) {
+			radv_descriptor_set_destroy(device, pool, set, false);
+		}
 	}

 	if (pool->bo)
@@ -440,14 +468,17 @@ VkResult radv_ResetDescriptorPool(
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	RADV_FROM_HANDLE(radv_descriptor_pool, pool, descriptorPool);

-	list_for_each_entry_safe(struct radv_descriptor_set, set,
-				 &pool->vram_list, vram_list) {
-		radv_descriptor_set_destroy(device, pool, set, false);
+	if (!pool->host_memory_base) {
+		list_for_each_entry_safe(struct radv_descriptor_set, set,
+		                         &pool->vram_list, vram_list) {
+			radv_descriptor_set_destroy(device, pool, set, false);
+		}
 	}

 	list_inithead(&pool->vram_list);

 	pool->current_offset = 0;
+	pool->host_memory_ptr = pool->host_memory_base;

 	return VK_SUCCESS;
 }
@@ -496,7 +527,7 @@ VkResult radv_FreeDescriptorSets(
 	for (uint32_t i = 0; i < count; i++) {
 		RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);

-		if (set)
+		if (set && !pool->host_memory_base)
 			radv_descriptor_set_destroy(device, pool, set, true);
 	}
 	return VK_SUCCESS;
@@ -639,7 +670,7 @@ void radv_update_descriptor_sets(
 		ptr += binding_layout->offset / 4;
 		ptr += binding_layout->size * writeset->dstArrayElement / 4;
 		buffer_list += binding_layout->buffer_offset;
-		buffer_list += binding_layout->buffer_count * writeset->dstArrayElement;
+		buffer_list += writeset->dstArrayElement;
 		for (j = 0; j < writeset->descriptorCount; ++j) {
 			switch(writeset->descriptorType) {
 			case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
@@ -690,7 +721,7 @@ void radv_update_descriptor_sets(
 				break;
 			}
 			ptr += binding_layout->size / 4;
-			buffer_list += binding_layout->buffer_count;
+			++buffer_list;
 		}

 	}
@@ -734,8 +765,7 @@ VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice _device,
 		const VkDescriptorUpdateTemplateEntryKHR *entry = &pCreateInfo->pDescriptorUpdateEntries[i];
 		const struct radv_descriptor_set_binding_layout *binding_layout =
 			set_layout->binding + entry->dstBinding;
-		const uint32_t buffer_offset = binding_layout->buffer_offset +
-			binding_layout->buffer_count * entry->dstArrayElement;
+		const uint32_t buffer_offset = binding_layout->buffer_offset + entry->dstArrayElement;
 		const uint32_t *immutable_samplers = NULL;
 		uint32_t dst_offset;
 		uint32_t dst_stride;
@@ -775,7 +805,6 @@ VkResult radv_CreateDescriptorUpdateTemplateKHR(VkDevice _device,
 			.dst_offset = dst_offset,
 			.dst_stride = dst_stride,
 			.buffer_offset = buffer_offset,
-			.buffer_count = binding_layout->buffer_count,
 			.has_sampler = !binding_layout->immutable_samplers_offset,
 			.immutable_samplers = immutable_samplers
 		};
@@ -859,7 +888,7 @@ void radv_update_descriptor_set_with_template(struct radv_device *device,
 			}
 		        pSrc += templ->entry[i].src_stride;
 			pDst += templ->entry[i].dst_stride;
-			buffer_list += templ->entry[i].buffer_count;
+			++buffer_list;
 		}
 	}
 }
--- a/src/amd/vulkan/radv_descriptor_set.h
+++ b/src/amd/vulkan/radv_descriptor_set.h
@@ -26,7 +26,7 @@

 #include <vulkan/vulkan.h>

-#define MAX_SETS         8
+#define MAX_SETS         32

 struct radv_descriptor_set_binding_layout {
   VkDescriptorType type;
@@ -38,10 +38,9 @@ struct radv_descriptor_set_binding_layout {
   uint32_t buffer_offset;
   uint16_t dynamic_offset_offset;

+   uint16_t dynamic_offset_count;
   /* redundant with the type, each for a single array element */
   uint32_t size;
-   uint32_t buffer_count;
-   uint16_t dynamic_offset_count;

   /* Offset in the radv_descriptor_set_layout of the immutable samplers, or 0
    * if there are no immutable samplers. */
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -33,7 +33,7 @@
 #include "radv_cs.h"
 #include "util/disk_cache.h"
 #include "util/strtod.h"
-#include "util/vk_util.h"
+#include "vk_util.h"
 #include <xf86drm.h>
 #include <amdgpu.h>
 #include <amdgpu_drm.h>
@@ -42,6 +42,7 @@
 #include "ac_llvm_util.h"
 #include "vk_format.h"
 #include "sid.h"
+#include "gfx9d.h"
 #include "util/debug.h"

 static int
@@ -61,6 +62,15 @@ radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
 	return 0;
 }

+static void
+radv_get_device_uuid(drmDevicePtr device, void *uuid) { /* Fills uuid (VK_UUID_SIZE bytes) from device's PCI bus address. NOTE(review): businfo.pci is read unconditionally - confirm callers only pass PCI devices. */
+	memset(uuid, 0, VK_UUID_SIZE); /* bytes not written below stay zero */
+	memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2); /* bytes 0-1: PCI domain */
+	memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1); /* byte 2: PCI bus */
+	memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1); /* byte 3: PCI device */
+	memcpy((char*)uuid + 4, &device->businfo.pci->func, 1); /* byte 4: PCI function */
+}
+
 static const VkExtensionProperties instance_extensions[] = {
 	{
 		.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
@@ -88,6 +98,10 @@ static const VkExtensionProperties instance_extensions[] = {
 		.extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
 		.specVersion = 1,
 	},
+	{
+		.extensionName = VK_KHX_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
+		.specVersion = 1,
+	},
 };

 static const VkExtensionProperties common_device_extensions[] = {
@@ -127,6 +141,14 @@ static const VkExtensionProperties common_device_extensions[] = {
 		.extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
 		.specVersion = 1,
 	},
+	{
+		.extensionName = VK_KHX_EXTERNAL_MEMORY_EXTENSION_NAME,
+		.specVersion = 1,
+	},
+	{
+		.extensionName = VK_KHX_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
+		.specVersion = 1,
+	},
 };

 static VkResult
@@ -187,11 +209,40 @@ is_extension_enabled(const VkExtensionProperties *extensions,
 	return false;
 }

+static const char *
+get_chip_name(enum radeon_family family) /* Maps a radeon_family value to a fixed "AMD RADV <chip>" string literal. */
+{
+	switch (family) {
+	case CHIP_TAHITI: return "AMD RADV TAHITI";
+	case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
+	case CHIP_VERDE: return "AMD RADV CAPE VERDE"; /* label is not the bare enum suffix */
+	case CHIP_OLAND: return "AMD RADV OLAND";
+	case CHIP_HAINAN: return "AMD RADV HAINAN";
+	case CHIP_BONAIRE: return "AMD RADV BONAIRE";
+	case CHIP_KAVERI: return "AMD RADV KAVERI";
+	case CHIP_KABINI: return "AMD RADV KABINI";
+	case CHIP_HAWAII: return "AMD RADV HAWAII";
+	case CHIP_MULLINS: return "AMD RADV MULLINS";
+	case CHIP_TONGA: return "AMD RADV TONGA";
+	case CHIP_ICELAND: return "AMD RADV ICELAND";
+	case CHIP_CARRIZO: return "AMD RADV CARRIZO";
+	case CHIP_FIJI: return "AMD RADV FIJI";
+	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
+	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
+	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
+	case CHIP_STONEY: return "AMD RADV STONEY";
+	case CHIP_VEGA10: return "AMD RADV VEGA"; /* label omits the "10" of the enum name */
+	case CHIP_RAVEN: return "AMD RADV RAVEN";
+	default: return "AMD RADV unknown"; /* any family value not listed above */
+	}
+}
+
 static VkResult
 radv_physical_device_init(struct radv_physical_device *device,
 			  struct radv_instance *instance,
-			  const char *path)
+			  drmDevicePtr drm_device)
 {
+	const char *path = drm_device->nodes[DRM_NODE_RENDER];
 	VkResult result;
 	drmVersionPtr version;
 	int fd;
@@ -219,7 +270,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 	assert(strlen(path) < ARRAY_SIZE(device->path));
 	strncpy(device->path, path, ARRAY_SIZE(device->path));

-	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
+	device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
+					       instance->perftest_flags);
 	if (!device->ws) {
 		result = VK_ERROR_INCOMPATIBLE_DRIVER;
 		goto fail;
@@ -249,7 +301,15 @@ radv_physical_device_init(struct radv_physical_device *device,
 		goto fail;

 	fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
-	device->name = device->rad_info.name;
+	device->name = get_chip_name(device->rad_info.family);
+
+	radv_get_device_uuid(drm_device, device->device_uuid);
+
+	if (device->rad_info.family == CHIP_STONEY ||
+	    device->rad_info.chip_class >= GFX9) {
+		device->has_rbplus = true;
+		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
+	}

 	return VK_SUCCESS;

@@ -267,7 +327,6 @@ radv_physical_device_finish(struct radv_physical_device *device)
 	close(device->local_fd);
 }

-
 static void *
 default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
@@ -309,6 +368,11 @@ static const struct debug_control radv_debug_options[] = {
 	{NULL, 0}
 };

+static const struct debug_control radv_perftest_options[] = { /* name -> flag table handed to parse_debug_string() for the RADV_PERFTEST environment variable */
+	{"batchchain", RADV_PERFTEST_BATCHCHAIN},
+	{NULL, 0} /* presumably the end-of-table sentinel, as in radv_debug_options - confirm against parse_debug_string */
+};
+
 VkResult radv_CreateInstance(
 	const VkInstanceCreateInfo*                 pCreateInfo,
 	const VkAllocationCallbacks*                pAllocator,
@@ -366,6 +430,9 @@ VkResult radv_CreateInstance(
 	instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
 						   radv_debug_options);

+	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
+						   radv_perftest_options);
+
 	*pInstance = radv_instance_to_handle(instance);

 	return VK_SUCCESS;
@@ -401,7 +468,7 @@ radv_enumerate_devices(struct radv_instance *instance)

 	instance->physicalDeviceCount = 0;

-	max_devices = drmGetDevices2(0, devices, sizeof(devices));
+	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
 	if (max_devices < 1)
 		return VK_ERROR_INCOMPATIBLE_DRIVER;

@@ -413,13 +480,15 @@ radv_enumerate_devices(struct radv_instance *instance)
 			result = radv_physical_device_init(instance->physicalDevices +
 			                                   instance->physicalDeviceCount,
 			                                   instance,
-			                                   devices[i]->nodes[DRM_NODE_RENDER]);
+			                                   devices[i]);
 			if (result == VK_SUCCESS)
 				++instance->physicalDeviceCount;
 			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
-				return result;
+				break;
 		}
 	}
+	drmFreeDevices(devices, max_devices);
+
 	return result;
 }

@@ -454,8 +523,8 @@ void radv_GetPhysicalDeviceFeatures(
 	VkPhysicalDevice                            physicalDevice,
 	VkPhysicalDeviceFeatures*                   pFeatures)
 {
-	//   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
-
+	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
+	bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
 	memset(pFeatures, 0, sizeof(*pFeatures));

 	*pFeatures = (VkPhysicalDeviceFeatures) {
@@ -463,8 +532,8 @@ void radv_GetPhysicalDeviceFeatures(
 		.fullDrawIndexUint32                      = true,
 		.imageCubeArray                           = true,
 		.independentBlend                         = true,
-		.geometryShader                           = true,
-		.tessellationShader                       = true,
+		.geometryShader                           = !is_gfx9,
+		.tessellationShader                       = !is_gfx9,
 		.sampleRateShading                        = false,
 		.dualSrcBlend                             = true,
 		.logicOp                                  = true,
@@ -514,28 +583,6 @@ void radv_GetPhysicalDeviceFeatures2KHR(
 	return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
 }

-static uint32_t radv_get_driver_version()
-{
-	const char *minor_string = strchr(VERSION, '.');
-	const char *patch_string = minor_string ? strchr(minor_string + 1, ','): NULL;
-	int major = atoi(VERSION);
-	int minor = minor_string ? atoi(minor_string + 1) : 0;
-	int patch = patch_string ? atoi(patch_string + 1) : 0;
-	if (strstr(VERSION, "devel")) {
-		if (patch == 0) {
-			patch = 99;
-			if (minor == 0) {
-				minor = 99;
-				--major;
-			} else
-				--minor;
-		} else
-			--patch;
-	}
-	uint32_t version = VK_MAKE_VERSION(major, minor, patch);
-	return version;
-}
-
 void radv_GetPhysicalDeviceProperties(
 	VkPhysicalDevice                            physicalDevice,
 	VkPhysicalDeviceProperties*                 pProperties)
@@ -652,7 +699,7 @@ void radv_GetPhysicalDeviceProperties(
 		.sampledImageStencilSampleCounts          = sample_counts,
 		.storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
 		.maxSampleMaskWords                       = 1,
-		.timestampComputeAndGraphics              = false,
+		.timestampComputeAndGraphics              = true,
 		.timestampPeriod                          = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
 		.maxClipDistances                         = 8,
 		.maxCullDistances                         = 8,
@@ -671,10 +718,10 @@ void radv_GetPhysicalDeviceProperties(

 	*pProperties = (VkPhysicalDeviceProperties) {
 		.apiVersion = VK_MAKE_VERSION(1, 0, 42),
-		.driverVersion = radv_get_driver_version(),
+		.driverVersion = vk_get_driver_version(),
 		.vendorID = 0x1002,
 		.deviceID = pdevice->rad_info.pci_id,
-		.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
+		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
 		.limits = limits,
 		.sparseProperties = {0},
 	};
@@ -687,6 +734,7 @@ void radv_GetPhysicalDeviceProperties2KHR(
 	VkPhysicalDevice                            physicalDevice,
 	VkPhysicalDeviceProperties2KHR             *pProperties)
 {
+	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
 	radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

 	vk_foreach_struct(ext, pProperties->pNext) {
@@ -697,6 +745,13 @@ void radv_GetPhysicalDeviceProperties2KHR(
 			properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
+			VkPhysicalDeviceIDPropertiesKHX *properties = (VkPhysicalDeviceIDPropertiesKHX*)ext;
+			radv_device_get_cache_uuid(0, properties->driverUUID);
+			memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
+			properties->deviceLUIDValid = false;
+			break;
+		}
 		default:
 			break;
 		}
@@ -710,7 +765,7 @@ static void radv_get_physical_device_queue_family_properties(
 {
 	int num_queue_families = 1;
 	int idx;
-	if (pdevice->rad_info.compute_rings > 0 &&
+	if (pdevice->rad_info.num_compute_rings > 0 &&
 	    pdevice->rad_info.chip_class >= CIK &&
 	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
 		num_queue_families++;
@@ -737,7 +792,7 @@ static void radv_get_physical_device_queue_family_properties(
 		idx++;
 	}

-	if (pdevice->rad_info.compute_rings > 0 &&
+	if (pdevice->rad_info.num_compute_rings > 0 &&
 	    pdevice->rad_info.chip_class >= CIK &&
 	    !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
 		if (*pCount > idx) {
@@ -745,7 +800,7 @@ static void radv_get_physical_device_queue_family_properties(
 				.queueFlags = VK_QUEUE_COMPUTE_BIT |
 				              VK_QUEUE_TRANSFER_BIT |
 				              VK_QUEUE_SPARSE_BINDING_BIT,
-				.queueCount = pdevice->rad_info.compute_rings,
+				.queueCount = pdevice->rad_info.num_compute_rings,
 				.timestampValidBits = 64,
 				.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
 			};
@@ -829,11 +884,11 @@ void radv_GetPhysicalDeviceMemoryProperties(
 	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
 	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
 		.size = physical_device->rad_info.vram_size -
-				physical_device->rad_info.visible_vram_size,
+				physical_device->rad_info.vram_vis_size,
 		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 	};
 	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.visible_vram_size,
+		.size = physical_device->rad_info.vram_vis_size,
 		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 	};
 	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
@@ -915,6 +970,9 @@ radv_device_init_gs_info(struct radv_device *device)
 	case CHIP_FIJI:
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGA10:
+	case CHIP_RAVEN:
 		device->gs_table_depth = 32;
 		return;
 	default:
@@ -1038,6 +1096,7 @@ VkResult radv_CreateDevice(
 		case RADV_QUEUE_COMPUTE:
 			si_cs_emit_cache_flush(device->flush_cs[family],
 			                       device->physical_device->rad_info.chip_class,
+					       NULL, 0,
 			                       family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
 			                       RADV_CMD_FLAG_INV_ICACHE |
 			                       RADV_CMD_FLAG_INV_SMEM_L1 |
@@ -1046,6 +1105,23 @@ VkResult radv_CreateDevice(
 			break;
 		}
 		device->ws->cs_finalize(device->flush_cs[family]);
+
+		device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family); /* built like flush_cs above, plus partial-flush flags */
+		switch (family) { /* no default: any other family emits nothing before cs_finalize */
+		case RADV_QUEUE_GENERAL:
+		case RADV_QUEUE_COMPUTE:
+			si_cs_emit_cache_flush(device->flush_shader_cs[family],
+			                       device->physical_device->rad_info.chip_class,
+			                       NULL, 0,
+			                       family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
+			                       /* The conditional must be parenthesized: ?: binds looser than |, so without
+			                        * the parentheses the compute case dropped every RADV_CMD_FLAG_INV_* flag. */
+			                       (family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
+			                       RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SMEM_L1 |
+			                       RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_GLOBAL_L2);
+			break;
+		}
+		device->ws->cs_finalize(device->flush_shader_cs[family]);
 	}

 	if (getenv("RADV_TRACE_FILE")) {
@@ -1121,6 +1197,8 @@ void radv_DestroyDevice(
 			device->ws->cs_destroy(device->empty_cs[i]);
 		if (device->flush_cs[i])
 			device->ws->cs_destroy(device->flush_cs[i]);
+		if (device->flush_shader_cs[i])
+			device->ws->cs_destroy(device->flush_shader_cs[i]);
 	}
 	radv_device_finish_meta(device);

@@ -1397,11 +1475,10 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
 		max_offchip_buffers = MIN2(max_offchip_buffers, 126);
 		break;
 	case CIK:
-		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
-		break;
 	case VI:
+	case GFX9:
 	default:
-		max_offchip_buffers = MIN2(max_offchip_buffers, 512);
+		max_offchip_buffers = MIN2(max_offchip_buffers, 508);
 		break;
 	}

@@ -1638,6 +1715,10 @@ radv_get_preamble_cs(struct radv_queue *queue,
 						       S_030938_SIZE(tess_factor_ring_size / 4));
 				radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
 						       tf_va >> 8);
+				if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
+					radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
+							       tf_va >> 40);
+				}
 				radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
 			} else {
 				radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
@@ -1681,6 +1762,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		if (!i) {
 			si_cs_emit_cache_flush(cs,
 			                       queue->device->physical_device->rad_info.chip_class,
+					       NULL, 0,
 			                       queue->queue_family_index == RING_COMPUTE &&
 			                         queue->device->physical_device->rad_info.chip_class >= CIK,
 			                       RADV_CMD_FLAG_INV_ICACHE |
@@ -1822,7 +1904,7 @@ VkResult radv_QueueSubmit(

 	for (uint32_t i = 0; i < submitCount; i++) {
 		struct radeon_winsys_cs **cs_array;
-		bool do_flush = !i;
+		bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
 		bool can_patch = !do_flush;
 		uint32_t advance;

@@ -1849,7 +1931,9 @@ VkResult radv_QueueSubmit(
 					        (pSubmits[i].commandBufferCount + do_flush));

 		if(do_flush)
-			cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
+			cs_array[0] = pSubmits[i].waitSemaphoreCount ?
+				queue->device->flush_shader_cs[queue->queue_family_index] :
+				queue->device->flush_cs[queue->queue_family_index];

 		for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
 			RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
@@ -1992,7 +2076,7 @@ VkResult radv_AllocateMemory(
 	VkResult result;
 	enum radeon_bo_domain domain;
 	uint32_t flags = 0;
-	const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
+
 	assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

 	if (pAllocateInfo->allocationSize == 0) {
@@ -2001,15 +2085,10 @@ VkResult radv_AllocateMemory(
 		return VK_SUCCESS;
 	}

-	vk_foreach_struct(ext, pAllocateInfo->pNext) {
-		switch (ext->sType) {
-		case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
-			dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
-			break;
-		default:
-			break;
-		}
-	}
+	const VkImportMemoryFdInfoKHX *import_info =
+		vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHX);
+	const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info =
+		vk_find_struct_const(pAllocateInfo->pNext, DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV);

 	mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
 			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -2024,6 +2103,18 @@ VkResult radv_AllocateMemory(
 		mem->buffer = NULL;
 	}

+	if (import_info) { /* import path: wrap an existing fd instead of allocating a new buffer */
+		assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
+		mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
+						     NULL, NULL);
+		if (!mem->bo) {
+			result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
+			goto fail; /* failed import: the fd is left open and still belongs to the caller */
+		}
+		close(import_info->fd); /* a successful import transfers fd ownership to the driver; not closing it leaks the descriptor */
+		goto out_success;
+	}
+
 	uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
 	if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
 	    pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
@@ -2047,7 +2138,7 @@ VkResult radv_AllocateMemory(
 		goto fail;
 	}
 	mem->type_index = pAllocateInfo->memoryTypeIndex;
-
+out_success:
 	*pMem = radv_device_memory_to_handle(mem);

 	return VK_SUCCESS;
@@ -2583,9 +2674,9 @@ static inline unsigned
 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
 {
 	if (stencil)
-		return image->surface.stencil_tiling_index[level];
+		return image->surface.u.legacy.stencil_tiling_index[level];
 	else
-		return image->surface.tiling_index[level];
+		return image->surface.u.legacy.tiling_index[level];
 }

 static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
@@ -2601,24 +2692,68 @@ radv_initialise_color_surface(struct radv_device *device,
 	const struct vk_format_description *desc;
 	unsigned ntype, format, swap, endian;
 	unsigned blend_clamp = 0, blend_bypass = 0;
-	unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
 	uint64_t va;
 	const struct radeon_surf *surf = &iview->image->surface;
-	const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];

 	desc = vk_format_description(iview->vk_format);

 	memset(cb, 0, sizeof(*cb));

+	/* Intensity is implemented as Red, so treat it that way. */
+	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
+
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
-	va += level_info->offset;
+
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		struct gfx9_surf_meta_flags meta;
+		if (iview->image->dcc_offset)
+			meta = iview->image->surface.u.gfx9.dcc;
+		else
+			meta = iview->image->surface.u.gfx9.cmask;
+
+		cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
+			S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
+			S_028C74_RB_ALIGNED(meta.rb_aligned) |
+			S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
+
+		va += iview->image->surface.u.gfx9.surf_offset >> 8;
+	} else {
+		const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
+		unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
+
+		va += level_info->offset;
+
+		pitch_tile_max = level_info->nblk_x / 8 - 1;
+		slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
+		tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
+
+		cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
+		cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
+		cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
+
+		cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
+		cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
+
+		if (iview->image->fmask.size) {
+			if (device->physical_device->rad_info.chip_class >= CIK)
+				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
+			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
+			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
+		} else {
+			/* This must be set for fast clear to work without FMASK. */
+			if (device->physical_device->rad_info.chip_class >= CIK)
+				cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
+			cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
+			cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
+		}
+	}
+
 	cb->cb_color_base = va >> 8;

 	/* CMASK variables */
 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->cmask.offset;
 	cb->cb_color_cmask = va >> 8;
-	cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

 	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
 	va += iview->image->dcc_offset;
@@ -2628,20 +2763,8 @@ radv_initialise_color_surface(struct radv_device *device,
 	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
 		S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);

-	cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
-	pitch_tile_max = level_info->nblk_x / 8 - 1;
-	slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
-	tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
-
-	cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
-	cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
-
-	/* Intensity is implemented as Red, so treat it that way. */
-	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
-		S_028C74_TILE_MODE_INDEX(tile_mode_index);
-
-	if (iview->image->samples > 1) {
-		unsigned log_samples = util_logbase2(iview->image->samples);
+	if (iview->image->info.samples > 1) {
+		unsigned log_samples = util_logbase2(iview->image->info.samples);

 		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
 			S_028C74_NUM_FRAGMENTS(log_samples);
@@ -2649,18 +2772,9 @@ radv_initialise_color_surface(struct radv_device *device,

 	if (iview->image->fmask.size) {
 		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
-		if (device->physical_device->rad_info.chip_class >= CIK)
-			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
-		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
 		cb->cb_color_fmask = va >> 8;
-		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
 	} else {
-		/* This must be set for fast clear to work without FMASK. */
-		if (device->physical_device->rad_info.chip_class >= CIK)
-			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
-		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
 		cb->cb_color_fmask = cb->cb_color_base;
-		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
 	}

 	ntype = radv_translate_color_numformat(iview->vk_format,
@@ -2705,7 +2819,7 @@ radv_initialise_color_surface(struct radv_device *device,
 				    format != V_028C70_COLOR_24_8) |
 		S_028C70_NUMBER_TYPE(ntype) |
 		S_028C70_ENDIAN(endian);
-	if (iview->image->samples > 1)
+	if (iview->image->info.samples > 1)
 		if (iview->image->fmask.size)
 			cb->cb_color_info |= S_028C70_COMPRESSION(1);

@@ -2713,12 +2827,12 @@ radv_initialise_color_surface(struct radv_device *device,
 	    !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
 		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

-	if (iview->image->surface.dcc_size && level_info->dcc_enabled)
+	if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
 		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

 	if (device->physical_device->rad_info.chip_class >= VI) {
 		unsigned max_uncompressed_block_size = 2;
-		if (iview->image->samples > 1) {
+		if (iview->image->info.samples > 1) {
 			if (iview->image->surface.bpe == 1)
 				max_uncompressed_block_size = 0;
 			else if (iview->image->surface.bpe == 2)
@@ -2732,9 +2846,24 @@ radv_initialise_color_surface(struct radv_device *device,
 	/* This must be set for fast clear to work without FMASK. */
 	if (!iview->image->fmask.size &&
 	    device->physical_device->rad_info.chip_class == SI) {
-		unsigned bankh = util_logbase2(iview->image->surface.bankh);
+		unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
 		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
 	}
+
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		uint32_t max_slice = radv_surface_layer_count(iview);
+		unsigned mip0_depth = iview->base_layer + max_slice - 1;
+
+		cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
+		cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
+			S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
+		cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
+			S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
+			S_028C68_MAX_MIP(iview->image->info.levels);
+
+		cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
+
+	}
 }

 static void
@@ -2743,9 +2872,8 @@ radv_initialise_ds_surface(struct radv_device *device,
 			   struct radv_image_view *iview)
 {
 	unsigned level = iview->base_mip;
-	unsigned format;
+	unsigned format, stencil_format;
 	uint64_t va, s_offs, z_offs;
-	const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
 	bool stencil_only = false;
 	memset(ds, 0, sizeof(*ds));
 	switch (iview->vk_format) {
@@ -2767,98 +2895,121 @@ radv_initialise_ds_surface(struct radv_device *device,
 		break;
 	case VK_FORMAT_S8_UINT:
 		stencil_only = true;
-		level_info = &iview->image->surface.stencil_level[level];
 		break;
 	default:
 		break;
 	}

 	format = radv_translate_dbformat(iview->vk_format);
-
-	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
-	s_offs = z_offs = va;
-	z_offs += iview->image->surface.level[level].offset;
-	s_offs += iview->image->surface.stencil_level[level].offset;
+	stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
+		V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

 	uint32_t max_slice = radv_surface_layer_count(iview);
 	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
 		S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
-	ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
-	ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);

-	if (iview->image->samples > 1)
-		ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));
+	ds->db_htile_data_base = 0;
+	ds->db_htile_surface = 0;

-	if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
-		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
-	else
-		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
+	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
+	s_offs = z_offs = va;

-	if (device->physical_device->rad_info.chip_class >= CIK) {
-		struct radeon_info *info = &device->physical_device->rad_info;
-		unsigned tiling_index = iview->image->surface.tiling_index[level];
-		unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
-		unsigned macro_index = iview->image->surface.macro_tile_index;
-		unsigned tile_mode = info->si_tile_mode_array[tiling_index];
-		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
-		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		assert(iview->image->surface.u.gfx9.surf_offset == 0);
+		s_offs += iview->image->surface.u.gfx9.stencil_offset;
+
+		ds->db_z_info = S_028038_FORMAT(format) |
+			S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
+			S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
+			S_028038_MAXMIP(iview->image->info.levels - 1);
+		ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
+			S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
+
+		ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
+		ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
+		ds->db_depth_view |= S_028008_MIPID(level);
+
+		ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
+			S_02801C_Y_MAX(iview->image->info.height - 1);
+
+		/* Only use HTILE for the first level. */
+		if (iview->image->surface.htile_size && !level) {
+			ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
+
+			if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
+				/* Use all of the htile_buffer for depth if there's no stencil. */
+				ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
+			va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
+				iview->image->htile_offset;
+			ds->db_htile_data_base = va >> 8;
+			ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
+				S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
+				S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
+		}
+	} else {
+		const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];

 		if (stencil_only)
-			tile_mode = stencil_tile_mode;
+			level_info = &iview->image->surface.u.legacy.stencil_level[level];

-		ds->db_depth_info |=
-			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
-			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
-			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
-			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
-			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
-			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
-		ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
-		ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
-	} else {
-		unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
-		ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
-		tile_mode_index = si_tile_mode_index(iview->image, level, true);
-		ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
-	}
+		z_offs += iview->image->surface.u.legacy.level[level].offset;
+		s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;

-	if (iview->image->surface.htile_size && !level) {
-		ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
-			S_028040_ALLOW_EXPCLEAR(1);
+		ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
+		ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
+		ds->db_stencil_info = S_028044_FORMAT(stencil_format);

-		if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
-			/* Workaround: For a not yet understood reason, the
-			 * combination of MSAA, fast stencil clear and stencil
-			 * decompress messes with subsequent stencil buffer
-			 * uses. Problem was reproduced on Verde, Bonaire,
-			 * Tonga, and Carrizo.
-			 *
-			 * Disabling EXPCLEAR works around the problem.
-			 *
-			 * Check piglit's arb_texture_multisample-stencil-clear
-			 * test if you want to try changing this.
-			 */
-			if (iview->image->samples <= 1)
-				ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
-		} else
-			/* Use all of the htile_buffer for depth if there's no stencil. */
-			ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
+		if (iview->image->info.samples > 1)
+			ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

-		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
-		     iview->image->htile_offset;
-		ds->db_htile_data_base = va >> 8;
-		ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
-	} else {
-		ds->db_htile_data_base = 0;
-		ds->db_htile_surface = 0;
+		if (device->physical_device->rad_info.chip_class >= CIK) {
+			struct radeon_info *info = &device->physical_device->rad_info;
+			unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
+			unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
+			unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
+			unsigned tile_mode = info->si_tile_mode_array[tiling_index];
+			unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
+			unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
+
+			if (stencil_only)
+				tile_mode = stencil_tile_mode;
+
+			ds->db_depth_info |=
+				S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
+				S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
+				S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
+				S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
+				S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
+				S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
+			ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
+			ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
+		} else {
+			unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
+			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
+			tile_mode_index = si_tile_mode_index(iview->image, level, true);
+			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
+		}
+
+		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
+			S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
+		ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
+
+		if (iview->image->surface.htile_size && !level) {
+			ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
+
+			if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
+				/* Use all of the htile_buffer for depth if there's no stencil. */
+				ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
+
+			va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
+				iview->image->htile_offset;
+			ds->db_htile_data_base = va >> 8;
+			ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
+		}
 	}

 	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
 	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
-
-	ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
-		S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
-	ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
 }

 VkResult radv_CreateFramebuffer(
@@ -3092,7 +3243,6 @@ void radv_DestroySampler(
 	vk_free2(&device->alloc, pAllocator, sampler);
 }

-
 /* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
@@ -3136,3 +3286,34 @@ vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
 	*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
 	return VK_SUCCESS;
 }
+
+VkResult radv_GetMemoryFdKHX(VkDevice _device,
+			     VkDeviceMemory _memory,
+			     VkExternalMemoryHandleTypeFlagsKHX handleType,
+			     int *pFD)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	RADV_FROM_HANDLE(radv_device_memory, memory, _memory);
+
+	/* Opaque file descriptors are the only handle type accepted here;
+	 * any other value is a caller error caught by the assert.
+	 */
+	assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
+
+	return radv_get_memory_fd(device, memory, pFD) ?
+		VK_SUCCESS : VK_ERROR_OUT_OF_DEVICE_MEMORY;
+}
+
+VkResult radv_GetMemoryFdPropertiesKHX(VkDevice _device,
+				       VkExternalMemoryHandleTypeFlagBitsKHX handleType,
+				       int fd,
+				       VkMemoryFdPropertiesKHX *pMemoryFdProperties)
+{
+	/* The valid usage section for this entry point states:
+	 *
+	 *    "handleType must not be one of the handle types defined as opaque."
+	 *
+	 * Only opaque handles are handled for now, so every query is rejected.
+	 */
+	return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
+}
--- a/src/amd/vulkan/radv_entrypoints_gen.py
+++ b/src/amd/vulkan/radv_entrypoints_gen.py
@@ -42,6 +42,9 @@ supported_extensions = [
   'VK_KHR_wayland_surface',
   'VK_KHR_xcb_surface',
   'VK_KHR_xlib_surface',
+   'VK_KHX_external_memory_capabilities',
+   'VK_KHX_external_memory',
+   'VK_KHX_external_memory_fd',
 ]

 # We generate a static hash table for entry point lookup
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -28,6 +28,8 @@
 #include "sid.h"
 #include "r600d_common.h"

+#include "vk_util.h"
+
 #include "util/u_half.h"
 #include "util/format_srgb.h"
 #include "util/format_r11g11b10f.h"
@@ -597,13 +599,13 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 				tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
 			}
 		}
-		if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
+		if (tiled && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
 			tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
 			         VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
 		}
 	}

-	if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
+	if (linear && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
 		linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
 		          VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
 	}
@@ -1006,16 +1008,11 @@ void radv_GetPhysicalDeviceFormatProperties2KHR(
 						   &pFormatProperties->formatProperties);
 }

-VkResult radv_GetPhysicalDeviceImageFormatProperties(
-	VkPhysicalDevice                            physicalDevice,
-	VkFormat                                    format,
-	VkImageType                                 type,
-	VkImageTiling                               tiling,
-	VkImageUsageFlags                           usage,
-	VkImageCreateFlags                          createFlags,
-	VkImageFormatProperties*                    pImageFormatProperties)
+static VkResult radv_get_image_format_properties(struct radv_physical_device *physical_device,
+						 const VkPhysicalDeviceImageFormatInfo2KHR *info,
+						 VkImageFormatProperties *pImageFormatProperties)
+
 {
-	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
 	VkFormatProperties format_props;
 	VkFormatFeatureFlags format_feature_flags;
 	VkExtent3D maxExtent;
@@ -1023,11 +1020,11 @@ VkResult radv_GetPhysicalDeviceImageFormatProperties(
 	uint32_t maxArraySize;
 	VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;

-	radv_physical_device_get_format_properties(physical_device, format,
+	radv_physical_device_get_format_properties(physical_device, info->format,
 						   &format_props);
-	if (tiling == VK_IMAGE_TILING_LINEAR) {
+	if (info->tiling == VK_IMAGE_TILING_LINEAR) {
 		format_feature_flags = format_props.linearTilingFeatures;
-	} else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
+	} else if (info->tiling == VK_IMAGE_TILING_OPTIMAL) {
 		format_feature_flags = format_props.optimalTilingFeatures;
 	} else {
 		unreachable("bad VkImageTiling");
@@ -1036,7 +1033,7 @@ VkResult radv_GetPhysicalDeviceImageFormatProperties(
 	if (format_feature_flags == 0)
 		goto unsupported;

-	switch (type) {
+	switch (info->type) {
 	default:
 		unreachable("bad vkimage type\n");
 	case VK_IMAGE_TYPE_1D:
@@ -1062,34 +1059,34 @@ VkResult radv_GetPhysicalDeviceImageFormatProperties(
 		break;
 	}

-	if (tiling == VK_IMAGE_TILING_OPTIMAL &&
-	    type == VK_IMAGE_TYPE_2D &&
+	if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
+	    info->type == VK_IMAGE_TYPE_2D &&
 	    (format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
 				     VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
-	    !(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
-	    !(usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
+	    !(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
+	    !(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
 		sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
 	}

-	if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
+	if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
 		if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
 			goto unsupported;
 		}
 	}

-	if (usage & VK_IMAGE_USAGE_STORAGE_BIT) {
+	if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
 		if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
 			goto unsupported;
 		}
 	}

-	if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
+	if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
 		if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
 			goto unsupported;
 		}
 	}

-	if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
+	if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
 		if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
 			goto unsupported;
 		}
@@ -1120,18 +1117,132 @@ unsupported:
 	return VK_ERROR_FORMAT_NOT_SUPPORTED;
 }

+VkResult radv_GetPhysicalDeviceImageFormatProperties(
+	VkPhysicalDevice                            physicalDevice,
+	VkFormat                                    format,
+	VkImageType                                 type,
+	VkImageTiling                               tiling,
+	VkImageUsageFlags                           usage,
+	VkImageCreateFlags                          createFlags,
+	VkImageFormatProperties*                    pImageFormatProperties)
+{
+	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+	/* Repackage the arguments as a 2KHR query with an empty pNext chain
+	 * so the shared helper serves both entry points. */
+	const VkPhysicalDeviceImageFormatInfo2KHR query = {
+		.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
+		.format = format,
+		.type = type,
+		.tiling = tiling,
+		.usage = usage,
+		.flags = createFlags,
+	};
+
+	return radv_get_image_format_properties(physical_device, &query,
+						pImageFormatProperties);
+}
+
+/* Fill the external-memory properties for an image query: 2D images report
+ * dedicated-only import/export via opaque FDs, other types report nothing. */
+static void
+get_external_image_format_properties(const VkPhysicalDeviceImageFormatInfo2KHR *pImageFormatInfo,
+				     VkExternalMemoryPropertiesKHX *external_properties)
+{
+	VkExternalMemoryFeatureFlagsKHX flags = 0;
+	VkExternalMemoryHandleTypeFlagsKHX handle_types = 0;
+
+	if (pImageFormatInfo->type == VK_IMAGE_TYPE_2D) {
+		flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_KHX |
+		        VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHX |
+		        VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHX;
+		handle_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX;
+	}
+
+	*external_properties = (VkExternalMemoryPropertiesKHX) {
+		.externalMemoryFeatures = flags,
+		.exportFromImportedHandleTypes = handle_types,
+		.compatibleHandleTypes = handle_types,
+	};
+}
+
 VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR(
 	VkPhysicalDevice                            physicalDevice,
-	const VkPhysicalDeviceImageFormatInfo2KHR*  pImageFormatInfo,
-	VkImageFormatProperties2KHR                *pImageFormatProperties)
+	const VkPhysicalDeviceImageFormatInfo2KHR  *base_info,
+	VkImageFormatProperties2KHR                *base_props)
 {
-	return radv_GetPhysicalDeviceImageFormatProperties(physicalDevice,
-							   pImageFormatInfo->format,
-							   pImageFormatInfo->type,
-							   pImageFormatInfo->tiling,
-							   pImageFormatInfo->usage,
-							   pImageFormatInfo->flags,
-							   &pImageFormatProperties->imageFormatProperties);
+	RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
+	const VkPhysicalDeviceExternalImageFormatInfoKHX *external_info = NULL;
+	VkExternalImageFormatPropertiesKHX *external_props = NULL;
+	VkResult result;
+
+	result = radv_get_image_format_properties(physical_device, base_info,
+						&base_props->imageFormatProperties);
+	if (result != VK_SUCCESS)
+		return result;
+
+	/* Extract input structs */
+	vk_foreach_struct_const(s, base_info->pNext) {
+		switch (s->sType) {
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHX:
+			external_info = (const void *) s;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* Extract output structs */
+	vk_foreach_struct(s, base_props->pNext) {
+		switch (s->sType) {
+		case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHX:
+			external_props = (void *) s;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* From the Vulkan 1.0.42 spec:
+	 *
+	 *    If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2KHR will
+	 *    behave as if VkPhysicalDeviceExternalImageFormatInfoKHX was not
+	 *    present and VkExternalImageFormatPropertiesKHX will be ignored.
+	 */
+	if (external_info && external_info->handleType != 0) {
+		switch (external_info->handleType) {
+		case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX:
+			if (external_props) /* the output struct is optional */
+				get_external_image_format_properties(base_info, &external_props->externalMemoryProperties);
+			break;
+		default:
+			/* From the Vulkan 1.0.42 spec:
+			 *
+			 *    If handleType is not compatible with the [parameters] specified
+			 *    in VkPhysicalDeviceImageFormatInfo2KHR, then
+			 *    vkGetPhysicalDeviceImageFormatProperties2KHR returns
+			 *    VK_ERROR_FORMAT_NOT_SUPPORTED.
+			 */
+			result = vk_errorf(VK_ERROR_FORMAT_NOT_SUPPORTED,
+					   "unsupported VkExternalMemoryTypeFlagBitsKHX 0x%x",
+					   external_info->handleType);
+			goto fail;
+		}
+	}
+
+	return VK_SUCCESS;
+
+fail:
+	if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
+		/* From the Vulkan 1.0.42 spec:
+		 *
+		 *    If the combination of parameters to
+		 *    vkGetPhysicalDeviceImageFormatProperties2KHR is not supported by
+		 *    the implementation for use in vkCreateImage, then all members of
+		 *    imageFormatProperties will be filled with zero.
+		 */
+		base_props->imageFormatProperties = (VkImageFormatProperties) {0};
+	}
+	return result;
 }

 void radv_GetPhysicalDeviceSparseImageFormatProperties(
@@ -1157,3 +1268,28 @@ void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR(
 	/* Sparse images are not yet supported. */
 	*pPropertyCount = 0;
 }
+
+/* Report the external-memory features of buffers for the queried handle
+ * type: opaque FDs are dedicated-only, importable and exportable; any
+ * other handle type yields all-zero properties. */
+void radv_GetPhysicalDeviceExternalBufferPropertiesKHX(
+	VkPhysicalDevice                            physicalDevice,
+	const VkPhysicalDeviceExternalBufferInfoKHX *pExternalBufferInfo,
+	VkExternalBufferPropertiesKHX               *pExternalBufferProperties)
+{
+	VkExternalMemoryFeatureFlagsKHX flags = 0;
+	VkExternalMemoryHandleTypeFlagsKHX handle_types = 0;
+
+	if (pExternalBufferInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX) {
+		flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_KHX |
+		        VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHX |
+		        VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHX;
+		handle_types = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX;
+	}
+
+	pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryPropertiesKHX) {
+		.externalMemoryFeatures = flags,
+		.exportFromImportedHandleTypes = handle_types,
+		.compatibleHandleTypes = handle_types,
+	};
+}
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -29,6 +29,7 @@
 #include "vk_format.h"
 #include "radv_radeon_winsys.h"
 #include "sid.h"
+#include "gfx9d.h"
 #include "util/debug.h"
 static unsigned
 radv_choose_tiling(struct radv_device *Device,
@@ -67,22 +68,15 @@ radv_init_surface(struct radv_device *device,

 	is_depth = vk_format_has_depth(desc);
 	is_stencil = vk_format_has_stencil(desc);
-	surface->npix_x = pCreateInfo->extent.width;
-	surface->npix_y = pCreateInfo->extent.height;
-	surface->npix_z = pCreateInfo->extent.depth;

 	surface->blk_w = vk_format_get_blockwidth(pCreateInfo->format);
 	surface->blk_h = vk_format_get_blockheight(pCreateInfo->format);
-	surface->blk_d = 1;
-	surface->array_size = pCreateInfo->arrayLayers;
-	surface->last_level = pCreateInfo->mipLevels - 1;

 	surface->bpe = vk_format_get_blocksize(pCreateInfo->format);
 	/* align byte per element on dword */
 	if (surface->bpe == 3) {
 		surface->bpe = 4;
 	}
-	surface->nsamples = pCreateInfo->samples ? pCreateInfo->samples : 1;
 	surface->flags = RADEON_SURF_SET(array_mode, MODE);

 	switch (pCreateInfo->imageType){
@@ -110,8 +104,7 @@ radv_init_surface(struct radv_device *device,
 	}

 	if (is_stencil)
-		surface->flags |= RADEON_SURF_SBUFFER |
-			RADEON_SURF_HAS_SBUFFER_MIPTREE;
+		surface->flags |= RADEON_SURF_SBUFFER;

 	surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;

@@ -137,9 +130,9 @@ static inline unsigned
 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
 {
 	if (stencil)
-		return image->surface.stencil_tiling_index[level];
+		return image->surface.u.legacy.stencil_tiling_index[level];
 	else
-		return image->surface.tiling_index[level];
+		return image->surface.u.legacy.tiling_index[level];
 }

 static unsigned radv_map_swizzle(unsigned swizzle)
@@ -197,33 +190,80 @@ radv_make_buffer_descriptor(struct radv_device *device,
 static void
 si_set_mutable_tex_desc_fields(struct radv_device *device,
 			       struct radv_image *image,
-			       const struct radeon_surf_level *base_level_info,
+			       const struct legacy_surf_level *base_level_info,
 			       unsigned base_level, unsigned first_level,
 			       unsigned block_width, bool is_stencil,
 			       uint32_t *state)
 {
 	uint64_t gpu_address = device->ws->buffer_get_va(image->bo) + image->offset;
-	uint64_t va = gpu_address + base_level_info->offset;
+	uint64_t va = gpu_address;
 	unsigned pitch = base_level_info->nblk_x * block_width;
-
-	state[1] &= C_008F14_BASE_ADDRESS_HI;
-	state[3] &= C_008F1C_TILING_INDEX;
-	state[4] &= C_008F20_PITCH_GFX6;
-	state[6] &= C_008F28_COMPRESSION_EN;
-
-	assert(!(va & 255));
+	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+	uint64_t meta_va = 0;
+	if (chip_class >= GFX9) {
+		if (is_stencil)
+			va += image->surface.u.gfx9.stencil_offset;
+		else
+			va += image->surface.u.gfx9.surf_offset;
+	} else
+		va += base_level_info->offset;

 	state[0] = va >> 8;
+	state[1] &= C_008F14_BASE_ADDRESS_HI;
 	state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
 	state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
 							     is_stencil));
 	state[4] |= S_008F20_PITCH_GFX6(pitch - 1);

-	if (image->surface.dcc_size && image->surface.level[first_level].dcc_enabled) {
-		state[6] |= S_008F28_COMPRESSION_EN(1);
-		state[7] = (gpu_address +
-			    image->dcc_offset +
-			    base_level_info->dcc_offset) >> 8;
+	if (chip_class >= VI) {
+		state[6] &= C_008F28_COMPRESSION_EN;
+		state[7] = 0;
+		if (image->surface.dcc_size && first_level < image->surface.num_dcc_levels) {
+			uint64_t meta_va = gpu_address + image->dcc_offset;
+			if (chip_class <= VI)
+				meta_va += base_level_info->dcc_offset;
+			state[6] |= S_008F28_COMPRESSION_EN(1);
+			state[7] = meta_va >> 8;
+
+		}
+	}
+
+	if (chip_class >= GFX9) {
+		state[3] &= C_008F1C_SW_MODE;
+		state[4] &= C_008F20_PITCH_GFX9;
+
+		if (is_stencil) {
+			state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode);
+			state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch);
+		} else {
+			state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode);
+			state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch);
+		}
+
+		state[5] &= C_008F24_META_DATA_ADDRESS &
+			    C_008F24_META_PIPE_ALIGNED &
+			    C_008F24_META_RB_ALIGNED;
+		if (meta_va) {
+			struct gfx9_surf_meta_flags meta;
+
+			if (image->dcc_offset)
+				meta = image->surface.u.gfx9.dcc;
+			else
+				meta = image->surface.u.gfx9.htile;
+
+			state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
+				    S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
+				    S_008F24_META_RB_ALIGNED(meta.rb_aligned);
+		}
+	} else {
+		/* SI-CI-VI */
+		unsigned pitch = base_level_info->nblk_x * block_width;
+		unsigned index = si_tile_mode_index(image, base_level, is_stencil);
+
+		state[3] &= C_008F1C_TILING_INDEX;
+		state[3] |= S_008F1C_TILING_INDEX(index);
+		state[4] &= C_008F20_PITCH_GFX6;
+		state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
 	}
 }

@@ -249,6 +289,36 @@ static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
 		unreachable("illegale image type");
 	}
 }
+
+/* Choose the GFX9 BC_SWIZZLE enumeration for a format swizzle, depending
+ * on which entry of the swizzle array holds VK_SWIZZLE_X. Entries are
+ * tested in the order 3, 0, 1, 2 and the first match wins.
+ */
+static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
+{
+	/* swizzle[3] is X: for the pre-defined border color values (white,
+	 * opaque black, transparent black) the RGB channels are all the
+	 * same, so the only thing that matters is that alpha winds up in
+	 * the correct place; either enumeration below will work.
+	 */
+	if (swizzle[3] == VK_SWIZZLE_X)
+		return swizzle[2] == VK_SWIZZLE_Y ? V_008F20_BC_SWIZZLE_WZYX :
+						    V_008F20_BC_SWIZZLE_WXYZ;
+
+	if (swizzle[0] == VK_SWIZZLE_X)
+		return swizzle[1] == VK_SWIZZLE_Y ? V_008F20_BC_SWIZZLE_XYZW :
+						    V_008F20_BC_SWIZZLE_XWYZ;
+
+	if (swizzle[1] == VK_SWIZZLE_X)
+		return V_008F20_BC_SWIZZLE_YXWZ;
+
+	if (swizzle[2] == VK_SWIZZLE_X)
+		return V_008F20_BC_SWIZZLE_ZYXW;
+
+	/* No entry is VK_SWIZZLE_X: keep the XYZW default. */
+	return V_008F20_BC_SWIZZLE_XYZW;
+}
+
 /**
 * Build the sampler view descriptor for a texture.
 */
@@ -291,40 +361,59 @@ si_make_texture_descriptor(struct radv_device *device,
 		data_format = 0;
 	}

-	type = radv_tex_dim(image->type, view_type, image->array_size, image->samples,
+	type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
 			    (image->usage & VK_IMAGE_USAGE_STORAGE_BIT));
 	if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
 	        height = 1;
-		depth = image->array_size;
+		depth = image->info.array_size;
 	} else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY ||
 		   type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
 		if (view_type != VK_IMAGE_VIEW_TYPE_3D)
-			depth = image->array_size;
+			depth = image->info.array_size;
 	} else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
-		depth = image->array_size / 6;
+		depth = image->info.array_size / 6;

 	state[0] = 0;
 	state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
 		    S_008F14_NUM_FORMAT_GFX6(num_format));
 	state[2] = (S_008F18_WIDTH(width - 1) |
-		    S_008F18_HEIGHT(height - 1));
+		    S_008F18_HEIGHT(height - 1) |
+		    S_008F18_PERF_MOD(4));
 	state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
 		    S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
 		    S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
 		    S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
-		    S_008F1C_BASE_LEVEL(image->samples > 1 ?
+		    S_008F1C_BASE_LEVEL(image->info.samples > 1 ?
 					0 : first_level) |
-		    S_008F1C_LAST_LEVEL(image->samples > 1 ?
-					util_logbase2(image->samples) :
+		    S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
+					util_logbase2(image->info.samples) :
 					last_level) |
-		    S_008F1C_POW2_PAD(image->levels > 1) |
 		    S_008F1C_TYPE(type));
-	state[4] = S_008F20_DEPTH(depth - 1);
-	state[5] = (S_008F24_BASE_ARRAY(first_layer) |
-		    S_008F24_LAST_ARRAY(last_layer));
+	state[4] = 0;
+	state[5] = S_008F24_BASE_ARRAY(first_layer);
 	state[6] = 0;
 	state[7] = 0;

+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
+
+		/* Depth is the last accessible layer on Gfx9.
+		 * The hw doesn't need to know the total number of layers.
+		 */
+		if (type == V_008F1C_SQ_RSRC_IMG_3D)
+			state[4] |= S_008F20_DEPTH(depth - 1);
+		else
+			state[4] |= S_008F20_DEPTH(last_layer);
+
+		state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
+		state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
+					     util_logbase2(image->info.samples) :
+					     last_level);
+	} else {
+		state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
+		state[4] |= S_008F20_DEPTH(depth - 1);
+		state[5] |= S_008F24_LAST_ARRAY(last_layer);
+	}
 	if (image->dcc_offset) {
 		unsigned swap = radv_translate_colorswap(vk_format, FALSE);

@@ -333,7 +422,7 @@ si_make_texture_descriptor(struct radv_device *device,
 		/* The last dword is unused by hw. The shader uses it to clear
 		 * bits in the first dword of sampler state.
 		 */
-		if (device->physical_device->rad_info.chip_class <= CIK && image->samples <= 1) {
+		if (device->physical_device->rad_info.chip_class <= CIK && image->info.samples <= 1) {
 			if (first_level == last_level)
 				state[7] = C_008F30_MAX_ANISO_RATIO;
 			else
@@ -343,46 +432,75 @@ si_make_texture_descriptor(struct radv_device *device,

 	/* Initialize the sampler view for FMASK. */
 	if (image->fmask.size) {
-		uint32_t fmask_format;
+		uint32_t fmask_format, num_format;
 		uint64_t gpu_address = device->ws->buffer_get_va(image->bo);
 		uint64_t va;

 		va = gpu_address + image->offset + image->fmask.offset;

-		switch (image->samples) {
-		case 2:
-			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
-			break;
-		case 4:
-			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
-			break;
-		case 8:
-			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
-			break;
-		default:
-			assert(0);
-			fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
+		if (device->physical_device->rad_info.chip_class >= GFX9) {
+			fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
+			switch (image->info.samples) {
+			case 2:
+				num_format = V_008F14_IMG_FMASK_8_2_2;
+				break;
+			case 4:
+				num_format = V_008F14_IMG_FMASK_8_4_4;
+				break;
+			case 8:
+				num_format = V_008F14_IMG_FMASK_32_8_8;
+				break;
+			default:
+				unreachable("invalid nr_samples");
+			}
+		} else {
+			switch (image->info.samples) {
+			case 2:
+				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
+				break;
+			case 4:
+				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
+				break;
+			case 8:
+				fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
+				break;
+			default:
+				assert(0);
+				fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
+			}
+			num_format = V_008F14_IMG_NUM_FORMAT_UINT;
 		}

 		fmask_state[0] = va >> 8;
 		fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
 			S_008F14_DATA_FORMAT_GFX6(fmask_format) |
-			S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_UINT);
+			S_008F14_NUM_FORMAT_GFX6(num_format);
 		fmask_state[2] = S_008F18_WIDTH(width - 1) |
 			S_008F18_HEIGHT(height - 1);
 		fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
 			S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
 			S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
 			S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
-			S_008F1C_TILING_INDEX(image->fmask.tile_mode_index) |
 			S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false));
-		fmask_state[4] = S_008F20_DEPTH(depth - 1) |
-			S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
-		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
-			S_008F24_LAST_ARRAY(last_layer);
+		fmask_state[4] = 0;
+		fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
 		fmask_state[6] = 0;
 		fmask_state[7] = 0;
-	}
+
+		if (device->physical_device->rad_info.chip_class >= GFX9) {
+			fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode);
+			fmask_state[4] |= S_008F20_DEPTH(last_layer) |
+					  S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch);
+			fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) |
+					  S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned);
+		} else {
+			fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
+			fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
+				S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
+			fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
+		}
+	} else if (fmask_state)
+		memset(fmask_state, 0, 8 * 4);
 }

 static void
@@ -410,13 +528,13 @@ radv_query_opaque_metadata(struct radv_device *device,

 	si_make_texture_descriptor(device, image, true,
 				   (VkImageViewType)image->type, image->vk_format,
-				   &fixedmapping, 0, image->levels - 1, 0,
-				   image->array_size,
-				   image->extent.width, image->extent.height,
-				   image->extent.depth,
+				   &fixedmapping, 0, image->info.levels - 1, 0,
+				   image->info.array_size,
+				   image->info.width, image->info.height,
+				   image->info.depth,
 				   desc, NULL);

-	si_set_mutable_tex_desc_fields(device, image, &image->surface.level[0], 0, 0,
+	si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0,
 				       image->surface.blk_w, false, desc);

 	/* Clear the base address and set the relative DCC offset. */
@@ -428,10 +546,10 @@ radv_query_opaque_metadata(struct radv_device *device,
 	memcpy(&md->metadata[2], desc, sizeof(desc));

 	/* Dwords [10:..] contain the mipmap level offsets. */
-	for (i = 0; i <= image->levels - 1; i++)
-		md->metadata[10+i] = image->surface.level[i].offset >> 8;
+	for (i = 0; i <= image->info.levels - 1; i++)
+		md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;

-	md->size_metadata = (11 + image->levels - 1) * 4;
+	md->size_metadata = (11 + image->info.levels - 1) * 4;
 }

 void
@@ -442,19 +560,23 @@ radv_init_metadata(struct radv_device *device,
 	struct radeon_surf *surface = &image->surface;

 	memset(metadata, 0, sizeof(*metadata));
-	metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ?
-		RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
-	metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ?
-		RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
-	metadata->pipe_config = surface->pipe_config;
-	metadata->bankw = surface->bankw;
-	metadata->bankh = surface->bankh;
-	metadata->tile_split = surface->tile_split;
-	metadata->mtilea = surface->mtilea;
-	metadata->num_banks = surface->num_banks;
-	metadata->stride = surface->level[0].pitch_bytes;
-	metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;

+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
+	} else {
+		metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
+			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+		metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
+			RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
+		metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
+		metadata->u.legacy.bankw = surface->u.legacy.bankw;
+		metadata->u.legacy.bankh = surface->u.legacy.bankh;
+		metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
+		metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
+		metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
+		metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
+		metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
+	}
 	radv_query_opaque_metadata(device, image, metadata);
 }

@@ -466,14 +588,20 @@ radv_image_get_fmask_info(struct radv_device *device,
 			  struct radv_fmask_info *out)
 {
 	/* FMASK is allocated like an ordinary texture. */
-	struct radeon_surf fmask = image->surface;
-
+	struct radeon_surf fmask = {};
+	struct ac_surf_info info = image->info;
 	memset(out, 0, sizeof(*out));

-	fmask.bo_alignment = 0;
-	fmask.bo_size = 0;
-	fmask.nsamples = 1;
-	fmask.flags |= RADEON_SURF_FMASK;
+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		out->alignment = image->surface.u.gfx9.fmask_alignment;
+		out->size = image->surface.u.gfx9.fmask_size;
+		return;
+	}
+
+	fmask.blk_w = image->surface.blk_w;
+	fmask.blk_h = image->surface.blk_h;
+	info.samples = 1;
+	fmask.flags = image->surface.flags | RADEON_SURF_FMASK;

 	/* Force 2D tiling if it wasn't set. This may occur when creating
 	 * FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
@@ -481,8 +609,6 @@ radv_image_get_fmask_info(struct radv_device *device,
 	fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
 	fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);

-	fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
-
 	switch (nr_samples) {
 	case 2:
 	case 4:
@@ -495,25 +621,25 @@ radv_image_get_fmask_info(struct radv_device *device,
 		return;
 	}

-	device->ws->surface_init(device->ws, &fmask);
-	assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
+	device->ws->surface_init(device->ws, &info, &fmask);
+	assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);

-	out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
+	out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
 	if (out->slice_tile_max)
 		out->slice_tile_max -= 1;

-	out->tile_mode_index = fmask.tiling_index[0];
-	out->pitch_in_pixels = fmask.level[0].nblk_x;
-	out->bank_height = fmask.bankh;
-	out->alignment = MAX2(256, fmask.bo_alignment);
-	out->size = fmask.bo_size;
+	out->tile_mode_index = fmask.u.legacy.tiling_index[0];
+	out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
+	out->bank_height = fmask.u.legacy.bankh;
+	out->alignment = MAX2(256, fmask.surf_alignment);
+	out->size = fmask.surf_size;
 }

 static void
 radv_image_alloc_fmask(struct radv_device *device,
 		       struct radv_image *image)
 {
-	radv_image_get_fmask_info(device, image, image->samples, &image->fmask);
+	radv_image_get_fmask_info(device, image, image->info.samples, &image->fmask);

 	image->fmask.offset = align64(image->size, image->fmask.alignment);
 	image->size = image->fmask.offset + image->fmask.size;
@@ -529,6 +655,12 @@ radv_image_get_cmask_info(struct radv_device *device,
 	unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
 	unsigned cl_width, cl_height;

+	if (device->physical_device->rad_info.chip_class >= GFX9) {
+		out->alignment = image->surface.u.gfx9.cmask_alignment;
+		out->size = image->surface.u.gfx9.cmask_size;
+		return;
+	}
+
 	switch (num_pipes) {
 	case 2:
 		cl_width = 32;
@@ -553,8 +685,8 @@ radv_image_get_cmask_info(struct radv_device *device,

 	unsigned base_align = num_pipes * pipe_interleave_bytes;

-	unsigned width = align(image->surface.npix_x, cl_width*8);
-	unsigned height = align(image->surface.npix_y, cl_height*8);
+	unsigned width = align(image->info.width, cl_width*8);
+	unsigned height = align(image->info.height, cl_height*8);
 	unsigned slice_elements = (width * height) / (8*8);

 	/* Each element of CMASK is a nibble. */
@@ -565,7 +697,7 @@ radv_image_get_cmask_info(struct radv_device *device,
 		out->slice_tile_max -= 1;

 	out->alignment = MAX2(256, base_align);
-	out->size = (image->type == VK_IMAGE_TYPE_3D ? image->extent.depth : image->array_size) *
+	out->size = (image->type == VK_IMAGE_TYPE_3D ? image->info.depth : image->info.array_size) *
 		    align(slice_bytes, base_align);
 }

@@ -597,7 +729,7 @@ static void
 radv_image_alloc_htile(struct radv_device *device,
 		       struct radv_image *image)
 {
-	if ((device->debug_flags & RADV_DEBUG_NO_HIZ) || image->levels > 1) {
+	if ((device->debug_flags & RADV_DEBUG_NO_HIZ) || image->info.levels > 1) {
 		image->surface.htile_size = 0;
 		return;
 	}
@@ -636,11 +768,14 @@ radv_image_create(VkDevice _device,

 	memset(image, 0, sizeof(*image));
 	image->type = pCreateInfo->imageType;
-	image->extent = pCreateInfo->extent;
+	image->info.width = pCreateInfo->extent.width;
+	image->info.height = pCreateInfo->extent.height;
+	image->info.depth = pCreateInfo->extent.depth;
+	image->info.samples = pCreateInfo->samples;
+	image->info.array_size = pCreateInfo->arrayLayers;
+	image->info.levels = pCreateInfo->mipLevels;
+
 	image->vk_format = pCreateInfo->format;
-	image->levels = pCreateInfo->mipLevels;
-	image->array_size = pCreateInfo->arrayLayers;
-	image->samples = pCreateInfo->samples;
 	image->tiling = pCreateInfo->tiling;
 	image->usage = pCreateInfo->usage;
 	image->flags = pCreateInfo->flags;
@@ -648,15 +783,18 @@ radv_image_create(VkDevice _device,
 	image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
 	if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
 		for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
-			image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
+			if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHX)
+				image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
+			else
+				image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
 	}

 	radv_init_surface(device, &image->surface, create_info);

-	device->ws->surface_init(device->ws, &image->surface);
+	device->ws->surface_init(device->ws, &image->info, &image->surface);

-	image->size = image->surface.bo_size;
-	image->alignment = image->surface.bo_alignment;
+	image->size = image->surface.surf_size;
+	image->alignment = image->surface.surf_alignment;

 	if (image->exclusive || image->queue_family_mask == 1)
 		can_cmask_dcc = true;
@@ -669,22 +807,15 @@ radv_image_create(VkDevice _device,

 	if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
 	    pCreateInfo->mipLevels == 1 &&
-	    !image->surface.dcc_size && image->extent.depth == 1 && can_cmask_dcc)
+	    !image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc)
 		radv_image_alloc_cmask(device, image);
-	if (image->samples > 1 && vk_format_is_color(pCreateInfo->format)) {
+	if (image->info.samples > 1 && vk_format_is_color(pCreateInfo->format)) {
 		radv_image_alloc_fmask(device, image);
 	} else if (vk_format_is_depth(pCreateInfo->format)) {

 		radv_image_alloc_htile(device, image);
 	}

-
-	if (create_info->stride && create_info->stride != image->surface.level[0].pitch_bytes) {
-		image->surface.level[0].nblk_x = create_info->stride / image->surface.bpe;
-		image->surface.level[0].pitch_bytes = create_info->stride;
-		image->surface.level[0].slice_size = create_info->stride * image->surface.level[0].nblk_y;
-	}
-
 	if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
 		image->alignment = MAX2(image->alignment, 4096);
 		image->size = align64(image->size, image->alignment);
@@ -706,9 +837,7 @@ radv_image_create(VkDevice _device,
 void
 radv_image_view_init(struct radv_image_view *iview,
 		     struct radv_device *device,
-		     const VkImageViewCreateInfo* pCreateInfo,
-		     struct radv_cmd_buffer *cmd_buffer,
-		     VkImageUsageFlags usage_mask)
+		     const VkImageViewCreateInfo* pCreateInfo)
 {
 	RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
 	const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
@@ -717,11 +846,11 @@ radv_image_view_init(struct radv_image_view *iview,
 	switch (image->type) {
 	case VK_IMAGE_TYPE_1D:
 	case VK_IMAGE_TYPE_2D:
-		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->array_size);
+		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <= image->info.array_size);
 		break;
 	case VK_IMAGE_TYPE_3D:
 		assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1
-		       <= radv_minify(image->extent.depth, range->baseMipLevel));
+		       <= radv_minify(image->info.depth, range->baseMipLevel));
 		break;
 	default:
 		unreachable("bad VkImageType");
@@ -740,9 +869,9 @@ radv_image_view_init(struct radv_image_view *iview,
 	}

 	iview->extent = (VkExtent3D) {
-		.width  = radv_minify(image->extent.width , range->baseMipLevel),
-		.height = radv_minify(image->extent.height, range->baseMipLevel),
-		.depth  = radv_minify(image->extent.depth , range->baseMipLevel),
+		.width  = radv_minify(image->info.width , range->baseMipLevel),
+		.height = radv_minify(image->info.height, range->baseMipLevel),
+		.depth  = radv_minify(image->info.depth , range->baseMipLevel),
 	};

 	iview->extent.width = round_up_u32(iview->extent.width * vk_format_get_blockwidth(iview->vk_format),
@@ -769,91 +898,31 @@ radv_image_view_init(struct radv_image_view *iview,
 				   iview->descriptor,
 				   iview->fmask_descriptor);
 	si_set_mutable_tex_desc_fields(device, image,
-				       is_stencil ? &image->surface.stencil_level[range->baseMipLevel] : &image->surface.level[range->baseMipLevel], range->baseMipLevel,
+				       is_stencil ? &image->surface.u.legacy.stencil_level[range->baseMipLevel]
+				                  : &image->surface.u.legacy.level[range->baseMipLevel],
+				       range->baseMipLevel,
 				       range->baseMipLevel,
 				       blk_w, is_stencil, iview->descriptor);
 }

-void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
-					    struct radv_image *image, uint32_t micro_tile_mode)
-{
-	/* These magic numbers were copied from addrlib. It doesn't use any
-	 * definitions for them either. They are all 2D_TILED_THIN1 modes with
-	 * different bpp and micro tile mode.
-	 */
-	if (device->physical_device->rad_info.chip_class >= CIK) {
-		switch (micro_tile_mode) {
-		case 0: /* displayable */
-			image->surface.tiling_index[0] = 10;
-			break;
-		case 1: /* thin */
-			image->surface.tiling_index[0] = 14;
-			break;
-		case 3: /* rotated */
-			image->surface.tiling_index[0] = 28;
-			break;
-		default: /* depth, thick */
-			assert(!"unexpected micro mode");
-			return;
-		}
-	} else { /* SI */
-		switch (micro_tile_mode) {
-		case 0: /* displayable */
-			switch (image->surface.bpe) {
-			case 1:
-                            image->surface.tiling_index[0] = 10;
-                            break;
-			case 2:
-                            image->surface.tiling_index[0] = 11;
-                            break;
-			default: /* 4, 8 */
-                            image->surface.tiling_index[0] = 12;
-                            break;
-			}
-			break;
-		case 1: /* thin */
-			switch (image->surface.bpe) {
-			case 1:
-                                image->surface.tiling_index[0] = 14;
-                                break;
-			case 2:
-                                image->surface.tiling_index[0] = 15;
-                                break;
-			case 4:
-                                image->surface.tiling_index[0] = 16;
-                                break;
-			default: /* 8, 16 */
-                                image->surface.tiling_index[0] = 17;
-                                break;
-			}
-			break;
-		default: /* depth, thick */
-			assert(!"unexpected micro mode");
-			return;
-		}
-	}
-
-	image->surface.micro_tile_mode = micro_tile_mode;
-}
-
 bool radv_layout_has_htile(const struct radv_image *image,
-                           VkImageLayout layout)
+                           VkImageLayout layout,
+                           unsigned queue_mask)
 {
-	return (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
-		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+	return image->surface.htile_size &&
+	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
+	        layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
+	       queue_mask == (1u << RADV_QUEUE_GENERAL);
 }

 bool radv_layout_is_htile_compressed(const struct radv_image *image,
-                                     VkImageLayout layout)
+                                     VkImageLayout layout,
+                                     unsigned queue_mask)
 {
-	return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
-}
-
-bool radv_layout_can_expclear(const struct radv_image *image,
-                              VkImageLayout layout)
-{
-	return (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
-		layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
+	return image->surface.htile_size &&
+	       (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
+	        layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
+	       queue_mask == (1u << RADV_QUEUE_GENERAL);
 }

 bool radv_layout_can_fast_clear(const struct radv_image *image,
@@ -869,6 +938,8 @@ unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t f
 {
 	if (!image->exclusive)
 		return image->queue_family_mask;
+	if (family == VK_QUEUE_FAMILY_EXTERNAL_KHX)
+		return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
 	if (family == VK_QUEUE_FAMILY_IGNORED)
 		return 1u << queue_family;
 	return 1u << family;
@@ -914,14 +985,15 @@ void radv_GetImageSubresourceLayout(
 	RADV_FROM_HANDLE(radv_image, image, _image);
 	int level = pSubresource->mipLevel;
 	int layer = pSubresource->arrayLayer;
+	struct radeon_surf *surface = &image->surface;

-	pLayout->offset = image->surface.level[level].offset + image->surface.level[level].slice_size * layer;
-	pLayout->rowPitch = image->surface.level[level].pitch_bytes;
-	pLayout->arrayPitch = image->surface.level[level].slice_size;
-	pLayout->depthPitch = image->surface.level[level].slice_size;
-	pLayout->size = image->surface.level[level].slice_size;
+	pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
+	pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
+	pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
+	pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
+	pLayout->size = surface->u.legacy.level[level].slice_size;
 	if (image->type == VK_IMAGE_TYPE_3D)
-		pLayout->size *= image->surface.level[level].nblk_z;
+		pLayout->size *= u_minify(image->info.depth, level);
 }


@@ -939,7 +1011,7 @@ radv_CreateImageView(VkDevice _device,
 	if (view == NULL)
 		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

-	radv_image_view_init(view, device, pCreateInfo, NULL, ~0);
+	radv_image_view_init(view, device, pCreateInfo);

 	*pView = radv_image_view_to_handle(view);

--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -30,33 +30,35 @@
 #include <pwd.h>
 #include <sys/stat.h>

-void
-radv_meta_save(struct radv_meta_saved_state *state,
+static void
+radv_meta_save_novertex(struct radv_meta_saved_state *state,
 	       const struct radv_cmd_buffer *cmd_buffer,
 	       uint32_t dynamic_mask)
 {
 	state->old_pipeline = cmd_buffer->state.pipeline;
 	state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
-	memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings,
-	       sizeof(state->old_vertex_bindings));

 	state->dynamic_mask = dynamic_mask;
 	radv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic,
 				dynamic_mask);

 	memcpy(state->push_constants, cmd_buffer->push_constants, MAX_PUSH_CONSTANTS_SIZE);
+	state->vertex_saved = false;
 }

 void
 radv_meta_restore(const struct radv_meta_saved_state *state,
 		  struct radv_cmd_buffer *cmd_buffer)
 {
-	cmd_buffer->state.pipeline = state->old_pipeline;
-	radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
-	memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
-	       sizeof(state->old_vertex_bindings));
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+			     radv_pipeline_to_handle(state->old_pipeline));
+	cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+	if (state->vertex_saved) {
+		memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
+		       sizeof(state->old_vertex_bindings));
+		cmd_buffer->state.vb_dirty |= (1 << RADV_META_VERTEX_BINDING_COUNT) - 1;
+	}

-	cmd_buffer->state.vb_dirty |= (1 << RADV_META_VERTEX_BINDING_COUNT) - 1;
 	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;

 	radv_dynamic_state_copy(&cmd_buffer->state.dynamic, &state->dynamic,
@@ -110,7 +112,8 @@ radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
 {
 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
 			     radv_pipeline_to_handle(state->old_pipeline));
-	radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
+
+	cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;

 	if (push_constant_size) {
 		memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size);
@@ -335,8 +338,14 @@ radv_device_init_meta(struct radv_device *device)
 	result = radv_device_init_meta_resolve_compute_state(device);
 	if (result != VK_SUCCESS)
 		goto fail_resolve_compute;
+
+	result = radv_device_init_meta_resolve_fragment_state(device);
+	if (result != VK_SUCCESS)
+		goto fail_resolve_fragment;
 	return VK_SUCCESS;

+fail_resolve_fragment:
+	radv_device_finish_meta_resolve_compute_state(device);
 fail_resolve_compute:
 	radv_device_finish_meta_fast_clear_flush_state(device);
 fail_fast_clear:
@@ -373,6 +382,7 @@ radv_device_finish_meta(struct radv_device *device)
 	radv_device_finish_meta_buffer_state(device);
 	radv_device_finish_meta_fast_clear_flush_state(device);
 	radv_device_finish_meta_resolve_compute_state(device);
+	radv_device_finish_meta_resolve_fragment_state(device);

 	radv_store_meta_pipeline(device);
 	radv_pipeline_cache_finish(&device->meta_state.cache);
@@ -384,12 +394,212 @@ radv_device_finish_meta(struct radv_device *device)
 * should have no effect.
 */
 void
-radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state,
-					    struct radv_cmd_buffer *cmd_buffer)
+radv_meta_save_graphics_reset_vport_scissor_novertex(struct radv_meta_saved_state *saved_state,
+						     struct radv_cmd_buffer *cmd_buffer)
 {
 	uint32_t dirty_state = (1 << VK_DYNAMIC_STATE_VIEWPORT) | (1 << VK_DYNAMIC_STATE_SCISSOR);
-	radv_meta_save(saved_state, cmd_buffer, dirty_state);
+	radv_meta_save_novertex(saved_state, cmd_buffer, dirty_state);
 	cmd_buffer->state.dynamic.viewport.count = 0;
 	cmd_buffer->state.dynamic.scissor.count = 0;
 	cmd_buffer->state.dirty |= dirty_state;
 }
+
+nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
+{
+	/* Emit a vec4 (x, y, comp2, 1.0); x and y are selected from the zero-based vertex id. */
+	nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(vs_b->shader, nir_intrinsic_load_vertex_id_zero_base);
+	nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
+	nir_builder_instr_insert(vs_b, &vertex_id->instr);
+
+	/* vertex 0 - -1.0, -1.0 */
+	/* vertex 1 - -1.0, 1.0 */
+	/* vertex 2 - 1.0, -1.0 */
+	/* so channel 0 is vertex_id != 2 ? -1.0 : 1.0
+	   channel 1 is vertex id != 1 ? -1.0 : 1.0 */
+
+	nir_ssa_def *c0cmp = nir_ine(vs_b, &vertex_id->dest.ssa,
+				     nir_imm_int(vs_b, 2));
+	nir_ssa_def *c1cmp = nir_ine(vs_b, &vertex_id->dest.ssa,
+				     nir_imm_int(vs_b, 1));
+
+	nir_ssa_def *comp[4];
+	comp[0] = nir_bcsel(vs_b, c0cmp,
+			    nir_imm_float(vs_b, -1.0),
+			    nir_imm_float(vs_b, 1.0));
+
+	comp[1] = nir_bcsel(vs_b, c1cmp,
+			    nir_imm_float(vs_b, -1.0),
+			    nir_imm_float(vs_b, 1.0));
+	comp[2] = comp2;
+	comp[3] = nir_imm_float(vs_b, 1.0);
+	nir_ssa_def *outvec = nir_vec(vs_b, comp, 4);
+
+	return outvec;
+}
+
+nir_ssa_def *radv_meta_gen_rect_vertices(nir_builder *vs_b)
+{
+	return radv_meta_gen_rect_vertices_comp2(vs_b, nir_imm_float(vs_b, 0.0)); /* third component fixed at 0.0 */
+}
+
+/* Build a vertex shader that writes radv_meta_gen_rect_vertices() to gl_Position and return it. */
+nir_shader *
+radv_meta_build_nir_vs_generate_vertices(void)
+{
+	const struct glsl_type *vec4 = glsl_vec4_type();
+
+	nir_builder b;
+	nir_variable *v_position;
+
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_vs_gen_verts");
+
+	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+	/* Declare the vec4 shader output that receives the generated position. */
+	v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
+					 "gl_Position");
+	v_position->data.location = VARYING_SLOT_POS;
+	/* Write mask 0xf: store all four components. */
+	nir_store_var(&b, v_position, outvec, 0xf);
+
+	return b.shader;
+}
+
+nir_shader *
+radv_meta_build_nir_fs_noop(void)
+{
+	nir_builder b;
+	/* Fragment shader with an empty body: only its name is set before it is returned. */
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+	b.shader->info.name = ralloc_asprintf(b.shader,
+					       "meta_noop_fs");
+
+	return b.shader;
+}
+
+/* Build NIR that sRGB-encodes the RGB channels of the linear vec4 "input":
+ * c * 12.92 below the 0.0031308 cutoff, 1.055 * c^(1/2.4) - 0.055 otherwise.
+ * Alpha (channel 3) is passed through unchanged. Returns the new vec4. */
+static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
+							    nir_ssa_def *input)
+{
+	nir_const_value v;
+	unsigned i;
+	v.u32[0] = 0x3b4d2e1c; // 0.00313080009
+
+	nir_ssa_def *cmp[3];
+	for (i = 0; i < 3; i++)
+		cmp[i] = nir_flt(b, nir_channel(b, input, i),
+				 nir_build_imm(b, 1, 32, v));
+
+	nir_ssa_def *ltvals[3];
+	v.f32[0] = 12.92;
+	for (i = 0; i < 3; i++)
+		ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+
+	nir_ssa_def *gtvals[3];
+	for (i = 0; i < 3; i++) {
+		v.f32[0] = 1.0/2.4;
+		gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 1.055;
+		gtvals[i] = nir_fmul(b, gtvals[i], nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 0.055;
+		gtvals[i] = nir_fsub(b, gtvals[i], nir_build_imm(b, 1, 32, v));
+	}
+
+	nir_ssa_def *comp[4];
+	for (i = 0; i < 3; i++)
+		comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
+	comp[3] = nir_channel(b, input, 3); /* index, not a nir_channels() mask */
+	return nir_vec(b, comp, 4);
+}
+
+void radv_meta_build_resolve_shader_core(nir_builder *b,
+					 bool is_integer,
+					 bool is_srgb,
+					 int samples,
+					 nir_variable *input_img,
+					 nir_variable *color,
+					 nir_ssa_def *img_coord)
+{
+	/* Emit a resolve of input_img at img_coord into the color variable; sample 0 is always fetched. */
+	nir_ssa_def *tmp;
+	nir_if *outer_if = NULL;
+
+	nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
+	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
+	tex->op = nir_texop_txf_ms;
+	tex->src[0].src_type = nir_tex_src_coord;
+	tex->src[0].src = nir_src_for_ssa(img_coord);
+	tex->src[1].src_type = nir_tex_src_ms_index;
+	tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
+	tex->dest_type = nir_type_float;
+	tex->is_array = false;
+	tex->coord_components = 2;
+	tex->texture = nir_deref_var_create(tex, input_img);
+	tex->sampler = NULL;
+
+	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
+	nir_builder_instr_insert(b, &tex->instr);
+
+	tmp = &tex->dest.ssa;
+	/* Non-integer multisampled case: branch on samples_identical between averaging and sample 0. */
+	if (!is_integer && samples > 1) {
+		nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 1);
+		tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
+		tex_all_same->op = nir_texop_samples_identical;
+		tex_all_same->src[0].src_type = nir_tex_src_coord;
+		tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
+		tex_all_same->dest_type = nir_type_float;
+		tex_all_same->is_array = false;
+		tex_all_same->coord_components = 2;
+		tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
+		tex_all_same->sampler = NULL;
+
+		nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
+		nir_builder_instr_insert(b, &tex_all_same->instr);
+		/* NOTE(review): named all_same, yet the then-branch below averages every sample - confirm polarity. */
+		nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
+		nir_if *if_stmt = nir_if_create(b->shader);
+		if_stmt->condition = nir_src_for_ssa(all_same);
+		nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
+
+		b->cursor = nir_after_cf_list(&if_stmt->then_list);
+		for (int i = 1; i < samples; i++) {
+			nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 2);
+			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
+			tex_add->op = nir_texop_txf_ms;
+			tex_add->src[0].src_type = nir_tex_src_coord;
+			tex_add->src[0].src = nir_src_for_ssa(img_coord);
+			tex_add->src[1].src_type = nir_tex_src_ms_index;
+			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
+			tex_add->dest_type = nir_type_float;
+			tex_add->is_array = false;
+			tex_add->coord_components = 2;
+			tex_add->texture = nir_deref_var_create(tex_add, input_img);
+			tex_add->sampler = NULL;
+
+			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
+			nir_builder_instr_insert(b, &tex_add->instr);
+
+			tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
+		}
+		/* Average: the sum of all fetched samples divided by the sample count. */
+		tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
+		nir_store_var(b, color, tmp, 0xf);
+		b->cursor = nir_after_cf_list(&if_stmt->else_list);
+		outer_if = if_stmt;
+	}
+	nir_store_var(b, color, &tex->dest.ssa, 0xf); /* else-branch, or unconditional when no if was built */
+
+	if (outer_if)
+		b->cursor = nir_after_cf_node(&outer_if->cf_node);
+	/* For sRGB, run the stored result through the sRGB conversion helper and store it back. */
+	if (is_srgb) {
+		nir_ssa_def *newv = nir_load_var(b, color);
+		newv = radv_meta_build_resolve_srgb_conversion(b, newv);
+		nir_store_var(b, color, newv, 0xf);
+	}
+}
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -35,6 +35,7 @@ extern "C" {
 #define RADV_META_VERTEX_BINDING_COUNT 2

 struct radv_meta_saved_state {
+	bool vertex_saved;
 	struct radv_vertex_binding old_vertex_bindings[RADV_META_VERTEX_BINDING_COUNT];
 	struct radv_descriptor_set *old_descriptor_set0;
 	struct radv_pipeline *old_pipeline;
@@ -90,9 +91,9 @@ void radv_device_finish_meta_query_state(struct radv_device *device);

 VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device);
 void radv_device_finish_meta_resolve_compute_state(struct radv_device *device);
-void radv_meta_save(struct radv_meta_saved_state *state,
-		    const struct radv_cmd_buffer *cmd_buffer,
-		    uint32_t dynamic_mask);
+
+VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device);
+void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device);

 void radv_meta_restore(const struct radv_meta_saved_state *state,
 		       struct radv_cmd_buffer *cmd_buffer);
@@ -200,8 +201,8 @@ void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 struct radv_image *image,
 					 const VkImageSubresourceRange *subresourceRange);

-void radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state,
-						 struct radv_cmd_buffer *cmd_buffer);
+void radv_meta_save_graphics_reset_vport_scissor_novertex(struct radv_meta_saved_state *saved_state,
+							  struct radv_cmd_buffer *cmd_buffer);

 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     struct radv_image *src_image,
@@ -211,9 +212,33 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     uint32_t region_count,
 				     const VkImageResolve *regions);

+void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
+				      struct radv_image *src_image,
+				      VkImageLayout src_image_layout,
+				      struct radv_image *dest_image,
+				      VkImageLayout dest_image_layout,
+				      uint32_t region_count,
+				      const VkImageResolve *regions);
+
 void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
 			       struct radv_image *image,
 			       struct radv_image *linear_image);
+
+/* common nir builder helpers */
+#include "nir/nir_builder.h"
+
+nir_ssa_def *radv_meta_gen_rect_vertices(nir_builder *vs_b);
+nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2);
+nir_shader *radv_meta_build_nir_vs_generate_vertices(void);
+nir_shader *radv_meta_build_nir_fs_noop(void);
+
+void radv_meta_build_resolve_shader_core(nir_builder *b,
+					 bool is_integer,
+					 bool is_srgb,
+					 int samples,
+					 nir_variable *input_img,
+					 nir_variable *color,
+					 nir_ssa_def *img_coord);
 #ifdef __cplusplus
 }
 #endif
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -38,25 +38,64 @@ build_nir_vertex_shader(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");

-	nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
-						   vec4, "a_pos");
-	pos_in->data.location = VERT_ATTRIB_GENERIC0;
 	nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 						    vec4, "gl_Position");
 	pos_out->data.location = VARYING_SLOT_POS;
-	nir_copy_var(&b, pos_out, pos_in);

-	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
-						       vec4, "a_tex_pos");
-	tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
 	nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 							vec4, "v_tex_pos");
 	tex_pos_out->data.location = VARYING_SLOT_VAR0;
 	tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
-	nir_copy_var(&b, tex_pos_out, tex_pos_in);

+	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+
+	nir_store_var(&b, pos_out, outvec, 0xf);
+
+	nir_intrinsic_instr *src_box = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	src_box->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	nir_intrinsic_set_base(src_box, 0);
+	nir_intrinsic_set_range(src_box, 16);
+	src_box->num_components = 4;
+	nir_ssa_dest_init(&src_box->instr, &src_box->dest, 4, 32, "src_box");
+	nir_builder_instr_insert(&b, &src_box->instr);
+
+	nir_intrinsic_instr *src0_z = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	src0_z->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	nir_intrinsic_set_base(src0_z, 16);
+	nir_intrinsic_set_range(src0_z, 4);
+	src0_z->num_components = 1;
+	nir_ssa_dest_init(&src0_z->instr, &src0_z->dest, 1, 32, "src0_z");
+	nir_builder_instr_insert(&b, &src0_z->instr);
+
+	nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_vertex_id_zero_base);
+	nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
+	nir_builder_instr_insert(&b, &vertex_id->instr);
+
+	/* vertex 0 - src0_x, src0_y, src0_z */
+	/* vertex 1 - src0_x, src1_y, src0_z */
+	/* vertex 2 - src1_x, src0_y, src0_z */
+	/* so channel 0 is vertex_id != 2 ? src0_x : src1_x
+	   channel 1 is vertex_id != 1 ? src0_y : src1_y */
+
+	nir_ssa_def *c0cmp = nir_ine(&b, &vertex_id->dest.ssa,
+				     nir_imm_int(&b, 2));
+	nir_ssa_def *c1cmp = nir_ine(&b, &vertex_id->dest.ssa,
+				     nir_imm_int(&b, 1));
+
+	nir_ssa_def *comp[4];
+	comp[0] = nir_bcsel(&b, c0cmp,
+			    nir_channel(&b, &src_box->dest.ssa, 0),
+			    nir_channel(&b, &src_box->dest.ssa, 2));
+
+	comp[1] = nir_bcsel(&b, c1cmp,
+			    nir_channel(&b, &src_box->dest.ssa, 1),
+			    nir_channel(&b, &src_box->dest.ssa, 3));
+	comp[2] = &src0_z->dest.ssa;
+	comp[3] = nir_imm_float(&b, 1.0);
+	nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
+	nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
 	return b.shader;
 }

@@ -70,7 +109,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_fs.%d", tex_dim);
-	b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info.name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -124,7 +163,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_depth_fs.%d", tex_dim);
-	b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info.name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -178,7 +217,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);

 	sprintf(shader_name, "meta_blit_stencil_fs.%d", tex_dim);
-	b.shader->info->name = ralloc_strdup(b.shader, shader_name);
+	b.shader->info.name = ralloc_strdup(b.shader, shader_name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec4, "v_tex_pos");
@@ -236,65 +275,21 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               VkFilter blit_filter)
 {
 	struct radv_device *device = cmd_buffer->device;
-	unsigned offset = 0;
-	struct blit_vb_data {
-		float pos[2];
-		float tex_coord[3];
-	} vb_data[3];

-	assert(src_image->samples == dest_image->samples);
-	unsigned vb_size = 3 * sizeof(*vb_data);
-	vb_data[0] = (struct blit_vb_data) {
-		.pos = {
-			-1.0,
-			-1.0,
-		},
-		.tex_coord = {
-			(float)src_offset_0.x / (float)src_iview->extent.width,
-			(float)src_offset_0.y / (float)src_iview->extent.height,
-			(float)src_offset_0.z / (float)src_iview->extent.depth,
-		},
+	assert(src_image->info.samples == dest_image->info.samples);
+
+	float vertex_push_constants[5] = {
+		(float)src_offset_0.x / (float)src_iview->extent.width,
+		(float)src_offset_0.y / (float)src_iview->extent.height,
+		(float)src_offset_1.x / (float)src_iview->extent.width,
+		(float)src_offset_1.y / (float)src_iview->extent.height,
+		(float)src_offset_0.z / (float)src_iview->extent.depth,
 	};

-	vb_data[1] = (struct blit_vb_data) {
-		.pos = {
-			-1.0,
-			1.0,
-		},
-		.tex_coord = {
-			(float)src_offset_0.x / (float)src_iview->extent.width,
-			(float)src_offset_1.y / (float)src_iview->extent.height,
-			(float)src_offset_0.z / (float)src_iview->extent.depth,
-		},
-	};
-
-	vb_data[2] = (struct blit_vb_data) {
-		.pos = {
-			1.0,
-			-1.0,
-		},
-		.tex_coord = {
-			(float)src_offset_1.x / (float)src_iview->extent.width,
-			(float)src_offset_0.y / (float)src_iview->extent.height,
-			(float)src_offset_0.z / (float)src_iview->extent.depth,
-		},
-	};
-	radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
-
-	struct radv_buffer vertex_buffer = {
-		.device = device,
-		.size = vb_size,
-		.bo = cmd_buffer->upload.upload_bo,
-		.offset = offset,
-	};
-
-	radv_CmdBindVertexBuffers(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
-				  (VkBuffer[]) {
-						  radv_buffer_to_handle(&vertex_buffer)
-						  },
-				  (VkDeviceSize[]) {
-					  0,
-						  });
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      device->meta_state.blit.pipeline_layout,
+			      VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
+			      vertex_push_constants);

 	VkSampler sampler;
 	radv_CreateSampler(radv_device_to_handle(device),
@@ -509,10 +504,10 @@ void radv_CmdBlitImage(
 	 *    vkCmdBlitImage must not be used for multisampled source or
 	 *    destination images. Use vkCmdResolveImage for this purpose.
 	 */
-	assert(src_image->samples == 1);
-	assert(dest_image->samples == 1);
+	assert(src_image->info.samples == 1);
+	assert(dest_image->info.samples == 1);

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {
 		const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
@@ -531,8 +526,7 @@ void radv_CmdBlitImage(
 						     .baseArrayLayer = src_res->baseArrayLayer,
 						     .layerCount = 1
 					     },
-						     },
-				     cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
+				     });

 		unsigned dst_start, dst_end;
 		if (dest_image->type == VK_IMAGE_TYPE_3D) {
@@ -580,12 +574,6 @@ void radv_CmdBlitImage(
 		dest_box.extent.height = abs(dst_y1 - dst_y0);

 		struct radv_image_view dest_iview;
-		unsigned usage;
-		if (dst_res->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
-			usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
-		else
-			usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
-
 		const unsigned num_layers = dst_end - dst_start;
 		for (unsigned i = 0; i < num_layers; i++) {
 			const VkOffset3D dest_offset_0 = {
@@ -625,8 +613,7 @@ void radv_CmdBlitImage(
 							     .baseArrayLayer = dest_array_slice,
 							     .layerCount = 1
 						     },
-					     },
-					     cmd_buffer, usage);
+					     });
 			meta_emit_blit(cmd_buffer,
 				       src_image, &src_iview,
 				       src_offset_0, src_offset_1,
@@ -765,31 +752,8 @@ radv_device_init_meta_blit_color(struct radv_device *device,

 		VkPipelineVertexInputStateCreateInfo vi_create_info = {
 			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-			.vertexBindingDescriptionCount = 1,
-			.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-				{
-					.binding = 0,
-					.stride = 5 * sizeof(float),
-					.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-				},
-			},
-			.vertexAttributeDescriptionCount = 2,
-			.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-				{
-					/* Position */
-					.location = 0,
-					.binding = 0,
-					.format = VK_FORMAT_R32G32_SFLOAT,
-					.offset = 0
-				},
-				{
-					/* Texture Coordinate */
-					.location = 1,
-					.binding = 0,
-					.format = VK_FORMAT_R32G32B32_SFLOAT,
-					.offset = 8
-				}
-			}
+			.vertexBindingDescriptionCount = 0,
+			.vertexAttributeDescriptionCount = 0,
 		};

 		VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -944,31 +908,8 @@ radv_device_init_meta_blit_depth(struct radv_device *device,

 	VkPipelineVertexInputStateCreateInfo vi_create_info = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-		.vertexBindingDescriptionCount = 1,
-		.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-			{
-				.binding = 0,
-				.stride = 5 * sizeof(float),
-				.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-			},
-		},
-		.vertexAttributeDescriptionCount = 2,
-		.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-			{
-				/* Position */
-				.location = 0,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32_SFLOAT,
-				.offset = 0
-			},
-			{
-				/* Texture Coordinate */
-				.location = 1,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32B32_SFLOAT,
-				.offset = 8
-			}
-		}
+		.vertexBindingDescriptionCount = 0,
+		.vertexAttributeDescriptionCount = 0,
 	};

 	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -1125,31 +1066,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,

 	VkPipelineVertexInputStateCreateInfo vi_create_info = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-		.vertexBindingDescriptionCount = 1,
-		.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-			{
-				.binding = 0,
-				.stride = 5 * sizeof(float),
-				.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-			},
-		},
-		.vertexAttributeDescriptionCount = 2,
-		.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-			{
-				/* Position */
-				.location = 0,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32_SFLOAT,
-				.offset = 0
-			},
-			{
-				/* Texture Coordinate */
-				.location = 1,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32B32_SFLOAT,
-				.offset = 8
-			}
-		}
+		.vertexBindingDescriptionCount = 0,
+		.vertexAttributeDescriptionCount = 0,
 	};

 	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -1308,11 +1226,15 @@ radv_device_init_meta_blit_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;

+	const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, 20};
+
 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
 					   &(VkPipelineLayoutCreateInfo) {
 						   .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 							   .setLayoutCount = 1,
 							   .pSetLayouts = &device->meta_state.blit.ds_layout,
+							   .pushConstantRangeCount = 1,
+							   .pPushConstantRanges = &push_constant_range,
 							   },
 					   &device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
 	if (result != VK_SUCCESS)
@@ -1329,12 +1251,10 @@ radv_device_init_meta_blit_state(struct radv_device *device)
 		goto fail;

 	result = radv_device_init_meta_blit_stencil(device, &vs);
-	if (result != VK_SUCCESS)
-		goto fail;
-	return VK_SUCCESS;

 fail:
 	ralloc_free(vs.nir);
-	radv_device_finish_meta_blit_state(device);
+	if (result != VK_SUCCESS)
+		radv_device_finish_meta_blit_state(device);
 	return result;
 }
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -53,7 +53,6 @@ enum blit2d_src_type {
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
             struct radv_meta_blit2d_surf *surf,
-             VkImageUsageFlags usage,
             struct radv_image_view *iview, VkFormat depth_format)
 {
 	VkFormat format;
@@ -76,7 +75,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
 					     .baseArrayLayer = surf->layer,
 					     .layerCount = 1
 				     },
-					     }, cmd_buffer, usage);
+			     });
 }

 static void
@@ -136,11 +135,10 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,

 		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
 				      device->meta_state.blit2d.p_layouts[src_type],
-				      VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
+				      VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
 				      &src_buf->pitch);
 	} else {
-		create_iview(cmd_buffer, src_img, VK_IMAGE_USAGE_SAMPLED_BIT, &tmp->iview,
-			     depth_format);
+		create_iview(cmd_buffer, src_img, &tmp->iview, depth_format);

 		radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
 					      device->meta_state.blit2d.p_layouts[src_type],
@@ -179,15 +177,7 @@ blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
 		VkFormat depth_format,
                struct blit2d_dst_temps *tmp)
 {
-	VkImageUsageFlagBits bits;
-
-	if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT)
-		bits = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
-	else
-		bits = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
-
-	create_iview(cmd_buffer, dst, bits,
-		     &tmp->iview, depth_format);
+	create_iview(cmd_buffer, dst, &tmp->iview, depth_format);

 	radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
 			       &(VkFramebufferCreateInfo) {
@@ -268,69 +258,21 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 		struct blit2d_src_temps src_temps;
 		blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);

-		uint32_t offset = 0;
 		struct blit2d_dst_temps dst_temps;
 		blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
 				rects[r].dst_y + rects[r].height, depth_format, &dst_temps);

-		struct blit_vb_data {
-			float pos[2];
-			float tex_coord[2];
-		} vb_data[3];
-
-		unsigned vb_size = 3 * sizeof(*vb_data);
-
-		vb_data[0] = (struct blit_vb_data) {
-			.pos = {
-				-1.0,
-				-1.0,
-			},
-			.tex_coord = {
-				rects[r].src_x,
-				rects[r].src_y,
-			},
+		float vertex_push_constants[4] = {
+			rects[r].src_x,
+			rects[r].src_y,
+			rects[r].src_x + rects[r].width,
+			rects[r].src_y + rects[r].height,
 		};

-		vb_data[1] = (struct blit_vb_data) {
-			.pos = {
-				-1.0,
-				1.0,
-			},
-			.tex_coord = {
-				rects[r].src_x,
-				rects[r].src_y + rects[r].height,
-			},
-		};
-
-		vb_data[2] = (struct blit_vb_data) {
-			.pos = {
-				1.0,
-				-1.0,
-			},
-			.tex_coord = {
-				rects[r].src_x + rects[r].width,
-				rects[r].src_y,
-			},
-		};
-
-
-		radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
-
-		struct radv_buffer vertex_buffer = {
-			.device = device,
-			.size = vb_size,
-			.bo = cmd_buffer->upload.upload_bo,
-			.offset = offset,
-		};
-
-		radv_CmdBindVertexBuffers(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
-					  (VkBuffer[]) {
-						  radv_buffer_to_handle(&vertex_buffer),
-							  },
-					  (VkDeviceSize[]) {
-						  0,
-							  });
-
+		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+				      device->meta_state.blit2d.p_layouts[src_type],
+				      VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
+				      vertex_push_constants);

 		if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
 			unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
@@ -433,25 +375,53 @@ build_nir_vertex_shader(void)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_vs");

-	nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
-						   vec4, "a_pos");
-	pos_in->data.location = VERT_ATTRIB_GENERIC0;
 	nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 						    vec4, "gl_Position");
 	pos_out->data.location = VARYING_SLOT_POS;
-	nir_copy_var(&b, pos_out, pos_in);

-	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
-						       vec2, "a_tex_pos");
-	tex_pos_in->data.location = VERT_ATTRIB_GENERIC1;
 	nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
 							vec2, "v_tex_pos");
 	tex_pos_out->data.location = VARYING_SLOT_VAR0;
 	tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
-	nir_copy_var(&b, tex_pos_out, tex_pos_in);

+	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
+	nir_store_var(&b, pos_out, outvec, 0xf);
+
+	nir_intrinsic_instr *src_box = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	src_box->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	nir_intrinsic_set_base(src_box, 0);
+	nir_intrinsic_set_range(src_box, 16);
+	src_box->num_components = 4;
+	nir_ssa_dest_init(&src_box->instr, &src_box->dest, 4, 32, "src_box");
+	nir_builder_instr_insert(&b, &src_box->instr);
+
+	nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_vertex_id_zero_base);
+	nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
+	nir_builder_instr_insert(&b, &vertex_id->instr);
+
+	/* vertex 0 - src_x, src_y */
+	/* vertex 1 - src_x, src_y+h */
+	/* vertex 2 - src_x+w, src_y */
+	/* so channel 0 is vertex_id != 2 ? src_x : src_x + w
+	   channel 1 is vertex_id != 1 ? src_y : src_y + h */
+
+	nir_ssa_def *c0cmp = nir_ine(&b, &vertex_id->dest.ssa,
+				     nir_imm_int(&b, 2));
+	nir_ssa_def *c1cmp = nir_ine(&b, &vertex_id->dest.ssa,
+				     nir_imm_int(&b, 1));
+
+	nir_ssa_def *comp[2];
+	comp[0] = nir_bcsel(&b, c0cmp,
+			    nir_channel(&b, &src_box->dest.ssa, 0),
+			    nir_channel(&b, &src_box->dest.ssa, 2));
+
+	comp[1] = nir_bcsel(&b, c1cmp,
+			    nir_channel(&b, &src_box->dest.ssa, 1),
+			    nir_channel(&b, &src_box->dest.ssa, 3));
+	nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
+	nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
 	return b.shader;
 }

@@ -502,6 +472,8 @@ build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
 	sampler->data.binding = 0;

 	nir_intrinsic_instr *width = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(width, 16);
+	nir_intrinsic_set_range(width, 4);
 	width->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
 	width->num_components = 1;
 	nir_ssa_dest_init(&width->instr, &width->dest, 1, 32, "width");
@@ -532,31 +504,8 @@ build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,

 static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
 	.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-	.vertexBindingDescriptionCount = 1,
-	.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-		{
-			.binding = 0,
-			.stride = 4 * sizeof(float),
-			.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-		},
-	},
-	.vertexAttributeDescriptionCount = 2,
-	.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-		{
-			/* Position */
-			.location = 0,
-			.binding = 0,
-			.format = VK_FORMAT_R32G32_SFLOAT,
-			.offset = 0
-		},
-		{
-			/* Texture Coordinate */
-			.location = 1,
-			.binding = 0,
-			.format = VK_FORMAT_R32G32_SFLOAT,
-			.offset = 8
-		},
-	},
+	.vertexBindingDescriptionCount = 0,
+	.vertexAttributeDescriptionCount = 0,
 };

 static nir_shader *
@@ -568,7 +517,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, name);
+	b.shader->info.name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
@@ -597,7 +546,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, name);
+	b.shader->info.name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
@@ -626,7 +575,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, name);
+	b.shader->info.name = ralloc_strdup(b.shader, name);

 	nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
 						       vec2, "v_tex_pos");
@@ -754,8 +703,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
 						       .format = format,
 						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
 						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+						       .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+						       .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
 						       },
 					       .subpassCount = 1,
 					       .pSubpasses = &(VkSubpassDescription) {
@@ -764,12 +713,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
 						       .colorAttachmentCount = 1,
 						       .pColorAttachments = &(VkAttachmentReference) {
 							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
+							       .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
 							},
 					       .pResolveAttachments = NULL,
 					       .pDepthStencilAttachment = &(VkAttachmentReference) {
 						       .attachment = VK_ATTACHMENT_UNUSED,
-						       .layout = VK_IMAGE_LAYOUT_GENERAL,
+						       .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
 					       },
 					       .preserveAttachmentCount = 1,
 					       .pPreserveAttachments = (uint32_t[]) { 0 },
@@ -912,8 +861,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 							       .format = 0,
 							       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
 							       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-							       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-							       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+							       .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+							       .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
 						       },
 						       .subpassCount = 1,
 						       .pSubpasses = &(VkSubpassDescription) {
@@ -924,7 +873,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 						       .pResolveAttachments = NULL,
 						       .pDepthStencilAttachment = &(VkAttachmentReference) {
 							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
+							       .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
 						       },
 						       .preserveAttachmentCount = 1,
 						       .pPreserveAttachments = (uint32_t[]) { 0 },
@@ -1067,8 +1016,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 							       .format = 0,
 							       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
 							       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
-							       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
-							       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+							       .initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+							       .finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
 						       },
 						       .subpassCount = 1,
 						       .pSubpasses = &(VkSubpassDescription) {
@@ -1079,7 +1028,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 						       .pResolveAttachments = NULL,
 						       .pDepthStencilAttachment = &(VkAttachmentReference) {
 							       .attachment = 0,
-							       .layout = VK_IMAGE_LAYOUT_GENERAL,
+							       .layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
 						       },
 						       .preserveAttachmentCount = 1,
 						       .pPreserveAttachments = (uint32_t[]) { 0 },
@@ -1201,6 +1150,10 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)

 	zero(device->meta_state.blit2d);

+	const VkPushConstantRange push_constant_ranges[] = {
+		{VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
+		{VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
+	};
 	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
 						&(VkDescriptorSetLayoutCreateInfo) {
 							.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -1224,6 +1177,8 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
 						   .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 							   .setLayoutCount = 1,
 							   .pSetLayouts = &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_IMAGE],
+							   .pushConstantRangeCount = 1,
+							   .pPushConstantRanges = push_constant_ranges,
 							   },
 					   &device->meta_state.alloc, &device->meta_state.blit2d.p_layouts[BLIT2D_SRC_TYPE_IMAGE]);
 	if (result != VK_SUCCESS)
@@ -1247,14 +1202,14 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;

-	const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4};
+
 	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
 					   &(VkPipelineLayoutCreateInfo) {
 						   .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
 						   .setLayoutCount = 1,
 						   .pSetLayouts = &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_BUFFER],
-						   .pushConstantRangeCount = 1,
-						   .pPushConstantRanges = &push_constant_range,
+						   .pushConstantRangeCount = 2,
+						   .pPushConstantRanges = push_constant_ranges,
 					   },
 					   &device->meta_state.alloc, &device->meta_state.blit2d.p_layouts[BLIT2D_SRC_TYPE_BUFFER]);
 	if (result != VK_SUCCESS)
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
-	b.shader->info->cs.local_size[0] = 64;
-	b.shader->info->cs.local_size[1] = 1;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
+	b.shader->info.cs.local_size[0] = 64;
+	b.shader->info.cs.local_size[1] = 1;
+	b.shader->info.cs.local_size[2] = 1;

 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info->cs.local_size[0],
-						b.shader->info->cs.local_size[1],
-						b.shader->info->cs.local_size[2], 0);
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

@@ -36,6 +36,8 @@ build_buffer_fill_shader(struct radv_device *dev)
 	nir_builder_instr_insert(&b, &dst_buf->instr);

 	nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(load, 0);
+	nir_intrinsic_set_range(load, 4);
 	load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 	load->num_components = 1;
 	nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
@@ -60,17 +62,17 @@ build_buffer_copy_shader(struct radv_device *dev)
 	nir_builder b;

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
-	b.shader->info->cs.local_size[0] = 64;
-	b.shader->info->cs.local_size[1] = 1;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
+	b.shader->info.cs.local_size[0] = 64;
+	b.shader->info.cs.local_size[1] = 1;
+	b.shader->info.cs.local_size[2] = 1;

 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info->cs.local_size[0],
-						b.shader->info->cs.local_size[1],
-						b.shader->info->cs.local_size[2], 0);
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
@@ -42,10 +42,10 @@ build_nir_itob_compute_shader(struct radv_device *dev)
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_itob_cs");
-	b.shader->info->cs.local_size[0] = 16;
-	b.shader->info->cs.local_size[1] = 16;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs");
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
 						      sampler_type, "s_tex");
 	input_img->data.descriptor_set = 0;
@@ -59,21 +59,25 @@ build_nir_itob_compute_shader(struct radv_device *dev)
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info->cs.local_size[0],
-						b.shader->info->cs.local_size[1],
-						b.shader->info->cs.local_size[2], 0);
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);



 	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(offset, 0);
+	nir_intrinsic_set_range(offset, 12);
 	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 	offset->num_components = 2;
 	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
 	nir_builder_instr_insert(&b, &offset->instr);

 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(stride, 0);
+	nir_intrinsic_set_range(stride, 12);
 	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
 	stride->num_components = 1;
 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
@@ -240,10 +244,10 @@ build_nir_btoi_compute_shader(struct radv_device *dev)
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_btoi_cs");
-	b.shader->info->cs.local_size[0] = 16;
-	b.shader->info->cs.local_size[1] = 16;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_cs");
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
 						      buf_type, "s_tex");
 	input_img->data.descriptor_set = 0;
@@ -257,19 +261,23 @@ build_nir_btoi_compute_shader(struct radv_device *dev)
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info->cs.local_size[0],
-						b.shader->info->cs.local_size[1],
-						b.shader->info->cs.local_size[2], 0);
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

 	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(offset, 0);
+	nir_intrinsic_set_range(offset, 12);
 	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 	offset->num_components = 2;
 	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
 	nir_builder_instr_insert(&b, &offset->instr);

 	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(stride, 0);
+	nir_intrinsic_set_range(stride, 12);
 	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
 	stride->num_components = 1;
 	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
@@ -436,10 +444,10 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_itoi_cs");
-	b.shader->info->cs.local_size[0] = 16;
-	b.shader->info->cs.local_size[1] = 16;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs");
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;
 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
 						      buf_type, "s_tex");
 	input_img->data.descriptor_set = 0;
@@ -453,19 +461,23 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info->cs.local_size[0],
-						b.shader->info->cs.local_size[1],
-						b.shader->info->cs.local_size[2], 0);
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

 	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(src_offset, 0);
+	nir_intrinsic_set_range(src_offset, 16);
 	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 	src_offset->num_components = 2;
 	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
 	nir_builder_instr_insert(&b, &src_offset->instr);

 	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(dst_offset, 0);
+	nir_intrinsic_set_range(dst_offset, 16);
 	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
 	dst_offset->num_components = 2;
 	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
@@ -622,10 +634,10 @@ build_nir_cleari_compute_shader(struct radv_device *dev)
 							     false,
 							     GLSL_TYPE_FLOAT);
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_cleari_cs");
-	b.shader->info->cs.local_size[0] = 16;
-	b.shader->info->cs.local_size[1] = 16;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_cs");
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;

 	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
 						       img_type, "out_img");
@@ -635,13 +647,15 @@ build_nir_cleari_compute_shader(struct radv_device *dev)
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info->cs.local_size[0],
-						b.shader->info->cs.local_size[1],
-						b.shader->info->cs.local_size[2], 0);
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

 	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(clear_val, 0);
+	nir_intrinsic_set_range(clear_val, 16);
 	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 	clear_val->num_components = 4;
 	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
@@ -845,7 +859,6 @@ radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
             struct radv_meta_blit2d_surf *surf,
-             VkImageUsageFlags usage,
             struct radv_image_view *iview)
 {

@@ -862,7 +875,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
 					     .baseArrayLayer = surf->layer,
 					     .layerCount = 1
 				     },
-					     }, cmd_buffer, usage);
+			     });
 }

 static void
@@ -948,7 +961,7 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_device *device = cmd_buffer->device;
 	struct itob_temps temps;

-	create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview);
+	create_iview(cmd_buffer, src, &temps.src_iview);
 	create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &temps.dst_bview);
 	itob_bind_descriptors(cmd_buffer, &temps);

@@ -1034,7 +1047,7 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
 	struct btoi_temps temps;

 	create_bview(cmd_buffer, src->buffer, src->offset, src->format, &temps.src_bview);
-	create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview);
+	create_iview(cmd_buffer, dst, &temps.dst_iview);
 	btoi_bind_descriptors(cmd_buffer, &temps);

 	btoi_bind_pipeline(cmd_buffer);
@@ -1124,8 +1137,8 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_device *device = cmd_buffer->device;
 	struct itoi_temps temps;

-	create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview);
-	create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview);
+	create_iview(cmd_buffer, src, &temps.src_iview);
+	create_iview(cmd_buffer, dst, &temps.dst_iview);

 	itoi_bind_descriptors(cmd_buffer, &temps);

@@ -1196,7 +1209,7 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_device *device = cmd_buffer->device;
 	struct radv_image_view dst_iview;

-	create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &dst_iview);
+	create_iview(cmd_buffer, dst, &dst_iview);
 	cleari_bind_descriptors(cmd_buffer, &dst_iview);

 	cleari_bind_pipeline(cmd_buffer);
@@ -1213,5 +1226,5 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
 			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
 			      push_constants);

-	radv_unaligned_dispatch(cmd_buffer, dst->image->extent.width, dst->image->extent.height, 1);
+	radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
 }
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -27,17 +27,6 @@

 #include "util/format_rgb9e5.h"
 #include "vk_format.h"
-/** Vertex attributes for color clears.  */
-struct color_clear_vattrs {
-	float position[2];
-	VkClearColorValue color;
-};
-
-/** Vertex attributes for depthstencil clears.  */
-struct depthstencil_clear_vattrs {
-	float position[2];
-	float depth_clear;
-};

 enum {
 	DEPTH_CLEAR_SLOW,
@@ -56,47 +45,34 @@ build_color_shaders(struct nir_shader **out_vs,
 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

-	vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
-	fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
+	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
+	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");

 	const struct glsl_type *position_type = glsl_vec4_type();
 	const struct glsl_type *color_type = glsl_vec4_type();

-	nir_variable *vs_in_pos =
-		nir_variable_create(vs_b.shader, nir_var_shader_in, position_type,
-				    "a_position");
-	vs_in_pos->data.location = VERT_ATTRIB_GENERIC0;
-
 	nir_variable *vs_out_pos =
 		nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
 				    "gl_Position");
 	vs_out_pos->data.location = VARYING_SLOT_POS;

-	nir_variable *vs_in_color =
-		nir_variable_create(vs_b.shader, nir_var_shader_in, color_type,
-				    "a_color");
-	vs_in_color->data.location = VERT_ATTRIB_GENERIC1;
-
-	nir_variable *vs_out_color =
-		nir_variable_create(vs_b.shader, nir_var_shader_out, color_type,
-				    "v_color");
-	vs_out_color->data.location = VARYING_SLOT_VAR0;
-	vs_out_color->data.interpolation = INTERP_MODE_FLAT;
-
-	nir_variable *fs_in_color =
-		nir_variable_create(fs_b.shader, nir_var_shader_in, color_type,
-				    "v_color");
-	fs_in_color->data.location = vs_out_color->data.location;
-	fs_in_color->data.interpolation = vs_out_color->data.interpolation;
+	nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(in_color_load, 0);
+	nir_intrinsic_set_range(in_color_load, 16);
+	in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
+	in_color_load->num_components = 4;
+	nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 4, 32, "clear color");
+	nir_builder_instr_insert(&fs_b, &in_color_load->instr);

 	nir_variable *fs_out_color =
 		nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
 				    "f_color");
 	fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;

-	nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
-	nir_copy_var(&vs_b, vs_out_color, vs_in_color);
-	nir_copy_var(&fs_b, fs_out_color, fs_in_color);
+	nir_store_var(&fs_b, fs_out_color, &in_color_load->dest.ssa, 0xf);
+
+	nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
+	nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);

 	const struct glsl_type *layer_type = glsl_int_type();
 	nir_variable *vs_out_layer =
@@ -121,6 +97,7 @@ create_pipeline(struct radv_device *device,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state,
+		const VkPipelineLayout layout,
 		const struct radv_graphics_pipeline_create_info *extra,
                const VkAllocationCallbacks *alloc,
                struct radv_pipeline **pipeline)
@@ -200,10 +177,11 @@ create_pipeline(struct radv_device *device,
 								       VK_DYNAMIC_STATE_STENCIL_REFERENCE,
 							       },
 						       },
-													    .flags = 0,
-														     .renderPass = radv_render_pass_to_handle(render_pass),
-														     .subpass = 0,
-														     },
+						    .layout = layout,
+						    .flags = 0,
+						    .renderPass = radv_render_pass_to_handle(render_pass),
+						    .subpass = 0,
+						},
 					       extra,
 					       alloc,
 					       &pipeline_h);
@@ -269,31 +247,8 @@ create_color_pipeline(struct radv_device *device,

 	const VkPipelineVertexInputStateCreateInfo vi_state = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-		.vertexBindingDescriptionCount = 1,
-		.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-			{
-				.binding = 0,
-				.stride = sizeof(struct color_clear_vattrs),
-				.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-			},
-		},
-		.vertexAttributeDescriptionCount = 2,
-		.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-			{
-				/* Position */
-				.location = 0,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32_SFLOAT,
-				.offset = offsetof(struct color_clear_vattrs, position),
-			},
-			{
-				/* Color */
-				.location = 1,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32B32A32_SFLOAT,
-				.offset = offsetof(struct color_clear_vattrs, color),
-			},
-		},
+		.vertexBindingDescriptionCount = 0,
+		.vertexAttributeDescriptionCount = 0,
 	};

 	const VkPipelineDepthStencilStateCreateInfo ds_state = {
@@ -326,6 +281,7 @@ create_color_pipeline(struct radv_device *device,
 	};
 	result = create_pipeline(device, radv_render_pass_from_handle(pass),
 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
+				 device->meta_state.clear_color_p_layout,
 				 &extra, &device->meta_state.alloc, pipeline);

 	return result;
@@ -368,7 +324,12 @@ radv_device_finish_meta_clear_state(struct radv_device *device)
 		}
 		destroy_render_pass(device, state->clear[i].depthstencil_rp);
 	}
-
+	radv_DestroyPipelineLayout(radv_device_to_handle(device),
+				   state->clear_color_p_layout,
+				   &state->alloc);
+	radv_DestroyPipelineLayout(radv_device_to_handle(device),
+				   state->clear_depth_p_layout,
+				   &state->alloc);
 }

 static void
@@ -382,14 +343,13 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 	const uint32_t subpass_att = clear_att->colorAttachment;
 	const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
 	const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
-	const uint32_t samples = iview->image->samples;
+	const uint32_t samples = iview->image->info.samples;
 	const uint32_t samples_log2 = ffs(samples) - 1;
 	unsigned fs_key = radv_format_meta_fs_key(iview->vk_format);
 	struct radv_pipeline *pipeline;
 	VkClearColorValue clear_value = clear_att->clearValue.color;
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
 	VkPipeline pipeline_h;
-	uint32_t offset;

 	if (fs_key == -1) {
 		radv_finishme("color clears incomplete");
@@ -407,29 +367,10 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 	assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
 	assert(clear_att->colorAttachment < subpass->color_count);

-	const struct color_clear_vattrs vertex_data[3] = {
-		{
-			.position = {
-				-1.0,
-				-1.0,
-			},
-			.color = clear_value,
-		},
-		{
-			.position = {
-				-1.0,
-				1.0,
-			},
-			.color = clear_value,
-		},
-		{
-			.position = {
-				1.0,
-				-1.0,
-			},
-			.color = clear_value,
-		},
-	};
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      device->meta_state.clear_color_p_layout,
+			      VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
+			      &clear_value);

 	struct radv_subpass clear_subpass = {
 		.color_count = 1,
@@ -441,19 +382,6 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,

 	radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass, false);

-	radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
-	struct radv_buffer vertex_buffer = {
-		.device = device,
-		.size = sizeof(vertex_data),
-		.bo = cmd_buffer->upload.upload_bo,
-		.offset = offset,
-	};
-
-
-	radv_CmdBindVertexBuffers(cmd_buffer_h, 0, 1,
-					(VkBuffer[]) { radv_buffer_to_handle(&vertex_buffer) },
-					(VkDeviceSize[]) { 0 });
-
 	if (cmd_buffer->state.pipeline != pipeline) {
 		radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
 					   pipeline_h);
@@ -484,21 +412,25 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
 	nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
 	nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);

-	vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
-	fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
-	const struct glsl_type *position_type = glsl_vec4_type();
-
-	nir_variable *vs_in_pos =
-		nir_variable_create(vs_b.shader, nir_var_shader_in, position_type,
-				    "a_position");
-	vs_in_pos->data.location = VERT_ATTRIB_GENERIC0;
+	vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
+	fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
+	const struct glsl_type *position_out_type = glsl_vec4_type();

 	nir_variable *vs_out_pos =
-		nir_variable_create(vs_b.shader, nir_var_shader_out, position_type,
+		nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type,
 				    "gl_Position");
 	vs_out_pos->data.location = VARYING_SLOT_POS;

-	nir_copy_var(&vs_b, vs_out_pos, vs_in_pos);
+	nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(vs_b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(in_color_load, 0);
+	nir_intrinsic_set_range(in_color_load, 4);
+	in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&vs_b, 0));
+	in_color_load->num_components = 1;
+	nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
+	nir_builder_instr_insert(&vs_b, &in_color_load->instr);
+
+	nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, &in_color_load->dest.ssa);
+	nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);

 	const struct glsl_type *layer_type = glsl_int_type();
 	nir_variable *vs_out_layer =
@@ -562,24 +494,8 @@ create_depthstencil_pipeline(struct radv_device *device,

 	const VkPipelineVertexInputStateCreateInfo vi_state = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-		.vertexBindingDescriptionCount = 1,
-		.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-			{
-				.binding = 0,
-				.stride = sizeof(struct depthstencil_clear_vattrs),
-				.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-			},
-		},
-		.vertexAttributeDescriptionCount = 1,
-		.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-			{
-				/* Position */
-				.location = 0,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32B32_SFLOAT,
-				.offset = offsetof(struct depthstencil_clear_vattrs, position),
-			},
-		},
+		.vertexBindingDescriptionCount = 0,
+		.vertexAttributeDescriptionCount = 0,
 	};

 	const VkPipelineDepthStencilStateCreateInfo ds_state = {
@@ -619,14 +535,19 @@ create_depthstencil_pipeline(struct radv_device *device,
 	}
 	result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
 				 samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
+				 device->meta_state.clear_depth_p_layout,
 				 &extra, &device->meta_state.alloc, pipeline);
 	return result;
 }

-static bool depth_view_can_fast_clear(const struct radv_image_view *iview,
+static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
+				      const struct radv_image_view *iview,
 				      VkImageLayout layout,
 				      const VkClearRect *clear_rect)
 {
+	uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
+	                                                   cmd_buffer->queue_family_index,
+	                                                   cmd_buffer->queue_family_index);
 	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
 	    clear_rect->rect.extent.width != iview->extent.width ||
 	    clear_rect->rect.extent.height != iview->extent.height)
@@ -634,14 +555,15 @@ static bool depth_view_can_fast_clear(const struct radv_image_view *iview,
 	if (iview->image->surface.htile_size &&
 	    iview->base_mip == 0 &&
 	    iview->base_layer == 0 &&
-	    radv_layout_can_expclear(iview->image, layout) &&
-	    memcmp(&iview->extent, &iview->image->extent, sizeof(iview->extent)) == 0)
+	    radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
+	    radv_image_extent_compare(iview->image, &iview->extent)) /* fast clear only when the view extent equals the image extent, as the memcmp() == 0 test it replaces required */
 		return true;
 	return false;
 }

 static struct radv_pipeline *
-pick_depthstencil_pipeline(struct radv_meta_state *meta_state,
+pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
+			   struct radv_meta_state *meta_state,
 			   const struct radv_image_view *iview,
 			   int samples_log2,
 			   VkImageAspectFlags aspects,
@@ -649,7 +571,7 @@ pick_depthstencil_pipeline(struct radv_meta_state *meta_state,
 			   const VkClearRect *clear_rect,
 			   VkClearDepthStencilValue clear_value)
 {
-	bool fast = depth_view_can_fast_clear(iview, layout, clear_rect);
+	bool fast = depth_view_can_fast_clear(cmd_buffer, iview, layout, clear_rect);
 	int index = DEPTH_CLEAR_SLOW;

 	if (fast) {
@@ -682,10 +604,9 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 	VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
 	VkImageAspectFlags aspects = clear_att->aspectMask;
 	const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
-	const uint32_t samples = iview->image->samples;
+	const uint32_t samples = iview->image->info.samples;
 	const uint32_t samples_log2 = ffs(samples) - 1;
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-	uint32_t offset;

 	assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT ||
 	       aspects == VK_IMAGE_ASPECT_STENCIL_BIT ||
@@ -693,48 +614,21 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 			   VK_IMAGE_ASPECT_STENCIL_BIT));
 	assert(pass_att != VK_ATTACHMENT_UNUSED);

-	const struct depthstencil_clear_vattrs vertex_data[3] = {
-		{
-			.position = {
-				-1.0,
-				-1.0
-			},
-			.depth_clear = clear_value.depth,
-		},
-		{
-			.position = {
-				-1.0,
-				1.0,
-			},
-			.depth_clear = clear_value.depth,
-		},
-		{
-			.position = {
-				1.0,
-				-1.0,
-			},
-			.depth_clear = clear_value.depth,
-		},
-	};
+	if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
+		clear_value.depth = 1.0f;

-	radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
-	struct radv_buffer vertex_buffer = {
-		.device = device,
-		.size = sizeof(vertex_data),
-		.bo = cmd_buffer->upload.upload_bo,
-		.offset = offset,
-	};
+	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
+			      device->meta_state.clear_depth_p_layout,
+			      VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
+			      &clear_value.depth);

 	if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
 		radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
 						  clear_value.stencil);
 	}

-	radv_CmdBindVertexBuffers(cmd_buffer_h, 0, 1,
-					(VkBuffer[]) { radv_buffer_to_handle(&vertex_buffer) },
-					(VkDeviceSize[]) { 0 });
-
-	struct radv_pipeline *pipeline = pick_depthstencil_pipeline(meta_state,
+	struct radv_pipeline *pipeline = pick_depthstencil_pipeline(cmd_buffer,
+								    meta_state,
 								    iview,
 								    samples_log2,
 								    aspects,
@@ -746,7 +640,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 					   radv_pipeline_to_handle(pipeline));
 	}

-	if (depth_view_can_fast_clear(iview, subpass->depth_stencil_attachment.layout, clear_rect))
+	if (depth_view_can_fast_clear(cmd_buffer, iview, subpass->depth_stencil_attachment.layout, clear_rect))
 		radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);

 	radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
@@ -763,6 +657,95 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 	radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);
 }

+static bool
+emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer,
+		      const VkClearAttachment *clear_att,
+		      const VkClearRect *clear_rect,
+		      enum radv_cmd_flush_bits *pre_flush,
+		      enum radv_cmd_flush_bits *post_flush)
+{
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
+	VkImageLayout image_layout = subpass->depth_stencil_attachment.layout;
+	const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
+	VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
+	VkImageAspectFlags aspects = clear_att->aspectMask;
+	uint32_t clear_word;
+	/* Fast path: clear depth by filling the image's HTILE range instead of drawing. Returns true when the fill was emitted, false when the caller must clear another way. */
+	if (!iview->image->surface.htile_size)
+		return false;
+
+	if (cmd_buffer->device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
+		return false;
+
+	if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
+		goto fail;
+
+	/* 3D images never take this fast path. */
+	if (iview->image->type == VK_IMAGE_TYPE_3D)
+		goto fail;
+
+	/* The view must start at layer 0 and cover every array layer of the image. */
+	if (iview->base_layer > 0)
+		goto fail;
+	if (iview->image->info.array_size != iview->layer_count)
+		goto fail;
+
+	if (iview->image->info.levels > 1) /* images with more than one mip level are rejected */
+		goto fail;
+
+	if (!radv_image_extent_compare(iview->image, &iview->extent)) /* NOTE(review): assumes the helper returns true when the extents match -- confirm */
+		goto fail;
+
+	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
+	    clear_rect->rect.extent.width != iview->image->info.width ||
+	    clear_rect->rect.extent.height != iview->image->info.height)
+		goto fail; /* the clear rect must span the whole image */
+
+	if (clear_rect->baseArrayLayer != 0)
+		goto fail;
+	if (clear_rect->layerCount != iview->image->info.array_size)
+		goto fail; /* ...and every array layer */
+
+	/* Don't do stencil clears till we have figured out if the clear words are
+	 * correct. */
+	if (vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT)
+		goto fail;
+
+	if (clear_value.depth == 1.0)
+		clear_word = 0xfffffff0;
+	else if (clear_value.depth == 0.0)
+		clear_word = 0;
+	else
+		goto fail; /* only depth values 0.0 and 1.0 have a clear word here */
+
+	if (pre_flush) { /* request only the DB flush bits not yet in *pre_flush, then fold the pending bits into it */
+		cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+						 RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) & ~ *pre_flush;
+		*pre_flush |= cmd_buffer->state.flush_bits;
+	} else
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
+		                                RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
+
+	radv_fill_buffer(cmd_buffer, iview->image->bo, /* fill the image's whole HTILE range with clear_word */
+	                 iview->image->offset + iview->image->htile_offset,
+	                 iview->image->surface.htile_size, clear_word);
+
+	/* NOTE(review): presumably records clear_value for later use of the image -- confirm in radv_set_depth_clear_regs(). */
+	radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);
+	if (post_flush) /* hand the post-fill flush bits to the caller when it passed post_flush, else queue them on the command buffer */
+		*post_flush |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+	                       RADV_CMD_FLAG_INV_VMEM_L1 |
+	                       RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+	else
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+	                                        RADV_CMD_FLAG_INV_VMEM_L1 |
+	                                        RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
+	return true;
+fail:
+	return false;
+}

 static VkFormat pipeline_formats[] = {
 	VK_FORMAT_R8G8B8A8_UNORM,
@@ -785,6 +768,34 @@ radv_device_init_meta_clear_state(struct radv_device *device)

 	memset(&device->meta_state.clear, 0, sizeof(device->meta_state.clear));

+	VkPipelineLayoutCreateInfo pl_color_create_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 0,
+		.pushConstantRangeCount = 1,
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
+	};
+
+	res = radv_CreatePipelineLayout(radv_device_to_handle(device),
+					&pl_color_create_info,
+					&device->meta_state.alloc,
+					&device->meta_state.clear_color_p_layout);
+	if (res != VK_SUCCESS)
+		goto fail;
+
+	VkPipelineLayoutCreateInfo pl_depth_create_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 0,
+		.pushConstantRangeCount = 1,
+		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
+	};
+
+	res = radv_CreatePipelineLayout(radv_device_to_handle(device),
+					&pl_depth_create_info,
+					&device->meta_state.alloc,
+					&device->meta_state.clear_depth_p_layout);
+	if (res != VK_SUCCESS)
+		goto fail;
+
 	for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
 		uint32_t samples = 1 << i;
 		for (uint32_t j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
@@ -882,26 +893,30 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
 	/* all layers are bound */
 	if (iview->base_layer > 0)
 		goto fail;
-	if (iview->image->array_size != iview->layer_count)
+	if (iview->image->info.array_size != iview->layer_count)
 		goto fail;

-	if (iview->image->levels > 1)
+	if (iview->image->info.levels > 1)
 		goto fail;

-	if (iview->image->surface.level[0].mode < RADEON_SURF_MODE_1D)
+	if (iview->image->surface.u.legacy.level[0].mode < RADEON_SURF_MODE_1D)
 		goto fail;
-
-	if (memcmp(&iview->extent, &iview->image->extent, sizeof(iview->extent)))
+	if (!radv_image_extent_compare(iview->image, &iview->extent))
 		goto fail;

 	if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
-	    clear_rect->rect.extent.width != iview->image->extent.width ||
-	    clear_rect->rect.extent.height != iview->image->extent.height)
+	    clear_rect->rect.extent.width != iview->image->info.width ||
+	    clear_rect->rect.extent.height != iview->image->info.height)
 		goto fail;

 	if (clear_rect->baseArrayLayer != 0)
 		goto fail;
-	if (clear_rect->layerCount != iview->image->array_size)
+	if (clear_rect->layerCount != iview->image->info.array_size)
+		goto fail;
+
+	/* RB+ doesn't work with CMASK fast clear on Stoney. */
+	if (!iview->image->surface.dcc_size &&
+	    cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY)
 		goto fail;

 	/* DCC */
@@ -962,7 +977,9 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
 	} else {
 		assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
 						VK_IMAGE_ASPECT_STENCIL_BIT));
-		emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect);
+		if (!emit_fast_htile_clear(cmd_buffer, clear_att, clear_rect,
+		                           pre_flush, post_flush))
+			emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect);
 	}
 }

@@ -1006,7 +1023,7 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
 	if (!subpass_needs_clear(cmd_buffer))
 		return;

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	VkClearRect clear_rect = {
 		.rect = cmd_state->render_area,
@@ -1077,8 +1094,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
 					     .baseArrayLayer = range->baseArrayLayer + layer,
 					     .layerCount = 1
 				     },
-			     },
-			     cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+			     });

 	VkFramebuffer fb;
 	radv_CreateFramebuffer(device_h,
@@ -1211,7 +1227,7 @@ radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer,
 		const VkImageSubresourceRange *range = &ranges[r];
 		for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
 			const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D ?
-				radv_minify(image->extent.depth, range->baseMipLevel + l) :
+				radv_minify(image->info.depth, range->baseMipLevel + l) :
 				radv_get_layerCount(image, range);
 			for (uint32_t s = 0; s < layer_count; ++s) {

@@ -1254,7 +1270,7 @@ void radv_CmdClearColorImage(
 	if (cs)
 		radv_meta_begin_cleari(cmd_buffer, &saved_state.compute);
 	else
-		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
+		radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);

 	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
 			     (const VkClearValue *) pColor,
@@ -1278,7 +1294,7 @@ void radv_CmdClearDepthStencilImage(
 	RADV_FROM_HANDLE(radv_image, image, image_h);
 	struct radv_meta_saved_state saved_state;

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	radv_cmd_clear_image(cmd_buffer, image, imageLayout,
 			     (const VkClearValue *) pDepthStencil,
@@ -1302,7 +1318,7 @@ void radv_CmdClearAttachments(
 	if (!cmd_buffer->state.subpass)
 		return;

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	/* FINISHME: We can do better than this dumb loop. It thrashes too much
 	 * state.
--- a/src/amd/vulkan/radv_meta_copy.c
+++ b/src/amd/vulkan/radv_meta_copy.c
@@ -118,12 +118,12 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
 	/* The Vulkan 1.0 spec says "dstImage must have a sample count equal to
 	 * VK_SAMPLE_COUNT_1_BIT."
 	 */
-	assert(image->samples == 1);
+	assert(image->info.samples == 1);

 	if (cs)
 		radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute);
 	else
-		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
+		radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {

@@ -337,11 +337,11 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
 	 *    vkCmdCopyImage can be used to copy image data between multisample
 	 *    images, but both images must have the same number of samples.
 	 */
-	assert(src_image->samples == dest_image->samples);
+	assert(src_image->info.samples == dest_image->info.samples);
 	if (cs)
 		radv_meta_begin_itoi(cmd_buffer, &saved_state.compute);
 	else
-		radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
+		radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);

 	for (unsigned r = 0; r < regionCount; r++) {
 		assert(pRegions[r].srcSubresource.aspectMask ==
@@ -447,8 +447,8 @@ void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
 	image_copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
 	image_copy.dstSubresource.layerCount = 1;

-	image_copy.extent.width = image->extent.width;
-	image_copy.extent.height = image->extent.height;
+	image_copy.extent.width = image->info.width;
+	image_copy.extent.height = image->info.height;
 	image_copy.extent.depth = 1;

 	meta_copy_image(cmd_buffer, image, linear_image,
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -26,53 +26,7 @@

 #include "radv_meta.h"
 #include "radv_private.h"
-#include "nir/nir_builder.h"
 #include "sid.h"
-/**
- * Vertex attributes used by all pipelines.
- */
-struct vertex_attrs {
-	float position[2]; /**< 3DPRIM_RECTLIST */
-};
-
-/* passthrough vertex shader */
-static nir_shader *
-build_nir_vs(void)
-{
-	const struct glsl_type *vec4 = glsl_vec4_type();
-
-	nir_builder b;
-	nir_variable *a_position;
-	nir_variable *v_position;
-
-	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_depth_decomp_vs");
-
-	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-					 "a_position");
-	a_position->data.location = VERT_ATTRIB_GENERIC0;
-
-	v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
-					 "gl_Position");
-	v_position->data.location = VARYING_SLOT_POS;
-
-	nir_copy_var(&b, v_position, a_position);
-
-	return b.shader;
-}
-
-/* simple passthrough shader */
-static nir_shader *
-build_nir_fs(void)
-{
-	nir_builder b;
-
-	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info->name = ralloc_asprintf(b.shader,
-					       "meta_depth_decomp_noop_fs");
-
-	return b.shader;
-}

 static VkResult
 create_pass(struct radv_device *device)
@@ -124,7 +78,7 @@ create_pipeline(struct radv_device *device,
 	VkDevice device_h = radv_device_to_handle(device);

 	struct radv_shader_module fs_module = {
-		.nir = build_nir_fs(),
+		.nir = radv_meta_build_nir_fs_noop(),
 	};

 	if (!fs_module.nir) {
@@ -152,24 +106,8 @@ create_pipeline(struct radv_device *device,
 		},
 		.pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
 			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-			.vertexBindingDescriptionCount = 1,
-			.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-				{
-					.binding = 0,
-					.stride = sizeof(struct vertex_attrs),
-					.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-				},
-			},
-			.vertexAttributeDescriptionCount = 1,
-			.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-				{
-					/* Position */
-					.location = 0,
-					.binding = 0,
-					.format = VK_FORMAT_R32G32_SFLOAT,
-					.offset = offsetof(struct vertex_attrs, position),
-				},
-			},
+			.vertexBindingDescriptionCount = 0,
+			.vertexAttributeDescriptionCount = 0,
 		},
 		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
 			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
@@ -285,7 +223,7 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)

 	zero(device->meta_state.depth_decomp);

-	struct radv_shader_module vs_module = { .nir = build_nir_vs() };
+	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
 	if (!vs_module.nir) {
 		/* XXX: Need more accurate error */
 		res = VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -318,45 +256,7 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
 		  const VkExtent2D *depth_decomp_extent,
 		  VkPipeline pipeline_h)
 {
-	struct radv_device *device = cmd_buffer->device;
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-	uint32_t offset;
-	const struct vertex_attrs vertex_data[3] = {
-		{
-			.position = {
-				-1.0,
-				-1.0,
-			},
-		},
-		{
-			.position = {
-				-1.0,
-				1.0,
-			},
-		},
-		{
-			.position = {
-				1.0,
-				-1.0,
-			},
-		},
-	};
-
-	radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
-	struct radv_buffer vertex_buffer = {
-		.device = device,
-		.size = sizeof(vertex_data),
-		.bo = cmd_buffer->upload.upload_bo,
-		.offset = offset,
-	};
-
-	VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer);
-
-	radv_CmdBindVertexBuffers(cmd_buffer_h,
-				  /*firstBinding*/ 0,
-				  /*bindingCount*/ 1,
-				  (VkBuffer[]) { vertex_buffer_h },
-				  (VkDeviceSize[]) { 0 });

 	RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h);

@@ -392,16 +292,16 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_meta_saved_pass_state saved_pass_state;
 	VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-	uint32_t width = radv_minify(image->extent.width,
+	uint32_t width = radv_minify(image->info.width,
 				     subresourceRange->baseMipLevel);
-	uint32_t height = radv_minify(image->extent.height,
+	uint32_t height = radv_minify(image->info.height,
 				     subresourceRange->baseMipLevel);

 	if (!image->surface.htile_size)
 		return;
 	radv_meta_save_pass(&saved_pass_state, cmd_buffer);

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	for (uint32_t layer = 0; layer < radv_get_layerCount(image, subresourceRange); layer++) {
 		struct radv_image_view iview;
@@ -418,8 +318,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 						     .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
 						     .layerCount = 1,
 					     },
-				     },
-				     cmd_buffer, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
+				     });


 		VkFramebuffer fb_h;
--- a/src/amd/vulkan/radv_meta_fast_clear.c
+++ b/src/amd/vulkan/radv_meta_fast_clear.c
@@ -26,53 +26,7 @@

 #include "radv_meta.h"
 #include "radv_private.h"
-#include "nir/nir_builder.h"
 #include "sid.h"
-/**
- * Vertex attributes used by all pipelines.
- */
-struct vertex_attrs {
-	float position[2]; /**< 3DPRIM_RECTLIST */
-};
-
-/* passthrough vertex shader */
-static nir_shader *
-build_nir_vs(void)
-{
-	const struct glsl_type *vec4 = glsl_vec4_type();
-
-	nir_builder b;
-	nir_variable *a_position;
-	nir_variable *v_position;
-
-	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_fast_clear_vs");
-
-	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-					 "a_position");
-	a_position->data.location = VERT_ATTRIB_GENERIC0;
-
-	v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
-					 "gl_Position");
-	v_position->data.location = VARYING_SLOT_POS;
-
-	nir_copy_var(&b, v_position, a_position);
-
-	return b.shader;
-}
-
-/* simple passthrough shader */
-static nir_shader *
-build_nir_fs(void)
-{
-	nir_builder b;
-
-	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info->name = ralloc_asprintf(b.shader,
-					      "meta_fast_clear_noop_fs");
-
-	return b.shader;
-}

 static VkResult
 create_pass(struct radv_device *device)
@@ -128,7 +82,7 @@ create_pipeline(struct radv_device *device,
 	VkDevice device_h = radv_device_to_handle(device);

 	struct radv_shader_module fs_module = {
-		.nir = build_nir_fs(),
+		.nir = radv_meta_build_nir_fs_noop(),
 	};

 	if (!fs_module.nir) {
@@ -154,24 +108,8 @@ create_pipeline(struct radv_device *device,

 	const VkPipelineVertexInputStateCreateInfo vi_state = {
 		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-		.vertexBindingDescriptionCount = 1,
-		.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-			{
-				.binding = 0,
-				.stride = sizeof(struct vertex_attrs),
-				.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-			},
-		},
-		.vertexAttributeDescriptionCount = 1,
-		.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-			{
-				/* Position */
-				.location = 0,
-				.binding = 0,
-				.format = VK_FORMAT_R32G32_SFLOAT,
-				.offset = offsetof(struct vertex_attrs, position),
-			},
-		}
+		.vertexBindingDescriptionCount = 0,
+		.vertexAttributeDescriptionCount = 0,
 	};

 	const VkPipelineInputAssemblyStateCreateInfo ia_state = {
@@ -330,7 +268,7 @@ radv_device_init_meta_fast_clear_flush_state(struct radv_device *device)

 	zero(device->meta_state.fast_clear_flush);

-	struct radv_shader_module vs_module = { .nir = build_nir_vs() };
+	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
 	if (!vs_module.nir) {
 		/* XXX: Need more accurate error */
 		res = VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -364,43 +302,6 @@ emit_fast_clear_flush(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radv_device *device = cmd_buffer->device;
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-	uint32_t offset;
-	const struct vertex_attrs vertex_data[3] = {
-		{
-			.position = {
-				-1.0,
-				-1.0,
-			},
-		},
-		{
-			.position = {
-				-1.0,
-				1.0,
-			},
-		},
-		{
-			.position = {
-				1.0,
-				-1.0,
-			},
-		},
-	};
-
-	radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
-	struct radv_buffer vertex_buffer = {
-		.device = device,
-		.size = sizeof(vertex_data),
-		.bo = cmd_buffer->upload.upload_bo,
-		.offset = offset,
-	};
-
-	VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer);
-
-	radv_CmdBindVertexBuffers(cmd_buffer_h,
-				  /*firstBinding*/ 0,
-				  /*bindingCount*/ 1,
-				  (VkBuffer[]) { vertex_buffer_h },
-				  (VkDeviceSize[]) { 0 });

 	VkPipeline pipeline_h;
 	if (fmask_decompress)
@@ -448,7 +349,7 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,

 	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
 	radv_meta_save_pass(&saved_pass_state, cmd_buffer);
-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	for (uint32_t layer = 0; layer < layer_count; ++layer) {
 		struct radv_image_view iview;
@@ -466,8 +367,7 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 						     .baseArrayLayer = subresourceRange->baseArrayLayer + layer,
 						     .layerCount = 1,
 					      },
-				     },
-				     cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+				     });

 		VkFramebuffer fb_h;
 		radv_CreateFramebuffer(device_h,
@@ -477,8 +377,8 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					.pAttachments = (VkImageView[]) {
 						radv_image_view_to_handle(&iview)
 					},
-				       .width = image->extent.width,
-				       .height = image->extent.height,
+				       .width = image->info.width,
+				       .height = image->info.height,
 				       .layers = 1
 				},
 				&cmd_buffer->pool->alloc,
@@ -495,8 +395,8 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 							      0,
 						      },
 						      .extent = {
-							      image->extent.width,
-							      image->extent.height,
+							      image->info.width,
+							      image->info.height,
 						      }
 					      },
 					      .clearValueCount = 0,
@@ -505,7 +405,7 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 				     VK_SUBPASS_CONTENTS_INLINE);

 		emit_fast_clear_flush(cmd_buffer,
-				      &(VkExtent2D) { image->extent.width, image->extent.height },
+				      &(VkExtent2D) { image->info.width, image->info.height },
 				      image->fmask.size > 0);
 		radv_CmdEndRenderPass(cmd_buffer_h);

--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -28,40 +28,8 @@
 #include "radv_private.h"
 #include "nir/nir_builder.h"
 #include "sid.h"
-/**
- * Vertex attributes used by all pipelines.
- */
-struct vertex_attrs {
-	float position[2]; /**< 3DPRIM_RECTLIST */
-};

-/* passthrough vertex shader */
-static nir_shader *
-build_nir_vs(void)
-{
-	const struct glsl_type *vec4 = glsl_vec4_type();
-
-	nir_builder b;
-	nir_variable *a_position;
-	nir_variable *v_position;
-
-	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "meta_resolve_vs");
-
-	a_position = nir_variable_create(b.shader, nir_var_shader_in, vec4,
-					 "a_position");
-	a_position->data.location = VERT_ATTRIB_GENERIC0;
-
-	v_position = nir_variable_create(b.shader, nir_var_shader_out, vec4,
-					 "gl_Position");
-	v_position->data.location = VARYING_SLOT_POS;
-
-	nir_copy_var(&b, v_position, a_position);
-
-	return b.shader;
-}
-
-/* simple passthrough shader */
+/* emit 0, 0, 0, 1 */
 static nir_shader *
 build_nir_fs(void)
 {
@@ -70,7 +38,7 @@ build_nir_fs(void)
 	nir_variable *f_color; /* vec4, fragment output color */

 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
-	b.shader->info->name = ralloc_asprintf(b.shader,
+	b.shader->info.name = ralloc_asprintf(b.shader,
 					       "meta_resolve_fs");

 	f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
@@ -174,24 +142,8 @@ create_pipeline(struct radv_device *device,
 					       },
 					       .pVertexInputState = &(VkPipelineVertexInputStateCreateInfo) {
 						       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-						       .vertexBindingDescriptionCount = 1,
-						       .pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
-							       {
-								       .binding = 0,
-								       .stride = sizeof(struct vertex_attrs),
-								       .inputRate = VK_VERTEX_INPUT_RATE_VERTEX
-							       },
-						       },
-						       .vertexAttributeDescriptionCount = 1,
-						       .pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
-							       {
-								       /* Position */
-								       .location = 0,
-								       .binding = 0,
-								       .format = VK_FORMAT_R32G32_SFLOAT,
-								       .offset = offsetof(struct vertex_attrs, position),
-							       },
-						       },
+						       .vertexBindingDescriptionCount = 0,
+						       .vertexAttributeDescriptionCount = 0,
 					       },
 					       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
 						       .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
@@ -288,7 +240,7 @@ radv_device_init_meta_resolve_state(struct radv_device *device)

 	zero(device->meta_state.resolve);

-	struct radv_shader_module vs_module = { .nir = build_nir_vs() };
+	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
 	if (!vs_module.nir) {
 		/* XXX: Need more accurate error */
 		res = VK_ERROR_OUT_OF_HOST_MEMORY;
@@ -322,44 +274,8 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radv_device *device = cmd_buffer->device;
 	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
-	uint32_t offset;
-	const struct vertex_attrs vertex_data[3] = {
-		{
-			.position = {
-				-1.0,
-				-1.0,
-			},
-		},
-		{
-			.position = {
-				-1.0,
-				1.0,
-			},
-		},
-		{
-			.position = {
-				1.0,
-				-1.0,
-			},
-		},
-	};

 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
-	radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
-	struct radv_buffer vertex_buffer = {
-		.device = device,
-		.size = sizeof(vertex_data),
-		.bo = cmd_buffer->upload.upload_bo,
-		.offset = offset,
-	};
-
-	VkBuffer vertex_buffer_h = radv_buffer_to_handle(&vertex_buffer);
-
-	radv_CmdBindVertexBuffers(cmd_buffer_h,
-				  /*firstBinding*/ 0,
-				  /*bindingCount*/ 1,
-				  (VkBuffer[]) { vertex_buffer_h },
-				  (VkDeviceSize[]) { 0 });

 	VkPipeline pipeline_h = device->meta_state.resolve.pipeline;
 	RADV_FROM_HANDLE(radv_pipeline, pipeline, pipeline_h);
@@ -387,6 +303,25 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
 }

+enum radv_resolve_method {
+	RESOLVE_HW, /* initial choice; usable only for single full resolves */
+	RESOLVE_COMPUTE, /* routed to the compute-shader resolve path */
+	RESOLVE_FRAGMENT, /* routed to the fragment-shader resolve path */
+};
+
+/* Overrides *method only when src and dest micro tile modes differ: FRAGMENT
+ * if dest_image has DCC levels, else COMPUTE; otherwise *method is untouched.
+ * The DCC check is on dest_image only, so argument order matters. */
+static void radv_pick_resolve_method_images(struct radv_image *src_image,
+					    struct radv_image *dest_image,
+					    enum radv_resolve_method *method)
+{
+	if (dest_image->surface.micro_tile_mode == src_image->surface.micro_tile_mode)
+		return;
+	*method = dest_image->surface.num_dcc_levels > 0 ?
+		RESOLVE_FRAGMENT : RESOLVE_COMPUTE;
+}
+
 void radv_CmdResolveImage(
 	VkCommandBuffer                             cmd_buffer_h,
 	VkImage                                     src_image_h,
@@ -402,28 +337,39 @@ void radv_CmdResolveImage(
 	struct radv_device *device = cmd_buffer->device;
 	struct radv_meta_saved_state saved_state;
 	VkDevice device_h = radv_device_to_handle(device);
-	bool use_compute_resolve = false;
-
+	enum radv_resolve_method resolve_method = RESOLVE_HW;
 	/* we can use the hw resolve only for single full resolves */
 	if (region_count == 1) {
 		if (regions[0].srcOffset.x ||
 		    regions[0].srcOffset.y ||
 		    regions[0].srcOffset.z)
-			use_compute_resolve = true;
+			resolve_method = RESOLVE_COMPUTE;
 		if (regions[0].dstOffset.x ||
 		    regions[0].dstOffset.y ||
 		    regions[0].dstOffset.z)
-			use_compute_resolve = true;
+			resolve_method = RESOLVE_COMPUTE;

-		if (regions[0].extent.width != src_image->extent.width ||
-		    regions[0].extent.height != src_image->extent.height ||
-		    regions[0].extent.depth != src_image->extent.depth)
-			use_compute_resolve = true;
+		if (regions[0].extent.width != src_image->info.width ||
+		    regions[0].extent.height != src_image->info.height ||
+		    regions[0].extent.depth != src_image->info.depth)
+			resolve_method = RESOLVE_COMPUTE;
 	} else
-		use_compute_resolve = true;
+		resolve_method = RESOLVE_COMPUTE;

-	if (use_compute_resolve) {
+	radv_pick_resolve_method_images(src_image, dest_image,
+					&resolve_method);

+	if (resolve_method == RESOLVE_FRAGMENT) {
+		radv_meta_resolve_fragment_image(cmd_buffer,
+						 src_image,
+						 src_image_layout,
+						 dest_image,
+						 dest_image_layout,
+						 region_count, regions);
+		return;
+	}
+
+	if (resolve_method == RESOLVE_COMPUTE) {
 		radv_meta_resolve_compute_image(cmd_buffer,
 						src_image,
 						src_image_layout,
@@ -433,12 +379,12 @@ void radv_CmdResolveImage(
 		return;
 	}

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

-	assert(src_image->samples > 1);
-	assert(dest_image->samples == 1);
+	assert(src_image->info.samples > 1);
+	assert(dest_image->info.samples == 1);

-	if (src_image->samples >= 16) {
+	if (src_image->info.samples >= 16) {
 		/* See commit aa3f9aaf31e9056a255f9e0472ebdfdaa60abe54 for the
 		 * glBlitFramebuffer workaround for samples >= 16.
 		 */
@@ -446,7 +392,7 @@ void radv_CmdResolveImage(
 			      "samples >= 16");
 	}

-	if (src_image->array_size > 1)
+	if (src_image->info.array_size > 1)
 		radv_finishme("vkCmdResolveImage: multisample array images");

 	if (dest_image->surface.dcc_size) {
@@ -512,8 +458,7 @@ void radv_CmdResolveImage(
 							     .baseArrayLayer = src_base_layer + layer,
 							     .layerCount = 1,
 						     },
-							     },
-					     cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
+					     });

 			struct radv_image_view dest_iview;
 			radv_image_view_init(&dest_iview, cmd_buffer->device,
@@ -529,8 +474,7 @@ void radv_CmdResolveImage(
 							     .baseArrayLayer = dest_base_layer + layer,
 							     .layerCount = 1,
 						     },
-							     },
-					     cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
+					      });

 			VkFramebuffer fb_h;
 			radv_CreateFramebuffer(device_h,
@@ -541,9 +485,9 @@ void radv_CmdResolveImage(
 							       radv_image_view_to_handle(&src_iview),
 							       radv_image_view_to_handle(&dest_iview),
 						       },
-						       .width = radv_minify(dest_image->extent.width,
+						       .width = radv_minify(dest_image->info.width,
 									    region->dstSubresource.mipLevel),
-						       .height = radv_minify(dest_image->extent.height,
+						       .height = radv_minify(dest_image->info.height,
 									      region->dstSubresource.mipLevel),
 						       .layers = 1
 					       },
@@ -599,6 +543,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
 	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
 	struct radv_meta_saved_state saved_state;
+	enum radv_resolve_method resolve_method = RESOLVE_HW;

 	/* FINISHME(perf): Skip clears for resolve attachments.
 	 *
@@ -612,7 +557,27 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 	if (!subpass->has_resolve)
 		return;

-	radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
+	for (uint32_t i = 0; i < subpass->color_count; ++i) {
+		VkAttachmentReference src_att = subpass->color_attachments[i];
+		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
+		struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
+		struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
+
+		radv_pick_resolve_method_images(dst_img, src_img, &resolve_method);
+		if (resolve_method == RESOLVE_FRAGMENT) {
+			break;
+		}
+	}
+
+	if (resolve_method == RESOLVE_COMPUTE) {
+		radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);
+		return;
+	} else if (resolve_method == RESOLVE_FRAGMENT) {
+		radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);
+		return;
+	}
+
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	for (uint32_t i = 0; i < subpass->color_count; ++i) {
 		VkAttachmentReference src_att = subpass->color_attachments[i];
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -32,11 +32,10 @@
 #include "vk_format.h"

 static nir_shader *
-build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int samples)
+build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
 {
 	nir_builder b;
 	char name[64];
-	nir_if *outer_if = NULL;
 	const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
 								 false,
 								 false,
@@ -45,12 +44,12 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
 							     false,
 							     false,
 							     GLSL_TYPE_FLOAT);
-	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float");
+	snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, name);
-	b.shader->info->cs.local_size[0] = 16;
-	b.shader->info->cs.local_size[1] = 16;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, name);
+	b.shader->info.cs.local_size[0] = 16;
+	b.shader->info.cs.local_size[1] = 16;
+	b.shader->info.cs.local_size[2] = 1;

 	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
 						      sampler_type, "s_tex");
@@ -64,105 +63,40 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int sampl
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-						b.shader->info->cs.local_size[0],
-						b.shader->info->cs.local_size[1],
-						b.shader->info->cs.local_size[2], 0);
+						b.shader->info.cs.local_size[0],
+						b.shader->info.cs.local_size[1],
+						b.shader->info.cs.local_size[2], 0);

 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

 	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(src_offset, 0);
+	nir_intrinsic_set_range(src_offset, 16);
 	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
 	src_offset->num_components = 2;
 	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
 	nir_builder_instr_insert(&b, &src_offset->instr);

 	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(dst_offset, 0);
+	nir_intrinsic_set_range(dst_offset, 16);
 	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
 	dst_offset->num_components = 2;
 	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
 	nir_builder_instr_insert(&b, &dst_offset->instr);

 	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
-	/* do a txf_ms on each sample */
-	nir_ssa_def *tmp;
+	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

-	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
-	tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
-	tex->op = nir_texop_txf_ms;
-	tex->src[0].src_type = nir_tex_src_coord;
-	tex->src[0].src = nir_src_for_ssa(img_coord);
-	tex->src[1].src_type = nir_tex_src_ms_index;
-	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
-	tex->dest_type = nir_type_float;
-	tex->is_array = false;
-	tex->coord_components = 2;
-	tex->texture = nir_deref_var_create(tex, input_img);
-	tex->sampler = NULL;
+	radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples,
+					    input_img, color, img_coord);

-	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
-	nir_builder_instr_insert(&b, &tex->instr);
-
-	tmp = &tex->dest.ssa;
-	nir_variable *color =
-		nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
-
-	if (!is_integer && samples > 1) {
-		nir_tex_instr *tex_all_same = nir_tex_instr_create(b.shader, 1);
-		tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
-		tex_all_same->op = nir_texop_samples_identical;
-		tex_all_same->src[0].src_type = nir_tex_src_coord;
-		tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
-		tex_all_same->dest_type = nir_type_float;
-		tex_all_same->is_array = false;
-		tex_all_same->coord_components = 2;
-		tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
-		tex_all_same->sampler = NULL;
-
-		nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
-		nir_builder_instr_insert(&b, &tex_all_same->instr);
-
-		nir_ssa_def *all_same = nir_ine(&b, &tex_all_same->dest.ssa, nir_imm_int(&b, 0));
-		nir_if *if_stmt = nir_if_create(b.shader);
-		if_stmt->condition = nir_src_for_ssa(all_same);
-		nir_cf_node_insert(b.cursor, &if_stmt->cf_node);
-
-		b.cursor = nir_after_cf_list(&if_stmt->then_list);
-		for (int i = 1; i < samples; i++) {
-			nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 2);
-			tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
-			tex_add->op = nir_texop_txf_ms;
-			tex_add->src[0].src_type = nir_tex_src_coord;
-			tex_add->src[0].src = nir_src_for_ssa(img_coord);
-			tex_add->src[1].src_type = nir_tex_src_ms_index;
-			tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
-			tex_add->dest_type = nir_type_float;
-			tex_add->is_array = false;
-			tex_add->coord_components = 2;
-			tex_add->texture = nir_deref_var_create(tex_add, input_img);
-			tex_add->sampler = NULL;
-
-			nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
-			nir_builder_instr_insert(&b, &tex_add->instr);
-
-			tmp = nir_fadd(&b, tmp, &tex_add->dest.ssa);
-		}
-
-		tmp = nir_fdiv(&b, tmp, nir_imm_float(&b, samples));
-		nir_store_var(&b, color, tmp, 0xf);
-		b.cursor = nir_after_cf_list(&if_stmt->else_list);
-		outer_if = if_stmt;
-	}
-	nir_store_var(&b, color, &tex->dest.ssa, 0xf);
-
-	if (outer_if)
-		b.cursor = nir_after_cf_node(&outer_if->cf_node);
-
-	nir_ssa_def *newv = nir_load_var(&b, color);
+	nir_ssa_def *outval = nir_load_var(&b, color);
 	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
 	store->src[0] = nir_src_for_ssa(coord);
 	store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
-	store->src[2] = nir_src_for_ssa(newv);
+	store->src[2] = nir_src_for_ssa(outval);
 	store->variables[0] = nir_deref_var_create(store, output_img);
 	nir_builder_instr_insert(&b, &store->instr);
 	return b.shader;
@@ -230,12 +164,13 @@ static VkResult
 create_resolve_pipeline(struct radv_device *device,
 			int samples,
 			bool is_integer,
+			bool is_srgb,
 			VkPipeline *pipeline)
 {
 	VkResult result;
 	struct radv_shader_module cs = { .nir = NULL };

-	cs.nir = build_resolve_compute_shader(device, is_integer, samples);
+	cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples);

 	/* compute shader */

@@ -282,12 +217,15 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device)
 	for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
 		uint32_t samples = 1 << i;

-		res = create_resolve_pipeline(device, samples, false,
+		res = create_resolve_pipeline(device, samples, false, false,
 					      &state->resolve_compute.rc[i].pipeline);

-		res = create_resolve_pipeline(device, samples, true,
+		res = create_resolve_pipeline(device, samples, true, false,
 					      &state->resolve_compute.rc[i].i_pipeline);

+		res = create_resolve_pipeline(device, samples, false, true,
+					      &state->resolve_compute.rc[i].srgb_pipeline);
+
 	}

 	return res;
@@ -305,6 +243,10 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
 		radv_DestroyPipeline(radv_device_to_handle(device),
 				     state->resolve_compute.rc[i].i_pipeline,
 				     &state->alloc);
+
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     state->resolve_compute.rc[i].srgb_pipeline,
+				     &state->alloc);
 	}

 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
@@ -315,6 +257,78 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
 				   &state->alloc);
 }

+/* Record one compute resolve: push the source view as a sampled image
+ * (binding 0) and the destination view as a storage image (binding 1),
+ * bind the pipeline for the source format and sample count, push the two
+ * offsets and dispatch over resolve_extent.
+ */
+static void
+emit_resolve(struct radv_cmd_buffer *cmd_buffer,
+	     struct radv_image_view *src_iview,
+	     struct radv_image_view *dest_iview,
+	     const VkOffset2D *src_offset,
+	     const VkOffset2D *dest_offset,
+	     const VkExtent2D *resolve_extent)
+{
+	struct radv_device *device = cmd_buffer->device;
+	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+	const VkFormat src_format = src_iview->image->vk_format;
+	const uint32_t samples = src_iview->image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+
+	const VkDescriptorImageInfo src_info = {
+		.sampler = VK_NULL_HANDLE,
+		.imageView = radv_image_view_to_handle(src_iview),
+		.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	const VkDescriptorImageInfo dest_info = {
+		.sampler = VK_NULL_HANDLE,
+		.imageView = radv_image_view_to_handle(dest_iview),
+		.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	VkWriteDescriptorSet writes[] = {
+		{
+			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+			.dstBinding = 0,
+			.dstArrayElement = 0,
+			.descriptorCount = 1,
+			.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+			.pImageInfo = &src_info,
+		}, {
+			.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+			.dstBinding = 1,
+			.dstArrayElement = 0,
+			.descriptorCount = 1,
+			.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
+			.pImageInfo = &dest_info,
+		},
+	};
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_COMPUTE,
+				      device->meta_state.resolve_compute.p_layout,
+				      0 /* set */, 2 /* descriptorWriteCount */,
+				      writes);
+
+	VkPipeline pipeline;
+	if (vk_format_is_int(src_format))
+		pipeline = device->meta_state.resolve_compute.rc[samples_log2].i_pipeline;
+	else if (vk_format_is_srgb(src_format))
+		pipeline = device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline;
+	else
+		pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline;
+	if (radv_pipeline_from_handle(pipeline) != cmd_buffer->state.compute_pipeline)
+		radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
+
+	const unsigned push_constants[4] = {
+		src_offset->x, src_offset->y,
+		dest_offset->x, dest_offset->y,
+	};
+	radv_CmdPushConstants(cmd_buffer_h,
+			      device->meta_state.resolve_compute.p_layout,
+			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
+	radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
+}
+
 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     struct radv_image *src_image,
 				     VkImageLayout src_image_layout,
@@ -323,10 +337,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 				     uint32_t region_count,
 				     const VkImageResolve *regions)
 {
-	struct radv_device *device = cmd_buffer->device;
 	struct radv_meta_saved_compute_state saved_state;
-	const uint32_t samples = src_image->samples;
-	const uint32_t samples_log2 = ffs(samples) - 1;

 	for (uint32_t r = 0; r < region_count; ++r) {
 		const VkImageResolve *region = &regions[r];
@@ -383,8 +394,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 							     .baseArrayLayer = src_base_layer + layer,
 							     .layerCount = 1,
 						     },
-					     },
-					     cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
+					     });

 			struct radv_image_view dest_iview;
 			radv_image_view_init(&dest_iview, cmd_buffer->device,
@@ -400,68 +410,108 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 							     .baseArrayLayer = dest_base_layer + layer,
 							     .layerCount = 1,
 						     },
-							     },
-					     cmd_buffer, VK_IMAGE_USAGE_STORAGE_BIT);
+					     });

-
-			radv_meta_push_descriptor_set(cmd_buffer,
-						      VK_PIPELINE_BIND_POINT_COMPUTE,
-						      device->meta_state.resolve_compute.p_layout,
-						      0, /* set */
-						      2, /* descriptorWriteCount */
-						      (VkWriteDescriptorSet[]) {
-						              {
-						                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-						                      .dstBinding = 0,
-						                      .dstArrayElement = 0,
-						                      .descriptorCount = 1,
-						                      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
-						                      .pImageInfo = (VkDescriptorImageInfo[]) {
-						                              {
-						                                      .sampler = VK_NULL_HANDLE,
-						                                      .imageView = radv_image_view_to_handle(&src_iview),
-						                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
-						                              },
-						                      }
-						              },
-						              {
-						                      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
-						                      .dstBinding = 1,
-						                      .dstArrayElement = 0,
-						                      .descriptorCount = 1,
-						                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
-						                      .pImageInfo = (VkDescriptorImageInfo[]) {
-						                              {
-						                                      .sampler = VK_NULL_HANDLE,
-						                                      .imageView = radv_image_view_to_handle(&dest_iview),
-						                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
-						                              },
-						                      }
-					                      }
-				                      });
-
-			VkPipeline pipeline;
-			if (vk_format_is_int(src_image->vk_format))
-				pipeline = device->meta_state.resolve_compute.rc[samples_log2].i_pipeline;
-			else
-				pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline;
-			if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
-				radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
-						     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
-			}
-
-			unsigned push_constants[4] = {
-				srcOffset.x,
-				srcOffset.y,
-				dstOffset.x,
-				dstOffset.y,
-			};
-			radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-					      device->meta_state.resolve_compute.p_layout,
-					      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
-					      push_constants);
-			radv_unaligned_dispatch(cmd_buffer, extent.width, extent.height, 1);
+			emit_resolve(cmd_buffer,
+				     &src_iview,
+				     &dest_iview,
+				     &(VkOffset2D) {srcOffset.x, srcOffset.y },
+				     &(VkOffset2D) {dstOffset.x, dstOffset.y },
+				     &(VkExtent2D) {extent.width, extent.height });
 		}
 	}
 	radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
 }
+
+/**
+ * Emit any needed resolves for the current subpass (compute path).
+ *
+ * Resolve slots set to VK_ATTACHMENT_UNUSED are skipped before they are
+ * used to index the framebuffer attachment array.
+ */
+void
+radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	struct radv_meta_saved_compute_state saved_state;
+	/* FINISHME(perf): Skip clears for resolve attachments.
+	 *
+	 * From the Vulkan 1.0 spec:
+	 *
+	 *    If the first use of an attachment in a render pass is as a resolve
+	 *    attachment, then the loadOp is effectively ignored as the resolve is
+	 *    guaranteed to overwrite all pixels in the render area.
+	 */
+
+	if (!subpass->has_resolve)
+		return;
+
+	/* Every flush below covers mip level 0 / layer 0 of the color aspect. */
+	VkImageSubresourceRange range = {
+		.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+		.baseMipLevel = 0,
+		.levelCount = 1,
+		.baseArrayLayer = 0,
+		.layerCount = 1,
+	};
+
+	for (uint32_t i = 0; i < subpass->color_count; ++i) {
+		VkAttachmentReference src_att = subpass->color_attachments[i];
+		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
+		/* Check first: VK_ATTACHMENT_UNUSED is not a valid array index. */
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+			continue;
+
+		struct radv_image *dst_img = fb->attachments[dest_att.attachment].attachment->image;
+		struct radv_image_view *src_iview = fb->attachments[src_att.attachment].attachment;
+		if (dst_img->surface.dcc_size) {
+			radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff);
+			cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+		}
+
+		radv_fast_clear_flush_image_inplace(cmd_buffer, src_iview->image, &range);
+	}
+
+	radv_meta_save_compute(&saved_state, cmd_buffer, 16);
+
+	for (uint32_t i = 0; i < subpass->color_count; ++i) {
+		VkAttachmentReference src_att = subpass->color_attachments[i];
+		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+			continue;
+		struct radv_image_view *src_iview = fb->attachments[src_att.attachment].attachment;
+		struct radv_image_view *dst_iview = fb->attachments[dest_att.attachment].attachment;
+		struct radv_subpass resolve_subpass = {
+			.color_count = 1,
+			.color_attachments = (VkAttachmentReference[]) { dest_att },
+			.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
+		};
+
+		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
+
+		/* Subpass resolves must respect the render area. We can ignore the
+		 * render area here because vkCmdBeginRenderPass set the render area
+		 * with 3DSTATE_DRAWING_RECTANGLE.
+		 *
+		 * XXX(chadv): Does the hardware really respect
+		 * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
+		 */
+		emit_resolve(cmd_buffer,
+			     src_iview,
+			     dst_iview,
+			     &(VkOffset2D) { 0, 0 },
+			     &(VkOffset2D) { 0, 0 },
+			     &(VkExtent2D) { fb->width, fb->height });
+	}
+
+	radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
+
+	for (uint32_t i = 0; i < subpass->color_count; ++i) {
+		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+			continue;
+		struct radv_image *dst_img = fb->attachments[dest_att.attachment].attachment->image;
+		radv_fast_clear_flush_image_inplace(cmd_buffer, dst_img, &range);
+	}
+}
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -0,0 +1,666 @@
+/*
+ * Copyright © 2016 Dave Airlie
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include "radv_meta.h"
+#include "radv_private.h"
+#include "nir/nir_builder.h"
+#include "sid.h"
+#include "vk_format.h"
+
+/* Build the vertex shader for the fragment resolve pipelines: it writes the
+ * vertices produced by radv_meta_gen_rect_vertices() to gl_Position.
+ */
+static nir_shader *
+build_nir_vertex_shader(void)
+{
+	nir_builder builder;
+	nir_builder_init_simple_shader(&builder, NULL, MESA_SHADER_VERTEX, NULL);
+	builder.shader->info.name = ralloc_strdup(builder.shader, "meta_resolve_vs");
+
+	nir_variable *position = nir_variable_create(builder.shader, nir_var_shader_out,
+						     glsl_vec4_type(), "gl_Position");
+	position->data.location = VARYING_SLOT_POS;
+
+	/* 0xf: write all four components. */
+	nir_store_var(&builder, position, radv_meta_gen_rect_vertices(&builder), 0xf);
+	return builder.shader;
+}
+
+/* Build the fragment shader for one resolve variant: the fragment position
+ * plus the pushed source offset is the texel coordinate passed to
+ * radv_meta_build_resolve_shader_core(), whose result is stored to f_color.
+ */
+static nir_shader *
+build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
+{
+	nir_builder b;
+	char name[64];
+	const char *variant = is_integer ? "int" : (is_srgb ? "srgb" : "float");
+	const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
+	const struct glsl_type *vec4 = glsl_vec4_type();
+	const struct glsl_type *ms_sampler =
+		glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
+
+	snprintf(name, sizeof(name), "meta_resolve_fs-%d-%s", samples, variant);
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, name);
+
+	nir_variable *src_tex = nir_variable_create(b.shader, nir_var_uniform, ms_sampler, "s_tex");
+	src_tex->data.descriptor_set = 0;
+	src_tex->data.binding = 0;
+
+	nir_variable *frag_pos = nir_variable_create(b.shader, nir_var_shader_in, vec2, "fs_pos_in");
+	frag_pos->data.location = VARYING_SLOT_POS;
+
+	nir_variable *out_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
+	out_color->data.location = FRAG_RESULT_DATA0;
+
+	nir_ssa_def *pos_f = nir_load_var(&b, frag_pos);
+
+	/* Load the two 32-bit source-offset push constants (base 0, range 8). */
+	nir_intrinsic_instr *offset_load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(offset_load, 0);
+	nir_intrinsic_set_range(offset_load, 8);
+	offset_load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	offset_load->num_components = 2;
+	nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 2, 32, "src_offset");
+	nir_builder_instr_insert(&b, &offset_load->instr);
+
+	nir_ssa_def *pos_i = nir_f2i32(&b, pos_f);
+	nir_ssa_def *texel = nir_channels(&b, nir_iadd(&b, pos_i, &offset_load->dest.ssa), 0x3);
+	nir_variable *resolved = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
+
+	radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples, src_tex, resolved, texel);
+
+	nir_store_var(&b, out_color, nir_load_var(&b, resolved), 0xf);
+	return b.shader;
+}
+
+
+/* Create the descriptor set layout and the pipeline layout used by the
+ * fragment resolve path and store them in
+ * device->meta_state.resolve_fragment. Returns the VkResult of the first
+ * call that fails, or of the pipeline layout creation otherwise.
+ */
+static VkResult
+create_layout(struct radv_device *device)
+{
+	VkDevice device_h = radv_device_to_handle(device);
+
+	/* A single descriptor: the image being sampled. */
+	const VkDescriptorSetLayoutBinding src_binding = {
+		.binding = 0,
+		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+		.descriptorCount = 1,
+		.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
+		.pImmutableSamplers = NULL
+	};
+	const VkDescriptorSetLayoutCreateInfo ds_info = {
+		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
+		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
+		.bindingCount = 1,
+		.pBindings = &src_binding,
+	};
+
+	VkResult result = radv_CreateDescriptorSetLayout(device_h, &ds_info,
+							 &device->meta_state.alloc,
+							 &device->meta_state.resolve_fragment.ds_layout);
+	if (result != VK_SUCCESS)
+		return result;
+
+	/* Declares an 8-byte fragment-stage push constant range at offset 0. */
+	const VkPushConstantRange push_range = {
+		VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8
+	};
+	const VkPipelineLayoutCreateInfo pl_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
+		.setLayoutCount = 1,
+		.pSetLayouts = &device->meta_state.resolve_fragment.ds_layout,
+		.pushConstantRangeCount = 1,
+		.pPushConstantRanges = &push_range,
+	};
+
+	result = radv_CreatePipelineLayout(device_h, &pl_info,
+					   &device->meta_state.alloc,
+					   &device->meta_state.resolve_fragment.p_layout);
+	return result;
+}
+
+static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
+	.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+	.vertexBindingDescriptionCount = 0, /* no vertex buffer bindings */
+	.vertexAttributeDescriptionCount = 0, /* and no vertex attributes */
+};
+
+static VkFormat pipeline_formats[] = { /* formats a resolve pipeline is created for at every sample count; R8G8B8A8_SRGB is created separately */
+   VK_FORMAT_R8G8B8A8_UNORM,
+   VK_FORMAT_R8G8B8A8_UINT,
+   VK_FORMAT_R8G8B8A8_SINT,
+   VK_FORMAT_R16G16B16A16_UNORM,
+   VK_FORMAT_R16G16B16A16_SNORM,
+   VK_FORMAT_R16G16B16A16_UINT,
+   VK_FORMAT_R16G16B16A16_SINT,
+   VK_FORMAT_R32_SFLOAT,
+   VK_FORMAT_R32G32_SFLOAT,
+   VK_FORMAT_R32G32B32A32_SFLOAT
+};
+
+/* Create the render pass and graphics pipeline that resolve into `format`
+ * for 1 << samples_log2 samples and store them in
+ * device->meta_state.resolve_fragment.rc[samples_log2]. Returns the first
+ * failing VkResult; the temporary NIR shaders are freed exactly once.
+ */
+static VkResult
+create_resolve_pipeline(struct radv_device *device,
+			int samples_log2,
+			VkFormat format)
+{
+	VkResult result;
+	bool is_integer = false, is_srgb = false;
+	uint32_t samples = 1 << samples_log2;
+	unsigned fs_key = radv_format_meta_fs_key(format);
+	const VkPipelineVertexInputStateCreateInfo *vi_create_info = &normal_vi_create_info;
+	if (vk_format_is_int(format))
+		is_integer = true;
+	else if (vk_format_is_srgb(format))
+		is_srgb = true;
+
+	struct radv_shader_module fs = { .nir = NULL };
+	fs.nir = build_resolve_fragment_shader(device, is_integer, is_srgb, samples);
+	struct radv_shader_module vs = {
+		.nir = build_nir_vertex_shader(),
+	};
+
+	VkRenderPass *rp = is_srgb ?
+		&device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
+		&device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+
+	assert(!*rp);
+
+	VkPipeline *pipeline = is_srgb ?
+		&device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
+		&device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+	assert(!*pipeline);
+
+	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+		{
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_VERTEX_BIT,
+			.module = radv_shader_module_to_handle(&vs),
+			.pName = "main",
+			.pSpecializationInfo = NULL
+		}, {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+			.module = radv_shader_module_to_handle(&fs),
+			.pName = "main",
+			.pSpecializationInfo = NULL
+		},
+	};
+
+	result = radv_CreateRenderPass(radv_device_to_handle(device),
+				       &(VkRenderPassCreateInfo) {
+					       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
+					       .attachmentCount = 1,
+					       .pAttachments = &(VkAttachmentDescription) {
+						       .format = format,
+						       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
+						       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
+						       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
+						       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
+					       },
+					       .subpassCount = 1,
+					       .pSubpasses = &(VkSubpassDescription) {
+						       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
+						       .inputAttachmentCount = 0,
+						       .colorAttachmentCount = 1,
+						       .pColorAttachments = &(VkAttachmentReference) {
+							       .attachment = 0,
+							       .layout = VK_IMAGE_LAYOUT_GENERAL,
+						       },
+						       .pResolveAttachments = NULL,
+						       .pDepthStencilAttachment = &(VkAttachmentReference) {
+							       .attachment = VK_ATTACHMENT_UNUSED,
+							       .layout = VK_IMAGE_LAYOUT_GENERAL,
+						       },
+						       .preserveAttachmentCount = 1,
+						       .pPreserveAttachments = (uint32_t[]) { 0 },
+					       },
+					       .dependencyCount = 0,
+				       }, &device->meta_state.alloc, rp);
+	if (result != VK_SUCCESS)
+		goto fail;
+
+	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
+		.pStages = pipeline_shader_stages,
+		.pVertexInputState = vi_create_info,
+		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+			.primitiveRestartEnable = false,
+		},
+		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+			.viewportCount = 1,
+			.scissorCount = 1,
+		},
+		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+			.rasterizerDiscardEnable = false,
+			.polygonMode = VK_POLYGON_MODE_FILL,
+			.cullMode = VK_CULL_MODE_NONE,
+			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+		},
+		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+			.rasterizationSamples = 1,
+			.sampleShadingEnable = false,
+			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
+		},
+		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+			.attachmentCount = 1,
+			.pAttachments = (VkPipelineColorBlendAttachmentState []) {
+				{ .colorWriteMask =
+				  VK_COLOR_COMPONENT_A_BIT |
+				  VK_COLOR_COMPONENT_R_BIT |
+				  VK_COLOR_COMPONENT_G_BIT |
+				  VK_COLOR_COMPONENT_B_BIT },
+			}
+		},
+		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+			.dynamicStateCount = 9,
+			.pDynamicStates = (VkDynamicState[]) {
+				VK_DYNAMIC_STATE_VIEWPORT,
+				VK_DYNAMIC_STATE_SCISSOR,
+				VK_DYNAMIC_STATE_LINE_WIDTH,
+				VK_DYNAMIC_STATE_DEPTH_BIAS,
+				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+				VK_DYNAMIC_STATE_DEPTH_BOUNDS,
+				VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
+				VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
+				VK_DYNAMIC_STATE_STENCIL_REFERENCE,
+			},
+		},
+		.flags = 0,
+		.layout = device->meta_state.resolve_fragment.p_layout,
+		.renderPass = *rp,
+		.subpass = 0,
+	};
+
+	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
+		.use_rectlist = true
+	};
+
+	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
+					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					       &vk_pipeline_info, &radv_pipeline_info,
+					       &device->meta_state.alloc,
+					       pipeline);
+
+	/* Shared exit: the NIR shaders are only needed while the pipeline is
+	 * built, so they are released here on success and on failure alike. */
+fail:
+	ralloc_free(vs.nir);
+	ralloc_free(fs.nir);
+	return result;
+}
+
+VkResult
+radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
+{
+	struct radv_meta_state *state = &device->meta_state;
+	VkResult res;
+	memset(&state->resolve_fragment, 0, sizeof(state->resolve_fragment)); /* zero the whole resolve_fragment state first */
+
+	res = create_layout(device);
+	if (res != VK_SUCCESS)
+		return res;
+
+	for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) { /* i is passed as samples_log2 */
+		for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
+			res = create_resolve_pipeline(device, i, pipeline_formats[j]); /* NOTE(review): res is overwritten by the sRGB call below, so a failure here is never returned */
+		}
+
+		res = create_resolve_pipeline(device, i, VK_FORMAT_R8G8B8A8_SRGB); /* separate sRGB variant */
+	}
+
+	return res; /* only the result of the last create_resolve_pipeline() call */
+}
+
+/* Destroy the per-sample-count render passes and pipelines, then the
+ * descriptor set layout and pipeline layout, held in
+ * meta_state.resolve_fragment.
+ */
+void
+radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
+{
+	struct radv_meta_state *state = &device->meta_state;
+	VkDevice device_h = radv_device_to_handle(device);
+
+	for (uint32_t s = 0; s < MAX_SAMPLES_LOG2; ++s) {
+		for (unsigned key = 0; key < NUM_META_FS_KEYS; ++key) {
+			radv_DestroyRenderPass(device_h, state->resolve_fragment.rc[s].render_pass[key], &state->alloc);
+			radv_DestroyPipeline(device_h, state->resolve_fragment.rc[s].pipeline[key], &state->alloc);
+		}
+
+		/* The sRGB variant is stored outside the fs_key-indexed arrays. */
+		radv_DestroyRenderPass(device_h, state->resolve_fragment.rc[s].srgb_render_pass, &state->alloc);
+		radv_DestroyPipeline(device_h, state->resolve_fragment.rc[s].srgb_pipeline, &state->alloc);
+	}
+
+	radv_DestroyDescriptorSetLayout(device_h,
+					state->resolve_fragment.ds_layout,
+					&state->alloc);
+	radv_DestroyPipelineLayout(device_h,
+				   state->resolve_fragment.p_layout,
+				   &state->alloc);
+}
+
+/* Record one fragment-shader resolve draw: push the source view as a
+ * sampled image (binding 0) and the source offset as push constants, bind
+ * the pipeline matching the destination format and sample count, set
+ * viewport and scissor to the destination rectangle and draw. The CB
+ * flush flag is raised both before and after the draw.
+ */
+static void
+emit_resolve(struct radv_cmd_buffer *cmd_buffer,
+	     struct radv_image_view *src_iview,
+	     struct radv_image_view *dest_iview,
+	     const VkOffset2D *src_offset,
+	     const VkOffset2D *dest_offset,
+	     const VkExtent2D *resolve_extent)
+{
+	struct radv_device *device = cmd_buffer->device;
+	VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
+	const uint32_t samples = src_iview->image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+
+	const VkDescriptorImageInfo src_info = {
+		.sampler = VK_NULL_HANDLE,
+		.imageView = radv_image_view_to_handle(src_iview),
+		.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
+	};
+	VkWriteDescriptorSet write = {
+		.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
+		.dstBinding = 0,
+		.dstArrayElement = 0,
+		.descriptorCount = 1,
+		.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
+		.pImageInfo = &src_info,
+	};
+	radv_meta_push_descriptor_set(cmd_buffer,
+				      VK_PIPELINE_BIND_POINT_GRAPHICS,
+				      device->meta_state.resolve_fragment.p_layout,
+				      0, /* set */
+				      1, /* descriptorWriteCount */
+				      &write);
+
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+
+	const unsigned push_constants[2] = { src_offset->x, src_offset->y };
+	radv_CmdPushConstants(cmd_buffer_h,
+			      device->meta_state.resolve_fragment.p_layout,
+			      VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8, push_constants);
+
+	unsigned fs_key = radv_format_meta_fs_key(dest_iview->vk_format);
+	VkPipeline pipeline_h = vk_format_is_srgb(dest_iview->vk_format) ?
+		device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
+		device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_h);
+
+	const VkViewport viewport = {
+		.x = dest_offset->x,
+		.y = dest_offset->y,
+		.width = resolve_extent->width,
+		.height = resolve_extent->height,
+		.minDepth = 0.0f,
+		.maxDepth = 1.0f
+	};
+	const VkRect2D scissor = {
+		.offset = *dest_offset,
+		.extent = *resolve_extent,
+	};
+	radv_CmdSetViewport(cmd_buffer_h, 0, 1, &viewport);
+	radv_CmdSetScissor(cmd_buffer_h, 0, 1, &scissor);
+
+	radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
+	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
+}
+
+/* Resolve each region of src_image into dest_image with the fragment shader
+ * path: one framebuffer, render pass instance and draw per array layer.
+ * The two image layout parameters are not used by this implementation.
+ */
+void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
+				      struct radv_image *src_image,
+				      VkImageLayout src_image_layout,
+				      struct radv_image *dest_image,
+				      VkImageLayout dest_image_layout,
+				      uint32_t region_count,
+				      const VkImageResolve *regions)
+{
+	struct radv_device *device = cmd_buffer->device;
+	struct radv_meta_saved_state saved_state;
+	const uint32_t samples = src_image->info.samples;
+	const uint32_t samples_log2 = ffs(samples) - 1;
+	unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
+	VkRenderPass rp = vk_format_is_srgb(dest_image->vk_format) ?
+		device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
+		device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+	for (uint32_t r = 0; r < region_count; ++r) {
+		const VkImageResolve *region = &regions[r];
+		const uint32_t src_base_layer =
+			radv_meta_get_iview_layer(src_image, &region->srcSubresource,
+						  &region->srcOffset);
+		VkImageSubresourceRange range;
+		range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
+		range.baseMipLevel = region->srcSubresource.mipLevel;
+		range.levelCount = 1;
+		range.baseArrayLayer = src_base_layer;
+		range.layerCount = region->srcSubresource.layerCount;
+		radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
+	}
+
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+
+	for (uint32_t r = 0; r < region_count; ++r) {
+		const VkImageResolve *region = &regions[r];
+
+		assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+		assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
+		assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
+
+		const uint32_t src_base_layer =
+			radv_meta_get_iview_layer(src_image, &region->srcSubresource,
+						  &region->srcOffset);
+
+		const uint32_t dest_base_layer =
+			radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
+						  &region->dstOffset);
+
+		const struct VkExtent3D extent =
+			radv_sanitize_image_extent(src_image->type, region->extent);
+		const struct VkOffset3D srcOffset =
+			radv_sanitize_image_offset(src_image->type, region->srcOffset);
+		const struct VkOffset3D dstOffset =
+			radv_sanitize_image_offset(dest_image->type, region->dstOffset);
+
+		for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
+			struct radv_image_view src_iview;
+			radv_image_view_init(&src_iview, cmd_buffer->device,
+					     &(VkImageViewCreateInfo) {
+						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+						     .image = radv_image_to_handle(src_image),
+						     .viewType = radv_meta_get_view_type(src_image),
+						     .format = src_image->vk_format,
+						     .subresourceRange = {
+							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+							     .baseMipLevel = region->srcSubresource.mipLevel,
+							     .levelCount = 1,
+							     .baseArrayLayer = src_base_layer + layer,
+							     .layerCount = 1,
+						     },
+					     });
+
+			struct radv_image_view dest_iview;
+			radv_image_view_init(&dest_iview, cmd_buffer->device,
+					     &(VkImageViewCreateInfo) {
+						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
+						     .image = radv_image_to_handle(dest_image),
+						     .viewType = radv_meta_get_view_type(dest_image),
+						     .format = dest_image->vk_format,
+						     .subresourceRange = {
+							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+							     .baseMipLevel = region->dstSubresource.mipLevel,
+							     .levelCount = 1,
+							     .baseArrayLayer = dest_base_layer + layer,
+							     .layerCount = 1,
+						     },
+					     });
+
+			/* The render area below starts at dstOffset, so the framebuffer
+			 * must reach dstOffset + extent for the render area to fit. */
+			VkFramebuffer fb;
+			radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
+					       &(VkFramebufferCreateInfo) {
+						       .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
+						       .attachmentCount = 1,
+						       .pAttachments = (VkImageView[]) {
+							       radv_image_view_to_handle(&dest_iview),
+						       },
+						       .width = extent.width + dstOffset.x,
+						       .height = extent.height + dstOffset.y,
+						       .layers = 1
+					       }, &cmd_buffer->pool->alloc, &fb);
+
+			radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
+						&(VkRenderPassBeginInfo) {
+							.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
+							.renderPass = rp,
+							.framebuffer = fb,
+							.renderArea = {
+								.offset = { dstOffset.x, dstOffset.y, },
+								.extent = { extent.width, extent.height },
+							},
+							.clearValueCount = 0,
+							.pClearValues = NULL,
+						}, VK_SUBPASS_CONTENTS_INLINE);
+
+			emit_resolve(cmd_buffer,
+				     &src_iview,
+				     &dest_iview,
+				     &(VkOffset2D) { srcOffset.x, srcOffset.y },
+				     &(VkOffset2D) { dstOffset.x, dstOffset.y },
+				     &(VkExtent2D) { extent.width, extent.height });
+
+			radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
+
+			radv_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device), fb, &cmd_buffer->pool->alloc);
+		}
+	}
+
+	radv_meta_restore(&saved_state, cmd_buffer);
+}
+
+
+/**
+ * Emit any needed resolves for the current subpass: every color attachment
+ * whose resolve attachment is in use is resolved into it via emit_resolve().
+ * Does nothing when subpass->has_resolve is not set.
+ */
+void
+radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	struct radv_meta_saved_state saved_state;
+
+	/* FINISHME(perf): Skip clears for resolve attachments.
+	 *
+	 * From the Vulkan 1.0 spec:
+	 *
+	 *    If the first use of an attachment in a render pass is as a resolve
+	 *    attachment, then the loadOp is effectively ignored as the resolve is
+	 *    guaranteed to overwrite all pixels in the render area.
+	 */
+
+	if (!subpass->has_resolve)
+		return;
+
+	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
+
+	for (uint32_t i = 0; i < subpass->color_count; ++i) {
+		VkAttachmentReference src_att = subpass->color_attachments[i];
+		VkAttachmentReference dest_att = subpass->resolve_attachments[i];
+		/* Skip unused slots first: VK_ATTACHMENT_UNUSED must never index attachments[]. */
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
+			continue;
+		struct radv_image_view *dest_iview = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
+		struct radv_image *dst_img = dest_iview->image;
+		struct radv_image_view *src_iview = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
+
+		if (dst_img->surface.dcc_size) {
+			radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff);
+			cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
+		}
+
+		VkImageSubresourceRange range = {
+			.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
+			.baseMipLevel = 0, .levelCount = 1,
+			.baseArrayLayer = 0, .layerCount = 1,
+		};
+		radv_fast_clear_flush_image_inplace(cmd_buffer, src_iview->image, &range);
+
+		struct radv_subpass resolve_subpass = {
+			.color_count = 1,
+			.color_attachments = (VkAttachmentReference[]) { dest_att },
+			.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
+		};
+		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
+
+		/* Subpass resolves must respect the render area. We can ignore the
+		 * render area here because vkCmdBeginRenderPass set the render area
+		 * with 3DSTATE_DRAWING_RECTANGLE.
+		 *
+		 * XXX(chadv): Does the hardware really respect
+		 * 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
+		 */
+		emit_resolve(cmd_buffer,
+			     src_iview,
+			     dest_iview,
+			     &(VkOffset2D) { 0, 0 },
+			     &(VkOffset2D) { 0, 0 },
+			     &(VkExtent2D) { fb->width, fb->height });
+	}
+
+	cmd_buffer->state.subpass = subpass;
+	radv_meta_restore(&saved_state, cmd_buffer);
+}
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -26,6 +26,7 @@
 */

 #include "util/mesa-sha1.h"
+#include "util/u_atomic.h"
 #include "radv_private.h"
 #include "nir/nir.h"
 #include "nir/nir_builder.h"
@@ -35,12 +36,14 @@
 #include <llvm-c/TargetMachine.h>

 #include "sid.h"
+#include "gfx9d.h"
 #include "r600d_common.h"
 #include "ac_binary.h"
 #include "ac_llvm_util.h"
 #include "ac_nir_to_llvm.h"
 #include "vk_format.h"
 #include "util/debug.h"
+#include "ac_exp_param.h"

 void radv_shader_variant_destroy(struct radv_device *device,
                                 struct radv_shader_variant *variant);
@@ -50,6 +53,8 @@ static const struct nir_shader_compiler_options nir_options = {
 	.lower_scmp = true,
 	.lower_flrp32 = true,
 	.lower_fsat = true,
+	.lower_fdiv = true,
+	.lower_sub = true,
 	.lower_pack_snorm_2x16 = true,
 	.lower_pack_snorm_4x8 = true,
 	.lower_pack_unorm_2x16 = true,
@@ -60,6 +65,7 @@ static const struct nir_shader_compiler_options nir_options = {
 	.lower_unpack_unorm_4x8 = true,
 	.lower_extract_byte = true,
 	.lower_extract_word = true,
+	.max_unroll_iterations = 32
 };

 VkResult radv_CreateShaderModule(
@@ -151,6 +157,12 @@ radv_optimize_nir(struct nir_shader *shader)
                NIR_PASS(progress, shader, nir_copy_prop);
                NIR_PASS(progress, shader, nir_opt_remove_phis);
                NIR_PASS(progress, shader, nir_opt_dce);
+                if (nir_opt_trivial_continues(shader)) {
+                        progress = true;
+                        NIR_PASS(progress, shader, nir_copy_prop);
+                        NIR_PASS(progress, shader, nir_opt_dce);
+                }
+                NIR_PASS(progress, shader, nir_opt_if);
                NIR_PASS(progress, shader, nir_opt_dead_cf);
                NIR_PASS(progress, shader, nir_opt_cse);
                NIR_PASS(progress, shader, nir_opt_peephole_select, 8);
@@ -158,6 +170,9 @@ radv_optimize_nir(struct nir_shader *shader)
                NIR_PASS(progress, shader, nir_opt_constant_folding);
                NIR_PASS(progress, shader, nir_opt_undef);
                NIR_PASS(progress, shader, nir_opt_conditional_discard);
+                if (shader->options->max_unroll_iterations) {
+                        NIR_PASS(progress, shader, nir_opt_loop_unroll, 0);
+                }
        } while (progress);
 }

@@ -251,7 +266,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 	}

 	/* Vulkan uses the separate-shader linking model */
-	nir->info->separate_shader = true;
+	nir->info.separate_shader = true;

 	nir_shader_gather_info(nir, entry_point->impl);

@@ -360,7 +375,7 @@ static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pip
 void radv_shader_variant_destroy(struct radv_device *device,
                                 struct radv_shader_variant *variant)
 {
-	if (__sync_fetch_and_sub(&variant->ref_count, 1) != 1)
+	if (!p_atomic_dec_zero(&variant->ref_count))
 		return;

 	device->ws->buffer_destroy(variant->bo);
@@ -526,8 +541,8 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
 	bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);

 	if (module->nir)
-		_mesa_sha1_compute(module->nir->info->name,
-				   strlen(module->nir->info->name),
+		_mesa_sha1_compute(module->nir->info.name,
+				   strlen(module->nir->info.name),
 				   module->sha1);

 	radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key, 0);
@@ -591,11 +606,14 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
 }

 static union ac_shader_variant_key
-radv_compute_tes_key(bool as_es)
+radv_compute_tes_key(bool as_es, bool export_prim_id)
 {
 	union ac_shader_variant_key key;
 	memset(&key, 0, sizeof(key));
 	key.tes.as_es = as_es;
+	/* export prim id only happens when no geom shader */
+	if (!as_es)
+		key.tes.export_prim_id = export_prim_id;
 	return key;
 }

@@ -626,13 +644,15 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
 	nir_shader *tes_nir, *tcs_nir;
 	void *tes_code = NULL, *tcs_code = NULL;
 	unsigned tes_code_size = 0, tcs_code_size = 0;
-	union ac_shader_variant_key tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline));
+	union ac_shader_variant_key tes_key;
 	union ac_shader_variant_key tcs_key;
 	bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);

+	tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline),
+				       pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input);
 	if (tes_module->nir)
-		_mesa_sha1_compute(tes_module->nir->info->name,
-				   strlen(tes_module->nir->info->name),
+		_mesa_sha1_compute(tes_module->nir->info.name,
+				   strlen(tes_module->nir->info.name),
 				   tes_module->sha1);
 	radv_hash_shader(tes_sha1, tes_module, tes_entrypoint, tes_spec_info, layout, &tes_key, 0);

@@ -644,8 +664,8 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
 		tcs_key = radv_compute_tcs_key(tes_variant->info.tes.primitive_mode, input_vertices);

 		if (tcs_module->nir)
-			_mesa_sha1_compute(tcs_module->nir->info->name,
-					   strlen(tcs_module->nir->info->name),
+			_mesa_sha1_compute(tcs_module->nir->info.name,
+					   strlen(tcs_module->nir->info.name),
 					   tcs_module->sha1);

 		radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
@@ -674,16 +694,16 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
 		return;

 	nir_lower_tes_patch_vertices(tes_nir,
-				     tcs_nir->info->tess.tcs_vertices_out);
+				     tcs_nir->info.tess.tcs_vertices_out);

 	tes_variant = radv_shader_variant_create(pipeline->device, tes_nir,
 						 layout, &tes_key, &tes_code,
 						 &tes_code_size, dump);

-	tcs_key = radv_compute_tcs_key(tes_nir->info->tess.primitive_mode, input_vertices);
+	tcs_key = radv_compute_tcs_key(tes_nir->info.tess.primitive_mode, input_vertices);
 	if (tcs_module->nir)
-		_mesa_sha1_compute(tcs_module->nir->info->name,
-				   strlen(tcs_module->nir->info->name),
+		_mesa_sha1_compute(tcs_module->nir->info.name,
+				   strlen(tcs_module->nir->info.name),
 				   tcs_module->sha1);

 	radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
@@ -1318,11 +1338,12 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
 		S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
 		EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
 		EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
+	ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9);

 	if (ms->num_samples > 1) {
 		unsigned log_samples = util_logbase2(ms->num_samples);
 		unsigned log_ps_iter_samples = util_logbase2(util_next_power_of_two(ps_iter_samples));
-		ms->pa_sc_mode_cntl_0 = S_028A48_MSAA_ENABLE(1);
+		ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
 		ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* CM_R_028BDC_PA_SC_LINE_CNTL */
 		ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
 			S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
@@ -1591,7 +1612,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 }

 static union ac_shader_variant_key
-radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls)
+radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls, bool export_prim_id)
 {
 	union ac_shader_variant_key key;
 	const VkPipelineVertexInputStateCreateInfo *input_state =
@@ -1601,6 +1622,7 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es,
 	key.vs.instance_rate_inputs = 0;
 	key.vs.as_es = as_es;
 	key.vs.as_ls = as_ls;
+	key.vs.export_prim_id = export_prim_id;

 	for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
 		unsigned binding;
@@ -1842,6 +1864,24 @@ static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
 	       S_028A40_GS_WRITE_OPTIMIZE(1);
 }

+/* Fill graphics.vgt_gs_mode and graphics.vgt_primitiveid_en for this pipeline. */
+static void calculate_vgt_gs_mode(struct radv_pipeline *pipeline)
+{
+	struct radv_shader_variant *variant = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader :
+		radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] :
+		pipeline->shaders[MESA_SHADER_VERTEX];
+	const struct ac_vs_output_info *out = &variant->info.vs.outinfo;
+	/* Defaults; overridden when a GS exists or the primitive id is exported. */
+	pipeline->graphics.vgt_gs_mode = 0;
+	pipeline->graphics.vgt_primitiveid_en = false;
+	if (radv_pipeline_has_gs(pipeline)) {
+		pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
+	} else if (out->export_prim_id) {
+		pipeline->graphics.vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
+		pipeline->graphics.vgt_primitiveid_en = true;
+	}
+}
+
 static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *vs;
@@ -1869,6 +1909,25 @@ static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
 		clip_dist_mask;

 }
+
+/* Build a PS input control word: an export-parameter offset (optionally
+ * flat shaded) or, for DEFAULT_VAL offsets, a constant default value. */
+static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
+{
+	if (offset > AC_EXP_PARAM_OFFSET_31) {
+		/* The input is a DEFAULT_VAL constant; flat_shade is not used. */
+		assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
+		       offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
+		const uint32_t default_val = offset - AC_EXP_PARAM_DEFAULT_VAL_0000;
+		return S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(default_val);
+	}
+
+	uint32_t cntl = S_028644_OFFSET(offset);
+	if (flat_shade)
+		cntl |= S_028644_FLAT_SHADE(1);
+	return cntl;
+}
+
 static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 {
 	struct radv_shader_variant *ps, *vs;
@@ -1880,6 +1939,23 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 	outinfo = &vs->info.vs.outinfo;

 	unsigned ps_offset = 0;
+
+	if (ps->info.fs.prim_id_input) {
+		unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID];
+		if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+			pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+			++ps_offset;
+		}
+	}
+
+	if (ps->info.fs.layer_input) {
+		unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_LAYER];
+		if (vs_offset != AC_EXP_PARAM_UNDEFINED) {
+			pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, true);
+			++ps_offset;
+		}
+	}
+
 	if (ps->info.fs.has_pcoord) {
 		unsigned val;
 		val = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20);
@@ -1887,52 +1963,22 @@ static void calculate_ps_inputs(struct radv_pipeline *pipeline)
 		ps_offset++;
 	}

-	if (ps->info.fs.prim_id_input && (outinfo->prim_id_output != 0xffffffff)) {
-		unsigned vs_offset, flat_shade;
-		unsigned val;
-		vs_offset = outinfo->prim_id_output;
-		flat_shade = true;
-		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-		pipeline->graphics.ps_input_cntl[ps_offset] = val;
-		++ps_offset;
-	}
-
-	if (ps->info.fs.layer_input && (outinfo->layer_output != 0xffffffff)) {
-		unsigned vs_offset, flat_shade;
-		unsigned val;
-		vs_offset = outinfo->layer_output;
-		flat_shade = true;
-		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-		pipeline->graphics.ps_input_cntl[ps_offset] = val;
-		++ps_offset;
-	}
-
 	for (unsigned i = 0; i < 32 && (1u << i) <= ps->info.fs.input_mask; ++i) {
-		unsigned vs_offset, flat_shade;
-		unsigned val;
-
+		unsigned vs_offset;
+		bool flat_shade;
 		if (!(ps->info.fs.input_mask & (1u << i)))
 			continue;

-		if (!(outinfo->export_mask & (1u << i))) {
+		vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i];
+		if (vs_offset == AC_EXP_PARAM_UNDEFINED) {
 			pipeline->graphics.ps_input_cntl[ps_offset] = S_028644_OFFSET(0x20);
 			++ps_offset;
 			continue;
 		}

-		vs_offset = util_bitcount(outinfo->export_mask & ((1u << i) - 1));
-		if (outinfo->prim_id_output != 0xffffffff) {
-			if (vs_offset >= outinfo->prim_id_output)
-				vs_offset++;
-		}
-		if (outinfo->layer_output != 0xffffffff) {
-			if (vs_offset >= outinfo->layer_output)
-			  vs_offset++;
-		}
 		flat_shade = !!(ps->info.fs.flat_shaded_mask & (1u << ps_offset));

-		val = S_028644_OFFSET(vs_offset) | S_028644_FLAT_SHADE(flat_shade);
-		pipeline->graphics.ps_input_cntl[ps_offset] = val;
+		pipeline->graphics.ps_input_cntl[ps_offset] = offset_to_ps_input(vs_offset, flat_shade);
 		++ps_offset;
 	}

@@ -1967,62 +2013,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,

 	radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);

-	if (modules[MESA_SHADER_VERTEX]) {
-		bool as_es = false;
-		bool as_ls = false;
-		if (modules[MESA_SHADER_TESS_CTRL])
-			as_ls = true;
-		else if (modules[MESA_SHADER_GEOMETRY])
-			as_es = true;
-		union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls);
-
-		pipeline->shaders[MESA_SHADER_VERTEX] =
-			 radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX],
-					       pStages[MESA_SHADER_VERTEX]->pName,
-					       MESA_SHADER_VERTEX,
-					       pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
-					       pipeline->layout, &key);
-
-		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
-	}
-
-	if (modules[MESA_SHADER_GEOMETRY]) {
-		union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false);
-
-		pipeline->shaders[MESA_SHADER_GEOMETRY] =
-			 radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY],
-					       pStages[MESA_SHADER_GEOMETRY]->pName,
-					       MESA_SHADER_GEOMETRY,
-					       pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo,
-					       pipeline->layout, &key);
-
-		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
-
-		pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
-	} else
-		pipeline->graphics.vgt_gs_mode = 0;
-
-	if (modules[MESA_SHADER_TESS_EVAL]) {
-		assert(modules[MESA_SHADER_TESS_CTRL]);
-
-		radv_tess_pipeline_compile(pipeline,
-					   cache,
-					   modules[MESA_SHADER_TESS_CTRL],
-					   modules[MESA_SHADER_TESS_EVAL],
-					   pStages[MESA_SHADER_TESS_CTRL]->pName,
-					   pStages[MESA_SHADER_TESS_EVAL]->pName,
-					   pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
-					   pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo,
-					   pipeline->layout,
-					   pCreateInfo->pTessellationState->patchControlPoints);
-		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_TESS_EVAL) |
-			mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL);
-	}
-
 	if (!modules[MESA_SHADER_FRAGMENT]) {
 		nir_builder fs_b;
 		nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
-		fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "noop_fs");
+		fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
 		fs_m.nir = fs_b.shader;
 		modules[MESA_SHADER_FRAGMENT] = &fs_m;
 	}
@@ -2046,6 +2040,58 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	if (fs_m.nir)
 		ralloc_free(fs_m.nir);

+	if (modules[MESA_SHADER_VERTEX]) {
+		bool as_es = false;
+		bool as_ls = false;
+		bool export_prim_id = false;
+		if (modules[MESA_SHADER_TESS_CTRL])
+			as_ls = true;
+		else if (modules[MESA_SHADER_GEOMETRY])
+			as_es = true;
+		else if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
+			export_prim_id = true;
+		union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls, export_prim_id);
+
+		pipeline->shaders[MESA_SHADER_VERTEX] =
+			 radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX],
+					       pStages[MESA_SHADER_VERTEX]->pName,
+					       MESA_SHADER_VERTEX,
+					       pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
+					       pipeline->layout, &key);
+
+		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
+	}
+
+	if (modules[MESA_SHADER_GEOMETRY]) {
+		union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false, false);
+
+		pipeline->shaders[MESA_SHADER_GEOMETRY] =
+			 radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY],
+					       pStages[MESA_SHADER_GEOMETRY]->pName,
+					       MESA_SHADER_GEOMETRY,
+					       pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo,
+					       pipeline->layout, &key);
+
+		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
+	}
+
+	if (modules[MESA_SHADER_TESS_EVAL]) {
+		assert(modules[MESA_SHADER_TESS_CTRL]);
+
+		radv_tess_pipeline_compile(pipeline,
+					   cache,
+					   modules[MESA_SHADER_TESS_CTRL],
+					   modules[MESA_SHADER_TESS_EVAL],
+					   pStages[MESA_SHADER_TESS_CTRL]->pName,
+					   pStages[MESA_SHADER_TESS_EVAL]->pName,
+					   pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
+					   pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo,
+					   pipeline->layout,
+					   pCreateInfo->pTessellationState->patchControlPoints);
+		pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_TESS_EVAL) |
+			mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL);
+	}
+
 	radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo, extra);
 	radv_pipeline_init_raster_state(pipeline, pCreateInfo);
 	radv_pipeline_init_multisample_state(pipeline, pCreateInfo);
@@ -2109,9 +2155,16 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
 		V_028710_SPI_SHADER_ZERO;

+	calculate_vgt_gs_mode(pipeline);
 	calculate_pa_cl_vs_out_cntl(pipeline);
 	calculate_ps_inputs(pipeline);

+	for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+		if (pipeline->shaders[i]) {
+			pipeline->need_indirect_descriptor_sets |= pipeline->shaders[i]->info.need_indirect_descriptor_sets;
+		}
+	}
+
 	uint32_t stages = 0;
 	if (radv_pipeline_has_tess(pipeline)) {
 		stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) |
@@ -2123,10 +2176,15 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 				S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 		else
 			stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
+
 	} else if (radv_pipeline_has_gs(pipeline))
 		stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
 			S_028B54_GS_EN(1) |
 			S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
+
+	if (device->physical_device->rad_info.chip_class >= GFX9)
+		stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
+
 	pipeline->graphics.vgt_shader_stages_en = stages;

 	if (radv_pipeline_has_gs(pipeline))
@@ -2174,6 +2232,16 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		pipeline->binding_stride[desc->binding] = desc->stride;
 	}

+	struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
+							     AC_UD_VS_BASE_VERTEX_START_INSTANCE);
+	if (loc->sgpr_idx != -1) {
+		pipeline->graphics.vtx_base_sgpr = radv_shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
+		pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
+		if (pipeline->shaders[MESA_SHADER_VERTEX]->info.info.vs.needs_draw_id)
+			pipeline->graphics.vtx_emit_num = 3;
+		else
+			pipeline->graphics.vtx_emit_num = 2;
+	}
 	if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
 		radv_dump_pipeline_stats(device, pipeline);
 	}
@@ -2270,6 +2338,7 @@ static VkResult radv_compute_pipeline_create(
 				       pipeline->layout, NULL);


+	pipeline->need_indirect_descriptor_sets |= pipeline->shaders[MESA_SHADER_COMPUTE]->info.need_indirect_descriptor_sets;
 	result = radv_pipeline_scratch_init(device, pipeline);
 	if (result != VK_SUCCESS) {
 		radv_pipeline_destroy(device, pipeline, pAllocator);
--- a/src/amd/vulkan/radv_pipeline_cache.c
+++ b/src/amd/vulkan/radv_pipeline_cache.c
@@ -23,6 +23,7 @@

 #include "util/mesa-sha1.h"
 #include "util/debug.h"
+#include "util/u_atomic.h"
 #include "radv_private.h"

 #include "ac_nir_to_llvm.h"
@@ -171,6 +172,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
 		variant->info = entry->variant_info;
 		variant->rsrc1 = entry->rsrc1;
 		variant->rsrc2 = entry->rsrc2;
+		variant->code_size = entry->code_size;
 		variant->ref_count = 1;

 		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
@@ -183,7 +185,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
 		entry->variant = variant;
 	}

-	__sync_fetch_and_add(&entry->variant->ref_count, 1);
+	p_atomic_inc(&entry->variant->ref_count);
 	return entry->variant;
 }

@@ -275,7 +277,7 @@ radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
 		} else {
 			entry->variant = variant;
 		}
-		__sync_fetch_and_add(&variant->ref_count, 1);
+		p_atomic_inc(&variant->ref_count);
 		pthread_mutex_unlock(&cache->mutex);
 		return variant;
 	}
@@ -295,7 +297,7 @@ radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
 	entry->rsrc2 = variant->rsrc2;
 	entry->code_size = code_size;
 	entry->variant = variant;
-	__sync_fetch_and_add(&variant->ref_count, 1);
+	p_atomic_inc(&variant->ref_count);

 	radv_pipeline_cache_add_entry(cache, entry);

--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -47,12 +47,14 @@
 #include "compiler/shader_enums.h"
 #include "util/macros.h"
 #include "util/list.h"
-#include "util/vk_alloc.h"
 #include "main/macros.h"
+#include "vk_alloc.h"

 #include "radv_radeon_winsys.h"
 #include "ac_binary.h"
 #include "ac_nir_to_llvm.h"
+#include "ac_gpu_info.h"
+#include "ac_surface.h"
 #include "radv_debug.h"
 #include "radv_descriptor_set.h"

@@ -266,10 +268,14 @@ struct radv_physical_device {
 	char                                        path[20];
 	const char *                                name;
 	uint8_t                                     uuid[VK_UUID_SIZE];
+	uint8_t                                     device_uuid[VK_UUID_SIZE];

 	int local_fd;
 	struct wsi_device                       wsi_device;
 	struct radv_extensions                      extensions;
+
+	bool has_rbplus; /* if RB+ register exist */
+	bool rbplus_allowed; /* if RB+ is allowed */
 };

 struct radv_instance {
@@ -282,6 +288,7 @@ struct radv_instance {
 	struct radv_physical_device                 physicalDevices[RADV_MAX_DRM_DEVICES];

 	uint64_t debug_flags;
+	uint64_t perftest_flags;
 };

 VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -343,6 +350,8 @@ struct radv_meta_state {
 		struct radv_pipeline *depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
 	} clear[1 + MAX_SAMPLES_LOG2];

+	VkPipelineLayout                          clear_color_p_layout;
+	VkPipelineLayout                          clear_depth_p_layout;
 	struct {
 		VkRenderPass render_pass[NUM_META_FS_KEYS];

@@ -415,9 +424,22 @@ struct radv_meta_state {
 		struct {
 			VkPipeline                                pipeline;
 			VkPipeline                                i_pipeline;
+			VkPipeline                                srgb_pipeline;
 		} rc[MAX_SAMPLES_LOG2];
 	} resolve_compute;

+	struct {
+		VkDescriptorSetLayout                     ds_layout;
+		VkPipelineLayout                          p_layout;
+
+		struct {
+			VkRenderPass srgb_render_pass;
+			VkPipeline   srgb_pipeline;
+			VkRenderPass render_pass[NUM_META_FS_KEYS];
+			VkPipeline   pipeline[NUM_META_FS_KEYS];
+		} rc[MAX_SAMPLES_LOG2];
+	} resolve_fragment;
+
 	struct {
 		VkPipeline                                decompress_pipeline;
 		VkPipeline                                resummarize_pipeline;
@@ -495,7 +517,7 @@ struct radv_device {
 	int queue_count[RADV_MAX_QUEUE_FAMILIES];
 	struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
 	struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
-
+	struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
 	uint64_t debug_flags;

 	bool llvm_supports_spill;
@@ -570,6 +592,10 @@ struct radv_descriptor_pool {
 	uint64_t size;

 	struct list_head vram_list;
+
+	uint8_t *host_memory_base;
+	uint8_t *host_memory_ptr;
+	uint8_t *host_memory_end;
 };

 struct radv_descriptor_update_template_entry {
@@ -585,7 +611,6 @@ struct radv_descriptor_update_template_entry {
 	uint32_t dst_stride;

 	uint32_t buffer_offset;
-	uint32_t buffer_count;

 	/* Only valid for combined image samplers and samplers */
 	uint16_t has_sampler;
@@ -726,7 +751,6 @@ struct radv_attachment_state {
 struct radv_cmd_state {
 	uint32_t                                      vb_dirty;
 	radv_cmd_dirty_mask_t                         dirty;
-	bool                                          vertex_descriptors_dirty;
 	bool                                          push_descriptors_dirty;

 	struct radv_pipeline *                        pipeline;
@@ -741,9 +765,9 @@ struct radv_cmd_state {
 	struct radv_descriptor_set *                  descriptors[MAX_SETS];
 	struct radv_attachment_state *                attachments;
 	VkRect2D                                     render_area;
-	struct radv_buffer *                         index_buffer;
 	uint32_t                                     index_type;
-	uint32_t                                     index_offset;
+	uint64_t                                     index_va;
+	uint32_t                                     max_index_count;
 	int32_t                                      last_primitive_reset_en;
 	uint32_t                                     last_primitive_reset_index;
 	enum radv_cmd_flush_bits                     flush_bits;
@@ -791,8 +815,6 @@ struct radv_cmd_buffer {

 	struct radv_cmd_buffer_upload upload;

-	bool record_fail;
-
 	uint32_t scratch_size_needed;
 	uint32_t compute_scratch_size_needed;
 	uint32_t esgs_ring_size_needed;
@@ -800,7 +822,12 @@ struct radv_cmd_buffer {
 	bool tess_rings_needed;
 	bool sample_positions_needed;

+	bool record_fail;
+
 	int ring_offsets_idx; /* just used for verification */
+	uint32_t gfx9_fence_offset;
+	struct radeon_winsys_bo *gfx9_fence_bo;
+	uint32_t gfx9_fence_idx;
 };

 struct radv_image;
@@ -820,18 +847,29 @@ void si_write_scissors(struct radeon_winsys_cs *cs, int first,
 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 				   bool instanced_draw, bool indirect_draw,
 				   uint32_t draw_vertex_count);
+void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
+				enum chip_class chip_class,
+				bool is_mec,
+				unsigned event, unsigned event_flags,
+				unsigned data_sel,
+				uint64_t va,
+				uint32_t old_fence,
+				uint32_t new_fence);
+
+void si_emit_wait_fence(struct radeon_winsys_cs *cs,
+			uint64_t va, uint32_t ref,
+			uint32_t mask);
 void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
-                            enum chip_class chip_class,
-                            bool is_mec,
-                            enum radv_cmd_flush_bits flush_bits);
-void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
-                            enum chip_class chip_class,
-                            bool is_mec,
-                            enum radv_cmd_flush_bits flush_bits);
+			    enum chip_class chip_class,
+			    uint32_t *fence_ptr, uint64_t va,
+			    bool is_mec,
+			    enum radv_cmd_flush_bits flush_bits);
 void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 			   uint64_t src_va, uint64_t dest_va,
 			   uint64_t size);
+void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
+                        unsigned size);
 void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
 			    uint64_t size, unsigned value);
 void radv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer);
@@ -856,6 +894,8 @@ void
 radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
 void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
+void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
+void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
 void radv_cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
 unsigned radv_cayman_get_maxdist(int log_samples);
 void radv_device_init_msaa(struct radv_device *device);
@@ -1007,7 +1047,7 @@ struct radv_pipeline {
 	struct radv_pipeline_layout *                 layout;

 	bool                                         needs_data_cache;
-
+	bool					     need_indirect_descriptor_sets;
 	struct radv_shader_variant *                 shaders[MESA_SHADER_STAGES];
 	struct radv_shader_variant *gs_copy_shader;
 	VkShaderStageFlags                           active_stages;
@@ -1031,6 +1071,7 @@ struct radv_pipeline {
 			unsigned prim;
 			unsigned gs_out;
 			uint32_t vgt_gs_mode;
+			bool vgt_primitiveid_en;
 			bool prim_restart_enable;
 			unsigned esgs_ring_size;
 			unsigned gsvs_ring_size;
@@ -1038,6 +1079,8 @@ struct radv_pipeline {
 			uint32_t ps_input_cntl_num;
 			uint32_t pa_cl_vs_out_cntl;
 			uint32_t vgt_shader_stages_en;
+			uint32_t vtx_base_sgpr;
+			uint8_t vtx_emit_num;
 			struct radv_prim_vertex_count prim_vertex_count;
 			bool can_use_guardband;
 		} graphics;
@@ -1057,6 +1100,11 @@ static inline bool radv_pipeline_has_tess(struct radv_pipeline *pipeline)
 	return pipeline->shaders[MESA_SHADER_TESS_EVAL] ? true : false;
 }

+uint32_t radv_shader_stage_to_user_data_0(gl_shader_stage stage, bool has_gs, bool has_tess);
+struct ac_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
+					       gl_shader_stage stage,
+					       int idx);
+
 struct radv_graphics_pipeline_create_info {
 	bool use_rectlist;
 	bool db_depth_clear;
@@ -1141,10 +1189,7 @@ struct radv_image {
 	 */
 	VkFormat vk_format;
 	VkImageAspectFlags aspects;
-	VkExtent3D extent;
-	uint32_t levels;
-	uint32_t array_size;
-	uint32_t samples; /**< VkImageCreateInfo::samples */
+	struct ac_surf_info info;
 	VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
 	VkImageTiling tiling; /** VkImageCreateInfo::tiling */
 	VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
@@ -1167,12 +1212,22 @@ struct radv_image {
 	uint32_t clear_value_offset;
 };

+/* Whether the image has a htile that is known consistent with the contents of
+ * the image. */
 bool radv_layout_has_htile(const struct radv_image *image,
-                           VkImageLayout layout);
+                           VkImageLayout layout,
+                           unsigned queue_mask);
+
+/* Whether the image has a htile  that is known consistent with the contents of
+ * the image and is allowed to be in compressed form.
+ *
+ * If this is false reads that don't use the htile should be able to return
+ * correct results.
+ */
 bool radv_layout_is_htile_compressed(const struct radv_image *image,
-                                     VkImageLayout layout);
-bool radv_layout_can_expclear(const struct radv_image *image,
-                              VkImageLayout layout);
+                                     VkImageLayout layout,
+                                     unsigned queue_mask);
+
 bool radv_layout_can_fast_clear(const struct radv_image *image,
 			        VkImageLayout layout,
 			        unsigned queue_mask);
@@ -1185,7 +1240,7 @@ radv_get_layerCount(const struct radv_image *image,
 		    const VkImageSubresourceRange *range)
 {
 	return range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
-		image->array_size - range->baseArrayLayer : range->layerCount;
+		image->info.array_size - range->baseArrayLayer : range->layerCount;
 }

 static inline uint32_t
@@ -1193,7 +1248,7 @@ radv_get_levelCount(const struct radv_image *image,
 		    const VkImageSubresourceRange *range)
 {
 	return range->levelCount == VK_REMAINING_MIP_LEVELS ?
-		image->levels - range->baseMipLevel : range->levelCount;
+		image->info.levels - range->baseMipLevel : range->levelCount;
 }

 struct radeon_bo_metadata;
@@ -1220,7 +1275,6 @@ struct radv_image_view {

 struct radv_image_create_info {
 	const VkImageCreateInfo *vk_info;
-	uint32_t stride;
 	bool scanout;
 };

@@ -1231,11 +1285,8 @@ VkResult radv_image_create(VkDevice _device,

 void radv_image_view_init(struct radv_image_view *view,
 			  struct radv_device *device,
-			  const VkImageViewCreateInfo* pCreateInfo,
-			  struct radv_cmd_buffer *cmd_buffer,
-			  VkImageUsageFlags usage_mask);
-void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
-					    struct radv_image *image, uint32_t micro_tile_mode);
+			  const VkImageViewCreateInfo* pCreateInfo);
+
 struct radv_buffer_view {
 	struct radeon_winsys_bo *bo;
 	VkFormat vk_format;
@@ -1279,42 +1330,57 @@ radv_sanitize_image_offset(const VkImageType imageType,
 	}
 }

+static inline bool
+radv_image_extent_compare(const struct radv_image *image,
+			  const VkExtent3D *extent)
+{
+	if (extent->width != image->info.width ||
+	    extent->height != image->info.height ||
+	    extent->depth != image->info.depth)
+		return false;
+	return true;
+}
+
 struct radv_sampler {
 	uint32_t state[4];
 };

 struct radv_color_buffer_info {
-	uint32_t cb_color_base;
+	uint64_t cb_color_base;
+	uint64_t cb_color_cmask;
+	uint64_t cb_color_fmask;
+	uint64_t cb_dcc_base;
 	uint32_t cb_color_pitch;
 	uint32_t cb_color_slice;
 	uint32_t cb_color_view;
 	uint32_t cb_color_info;
 	uint32_t cb_color_attrib;
+	uint32_t cb_color_attrib2;
 	uint32_t cb_dcc_control;
-	uint32_t cb_color_cmask;
 	uint32_t cb_color_cmask_slice;
-	uint32_t cb_color_fmask;
 	uint32_t cb_color_fmask_slice;
 	uint32_t cb_clear_value0;
 	uint32_t cb_clear_value1;
-	uint32_t cb_dcc_base;
 	uint32_t micro_tile_mode;
+	uint32_t gfx9_epitch;
 };

 struct radv_ds_buffer_info {
+	uint64_t db_z_read_base;
+	uint64_t db_stencil_read_base;
+	uint64_t db_z_write_base;
+	uint64_t db_stencil_write_base;
+	uint64_t db_htile_data_base;
 	uint32_t db_depth_info;
 	uint32_t db_z_info;
 	uint32_t db_stencil_info;
-	uint32_t db_z_read_base;
-	uint32_t db_stencil_read_base;
-	uint32_t db_z_write_base;
-	uint32_t db_stencil_write_base;
 	uint32_t db_depth_view;
 	uint32_t db_depth_size;
 	uint32_t db_depth_slice;
 	uint32_t db_htile_surface;
-	uint32_t db_htile_data_base;
 	uint32_t pa_su_poly_offset_db_fmt_cntl;
+	uint32_t db_z_info2;
+	uint32_t db_stencil_info2;
 	float offset_scale;
 };

@@ -1343,8 +1409,8 @@ struct radv_subpass_barrier {

 struct radv_subpass {
 	uint32_t                                     input_count;
-	VkAttachmentReference *                      input_attachments;
 	uint32_t                                     color_count;
+	VkAttachmentReference *                      input_attachments;
 	VkAttachmentReference *                      color_attachments;
 	VkAttachmentReference *                      resolve_attachments;
 	VkAttachmentReference                        depth_stencil_attachment;
--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -44,11 +44,6 @@ static unsigned get_max_db(struct radv_device *device)
 	unsigned num_db = device->physical_device->rad_info.num_render_backends;
 	MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;

-	if (device->physical_device->rad_info.chip_class == SI)
-		num_db = 8;
-	else
-		num_db = MAX2(8, num_db);
-
 	/* Otherwise we need to change the query reset procedure */
 	assert(rb_mask == ((1ull << num_db) - 1));

@@ -77,6 +72,8 @@ static struct nir_ssa_def *
 radv_load_push_int(nir_builder *b, unsigned offset, const char *name)
 {
 	nir_intrinsic_instr *flags = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
+	nir_intrinsic_set_base(flags, 0);
+	nir_intrinsic_set_range(flags, 16);
 	flags->src[0] = nir_src_for_ssa(nir_imm_int(b, offset));
 	flags->num_components = 1;
 	nir_ssa_dest_init(&flags->instr, &flags->dest, 1, 32, name);
@@ -125,10 +122,10 @@ build_occlusion_query_shader(struct radv_device *device) {
 	 */
 	nir_builder b;
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "occlusion_query");
-	b.shader->info->cs.local_size[0] = 64;
-	b.shader->info->cs.local_size[1] = 1;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "occlusion_query");
+	b.shader->info.cs.local_size[0] = 64;
+	b.shader->info.cs.local_size[1] = 1;
+	b.shader->info.cs.local_size[2] = 1;

 	nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
 	nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
@@ -158,9 +155,9 @@ build_occlusion_query_shader(struct radv_device *device) {
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-	                                        b.shader->info->cs.local_size[0],
-	                                        b.shader->info->cs.local_size[1],
-	                                        b.shader->info->cs.local_size[2], 0);
+	                                        b.shader->info.cs.local_size[0],
+	                                        b.shader->info.cs.local_size[1],
+	                                        b.shader->info.cs.local_size[2], 0);
 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
 	global_id = nir_channel(&b, global_id, 0); // We only care about x here.

@@ -320,10 +317,10 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
 	 */
 	nir_builder b;
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
-	b.shader->info->name = ralloc_strdup(b.shader, "pipeline_statistics_query");
-	b.shader->info->cs.local_size[0] = 64;
-	b.shader->info->cs.local_size[1] = 1;
-	b.shader->info->cs.local_size[2] = 1;
+	b.shader->info.name = ralloc_strdup(b.shader, "pipeline_statistics_query");
+	b.shader->info.cs.local_size[0] = 64;
+	b.shader->info.cs.local_size[1] = 1;
+	b.shader->info.cs.local_size[2] = 1;

 	nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");

@@ -350,9 +347,9 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
 	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
 	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
 	nir_ssa_def *block_size = nir_imm_ivec4(&b,
-	                                        b.shader->info->cs.local_size[0],
-	                                        b.shader->info->cs.local_size[1],
-	                                        b.shader->info->cs.local_size[2], 0);
+	                                        b.shader->info.cs.local_size[0],
+	                                        b.shader->info.cs.local_size[1],
+	                                        b.shader->info.cs.local_size[2], 0);
 	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
 	global_id = nir_channel(&b, global_id, 0); // We only care about x here.

@@ -612,12 +609,10 @@ VkResult radv_device_init_meta_query_state(struct radv_device *device)
 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
 					     1, &pipeline_statistics_vk_pipeline_info, NULL,
 					     &device->meta_state.query.pipeline_statistics_query_pipeline);
-	if (result != VK_SUCCESS)
-		goto fail;

-	return VK_SUCCESS;
 fail:
-	radv_device_finish_meta_query_state(device);
+	if (result != VK_SUCCESS)
+		radv_device_finish_meta_query_state(device);
 	ralloc_free(occlusion_cs.nir);
 	ralloc_free(pipeline_statistics_cs.nir);
 	return result;
@@ -997,13 +992,7 @@ void radv_CmdCopyQueryPoolResults(
 				uint64_t avail_va = va + pool->availability_offset + 4 * query;

 				/* This waits on the ME. All copies below are done on the ME */
-				radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
-				radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
-				radeon_emit(cs, avail_va);
-				radeon_emit(cs, avail_va >> 32);
-				radeon_emit(cs, 1); /* reference value */
-				radeon_emit(cs, 0xffffffff); /* mask */
-				radeon_emit(cs, 4); /* poll interval */
+				si_emit_wait_fence(cs, avail_va, 1, 0xffffffff);
 			}
 		}
 		radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
@@ -1026,13 +1015,7 @@ void radv_CmdCopyQueryPoolResults(
 				uint64_t avail_va = va + pool->availability_offset + 4 * query;

 				/* This waits on the ME. All copies below are done on the ME */
-				radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
-				radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
-				radeon_emit(cs, avail_va);
-				radeon_emit(cs, avail_va >> 32);
-				radeon_emit(cs, 1); /* reference value */
-				radeon_emit(cs, 0xffffffff); /* mask */
-				radeon_emit(cs, 4); /* poll interval */
+				si_emit_wait_fence(cs, avail_va, 1, 0xffffffff);
 			}
 			if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
 				uint64_t avail_va = va + pool->availability_offset + 4 * query;
@@ -1156,7 +1139,7 @@ void radv_CmdEndQuery(

 		break;
 	case VK_QUERY_TYPE_PIPELINE_STATISTICS:
-		radeon_check_space(cmd_buffer->device->ws, cs, 10);
+		radeon_check_space(cmd_buffer->device->ws, cs, 16);

 		va += pipelinestat_block_size;

@@ -1165,13 +1148,11 @@ void radv_CmdEndQuery(
 		radeon_emit(cs, va);
 		radeon_emit(cs, va >> 32);

-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
-				EVENT_INDEX(5));
-		radeon_emit(cs, avail_va);
-		radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1));
-		radeon_emit(cs, 1);
-		radeon_emit(cs, 0);
+		si_cs_emit_write_event_eop(cs,
+					   cmd_buffer->device->physical_device->rad_info.chip_class,
+					   false,
+					   EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
+					   1, avail_va, 0, 1);
 		break;
 	default:
 		unreachable("ending unhandled query type");
@@ -1194,32 +1175,40 @@ void radv_CmdWriteTimestamp(

 	cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);

-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 12);
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);

-	if (mec) {
-		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
-		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
-		radeon_emit(cs, 3 << 29);
+	switch(pipelineStage) {
+	case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
+		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+		radeon_emit(cs, COPY_DATA_COUNT_SEL | COPY_DATA_WR_CONFIRM |
+		                COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
+		                COPY_DATA_DST_SEL(V_370_MEM_ASYNC));
+		radeon_emit(cs, 0);
+		radeon_emit(cs, 0);
 		radeon_emit(cs, query_va);
 		radeon_emit(cs, query_va >> 32);
-		radeon_emit(cs, 0);
-		radeon_emit(cs, 0);
-	} else {
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-		radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
-		radeon_emit(cs, query_va);
-		radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
-		radeon_emit(cs, 0);
-		radeon_emit(cs, 0);
-	}

-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
-	radeon_emit(cs, S_370_DST_SEL(mec ? V_370_MEM_ASYNC : V_370_MEMORY_SYNC) |
-		    S_370_WR_CONFIRM(1) |
-		    S_370_ENGINE_SEL(V_370_ME));
-	radeon_emit(cs, avail_va);
-	radeon_emit(cs, avail_va >> 32);
-	radeon_emit(cs, 1);
+		radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
+		radeon_emit(cs, S_370_DST_SEL(V_370_MEM_ASYNC) |
+		                S_370_WR_CONFIRM(1) |
+		                S_370_ENGINE_SEL(V_370_ME));
+		radeon_emit(cs, avail_va);
+		radeon_emit(cs, avail_va >> 32);
+		radeon_emit(cs, 1);
+		break;
+	default:
+		si_cs_emit_write_event_eop(cs,
+					   cmd_buffer->device->physical_device->rad_info.chip_class,
+					   mec,
+					   V_028A90_BOTTOM_OF_PIPE_TS, 0,
+					   3, query_va, 0, 0);
+		si_cs_emit_write_event_eop(cs,
+					   cmd_buffer->device->physical_device->rad_info.chip_class,
+					   mec,
+					   V_028A90_BOTTOM_OF_PIPE_TS, 0,
+					   1, avail_va, 0, 1);
+		break;
+	}

 	assert(cmd_buffer->cs->cdw <= cdw_max);
 }
--- a/src/amd/vulkan/radv_radeon_winsys.h
+++ b/src/amd/vulkan/radv_radeon_winsys.h
@@ -35,6 +35,10 @@
 #include "main/macros.h"
 #include "amd_family.h"

+struct radeon_info;
+struct ac_surf_info;
+struct radeon_surf;
+
 #define FREE(x) free(x)

 enum radeon_bo_domain { /* bitfield */
@@ -71,63 +75,6 @@ struct radeon_winsys_cs {
 	uint32_t *buf; /* The base pointer of the chunk. */
 };

-struct radeon_info {
-	/* PCI info: domain:bus:dev:func */
-	uint32_t                    pci_domain;
-	uint32_t                    pci_bus;
-	uint32_t                    pci_dev;
-	uint32_t                    pci_func;
-
-	/* Device info. */
-	uint32_t                    pci_id;
-	enum radeon_family          family;
-	const char                  *name;
-	enum chip_class             chip_class;
-	uint32_t                    gart_page_size;
-	uint64_t                    gart_size;
-	uint64_t                    vram_size;
-	uint64_t                    visible_vram_size;
-	bool                        has_dedicated_vram;
-	bool                     has_virtual_memory;
-	bool                        gfx_ib_pad_with_type2;
-	bool                     has_uvd;
-	uint32_t                    sdma_rings;
-	uint32_t                    compute_rings;
-	uint32_t                    vce_fw_version;
-	uint32_t                    vce_harvest_config;
-	uint32_t                    clock_crystal_freq; /* in kHz */
-
-	/* Kernel info. */
-	uint32_t                    drm_major; /* version */
-	uint32_t                    drm_minor;
-	uint32_t                    drm_patchlevel;
-	bool                     has_userptr;
-
-	/* Shader cores. */
-	uint32_t                    r600_max_quad_pipes; /* wave size / 16 */
-	uint32_t                    max_shader_clock;
-	uint32_t                    num_good_compute_units;
-	uint32_t                    max_se; /* shader engines */
-	uint32_t                    max_sh_per_se; /* shader arrays per shader engine */
-
-	/* Render backends (color + depth blocks). */
-	uint32_t                    r300_num_gb_pipes;
-	uint32_t                    r300_num_z_pipes;
-	uint32_t                    r600_gb_backend_map; /* R600 harvest config */
-	bool                     r600_gb_backend_map_valid;
-	uint32_t                    r600_num_banks;
-	uint32_t                    num_render_backends;
-	uint32_t                    num_tile_pipes; /* pipe count from PIPE_CONFIG */
-	uint32_t                    pipe_interleave_bytes;
-	uint32_t                    enabled_rb_mask; /* GCN harvest config */
-
-	/* Tile modes. */
-	uint32_t                    si_tile_mode_array[32];
-	uint32_t                    cik_macrotile_mode_array[16];
-};
-
-#define RADEON_SURF_MAX_LEVEL                   32
-
 #define RADEON_SURF_TYPE_MASK                   0xFF
 #define RADEON_SURF_TYPE_SHIFT                  0
 #define     RADEON_SURF_TYPE_1D                     0
@@ -138,93 +85,11 @@ struct radeon_info {
 #define     RADEON_SURF_TYPE_2D_ARRAY               5
 #define RADEON_SURF_MODE_MASK                   0xFF
 #define RADEON_SURF_MODE_SHIFT                  8
-#define     RADEON_SURF_MODE_LINEAR_ALIGNED         1
-#define     RADEON_SURF_MODE_1D                     2
-#define     RADEON_SURF_MODE_2D                     3
-#define RADEON_SURF_SCANOUT                     (1 << 16)
-#define RADEON_SURF_ZBUFFER                     (1 << 17)
-#define RADEON_SURF_SBUFFER                     (1 << 18)
-#define RADEON_SURF_Z_OR_SBUFFER                (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
-#define RADEON_SURF_HAS_SBUFFER_MIPTREE         (1 << 19)
-#define RADEON_SURF_HAS_TILE_MODE_INDEX         (1 << 20)
-#define RADEON_SURF_FMASK                       (1 << 21)
-#define RADEON_SURF_DISABLE_DCC                 (1 << 22)
-#define RADEON_SURF_TC_COMPATIBLE_HTILE         (1 << 23)

 #define RADEON_SURF_GET(v, field)   (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
 #define RADEON_SURF_SET(v, field)   (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
 #define RADEON_SURF_CLR(v, field)   ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))

-struct radeon_surf_level {
-	uint64_t                    offset;
-	uint64_t                    slice_size;
-	uint32_t                    npix_x;
-	uint32_t                    npix_y;
-	uint32_t                    npix_z;
-	uint32_t                    nblk_x;
-	uint32_t                    nblk_y;
-	uint32_t                    nblk_z;
-	uint32_t                    pitch_bytes;
-	uint32_t                    mode;
-	uint64_t                    dcc_offset;
-	uint64_t                    dcc_fast_clear_size;
-	bool                        dcc_enabled;
-};
-
-
-/* surface defintions from the winsys */
-struct radeon_surf {
-	/* These are inputs to the calculator. */
-	uint32_t                    npix_x;
-	uint32_t                    npix_y;
-	uint32_t                    npix_z;
-	uint32_t                    blk_w;
-	uint32_t                    blk_h;
-	uint32_t                    blk_d;
-	uint32_t                    array_size;
-	uint32_t                    last_level;
-	uint32_t                    bpe;
-	uint32_t                    nsamples;
-	uint32_t                    flags;
-
-	/* These are return values. Some of them can be set by the caller, but
-	 * they will be treated as hints (e.g. bankw, bankh) and might be
-	 * changed by the calculator.
-	 */
-	uint64_t                    bo_size;
-	uint64_t                    bo_alignment;
-	/* This applies to EG and later. */
-	uint32_t                    bankw;
-	uint32_t                    bankh;
-	uint32_t                    mtilea;
-	uint32_t                    tile_split;
-	uint32_t                    stencil_tile_split;
-	uint64_t                    stencil_offset;
-	struct radeon_surf_level    level[RADEON_SURF_MAX_LEVEL];
-	struct radeon_surf_level    stencil_level[RADEON_SURF_MAX_LEVEL];
-	uint32_t                    tiling_index[RADEON_SURF_MAX_LEVEL];
-	uint32_t                    stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
-	uint32_t                    pipe_config;
-	uint32_t                    num_banks;
-	uint32_t                    macro_tile_index;
-	uint32_t                    micro_tile_mode; /* displayable, thin, depth, rotated */
-
-	/* Whether the depth miptree or stencil miptree as used by the DB are
-	 * adjusted from their TC compatible form to ensure depth/stencil
-	 * compatibility. If either is true, the corresponding plane cannot be
-	 * sampled from.
-	 */
-	bool                        depth_adjusted;
-	bool                        stencil_adjusted;
-
-	uint64_t                    dcc_size;
-	uint64_t                    dcc_alignment;
-
-	uint64_t                    htile_size;
-	uint64_t                    htile_slice_size;
-	uint64_t                    htile_alignment;
-};
-
 enum radeon_bo_layout {
 	RADEON_LAYOUT_LINEAR = 0,
 	RADEON_LAYOUT_TILED,
@@ -238,16 +103,25 @@ struct radeon_bo_metadata {
 	/* Tiling flags describing the texture layout for display code
 	 * and DRI sharing.
 	 */
-	enum radeon_bo_layout   microtile;
-	enum radeon_bo_layout   macrotile;
-	unsigned                pipe_config;
-	unsigned                bankw;
-	unsigned                bankh;
-	unsigned                tile_split;
-	unsigned                mtilea;
-	unsigned                num_banks;
-	unsigned                stride;
-	bool                    scanout;
+	union {
+		struct {
+			enum radeon_bo_layout   microtile;
+			enum radeon_bo_layout   macrotile;
+			unsigned                pipe_config;
+			unsigned                bankw;
+			unsigned                bankh;
+			unsigned                tile_split;
+			unsigned                mtilea;
+			unsigned                num_banks;
+			unsigned                stride;
+			bool                    scanout;
+		} legacy;
+
+		struct {
+			/* surface flags */
+			unsigned swizzle_mode:5;
+		} gfx9;
+	} u;

 	/* Additional metadata associated with the buffer, in bytes.
 	 * The maximum size is 64 * 4. This is opaque for the winsys & kernel.
@@ -334,6 +208,7 @@ struct radeon_winsys {
 	void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);

 	int (*surface_init)(struct radeon_winsys *ws,
+			    const struct ac_surf_info *surf_info,
 			    struct radeon_surf *surf);

 	int (*surface_best)(struct radeon_winsys *ws,
--- a/src/amd/vulkan/radv_wsi.c
+++ b/src/amd/vulkan/radv_wsi.c
@@ -26,7 +26,7 @@
 #include "radv_private.h"
 #include "radv_meta.h"
 #include "wsi_common.h"
-#include "util/vk_util.h"
+#include "vk_util.h"

 static const struct wsi_callbacks wsi_cbs = {
   .get_phys_device_format_properties = radv_GetPhysicalDeviceFormatProperties,
@@ -224,7 +224,7 @@ radv_wsi_image_create(VkDevice device_h,
 	*memory_p = memory_h;
 	*size = image->size;
 	*offset = image->offset;
-	*row_pitch = surface->level[0].pitch_bytes;
+	*row_pitch = surface->u.legacy.level[0].nblk_x * surface->bpe;
 	return VK_SUCCESS;
 fail_alloc_memory:
 	radv_FreeMemory(device_h, memory_h, pAllocator);
@@ -438,7 +438,7 @@ VkResult radv_AcquireNextImageKHR(
 	VkResult result = swapchain->acquire_next_image(swapchain, timeout, semaphore,
 	                                                pImageIndex);

-	if (fence && result == VK_SUCCESS) {
+	if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) {
 		fence->submitted = true;
 		fence->signalled = true;
 	}
@@ -460,16 +460,20 @@ VkResult radv_QueuePresentKHR(
 		RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
 		struct radeon_winsys_cs *cs;
 		const VkPresentRegionKHR *region = NULL;
+		VkResult item_result;

 		assert(radv_device_from_handle(swapchain->device) == queue->device);
 		if (swapchain->fences[0] == VK_NULL_HANDLE) {
-			result = radv_CreateFence(radv_device_to_handle(queue->device),
+			item_result = radv_CreateFence(radv_device_to_handle(queue->device),
 						  &(VkFenceCreateInfo) {
 							  .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
 								  .flags = 0,
 								  }, &swapchain->alloc, &swapchain->fences[0]);
-			if (result != VK_SUCCESS)
-				return result;
+			if (pPresentInfo->pResults != NULL)
+				pPresentInfo->pResults[i] = item_result;
+			result = result == VK_SUCCESS ? item_result : result;
+			if (item_result != VK_SUCCESS)
+				continue;
 		} else {
 			radv_ResetFences(radv_device_to_handle(queue->device),
 					 1, &swapchain->fences[0]);
@@ -493,12 +497,15 @@ VkResult radv_QueuePresentKHR(
 		if (regions && regions->pRegions)
 			region = &regions->pRegions[i];

-		result = swapchain->queue_present(swapchain,
+		item_result = swapchain->queue_present(swapchain,
 						  pPresentInfo->pImageIndices[i],
 						  region);
 		/* TODO: What if one of them returns OUT_OF_DATE? */
-		if (result != VK_SUCCESS)
-			return result;
+		if (pPresentInfo->pResults != NULL)
+			pPresentInfo->pResults[i] = item_result;
+		result = result == VK_SUCCESS ? item_result : result;
+		if (item_result != VK_SUCCESS)
+			continue;

 		VkFence last = swapchain->fences[2];
 		swapchain->fences[2] = swapchain->fences[1];
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -30,6 +30,7 @@
 #include "radv_private.h"
 #include "radv_cs.h"
 #include "sid.h"
+#include "gfx9d.h"
 #include "radv_util.h"
 #include "main/macros.h"

@@ -241,6 +242,9 @@ si_emit_config(struct radv_physical_device *physical_device,
 	radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);

 	radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
+	radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
+	if (physical_device->rad_info.chip_class >= GFX9)
+		radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 0);
 	radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
 	if (physical_device->rad_info.chip_class < CIK)
 		radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
@@ -297,6 +301,7 @@ si_emit_config(struct radv_physical_device *physical_device,
 		raster_config_1 = 0x0000002a;
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		raster_config = 0x16000012;
 		raster_config_1 = 0x00000000;
 		break;
@@ -327,24 +332,28 @@ si_emit_config(struct radv_physical_device *physical_device,
 		raster_config_1 = 0x00000000;
 		break;
 	default:
-		fprintf(stderr,
-			"radeonsi: Unknown GPU, using 0 for raster_config\n");
-		raster_config = 0x00000000;
-		raster_config_1 = 0x00000000;
+		if (physical_device->rad_info.chip_class <= VI) {
+			fprintf(stderr,
+				"radeonsi: Unknown GPU, using 0 for raster_config\n");
+			raster_config = 0x00000000;
+			raster_config_1 = 0x00000000;
+		}
 		break;
 	}

 	/* Always use the default config when all backends are enabled
 	 * (or when we failed to determine the enabled backends).
 	 */
-	if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
-		radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
-				       raster_config);
-		if (physical_device->rad_info.chip_class >= CIK)
-			radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
-					       raster_config_1);
-	} else {
-		si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
+	if (physical_device->rad_info.chip_class <= VI) {
+		if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
+			radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
+					       raster_config);
+			if (physical_device->rad_info.chip_class >= CIK)
+				radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
+						       raster_config_1);
+		} else {
+			si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
+		}
 	}

 	radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
@@ -368,22 +377,31 @@ si_emit_config(struct radv_physical_device *physical_device,
 			       S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
 			       S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));

-	radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
-	radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
-	radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
+	if (physical_device->rad_info.chip_class >= GFX9) {
+		radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
+		radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
+		radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
+	} else {
+		radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
+		radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
+		radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
+	}

 	if (physical_device->rad_info.chip_class >= CIK) {
-		/* If this is 0, Bonaire can hang even if GS isn't being used.
-		 * Other chips are unaffected. These are suboptimal values,
-		 * but we don't use on-chip GS.
-		 */
-		radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
-				       S_028A44_ES_VERTS_PER_SUBGRP(64) |
-				       S_028A44_GS_PRIMS_PER_SUBGRP(4));
-
-		radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
-		radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
-		radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+		if (physical_device->rad_info.chip_class >= GFX9) {
+			radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
+		} else {
+			radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
+			radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
+			radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
+			/* If this is 0, Bonaire can hang even if GS isn't being used.
+			 * Other chips are unaffected. These are suboptimal values,
+			 * but we don't use on-chip GS.
+			 */
+			radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
+					       S_028A44_ES_VERTS_PER_SUBGRP(64) |
+					       S_028A44_GS_PRIMS_PER_SUBGRP(4));
+		}
 		radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));

 		if (physical_device->rad_info.num_good_compute_units /
@@ -434,9 +452,41 @@ si_emit_config(struct radv_physical_device *physical_device,
 		radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
 	}

-	if (physical_device->rad_info.family == CHIP_STONEY)
+	if (physical_device->has_rbplus)
 		radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);

+	if (physical_device->rad_info.chip_class >= GFX9) {
+		unsigned num_se = physical_device->rad_info.max_se;
+		unsigned pc_lines = 0;
+
+		switch (physical_device->rad_info.family) {
+		case CHIP_VEGA10:
+			pc_lines = 4096;
+			break;
+		case CHIP_RAVEN:
+			pc_lines = 1024;
+			break;
+		default:
+			assert(0);
+		}
+
+		radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
+				       S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
+		radeon_set_context_reg(cs, R_028064_DB_RENDER_FILTER, 0);
+		/* TODO: We can use this to disable RBs for rendering to GART: */
+		radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
+		radeon_set_context_reg(cs, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
+		/* TODO: Enable the binner: */
+		radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
+				       S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+				       S_028C44_DISABLE_START_OF_PRIM(1));
+		radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1,
+				       S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
+				       S_028C48_MAX_PRIM_PER_BATCH(1023));
+		radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
+				       S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
+		radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
+	}
 	si_emit_compute(physical_device, cs);
 }

@@ -650,6 +700,9 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,

 	multi_instances_smaller_than_primgroup = indirect_draw || (instanced_draw &&
 								   num_prims < primgroup_size);
+	if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
+		ia_switch_on_eoi = true;
+
 	if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
 		/* SWITCH_ON_EOI must be set if PrimID is used. */
 		if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id ||
@@ -666,12 +719,14 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
 		if (cmd_buffer->device->has_distributed_tess) {
 			if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
-				partial_es_wave = true;
+				if (chip_class <= VI)
+					partial_es_wave = true;

 				if (family == CHIP_TONGA ||
 				    family == CHIP_FIJI ||
 				    family == CHIP_POLARIS10 ||
-				    family == CHIP_POLARIS11)
+				    family == CHIP_POLARIS11 ||
+				    family == CHIP_POLARIS12)
 					partial_vs_wave = true;
 			} else {
 				partial_vs_wave = true;
@@ -733,10 +788,15 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 		assert(wd_switch_on_eop || !ia_switch_on_eop);
 	}
 	/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
-	if (ia_switch_on_eoi)
+	if (chip_class <= VI && ia_switch_on_eoi)
 		partial_es_wave = true;

 	if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
+
+		if (radv_pipeline_has_gs(cmd_buffer->state.pipeline) &&
+		    cmd_buffer->state.pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.uses_prim_id)
+			ia_switch_on_eoi = true;
+
 		/* GS requirement. */
 		if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3)
 			partial_es_wave = true;
@@ -755,22 +815,88 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 		S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
 		S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
 		S_028AA8_WD_SWITCH_ON_EOP(chip_class >= CIK ? wd_switch_on_eop : 0) |
-		S_028AA8_MAX_PRIMGRP_IN_WAVE(chip_class >= VI ?
-					     max_primgroup_in_wave : 0);
+		/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+		S_028AA8_MAX_PRIMGRP_IN_WAVE(chip_class == VI ?
+					     max_primgroup_in_wave : 0) |
+		S_030960_EN_INST_OPT_BASIC(chip_class >= GFX9) |
+		S_030960_EN_INST_OPT_ADV(chip_class >= GFX9);

 }

+void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
+				enum chip_class chip_class,
+				bool is_mec,
+				unsigned event, unsigned event_flags, /* event_flags is OR'ed into the event dword as-is */
+				unsigned data_sel,
+				uint64_t va,
+				uint32_t old_fence, /* only emitted by the extra CIK/VI event */
+				uint32_t new_fence)
+{ /* Emit an end-of-pipe event carrying address va and immediate data new_fence: RELEASE_MEM on GFX9+ or when is_mec is set on a pre-GFX9 chip, EVENT_WRITE_EOP otherwise. */
+	unsigned op = EVENT_TYPE(event) |
+		EVENT_INDEX(5) |
+		event_flags;
+	unsigned is_gfx8_mec = is_mec && chip_class < GFX9; /* true for is_mec on any pre-GFX9 chip, despite the "gfx8" name */
+
+	if (chip_class >= GFX9 || is_gfx8_mec) {
+		radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, 0)); /* the non-gfx8-MEC form has one extra trailing dword */
+		radeon_emit(cs, op);
+		radeon_emit(cs, EOP_DATA_SEL(data_sel));
+		radeon_emit(cs, va);            /* address lo */
+		radeon_emit(cs, va >> 32);      /* address hi */
+		radeon_emit(cs, new_fence);     /* immediate data lo */
+		radeon_emit(cs, 0); /* immediate data hi */
+		if (!is_gfx8_mec)
+			radeon_emit(cs, 0); /* unused */
+	} else {
+		if (chip_class == CIK ||
+		    chip_class == VI) {
+			/* Two EOP events are required to make all engines go idle
+			 * (and any optional cache flushes execute) before the
+			 * timestamp is written. This first, extra event carries old_fence.
+			 */
+			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
+			radeon_emit(cs, op);
+			radeon_emit(cs, va);
+			radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
+			radeon_emit(cs, old_fence); /* immediate data */
+			radeon_emit(cs, 0); /* unused */
+		}
+
+		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); /* final event; the only one on chips other than CIK/VI */
+		radeon_emit(cs, op);
+		radeon_emit(cs, va); /* address lo */
+		radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel)); /* address hi [15:0] shares the dword with DATA_SEL */
+		radeon_emit(cs, new_fence); /* immediate data */
+		radeon_emit(cs, 0); /* unused */
+	}
+}
+
+void
+si_emit_wait_fence(struct radeon_winsys_cs *cs,
+		   uint64_t va, uint32_t ref,
+		   uint32_t mask)
+{ /* Emit a WAIT_REG_MEM packet on the dword at va; presumably stalls until (*va & mask) == ref — confirm against the packet definition. */
+	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+	radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); /* EQUAL compare; MEM_SPACE(1) presumably selects memory rather than a register */
+	radeon_emit(cs, va); /* address lo */
+	radeon_emit(cs, va >> 32); /* address hi */
+	radeon_emit(cs, ref); /* reference value */
+	radeon_emit(cs, mask); /* mask */
+	radeon_emit(cs, 4); /* poll interval */
+}
+
 static void
 si_emit_acquire_mem(struct radeon_winsys_cs *cs,
-                    bool is_mec,
+                    bool is_mec, bool is_gfx9,
                    unsigned cp_coher_cntl)
 {
-	if (is_mec) {
+	if (is_mec || is_gfx9) {
+		uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff;
 		radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
-		                            PKT3_SHADER_TYPE_S(1));
+		                            PKT3_SHADER_TYPE_S(is_mec));
 		radeon_emit(cs, cp_coher_cntl);   /* CP_COHER_CNTL */
 		radeon_emit(cs, 0xffffffff);      /* CP_COHER_SIZE */
-		radeon_emit(cs, 0xff);            /* CP_COHER_SIZE_HI */
+		radeon_emit(cs, hi_val);          /* CP_COHER_SIZE_HI */
 		radeon_emit(cs, 0);               /* CP_COHER_BASE */
 		radeon_emit(cs, 0);               /* CP_COHER_BASE_HI */
 		radeon_emit(cs, 0x0000000A);      /* POLL_INTERVAL */
@@ -787,42 +913,45 @@ si_emit_acquire_mem(struct radeon_winsys_cs *cs,
 void
 si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
                       enum chip_class chip_class,
+		       uint32_t *flush_cnt,
+		       uint64_t flush_va,
                       bool is_mec,
                       enum radv_cmd_flush_bits flush_bits)
 {
 	unsigned cp_coher_cntl = 0;
-
+	uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+					     RADV_CMD_FLAG_FLUSH_AND_INV_DB);
+	
 	if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
 		cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
 	if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
 		cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);

-	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
-		cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
-			S_0085F0_CB0_DEST_BASE_ENA(1) |
-			S_0085F0_CB1_DEST_BASE_ENA(1) |
-			S_0085F0_CB2_DEST_BASE_ENA(1) |
-			S_0085F0_CB3_DEST_BASE_ENA(1) |
-			S_0085F0_CB4_DEST_BASE_ENA(1) |
-			S_0085F0_CB5_DEST_BASE_ENA(1) |
-			S_0085F0_CB6_DEST_BASE_ENA(1) |
-			S_0085F0_CB7_DEST_BASE_ENA(1);
+	if (chip_class <= VI) {
+		if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
+			cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
+				S_0085F0_CB0_DEST_BASE_ENA(1) |
+				S_0085F0_CB1_DEST_BASE_ENA(1) |
+				S_0085F0_CB2_DEST_BASE_ENA(1) |
+				S_0085F0_CB3_DEST_BASE_ENA(1) |
+				S_0085F0_CB4_DEST_BASE_ENA(1) |
+				S_0085F0_CB5_DEST_BASE_ENA(1) |
+				S_0085F0_CB6_DEST_BASE_ENA(1) |
+				S_0085F0_CB7_DEST_BASE_ENA(1);

-		/* Necessary for DCC */
-		if (chip_class >= VI) {
-			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
-			radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
-			                            EVENT_INDEX(5));
-			radeon_emit(cs, 0);
-			radeon_emit(cs, 0);
-			radeon_emit(cs, 0);
-			radeon_emit(cs, 0);
+			/* Necessary for DCC */
+			if (chip_class >= VI) {
+				si_cs_emit_write_event_eop(cs,
+							   chip_class,
+							   is_mec,
+							   V_028A90_FLUSH_AND_INV_CB_DATA_TS,
+							   0, 0, 0, 0, 0);
+			}
+		}
+		if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
+			cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
+				S_0085F0_DB_DEST_BASE_ENA(1);
 		}
-	}
-
-	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
-		cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
-			S_0085F0_DB_DEST_BASE_ENA(1);
 	}

 	if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
@@ -835,8 +964,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
 	}

-	if (!(flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
-					      RADV_CMD_FLAG_FLUSH_AND_INV_DB))) {
+	if (!flush_cb_db) {
 		if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
 			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
 			radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
@@ -851,6 +979,54 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 		radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
 	}

+	if (chip_class >= GFX9 && flush_cb_db) {
+		unsigned cb_db_event, tc_flags;
+
+		/* Set the CB/DB flush event. */
+		switch (flush_cb_db) {
+		case RADV_CMD_FLAG_FLUSH_AND_INV_CB:
+			cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
+			break;
+		case RADV_CMD_FLAG_FLUSH_AND_INV_DB:
+			cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
+			break;
+		default:
+			/* both CB & DB */
+			cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
+		}
+
+		/* TC    | TC_WB         = invalidate L2 data
+		 * TC_MD | TC_WB         = invalidate L2 metadata
+		 * TC    | TC_WB | TC_MD = invalidate L2 data & metadata
+		 *
+		 * The metadata cache must always be invalidated for coherency
+		 * between CB/DB and shaders. (metadata = HTILE, CMASK, DCC)
+		 *
+		 * TC must be invalidated on GFX9 only if the CB/DB surface is
+		 * not pipe-aligned. If the surface is RB-aligned, it might not
+		 * strictly be pipe-aligned since RB alignment takes precedence.
+		 */
+		tc_flags = EVENT_TC_WB_ACTION_ENA |
+			   EVENT_TC_MD_ACTION_ENA;
+
+		/* Ideally flush TC together with CB/DB. */
+		if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
+			tc_flags |= EVENT_TC_ACTION_ENA |
+				    EVENT_TCL1_ACTION_ENA;
+
+			/* Clear the flags. */
+		        flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
+					 RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
+					 RADV_CMD_FLAG_INV_VMEM_L1);
+		}
+		assert(flush_cnt);
+		uint32_t old_fence = (*flush_cnt)++;
+
+		si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags, 1,
+					   flush_va, old_fence, *flush_cnt);
+		si_emit_wait_fence(cs, flush_va, *flush_cnt, 0xffffffff);
+	}
+
 	/* VGT state sync */
 	if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -860,7 +1036,11 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
 	/* Make sure ME is idle (it executes most packets) before continuing.
 	 * This prevents read-after-write hazards between PFP and ME.
 	 */
-	if ((cp_coher_cntl || (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) &&
+	if ((cp_coher_cntl ||
+	     (flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
+			    RADV_CMD_FLAG_INV_VMEM_L1 |
+			    RADV_CMD_FLAG_INV_GLOBAL_L2 |
+			    RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
 	    !is_mec) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
@@ -868,27 +1048,39 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,

 	if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
 	    (chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
-		cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
-		if (chip_class >= VI)
-			cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
-	} else	if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
-		cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
-		                 S_0301F0_TC_NC_ACTION_ENA(1);
-
-		/* L2 writeback doesn't combine with L1 invalidate */
-		si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
-
+		si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
+				    cp_coher_cntl |
+				    S_0085F0_TC_ACTION_ENA(1) |
+				    S_0085F0_TCL1_ACTION_ENA(1) |
+				    S_0301F0_TC_WB_ACTION_ENA(chip_class >= VI));
 		cp_coher_cntl = 0;
+	} else {
+		if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
+			/* WB = write-back
+			 * NC = apply to non-coherent MTYPEs
+			 *      (i.e. MTYPE <= 1, which is what we use everywhere)
+			 *
+			 * WB doesn't work without NC.
+			 */
+			si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
+					    cp_coher_cntl |
+					    S_0301F0_TC_WB_ACTION_ENA(1) |
+					    S_0301F0_TC_NC_ACTION_ENA(1));
+			cp_coher_cntl = 0;
+		}
+		if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
+			si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
+					    cp_coher_cntl |
+					    S_0085F0_TCL1_ACTION_ENA(1));
+			cp_coher_cntl = 0;
+		}
 	}

-	if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
-		cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
-
 	/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
 	 * Therefore, it should be last. Done in PFP.
 	 */
 	if (cp_coher_cntl)
-		si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
+		si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9, cp_coher_cntl);
 }

 void
@@ -905,67 +1097,118 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
 	                                          RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
 	                                          RADV_CMD_FLAG_VGT_FLUSH);

+	if (!cmd_buffer->state.flush_bits)
+		return;
+
+	enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);

+	uint32_t *ptr = NULL;
+	uint64_t va = 0;
+	if (chip_class == GFX9) {
+		va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->gfx9_fence_bo) + cmd_buffer->gfx9_fence_offset;
+		ptr = &cmd_buffer->gfx9_fence_idx;
+	}
 	si_cs_emit_cache_flush(cmd_buffer->cs,
 	                       cmd_buffer->device->physical_device->rad_info.chip_class,
+			       ptr, va,
 	                       radv_cmd_buffer_uses_mec(cmd_buffer),
 	                       cmd_buffer->state.flush_bits);


-	if (cmd_buffer->state.flush_bits)
-		radv_cmd_buffer_trace_emit(cmd_buffer);
+	radv_cmd_buffer_trace_emit(cmd_buffer);
 	cmd_buffer->state.flush_bits = 0;
 }


 /* Set this if you want the 3D engine to wait until CP DMA is done.
 * It should be set on the last CP DMA packet. */
-#define R600_CP_DMA_SYNC	(1 << 0) /* R600+ */
+#define CP_DMA_SYNC	(1 << 0)

 /* Set this if the source data was used as a destination in a previous CP DMA
 * packet. It's for preventing a read-after-write (RAW) hazard between two
 * CP DMA packets. */
-#define SI_CP_DMA_RAW_WAIT	(1 << 1) /* SI+ */
-#define CIK_CP_DMA_USE_L2	(1 << 2)
+#define CP_DMA_RAW_WAIT	(1 << 1)
+#define CP_DMA_USE_L2	(1 << 2)
+#define CP_DMA_CLEAR	(1 << 3)

 /* Alignment for optimal performance. */
-#define CP_DMA_ALIGNMENT	32
-/* The max number of bytes to copy per packet. */
-#define CP_DMA_MAX_BYTE_COUNT	((1 << 21) - CP_DMA_ALIGNMENT)
+#define SI_CPDMA_ALIGNMENT	32

-static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
-				       uint64_t dst_va, uint64_t src_va,
-				       unsigned size, unsigned flags)
+/* The max number of bytes that can be copied per CP DMA packet: the chip's BYTE_COUNT field limit, rounded down to SI_CPDMA_ALIGNMENT. */
+static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
+{
+	unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
+			       S_414_BYTE_COUNT_GFX9(~0u) : /* all-ones through the field macro — presumably yields the field's largest value; confirm the field starts at bit 0 */
+			       S_414_BYTE_COUNT_GFX6(~0u);
+
+	/* make it aligned for optimal performance */
+	return max & ~(SI_CPDMA_ALIGNMENT - 1); /* mask trick relies on SI_CPDMA_ALIGNMENT being a power of two */
+}
+
+/* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
+ * a buffer. The size must not exceed cp_dma_max_byte_count(). If CP_DMA_CLEAR
+ * is set, src_va is a 32-bit clear value.
+ */
+static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
+			   uint64_t dst_va, uint64_t src_va,
+			   unsigned size, unsigned flags)
 {
 	struct radeon_winsys_cs *cs = cmd_buffer->cs;
-	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
-	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
-	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
-	uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
-			   S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
-			   S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
+	uint32_t header = 0, command = 0;

 	assert(size);
-	assert((size & ((1<<21)-1)) == size);
+	assert(size <= cp_dma_max_byte_count(cmd_buffer));

 	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
+		command |= S_414_BYTE_COUNT_GFX9(size);
+	else
+		command |= S_414_BYTE_COUNT_GFX6(size);
+
+	/* Sync flags. */
+	if (flags & CP_DMA_SYNC)
+		header |= S_411_CP_SYNC(1);
+	else {
+		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
+			command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
+		else
+			command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
+	}
+
+	if (flags & CP_DMA_RAW_WAIT)
+		command |= S_414_RAW_WAIT(1);
+
+	/* Src and dst flags. */
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
+	    !(flags & CP_DMA_CLEAR) &&
+	    src_va == dst_va)
+		header |= S_411_DSL_SEL(V_411_NOWHERE); /* prefetch only */
+	else if (flags & CP_DMA_USE_L2)
+		header |= S_411_DSL_SEL(V_411_DST_ADDR_TC_L2);
+
+	if (flags & CP_DMA_CLEAR)
+		header |= S_411_SRC_SEL(V_411_DATA);
+	else if (flags & CP_DMA_USE_L2)
+		header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);

 	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
 		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
-		radeon_emit(cs, sync_flag | sel);	/* CP_SYNC [31] */
+		radeon_emit(cs, header);
 		radeon_emit(cs, src_va);		/* SRC_ADDR_LO [31:0] */
 		radeon_emit(cs, src_va >> 32);		/* SRC_ADDR_HI [31:0] */
 		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI [31:0] */
-		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+		radeon_emit(cs, command);
 	} else {
+		assert(!(flags & CP_DMA_USE_L2));
+		header |= S_411_SRC_ADDR_HI(src_va >> 32);
 		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
 		radeon_emit(cs, src_va);			/* SRC_ADDR_LO [31:0] */
-		radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
+		radeon_emit(cs, header);			/* SRC_ADDR_HI [15:0] + flags. */
 		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
 		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
+		radeon_emit(cs, command);
 	}

 	/* CP DMA is executed in ME, but index buffers are read by PFP.
@@ -973,7 +1216,7 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
 	 * indices. If we wanted to execute CP DMA in PFP, this packet
 	 * should precede it.
 	 */
-	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
+	if ((flags & CP_DMA_SYNC) && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
 		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
 		radeon_emit(cs, 0);
 	}
@@ -981,45 +1224,14 @@ static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
 	radv_cmd_buffer_trace_emit(cmd_buffer);
 }

-/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
-static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
-					uint64_t dst_va, unsigned size,
-					uint32_t clear_value, unsigned flags)
+void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
+                        unsigned size)
 {
-	struct radeon_winsys_cs *cs = cmd_buffer->cs;
-	uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
-	uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
-	uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
-	uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
+	uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
+	uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT -1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;

-	assert(size);
-	assert((size & ((1<<21)-1)) == size);
-
-	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
-
-	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
-		radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
-		radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
-		radeon_emit(cs, clear_value);		/* DATA [31:0] */
-		radeon_emit(cs, 0);
-		radeon_emit(cs, dst_va);		/* DST_ADDR_LO [31:0] */
-		radeon_emit(cs, dst_va >> 32);		/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
-	} else {
-		radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
-		radeon_emit(cs, clear_value);		/* DATA [31:0] */
-		radeon_emit(cs, sync_flag | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
-		radeon_emit(cs, dst_va);			/* DST_ADDR_LO [31:0] */
-		radeon_emit(cs, (dst_va >> 32) & 0xffff);	/* DST_ADDR_HI [15:0] */
-		radeon_emit(cs, size | wr_confirm | raw_wait);	/* COMMAND [29:22] | BYTE_COUNT [20:0] */
-	}
-
-	/* See "copy_buffer" for explanation. */
-	if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
-		radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-		radeon_emit(cs, 0);
-	}
-	radv_cmd_buffer_trace_emit(cmd_buffer);
+	si_emit_cp_dma(cmd_buffer, aligned_va, aligned_va,
+		       aligned_size, CP_DMA_USE_L2);
 }

 static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
@@ -1031,14 +1243,14 @@ static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_
 	 */
 	if (cmd_buffer->state.flush_bits) {
 		si_emit_cache_flush(cmd_buffer);
-		*flags |= SI_CP_DMA_RAW_WAIT;
+		*flags |= CP_DMA_RAW_WAIT;
 	}

 	/* Do the synchronization after the last dma, so that all data
 	 * is written to memory.
 	 */
 	if (byte_count == remaining_size)
-		*flags |= R600_CP_DMA_SYNC;
+		*flags |= CP_DMA_SYNC;
 }

 static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
@@ -1046,20 +1258,20 @@ static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigne
 	uint64_t va;
 	uint32_t offset;
 	unsigned dma_flags = 0;
-	unsigned buf_size = CP_DMA_ALIGNMENT * 2;
+	unsigned buf_size = SI_CPDMA_ALIGNMENT * 2;
 	void *ptr;

-	assert(size < CP_DMA_ALIGNMENT);
+	assert(size < SI_CPDMA_ALIGNMENT);

-	radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, CP_DMA_ALIGNMENT,  &offset, &ptr);
+	radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, SI_CPDMA_ALIGNMENT,  &offset, &ptr);

 	va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
 	va += offset;

 	si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags);

-	si_emit_cp_dma_copy_buffer(cmd_buffer, va, va + CP_DMA_ALIGNMENT, size,
-				   dma_flags);
+	si_emit_cp_dma(cmd_buffer, va, va + SI_CPDMA_ALIGNMENT, size,
+		       dma_flags);
 }

 void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
@@ -1076,15 +1288,15 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 		 * just to align the internal counter. Otherwise, the DMA engine
 		 * would slow down by an order of magnitude for following copies.
 		 */
-		if (size % CP_DMA_ALIGNMENT)
-			realign_size = CP_DMA_ALIGNMENT - (size % CP_DMA_ALIGNMENT);
+		if (size % SI_CPDMA_ALIGNMENT)
+			realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);

 		/* If the copy begins unaligned, we must start copying from the next
 		 * aligned block and the skipped part should be copied after everything
 		 * else has been copied. Only the src alignment matters, not dst.
 		 */
-		if (src_va % CP_DMA_ALIGNMENT) {
-			skipped_size = CP_DMA_ALIGNMENT - (src_va % CP_DMA_ALIGNMENT);
+		if (src_va % SI_CPDMA_ALIGNMENT) {
+			skipped_size = SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT);
 			/* The main part will be skipped if the size is too small. */
 			skipped_size = MIN2(skipped_size, size);
 			size -= skipped_size;
@@ -1095,14 +1307,14 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,

 	while (size) {
 		unsigned dma_flags = 0;
-		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+		unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));

 		si_cp_dma_prepare(cmd_buffer, byte_count,
 				  size + skipped_size + realign_size,
 				  &dma_flags);

-		si_emit_cp_dma_copy_buffer(cmd_buffer, main_dest_va, main_src_va,
-					   byte_count, dma_flags);
+		si_emit_cp_dma(cmd_buffer, main_dest_va, main_src_va,
+			       byte_count, dma_flags);

 		size -= byte_count;
 		main_src_va += byte_count;
@@ -1116,8 +1328,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
 				  size + skipped_size + realign_size,
 				  &dma_flags);

-		si_emit_cp_dma_copy_buffer(cmd_buffer, dest_va, src_va,
-					   skipped_size, dma_flags);
+		si_emit_cp_dma(cmd_buffer, dest_va, src_va,
+			       skipped_size, dma_flags);
 	}
 	if (realign_size)
 		si_cp_dma_realign_engine(cmd_buffer, realign_size);
@@ -1133,14 +1345,14 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
 	assert(va % 4 == 0 && size % 4 == 0);

 	while (size) {
-		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
-		unsigned dma_flags = 0;
+		unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
+		unsigned dma_flags = CP_DMA_CLEAR;

 		si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);

 		/* Emit the clear packet. */
-		si_emit_cp_dma_clear_buffer(cmd_buffer, va, byte_count, value,
-					    dma_flags);
+		si_emit_cp_dma(cmd_buffer, va, value, byte_count,
+			       dma_flags);

 		size -= byte_count;
 		va += byte_count;
--- a/src/amd/vulkan/vk_format.h
+++ b/src/amd/vulkan/vk_format.h
@@ -396,6 +396,13 @@ vk_format_is_int(VkFormat format)
 	return channel >= 0 && desc->channel[channel].pure_integer;
 }

+static inline bool
+vk_format_is_srgb(VkFormat format)
+{ /* Returns true when the format's description reports VK_FORMAT_COLORSPACE_SRGB. */
+	const struct vk_format_description *desc = vk_format_description(format); /* NOTE(review): desc is dereferenced unchecked — confirm vk_format_description() cannot return NULL for the formats passed here */
+	return desc->colorspace == VK_FORMAT_COLORSPACE_SRGB;
+}
+
 static inline VkFormat
 vk_format_stencil_only(VkFormat format)
 {
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -467,25 +467,29 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
 	struct amdgpu_bo_metadata metadata = {0};
 	uint32_t tiling_flags = 0;

-	if (md->macrotile == RADEON_LAYOUT_TILED)
-		tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
-	else if (md->microtile == RADEON_LAYOUT_TILED)
-		tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
-	else
-		tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
+	if (bo->ws->info.chip_class >= GFX9) {
+		tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
+	} else {
+		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
+			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
+		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
+			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
+		else
+			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

-	tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
-	tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
-	tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
-	if (md->tile_split)
-		tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->tile_split));
-	tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
-	tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);
+		tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
+		tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
+		tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
+		if (md->u.legacy.tile_split)
+			tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
+		tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
+		tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);

-	if (md->scanout)
-		tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
-	else
-		tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
+		if (md->u.legacy.scanout)
+			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
+		else
+			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
+	}

 	metadata.tiling_info = tiling_flags;
 	metadata.size_metadata = md->size_metadata;
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
@@ -90,25 +90,26 @@ static int ring_to_hw_ip(enum ring_type ring)
 }

 static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
-					 struct amdgpu_cs_fence *fence,
+					 struct radv_amdgpu_fence *fence,
 					 struct amdgpu_cs_request *req)
 {
-	fence->context = ctx->ctx;
-	fence->ip_type = req->ip_type;
-	fence->ip_instance = req->ip_instance;
-	fence->ring = req->ring;
-	fence->fence = req->seq_no;
+	fence->fence.context = ctx->ctx;
+	fence->fence.ip_type = req->ip_type;
+	fence->fence.ip_instance = req->ip_instance;
+	fence->fence.ring = req->ring;
+	fence->fence.fence = req->seq_no;
+	fence->user_ptr = (volatile uint64_t*)(ctx->fence_map + (req->ip_type * MAX_RINGS_PER_TYPE + req->ring) * sizeof(uint64_t));
 }

 static struct radeon_winsys_fence *radv_amdgpu_create_fence()
 {
-	struct radv_amdgpu_cs_fence *fence = calloc(1, sizeof(struct amdgpu_cs_fence));
+	struct radv_amdgpu_fence *fence = calloc(1, sizeof(struct radv_amdgpu_fence));
 	return (struct radeon_winsys_fence*)fence;
 }

 static void radv_amdgpu_destroy_fence(struct radeon_winsys_fence *_fence)
 {
-	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
 	free(fence);
 }

@@ -117,16 +118,23 @@ static bool radv_amdgpu_fence_wait(struct radeon_winsys *_ws,
 			      bool absolute,
 			      uint64_t timeout)
 {
-	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
 	unsigned flags = absolute ? AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE : 0;
 	int r;
 	uint32_t expired = 0;

+	if (fence->user_ptr) {
+		if (*fence->user_ptr >= fence->fence.fence)
+			return true;
+		if (!absolute && !timeout)
+			return false;
+	}
+
 	/* Now use the libdrm query. */
-	r = amdgpu_cs_query_fence_status(fence,
-					 timeout,
-					 flags,
-					 &expired);
+	r = amdgpu_cs_query_fence_status(&fence->fence,
+	                                 timeout,
+	                                 flags,
+	                                 &expired);

 	if (r) {
 		fprintf(stderr, "amdgpu: radv_amdgpu_cs_query_fence_status failed.\n");
@@ -619,6 +627,16 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
 	return r;
 }

+static struct amdgpu_cs_fence_info radv_set_cs_fence(struct radv_amdgpu_ctx *ctx, int ip_type, int ring)
+{
+	struct amdgpu_cs_fence_info ret = {0};
+	if (ctx->fence_map) {
+		ret.handle = radv_amdgpu_winsys_bo(ctx->fence_bo)->bo;
+		ret.offset = (ip_type * MAX_RINGS_PER_TYPE + ring) * sizeof(uint64_t);
+	}
+	return ret;
+}
+
 static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
 				    struct amdgpu_cs_request *request)
 {
@@ -637,7 +655,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 {
 	int r;
 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
-	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
 	amdgpu_bo_list_handle bo_list;
 	struct amdgpu_cs_request request = {0};
@@ -676,6 +694,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
 	request.number_of_ibs = 1;
 	request.ibs = &cs0->ib;
 	request.resources = bo_list;
+	request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);

 	if (initial_preamble_cs) {
 		request.ibs = ibs;
@@ -713,7 +732,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 {
 	int r;
 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
-	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
 	amdgpu_bo_list_handle bo_list;
 	struct amdgpu_cs_request request;

@@ -740,6 +759,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
 		request.resources = bo_list;
 		request.number_of_ibs = cnt + !!preamble_cs;
 		request.ibs = ibs;
+		request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);

 		if (preamble_cs) {
 			ibs[0] = radv_amdgpu_cs(preamble_cs)->ib;
@@ -789,14 +809,14 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 {
 	int r;
 	struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
-	struct amdgpu_cs_fence *fence = (struct amdgpu_cs_fence *)_fence;
+	struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
 	struct radv_amdgpu_cs *cs0 = radv_amdgpu_cs(cs_array[0]);
 	struct radeon_winsys *ws = (struct radeon_winsys*)cs0->ws;
 	amdgpu_bo_list_handle bo_list;
 	struct amdgpu_cs_request request;
 	uint32_t pad_word = 0xffff1000U;

-	if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
+	if (radv_amdgpu_winsys(ws)->info.chip_class == SI)
 		pad_word = 0x80000000;

 	assert(cs_count);
@@ -858,6 +878,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
 		request.resources = bo_list;
 		request.number_of_ibs = 1;
 		request.ibs = &ib;
+		request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);

 		r = amdgpu_cs_submit(ctx->ctx, 0, &request, 1);
 		if (r) {
@@ -910,7 +931,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
 	if (!cs->ws->use_ib_bos) {
 		ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
 							   cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
-	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
+	} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
 		ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
 							    cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
 	} else {
@@ -931,6 +952,9 @@ static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
 {
 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
 	void *ret = NULL;
+
+	if (!cs->ib_buffer)
+		return NULL;
 	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
 		struct radv_amdgpu_winsys_bo *bo;

@@ -949,10 +973,15 @@ static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
                                       uint32_t trace_id)
 {
 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+	void *ib = cs->base.buf;
+	int num_dw = cs->base.cdw;

-	ac_parse_ib(file,
-		    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
-		    cs->ib.size, trace_id,  "main IB", cs->ws->info.chip_class,
+	if (cs->ws->use_ib_bos) {
+		ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
+		num_dw = cs->ib.size;
+	}
+	assert(ib);
+	ac_parse_ib(file, ib, num_dw, trace_id, "main IB", cs->ws->info.chip_class,
 		    radv_amdgpu_winsys_get_cpu_addr, cs);
 }

@@ -970,6 +999,15 @@ static struct radeon_winsys_ctx *radv_amdgpu_ctx_create(struct radeon_winsys *_w
 		goto error_create;
 	}
 	ctx->ws = ws;
+
+	assert(AMDGPU_HW_IP_NUM * MAX_RINGS_PER_TYPE * sizeof(uint64_t) <= 4096);
+	ctx->fence_bo = ws->base.buffer_create(&ws->base, 4096, 8,
+	                                      RADEON_DOMAIN_GTT,
+	                                      RADEON_FLAG_CPU_ACCESS);
+	if (ctx->fence_bo)
+		ctx->fence_map = (uint64_t*)ws->base.buffer_map(ctx->fence_bo);
+	if (ctx->fence_map)
+		memset(ctx->fence_map, 0, 4096);
 	return (struct radeon_winsys_ctx *)ctx;
 error_create:
 	FREE(ctx);
@@ -979,6 +1017,10 @@ error_create:
 static void radv_amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
 {
 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
+	/* The fence BO is optional: context creation carries on without it
+	 * when the allocation fails, so only release it if it exists. */
+	if (ctx->fence_bo)
+		ctx->ws->base.buffer_destroy(ctx->fence_bo);
 	amdgpu_cs_ctx_free(ctx->ctx);
 	FREE(ctx);
 }
@@ -989,9 +1028,9 @@ static bool radv_amdgpu_ctx_wait_idle(struct radeon_winsys_ctx *rwctx,
 	struct radv_amdgpu_ctx *ctx = (struct radv_amdgpu_ctx *)rwctx;
 	int ip_type = ring_to_hw_ip(ring_type);

-	if (ctx->last_submission[ip_type][ring_index].fence) {
+	if (ctx->last_submission[ip_type][ring_index].fence.fence) {
 		uint32_t expired;
-		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index],
+		int ret = amdgpu_cs_query_fence_status(&ctx->last_submission[ip_type][ring_index].fence,
 		                                       1000000000ull, 0, &expired);

 		if (ret || !expired)
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.h
@@ -42,10 +42,19 @@ enum {
 	MAX_RINGS_PER_TYPE = 8
 };

+
+struct radv_amdgpu_fence {
+	struct amdgpu_cs_fence fence;
+	volatile uint64_t *user_ptr;
+};
+
 struct radv_amdgpu_ctx {
 	struct radv_amdgpu_winsys *ws;
 	amdgpu_context_handle ctx;
-	struct amdgpu_cs_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+	struct radv_amdgpu_fence last_submission[AMDGPU_HW_IP_DMA + 1][MAX_RINGS_PER_TYPE];
+
+	struct radeon_winsys_bo *fence_bo;
+	uint64_t *fence_map;
 };

 static inline struct radv_amdgpu_ctx *
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_surface.c
@@ -35,63 +35,39 @@
 #include "radv_amdgpu_surface.h"
 #include "sid.h"

-#ifndef NO_ENTRIES
-#define NO_ENTRIES 32
-#endif
+#include "ac_surface.h"

-#ifndef NO_MACRO_ENTRIES
-#define NO_MACRO_ENTRIES 16
-#endif
-
-#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
-#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
-#endif
-
-static int radv_amdgpu_surface_sanity(const struct radeon_surf *surf)
+static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
+				      const struct radeon_surf *surf)
 {
 	unsigned type = RADEON_SURF_GET(surf->flags, TYPE);

 	if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX))
 		return -EINVAL;

-	/* all dimension must be at least 1 ! */
-	if (!surf->npix_x || !surf->npix_y || !surf->npix_z ||
-	    !surf->array_size)
+	if (!surf->blk_w || !surf->blk_h)
 		return -EINVAL;

-	if (!surf->blk_w || !surf->blk_h || !surf->blk_d)
-		return -EINVAL;
-
-	switch (surf->nsamples) {
-	case 1:
-	case 2:
-	case 4:
-	case 8:
-		break;
-	default:
-		return -EINVAL;
-	}
-
 	switch (type) {
 	case RADEON_SURF_TYPE_1D:
-		if (surf->npix_y > 1)
+		if (surf_info->height > 1)
 			return -EINVAL;
 		/* fall through */
 	case RADEON_SURF_TYPE_2D:
 	case RADEON_SURF_TYPE_CUBEMAP:
-		if (surf->npix_z > 1 || surf->array_size > 1)
+		if (surf_info->depth > 1 || surf_info->array_size > 1)
 			return -EINVAL;
 		break;
 	case RADEON_SURF_TYPE_3D:
-		if (surf->array_size > 1)
+		if (surf_info->array_size > 1)
 			return -EINVAL;
 		break;
 	case RADEON_SURF_TYPE_1D_ARRAY:
-		if (surf->npix_y > 1)
+		if (surf_info->height > 1)
 			return -EINVAL;
 		/* fall through */
 	case RADEON_SURF_TYPE_2D_ARRAY:
-		if (surf->npix_z > 1)
+		if (surf_info->depth > 1)
 			return -EINVAL;
 		break;
 	default:
@@ -100,453 +76,28 @@ static int radv_amdgpu_surface_sanity(const struct radeon_surf *surf)
 	return 0;
 }

-static void *ADDR_API radv_allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
-{
-	return malloc(pInput->sizeInBytes);
-}
-
-static ADDR_E_RETURNCODE ADDR_API radv_freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
-{
-	free(pInput->pVirtAddr);
-	return ADDR_OK;
-}
-
-ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, int rev_id,
-				    enum chip_class chip_class)
-{
-	ADDR_CREATE_INPUT addrCreateInput = {0};
-	ADDR_CREATE_OUTPUT addrCreateOutput = {0};
-	ADDR_REGISTER_VALUE regValue = {0};
-	ADDR_CREATE_FLAGS createFlags = {{0}};
-	ADDR_E_RETURNCODE addrRet;
-
-	addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
-	addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
-
-	regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
-	regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
-	regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
-
-	regValue.backendDisables = amdinfo->backend_disable[0];
-	regValue.pTileConfig = amdinfo->gb_tile_mode;
-	regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
-	if (chip_class == SI) {
-		regValue.pMacroTileConfig = NULL;
-		regValue.noOfMacroEntries = 0;
-	} else {
-		regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
-		regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);
-	}
-
-	createFlags.value = 0;
-	createFlags.useTileIndex = 1;
-
-	addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
-	addrCreateInput.chipFamily = family;
-	addrCreateInput.chipRevision = rev_id;
-	addrCreateInput.createFlags = createFlags;
-	addrCreateInput.callbacks.allocSysMem = radv_allocSysMem;
-	addrCreateInput.callbacks.freeSysMem = radv_freeSysMem;
-	addrCreateInput.callbacks.debugPrint = 0;
-	addrCreateInput.regValue = regValue;
-
-	addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
-	if (addrRet != ADDR_OK)
-		return NULL;
-
-	return addrCreateOutput.hLib;
-}
-
-static int radv_compute_level(ADDR_HANDLE addrlib,
-                              struct radeon_surf *surf, bool is_stencil,
-                              unsigned level, unsigned type, bool compressed,
-                              ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
-                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
-                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
-                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
-{
-	struct radeon_surf_level *surf_level;
-	ADDR_E_RETURNCODE ret;
-
-	AddrSurfInfoIn->mipLevel = level;
-	AddrSurfInfoIn->width = u_minify(surf->npix_x, level);
-	AddrSurfInfoIn->height = u_minify(surf->npix_y, level);
-
-	if (type == RADEON_SURF_TYPE_3D)
-		AddrSurfInfoIn->numSlices = u_minify(surf->npix_z, level);
-	else if (type == RADEON_SURF_TYPE_CUBEMAP)
-		AddrSurfInfoIn->numSlices = 6;
-	else
-		AddrSurfInfoIn->numSlices = surf->array_size;
-
-	if (level > 0) {
-		/* Set the base level pitch. This is needed for calculation
-		 * of non-zero levels. */
-		if (is_stencil)
-			AddrSurfInfoIn->basePitch = surf->stencil_level[0].nblk_x;
-		else
-			AddrSurfInfoIn->basePitch = surf->level[0].nblk_x;
-
-		/* Convert blocks to pixels for compressed formats. */
-		if (compressed)
-			AddrSurfInfoIn->basePitch *= surf->blk_w;
-	}
-
-	ret = AddrComputeSurfaceInfo(addrlib,
-				     AddrSurfInfoIn,
-				     AddrSurfInfoOut);
-	if (ret != ADDR_OK)
-		return ret;
-
-	surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level];
-	surf_level->offset = align64(surf->bo_size, AddrSurfInfoOut->baseAlign);
-	surf_level->slice_size = AddrSurfInfoOut->sliceSize;
-	surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe);
-	surf_level->npix_x = u_minify(surf->npix_x, level);
-	surf_level->npix_y = u_minify(surf->npix_y, level);
-	surf_level->npix_z = u_minify(surf->npix_z, level);
-	surf_level->nblk_x = AddrSurfInfoOut->pitch;
-	surf_level->nblk_y = AddrSurfInfoOut->height;
-	if (type == RADEON_SURF_TYPE_3D)
-		surf_level->nblk_z = AddrSurfInfoOut->depth;
-	else
-		surf_level->nblk_z = 1;
-
-	switch (AddrSurfInfoOut->tileMode) {
-	case ADDR_TM_LINEAR_ALIGNED:
-		surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
-		break;
-	case ADDR_TM_1D_TILED_THIN1:
-		surf_level->mode = RADEON_SURF_MODE_1D;
-		break;
-	case ADDR_TM_2D_TILED_THIN1:
-		surf_level->mode = RADEON_SURF_MODE_2D;
-		break;
-	default:
-		assert(0);
-	}
-
-	if (is_stencil)
-		surf->stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
-	else
-		surf->tiling_index[level] = AddrSurfInfoOut->tileIndex;
-
-	surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;
-
-	/* Clear DCC fields at the beginning. */
-	surf_level->dcc_offset = 0;
-	surf_level->dcc_enabled = false;
-
-	/* The previous level's flag tells us if we can use DCC for this level. */
-	if (AddrSurfInfoIn->flags.dccCompatible &&
-	    (level == 0 || AddrDccOut->subLvlCompressible)) {
-		AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
-		AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
-		AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
-		AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
-		AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
-
-		ret = AddrComputeDccInfo(addrlib,
-					 AddrDccIn,
-					 AddrDccOut);
-
-		if (ret == ADDR_OK) {
-			surf_level->dcc_offset = surf->dcc_size;
-			surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
-			surf_level->dcc_enabled = true;
-			surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
-			surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
-		}
-	}
-
-	if (!is_stencil && AddrSurfInfoIn->flags.depth &&
-	    surf_level->mode == RADEON_SURF_MODE_2D && level == 0) {
-		ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
-		ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
-		AddrHtileIn.flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible;
-		AddrHtileIn.pitch = AddrSurfInfoOut->pitch;
-		AddrHtileIn.height = AddrSurfInfoOut->height;
-		AddrHtileIn.numSlices = AddrSurfInfoOut->depth;
-		AddrHtileIn.blockWidth = ADDR_HTILE_BLOCKSIZE_8;
-		AddrHtileIn.blockHeight = ADDR_HTILE_BLOCKSIZE_8;
-		AddrHtileIn.pTileInfo = AddrSurfInfoOut->pTileInfo;
-		AddrHtileIn.tileIndex = AddrSurfInfoOut->tileIndex;
-		AddrHtileIn.macroModeIndex = AddrSurfInfoOut->macroModeIndex;
-
-		ret = AddrComputeHtileInfo(addrlib,
-		                           &AddrHtileIn,
-		                           &AddrHtileOut);
-
-		if (ret == ADDR_OK) {
-			surf->htile_size = AddrHtileOut.htileBytes;
-			surf->htile_slice_size = AddrHtileOut.sliceSize;
-			surf->htile_alignment = AddrHtileOut.baseAlign;
-		}
-	}
-	return 0;
-}
-
-static void radv_set_micro_tile_mode(struct radeon_surf *surf,
-                                     struct radeon_info *info)
-{
-	uint32_t tile_mode = info->si_tile_mode_array[surf->tiling_index[0]];
-
-	if (info->chip_class >= CIK)
-		surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
-	else
-		surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
-}
-
-static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
-{
-	unsigned index, tileb;
-
-	tileb = 8 * 8 * surf->bpe;
-	tileb = MIN2(surf->tile_split, tileb);
-
-	for (index = 0; tileb > 64; index++)
-		tileb >>= 1;
-
-	assert(index < 16);
-	return index;
-}
-
 static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
+					   const struct ac_surf_info *surf_info,
 					   struct radeon_surf *surf)
 {
 	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
-	unsigned level, mode, type;
-	bool compressed;
-	ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
-	ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
-	ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
-	ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
-	ADDR_TILEINFO AddrTileInfoIn = {0};
-	ADDR_TILEINFO AddrTileInfoOut = {0};
+	unsigned mode, type;
 	int r;

-	r = radv_amdgpu_surface_sanity(surf);
+	r = radv_amdgpu_surface_sanity(surf_info, surf);
 	if (r)
 		return r;

-	AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
-	AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
-	AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
-	AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
-	AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
-
 	type = RADEON_SURF_GET(surf->flags, TYPE);
 	mode = RADEON_SURF_GET(surf->flags, MODE);
-	compressed = surf->blk_w == 4 && surf->blk_h == 4;

-	/* MSAA and FMASK require 2D tiling. */
-	if (surf->nsamples > 1 ||
-	    (surf->flags & RADEON_SURF_FMASK))
-		mode = RADEON_SURF_MODE_2D;
+	struct ac_surf_config config;

-	/* DB doesn't support linear layouts. */
-	if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
-	    mode < RADEON_SURF_MODE_1D)
-		mode = RADEON_SURF_MODE_1D;
+	memcpy(&config.info, surf_info, sizeof(config.info));
+	config.is_3d = !!(type == RADEON_SURF_TYPE_3D);
+	config.is_cube = !!(type == RADEON_SURF_TYPE_CUBEMAP);

-	/* Set the requested tiling mode. */
-	switch (mode) {
-	case RADEON_SURF_MODE_LINEAR_ALIGNED:
-		AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
-		break;
-	case RADEON_SURF_MODE_1D:
-		AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
-		break;
-	case RADEON_SURF_MODE_2D:
-		AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
-		break;
-	default:
-		assert(0);
-	}
-
-	/* The format must be set correctly for the allocation of compressed
-	 * textures to work. In other cases, setting the bpp is sufficient. */
-	if (compressed) {
-		switch (surf->bpe) {
-		case 8:
-			AddrSurfInfoIn.format = ADDR_FMT_BC1;
-			break;
-		case 16:
-			AddrSurfInfoIn.format = ADDR_FMT_BC3;
-			break;
-		default:
-			assert(0);
-		}
-	} else {
-		AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
-	}
-
-	AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf->nsamples;
-	AddrSurfInfoIn.tileIndex = -1;
-
-	/* Set the micro tile type. */
-	if (surf->flags & RADEON_SURF_SCANOUT)
-		AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
-	else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
-		AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
-	else
-		AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
-
-	AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
-	AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
-	AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
-	AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
-	AddrSurfInfoIn.flags.pow2Pad = surf->last_level > 0;
-	AddrSurfInfoIn.flags.opt4Space = 1;
-
-	/* DCC notes:
-	 * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
-	 *   with samples >= 4.
-	 * - Mipmapped array textures have low performance (discovered by a closed
-	 *   driver team).
-	 */
-	AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
-		!(surf->flags & RADEON_SURF_DISABLE_DCC) &&
-		!compressed && AddrDccIn.numSamples <= 1 &&
-		((surf->array_size == 1 && surf->npix_z == 1) ||
-		 surf->last_level == 0);
-
-	AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
-	AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;
-
-	/* noStencil = 0 can result in a depth part that is incompatible with
-	 * mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in
-	 * this case, we may end up setting stencil_adjusted).
-	 *
-	 * TODO: update addrlib to a newer version, remove this, and
-	 * use flags.matchStencilTileCfg = 1 as an alternative fix.
-	 */
-	if (surf->last_level > 0)
-		AddrSurfInfoIn.flags.noStencil = 1;
-
-	/* Set preferred macrotile parameters. This is usually required
-	 * for shared resources. This is for 2D tiling only. */
-	if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
-	    surf->bankw && surf->bankh && surf->mtilea && surf->tile_split) {
-		/* If any of these parameters are incorrect, the calculation
-		 * will fail. */
-		AddrTileInfoIn.banks = surf->num_banks;
-		AddrTileInfoIn.bankWidth = surf->bankw;
-		AddrTileInfoIn.bankHeight = surf->bankh;
-		AddrTileInfoIn.macroAspectRatio = surf->mtilea;
-		AddrTileInfoIn.tileSplitBytes = surf->tile_split;
-		AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
-		AddrSurfInfoIn.flags.opt4Space = 0;
-		AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
-
-		/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
-		 * the tile index, because we are expected to know it if
-		 * we know the other parameters.
-		 *
-		 * This is something that can easily be fixed in Addrlib.
-		 * For now, just figure it out here.
-		 * Note that only 2D_TILE_THIN1 is handled here.
-		 */
-		assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
-		assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
-
-		if (ws->info.chip_class == SI) {
-			if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
-				if (surf->bpe == 2)
-					AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
-				else
-					AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
-			} else {
-				if (surf->bpe == 1)
-					AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
-				else if (surf->bpe == 2)
-					AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
-				else if (surf->bpe == 4)
-					AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
-				else
-					AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
-			}
-		} else {
-			if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
-				AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
-			else
-				AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
-			AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
-		}
-	}
-
-	surf->bo_size = 0;
-	surf->dcc_size = 0;
-	surf->dcc_alignment = 1;
-	surf->htile_size = surf->htile_slice_size = 0;
-	surf->htile_alignment = 1;
-
-	/* Calculate texture layout information. */
-	for (level = 0; level <= surf->last_level; level++) {
-		r = radv_compute_level(ws->addrlib, surf, false, level, type, compressed,
-				       &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
-		if (r)
-			break;
-
-		if (level == 0) {
-			surf->bo_alignment = AddrSurfInfoOut.baseAlign;
-			surf->pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1;
-			radv_set_micro_tile_mode(surf, &ws->info);
-
-			/* For 2D modes only. */
-			if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
-				surf->bankw = AddrSurfInfoOut.pTileInfo->bankWidth;
-				surf->bankh = AddrSurfInfoOut.pTileInfo->bankHeight;
-				surf->mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio;
-				surf->tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
-				surf->num_banks = AddrSurfInfoOut.pTileInfo->banks;
-				surf->macro_tile_index = AddrSurfInfoOut.macroModeIndex;
-			} else {
-				surf->macro_tile_index = 0;
-			}
-		}
-	}
-
-	/* Calculate texture layout information for stencil. */
-	if (surf->flags & RADEON_SURF_SBUFFER) {
-		AddrSurfInfoIn.bpp = 8;
-		AddrSurfInfoIn.flags.depth = 0;
-		AddrSurfInfoIn.flags.stencil = 1;
-		/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
-		AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
-
-		for (level = 0; level <= surf->last_level; level++) {
-			r = radv_compute_level(ws->addrlib, surf, true, level, type, compressed,
-					       &AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
-			if (r)
-				return r;
-
-			/* DB uses the depth pitch for both stencil and depth. */
-			if (surf->stencil_level[level].nblk_x != surf->level[level].nblk_x)
-				surf->stencil_adjusted = true;
-
-			if (level == 0) {
-				/* For 2D modes only. */
-				if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
-					surf->stencil_tile_split =
-						AddrSurfInfoOut.pTileInfo->tileSplitBytes;
-				}
-			}
-		}
-	}
-
-	/* Recalculate the whole DCC miptree size including disabled levels.
-	 * This is what addrlib does, but calling addrlib would be a lot more
-	 * complicated.
-	 */
-#if 0
-	if (surf->dcc_size && surf->last_level > 0) {
-		surf->dcc_size = align64(surf->bo_size >> 8,
-					 ws->info.pipe_interleave_bytes *
-					 ws->info.num_tile_pipes);
-	}
-#endif
-	return 0;
+	return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
 }

 static int radv_amdgpu_winsys_surface_best(struct radeon_winsys *rws,
--- a/Show More
+++ b/Show More