docs: add sha256 checksums for 18.3.6

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
docs: add release notes for 18.3.6
2019-04-05 12:00:12 +01:00 · 2019-04-05 11:59:15 +01:00 · 2019-04-05 11:31:05 +01:00 · 2019-04-05 11:31:05 +01:00 · 2019-04-05 11:31:05 +01:00 · 2019-04-05 11:31:05 +01:00
1294 changed files with 97727 additions and 18778 deletions
--- a/.mailmap
+++ b/.mailmap
@@ -145,6 +145,11 @@ Edward O'Callaghan <funfunctor@folklore1984.net> <eocallaghan@alterapraxis.com>
 Emeric Grange <emeric.grange@gmail.com> Emeric <emeric.grange@gmail.com>

 Emil Velikov <emil.l.velikov@gmail.com> <emil.velikov@collabora.com>
+Emil Velikov <emil.l.velikov@gmail.com> <emil.veliko@collabora.com>
+Emil Velikov <emil.l.velikov@gmail.com> <emil.velikov@collabora.co.uk>
+Emil Velikov <emil.l.velikov@gmail.com> <emil.veliikov@collabora.com>
+Emil Velikov <emil.l.velikov@gmail.com> <emil.velikov@gmail.com>
+Emil Velikov <emil.l.velikov@gmail.com> <emmil.velikov@collabora.com>

 Eric Anholt <eric@anholt.net> Eric Anholt <anholt@FreeBSD.org>

--- a/.travis.yml
+++ b/.travis.yml
@@ -9,9 +9,9 @@ cache:

 env:
  global:
-    - XORG_RELEASES=http://xorg.freedesktop.org/releases/individual
-    - XCB_RELEASES=http://xcb.freedesktop.org/dist
-    - WAYLAND_RELEASES=http://wayland.freedesktop.org/releases
+    - XORG_RELEASES=https://xorg.freedesktop.org/releases/individual
+    - XCB_RELEASES=https://xcb.freedesktop.org/dist
+    - WAYLAND_RELEASES=https://wayland.freedesktop.org/releases
    - XORGMACROS_VERSION=util-macros-1.19.0
    - GLPROTO_VERSION=glproto-1.4.17
    - DRI2PROTO_VERSION=dri2proto-2.8
@@ -35,28 +35,33 @@ matrix:
    - env:
        - LABEL="meson Vulkan"
        - BUILD=meson
-        - MESON_OPTIONS="-Ddri-drivers=[] -Dgallium-drivers=[]"
-        - LLVM_VERSION=5.0
+        - DRI_DRIVERS=""
+        - GALLIUM_DRIVERS=""
+        - VULKAN_DRIVERS="intel,amd"
+        - LLVM_VERSION=6.0
        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
      addons:
        apt:
          sources:
-            - llvm-toolchain-trusty-5.0
+            - llvm-toolchain-trusty-6.0
+            # llvm-6 requires libstdc++4.9 which is not in main repo
+            - ubuntu-toolchain-r-test
          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
            # From sources above
-            - llvm-5.0-dev
+            - llvm-6.0-dev
            # Common
            - xz-utils
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - python3.5
            - python3-pip
    - env:
        - LABEL="meson loaders/classic DRI"
        - BUILD=meson
-        - MESON_OPTIONS="-Dvulkan-drivers=[] -Dgallium-drivers=[]"
+        - DRI_DRIVERS="i915,i965,r100,r200,swrast,nouveau"
+        - GALLIUM_DRIVERS=""
+        - VULKAN_DRIVERS=""
      addons:
        apt:
          packages:
@@ -66,6 +71,7 @@ matrix:
            - libx11-xcb-dev
            - libxdamage-dev
            - libxfixes-dev
+            - python3.5
            - python3-pip
    - env:
        - LABEL="make loaders/classic DRI"
@@ -87,6 +93,7 @@ matrix:
            - libx11-xcb-dev
            - libxdamage-dev
            - libxfixes-dev
+            - python3-pip
    - env:
        # NOTE: Building SWR is 2x (yes two) times slower than all the other
        # gallium drivers combined.
@@ -95,10 +102,8 @@ matrix:
        - BUILD=make
        - MAKEFLAGS="-j4"
        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=5.0
+        - LLVM_VERSION=6.0
        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - OVERRIDE_CC="gcc-4.8"
-        - OVERRIDE_CXX="g++-4.8"
        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
        - DRI_DRIVERS=""
        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
@@ -108,24 +113,25 @@ matrix:
      addons:
        apt:
          sources:
-            - llvm-toolchain-trusty-5.0
+            - llvm-toolchain-trusty-6.0
+            # llvm-6 requires libstdc++4.9 which is not in main repo
+            - ubuntu-toolchain-r-test
          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
            # From sources above
-            - llvm-5.0-dev
+            - llvm-6.0-dev
            # Common
            - xz-utils
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
    - env:
        - LABEL="make Gallium Drivers RadeonSI"
        - BUILD=make
        - MAKEFLAGS="-j4"
        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=5.0
+        - LLVM_VERSION=6.0
        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
        - DRI_DRIVERS=""
@@ -136,18 +142,19 @@ matrix:
      addons:
        apt:
          sources:
-            - llvm-toolchain-trusty-5.0
+            - llvm-toolchain-trusty-6.0
+            # llvm-6 requires libstdc++4.9 which is not in main repo
+            - ubuntu-toolchain-r-test
          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
            # From sources above
-            - llvm-5.0-dev
+            - llvm-6.0-dev
            # Common
            - xz-utils
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
    - env:
        - LABEL="make Gallium Drivers Other"
        - BUILD=make
@@ -179,8 +186,8 @@ matrix:
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
    - env:
-        # NOTE: Analogous to SWR above, building Clover is quite slow.
        - LABEL="make Gallium ST Clover LLVM-3.9"
        - BUILD=make
        - MAKEFLAGS="-j4"
@@ -217,8 +224,8 @@ matrix:
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
    - env:
-        # NOTE: Analogous to SWR above, building Clover is quite slow.
        - LABEL="make Gallium ST Clover LLVM-4.0"
        - BUILD=make
        - MAKEFLAGS="-j4"
@@ -252,8 +259,8 @@ matrix:
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
    - env:
-        # NOTE: Analogous to SWR above, building Clover is quite slow.
        - LABEL="make Gallium ST Clover LLVM-5.0"
        - BUILD=make
        - MAKEFLAGS="-j4"
@@ -265,7 +272,7 @@ matrix:
        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
        - DRI_DRIVERS=""
        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="r600,radeonsi"
+        - GALLIUM_DRIVERS="r600"
        - VULKAN_DRIVERS=""
        - LIBUNWIND_FLAGS="--enable-libunwind"
      addons:
@@ -287,8 +294,8 @@ matrix:
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
    - env:
-        # NOTE: Analogous to SWR above, building Clover is quite slow.
        - LABEL="make Gallium ST Clover LLVM-6.0"
        - BUILD=make
        - MAKEFLAGS="-j4"
@@ -305,7 +312,7 @@ matrix:
        apt:
          sources:
            - llvm-toolchain-trusty-6.0
-            # llvm-6 depends on gcc-4.9 which is not in main repo
+            # llvm-6 requires libstdc++4.9 which is not in main repo
            - ubuntu-toolchain-r-test
          packages:
            - libclc-dev
@@ -319,6 +326,39 @@ matrix:
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
+    - env:
+        - LABEL="make Gallium ST Clover LLVM-7"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=7
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS="r600,radeonsi"
+        - VULKAN_DRIVERS=""
+        - LIBUNWIND_FLAGS="--enable-libunwind"
+      addons:
+        apt:
+          sources:
+            - sourceline: 'deb http://apt.llvm.org/trusty/ llvm-toolchain-trusty-7 main'
+              key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
+            # llvm-7 requires libstdc++4.9 which is not in main repo
+            - ubuntu-toolchain-r-test
+          packages:
+            - libclc-dev
+            # From sources above
+            - llvm-7-dev
+            - clang-7
+            - libclang-7-dev
+            # Common
+            - xz-utils
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+            - libunwind8-dev
    - env:
        - LABEL="make Gallium ST Other"
        - BUILD=make
@@ -354,12 +394,13 @@ matrix:
            - libx11-xcb-dev
            - libelf-dev
            - libunwind8-dev
+            - python3-pip
    - env:
        - LABEL="make Vulkan"
        - BUILD=make
        - MAKEFLAGS="-j4"
        - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
-        - LLVM_VERSION=5.0
+        - LLVM_VERSION=6.0
        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland"
        - DRI_DRIVERS=""
@@ -370,17 +411,18 @@ matrix:
      addons:
        apt:
          sources:
-            - llvm-toolchain-trusty-5.0
+            - llvm-toolchain-trusty-6.0
+            # llvm-6 requires libstdc++4.9 which is not in main repo
+            - ubuntu-toolchain-r-test
          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
            # From sources above
-            - llvm-5.0-dev
+            - llvm-6.0-dev
            # Common
            - xz-utils
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
+            - python3-pip
    - env:
        - LABEL="scons"
        - BUILD=scons
@@ -424,21 +466,19 @@ matrix:
        - BUILD=scons
        - SCONSFLAGS="-j4"
        - SCONS_TARGET="swr=1"
-        - LLVM_VERSION=5.0
+        - LLVM_VERSION=6.0
        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
        # Keep it symmetrical to the make build. There's no actual SWR, yet.
        - SCONS_CHECK_COMMAND="true"
-        - OVERRIDE_CC="gcc-4.8"
-        - OVERRIDE_CXX="g++-4.8"
      addons:
        apt:
          sources:
-            - llvm-toolchain-trusty-5.0
+            - llvm-toolchain-trusty-6.0
+            # llvm-6 requires libstdc++4.9 which is not in main repo
+            - ubuntu-toolchain-r-test
          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
            # From sources above
-            - llvm-5.0-dev
+            - llvm-6.0-dev
            # Common
            - xz-utils
            - x11proto-xf86vidmode-dev
@@ -455,7 +495,6 @@ matrix:
    - env:
        - LABEL="macOS meson"
        - BUILD=meson
-        - MESON_OPTIONS="-Degl=false"
      os: osx

 before_install:
@@ -482,18 +521,24 @@ before_install:
    fi

 install:
-  - pip2 install --user mako
-
  # Install a more modern meson from pip, since the version in the
-  # ubuntu repos is often quite old. Avoid >=0.45.0 as it needs python
-  # 3.5+
+  # ubuntu repos is often quite old. This requires python>=3.5, so
+  # let's make it default
  - if test "x$BUILD" = xmeson; then
-      pip3 install --user "meson<0.45.0";
+      sudo update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.5 10;
+      pip3 install --user meson;
+      pip3 install --user mako;
+    fi
+
+  # Install autotools build dependencies
+  - if test "x$BUILD" = xmake; then
+      pip2 install --user mako;
    fi

  # Install a more modern scons from pip.
  - if test "x$BUILD" = xscons; then
      pip2 install --user "scons>=2.4";
+      pip2 install --user mako;
    fi

  # Since libdrm gets updated in configure.ac regularly, try to pick up the
@@ -535,7 +580,7 @@ install:
      tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2
      (cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install)

-      wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
+      wget https://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
      tar -jxvf $LIBDRM_VERSION.tar.bz2
      (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install)

@@ -551,11 +596,11 @@ install:
      tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
      (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)

-      wget http://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
+      wget https://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
      tar -jxvf $LIBVDPAU_VERSION.tar.bz2
      (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)

-      wget http://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
+      wget https://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
      tar -jxvf $LIBVA_VERSION.tar.bz2
      (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)

@@ -639,6 +684,14 @@ script:
  - |
    if test "x$BUILD" = xmeson; then

+      if test "x$TRAVIS_OS_NAME" == xosx; then
+        MESON_OPTIONS="-Degl=false"
+      fi
+
+      if test "x$TRAVIS_OS_NAME" == xlinux; then
+        MESON_OPTIONS="-Ddri-drivers=${DRI_DRIVERS:-[]} -Dgallium-drivers=${GALLIUM_DRIVERS:-[]} -Dvulkan-drivers=${VULKAN_DRIVERS:-[]}"
+      fi
+
      # Travis CI has moved to LLVM 5.0, and meson is detecting
      # automatically the available version in /usr/local/bin based on
      # the PATH env variable order preference.
--- a/Android.common.mk
+++ b/Android.common.mk
@@ -76,6 +76,8 @@ LOCAL_CFLAGS += \
 	-DMAJOR_IN_SYSMACROS \
 	-DVK_USE_PLATFORM_ANDROID_KHR \
 	-fvisibility=hidden \
+	-fno-math-errno \
+	-fno-trapping-math \
 	-Wno-sign-compare

 LOCAL_CPPFLAGS += \
@@ -89,6 +91,13 @@ LOCAL_CPPFLAGS += \
 LOCAL_CONLYFLAGS += \
 	-std=c99

+# c11 timespec_get is part of bionic as well
+# https://android-review.googlesource.com/c/718518
+# Releases from P and earlier are excluded below, so only newer ones get the define
+ifeq ($(filter 5 6 7 8 9, $(MESA_ANDROID_MAJOR_VERSION)),)
+LOCAL_CFLAGS += -DHAVE_TIMESPEC_GET
+endif
+
 ifeq ($(strip $(MESA_ENABLE_ASM)),true)
 ifeq ($(TARGET_ARCH),x86)
 LOCAL_CFLAGS += \
--- a/CleanSpec.mk
+++ b/CleanSpec.mk
@@ -10,7 +10,7 @@ $(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/STATIC_LIBRARIES/libmesa_*_interm
 $(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/SHARED_LIBRARIES/i9?5_dri_intermediates)
 $(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/SHARED_LIBRARIES/libglapi_intermediates)
 $(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/SHARED_LIBRARIES/libGLES_mesa_intermediates)
-$(call add-clean-step, rm -rf $(HOST_OUT_release)/*/EXECUTABLES/mesa_*_intermediates)
-$(call add-clean-step, rm -rf $(HOST_OUT_release)/*/EXECUTABLES/glsl_compiler_intermediates)
-$(call add-clean-step, rm -rf $(HOST_OUT_release)/*/STATIC_LIBRARIES/libmesa_*_intermediates)
+$(call add-clean-step, rm -rf $(HOST_OUT)/*/EXECUTABLES/mesa_*_intermediates)
+$(call add-clean-step, rm -rf $(HOST_OUT)/*/EXECUTABLES/glsl_compiler_intermediates)
+$(call add-clean-step, rm -rf $(HOST_OUT)/*/STATIC_LIBRARIES/libmesa_*_intermediates)
 $(call add-clean-step, rm -rf $(PRODUCT_OUT)/*/SHARED_LIBRARIES/*_dri_intermediates)
--- a/1
+++ b/1
@@ -31,6 +31,7 @@ import common
 # Minimal scons version

 EnsureSConsVersion(2, 4)
+EnsurePythonVersion(2, 7)


 #######################################################################
--- a/2
+++ b/2
@@ -1 +1 @@
-18.2.7
+18.3.6
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -33,31 +33,41 @@ branches:
 # - https://www.appveyor.com/blog/2014/06/04/shallow-clone-for-git-repositories
 clone_depth: 100

+# https://www.appveyor.com/docs/build-cache/
 cache:
- win_flex_bison-2.5.9.zip
- llvm-5.0.1-msvc2015-mtd.7z
+- '%LOCALAPPDATA%\pip\Cache -> appveyor.yml'
+- win_flex_bison-2.5.15.zip
+- llvm-5.0.1-msvc2017-mtd.7z

-os: Visual Studio 2015
+os: Visual Studio 2017
+
+init:
+# Appveyor defaults core.autocrlf to input instead of the default (true), but
+# that can hide problems processing CRLF text on Windows
+- git config --global core.autocrlf true

 environment:
-  WINFLEXBISON_ARCHIVE: win_flex_bison-2.5.9.zip
-  LLVM_ARCHIVE: llvm-5.0.1-msvc2015-mtd.7z
+  WINFLEXBISON_VERSION: 2.5.15
+  LLVM_ARCHIVE: llvm-5.0.1-msvc2017-mtd.7z

 install:
+# Check git config
+- git config core.autocrlf
 # Check pip
 - python --version
 - python -m pip --version
 # Install Mako
- python -m pip install Mako==1.0.6
+- python -m pip install Mako==1.0.7
 # Install pywin32 extensions, needed by SCons
 - python -m pip install pypiwin32
 # Install python wheels, necessary to install SCons via pip
 - python -m pip install wheel
 # Install SCons
- python -m pip install scons==2.5.1
+- python -m pip install scons==3.0.1
 - scons --version
 # Install flex/bison
- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "https://downloads.sourceforge.net/project/winflexbison/old_versions/%WINFLEXBISON_ARCHIVE%"
+- set WINFLEXBISON_ARCHIVE=win_flex_bison-%WINFLEXBISON_VERSION%.zip
+- if not exist "%WINFLEXBISON_ARCHIVE%" appveyor DownloadFile "https://github.com/lexxmark/winflexbison/releases/download/v%WINFLEXBISON_VERSION%/%WINFLEXBISON_ARCHIVE%"
 - 7z x -y -owinflexbison\ "%WINFLEXBISON_ARCHIVE%" > nul
 - set Path=%CD%\winflexbison;%Path%
 - win_flex --version
@@ -69,10 +79,10 @@ install:
 - set LLVM=%CD%\llvm

 build_script:
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.0 llvm=1
+- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.1 llvm=1

 after_build:
- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.0 llvm=1 check
+- scons -j%NUMBER_OF_PROCESSORS% MSVC_VERSION=14.1 llvm=1 check


 # It's possible to setup notification here, as described in
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -1,39 +1,54 @@
-# fixes:  This commit has more than one Fixes tag but the commit it
-#         addresses didn't land in branch.
-6ff1c479968819b93c46d24bd898e89ce14ac401 autotools: don't ship the git_sha1.h generated in git in the tarballs
-# pick:   This commit addresses a regression introduced by previous
-#         commit fa9e6c235da, which didn't make it for 18.2.
-a72dbc461bdb7714656e62cd8f4b00a404c2e6e0 mesa: allow GL_UNSIGNED_BYTE type for SNORM reads
-# fixes:  This commit has more than one Fixes tag but the commit it
-#         addresses didn't land in branch.
-c9f54486959716762e6818dabb0a73a8cd46df67 radeonsi: fix regression in indirect input swizzles.
-# extra:  Just some comments update.
-2ad9917e187c1e9dbb053d3c98aa0e39fa374059 anv/blorp: Fix a comment as per Nanley's review feedback
-# fixes:  This commit was immediately reverted by commit 2dce1175c1c.
-4aec44c0d9c4c0649c362199fac97efe0a3b38a4 i965/tools: 32bit compilation with meson
-# pick:   This commit was reverted by commit 95bb7d82ca8.
-90819abb56f6b1a0cd4946b13b6caf24fb46e500 radv: fix descriptor pool allocation size
-# pick:   There is a specific patch for stable branch for this commit.
-0d495bec25bd7584de4e988c2b4528c1996bc1d0 radeonsi: NaN should pass kill_if
-# pick:   This commit reverts 0fa9e6d7b30 which did not land in branch.
-aa02d7e8781c25ee18b6da97606300808c84973a Revert "anv/skylake: disable ForceThreadDispatchEnable"
-# pick:   Explicit 18.3 only nominations.
-b1b2dd06a7b777e862b525302b15bcaf407d3648 radv: add missing TFB queries support to CmdCopyQueryPoolsResults()
-e0c7114eb3c19d4c2653f661698a6baa3bc9bedf st/mesa: disable L3 thread pinning
-b5f213bb1dcde22949dffe9d3a431fecd5d0f33b radv: binding streamout buffers doesn't change context regs
-9367514524f70faad99c721bac92339c8ff8bad9 radeonsi: fix video APIs on Raven2
-ea9f95e2a67eca90bb84eea24e7b4b804b3b1345 radeonsi: go back to using bottom-of-pipe for beginning of TIME_ELAPSED
-# fixes:  This commit was reverted by commit 5f312e95f87.
-a9031bf9b55602d93cccef6c926e2179c23205b4 i965/batch: avoid reverting batch buffer if saved state is an empty
-# extra:  intel/aub_viewer is not present in branch
-ac324a6809c09c54d3b0bfdb00e5e62987ec4ad8 intel/aub_viewer: fix dynamic state printing
-0db898cef2f5a455138e5845689c075aadba1c1f intel/aub_viewer: Print blend states properly
-# fixes: This commit requires commits 854202f70e6 and 84bc5738401 which did not
+# fixes: Commit was squashed into the respective offenders
+c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix
+# fixes: The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a
+ff6f1dd0d3c6b4c15ca51b478b2884d14f6a1e06 meson: libfreedreno depends upon libdrm (for fence support)
+
+# fixes: This commit requires commits aeaf8dbd097 and 7484bc894b9 which did not
 #        land in branch.
-c120dbfe4d18240315ecec9b43a61aeb9ab239ac mesa/main: fix incorrect depth-error
-# fixes: This commit fixes commits b4476138d5ad and aa0fed10d357 which did not
-#        land in branch.
-d0c7b079d07f751eb37ecaa45a2a6db920d71d7a freedreno: Fix autotools build.
-# pick:  While this commit does not include the proper CC tag, it was intended
-#        to be applied only in 18.3 branch.
-017199d2d2e4c57015bc60edfcc656062c3a7472 mesa: Revert INTEL_fragment_shader_ordering support
+f67dea5e19ef14187be0e8d0f61b1f764c7ccb4f radv: Fix multiview depth clears
+
+# stable The commits aren't suitable in their present form.
+bfe31c5e461a1330d6f606bf5310685eff1198dd nir/builder: Add nir_i2i and nir_u2u helpers which take a bit size
+abfe674c54bee6f8fdcae411b07db89c10b9d530 spirv: Handle arbitrary bit sizes for deref array indices
+
+# warn   The commits refer to a stale sha, yet don't fix anything in particular.
+98984b7cdd79c15cc7331c791f8be61e873b8bbd Revert "mapi/new: sort by slot number"
+9f86f1da7c68b5b900cd6f60925610ff1225a72d egl: add glvnd entrypoints for EGL_MESA_query_driver
+
+# stable Explicit 19.0 only nomination.
+38f542783faa360020b77fdd76b97f207a9e0068 v50,nvc0: add explicit settings for recent caps
+
+# stable Explicit 19.0 only nominations.
+399215eb7a0517463e5757c598d6cff6ae2301d0 nvc0: add support for handling indirect draws with attrib conversion
+4443b6ddf2e08d06f3d0457cf20a2e04244cde37 nvc0/ir: always use CG mode for loads from atomic-only buffers
+5de5beedf21306b01730085f8e03d8f424729016 nvc0/ir: fix second tex argument after levelZero optimization
+162352e6711b3ceab114686f7a3248074339e7f7 nvc0: fix 3d images on kepler
+e00799d3dc0595dc3998dbf199ceec8b1eece966 nv50,nvc0: use condition for occlusion queries when already complete
+6adb9b38bfb1f6ee4c94596bf0744225aa8e967a nvc0: stick zero values for the compute invocation counts
+04593d9a73ea257a36cc3b9fb5cd41427beaaea5 gk110/ir: Add rcp f64 implementation
+7937408052a1896f0b08b0110bb8a1790eeee351 gk110/ir: Add rsq f64 implementation
+656ad060518d067a3b311db8c2de2a396fb41898 gk110/ir: Use the new rcp/rsq in library
+12669d29705a26478aa691cb454149628be65f17 gk104/ir: Use the new rcp/rsq in library
+815a8e59c6d462a7008653ea9e3010d40b6ba589 gm107/ir: add fp64 rcp
+cce495572136a606dd2a35e79f45080c3796e2cc gm107/ir: add fp64 rsq
+6010d7b8e8bee1bcea2b329cf6d3b44c5fc3ca66 gallium: add PIPE_CAP_MAX_VARYINGS
+cbd1ad6165f0aea7fb7c6fd1b36ad5317dd65cb7 st/mesa: require RGBA2, RGB4, and RGBA4 to be renderable
+
+# stable The commit addresses functionality not present in branch
+1b8983c25be19073c02fe9630e949be55f8280fa radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
+
+# stable Explicit 19.0 only nominations, require work which did not land in branch
+8ab95b849e66f3221d80a67eef2ec6e3730901a8 anv: Refactor descriptor pushing a bit
+5c30fffeec1732c21d600c036f95f8cdb1bb5487 anv: Take references to push descriptor set layouts
+
+# sha    Commit references invalid sha - an offender did not land in branch.
+85ee157283c667372baf7c03259cba08853f0067 gitlab-ci: autotools needs to be told which llvm version to use
+
+# revert The commit was reverted shortly after it was applied on master
+47fc359822494935852de1e70e4d840b2fe6a25c anv: release memory allocated by glsl types during spirv_to_nir
+
+# stable Explicit 19.0 nomination, seemingly a performance patch
+5ef2b8f1f2ebcdb4ffe5c98b3f4f48e584cb4b22 nir: Add a pass for lowering IO back to vector when possible
+
+# fixes Temporary block perf. fix depending on previous opt. pass
+6d5d89d25a0a4299dbfcbfeca71b6c7e65ef3d45 intel/nir: Vectorize all IO
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -13,39 +13,54 @@

 is_stable_nomination()
 {
-	git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
+	git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
 }

 is_typod_nomination()
 {
-	git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
+	git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
 }

+fixes=
+
 # Helper to handle various mistypos of the fixes tag.
 # The tag string itself is passed as argument and normalised within.
+#
+# The resulting string is stored in the global variable "fixes" and contains
+# entries in the form "fixes:$sha"
 is_sha_nomination()
 {
 	fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \
 		sed -e 's/'"$2"'/\nfixes:/Ig' | \
 		grep -Eo 'fixes:[a-f0-9]{8,40}'`

-	fixes_count=`echo "$fixes" | wc -l`
+	fixes_count=`echo "$fixes" | grep "fixes:" | wc -l`
 	if test $fixes_count -eq 0; then
-		return 0
+		return 1
 	fi
+
+	# Throw a warning for each invalid sha
 	while test $fixes_count -gt 0; do
 		# Treat only the current line
 		id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
 		fixes_count=$(($fixes_count-1))
-
-		# Bail out if we cannot find suitable id.
-		# Any specific validation the $id is valid and not some junk, is
-		# implied with the follow up code
-		if test "x$id" = x; then
-			continue
+		if ! git show $id >/dev/null 2>&1; then
+			echo WARNING: Commit $1 lists invalid sha $id
 		fi
+	done

-		#Check if the offending commit is in branch.
+	return 0
+}
+
+# Checks if at least one of the offending commits, listed in the global
+# "fixes", is in branch.
+sha_in_range()
+{
+	fixes_count=`echo "$fixes" | grep "fixes:" | wc -l`
+	while test $fixes_count -gt 0; do
+		# Treat only the current line
+		id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
+		fixes_count=$(($fixes_count-1))

 		# Be that cherry-picked ...
 		# ... or landed before the branchpoint.
@@ -103,22 +118,32 @@ do
 		continue
 	fi

-	if is_stable_nomination "$sha"; then
-		tag=stable
-	elif is_typod_nomination "$sha"; then
-		tag=typod
-	elif is_fixes_nomination "$sha"; then
+	if is_fixes_nomination "$sha"; then
 		tag=fixes
 	elif is_brokenby_nomination "$sha"; then
 		tag=brokenby
 	elif is_revert_nomination "$sha"; then
 		tag=revert
+	elif is_stable_nomination "$sha"; then
+		tag=stable
+	elif is_typod_nomination "$sha"; then
+		tag=typod
 	else
 		continue
 	fi

+	case "$tag" in
+	fixes | brokenby | revert )
+		if ! sha_in_range; then
+			continue
+		fi
+		;;
+	* )
+		;;
+	esac
+
 	printf "[ %8s ] " "$tag"
-	git --no-pager show --summary --oneline $sha
+	git --no-pager show --no-patch --oneline $sha
 done

 rm -f already_picked
--- a/bin/git_sha1_gen.py
+++ b/bin/git_sha1_gen.py
@@ -1,5 +1,3 @@
-#!/usr/bin/env python
-
 """
 Generate the contents of the git_sha1.h file.
 The output of this script goes to stdout.
@@ -28,22 +26,25 @@ def get_git_sha1():
        git_sha1 = ''
    return git_sha1

+def write_if_different(contents):
+    """
+    Avoid touching the output file if it doesn't need modifications
+    Useful to avoid triggering rebuilds when nothing has changed.
+    """
+    if os.path.isfile(args.output):
+        with open(args.output, 'r') as file:
+            if file.read() == contents:
+                return
+    with open(args.output, 'w') as file:
+        file.write(contents)
+
 parser = argparse.ArgumentParser()
 parser.add_argument('--output', help='File to write the #define in',
-        required=True)
+                    required=True)
 args = parser.parse_args()

 git_sha1 = os.environ.get('MESA_GIT_SHA1_OVERRIDE', get_git_sha1())[:10]
 if git_sha1:
-    git_sha1_h_in_path = os.path.join(os.path.dirname(sys.argv[0]),
-            '..', 'src', 'git_sha1.h.in')
-    with open(git_sha1_h_in_path , 'r') as git_sha1_h_in:
-        new_sha1 = git_sha1_h_in.read().replace('@VCS_TAG@', git_sha1)
-        if os.path.isfile(args.output):
-            with open(args.output, 'r') as git_sha1_h:
-                if git_sha1_h.read() == new_sha1:
-                    quit()
-        with open(args.output, 'w') as git_sha1_h:
-            git_sha1_h.write(new_sha1)
+    write_if_different('#define MESA_GIT_SHA1 " (git-' + git_sha1 + ')"')
 else:
-    open(args.output, 'w').close()
+    write_if_different('#define MESA_GIT_SHA1 ""')
--- a/bin/install_megadrivers.py
+++ b/bin/install_megadrivers.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # encoding=utf-8
 # Copyright © 2017-2018 Intel Corporation

@@ -36,7 +35,11 @@ def main():
    args = parser.parse_args()

    if os.path.isabs(args.libdir):
-        to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
+        destdir = os.environ.get('DESTDIR')
+        if destdir:
+            to = os.path.join(destdir, args.libdir[1:])
+        else:
+            to = args.libdir
    else:
        to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)

@@ -46,7 +49,6 @@ def main():
        if os.path.lexists(to):
            os.unlink(to)
        os.makedirs(to)
-    shutil.copy(args.megadriver, master)

    for driver in args.drivers:
        abs_driver = os.path.join(to, driver)
--- a/common.py
+++ b/common.py
@@ -99,8 +99,6 @@ def AddOptions(opts):
                        'enable static code analysis where available', 'no'))
    opts.Add(BoolOption('asan', 'enable Address Sanitizer', 'no'))
    opts.Add('toolchain', 'compiler toolchain', default_toolchain)
-    opts.Add(BoolOption('gles', 'EXPERIMENTAL: enable OpenGL ES support',
-                        'no'))
    opts.Add(BoolOption('llvm', 'use LLVM', default_llvm))
    opts.Add(BoolOption('openmp', 'EXPERIMENTAL: compile with openmp (swrast)',
                        'no'))
@@ -110,3 +108,4 @@ def AddOptions(opts):
    opts.Add(BoolOption('swr', 'Build OpenSWR', 'no'))
    if host_platform == 'windows':
        opts.Add('MSVC_VERSION', 'Microsoft Visual C/C++ version')
+        opts.Add('MSVC_USE_SCRIPT', 'Microsoft Visual C/C++ vcvarsall script', True)
--- a/configure.ac
+++ b/configure.ac
@@ -74,11 +74,10 @@ AC_SUBST([OPENCL_VERSION])
 # in the first entry.
 LIBDRM_REQUIRED=2.4.75
 LIBDRM_RADEON_REQUIRED=2.4.71
-LIBDRM_AMDGPU_REQUIRED=2.4.91
+LIBDRM_AMDGPU_REQUIRED=2.4.95
 LIBDRM_INTEL_REQUIRED=2.4.75
 LIBDRM_NVVIEUX_REQUIRED=2.4.66
 LIBDRM_NOUVEAU_REQUIRED=2.4.66
-LIBDRM_FREEDRENO_REQUIRED=2.4.92
 LIBDRM_ETNAVIV_REQUIRED=2.4.89
 LIBDRM_VC4_REQUIRED=2.4.89

@@ -108,9 +107,9 @@ dnl LLVM versions
 LLVM_REQUIRED_GALLIUM=3.3.0
 LLVM_REQUIRED_OPENCL=3.9.0
 LLVM_REQUIRED_R600=3.9.0
-LLVM_REQUIRED_RADEONSI=5.0.0
-LLVM_REQUIRED_RADV=5.0.0
-LLVM_REQUIRED_SWR=5.0.0
+LLVM_REQUIRED_RADEONSI=6.0.0
+LLVM_REQUIRED_RADV=6.0.0
+LLVM_REQUIRED_SWR=6.0.0

 dnl Check for progs
 AC_PROG_CPP
@@ -125,7 +124,8 @@ AC_PROG_GREP
 AC_PROG_NM
 AM_PROG_AS
 AX_CHECK_GNU_MAKE
-AC_CHECK_PROGS([PYTHON2], [python2.7 python2 python])
+AM_PATH_PYTHON([2.7],, [AM_PATH_PYTHON([3.4],, [:])])
+
 AC_PROG_SED
 AC_PROG_MKDIR_P

@@ -157,7 +157,7 @@ fi

 AX_CHECK_PYTHON_MAKO_MODULE($PYTHON_MAKO_REQUIRED)

-if test -z "$PYTHON2"; then
+if test "$PYTHON" = ":"; then
    if test ! -f "$srcdir/src/util/format_srgb.c"; then
        AC_MSG_ERROR([Python not found - unable to generate sources])
    fi
@@ -310,6 +310,7 @@ AX_CHECK_COMPILE_FLAG([-Werror=missing-prototypes],            [CFLAGS="$CFLAGS
 AX_CHECK_COMPILE_FLAG([-Wmissing-prototypes],                  [CFLAGS="$CFLAGS -Wmissing-prototypes"])
 dnl Dylan Baker: gcc and clang always accepr -Wno-*, hence check for the original warning, then set the no-* flag
 AX_CHECK_COMPILE_FLAG([-Wmissing-field-initializers],          [CFLAGS="$CFLAGS -Wno-missing-field-initializers"])
+AX_CHECK_COMPILE_FLAG([-Wformat-truncation],                   [CFLAGS="$CFLAGS -Wno-format-truncation"])
 AX_CHECK_COMPILE_FLAG([-fno-math-errno],                       [CFLAGS="$CFLAGS -fno-math-errno"])

 AX_CHECK_COMPILE_FLAG([-fno-trapping-math],                    [CFLAGS="$CFLAGS -fno-trapping-math"])
@@ -329,6 +330,7 @@ AX_CHECK_COMPILE_FLAG([-fno-math-errno],                       [CXXFLAGS="$CXXFL
 AX_CHECK_COMPILE_FLAG([-fno-trapping-math],                    [CXXFLAGS="$CXXFLAGS -fno-trapping-math"])
 AX_CHECK_COMPILE_FLAG([-fvisibility=hidden],                   [VISIBILITY_CXXFLAGS="-fvisibility=hidden"])
 AX_CHECK_COMPILE_FLAG([-Wmissing-field-initializers],          [CXXFLAGS="$CXXFLAGS -Wno-missing-field-initializers"])
+AX_CHECK_COMPILE_FLAG([-Wformat-truncation],                   [CXXFLAGS="$CXXFLAGS -Wno-format-truncation"])
 AC_LANG_POP([C++])

 # Flags to help ensure that certain portions of the code -- and only those
@@ -976,6 +978,22 @@ if test "x$pthread_stubs_possible" = xyes; then
    PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4)
 fi

+save_LIBS="$LIBS"
+LIBS="$PTHREAD_LIBS"
+AC_MSG_CHECKING(whether pthread_setaffinity_np is supported)
+AC_LINK_IFELSE([AC_LANG_SOURCE([[
+    #define _GNU_SOURCE
+    #include <pthread.h>
+    int main() {
+       void *a = (void*) &pthread_setaffinity_np;
+       long b = (long) a;
+       return (int) b;
+    }]])],
+  [DEFINES="$DEFINES -DHAVE_PTHREAD_SETAFFINITY"];
+   AC_MSG_RESULT([yes]),
+   AC_MSG_RESULT([no]))
+LIBS="$save_LIBS"
+
 dnl Check for futex for fast inline simple_mtx_t.
 AC_CHECK_HEADER([linux/futex.h], [DEFINES="$DEFINES -DHAVE_LINUX_FUTEX_H"])

@@ -1698,6 +1716,8 @@ xdri)
            if test x"$enable_dri" = xyes; then
               dri_modules="$dri_modules xcb-dri2 >= $XCBDRI2_REQUIRED"
            fi
+
+            dri_modules="$dri_modules xxf86vm"
        fi
        if test x"$dri_platform" = xapple ; then
            DEFINES="$DEFINES -DGLX_USE_APPLEGL"
@@ -1707,8 +1727,6 @@ xdri)
        fi
    fi

-    dri_modules="$dri_modules xxf86vm"
-
    PKG_CHECK_MODULES([DRIGL], [$dri_modules])
    GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules"
    X11_INCLUDES="$X11_INCLUDES $DRIGL_CFLAGS"
@@ -1826,6 +1844,10 @@ for plat in $platforms; do
        PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
                          WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
                          WAYLAND_SCANNER='')
+        PKG_CHECK_EXISTS([wayland-scanner >= 1.15],
+                          AC_SUBST(SCANNER_ARG, 'private-code'),
+                          AC_SUBST(SCANNER_ARG, 'code'))
+
        if test "x$WAYLAND_SCANNER" = x; then
            AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner], [:])
        fi
@@ -1842,6 +1864,7 @@ for plat in $platforms; do
        ;;

    drm)
+        test "x$enable_egl" = "xyes" &&
        test "x$enable_gbm" = "xno" &&
                AC_MSG_ERROR([EGL platform drm needs gbm])
        DEFINES="$DEFINES -DHAVE_DRM_PLATFORM"
@@ -2698,7 +2721,6 @@ if test -n "$with_gallium_drivers"; then
            ;;
        xfreedreno)
            HAVE_GALLIUM_FREEDRENO=yes
-            PKG_CHECK_MODULES([FREEDRENO], [libdrm >= $LIBDRM_FREEDRENO_REQUIRED libdrm_freedreno >= $LIBDRM_FREEDRENO_REQUIRED])
            require_libdrm "freedreno"
            ;;
        xetnaviv)
@@ -2813,9 +2835,8 @@ if test -n "$with_gallium_drivers"; then
 fi

 # XXX: Keep in sync with LLVM_REQUIRED_SWR
-AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x5.0.0 -a \
-                                              "x$LLVM_VERSION" != x5.0.1 -a \
-                                              "x$LLVM_VERSION" != x5.0.2)
+AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x6.0.0 -a \
+                                              "x$LLVM_VERSION" != x6.0.1)

 if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
    llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
@@ -3172,6 +3193,7 @@ AC_CONFIG_FILES([Makefile
                 src/mesa/main/tests/Makefile
                 src/mesa/state_tracker/tests/Makefile
                 src/util/Makefile
+                 src/util/tests/fast_idiv_by_const/Makefile
                 src/util/tests/hash_table/Makefile
                 src/util/tests/set/Makefile
                 src/util/tests/string_buffer/Makefile
@@ -3343,7 +3365,7 @@ if test "x$enable_llvm" = xyes; then
    echo "        LLVM_LDFLAGS:    $LLVM_LDFLAGS"
    echo ""
 fi
-echo "        PYTHON2:         $PYTHON2"
+echo "        PYTHON:         $PYTHON"

 echo ""
 echo "        Run '${MAKE-make}' to build Mesa"
--- a/docs/autoconf.html
+++ b/docs/autoconf.html
@@ -94,6 +94,13 @@ Currently there's only one config file provided when dri drivers are
 enabled - it's <code>drirc</code>.</p>
 </dd>

+<dt><code>--datadir=DIR</code></dt>
+<dd><p>This option specifies the directory where the data files will
+be installed. The default is <code>${prefix}/share</code>.
+Currently when dri drivers are enabled, <code>drirc.d/</code> is at
+this place.</p>
+</dd>
+
 <dt><code>--enable-static, --disable-shared</code></dt>
 <dd><p>By default, Mesa
 will build shared libraries. Either of these options will force static
--- a/docs/download.html
+++ b/docs/download.html
@@ -102,9 +102,9 @@ In the past, GLUT, GLU and the Mesa demos were released in conjunction with
 Mesa releases.  But since GLUT, GLU and the demos change infrequently, they
 were split off into their own git repositories:

-<a href="https://cgit.freedesktop.org/mesa/glut/">GLUT</a>,
-<a href="https://cgit.freedesktop.org/mesa/glu/">GLU</a> and
-<a href="https://cgit.freedesktop.org/mesa/demos/">Demos</a>,
+<a href="https://gitlab.freedesktop.org/mesa/glut">GLUT</a>,
+<a href="https://gitlab.freedesktop.org/mesa/glu">GLU</a> and
+<a href="https://gitlab.freedesktop.org/mesa/demos">Demos</a>,
 </p>

 </div>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -146,8 +146,8 @@ your system. For example under the default settings you may end up with a 1GB
 cache for x86_64 and another 1GB cache for i386.
 <li>MESA_GLSL_CACHE_DIR - if set, determines the directory to be used
 for the on-disk cache of compiled GLSL programs. If this variable is
-not set, then the cache will be stored in $XDG_CACHE_HOME/mesa (if
-that variable is set), or else within .cache/mesa within the user's
+not set, then the cache will be stored in $XDG_CACHE_HOME/mesa_shader_cache (if
+that variable is set), or else within .cache/mesa_shader_cache within the user's
 home directory.
 <li>MESA_GLSL - <a href="shading.html#envvars">shading language compiler options</a>
 <li>MESA_NO_MINMAX_CACHE - when set, the minmax index cache is globally disabled.
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -196,7 +196,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, r600, radeonsi
  GL_MAX_VERTEX_ATTRIB_STRIDE                           DONE (all drivers)
  GL_ARB_buffer_storage                                 DONE (freedreno, i965, nv50, llvmpipe, swr)
  GL_ARB_clear_texture                                  DONE (i965, nv50, llvmpipe, softpipe, swr)
-  GL_ARB_enhanced_layouts                               DONE (i965, nv50, llvmpipe, softpipe)
+  GL_ARB_enhanced_layouts                               DONE (i965, nv50, llvmpipe, softpipe, virgl)
  - compile-time constant expressions                   DONE
  - explicit byte offsets for blocks                    DONE
  - forced alignment within blocks                      DONE
@@ -219,7 +219,7 @@ GL 4.5, GLSL 4.50 -- all DONE: nvc0, radeonsi
  GL_ARB_direct_state_access                            DONE (all drivers)
  GL_ARB_get_texture_sub_image                          DONE (all drivers)
  GL_ARB_shader_texture_image_samples                   DONE (i965, nv50, r600, virgl)
-  GL_ARB_texture_barrier                                DONE (freedreno, i965, nv50, r600)
+  GL_ARB_texture_barrier                                DONE (freedreno, i965, nv50, r600, virgl)
  GL_KHR_context_flush_control                          DONE (all - but needs GLX/EGL extension to be useful)
  GL_KHR_robustness                                     DONE (i965)
  GL_EXT_shader_integer_mix                             DONE (all drivers that support GLSL)
@@ -308,7 +308,7 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
  GL_ARB_sample_locations                               DONE (nvc0)
  GL_ARB_seamless_cubemap_per_texture                   DONE (freedreno, i965, nvc0, radeonsi, r600, softpipe, swr, virgl)
  GL_ARB_shader_ballot                                  DONE (i965/gen8+, nvc0, radeonsi)
-  GL_ARB_shader_clock                                   DONE (i965/gen7+, nv50, nvc0, r600, radeonsi)
+  GL_ARB_shader_clock                                   DONE (i965/gen7+, nv50, nvc0, r600, radeonsi, virgl)
  GL_ARB_shader_stencil_export                          DONE (i965/gen9+, r600, radeonsi, softpipe, llvmpipe, swr, virgl)
  GL_ARB_shader_viewport_layer_array                    DONE (i965/gen6+, nvc0, radeonsi)
  GL_ARB_sparse_buffer                                  DONE (radeonsi/CIK+)
@@ -319,13 +319,14 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
  GL_EXT_memory_object                                  DONE (radeonsi)
  GL_EXT_memory_object_fd                               DONE (radeonsi)
  GL_EXT_memory_object_win32                            not started
+  GL_EXT_render_snorm                                   DONE (i965)
  GL_EXT_semaphore                                      DONE (radeonsi)
  GL_EXT_semaphore_fd                                   DONE (radeonsi)
  GL_EXT_semaphore_win32                                not started
  GL_EXT_texture_norm16                                 DONE (i965, r600, radeonsi, nvc0)
  GL_KHR_blend_equation_advanced_coherent               DONE (i965/gen9+)
  GL_KHR_texture_compression_astc_hdr                   DONE (i965/bxt)
-  GL_KHR_texture_compression_astc_sliced_3d             DONE (i965/gen9+)
+  GL_KHR_texture_compression_astc_sliced_3d             DONE (i965/gen9+, radeonsi)
  GL_OES_depth_texture_cube_map                         DONE (all drivers that support GLSL 1.30+)
  GL_OES_EGL_image                                      DONE (all drivers)
  GL_OES_EGL_image_external                             DONE (all drivers)
@@ -343,6 +344,63 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
  GLX_ARB_robustness_application_isolation              not started
  GLX_ARB_robustness_share_group_isolation              not started

+GL_EXT_direct_state_access subfeatures (in the spec order):
+  GL 1.1: Client commands                               not started
+  GL 1.0-1.3: Matrix and transpose matrix commands      not started
+  GL 1.1-1.2: Texture commands                          not started
+  GL 1.2: 3D texture commands                           not started
+  GL 1.2.1: Multitexture commands                       not started
+  GL 1.2.1-3.0: Indexed texture commands                not started
+  GL 1.2.1-3.0: Indexed generic queries                 not started
+  GL 1.2.1: EnableIndexed.. Get*Indexed                 not started
+  GL_ARB_vertex_program                                 not started
+  GL 1.3: Compressed texture and multitexture commands  not started
+  GL 1.5: Buffer commands                               not started
+  GL 2.0-2.1: Uniform and uniform matrix commands       not started
+  GL_EXT_texture_buffer_object                          not started
+  GL_EXT_texture_integer                                not started
+  GL_EXT_gpu_shader4                                    not started
+  GL_EXT_gpu_program_parameters                         not started
+  GL_NV_gpu_program4                                    n/a
+  GL_NV_framebuffer_multisample_coverage                n/a
+  GL 3.0: Renderbuffer/framebuffer commands, Gen*Mipmap not started
+  GL 3.0: CopyBuffer command                            not started
+  GL_EXT_geometry_shader4 commands (expose in GL 3.2)   not started
+  GL_NV_explicit_multisample                            n/a
+  GL 3.0: Vertex array/attrib/query/map commands        not started
+  Matrix GL tokens                                      not started
+
+GL_EXT_direct_state_access additions from other extensions (complete list):
+  GL_AMD_framebuffer_sample_positions                   n/a
+  GL_AMD_gpu_shader_int64                               not started
+  GL_ARB_bindless_texture                               not started
+  GL_ARB_buffer_storage                                 not started
+  GL_ARB_clear_buffer_object                            not started
+  GL_ARB_framebuffer_no_attachments                     not started
+  GL_ARB_gpu_shader_fp64                                not started
+  GL_ARB_instanced_arrays                               not started
+  GL_ARB_internalformat_query2                          not started
+  GL_ARB_sparse_texture                                 n/a
+  GL_ARB_sparse_buffer                                  not started
+  GL_ARB_texture_buffer_range                           not started
+  GL_ARB_texture_storage                                not started
+  GL_ARB_texture_storage_multisample                    not started
+  GL_ARB_vertex_attrib_64bit                            not started
+  GL_ARB_vertex_attrib_binding                          not started
+  GL_EXT_buffer_storage                                 not started
+  GL_EXT_external_buffer                                not started
+  GL_EXT_separate_shader_objects                        n/a
+  GL_EXT_sparse_texture                                 n/a
+  GL_EXT_texture_storage                                n/a
+  GL_EXT_vertex_attrib_64bit                            not started
+  GL_EXT_EGL_image_storage                              n/a
+  GL_NV_bindless_texture                                n/a
+  GL_NV_gpu_shader5                                     n/a
+  GL_NV_texture_multisample                             n/a
+  GL_NV_vertex_buffer_unified_memory                    n/a
+  GL_NVX_linked_gpu_multicast                           n/a
+  GLX_NV_copy_buffer                                    n/a
+
 The following extensions are not part of any OpenGL or OpenGL ES version, and
 we DO NOT WANT implementations of these extensions for Mesa.

--- a/docs/helpwanted.html
+++ b/docs/helpwanted.html
@@ -47,7 +47,7 @@ You can find some further To-do lists here:
 <b>Common To-Do lists:</b>
 </p>
 <ul>
-  <li><a href="https://cgit.freedesktop.org/mesa/mesa/tree/docs/features.txt">
+  <li><a href="https://gitlab.freedesktop.org/mesa/mesa/blob/master/docs/features.txt">
    <b>features.txt</b></a> - Status of OpenGL 3.x / 4.x features in Mesa.</li>
 </ul>

--- a/docs/index.html
+++ b/docs/index.html
@@ -15,6 +15,61 @@
 <div class="content">

 <h1>News</h1>
+<h2>October 31, 2018</h2>
+<p>
+<a href="relnotes/18.2.4.html">Mesa 18.2.4</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>October 19, 2018</h2>
+<p>
+<a href="relnotes/18.2.3.html">Mesa 18.2.3</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>October 5, 2018</h2>
+<p>
+<a href="relnotes/18.2.2.html">Mesa 18.2.2</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 24, 2018</h2>
+<p>
+<a href="relnotes/18.1.9.html">Mesa 18.1.9</a> is released.
+This is a bug-fix release.
+<br>
+NOTE: It is anticipated that 18.1.9 will be the final release in the
+18.1 series. Users of 18.1 are encouraged to migrate to the 18.2
+series in order to obtain future fixes.
+</p>
+
+<h2>September 21, 2018</h2>
+<p>
+<a href="relnotes/18.2.1.html">Mesa 18.2.1</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>September 7, 2018</h2>
+<p>
+<a href="relnotes/18.1.8.html">Mesa 18.1.8</a> and
+<a href="relnotes/18.2.0.html">Mesa 18.2.0</a> are released.
+
+These are, respectively, a bug-fix release from the 18.1 branch and a
+new development release.  See the release notes for more information
+about the releases.
+</p>
+
+<h2>August 24, 2018</h2>
+<p>
+<a href="relnotes/18.1.7.html">Mesa 18.1.7</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>August 13, 2018</h2>
+<p>
+<a href="relnotes/18.1.6.html">Mesa 18.1.6</a> is released.
+This is a bug-fix release.
+</p>

 <h2>July 27, 2018</h2>
 <p>
--- a/docs/install.html
+++ b/docs/install.html
@@ -57,7 +57,7 @@ willing to maintain support for other compiler get in touch.
 <ul>
 <li>GCC 4.2.0 or later (some parts of Mesa may require later versions)
 <li>clang - exact minimum requirement is currently unknown.
-<li>Microsoft Visual Studio 2013 Update 4 or later is required, for building on Windows.
+<li>Microsoft Visual Studio 2015 or later is required, for building on Windows.
 </ul>


@@ -72,7 +72,7 @@ you think you've spotted a bug let developers know by filing a

 <ul>
 <li><a href="https://www.python.org/">Python</a> - Python is required.
-Version 2.6.4 or later should work.
+Version 2.7 or later should work.
 </li>
 <li><a href="http://www.makotemplates.org/">Python Mako module</a> -
 Python Mako module is required. Version 0.8.0 or later should work.
--- a/docs/meson.html
+++ b/docs/meson.html
@@ -21,10 +21,10 @@
 <p><strong>The Meson build system is generally considered stable and ready
 for production</strong></p>

-<p>The meson build is tested on on Linux, macOS, Cygwin and Haiku, it should
-work on FreeBSD, DragonflyBSD, NetBSD, and OpenBSD.</p>
+<p>The meson build is tested on Linux, macOS, Cygwin, Haiku, FreeBSD,
+DragonflyBSD, and NetBSD, and should work on OpenBSD.</p>

-<p><strong>Mesa requires Meson >= 0.44.1 to build.</strong>
+<p><strong>Mesa requires Meson >= 0.45.0 to build.</strong>

 Some older versions of meson do not check that they are too old and will error
 out in odd ways.
@@ -95,41 +95,49 @@ each configuration you want to build
 recommended in the documentation</a>
 </p>

+<p>
+Autotools automatically updates translation files as part of the build process;
+meson does not do this. Instead, if you want translated drirc files you will need
+to invoke non-default targets for ninja to update them:
+<code>ninja -C build/ xmlpool-pot xmlpool-update-po xmlpool-gmo</code>
+</p>
+
 <dl>
 <dt><code>Environment Variables</code></dt>
 <dd><p>Meson supports the standard CC and CXX environment variables for
 changing the default compiler, and CFLAGS, CXXFLAGS, and LDFLAGS for setting
-options to the compiler and linker.
+options to the compiler and linker during the initial configuration.

-The default compilers depends on your operating system. Meson supports most of
-the popular compilers, a complete list is available
-<a href="http://mesonbuild.com/Reference-tables.html#compiler-ids">here</a>.
-
-These arguments are consumed and stored by meson when it is initialized or
-re-initialized. Therefore passing them to meson configure will not do anything,
-and passing them to ninja will only do something if ninja decides to
-re-initialize meson, for example, if a meson.build file has been changed.
-Changing these variables will not cause all targets to be rebuilt, so running
-ninja clean is recommended when changing CFLAGS or CXXFLAGS. Meson will never
-change compiler in a configured build directory.
+These arguments are consumed and stored by meson when it is initialized. To
+change these flags after the build is initialized (or when doing a first
+initialization), consider using <code>-D${lang}_args</code> and
+<code>-D${lang}_link_args</code> instead. Meson will never change compiler in a
+configured build directory.
 </p>

 <pre>
    CC=clang CXX=clang++ meson build-clang
    ninja -C build-clang
    ninja -C build-clang clean
-    touch meson.build
-    CFLAGS=-Wno-typedef-redefinition ninja -C build-clang
+    meson configure build-clang -Dc_args="-Wno-typedef-redefinition"
+    ninja -C build-clang
 </pre>

+<p>
+The default compilers depend on your operating system. Meson supports most of
+the popular compilers, a complete list is available
+<a href="http://mesonbuild.com/Reference-tables.html#compiler-ids">here</a>.
+</p>
+
 <p>Meson also honors <code>DESTDIR</code> for installs</p>
 </dd>


 <dt><code>LLVM</code></dt>
-<dd><p>Meson includes upstream logic to wrap llvm-config using it's standard
+<dd><p>Meson includes upstream logic to wrap llvm-config using its standard
 dependency interface. It will search <code>$PATH</code> (or <code>%PATH%</code> on windows) for
-llvm-config, so using an LLVM from a non-standard path is as easy as
+llvm-config (and llvm-config$version and llvm-config-$version), so using an
+LLVM from a non-standard path is as easy as
 <code>PATH=/path/with/llvm-config:$PATH meson build</code>.
 </p></dd>
 </dl>
--- a/docs/release-calendar.html
+++ b/docs/release-calendar.html
@@ -39,48 +39,47 @@ if you'd like to nominate a patch in the next stable release.
 <th>Notes</th>
 </tr>
 <tr>
-<td rowspan="3">18.1</td>
-<td>2018-08-10</td>
-<td>18.1.6</td>
-<td>Dylan Baker</td>
+<td rowspan="3">18.2</td>
+<td>2018-11-14</td>
+<td>18.2.5</td>
+<td>Juan A. Suarez</td>
+<td/>
+</tr>
+<tr>
+<td>2018-11-28</td>
+<td>18.2.6</td>
+<td>Juan A. Suarez</td>
+<td/>
+</tr>
+<tr>
+<td>2018-12-12</td>
+<td>18.2.7</td>
+<td>Juan A. Suarez</td>
+<td>Last planned 18.2.x release</td>
+</tr>
+<tr><td rowspan="4">18.3</td>
+<td>2018-10-31</td>
+<td>18.3.0-rc1</td>
+<td>Emil Velikov</td>
 <td></td>
 </tr>
 <tr>
-<td>2018-08-24</td>
-<td>18.1.7</td>
-<td>Dylan Baker</td>
-<td></td>
+<td>2018-11-07</td>
+<td>18.3.0-rc2</td>
+<td>Emil Velikov</td>
+<td/>
 </tr>
 <tr>
-<td>2018-09-07</td>
-<td>18.1.8</td>
-<td>Dylan Baker</td>
-<td>Last planned 18.1.x release</td>
+<td>2018-11-14</td>
+<td>18.3.0-rc3</td>
+<td>Emil Velikov</td>
+<td/>
 </tr>
 <tr>
-<td rowspan="4">18.2</td>
-<td>2018-08-01</td>
-<td>18.2.0rc1</td>
-<td>Andres Gomez</td>
-<td></td>
-</tr>
-<tr>
-<td>2018-08-08</td>
-<td>18.2.0rc2</td>
-<td>Andres Gomez</td>
-<td></td>
-</tr>
-<tr>
-<td>2018-08-15</td>
-<td>18.2.0rc3</td>
-<td>Andres Gomez</td>
-<td></td>
-</tr>
-<tr>
-<td>2018-08-22</td>
-<td>18.2.0rc4</td>
-<td>Andres Gomez</td>
-<td>Last planned RC/Final release</td>
+<td>2018-11-21</td>
+<td>18.3.0-rc4</td>
+<td>Emil Velikov</td>
+<td>Last planned RC/final release</td>
 </tr>
 </table>

--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -21,6 +21,7 @@
 <li><a href="#overview">Overview</a>
 <li><a href="#schedule">Release schedule</a>
 <li><a href="#pickntest">Cherry-pick and test</a>
+<li><a href="#stagingbranch">Staging branch</a>
 <li><a href="#branch">Making a branchpoint</a>
 <li><a href="#prerelease">Pre-release announcement</a>
 <li><a href="#release">Making a new release</a>
@@ -209,6 +210,25 @@ system and making some every day's use until the release may be a good
 idea too.
 </p>

+<h1 id="stagingbranch">Staging branch</h1>
+
+<p>
+A live branch, which contains the currently merged/rejected patches, is available
+in the main repository under <code>staging/X.Y</code>. For example:
+</p>
+<pre>
+	staging/18.1 - WIP branch for the 18.1 series
+	staging/18.2 - WIP branch for the 18.2 series
+</pre>
+
+<p>
+Notes:
+</p>
+<ul>
+<li>People are encouraged to test the branch and report regressions.</li>
+<li>The branch history is not stable and it <strong>will</strong> be rebased.</li>
+</ul>
+

 <h1 id="branch">Making a branchpoint</h1>

@@ -492,10 +512,10 @@ Here is one solution that I've been using.
 	# Drop LLVM_CONFIG, if applicable:
 	# unset LLVM_CONFIG

-	__glxinfo_cmd='glxinfo 2>&amp;1 | egrep -o "Mesa.*|Gallium.*|.*dri\.so"'
-	__glxgears_cmd='glxgears 2>&amp;1 | grep -v "configuration file"'
-	__es2info_cmd='es2_info 2>&amp;1 | egrep "GL_VERSION|GL_RENDERER|.*dri\.so"'
-	__es2gears_cmd='es2gears_x11 2>&amp;1 | grep -v "configuration file"'
+	__glxinfo_cmd='glxinfo 2&gt;&amp;1 | egrep -o "Mesa.*|Gallium.*|.*dri\.so"'
+	__glxgears_cmd='glxgears 2&gt;&amp;1 | grep -v "configuration file"'
+	__es2info_cmd='es2_info 2&gt;&amp;1 | egrep "GL_VERSION|GL_RENDERER|.*dri\.so"'
+	__es2gears_cmd='es2gears_x11 2&gt;&amp;1 | grep -v "configuration file"'
 	test "x$LD_LIBRARY_PATH" != 'x' &amp;&amp; __old_ld="$LD_LIBRARY_PATH"
 	export LD_LIBRARY_PATH=`pwd`/test/usr/local/lib/:"${__old_ld}"
 	export LIBGL_DRIVERS_PATH=`pwd`/test/usr/local/lib/dri/
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,15 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/18.2.4.html">18.2.4 release notes</a>
+<li><a href="relnotes/18.2.3.html">18.2.3 release notes</a>
+<li><a href="relnotes/18.2.2.html">18.2.2 release notes</a>
+<li><a href="relnotes/18.1.9.html">18.1.9 release notes</a>
+<li><a href="relnotes/18.2.1.html">18.2.1 release notes</a>
+<li><a href="relnotes/18.2.0.html">18.2.0 release notes</a>
+<li><a href="relnotes/18.1.8.html">18.1.8 release notes</a>
+<li><a href="relnotes/18.1.7.html">18.1.7 release notes</a>
+<li><a href="relnotes/18.1.6.html">18.1.6 release notes</a>
 <li><a href="relnotes/18.1.5.html">18.1.5 release notes</a>
 <li><a href="relnotes/18.1.4.html">18.1.4 release notes</a>
 <li><a href="relnotes/18.1.3.html">18.1.3 release notes</a>
--- a/docs/relnotes/18.1.6.html
+++ b/docs/relnotes/18.1.6.html
@@ -0,0 +1,188 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.1.6 Release Notes / August 13 2018</h1>
+
+<p>
+Mesa 18.1.6 is a bug fix release which fixes bugs found since the 18.1.5 release.
+</p>
+<p>
+Mesa 18.1.6 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+580e03328ffefe1fd43b19ab7669f20d931601a1c0a4c0f8b9c65d6e81a06df3  mesa-18.1.6.tar.gz
+bb7ce759069801804fcfb8152da3457f76cd7b4e0096e4870ff5adcb5c894289  mesa-18.1.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+
+<p>None</p>
+
+<h2>Bug fixes</h2>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=13728">Bug 13728</a> - [G965] Some objects in Neverwinter Nights Linux version not displayed correctly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98699">Bug 98699</a> - &quot;float[a+++4 ? 1:1] f;&quot; crashes glsl_compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99730">Bug 99730</a> - Metro Redux game(s) needs override for midshader extension declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106382">Bug 106382</a> - Shader cache breaks INTEL_DEBUG=shader_time</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107117">Bug 107117</a> - mesa-18.1: regression with TFP on intel with modesettings and glamor acceleration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107212">Bug 107212</a> - Dual-Core CPU E5500 / G45: RetroArch with reicast core results in corrupted graphics</li>
+
+</ul>
+
+<h2>Changes</h2>
+<p>Adam Jackson (1):</p>
+<ul>
+  <li>glx: GLX_MESA_multithread_makecurrent is direct-only</li>
+</ul>
+
+<p>Andres Gomez (3):</p>
+<ul>
+  <li>ddebug: use util_snprintf() in dd_get_debug_filename_and_mkdir</li>
+  <li>gallium/aux/util: use util_snprintf() in test_texture_barrier</li>
+  <li>glsl: use util_snprintf()</li>
+</ul>
+
+<p>Christian Gmeiner (1):</p>
+<ul>
+  <li>etnaviv: fix typo in query names</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>r600: reduce num compute threads to 1024.</li>
+</ul>
+
+<p>Dylan Baker (6):</p>
+<ul>
+  <li>docs: Add sha-256 sums for 18.1.5</li>
+  <li>nir/meson: fix c vs cpp args for nir test</li>
+  <li>gallium: fix ddebug on windows</li>
+  <li>cherry-ignore: add patches that get-pick-list is finding in error</li>
+  <li>cherry-ignore: Add some additional patches that are for 18.2</li>
+  <li>bump version to 18.1.6</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>swr: don't export swr_create_screen_internal</li>
+  <li>automake: require shared glapi when using DRI based libGL</li>
+  <li>autotools: error out when using the broken --with-{gl, osmesa}-lib-name</li>
+  <li>autotools: error out when building with mangling and glvnd</li>
+  <li>autotools: use correct gl.pc LIBS when using glvnd</li>
+</ul>
+
+<p>Eric Anholt (4):</p>
+<ul>
+  <li>vc4: Fix a leak of the no-vertex-elements workaround BO.</li>
+  <li>vc4: Respect a sampler view's first_layer field.</li>
+  <li>vc4: Ignore samplers for finding uniform offsets.</li>
+  <li>egl: Fix leak of X11 pixmaps backing pbuffers in DRI3.</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>meson, install_megadrivers: Also remove stale symlinks</li>
+</ul>
+
+<p>Jan Vesely (2):</p>
+<ul>
+  <li>clover: Reduce wait_count in abort path.</li>
+  <li>clover: Don't extend illegal integer types.</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>nir: Take if uses into account in ssa_def_components_read</li>
+  <li>i965/fs: Flag all slots of a flat input as flat</li>
+</ul>
+
+<p>Jon Turney (1):</p>
+<ul>
+  <li>meson: use correct keyword to fix a meson warning</li>
+</ul>
+
+<p>Jordan Justen (2):</p>
+<ul>
+  <li>i965, anv: Use INTEL_DEBUG for disk_cache driver flags</li>
+  <li>i965: Disable shader cache with INTEL_DEBUG=shader_time</li>
+</ul>
+
+<p>Juan A. Suarez Romero (2):</p>
+<ul>
+  <li>wayland/egl: update surface size on window resize</li>
+  <li>wayland/egl: initialize window surface size to window size</li>
+</ul>
+
+<p>Karol Herbst (2):</p>
+<ul>
+  <li>nir/lower_int64: mark all metadata as dirty</li>
+  <li>nvc0/ir: return 0 in imageLoad on incomplete textures</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>intel: Fix SIMD16 unaligned payload GRF reads on Gen4-5.</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>ac/surface: fix MSAA corruption on Vega due to FMASK tile swizzle</li>
+</ul>
+
+<p>Mauro Rossi (2):</p>
+<ul>
+  <li>radv: generate entrypoints for VK_ANDROID_native_buffer</li>
+  <li>radv: move vk_format_table.c to generated sources</li>
+</ul>
+
+<p>Olivier Fourdan (1):</p>
+<ul>
+  <li>dri3: For 1.2, use root window instead of pixmap drawable</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>glsl: handle error case with ast_post_inc, ast_post_dec</li>
+</ul>
+
+<p>Vlad Golovkin (1):</p>
+<ul>
+  <li>swr: Remove unnecessary memset call</li>
+</ul>
+
+<p>vadym.shovkoplias (1):</p>
+<ul>
+  <li>drirc: Allow extension midshader for Metro Redux</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.1.7.html
+++ b/docs/relnotes/18.1.7.html
@@ -0,0 +1,104 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.1.7 Release Notes / August 24 2018</h1>
+
+<p>
+Mesa 18.1.7 is a bug fix release which fixes bugs found since the 18.1.6 release.
+</p>
+<p>
+Mesa 18.1.7 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+0c3c240bcd1352d179e65993214f9d55a399beac852c3ab4433e8df9b6c51c83  mesa-18.1.7.tar.gz
+655e3b32ce3bdddd5e6e8768596e5d4bdef82d0dd37067c324cc4b2daa207306  mesa-18.1.7.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+
+<p>None</p>
+
+<h2>Bug fixes</h2>
+<ul>
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105975">Bug 105975</a> - i965 always reports 0 viewport subpixel bits</li>
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107098">Bug 107098</a> - Segfault after munmap(kms_sw_dt-&gt;ro_mapped)</li>
+</ul>
+
+<h2>Changes</h2>
+<p>Alexander Tsoy (1):</p>
+<ul>
+  <li>meson: fix build for egl platform_x11 without dri3 and gbm</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (1):</p>
+<ul>
+  <li>radv: Fix missing Android platform define.</li>
+</ul>
+
+<p>Danylo Piliaiev (1):</p>
+<ul>
+  <li>i965: Advertise 8 bits subpixel precision for viewport bounds on gen6+</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>r600/eg: rework atomic counter emission with flushes</li>
+</ul>
+
+<p>Dylan Baker (7):</p>
+<ul>
+  <li>docs: Add sha256 sums for 18.1.6</li>
+  <li>cherry-ignore: Add additional 18.2 only patches</li>
+  <li>cherry-ignore: Add more 18.2 patches</li>
+  <li>cherry-ignore: Add more 18.2 patches</li>
+  <li>cherry-ignore: Add a couple of patches with &gt; 1 fixes tags</li>
+  <li>cherry-ignore: more 18.2 patches</li>
+  <li>bump version for 18.1.7 release</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>intel: Switch the order of the 2x MSAA sample positions</li>
+  <li>anv/lower_ycbcr: Use the binding array size for bounds checks</li>
+</ul>
+
+<p>Ray Strode (1):</p>
+<ul>
+  <li>gallium/winsys/kms: don't unmap what wasn't mapped</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv/winsys: fix creating the BO list for virtual buffers</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>radv: add Doom workaround</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.1.8.html
+++ b/docs/relnotes/18.1.8.html
@@ -0,0 +1,180 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.1.8 Release Notes / September 7 2018</h1>
+
+<p>
+Mesa 18.1.8 is a bug fix release which fixes bugs found since the 18.1.7 release.
+</p>
+<p>
+Mesa 18.1.8 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+8ec62f215dd1bb3910987f9941c6fc31632a0874e618815cf1e8e29445c86e0a  mesa-18.1.8.tar.gz
+bd1be67fe9c73b517765264ac28911c84144682d28dbff140e1c2deb2f44c21b  mesa-18.1.8.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93355">Bug 93355</a> - [BXT,SKLGT4e] intermittent ext_framebuffer_multisample.accuracy fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101247">Bug 101247</a> - Mesa fails to link GLSL programs with unused output blocks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104809">Bug 104809</a> - anv: DOOM 2016 and Wolfenstein II:The New Colossus crash due to not having depthBoundsTest</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105904">Bug 105904</a> - Needed to delete mesa shader cache after driver upgrade for 32 bit wine vulkan programs to work.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106738">Bug 106738</a> - No test for miptrees with DRI modifiers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106865">Bug 106865</a> - [GLK] piglit.spec.ext_framebuffer_multisample.accuracy stencil tests fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107359">Bug 107359</a> - [Regression] [bisected] [OpenGL CTS] [SKL,BDW] KHR-GL46.texture_barrier*-texels, GTF-GL46.gtf21.GL2FixedTests.buffer_corners.buffer_corners, and GTF-GL46.gtf21.GL2FixedTests.stencil_plane_corners.stencil_plane_corners fail with some configuration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107477">Bug 107477</a> - [DXVK] Setting high shader quality in GTA V results in LLVM error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107579">Bug 107579</a> - [SNB] The graphic corruption when we reuse the GS compiled and used for TFB when statebuffer contain magic trash in the unused space</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107601">Bug 107601</a> - Rise of the Tomb Raider Segmentation Fault when the game starts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107760">Bug 107760</a> - GPU Hang when Playing DiRT 3 Complete Edition using Steam Play with DXVK</li>
+
+</ul>
+
+<h2>Changes</h2>
+<p>Andrii Simiklit (1):</p>
+<ul>
+  <li>i965/gen6/xfb: handle case where transform feedback is not active</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (3):</p>
+<ul>
+  <li>radv: Add missing checks in radv_get_image_format_properties.</li>
+  <li>radv: Fix CMASK dimensions.</li>
+  <li>radv: Use a lower max offchip buffer count.</li>
+</ul>
+
+<p>Christian Gmeiner (1):</p>
+<ul>
+  <li>tegra: fix memory leak</li>
+</ul>
+
+<p>Daniel Stone (1):</p>
+<ul>
+  <li>st/dri: Don't expose sRGB formats to clients</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>ac/radeonsi: fix CIK copy max size</li>
+</ul>
+
+<p>Dylan Baker (10):</p>
+<ul>
+  <li>docs: Add mesa 18.1.7 notes</li>
+  <li>cherry-ignore: add a patch</li>
+  <li>cherry-ignore: Add more 18.2 only patches</li>
+  <li>meson: Actually load translation files</li>
+  <li>cherry-ignore: Add more 18.2 patches</li>
+  <li>cherry-ignore: Add additional patch</li>
+  <li>cherry-ignore: Add patch that doesn't apply to 18.1</li>
+  <li>cherry-ignore: Add a couple of two fixes warning patches</li>
+  <li>cherry-ignore: Add patch that needs more significant patches to function</li>
+  <li>Bump version to 18.1.8</li>
+</ul>
+
+<p>Emil Velikov (1):</p>
+<ul>
+  <li>docs: update required mako version</li>
+</ul>
+
+<p>Grazvydas Ignotas (1):</p>
+<ul>
+  <li>radv: place pointer length into cache uuid</li>
+</ul>
+
+<p>Gurchetan Singh (2):</p>
+<ul>
+  <li>meson: fix egl build for surfaceless</li>
+  <li>meson: fix egl build for android</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>i965/vec4: Clamp indirect tes input array reads with 0x0fffffff</li>
+  <li>i965/vec4: Correctly handle uniform sources in generate_tes_add_indirect_urb_offset</li>
+</ul>
+
+<p>Jason Ekstrand (5):</p>
+<ul>
+  <li>anv: Fill holes in the VF VUE to zero</li>
+  <li>nir/algebraic: Be more careful converting ushr to extract_u8/16</li>
+  <li>egl/dri2: Add a helper for the number of planes for a FOURCC format</li>
+  <li>egl/dri2: Guard against invalid fourcc formats</li>
+  <li>anv/blorp: Do more flushing around HiZ clears</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>egl/wayland: do not leak wl_buffer when it is locked</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: blorp: support multiple aspect blits</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>glapi: actually implement GL_EXT_robustness for GLES</li>
+</ul>
+
+<p>Nanley Chery (7):</p>
+<ul>
+  <li>intel/isl: Avoid tiling some 16K-wide render targets</li>
+  <li>i965: Make blt_pitch public</li>
+  <li>i965/miptree: Drop an if case from retile_as_linear</li>
+  <li>i965/miptree: Use the correct BLT pitch</li>
+  <li>i965/miptree: Use miptree_map in map_blit functions</li>
+  <li>i965/miptree: Fix can_blit_slice()</li>
+  <li>i965/gen7_urb: Re-emit PUSH_CONSTANT_ALLOC on some gen9</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: fix passing clip/cull distances from VS to PS</li>
+</ul>
+
+<p>vadym.shovkoplias (1):</p>
+<ul>
+  <li>glsl/linker: Allow unused in blocks which are not declated on previous stage</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.1.9.html
+++ b/docs/relnotes/18.1.9.html
@@ -0,0 +1,178 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.1.9 Release Notes / September 24 2018</h1>
+
+<p>
+Mesa 18.1.9 is a bug fix release which fixes bugs found since the 18.1.8 release.
+</p>
+<p>
+Mesa 18.1.9 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+0f825dc834b1b3e3d9a6c3ce58b42977f0d9a248a7627a36dd3b313ffe41a499  mesa-18.1.9.tar.gz
+55f5778d58a710a63d6635f000535768faf7db9e8144dc0f4fd1989f936c1a83  mesa-18.1.9.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+<h2>Bug fixes</h2>
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103241">Bug 103241</a> - Anv crashes when using 64-bit vertex inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104926">Bug 104926</a> - swrast: Mesa 17.3.3 produces:  HW cursor for format 875713089 not supported</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107280">Bug 107280</a> - [DXVK] Batman: Arkham City with tessellation enabled hangs on SKL GT4</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107772">Bug 107772</a> - Mesa preprocessor matches if(def)s &amp; endifs incorrectly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107779">Bug 107779</a> - Access violation with some games</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107810">Bug 107810</a> - The 'va_end' call is missed after 'va_copy' in 'util_vsnprintf' function under windows</li>
+
+</ul>
+
+<h2>Changes</h2>
+<p>Andrii Simiklit (4):</p>
+<ul>
+  <li>apple/glx/log: added missing va_end() after va_copy()</li>
+  <li>mesa/util: don't use the same 'va_list' instance twice</li>
+  <li>mesa/util: don't ignore NULL returned from 'malloc'</li>
+  <li>mesa/util: add missing va_end() after va_copy()</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (4):</p>
+<ul>
+  <li>radv: Use build ID if available for cache UUID.</li>
+  <li>radv: Only allow 16 user SGPRs for compute on GFX9+.</li>
+  <li>radv: Set the user SGPR MSB for Vega.</li>
+  <li>radv: Fix driver UUID SHA1 init.</li>
+</ul>
+
+<p>Christopher Egert (1):</p>
+<ul>
+  <li>radeon: fix ColorMask</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>virgl: don't send a shader create with no data. (v2)</li>
+</ul>
+
+<p>Dylan Baker (10):</p>
+<ul>
+  <li>docs/relnotes: Add sha256 sums for mesa 18.1.8</li>
+  <li>cherry-ignore: Add additional 18.2 patch</li>
+  <li>meson: Print a message about why a libdrm version was selected</li>
+  <li>cherry-ignore: add another 18.2 patch</li>
+  <li>cherry-ignore: Add patches that don't apply cleanly and are for developer tools</li>
+  <li>cherry-ignore: Add more 18.2 patches</li>
+  <li>cherry-ignore: add 18.2 patchs</li>
+  <li>cherry-ignore: add a patch that was reverted on master</li>
+  <li>cherry-ignore: one final update</li>
+  <li>Bump version to 18.1.9</li>
+</ul>
+
+<p>Erik Faye-Lund (2):</p>
+<ul>
+  <li>winsys/virgl: avoid unintended behavior</li>
+  <li>virgl: adjust strides when mapping temp-resources</li>
+</ul>
+
+<p>Gert Wollny (1):</p>
+<ul>
+  <li>winsys/virgl: correct resource and handle allocation (v2)</li>
+</ul>
+
+<p>Jason Ekstrand (6):</p>
+<ul>
+  <li>anv/pipeline: Only consider double elements which actually exist</li>
+  <li>i965: Workaround the gen9 hw astc5x5 sampler bug</li>
+  <li>anv: Re-emit vertex buffers when the pipeline changes</li>
+  <li>anv: Disable the vertex cache when tessellating on SKL GT4</li>
+  <li>anv: Clamp scissors to the framebuffer boundary</li>
+  <li>anv/query: Write both dwords in emit_zero_queries</li>
+</ul>
+
+<p>Josh Pieper (1):</p>
+<ul>
+  <li>st/mesa: Validate the result of pipe_transfer_map in make_texture (v2)</li>
+</ul>
+
+<p>Kenneth Feng (1):</p>
+<ul>
+  <li>amd: Add Picasso device id</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>st/mesa: help fix stencil border color for GL_DEPTH_STENCIL textures</li>
+  <li>radeonsi: fix HTILE for NPOT textures with mipmapping on SI/CI</li>
+  <li>r600: fix HTILE for NPOT textures with mipmapping</li>
+  <li>radeonsi: fix printing a BO list into ddebug reports</li>
+</ul>
+
+<p>Mathias Fröhlich (1):</p>
+<ul>
+  <li>tnl: Fix green gun regression in xonotic.</li>
+</ul>
+
+<p>Mauro Rossi (3):</p>
+<ul>
+  <li>android: broadcom/genxml: fix collision with intel/genxml header-gen macro</li>
+  <li>android: broadcom/cle: add gallium include path</li>
+  <li>android: broadcom/cle: export the broadcom top level path headers</li>
+</ul>
+
+<p>Michal Srb (1):</p>
+<ul>
+  <li>st/dri: don't set queryDmaBufFormats/queryDmaBufModifiers if the driver does not implement it</li>
+</ul>
+
+<p>Michel Dänzer (1):</p>
+<ul>
+  <li>loader/dri3: Only wait for back buffer fences in dri3_get_buffer</li>
+</ul>
+
+<p>Pierre Moreau (1):</p>
+<ul>
+  <li>nvir: Always split 64-bit IMAD/IMUL operations</li>
+</ul>
+
+<p>Sergii Romantsov (1):</p>
+<ul>
+  <li>intel: compiler option msse2 and mstackrealign</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>glsl: fixer lexer for unreachable defines</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.2.5.html
+++ b/docs/relnotes/18.2.5.html
@@ -1,172 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 18.2.5 Release Notes / November 15, 2018</h1>
-
-<p>
-Mesa 18.2.5 is a bug fix release which fixes bugs found since the 18.2.4 release.
-</p>
-<p>
-Mesa 18.2.5 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
-4.5 is <strong>only</strong> available if requested at context creation.
-Compatibility contexts may report a lower version depending on each driver.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-dddc28928b6f4083a0d5120b58c1c8e2dc189ab5c14299c08a386607fdbbdce7  mesa-18.2.5.tar.gz
-b12c32872832e5353155e1e8026e1f1ab75bba9dc5b178d712045684d26c2b73  mesa-18.2.5.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-
-<h2>Bug fixes</h2>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105731">Bug 105731</a> - linker error &quot;fragment shader input ... has no matching output in the previous stage&quot; when previous stage's output declaration in a separate shader object</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107511">Bug 107511</a> - KHR/khrplatform.h not always installed when needed</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107626">Bug 107626</a> - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test &quot;arb_texture_multisample-large-float-texture&quot; with parameter --fp16</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108082">Bug 108082</a> - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108560">Bug 108560</a> - Mesa 32 is built without sse</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Andre Heider (1):</p>
-<ul>
-  <li>st/nine: fix stack corruption due to ABI mismatch</li>
-</ul>
-
-<p>Andrii Simiklit (1):</p>
-<ul>
-  <li>i965/batch: don't ignore the 'brw_new_batch' call for a 'new batch'</li>
-</ul>
-
-<p>Dylan Baker (2):</p>
-<ul>
-  <li>meson: link gallium nine with pthreads</li>
-  <li>meson: fix libatomic tests</li>
-</ul>
-
-<p>Emil Velikov (2):</p>
-<ul>
-  <li>egl/glvnd: correctly report errors when vendor cannot be found</li>
-  <li>m4: add Werror when checking for compiler flags</li>
-</ul>
-
-<p>Eric Engestrom (6):</p>
-<ul>
-  <li>svga: add missing meson build dependency</li>
-  <li>clover: add missing meson build dependency</li>
-  <li>wsi/wayland: use proper VkResult type</li>
-  <li>wsi/wayland: only finish() a successfully init()ed display</li>
-  <li>configure: install KHR/khrplatform.h when needed</li>
-  <li>meson: install KHR/khrplatform.h when needed</li>
-</ul>
-
-<p>Gert Wollny (1):</p>
-<ul>
-  <li>virgl/vtest-winsys: Use virgl version of bind flags</li>
-</ul>
-
-<p>Jonathan Gray (1):</p>
-<ul>
-  <li>intel/tools: include stdarg.h in error2aub</li>
-</ul>
-
-<p>Juan A. Suarez Romero (4):</p>
-<ul>
-  <li>docs: add sha256 checksums for 18.2.4</li>
-  <li>cherry-ignore: add explicit 18.3 only nominations</li>
-  <li>cherry-ignore: i965/batch: avoid reverting batch buffer if saved state is an empty</li>
-  <li>Update version to 18.2.5</li>
-</ul>
-
-<p>Lionel Landwerlin (1):</p>
-<ul>
-  <li>anv/android: mark gralloc allocated BOs as external</li>
-</ul>
-
-<p>Marek Olšák (3):</p>
-<ul>
-  <li>ac: fix ac_build_fdiv for f64</li>
-  <li>st/va: fix incorrect use of resource_destroy</li>
-  <li>include: update GL &amp; GLES headers (v2)</li>
-</ul>
-
-<p>Matt Turner (2):</p>
-<ul>
-  <li>util/ralloc: Switch from DEBUG to NDEBUG</li>
-  <li>util/ralloc: Make sizeof(linear_header) a multiple of 8</li>
-</ul>
-
-<p>Olivier Fourdan (1):</p>
-<ul>
-  <li>wayland/egl: Resize EGL surface on update buffer for swrast</li>
-</ul>
-
-<p>Rhys Perry (1):</p>
-<ul>
-  <li>glsl_to_tgsi: don't create 64-bit integer MAD/FMA</li>
-</ul>
-
-<p>Samuel Pitoiset (2):</p>
-<ul>
-  <li>radv: disable conditional rendering for vkCmdCopyQueryPoolResults()</li>
-  <li>radv: only expose VK_SUBGROUP_FEATURE_ARITHMETIC_BIT for VI+</li>
-</ul>
-
-<p>Sergii Romantsov (1):</p>
-<ul>
-  <li>autotools: library-dependency when no sse and 32-bit</li>
-</ul>
-
-<p>Timothy Arceri (4):</p>
-<ul>
-  <li>st/mesa: calculate buffer size correctly for packed uniforms</li>
-  <li>st/glsl_to_nir: fix next_stage gathering</li>
-  <li>nir: add glsl_type_is_integer() helper</li>
-  <li>nir: don't pack varyings ints with floats unless flat</li>
-</ul>
-
-<p>Vadym Shovkoplias (1):</p>
-<ul>
-  <li>glsl/linker: Fix out variables linking during single stage</li>
-</ul>
-
-<p>Vinson Lee (1):</p>
-<ul>
-  <li>r600/sb: Fix constant logical operand in assert.</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/18.2.6.html
+++ b/docs/relnotes/18.2.6.html
@@ -1,179 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 18.2.6 Release Notes / November 28, 2018</h1>
-
-<p>
-Mesa 18.2.6 is a bug fix release which fixes bugs found since the 18.2.5 release.
-</p>
-<p>
-Mesa 18.2.6 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
-4.5 is <strong>only</strong> available if requested at context creation.
-Compatibility contexts may report a lower version depending on each driver.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-e0ea1236dbc6c412b02e1b5d7f838072525971a6630246fa82ae4466a6d8a587  mesa-18.2.6.tar.gz
-9ebafa4f8249df0c718e93b9ca155e3593a1239af303aa2a8b0f2056a7efdc12  mesa-18.2.6.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-
-<h2>Bug fixes</h2>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107626">Bug 107626</a> - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test &quot;arb_texture_multisample-large-float-texture&quot; with parameter --fp16</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107856">Bug 107856</a> - i965 incorrectly calculates the number of layers for texture views (assert)</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108630">Bug 108630</a> - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108713">Bug 108713</a> - Gallium: use after free with transform feedback</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108829">Bug 108829</a> - [meson] libglapi exports internal API</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Andrii Simiklit (1):</p>
-<ul>
-  <li>i965/batch: avoid reverting batch buffer if saved state is an empty</li>
-</ul>
-
-<p>Bas Nieuwenhuizen (1):</p>
-<ul>
-  <li>radv: Fix opaque metadata descriptor last layer.</li>
-</ul>
-
-<p>Brian Paul (1):</p>
-<ul>
-  <li>scons/svga: remove opt from the list of valid build types</li>
-</ul>
-
-<p>Danylo Piliaiev (1):</p>
-<ul>
-  <li>i965: Fix calculation of layers array length for isl_view</li>
-</ul>
-
-<p>Dylan Baker (2):</p>
-<ul>
-  <li>meson: Don't set -Wall</li>
-  <li>meson: Don't force libva to required from auto</li>
-</ul>
-
-<p>Emil Velikov (13):</p>
-<ul>
-  <li>bin/get-pick-list.sh: simplify git oneline printing</li>
-  <li>bin/get-pick-list.sh: prefix output with "[stable] "</li>
-  <li>bin/get-pick-list.sh: handle "typod" usecase.</li>
-  <li>bin/get-pick-list.sh: handle the fixes tag</li>
-  <li>bin/get-pick-list.sh: tweak the commit sha matching pattern</li>
-  <li>bin/get-pick-list.sh: flesh out is_sha_nomination</li>
-  <li>bin/get-pick-list.sh: handle fixes tag with missing colon</li>
-  <li>bin/get-pick-list.sh: handle unofficial "broken by" tag</li>
-  <li>bin/get-pick-list.sh: use test instead of [ ]</li>
-  <li>bin/get-pick-list.sh: handle reverts prior to the branchpoint</li>
-  <li>travis: drop unneeded x11proto-xf86vidmode-dev</li>
-  <li>glx: make xf86vidmode mandatory for direct rendering</li>
-  <li>travis: adding missing x11-xcb for meson+vulkan</li>
-</ul>
-
-<p>Eric Anholt (1):</p>
-<ul>
-  <li>vc4: Make sure we make ro scanout resources for create_with_modifiers.</li>
-</ul>
-
-<p>Eric Engestrom (5):</p>
-<ul>
-  <li>meson: only run vulkan's meson.build when building vulkan</li>
-  <li>gbm: remove unnecessary meson include</li>
-  <li>meson: fix wayland-less builds</li>
-  <li>egl: add missing glvnd entrypoint for EGL_ANDROID_blob_cache</li>
-  <li>glapi: add missing visibility args</li>
-</ul>
-
-<p>Erik Faye-Lund (1):</p>
-<ul>
-  <li>mesa/main: remove bogus error for zero-sized images</li>
-</ul>
-
-<p>Gert Wollny (3):</p>
-<ul>
-  <li>mesa: Reference count shaders that are used by transform feedback objects</li>
-  <li>r600: clean up the GS ring buffers when the context is destroyed</li>
-  <li>glsl: free or reuse memory allocated for TF varying</li>
-</ul>
-
-<p>Jason Ekstrand (2):</p>
-<ul>
-  <li>nir/lower_alu_to_scalar: Don't try to lower unpack_32_2x16</li>
-  <li>anv: Put robust buffer access in the pipeline hash</li>
-</ul>
-
-<p>Juan A. Suarez Romero (6):</p>
-<ul>
-  <li>cherry-ignore: add explicit 18.3 only nominations</li>
-  <li>cherry-ignore: intel/aub_viewer: fix dynamic state printing</li>
-  <li>cherry-ignore: intel/aub_viewer: Print blend states properly</li>
-  <li>cherry-ignore: mesa/main: fix incorrect depth-error</li>
-  <li>docs: add sha256 checksums for 18.2.5</li>
-  <li>Update version to 18.2.6</li>
-</ul>
-
-<p>Karol Herbst (1):</p>
-<ul>
-  <li>nir/spirv: cast shift operand to u32</li>
-</ul>
-
-<p>Kenneth Graunke (1):</p>
-<ul>
-  <li>i965: Add PCI IDs for new Amberlake parts that are Coffeelake based</li>
-</ul>
-
-<p>Lionel Landwerlin (1):</p>
-<ul>
-  <li>egl/dri: fix error value with unknown drm format</li>
-</ul>
-
-<p>Marek Olšák (2):</p>
-<ul>
-  <li>winsys/amdgpu: fix a buffer leak in amdgpu_bo_from_handle</li>
-  <li>winsys/amdgpu: fix a device handle leak in amdgpu_winsys_create</li>
-</ul>
-
-<p>Rodrigo Vivi (4):</p>
-<ul>
-  <li>i965: Add a new CFL PCI ID.</li>
-  <li>intel: aubinator: Adding missed platforms to the error message.</li>
-  <li>intel: Introducing Amber Lake platform</li>
-  <li>intel: Introducing Whiskey Lake platform</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/18.2.7.html
+++ b/docs/relnotes/18.2.7.html
@@ -1,166 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 18.2.7 Release Notes / December 13, 2018</h1>
-
-<p>
-Mesa 18.2.7 is a bug fix release which fixes bugs found since the 18.2.6 release.
-</p>
-<p>
-Mesa 18.2.7 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
-4.5 is <strong>only</strong> available if requested at context creation.
-Compatibility contexts may report a lower version depending on each driver.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-TBD
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-
-<h2>Bug fixes</h2>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106577">Bug 106577</a> - broken rendering with nine and nouveau (GM107)</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108245">Bug 108245</a> - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108311">Bug 108311</a> - Query buffer object support is broken on r600.</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108894">Bug 108894</a> - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108909">Bug 108909</a> - Vkd3d test failure test_resolve_non_issued_query_data()</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108914">Bug 108914</a> - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108925">Bug 108925</a> - vkCmdCopyQueryPoolResults(VK_QUERY_RESULT_WAIT_BIT) for timestamps with large query count hangs</li>
-
-</ul>
-
-
-<h2>Changes</h2>
-
-<p>Alex Smith (1):</p>
-<ul>
-  <li>radv: Flush before vkCmdWriteTimestamp() if needed</li>
-</ul>
-
-<p>Bas Nieuwenhuizen (4):</p>
-<ul>
-  <li>radv: Align large buffers to the fragment size.</li>
-  <li>radv: Clamp gfx9 image view extents to the allocated image extents.</li>
-  <li>radv/android: Mark android WSI image as shareable.</li>
-  <li>radv/android: Use buffer metadata to determine scanout compat.</li>
-</ul>
-
-<p>Dave Airlie (2):</p>
-<ul>
-  <li>r600: make suballocator 256-bytes align</li>
-  <li>radv: use 3d shader for gfx9 copies if dst is 3d</li>
-</ul>
-
-<p>Emil Velikov (2):</p>
-<ul>
-  <li>egl/wayland: bail out when drmGetMagic fails</li>
-  <li>egl/wayland: plug memory leak in drm_handle_device()</li>
-</ul>
-
-<p>Eric Anholt (3):</p>
-<ul>
-  <li>v3d: Fix a leak of the transfer helper on screen destroy.</li>
-  <li>vc4: Fix a leak of the transfer helper on screen destroy.</li>
-  <li>v3d: Fix a leak of the disassembled instruction string during debug dumps.</li>
-</ul>
-
-<p>Eric Engestrom (3):</p>
-<ul>
-  <li>anv: correctly use vulkan 1.0 by default</li>
-  <li>wsi/display: fix mem leak when freeing swapchains</li>
-  <li>vulkan/wsi: fix s/,/;/ typo</li>
-</ul>
-
-<p>Gurchetan Singh (3):</p>
-<ul>
-  <li>virgl: quadruple command buffer size</li>
-  <li>virgl: avoid large inline transfers</li>
-  <li>virgl: don't mark buffers as unclean after a write</li>
-</ul>
-
-<p>Juan A. Suarez Romero (4):</p>
-<ul>
-  <li>docs: add sha256 checksums for 18.2.6</li>
-  <li>cherry-ignore: freedreno: Fix autotools build.</li>
-  <li>cherry-ignore: mesa: Revert INTEL_fragment_shader_ordering support</li>
-  <li>Update version to 18.2.7</li>
-</ul>
-
-<p>Karol Herbst (1):</p>
-<ul>
-  <li>nv50,nvc0: Fix gallium nine regression regarding sampler bindings</li>
-</ul>
-
-<p>Lionel Landwerlin (2):</p>
-<ul>
-  <li>anv: flush pipeline before query result copies</li>
-  <li>anv/query: flush render target before copying results</li>
-</ul>
-
-<p>Michal Srb (2):</p>
-<ul>
-  <li>gallium: Constify drisw_loader_funcs struct</li>
-  <li>drisw: Use separate drisw_loader_funcs for shm</li>
-</ul>
-
-<p>Nicolai Hähnle (2):</p>
-<ul>
-  <li>egl/wayland: rather obvious build fix</li>
-  <li>meson: link LLVM 'native' component when LLVM is available</li>
-</ul>
-
-<p>Samuel Pitoiset (1):</p>
-<ul>
-  <li>radv: rework the TC-compat HTILE hardware bug with COND_EXEC</li>
-</ul>
-
-<p>Thomas Hellstrom (2):</p>
-<ul>
-  <li>st/xa: Fix a memory leak</li>
-  <li>winsys/svga: Fix a memory leak</li>
-</ul>
-
-<p>Tobias Klausmann (1):</p>
-<ul>
-  <li>amd/vulkan: meson build - use radv_deps for libvulkan_radeon</li>
-</ul>
-
-<p>Vinson Lee (1):</p>
-<ul>
-  <li>st/xvmc: Add X11 include path.</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/18.3.0.html
+++ b/docs/relnotes/18.3.0.html
@@ -0,0 +1,283 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.0 Release Notes / December 7, 2018</h1>
+
+<p>
+Mesa 18.3.0 is a new development release. People who are concerned
+with stability and reliability should stick with a previous release or
+wait for Mesa 18.3.1.
+</p>
+<p>
+Mesa 18.3.0 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+<p>
+libwayland-egl is now distributed by Wayland (since 1.15,
+<a href="https://lists.freedesktop.org/archives/wayland-devel/2018-April/037767.html">see announcement</a>),
+and has been removed from Mesa in this release. Make sure you're using
+an up-to-date version of Wayland to keep the functionality.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+17a124d4dbc712505d22a7815c9b0cee22214c96c8abb91539a2b1351e38a000  mesa-18.3.0.tar.gz
+b63f947e735d6ef3dfaa30c789a9adfbae18aea671191eaacde95a18c17fc38a  mesa-18.3.0.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+
+<p>
+Note: some of the new features are only available with certain drivers.
+</p>
+
+<ul>
+<li>GL_AMD_depth_clamp_separate on r600, radeonsi.</li>
+<li>GL_AMD_framebuffer_multisample_advanced on radeonsi.</li>
+<li>GL_AMD_gpu_shader_int64 on i965, nvc0, radeonsi.</li>
+<li>GL_AMD_multi_draw_indirect on all GL 4.x drivers.</li>
+<li>GL_AMD_query_buffer_object on i965, nvc0, r600, radeonsi.</li>
+<li>GL_EXT_disjoint_timer_query on radeonsi and most other Gallium drivers (ES extension)</li>
+<li>GL_EXT_texture_compression_s3tc on all drivers (ES extension)</li>
+<li>GL_EXT_vertex_attrib_64bit on i965, nvc0, radeonsi.</li>
+<li>GL_EXT_window_rectangles on radeonsi.</li>
+<li>GL_KHR_texture_compression_astc_sliced_3d on radeonsi.</li>
+<li>GL_NV_fragment_shader_interlock on i965.</li>
+<li>EGL_EXT_device_base for all drivers.</li>
+<li>EGL_EXT_device_drm for all drivers.</li>
+<li>EGL_MESA_device_software for all drivers.</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=13728">Bug 13728</a> - [G965] Some objects in Neverwinter Nights Linux version not displayed correctly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91433">Bug 91433</a> - piglit.spec.arb_depth_buffer_float.fbo-depth-gl_depth_component32f-copypixels fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93355">Bug 93355</a> - [BXT,SKLGT4e] intermittent ext_framebuffer_multisample.accuracy fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94957">Bug 94957</a> - dEQP failures on llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98699">Bug 98699</a> - &quot;float[a+++4 ? 1:1] f;&quot; crashes glsl_compiler</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99507">Bug 99507</a> - Corrupted frame contents with Vulkan version of DOTA2, Talos Principle and Sascha Willems' demos when they're run Vsynched in fullscreen</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99730">Bug 99730</a> - Metro Redux game(s) needs override for midshader extension declaration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100200">Bug 100200</a> - Default Unreal Engine 4 frag shader fails to compile</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101247">Bug 101247</a> - Mesa fails to link GLSL programs with unused output blocks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102597">Bug 102597</a> - [Regression] mpv, high rendering times (two to three times higher)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103241">Bug 103241</a> - Anv crashes when using 64-bit vertex inputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104602">Bug 104602</a> - [apitrace] Graphical artifacts in Civilization VI on RX Vega</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104809">Bug 104809</a> - anv: DOOM 2016 and Wolfenstein II:The New Colossus crash due to not having depthBoundsTest</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104926">Bug 104926</a> - swrast: Mesa 17.3.3 produces:  HW cursor for format 875713089 not supported</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105333">Bug 105333</a> - [gallium-nine] missing geometry after commit ac: replace ac_build_kill with ac_build_kill_if_false</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105371">Bug 105371</a> - r600_shader_from_tgsi - GPR limit exceeded - shader requires 360 registers</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105731">Bug 105731</a> - linker error &quot;fragment shader input ... has no matching output in the previous stage&quot; when previous stage's output declaration in a separate shader object</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105904">Bug 105904</a> - Needed to delete mesa shader cache after driver upgrade for 32 bit wine vulkan programs to work.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105975">Bug 105975</a> - i965 always reports 0 viewport subpixel bits</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106231">Bug 106231</a> - llvmpipe blends produce bad code after llvm patch https://reviews.llvm.org/D44785</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106283">Bug 106283</a> - Shader replacements works only for limited use cases</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106577">Bug 106577</a> - broken rendering with nine and nouveau (GM107)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106833">Bug 106833</a> - glLinkProgram is expected to fail when vertex attribute aliasing happens on ES3.0 context or later</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106865">Bug 106865</a> - [GLK] piglit.spec.ext_framebuffer_multisample.accuracy stencil tests fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106980">Bug 106980</a> - Basemark GPU vulkan benchmark hangs on GFX9</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106997">Bug 106997</a> - [Regression]. Dying light game is crashing on latest mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107088">Bug 107088</a> - [GEN8+] Hang when discarding a fragment if dual source blending is enabled but shader doesn't support it</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107098">Bug 107098</a> - Segfault after munmap(kms_sw_dt-&gt;ro_mapped)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107212">Bug 107212</a> - Dual-Core CPU E5500 / G45: RetroArch with reicast core results in corrupted graphics</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107223">Bug 107223</a> - [GEN9+] 50% perf drop in SynMark Fill* tests (E2E RBC gets disabled?)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107276">Bug 107276</a> - radv: OpBitfieldUExtract returns incorrect result when count is zero</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107280">Bug 107280</a> - [DXVK] Batman: Arkham City with tessellation enabled hangs on SKL GT4</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107313">Bug 107313</a> - Meson instructions on web site are non-optimal</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107359">Bug 107359</a> - [Regression] [bisected] [OpenGL CTS] [SKL,BDW] KHR-GL46.texture_barrier*-texels, GTF-GL46.gtf21.GL2FixedTests.buffer_corners.buffer_corners, and GTF-GL46.gtf21.GL2FixedTests.stencil_plane_corners.stencil_plane_corners fail with some configuration</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107460">Bug 107460</a> - radv: OpControlBarrier does not always work correctly (bisected)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107477">Bug 107477</a> - [DXVK] Setting high shader quality in GTA V results in LLVM error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107483">Bug 107483</a> - DispatchSanity_test.GL31_CORE regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107487">Bug 107487</a> - [intel] [tools] intel gpu tools don't honor -D tools=[]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107488">Bug 107488</a> - gl.h:2090: error: redefinition of typedef ‘GLeglImageOES’</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107510">Bug 107510</a> - [GEN8+] up to 10% perf drop on several 3D benchmarks</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107511">Bug 107511</a> - KHR/khrplatform.h not always installed when needed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107524">Bug 107524</a> - Broken packDouble2x32 at llvmpipe</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107544">Bug 107544</a> - intel/decoder: out of bounds group_iter</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107547">Bug 107547</a> - shader crashing glsl_compiler (uniform block assigned to vec2, then component substraced by 1)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107550">Bug 107550</a> - &quot;0[2]&quot; as function parameter hits assert</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107563">Bug 107563</a> - [RADV] Broken rendering in Unity demos</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107565">Bug 107565</a> - TypeError: __init__() got an unexpected keyword argument 'future_imports'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107579">Bug 107579</a> - [SNB] The graphic corruption when we reuse the GS compiled and used for TFB when statebuffer contain magic trash in the unused space</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107601">Bug 107601</a> - Rise of the Tomb Raider Segmentation Fault when the game starts</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107610">Bug 107610</a> - Dolphin emulator mis-renders shadow overlay in Super Mario Sunshine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107626">Bug 107626</a> - [SNB] The graphical corruption and GPU hang occur sometimes on the piglit test &quot;arb_texture_multisample-large-float-texture&quot; with parameter --fp16</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107658">Bug 107658</a> - [Regression] [bisected] [OpenGLES CTS] KHR-GLES3.packed_pixels.*rectangle.r*8_snorm</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107734">Bug 107734</a> - [GLSL] glsl-fface-invariant, glsl-fcoord-invariant and glsl-pcoord-invariant should fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107745">Bug 107745</a> - [bisected] [bdw bsw] piglit.spec.arb_fragment_shader_interlock.arb_fragment_shader_interlock-image-load-store failure</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107760">Bug 107760</a> - GPU Hang when Playing DiRT 3 Complete Edition using Steam Play with DXVK</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107765">Bug 107765</a> - [regression] Batman Arkham City crashes with DXVK under wine</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107772">Bug 107772</a> - Mesa preprocessor matches if(def)s &amp; endifs incorrectly</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107779">Bug 107779</a> - Access violation with some games</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107786">Bug 107786</a> - [DXVK] MSAA reflections are broken in GTA V</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107806">Bug 107806</a> - glsl_get_natural_size_align_bytes() ABORT with GfxBench Vulkan AztecRuins</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107810">Bug 107810</a> - The 'va_end' call is missed after 'va_copy' in 'util_vsnprintf' function under windows</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107832">Bug 107832</a> - Gallium picking A16L16 formats when emulating INTENSITY16 conflicts with mesa</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107843">Bug 107843</a> - 32bit Mesa build failes with meson.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107856">Bug 107856</a> - i965 incorrectly calculates the number of layers for texture views (assert)</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107857">Bug 107857</a> - GPU hang - GS_EMIT without shader outputs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107865">Bug 107865</a> - swr fail to build with llvm-libs 6.0.1</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107869">Bug 107869</a> - u_thread.h:87:4: error: use of undeclared identifier 'cpu_set_t'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107870">Bug 107870</a> - Undefined symbols for architecture x86_64: &quot;_util_cpu_caps&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107879">Bug 107879</a> - crash happens when link program</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107891">Bug 107891</a> - [wine, regression, bisected] RAGE, Wolfenstein The New Order hangs in menu</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107923">Bug 107923</a> - build_id.c:126: multiple definition of `build_id_length'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107926">Bug 107926</a> - [anv] Rise of the Tomb Raider always misrendering, segfault and gpu hang.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107941">Bug 107941</a> - GPU hang and system crash with Dota 2 using Vulkan</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107971">Bug 107971</a> - SPV_GOOGLE_hlsl_functionality1 / SPV_GOOGLE_decorate_string</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108012">Bug 108012</a> - Compiler crashes on access of non-existent member incremental operations</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108024">Bug 108024</a> - [Debian Stretch]Fail to build because &quot;xcb_randr_lease_t&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108082">Bug 108082</a> - warning: unknown warning option '-Wno-format-truncation' [-Wunknown-warning-option]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108109">Bug 108109</a> - [GLSL] no-overloads.vert fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108112">Bug 108112</a> - [vulkancts] some of the coherent memory tests fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108113">Bug 108113</a> - [vulkancts] r32g32b32 transfer operations not implemented</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108115">Bug 108115</a> - [vulkancts] dEQP-VK.subgroups.vote.graphics.subgroupallequal.* fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108164">Bug 108164</a> - [radv] VM faults since 5d6a560a2986c9ab421b3c7904d29bb7bc35e36f</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108245">Bug 108245</a> - RADV/Vega: Low mip levels of large BCn textures get corrupted by vkCmdCopyBufferToImage</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108272">Bug 108272</a> - [polaris10] opencl-mesa: Anything using OpenCL segfaults, XFX Radeon RX 580</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108311">Bug 108311</a> - Query buffer object support is broken on r600.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108319">Bug 108319</a> - [GLK BXT BSW] Assertion in piglit.spec.arb_gpu_shader_fp64.execution.built-in-functions.vs-sign-sat-neg-abs</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108491">Bug 108491</a> - Commit baa38c14 causes output issues on my VEGA with RADV</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108524">Bug 108524</a> - [RADV]  GPU lockup on event synchronization</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108530">Bug 108530</a> - (mesa-18.3) [Tracker] Mesa 18.3 Release Tracker</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108532">Bug 108532</a> - make check nir_copy_prop_vars_test.store_store_load_different_components regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108560">Bug 108560</a> - Mesa 32 is built without sse</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108595">Bug 108595</a> - ir3_compiler valgrind build error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108617">Bug 108617</a> - [deqp] Mesa fails conformance for egl_ext_device</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108630">Bug 108630</a> - [G965] piglit.spec.!opengl 1_2.tex3d-maxsize spins forever</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108635">Bug 108635</a> - Mesa master commit 68dc591af16ebb36814e4c187e4998948103c99c causes XWayland to segfault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108713">Bug 108713</a> - Gallium: use after free with transform feedback</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108829">Bug 108829</a> - [meson] libglapi exports internal API</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108894">Bug 108894</a> - [anv] vkCmdCopyBuffer() and vkCmdCopyQueryPoolResults() write-after-write hazard</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108909">Bug 108909</a> - Vkd3d test failure test_resolve_non_issued_query_data()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108914">Bug 108914</a> - blocky shadow artifacts in The Forest with DXVK, RADV_DEBUG=nohiz fixes this</li>
+</ul>
+<h2>Changes</h2>
+
+<ul>
+<li>TBD</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.3.1.html
+++ b/docs/relnotes/18.3.1.html
@@ -0,0 +1,63 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.1 Release Notes / December 11, 2018</h1>
+
+<p>
+Mesa 18.3.1 is a bug fix release which fixes bugs found since the 18.3.0 release.
+</p>
+<p>
+Mesa 18.3.1 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+256d0c3d88e380c1b8e3fc5c6ac34001e3b7c30458b8b852407ec68b8ccd9fda  mesa-18.3.1.tar.gz
+5b1f827d28684a25f6657289f8b7d47ac56395988c7ac23e0ec9a62b644bdc63  mesa-18.3.1.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+<p>None</p>
+
+
+<h2>Changes</h2>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.0</li>
+  <li>Update version to 18.3.1</li>
+</ul>
+
+<p>Jason Ekstrand (1):</p>
+<ul>
+  <li>anv,radv: Disable VK_EXT_pci_bus_info</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.3.2.html
+++ b/docs/relnotes/18.3.2.html
@@ -0,0 +1,265 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.2 Release Notes / January 17, 2019</h1>
+
+<p>
+Mesa 18.3.2 is a bug fix release which fixes bugs found since the 18.3.1 release.
+</p>
+<p>
+Mesa 18.3.2 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+1cde4fafd40cd1ad4ee3a13b364b7a0175a08b7afdd127fb46f918c1e1dfd4b0  mesa-18.3.2.tar.gz
+f7ce7181c07b6d8e0132da879af1729523a6c8aa87f79a9d59dfd064024cfb35  mesa-18.3.2.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106595">Bug 106595</a> - [RADV] Rendering distortions only when MSAA is enabled</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107728">Bug 107728</a> - Wrong background in Sascha Willem's Multisampling Demo</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108114">Bug 108114</a> - [vulkancts] new VK_KHR_16bit_storage tests fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108116">Bug 108116</a> - [vulkancts] stencil partial clear tests fail.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108624">Bug 108624</a> - [regression][bisected] &quot;nir: Copy propagation between blocks&quot; regression</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108910">Bug 108910</a> - Vkd3d test failure test_multisample_array_texture()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108911">Bug 108911</a> - Vkd3d test failure test_clear_render_target_view()</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108943">Bug 108943</a> - Build fails on ppc64le with meson</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109072">Bug 109072</a> - GPU hang in blender 2.80</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109081">Bug 109081</a> - [bisected] [HSW] Regression in clipping.user_defined.clip_* vulkancts tests</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109151">Bug 109151</a> - [KBL-G][vulkan] dEQP-VK.texture.explicit_lod.2d.sizes.31x55_nearest_linear_mipmap_nearest_repeat failed verification.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109202">Bug 109202</a> - nv50_ir.cpp:749:19: error: cannot use typeid with -fno-rtti</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109204">Bug 109204</a> - [regression, bisected] retroarch's crt-royale shader crash radv</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alex Deucher (3):</p>
+<ul>
+  <li>pci_ids: add new vega10 pci ids</li>
+  <li>pci_ids: add new vega20 pci id</li>
+  <li>pci_ids: add new VegaM pci id</li>
+</ul>
+
+<p>Alexander von Gluck IV (1):</p>
+<ul>
+  <li>egl/haiku: Fix reference to disp vs dpy</li>
+</ul>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>glsl: correct typo in GLSL compilation error message</li>
+  <li>glsl/linker: specify proper direction in location aliasing error</li>
+</ul>
+
+<p>Axel Davy (3):</p>
+<ul>
+  <li>st/nine: Fix volumetexture dtor on ctor failure</li>
+  <li>st/nine: Bind src not dst in nine_context_box_upload</li>
+  <li>st/nine: Add src reference to nine_context_range_upload</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (5):</p>
+<ul>
+  <li>radv: Do a cache flush if needed before reading predicates.</li>
+  <li>radv: Implement buffer stores with less than 4 components.</li>
+  <li>anv/android: Do not reject storage images.</li>
+  <li>radv: Fix rasterization precision bits.</li>
+  <li>spirv: Fix matrix parameters in function calls.</li>
+</ul>
+
+<p>Caio Marcelo de Oliveira Filho (3):</p>
+<ul>
+  <li>nir: properly clear the entry sources in copy_prop_vars</li>
+  <li>nir: properly find the entry to keep in copy_prop_vars</li>
+  <li>nir: remove dead code from copy_prop_vars</li>
+</ul>
+
+<p>Dave Airlie (2):</p>
+<ul>
+  <li>radv/xfb: fix counter buffer bounds checks.</li>
+  <li>virgl/vtest: fix front buffer flush with protocol version 0.</li>
+</ul>
+
+<p>Dylan Baker (6):</p>
+<ul>
+  <li>meson: Fix ppc64 little endian detection</li>
+  <li>meson: Add support for gnu hurd</li>
+  <li>meson: Add toggle for glx-direct</li>
+  <li>meson: Override C++ standard to gnu++11 when building with altivec on ppc64</li>
+  <li>meson: Error out if building nouveau and using LLVM without rtti</li>
+  <li>autotools: Remove tegra vdpau driver</li>
+</ul>
+
+<p>Emil Velikov (12):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.1</li>
+  <li>bin/get-pick-list.sh: rework handing of sha nominations</li>
+  <li>bin/get-pick-list.sh: warn when commit lists invalid sha</li>
+  <li>cherry-ignore: meson: libfreedreno depends upon libdrm (for fence support)</li>
+  <li>glx: mandate xf86vidmode only for "drm" dri platforms</li>
+  <li>meson: don't require glx/egl/gbm with gallium drivers</li>
+  <li>pipe-loader: meson: reference correct library</li>
+  <li>TODO: glx: meson: build dri based glx tests, only with -Dglx=dri</li>
+  <li>glx: meson: drop includes from a link-only library</li>
+  <li>glx: meson: wire up the dispatch-index-check test</li>
+  <li>glx/test: meson: assorted include fixes</li>
+  <li>Update version to 18.3.2</li>
+</ul>
+
+<p>Eric Anholt (6):</p>
+<ul>
+  <li>v3d: Fix a leak of the transfer helper on screen destroy.</li>
+  <li>vc4: Fix a leak of the transfer helper on screen destroy.</li>
+  <li>v3d: Fix a leak of the disassembled instruction string during debug dumps.</li>
+  <li>v3d: Make sure that a thrsw doesn't split a multop from its umul24.</li>
+  <li>v3d: Add missing flagging of SYNCB as a TSY op.</li>
+  <li>gallium/ttn: Fix setup of outputs_written.</li>
+</ul>
+
+<p>Erik Faye-Lund (2):</p>
+<ul>
+  <li>virgl: wrap vertex element state in a struct</li>
+  <li>virgl: work around bad assumptions in virglrenderer</li>
+</ul>
+
+<p>Francisco Jerez (5):</p>
+<ul>
+  <li>intel/fs: Handle source modifiers in lower_integer_multiplication().</li>
+  <li>intel/fs: Implement quad swizzles on ICL+.</li>
+  <li>intel/fs: Fix bug in lower_simd_width while splitting an instruction which was already split.</li>
+  <li>intel/eu/gen7: Fix brw_MOV() with DF destination and strided source.</li>
+  <li>intel/fs: Respect CHV/BXT regioning restrictions in copy propagation pass.</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>i965/vec4/dce: Don't narrow the write mask if the flags are used</li>
+  <li>Revert "nir/lower_indirect: Bail early if modes == 0"</li>
+</ul>
+
+<p>Jan Vesely (1):</p>
+<ul>
+  <li>clover: Fix build after clang r348827</li>
+</ul>
+
+<p>Jason Ekstrand (6):</p>
+<ul>
+  <li>nir/constant_folding: Fix source bit size logic</li>
+  <li>intel/blorp: Be more conservative about copying clear colors</li>
+  <li>spirv: Handle any bit size in vector_insert/extract</li>
+  <li>anv/apply_pipeline_layout: Set the cursor in lower_res_reindex_intrinsic</li>
+  <li>spirv: Sign-extend array indices</li>
+  <li>intel/peephole_ffma: Fix swizzle propagation</li>
+</ul>
+
+<p>Karol Herbst (1):</p>
+<ul>
+  <li>nv50/ir: fix use-after-free in ConstantFolding::visit</li>
+</ul>
+
+<p>Kirill Burtsev (1):</p>
+<ul>
+  <li>loader: free error state, when checking the drawable type</li>
+</ul>
+
+<p>Lionel Landwerlin (5):</p>
+<ul>
+  <li>anv: don't do partial resolve on layer &gt; 0</li>
+  <li>i965: include draw_params/derived_draw_params for VF cache workaround</li>
+  <li>i965: add CS stall on VF invalidation workaround</li>
+  <li>anv: explictly specify format for blorp ccs/mcs op</li>
+  <li>anv: flush fast clear colors into compressed surfaces</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>st/mesa: don't leak pipe_surface if pipe_context is not current</li>
+</ul>
+
+<p>Mario Kleiner (1):</p>
+<ul>
+  <li>radeonsi: Fix use of 1- or 2- component GL_DOUBLE vbo's.</li>
+</ul>
+
+<p>Nicolai Hähnle (1):</p>
+<ul>
+  <li>meson: link LLVM 'native' component when LLVM is available</li>
+</ul>
+
+<p>Rhys Perry (3):</p>
+<ul>
+  <li>radv: don't set surf_index for stencil-only images</li>
+  <li>ac/nir,radv,radeonsi/nir: use correct indices for interpolation intrinsics</li>
+  <li>ac: split 16-bit ssbo loads that may not be dword aligned</li>
+</ul>
+
+<p>Rob Clark (2):</p>
+<ul>
+  <li>freedreno/drm: fix memory leak</li>
+  <li>mesa/st/nir: fix missing nir_compact_varyings</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: switch on EOP when primitive restart is enabled with triangle strips</li>
+</ul>
+
+<p>Timothy Arceri (2):</p>
+<ul>
+  <li>tgsi/scan: fix loop exit point in tgsi_scan_tess_ctrl()</li>
+  <li>tgsi/scan: correctly walk instructions in tgsi_scan_tess_ctrl()</li>
+</ul>
+
+<p>Vinson Lee (2):</p>
+<ul>
+  <li>meson: Fix typo.</li>
+  <li>meson: Fix libsensors detection.</li>
+</ul>
+
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.3.3.html
+++ b/docs/relnotes/18.3.3.html
@@ -0,0 +1,208 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.3 Release Notes / January 31, 2019</h1>
+
+<p>
+Mesa 18.3.3 is a bug fix release which fixes bugs found since the 18.3.2 release.
+</p>
+<p>
+Mesa 18.3.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6b9893942fe8011c7736d51448deb6ef80ece2257e0fac27b02e997a6605d5e4  mesa-18.3.3.tar.gz
+2ab6886a6966c532ccbcc3b240925e681464b658244f0cbed752615af3936299  mesa-18.3.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108877">Bug 108877</a> - OpenGL CTS gl43 test cases were interrupted due to segment fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109023">Bug 109023</a> - error: inlining failed in call to always_inline ‘__m512 _mm512_and_ps(__m512, __m512)’: target specific option mismatch</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109129">Bug 109129</a> - format_types.h:1220: undefined reference to `_mm256_cvtps_ph'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109229">Bug 109229</a> - glLinkProgram locks up for ~30 seconds</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109242">Bug 109242</a> - [RADV] The Witcher 3 system freeze</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109488">Bug 109488</a> - Mesa 18.3.2 crash on a specific fragment shader (assert triggered) / already fixed on the master branch.</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>bin/get-pick-list.sh: fix the oneline printing</li>
+  <li>bin/get-pick-list.sh: fix redirection in sh</li>
+</ul>
+
+<p>Axel Davy (1):</p>
+<ul>
+  <li>st/nine: Immediately upload user provided textures</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (3):</p>
+<ul>
+  <li>radv: Only use 32 KiB per threadgroup on Stoney.</li>
+  <li>radv: Set partial_vs_wave for pipelines with just GS, not tess.</li>
+  <li>nir: Account for atomics in copy propagation.</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>gallium/swr: Fix multi-context sync fence deadlock.</li>
+</ul>
+
+<p>Carsten Haitzler (Rasterman) (2):</p>
+<ul>
+  <li>vc4: Use named parameters for the NEON inline asm.</li>
+  <li>vc4: Declare the cpu pointers as being modified in NEON asm.</li>
+</ul>
+
+<p>Danylo Piliaiev (1):</p>
+<ul>
+  <li>glsl: Fix copying function's out to temp if dereferenced by array</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>dri_interface: add put shm image2 (v2)</li>
+  <li>glx: add support for putimageshm2 path (v2)</li>
+  <li>gallium: use put image shm2 path (v2)</li>
+</ul>
+
+<p>Dylan Baker (4):</p>
+<ul>
+  <li>meson: allow building dri driver without window system if osmesa is classic</li>
+  <li>meson: fix swr KNL build</li>
+  <li>meson: Fix compiler checks for SWR with ICC</li>
+  <li>meson: Add warnings and errors when using ICC</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.2</li>
+  <li>cherry-ignore: radv: Fix multiview depth clears</li>
+  <li>cherry-ignore: spirv: Handle arbitrary bit sizes for deref array indices</li>
+  <li>cherry-ignore: WARNING: Commit XXX lists invalid sha</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>vc4: Don't leak the GPU fd for renderonly usage.</li>
+  <li>vc4: Enable NEON asm on meson cross-builds.</li>
+</ul>
+
+<p>Eric Engestrom (2):</p>
+<ul>
+  <li>configure: EGL requirements only apply if EGL is built</li>
+  <li>meson/vdpau: add missing soversion</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>anv/device: fix maximum number of images supported</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>anv/nir: Rework arguments to apply_pipeline_layout</li>
+  <li>anv: Only parse pImmutableSamplers if the descriptor has samplers</li>
+  <li>nir/xfb: Fix offset accounting for dvec3/4</li>
+</ul>
+
+<p>Karol Herbst (2):</p>
+<ul>
+  <li>nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise instructions</li>
+  <li>glsl/lower_output_reads: set invariant and precise flags on temporaries</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: fix invalid binding table index computation</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>radeonsi: also apply the GS hang workaround to draws without tessellation</li>
+  <li>radeonsi: fix a u_blitter crash after a shader with FBFETCH</li>
+  <li>radeonsi: fix rendering to tiny viewports where the viewport center is &gt; 8K</li>
+  <li>st/mesa: purge framebuffers when unbinding a context</li>
+</ul>
+
+<p>Niklas Haas (1):</p>
+<ul>
+  <li>radv: correctly use vulkan 1.0 by default</li>
+</ul>
+
+<p>Pierre Moreau (1):</p>
+<ul>
+  <li>meson: Fix with_gallium_icd to with_opencl_icd</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>loader: fix the no-modifiers case</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: clean up setting partial_es_wave for distributed tess on VI</li>
+</ul>
+
+<p>Timothy Arceri (5):</p>
+<ul>
+  <li>ac/nir_to_llvm: fix interpolateAt* for arrays</li>
+  <li>ac/nir_to_llvm: fix clamp shadow reference for more hardware</li>
+  <li>radv/ac: fix some fp16 handling</li>
+  <li>glsl: use remap location when serialising uniform program resource data</li>
+  <li>glsl: Copy function out to temp if we don't directly ref a variable</li>
+</ul>
+
+<p>Tomeu Vizoso (1):</p>
+<ul>
+  <li>etnaviv: Consolidate buffer references from framebuffers</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>meson: Fix typo.</li>
+</ul>
+
+
+
+</div>
+</body>
+</html>
+
--- a/docs/relnotes/18.3.4.html
+++ b/docs/relnotes/18.3.4.html
@@ -0,0 +1,180 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.4 Release Notes / February 18, 2019</h1>
+
+<p>
+Mesa 18.3.4 is a bug fix release which fixes bugs found since the 18.3.3 release.
+</p>
+<p>
+Mesa 18.3.4 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+e22e6fe4c3aca80fe872a0a7285b6c5523e0cfc0bfb57ffcc3b3d66d292593e4  mesa-18.3.4.tar.gz
+32314da4365d37f80d84f599bd9625b00161c273c39600ba63b45002d500bb07  mesa-18.3.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109107">Bug 109107</a> - gallium/st/va: change va max_profiles when using Radeon VCN Hardware</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109401">Bug 109401</a> - [DXVK] Project Cars rendering problems</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109543">Bug 109543</a> - After upgrade mesa to 19.0.0~rc1 all vulkan based application stop working [&quot;vulkan-cube&quot; received SIGSEGV in radv_pipeline_init_blend_state at ../src/amd/vulkan/radv_pipeline.c:699]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109603">Bug 109603</a> - nir_instr_as_deref: Assertion `parent &amp;&amp; parent-&gt;type == nir_instr_type_deref' failed.</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bart Oldeman (1):</p>
+<ul>
+  <li>gallium-xlib: query MIT-SHM before using it.</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (2):</p>
+<ul>
+  <li>radv: Only look at pImmutableSamplers if the descriptor has a sampler.</li>
+  <li>amd/common: Use correct writemask for shared memory stores.</li>
+</ul>
+
+<p>Dylan Baker (2):</p>
+<ul>
+  <li>get-pick-list: Add --pretty=medium to the arguments for Cc patches</li>
+  <li>meson: Add dependency on genxml to anvil</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.3</li>
+  <li>cherry-ignore: nv50,nvc0: add explicit settings for recent caps</li>
+  <li>cherry-ignore: add more 19.0 only nominations from Ilia</li>
+  <li>cherry-ignore: radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8</li>
+  <li>Update version to 18.3.4</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Fix copy-and-paste fail in backport of NEON asm fixes.</li>
+</ul>
+
+<p>Eric Engestrom (2):</p>
+<ul>
+  <li>xvmc: fix string comparison</li>
+  <li>xvmc: fix string comparison</li>
+</ul>
+
+<p>Ernestas Kulik (2):</p>
+<ul>
+  <li>vc4: Fix leak in HW queries error path</li>
+  <li>v3d: Fix leak in resource setup error path</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0: we have 16k-sized framebuffers, fix default scissors</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>intel/fs: Handle IMAGE_SIZE in size_read() and is_send_from_grf()</li>
+  <li>intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode</li>
+  <li>nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>anv/cmd_buffer: check for NULL framebuffer</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>st/mesa: Limit GL_MAX_[NATIVE_]PROGRAM_PARAMETERS_ARB to 2048</li>
+</ul>
+
+<p>Kristian H. Kristensen (1):</p>
+<ul>
+  <li>freedreno/a6xx: Emit blitter dst with OUT_RELOCW</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+  <li>st/va: fix the incorrect max profiles report</li>
+  <li>st/va/vp9: set max reference as default of VP9 reference number</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>meson: drop the xcb-xrandr version requirement</li>
+  <li>gallium/u_threaded: fix EXPLICIT_FLUSH for flush offsets &gt; 0</li>
+  <li>radeonsi: fix EXPLICIT_FLUSH for flush offsets &gt; 0</li>
+  <li>winsys/amdgpu: don't drop manually added fence dependencies</li>
+</ul>
+
+<p>Mario Kleiner (2):</p>
+<ul>
+  <li>egl/wayland: Allow client-&gt;server format conversion for PRIME offload. (v2)</li>
+  <li>egl/wayland-drm: Only announce formats via wl_drm which the driver supports.</li>
+</ul>
+
+<p>Oscar Blumberg (1):</p>
+<ul>
+  <li>radeonsi: Fix guardband computation for large render targets</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: stop frob'ing pipe_resource::nr_samples</li>
+</ul>
+
+<p>Rodrigo Vivi (1):</p>
+<ul>
+  <li>intel: Add more PCI Device IDs for Coffee Lake and Ice Lake.</li>
+</ul>
+
+<p>Samuel Pitoiset (2):</p>
+<ul>
+  <li>radv: fix compiler issues with GCC 9</li>
+  <li>radv: always export gl_SampleMask when the fragment shader uses it</li>
+</ul>
+
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.3.5.html
+++ b/docs/relnotes/18.3.5.html
@@ -0,0 +1,271 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.5 Release Notes / March 18, 2019</h1>
+
+<p>
+Mesa 18.3.5 is a bug fix release which fixes bugs found since the 18.3.4 release.
+</p>
+<p>
+Mesa 18.3.5 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+5f40a336cb2af9b1d66fa243bb03c2c8a3f9b3f067aab6aaaad4316d1bc0e58b  mesa-18.3.5.tar.gz
+4027aea82cc63240b3fcf60eec9eea882955f098c989b29357b01d1695747953  mesa-18.3.5.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104297">Bug 104297</a> - [i965] Downward causes GPU hangs and misrendering on Haswell</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104602">Bug 104602</a> - [apitrace] Graphical artifacts in Civilization VI on RX Vega</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107052">Bug 107052</a> - [Regression][bisected]. Crookz - The Big Heist Demo can't be launched despite the &quot;true&quot; flag in &quot;drirc&quot;</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107563">Bug 107563</a> - [RADV] Broken rendering in Unity demos</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108457">Bug 108457</a> - [OpenGL CTS] KHR-GL46.tessellation_shader.single.xfb_captures_data_from_correct_stage fails</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108999">Bug 108999</a> - Calculating the scissors fields when the y is flipped (0 on top) can generate negative numbers that will cause assertion failure later on.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109328">Bug 109328</a> - [BSW BXT GLK] dEQP-VK.subgroups.arithmetic.subgroup regressions</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109443">Bug 109443</a> - Build failure with MSVC when using Scons &gt;= 3.0.2</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109451">Bug 109451</a> - [IVB,SNB] LINE_STRIPs following a TRIANGLE_FAN fail to use primitive restart</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109594">Bug 109594</a> - totem assert failure: totem: src/intel/genxml/gen9_pack.h:72: __gen_uint: La declaración `v &lt;= max' no se cumple.</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109597">Bug 109597</a> - wreckfest issues with transparent objects &amp; skybox</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109601">Bug 109601</a> - [Regression] RuneLite GPU rendering broken on 18.3.x</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109698">Bug 109698</a> - dri.pc contents invalid when built with meson</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109735">Bug 109735</a> - [Regression] broken font with mesa_vulkan_overlay</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Alok Hota (1):</p>
+<ul>
+  <li>swr/rast: bypass size limit for non-sampled textures</li>
+</ul>
+
+<p>Andrii Simiklit (1):</p>
+<ul>
+  <li>i965: re-emit index buffer state on a reset option change.</li>
+</ul>
+
+<p>Axel Davy (2):</p>
+<ul>
+  <li>st/nine: Ignore window size if error</li>
+  <li>st/nine: Ignore multisample quality level if no ms</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (4):</p>
+<ul>
+  <li>radv: Sync ETC2 whitelisted devices.</li>
+  <li>radv: Fix float16 interpolation set up.</li>
+  <li>radv: Allow interpolation on non-float types.</li>
+  <li>radv: Interpolate less aggressively.</li>
+</ul>
+
+<p>Carlos Garnacho (1):</p>
+<ul>
+  <li>wayland/egl: Ensure EGL surface is resized on DRI update_buffers()</li>
+</ul>
+
+<p>Danylo Piliaiev (1):</p>
+<ul>
+  <li>glsl/linker: Fix unmatched TCS outputs being reduced to local variable</li>
+</ul>
+
+<p>David Shao (1):</p>
+<ul>
+  <li>meson: ensure that xmlpool_options.h is generated for gallium targets that need it</li>
+</ul>
+
+<p>Eleni Maria Stea (1):</p>
+<ul>
+  <li>i965: fixed clamping in set_scissor_bits when the y is flipped</li>
+</ul>
+
+<p>Emil Velikov (7):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.4</li>
+  <li>meson: egl: correctly manage loader/xmlconfig</li>
+  <li>cherry-ignore: add 19.0 only anv/push buffer nominations</li>
+  <li>cherry-ignore: add gitlab-ci fixup commit</li>
+  <li>cherry-ignore: ignore glsl_types memory cleanup patch</li>
+  <li>cherry-ignore: add explicit 19.0 performance optimisations</li>
+  <li>Update version to 18.3.5</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>egl: fix libdrm-less builds</li>
+</ul>
+
+<p>Francisco Jerez (1):</p>
+<ul>
+  <li>intel/fs: Implement extended strides greater than 4 for IR source regions.</li>
+</ul>
+
+<p>Ian Romanick (2):</p>
+<ul>
+  <li>intel/fs: nir_op_extract_i8 extracts a byte, not a word</li>
+  <li>intel/fs: Fix extract_u8 of an odd byte from a 64-bit integer</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>glsl: fix recording of variables for XFB in TCS shaders</li>
+</ul>
+
+<p>Jason Ekstrand (10):</p>
+<ul>
+  <li>intel/fs: Bail in optimize_extract_to_float if we have modifiers</li>
+  <li>compiler/types: Add a contains_64bit helper</li>
+  <li>nir/xfb: Properly align 64-bit values</li>
+  <li>nir/xfb: Work in terms of components rather than slots</li>
+  <li>nir/xfb: Handle compact arrays in gather_xfb_info</li>
+  <li>anv: Count surfaces for non-YCbCr images in GetDescriptorSetLayoutSupport</li>
+  <li>spirv: OpImageQueryLod requires a sampler</li>
+  <li>spirv: Pull offset/stride from the pointer for OpArrayLength</li>
+  <li>glsl/list: Add a list variant of insert_after</li>
+  <li>glsl/lower_vector_derefs: Don't use a temporary for TCS outputs</li>
+</ul>
+
+<p>Jose Maria Casanova Crespo (1):</p>
+<ul>
+  <li>glsl: TCS outputs can not be transform feedback candidates on GLES</li>
+</ul>
+
+<p>José Fonseca (1):</p>
+<ul>
+  <li>scons: Workaround failures with MSVC when using SCons 3.0.[2-4].</li>
+</ul>
+
+<p>Juan A. Suarez Romero (3):</p>
+<ul>
+  <li>genxml: add missing field values for 3DSTATE_SF</li>
+  <li>anv: advertise 8 subpixel precision bits</li>
+  <li>anv: destroy descriptor sets when pool gets reset</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>intel/fs: Fix opt_peephole_csel to not throw away saturates.</li>
+</ul>
+
+<p>Kevin Strasser (1):</p>
+<ul>
+  <li>egl/dri: Avoid out of bounds array access</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>intel: fix urb size for CFL GT1</li>
+</ul>
+
+<p>Marek Olšák (5):</p>
+<ul>
+  <li>radeonsi: add driconf option radeonsi_enable_nir</li>
+  <li>radeonsi: always enable NIR for Civilization 6 to fix corruption</li>
+  <li>driconf: add Civ6Sub executable for Civilization 6</li>
+  <li>tgsi: don't set tgsi_info::uses_bindless_images for constbufs and hw atomics</li>
+  <li>radeonsi: compile clear and copy buffer compute shaders on demand</li>
+</ul>
+
+<p>Mauro Rossi (2):</p>
+<ul>
+  <li>android: anv: fix generated files dependencies (v2)</li>
+  <li>android: anv: fix libexpat shared dependency</li>
+</ul>
+
+<p>Ray Zhang (1):</p>
+<ul>
+  <li>glx: fix shared memory leak in X11</li>
+</ul>
+
+<p>Rhys Perry (2):</p>
+<ul>
+  <li>radv: bitcast 16-bit outputs to integers</li>
+  <li>radv: ensure export arguments are always float</li>
+</ul>
+
+<p>Samuel Pitoiset (8):</p>
+<ul>
+  <li>radv: write the alpha channel of MRT0 when alpha coverage is enabled</li>
+  <li>radv: fix writing the alpha channel of MRT0 when alpha coverage is enabled</li>
+  <li>radv: fix clearing attachments in secondary command buffers</li>
+  <li>radv: fix out-of-bounds access when copying descriptors BO list</li>
+  <li>radv: don't copy buffer descriptors list for samplers</li>
+  <li>radv: properly align the fence and EOP bug VA on GFX9</li>
+  <li>radv: fix pointSizeRange limits</li>
+  <li>radv: always initialize HTILE when the src layout is UNDEFINED</li>
+</ul>
+
+<p>Sergii Romantsov (2):</p>
+<ul>
+  <li>dri: meson: do not prefix user provided dri-drivers-path</li>
+  <li>d3d: meson: do not prefix user provided d3d-drivers-path</li>
+</ul>
+
+<p>Tapani Pälli (3):</p>
+<ul>
+  <li>nir: initialize value in copy_prop_vars_block</li>
+  <li>anv: retain the is_array state in create_plane_tex_instr_implicit</li>
+  <li>anv: destroy descriptor sets when pool gets destroyed</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>glsl: fix shader cache for packed param list</li>
+</ul>
+
+<p>Yevhenii Kolesnikov (1):</p>
+<ul>
+  <li>i965: Fix allow_higher_compat_version workaround limited by OpenGL 3.0</li>
+</ul>
+
+<p>pal1000 (1):</p>
+<ul>
+  <li>scons: Compatibility with Scons development version string</li>
+</ul>
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/18.3.6.html
+++ b/docs/relnotes/18.3.6.html
@@ -0,0 +1,169 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.6 Release Notes / April 5, 2019</h1>
+
+<p>
+Mesa 18.3.6 is a bug fix release which fixes bugs found since the 18.3.5 release.
+</p>
+<p>
+Mesa 18.3.6 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+4619d92afadf7072f7956599a2ccd0934fc45b4ddbc2eb865bdcb50ddf963f87  mesa-18.3.6.tar.gz
+aaf17638dcf5a90b93b6389e152fdc9ef147768b09598f24d2c5cf482fcfc705  mesa-18.3.6.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100316">Bug 100316</a> - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108766">Bug 108766</a> - Mesa built with meson has RPATH entries</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109648">Bug 109648</a> - AMD Raven hang during va-api decoding</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109980">Bug 109980</a> - [i915 CI][HSW] spec&#64;arb_fragment_shader_interlock&#64;arb_fragment_shader_interlock-image-load-store - fail</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110211">Bug 110211</a> - If DESTDIR is set to an empty string, the dri drivers are not installed</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110221">Bug 110221</a> - build error with meson</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110259">Bug 110259</a> - radv: Sampling depth-stencil image in GENERAL layout returns nothing but zero (regression, bisected)</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (4):</p>
+<ul>
+  <li>glsl: correctly validate component layout qualifier for dvec{3,4}</li>
+  <li>glsl/linker: don't fail non static used inputs without matching outputs</li>
+  <li>glsl/linker: simplify xfb_offset vs xfb_stride overflow check</li>
+  <li>Revert "glsl: relax input-&gt;output validation for SSO programs"</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (2):</p>
+<ul>
+  <li>radv: Use correct image view comparison for fast clears.</li>
+  <li>ac/nir: Return frag_coord as integer.</li>
+</ul>
+
+<p>Danylo Piliaiev (1):</p>
+<ul>
+  <li>glsl: Cross validate variable's invariance by explicit invariance only</li>
+</ul>
+
+<p>Dave Airlie (1):</p>
+<ul>
+  <li>softpipe: fix texture view crashes</li>
+</ul>
+
+<p>Dylan Baker (1):</p>
+<ul>
+  <li>bin/install_megadrivers.py: Correctly handle DESTDIR=''</li>
+</ul>
+
+<p>Emil Velikov (2):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.5</li>
+  <li>Update version to 18.3.6</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>dri3: Return the current swap interval from glXGetSwapIntervalMESA().</li>
+</ul>
+
+<p>Eric Engestrom (1):</p>
+<ul>
+  <li>meson: strip rpath from megadrivers</li>
+</ul>
+
+<p>Jason Ekstrand (2):</p>
+<ul>
+  <li>anv/pass: Flag the need for a RT flush for resolve attachments</li>
+  <li>Revert "anv/radv: release memory allocated by glsl types during spirv_to_nir"</li>
+</ul>
+
+<p>Józef Kucia (2):</p>
+<ul>
+  <li>mesa: Fix GL_NUM_DEVICE_UUIDS_EXT</li>
+  <li>radv: Fix driverUUID</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+  <li>radeon/vcn: add H.264 constrained baseline support</li>
+  <li>radeon/vcn/vp9: search the render target from the whole list</li>
+</ul>
+
+<p>Marek Olšák (1):</p>
+<ul>
+  <li>radeonsi: fix assertion failure by using the correct type</li>
+</ul>
+
+<p>Mark Janes (1):</p>
+<ul>
+  <li>mesa: properly report the length of truncated log messages</li>
+</ul>
+
+<p>Plamena Manolova (1):</p>
+<ul>
+  <li>i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9</li>
+</ul>
+
+<p>Samuel Pitoiset (2):</p>
+<ul>
+  <li>radv: fix binding transform feedback buffers</li>
+  <li>radv: do not always initialize HTILE in compressed state</li>
+</ul>
+
+<p>Tapani Pälli (1):</p>
+<ul>
+  <li>anv/radv: release memory allocated by glsl types during spirv_to_nir</li>
+</ul>
+
+<p>Timothy Arceri (1):</p>
+<ul>
+  <li>st/glsl_to_nir: fix incorrect array access</li>
+</ul>
+
+<p>Tobias Klausmann (1):</p>
+<ul>
+  <li>vulkan/util: meson build - add wayland client include</li>
+</ul>
+
+</div>
+</body>
+</html>
+
+
--- a/docs/repository.html
+++ b/docs/repository.html
@@ -35,9 +35,9 @@ You may access the repository either as an

 <p>
 You may also 
-<a href="https://cgit.freedesktop.org/mesa/mesa/"
+<a href="https://gitlab.freedesktop.org/mesa/mesa"
 >browse the main Mesa git repository</a> and the
-<a href="https://cgit.freedesktop.org/mesa/demos"
+<a href="https://gitlab.freedesktop.org/mesa/demos"
 >Mesa demos and tests git repository</a>.
 </p>

@@ -52,7 +52,7 @@ To get the Mesa sources anonymously (read-only):
 <li>Install the git software on your computer if needed.<br><br>
 <li>Get an initial, local copy of the repository with:
    <pre>
-    git clone git://anongit.freedesktop.org/git/mesa/mesa
+    git clone https://gitlab.freedesktop.org/mesa/mesa.git
    </pre>
 <li>Later, you can update your tree from the master repository with:
    <pre>
@@ -60,7 +60,7 @@ To get the Mesa sources anonymously (read-only):
    </pre>
 <li>If you also want the Mesa demos/tests repository:
    <pre>
-    git clone git://anongit.freedesktop.org/git/mesa/demos
+    git clone https://gitlab.freedesktop.org/mesa/demos.git
    </pre>
 </ol>

@@ -98,24 +98,17 @@ on a particular driver, add a new extension, etc.) in the bugzilla record.
 </ol>

 <p>
-Once your account is established:
-</p>
+Once your account is established, you can update your push URL to use SSH:
+<pre>
+git remote set-url --push <em>origin</em> git@gitlab.freedesktop.org:mesa/mesa.git
+</pre>

-<ol>
-<li>Get an initial, local copy of the repository with:
-    <pre>
-    git clone git+ssh://username@git.freedesktop.org/git/mesa/mesa
-    </pre>
-    Replace <em>username</em> with your actual login name.<br><br>
-<li>Later, you can update your tree from the master repository with:
-    <pre>
-    git pull origin
-    </pre>
-<li>If you also want the Mesa demos/tests repository:
-    <pre>
-    git clone git+ssh://username@git.freedesktop.org/git/mesa/demos
-    </pre>
-</ol>
+You can also use <a href="https://gitlab.freedesktop.org/profile/personal_access_tokens">personal access tokens</a>
+to push over HTTPS instead (useful for people behind strict proxies).
+In this case, create a token, and put it in the URL as shown here:
+<pre>
+git remote set-url --push <em>origin</em> https://<em>USER</em>:<em>TOKEN</em>@gitlab.freedesktop.org/mesa/mesa.git
+</pre>


 <h2>Windows Users</h2>
@@ -149,12 +142,12 @@ code while a branch has the latest stable code.
 </p>

 <p>
-The command <code>git-branch</code> will list all available branches.
+The command <code>git branch</code> will list all available branches.
 </p>

 <p>
 Questions about branch status/activity should be posted to the
-mesa3d-dev mailing list.
+mesa-dev mailing list.
 </p>

 <h2>Developer Git Tips</h2>
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -85,7 +85,7 @@ should match the filenames of the corresponding dumped shaders.
 <p>
 Setting <b>MESA_SHADER_CAPTURE_PATH</b> to a directory will cause the compiler
 to write <tt>.shader_test</tt> files for use with
-<a href="https://cgit.freedesktop.org/mesa/shader-db">shader-db</a>, a tool
+<a href="https://gitlab.freedesktop.org/mesa/shader-db">shader-db</a>, a tool
 which compiler developers can use to gather statistics about shaders
 (instructions, cycles, memory accesses, and so on).
 </p>
--- a/docs/sourcedocs.html
+++ b/docs/sourcedocs.html
@@ -31,7 +31,7 @@ the <code>doxygen</code> directory and run <code>make</code>.

 <p>
 For an example of Doxygen usage in Mesa, see a recent source file
-such as <a href="https://cgit.freedesktop.org/mesa/mesa/tree/src/mesa/main/bufferobj.c">bufferobj.c</a>.
+such as <a href="https://gitlab.freedesktop.org/mesa/mesa/blob/master/src/mesa/main/bufferobj.c">bufferobj.c</a>.
 </p>


--- a/docs/specs/EGL_MESA_device_software.txt
+++ b/docs/specs/EGL_MESA_device_software.txt
@@ -0,0 +1,82 @@
+Name
+
+    MESA_device_software
+
+Name Strings
+
+    EGL_MESA_device_software
+
+Contributors
+
+    Adam Jackson <ajax@redhat.com>
+    Emil Velikov <emil.velikov@collabora.com>
+
+Contacts
+
+    Adam Jackson <ajax@redhat.com>
+
+Status
+
+    DRAFT
+
+Version
+
+    Version 2, 2018-10-03
+
+Number
+
+    EGL Extension #TODO
+
+Extension Type
+
+    EGL device extension
+
+Dependencies
+
+    Requires EGL_EXT_device_query.
+
+    This extension is written against the EGL 1.5 Specification.
+
+Overview
+
+    This extension defines a software EGL "device". The device is not backed by
+    any actual device node and simply renders into client memory.
+
+    By defining this as an extension, EGL_EXT_device_enumeration is able to
+    sanely enumerate a software device.
+
+New Types
+
+    None
+
+New Procedures and Functions
+
+    None
+
+New Tokens
+
+    None
+
+Additions to the EGL Specification
+
+    None
+
+New Behavior
+
+    The device list produced by eglQueryDevicesEXT will include a software
+    device. This can be distinguished from other device classes in the usual
+    way by calling eglQueryDeviceStringEXT(EGL_EXTENSIONS) and matching this
+    extension's string in the result.
+
+Issues
+
+    None
+
+Revision History
+
+    Version 2, 2018-10-03 (Emil Velikov)
+        - Drop "fallback" from "software fallback device"
+        - Add Emil Velikov as contributor
+
+    Version 1, 2017-07-06 (Adam Jackson)
+        - Initial version
--- a/docs/specs/INTEL_shader_atomic_float_minmax.txt
+++ b/docs/specs/INTEL_shader_atomic_float_minmax.txt
@@ -0,0 +1,200 @@
+Name
+
+    INTEL_shader_atomic_float_minmax
+
+Name Strings
+
+    GL_INTEL_shader_atomic_float_minmax
+
+Contact
+
+    Ian Romanick (ian . d . romanick 'at' intel . com)
+
+Contributors
+
+
+Status
+
+    In progress
+
+Version
+
+    Last Modified Date: 06/22/2018
+    Revision: 4
+
+Number
+
+    TBD
+
+Dependencies
+
+    OpenGL 4.2, OpenGL ES 3.1, ARB_shader_storage_buffer_object, or
+    ARB_compute_shader is required.
+
+    This extension is written against version 4.60 of the OpenGL Shading
+    Language Specification.
+
+Overview
+
+    This extension provides GLSL built-in functions allowing shaders to
+    perform atomic read-modify-write operations to floating-point buffer
+    variables and shared variables.  Minimum, maximum, exchange, and
+    compare-and-swap are enabled.
+
+
+New Procedures and Functions
+
+    None.
+
+New Tokens
+
+    None.
+
+IP Status
+
+    None.
+
+Modifications to the OpenGL Shading Language Specification, Version 4.60
+
+    Including the following line in a shader can be used to control the
+    language features described in this extension:
+
+      #extension GL_INTEL_shader_atomic_float_minmax : <behavior>
+
+    where <behavior> is as specified in section 3.3.
+
+    New preprocessor #defines are added to the OpenGL Shading Language:
+
+      #define GL_INTEL_shader_atomic_float_minmax   1
+
+Additions to Chapter 8 of the OpenGL Shading Language Specification
+(Built-in Functions)
+
+    Modify Section 8.11, "Atomic Memory Functions"
+
+    (add a new row after the existing "atomicMin" table row, p. 179)
+
+        float atomicMin(inout float mem, float data)
+
+
+        Computes a new value by taking the minimum of the value of data and
+        the contents of mem.  If one of these is an IEEE signaling NaN (i.e.,
+        a NaN with the most-significant bit of the mantissa cleared), it is
+        always considered smaller.  If one of these is an IEEE quiet NaN
+        (i.e., a NaN with the most-significant bit of the mantissa set), it is
+        always considered larger.  If both are IEEE quiet NaNs or both are
+        IEEE signaling NaNs, the result of the comparison is undefined.
+
+    (add a new row after the existing "atomicMax" table row, p. 179)
+
+        float atomicMax(inout float mem, float data)
+
+        Computes a new value by taking the maximum of the value of data and
+        the contents of mem.  If one of these is an IEEE signaling NaN (i.e.,
+        a NaN with the most-significant bit of the mantissa cleared), it is
+        always considered larger.  If one of these is an IEEE quiet NaN (i.e.,
+        a NaN with the most-significant bit of the mantissa set), it is always
+        considered smaller.  If both are IEEE quiet NaNs or both are IEEE
+        signaling NaNs, the result of the comparison is undefined.
+
+    (add to "atomicExchange" table cell, p. 180)
+
+        float atomicExchange(inout float mem, float data)
+
+    (add to "atomicCompSwap" table cell, p. 180)
+
+        float atomicCompSwap(inout float mem, float compare, float data)
+
+Interactions with OpenGL 4.6 and ARB_gl_spirv
+
+    If OpenGL 4.6 or ARB_gl_spirv is supported, then
+    SPV_INTEL_shader_atomic_float_minmax must also be supported.
+
+    The AtomicFloatMinmaxINTEL capability is available whenever the OpenGL or
+    OpenGL ES implementation supports INTEL_shader_atomic_float_minmax.
+
+Issues
+
+    1) Why call this extension INTEL_shader_atomic_float_minmax?
+
+    RESOLVED: Several other extensions already set the precedent of
+    VENDOR_shader_atomic_float and VENDOR_shader_atomic_float64 for extensions
+    that enable floating-point atomic operations.  Using that as a base for
+    the name seems logical.
+
+    There already exists NV_shader_atomic_float, but the two extensions have
+    nearly zero overlap in functionality.  NV_shader_atomic_float adds
+    atomicAdd and image atomic operations that currently shipping Intel GPUs
+    do not support.  Calling this extension INTEL_shader_atomic_float would
+    likely have been confusing.
+
+    Adding something to describe the actual functions added by this extension
+    seemed reasonable.  INTEL_shader_atomic_float_compare was considered, but
+    that name was deemed to be not properly descriptive.  Calling this
+    extension INTEL_shader_atomic_float_min_max_exchange_compswap is right
+    out.
+
+    2) What atomic operations should we support for floating-point targets?
+
+    RESOLVED.  Exchange, min, max, and compare-swap make sense, and these are
+    all supported by the hardware.  Future extensions may add other functions.
+
+    For buffer variables and shared variables it is not possible to bit-cast
+    the memory location in GLSL, so existing integer operations, such as
+    atomicOr, cannot be used.  However, the underlying hardware implementation
+    can do this by treating the memory as an integer.  It would be possible to
+    implement atomicNegate using this technique with atomicXor.  It is unclear
+    whether this provides any actual utility.
+
+    3) What should be said about the NaN behavior?
+
+    RESOLVED.  There are several aspects of NaN behavior that should be
+    documented in this extension.  However, some of this behavior varies based
+    on NaN concepts that do not exist in the GLSL specification.
+
+    * atomicCompSwap performs the comparison as the floating-point equality
+      operator (==).  That is, if either 'mem' or 'compare' is NaN, the
+      comparison result is always false.
+
+    * atomicMin and atomicMax implement the IEEE specification with respect to
+      NaN.  IEEE considers two different kinds of NaN: signaling NaN and quiet
+      NaN.  A quiet NaN has the most significant bit of the mantissa set, and
+      a signaling NaN does not.  This concept does not exist in SPIR-V,
+      Vulkan, or OpenGL.  Let qNaN denote a quiet NaN and sNaN denote a
+      signaling NaN.  atomicMin and atomicMax specifically implement
+
+      - fmin(qNaN, x) = fmin(x, qNaN) = fmax(qNaN, x) = fmax(x, qNaN) = x
+      - fmin(sNaN, x) = fmin(x, sNaN) = fmax(sNaN, x) = fmax(x, sNaN) = sNaN
+      - fmin(sNaN, qNaN) = fmin(qNaN, sNaN) = fmax(sNaN, qNaN) =
+        fmax(qNaN, sNaN) = sNaN
+      - fmin(sNaN, sNaN) = sNaN.  This specification does not define which of
+        the two arguments is stored.
+      - fmax(sNaN, sNaN) = sNaN.  This specification does not define which of
+        the two arguments is stored.
+      - fmin(qNaN, qNaN) = qNaN.  This specification does not define which of
+        the two arguments is stored.
+      - fmax(qNaN, qNaN) = qNaN.  This specification does not define which of
+        the two arguments is stored.
+
+    Further details are available in the Skylake Programmer's Reference
+    Manuals available at
+    https://01.org/linuxgraphics/documentation/hardware-specification-prms.
+
+    4) What about atomicMin and atomicMax with (+0.0, -0.0) or (-0.0, +0.0)
+    arguments?
+
+    RESOLVED.  atomicMin should store -0.0, and atomicMax should store +0.0.
+    Due to a known issue in shipping Skylake GPUs, the incorrectly signed 0 is
+    stored.  This behavior may change in later GPUs.
+
+Revision History
+
+    Rev  Date        Author    Changes
+    ---  ----------  --------  ---------------------------------------------
+      1  04/19/2018  idr       Initial version
+      2  05/05/2018  idr       Describe interactions with the capabilities
+                               added by SPV_INTEL_shader_atomic_float_minmax.
+      3  05/29/2018  idr       Remove mention of 64-bit float support.
+      4  06/22/2018  idr       Resolve issue #2.
+                               Add issue #3 (regarding NaN behavior).
+                               Add issue #4 (regarding atomicMin(-0, +0)).
--- a/docs/submittingpatches.html
+++ b/docs/submittingpatches.html
@@ -251,6 +251,9 @@ If you are not the author of the original patch, please Cc: them in your
 nomination request.
 </p>

+<p>
+The current patch status can be observed in the <a href="releasing.html#stagingbranch">staging branch</a>.
+</p>

 <h3 id="thetag">The stable tag</h3>

--- a/docs/utilities.html
+++ b/docs/utilities.html
@@ -17,7 +17,7 @@
 <h1>Development Utilities</h1>

 <dl>
-  <dt><a href="https://cgit.freedesktop.org/mesa/demos">Mesa demos collection</a></dt>
+  <dt><a href="https://gitlab.freedesktop.org/mesa/demos">Mesa demos collection</a></dt>
  <dd>includes several utility routines in the <code>src/util/</code>
  directory.</dd>

--- a/docs/vmware-guest.html
+++ b/docs/vmware-guest.html
@@ -43,6 +43,23 @@ This requires:
 Otherwise, OpenGL 2.1 is supported.
 </p>

+<p>
+With the Fall 2018 Workstation 15 / Fusion 11 releases, additional
+features are supported in the driver:
+<ul>
+<li>Multisample antialiasing (2x, 4x)
+<li>GL_ARB/AMD_draw_buffers_blend
+<li>GL_ARB_sample_shading
+<li>GL_ARB_texture_cube_map_array
+<li>GL_ARB_texture_gather
+<li>GL_ARB_texture_query_lod
+<li>GL_EXT/OES_draw_buffers_indexed
+</ul>
+<p>
+This requires version 2.15.0 or later of the vmwgfx kernel module and
+the VM must be configured for hardware version 16 or later.
+</p>
+
 <p>
 OpenGL 3.3 support can be disabled by setting the environment variable
 SVGA_VGPU10=0.
@@ -126,7 +143,7 @@ Begin by saving your current directory location:
 <ul>
 <li>Mesa/Gallium master branch. This code is used to build libGL, and the direct rendering svga driver for libGL, vmwgfx_dri.so, and the X acceleration library libxatracker.so.x.x.x. 
  <pre>
-  git clone git://anongit.freedesktop.org/git/mesa/mesa
+  git clone https://gitlab.freedesktop.org/mesa/mesa.git
  </pre>
 <li>VMware Linux guest kernel module. Note that this repo contains the complete DRM and TTM code. The vmware-specific driver is really only the files prefixed with vmwgfx. 
  <pre>
@@ -136,7 +153,7 @@ Begin by saving your current directory location:
 Most distros ship with this but it's safest to install a newer version.
 To get the latest code from git:
  <pre>
-  git clone git://anongit.freedesktop.org/git/mesa/drm
+  git clone https://gitlab.freedesktop.org/mesa/drm.git
  </pre>
 <li>xf86-video-vmware. The chainloading driver, vmware_drv.so, the legacy driver vmwlegacy_drv.so, and the vmwgfx driver vmwgfx_drv.so. 
  <pre>
--- a/include/EGL/eglext.h
+++ b/include/EGL/eglext.h
@@ -33,12 +33,12 @@ extern "C" {
 ** used to make the header, and the header can be found at
 **   http://www.khronos.org/registry/egl
 **
-** Khronos $Git commit SHA1: a732b061e7 $ on $Git commit date: 2017-06-17 23:27:53 +0100 $
+** Khronos $Git commit SHA1: bae3518c48 $ on $Git commit date: 2018-05-17 10:56:57 -0700 $
 */

 #include <EGL/eglplatform.h>

-#define EGL_EGLEXT_VERSION 20170627
+#define EGL_EGLEXT_VERSION 20180517

 /* Generated C header for:
 * API: egl
@@ -495,6 +495,47 @@ EGLAPI EGLClientBuffer EGLAPIENTRY eglCreateNativeClientBufferANDROID (const EGL
 #define EGL_FRONT_BUFFER_AUTO_REFRESH_ANDROID 0x314C
 #endif /* EGL_ANDROID_front_buffer_auto_refresh */

+#ifndef EGL_ANDROID_get_frame_timestamps
+#define EGL_ANDROID_get_frame_timestamps 1
+typedef khronos_stime_nanoseconds_t EGLnsecsANDROID;
+#define EGL_TIMESTAMP_PENDING_ANDROID     EGL_CAST(EGLnsecsANDROID,-2)
+#define EGL_TIMESTAMP_INVALID_ANDROID     EGL_CAST(EGLnsecsANDROID,-1)
+#define EGL_TIMESTAMPS_ANDROID            0x3430
+#define EGL_COMPOSITE_DEADLINE_ANDROID    0x3431
+#define EGL_COMPOSITE_INTERVAL_ANDROID    0x3432
+#define EGL_COMPOSITE_TO_PRESENT_LATENCY_ANDROID 0x3433
+#define EGL_REQUESTED_PRESENT_TIME_ANDROID 0x3434
+#define EGL_RENDERING_COMPLETE_TIME_ANDROID 0x3435
+#define EGL_COMPOSITION_LATCH_TIME_ANDROID 0x3436
+#define EGL_FIRST_COMPOSITION_START_TIME_ANDROID 0x3437
+#define EGL_LAST_COMPOSITION_START_TIME_ANDROID 0x3438
+#define EGL_FIRST_COMPOSITION_GPU_FINISHED_TIME_ANDROID 0x3439
+#define EGL_DISPLAY_PRESENT_TIME_ANDROID  0x343A
+#define EGL_DEQUEUE_READY_TIME_ANDROID    0x343B
+#define EGL_READS_DONE_TIME_ANDROID       0x343C
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCOMPOSITORTIMINGSUPPORTEDANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLint name);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETCOMPOSITORTIMINGANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLint numTimestamps,  const EGLint *names, EGLnsecsANDROID *values);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETNEXTFRAMEIDANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR *frameId);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETFRAMETIMESTAMPSUPPORTEDANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLint timestamp);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLGETFRAMETIMESTAMPSANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR frameId, EGLint numTimestamps,  const EGLint *timestamps, EGLnsecsANDROID *values);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglGetCompositorTimingSupportedANDROID (EGLDisplay dpy, EGLSurface surface, EGLint name);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetCompositorTimingANDROID (EGLDisplay dpy, EGLSurface surface, EGLint numTimestamps,  const EGLint *names, EGLnsecsANDROID *values);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetNextFrameIdANDROID (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR *frameId);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetFrameTimestampSupportedANDROID (EGLDisplay dpy, EGLSurface surface, EGLint timestamp);
+EGLAPI EGLBoolean EGLAPIENTRY eglGetFrameTimestampsANDROID (EGLDisplay dpy, EGLSurface surface, EGLuint64KHR frameId, EGLint numTimestamps,  const EGLint *timestamps, EGLnsecsANDROID *values);
+#endif
+#endif /* EGL_ANDROID_get_frame_timestamps */
+
+#ifndef EGL_ANDROID_get_native_client_buffer
+#define EGL_ANDROID_get_native_client_buffer 1
+struct AHardwareBuffer;
+typedef EGLClientBuffer (EGLAPIENTRYP PFNEGLGETNATIVECLIENTBUFFERANDROIDPROC) (const struct AHardwareBuffer *buffer);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLClientBuffer EGLAPIENTRY eglGetNativeClientBufferANDROID (const struct AHardwareBuffer *buffer);
+#endif
+#endif /* EGL_ANDROID_get_native_client_buffer */
+
 #ifndef EGL_ANDROID_image_native_buffer
 #define EGL_ANDROID_image_native_buffer 1
 #define EGL_NATIVE_BUFFER_ANDROID         0x3140
@@ -514,7 +555,6 @@ EGLAPI EGLint EGLAPIENTRY eglDupNativeFenceFDANDROID (EGLDisplay dpy, EGLSyncKHR

 #ifndef EGL_ANDROID_presentation_time
 #define EGL_ANDROID_presentation_time 1
-typedef khronos_stime_nanoseconds_t EGLnsecsANDROID;
 typedef EGLBoolean (EGLAPIENTRYP PFNEGLPRESENTATIONTIMEANDROIDPROC) (EGLDisplay dpy, EGLSurface surface, EGLnsecsANDROID time);
 #ifdef EGL_EGLEXT_PROTOTYPES
 EGLAPI EGLBoolean EGLAPIENTRY eglPresentationTimeANDROID (EGLDisplay dpy, EGLSurface surface, EGLnsecsANDROID time);
@@ -578,6 +618,16 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQuerySurfacePointerANGLE (EGLDisplay dpy, EGLSu
 #define EGL_EXT_client_extensions 1
 #endif /* EGL_EXT_client_extensions */

+#ifndef EGL_EXT_client_sync
+#define EGL_EXT_client_sync 1
+#define EGL_SYNC_CLIENT_EXT               0x3364
+#define EGL_SYNC_CLIENT_SIGNAL_EXT        0x3365
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLCLIENTSIGNALSYNCEXTPROC) (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglClientSignalSyncEXT (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list);
+#endif
+#endif /* EGL_EXT_client_sync */
+
 #ifndef EGL_EXT_compositor
 #define EGL_EXT_compositor 1
 #define EGL_PRIMARY_COMPOSITOR_CONTEXT_EXT 0x3460
@@ -723,6 +773,11 @@ EGLAPI EGLBoolean EGLAPIENTRY eglQueryDmaBufModifiersEXT (EGLDisplay dpy, EGLint
 #endif
 #endif /* EGL_EXT_image_dma_buf_import_modifiers */

+#ifndef EGL_EXT_image_gl_colorspace
+#define EGL_EXT_image_gl_colorspace 1
+#define EGL_GL_COLORSPACE_DEFAULT_EXT     0x314D
+#endif /* EGL_EXT_image_gl_colorspace */
+
 #ifndef EGL_EXT_image_implicit_sync_control
 #define EGL_EXT_image_implicit_sync_control 1
 #define EGL_IMPORT_SYNC_TYPE_EXT          0x3470
@@ -858,6 +913,14 @@ EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersWithDamageEXT (EGLDisplay dpy, EGLSu
 #endif
 #endif /* EGL_EXT_swap_buffers_with_damage */

+#ifndef EGL_EXT_sync_reuse
+#define EGL_EXT_sync_reuse 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLUNSIGNALSYNCEXTPROC) (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglUnsignalSyncEXT (EGLDisplay dpy, EGLSync sync, const EGLAttrib *attrib_list);
+#endif
+#endif /* EGL_EXT_sync_reuse */
+
 #ifndef EGL_EXT_yuv_surface
 #define EGL_EXT_yuv_surface 1
 #define EGL_YUV_ORDER_EXT                 0x3301
@@ -988,6 +1051,11 @@ EGLAPI EGLBoolean EGLAPIENTRY eglSwapBuffersRegion2NOK (EGLDisplay dpy, EGLSurfa
 #define EGL_AUTO_STEREO_NV                0x3136
 #endif /* EGL_NV_3dvision_surface */

+#ifndef EGL_NV_context_priority_realtime
+#define EGL_NV_context_priority_realtime 1
+#define EGL_CONTEXT_PRIORITY_REALTIME_NV  0x3357
+#endif /* EGL_NV_context_priority_realtime */
+
 #ifndef EGL_NV_coverage_sample
 #define EGL_NV_coverage_sample 1
 #define EGL_COVERAGE_BUFFERS_NV           0x30E0
@@ -1055,9 +1123,9 @@ EGLAPI EGLBoolean EGLAPIENTRY eglPostSubBufferNV (EGLDisplay dpy, EGLSurface sur
 #define EGL_YUV_PLANE0_TEXTURE_UNIT_NV    0x332C
 #define EGL_YUV_PLANE1_TEXTURE_UNIT_NV    0x332D
 #define EGL_YUV_PLANE2_TEXTURE_UNIT_NV    0x332E
-typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALATTRIBSNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, EGLAttrib *attrib_list);
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMCONSUMERGLTEXTUREEXTERNALATTRIBSNVPROC) (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
 #ifdef EGL_EGLEXT_PROTOTYPES
-EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalAttribsNV (EGLDisplay dpy, EGLStreamKHR stream, EGLAttrib *attrib_list);
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalAttribsNV (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
 #endif
 #endif /* EGL_NV_stream_consumer_gltexture_yuv */

@@ -1097,6 +1165,14 @@ EGLAPI EGLBoolean EGLAPIENTRY eglStreamConsumerGLTextureExternalAttribsNV (EGLDi
 #define EGL_STREAM_FIFO_SYNCHRONOUS_NV    0x3336
 #endif /* EGL_NV_stream_fifo_synchronous */

+#ifndef EGL_NV_stream_flush
+#define EGL_NV_stream_flush 1
+typedef EGLBoolean (EGLAPIENTRYP PFNEGLSTREAMFLUSHNVPROC) (EGLDisplay dpy, EGLStreamKHR stream);
+#ifdef EGL_EGLEXT_PROTOTYPES
+EGLAPI EGLBoolean EGLAPIENTRY eglStreamFlushNV (EGLDisplay dpy, EGLStreamKHR stream);
+#endif
+#endif /* EGL_NV_stream_flush */
+
 #ifndef EGL_NV_stream_frame_limits
 #define EGL_NV_stream_frame_limits 1
 #define EGL_PRODUCER_MAX_FRAME_HINT_NV    0x3337
--- a/include/GL/gl.h
+++ b/include/GL/gl.h
@@ -2086,7 +2086,7 @@ typedef void (APIENTRYP PFNGLBLENDEQUATIONSEPARATEATIPROC) (GLenum modeRGB, GLen


 /* GL_OES_EGL_image */
-#ifndef GL_OES_EGL_image
+#if !defined(GL_OES_EGL_image) && !defined(GL_EXT_EGL_image_storage)
 typedef void* GLeglImageOES;
 #endif

--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -589,7 +589,7 @@ struct __DRIdamageExtensionRec {
 * SWRast Loader extension.
 */
 #define __DRI_SWRAST_LOADER "DRI_SWRastLoader"
-#define __DRI_SWRAST_LOADER_VERSION 4
+#define __DRI_SWRAST_LOADER_VERSION 5
 struct __DRIswrastLoaderExtensionRec {
    __DRIextension base;

@@ -649,6 +649,23 @@ struct __DRIswrastLoaderExtensionRec {
    void (*getImageShm)(__DRIdrawable *readable,
                        int x, int y, int width, int height,
                        int shmid, void *loaderPrivate);
+
+   /**
+     * Put shm image to drawable (v2)
+     *
+     * The original version fixes srcx/y to 0, and expects
+     * the offset to be adjusted. This version allows src x,y
+     * to not be included in the offset. This is needed to
+     * avoid certain overflow checks in the X server, that
+     * result in lost rendering.
+     *
+     * \since 5
+     */
+    void (*putImageShm2)(__DRIdrawable *drawable, int op,
+                         int x, int y,
+                         int width, int height, int stride,
+                         int shmid, char *shmaddr, unsigned offset,
+                         void *loaderPrivate);
 };

 /**
@@ -746,7 +763,8 @@ struct __DRIuseInvalidateExtensionRec {
 #define __DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS	46
 #define __DRI_ATTRIB_YINVERTED			47
 #define __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE	48
-#define __DRI_ATTRIB_MAX			(__DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE + 1)
+#define __DRI_ATTRIB_MUTABLE_RENDER_BUFFER	49 /* EGL_MUTABLE_RENDER_BUFFER_BIT_KHR */
+#define __DRI_ATTRIB_MAX			50

 /* __DRI_ATTRIB_RENDER_TYPE */
 #define __DRI_ATTRIB_RGBA_BIT			0x01	
@@ -1888,9 +1906,57 @@ struct __DRI2rendererQueryExtensionRec {
 * Image Loader extension. Drivers use this to allocate color buffers
 */

+/**
+ * See __DRIimageLoaderExtensionRec::getBuffers::buffer_mask.
+ */
 enum __DRIimageBufferMask {
   __DRI_IMAGE_BUFFER_BACK = (1 << 0),
-   __DRI_IMAGE_BUFFER_FRONT = (1 << 1)
+   __DRI_IMAGE_BUFFER_FRONT = (1 << 1),
+
+   /**
+    * A buffer shared between application and compositor. The buffer may be
+    * simultaneously accessed by each.
+    *
+    * A shared buffer is equivalent to an EGLSurface whose EGLConfig contains
+    * EGL_MUTABLE_RENDER_BUFFER_BIT_KHR and whose active EGL_RENDER_BUFFER (as
+    * opposed to any pending, requested change to EGL_RENDER_BUFFER) is
+    * EGL_SINGLE_BUFFER.
+    *
+    * If buffer_mask contains __DRI_IMAGE_BUFFER_SHARED, then it must contain no
+    * other bits. As a corollary, a __DRIdrawable that has a "shared" buffer
+    * has no front nor back buffer.
+    *
+    * The loader returns __DRI_IMAGE_BUFFER_SHARED in buffer_mask if and only
+    * if:
+    *     - The loader supports __DRI_MUTABLE_RENDER_BUFFER_LOADER.
+    *     - The driver supports __DRI_MUTABLE_RENDER_BUFFER_DRIVER.
+    *     - The EGLConfig of the drawable EGLSurface contains
+    *       EGL_MUTABLE_RENDER_BUFFER_BIT_KHR.
+    *     - The EGLContext's EGL_RENDER_BUFFER is EGL_SINGLE_BUFFER.
+    *       Equivalently, the EGLSurface's active EGL_RENDER_BUFFER (as
+    *       opposed to any pending, requested change to EGL_RENDER_BUFFER) is
+    *       EGL_SINGLE_BUFFER. (See the EGL 1.5 and
+    *       EGL_KHR_mutable_render_buffer spec for details about "pending" vs
+    *       "active" EGL_RENDER_BUFFER state).
+    *
+    * A shared buffer is similar to a front buffer in that all rendering to the
+    * buffer should appear promptly on the screen. It is different from
+    * a front buffer in that its behavior is independent from the
+    * GL_DRAW_BUFFER state. Specifically, if GL_DRAW_FRAMEBUFFER is 0 and the
+    * __DRIdrawable's buffer_mask is __DRI_IMAGE_BUFFER_SHARED, then all
+    * rendering should appear promptly on the screen if GL_DRAW_BUFFER is not
+    * GL_NONE.
+    *
+    * The difference between a shared buffer and a front buffer is motivated
+    * by the constraints of Android and OpenGL ES. OpenGL ES does not support
+    * front-buffer rendering. Android's SurfaceFlinger protocol provides the
+    * EGL driver only a back buffer and no front buffer. The shared buffer
+    * mode introduced by EGL_KHR_mutable_render_buffer is a backdoor through
+    * EGL that allows Android OpenGL ES applications to render to what is
+    * effectively the front buffer, a backdoor that required no change to the
+    * OpenGL ES API and little change to the SurfaceFlinger API.
+    */
+   __DRI_IMAGE_BUFFER_SHARED = (1 << 2),
 };

 struct __DRIimageList {
@@ -1915,7 +1981,8 @@ struct __DRIimageLoaderExtensionRec {
    * \param stamp              Address of variable to be updated when
    *                           getBuffers must be called again
    * \param loaderPrivate      The loaderPrivate for driDrawable
-    * \param buffer_mask        Set of buffers to allocate
+    * \param buffer_mask        Set of buffers to allocate. A bitmask of
+    *                           __DRIimageBufferMask.
    * \param buffers            Returned buffers
    */
   int (*getBuffers)(__DRIdrawable *driDrawable,
@@ -2029,4 +2096,85 @@ struct __DRIbackgroundCallableExtensionRec {
   GLboolean (*isThreadSafe)(void *loaderPrivate);
 };

+/**
+ * The driver portion of EGL_KHR_mutable_render_buffer.
+ *
+ * If the driver creates a __DRIconfig with
+ * __DRI_ATTRIB_MUTABLE_RENDER_BUFFER, then it must support this extension.
+ *
+ * To support this extension:
+ *
+ *    - The driver should create at least one __DRIconfig with
+ *      __DRI_ATTRIB_MUTABLE_RENDER_BUFFER. This is strongly recommended but
+ *      not required.
+ *
+ *    - The driver must be able to handle __DRI_IMAGE_BUFFER_SHARED if
+ *      returned by __DRIimageLoaderExtension:getBuffers().
+ *
+ *    - When rendering to __DRI_IMAGE_BUFFER_SHARED, it must call
+ *      __DRImutableRenderBufferLoaderExtension::displaySharedBuffer() in
+ *      response to glFlush and glFinish.  (This requirement is not documented
+ *      in EGL_KHR_mutable_render_buffer, but is a de-facto requirement in the
+ *      Android ecosystem. Android applications expect that glFlush will
+ *      immediately display the buffer when in shared buffer mode, and Android
+ *      drivers comply with this expectation).  It :may: call
+ *      displaySharedBuffer() more often than required.
+ *
+ *    - When rendering to __DRI_IMAGE_BUFFER_SHARED, it must ensure that the
+ *      buffer is always in a format compatible for display because the
+ *      display engine (usually SurfaceFlinger or hwcomposer) may display the
+ *      image at any time, even concurrently with 3D rendering. For example,
+ *      display hardware and the GL hardware may be able to access the buffer
+ *      simultaneously. In particular, if the buffer is compressed then take
+ *      care that SurfaceFlinger and hwcomposer can consume the compression
+ *      format.
+ *
+ * \see __DRI_IMAGE_BUFFER_SHARED
+ * \see __DRI_ATTRIB_MUTABLE_RENDER_BUFFER
+ * \see __DRI_MUTABLE_RENDER_BUFFER_LOADER
+ */
+#define __DRI_MUTABLE_RENDER_BUFFER_DRIVER "DRI_MutableRenderBufferDriver"
+#define __DRI_MUTABLE_RENDER_BUFFER_DRIVER_VERSION 1
+
+typedef struct __DRImutableRenderBufferDriverExtensionRec __DRImutableRenderBufferDriverExtension;
+struct __DRImutableRenderBufferDriverExtensionRec {
+   __DRIextension base;
+};
+
+/**
+ * The loader portion of EGL_KHR_mutable_render_buffer.
+ *
+ * Requires loader extension DRI_IMAGE_LOADER, through which the loader sends
+ * __DRI_IMAGE_BUFFER_SHARED to the driver.
+ *
+ * \see __DRI_MUTABLE_RENDER_BUFFER_DRIVER
+ */
+#define __DRI_MUTABLE_RENDER_BUFFER_LOADER "DRI_MutableRenderBufferLoader"
+#define __DRI_MUTABLE_RENDER_BUFFER_LOADER_VERSION 1
+
+typedef struct __DRImutableRenderBufferLoaderExtensionRec __DRImutableRenderBufferLoaderExtension;
+struct __DRImutableRenderBufferLoaderExtensionRec {
+   __DRIextension base;
+
+   /**
+    * Inform the display engine (that is, SurfaceFlinger and/or hwcomposer)
+    * that the __DRIdrawable has new content.
+    *
+    * The display engine may ignore this call, for example, if it continually
+    * refreshes and displays the buffer on every frame, as in
+    * EGL_ANDROID_front_buffer_auto_refresh. On the other extreme, the display
+    * engine may refresh and display the buffer only in frames in which the
+    * driver calls this.
+    *
+    * If the fence_fd is not -1, then the display engine will display the
+    * buffer only after the fence signals.
+    *
+    * The drawable's current __DRIimageBufferMask, as returned by
+    * __DRIimageLoaderExtension::getBuffers(), must be
+    * __DRI_IMAGE_BUFFER_SHARED.
+    */
+   void (*displaySharedBuffer)(__DRIdrawable *drawable, int fence_fd,
+                               void *loaderPrivate);
+};
+
 #endif
--- a/include/c11/threads_win32.h
+++ b/include/c11/threads_win32.h
@@ -76,18 +76,8 @@ Configuration macro:
 #endif

 /* Visual Studio 2015 and later */
-#if _MSC_VER >= 1900
-#define HAVE_TIMESPEC
+#ifdef _MSC_VER
 #define HAVE_TIMESPEC_GET
-#elif defined(__MINGW32__)
-#define HAVE_TIMESPEC
-#endif
-
-#ifndef HAVE_TIMESPEC
-struct timespec {
-    time_t tv_sec;
-    long tv_nsec;
-};
 #endif

 /*---------------------------- macros ----------------------------*/
--- a/include/c99_compat.h
+++ b/include/c99_compat.h
@@ -36,8 +36,8 @@
 */
 #if defined(_MSC_VER)

-#  if _MSC_VER < 1800 || (_MSC_FULL_VER < 180031101 && !defined(__clang__))
-#    error "Microsoft Visual Studio 2013 Update 4 or higher required"
+#  if _MSC_VER < 1900
+#    error "Microsoft Visual Studio 2015 or higher required"
 #  endif

   /*
--- a/include/d3dadapter/present.h
+++ b/include/d3dadapter/present.h
@@ -125,7 +125,7 @@ struct ID3DPresent
 #define ID3DPresent_SetCursorPos(p,a) (p)->lpVtbl->SetCursorPos(p,a)
 #define ID3DPresent_SetCursor(p,a,b,c) (p)->lpVtbl->SetCursor(p,a,b,c)
 #define ID3DPresent_SetGammaRamp(p,a,b) (p)->lpVtbl->SetGammaRamp(p,a,b)
-#define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowSize(p,a,b,c,d)
+#define ID3DPresent_GetWindowInfo(p,a,b,c,d) (p)->lpVtbl->GetWindowInfo(p,a,b,c,d)
 #define ID3DPresent_GetWindowOccluded(p) (p)->lpVtbl->GetWindowOccluded(p)
 #define ID3DPresent_ResolutionMismatch(p) (p)->lpVtbl->ResolutionMismatch(p)
 #define ID3DPresent_CreateThread(p,a,b) (p)->lpVtbl->CreateThread(p,a,b)
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -171,6 +171,7 @@ CHIPSET(0x3185, glk_2x6, "Intel(R) UHD Graphics 600 (Geminilake 2x6)")
 CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
 CHIPSET(0x3E93, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
 CHIPSET(0x3E99, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
+CHIPSET(0x3E9C, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
 CHIPSET(0x3E91, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)")
 CHIPSET(0x3E92, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)")
 CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
@@ -203,6 +204,10 @@ CHIPSET(0x5A54, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
 CHIPSET(0x8A50, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
 CHIPSET(0x8A51, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
 CHIPSET(0x8A52, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
+CHIPSET(0x8A56, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
+CHIPSET(0x8A57, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
+CHIPSET(0x8A58, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
+CHIPSET(0x8A59, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
 CHIPSET(0x8A5A, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
 CHIPSET(0x8A5B, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
 CHIPSET(0x8A5C, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -219,6 +219,7 @@ CHIPSET(0x699F, POLARIS12)

 CHIPSET(0x694C, VEGAM)
 CHIPSET(0x694E, VEGAM)
+CHIPSET(0x694F, VEGAM)

 CHIPSET(0x6860, VEGA10)
 CHIPSET(0x6861, VEGA10)
@@ -227,8 +228,14 @@ CHIPSET(0x6863, VEGA10)
 CHIPSET(0x6864, VEGA10)
 CHIPSET(0x6867, VEGA10)
 CHIPSET(0x6868, VEGA10)
-CHIPSET(0x687F, VEGA10)
+CHIPSET(0x6869, VEGA10)
+CHIPSET(0x686A, VEGA10)
+CHIPSET(0x686B, VEGA10)
 CHIPSET(0x686C, VEGA10)
+CHIPSET(0x686D, VEGA10)
+CHIPSET(0x686E, VEGA10)
+CHIPSET(0x686F, VEGA10)
+CHIPSET(0x687F, VEGA10)

 CHIPSET(0x69A0, VEGA12)
 CHIPSET(0x69A1, VEGA12)
@@ -240,6 +247,7 @@ CHIPSET(0x66A0, VEGA20)
 CHIPSET(0x66A1, VEGA20)
 CHIPSET(0x66A2, VEGA20)
 CHIPSET(0x66A3, VEGA20)
+CHIPSET(0x66A4, VEGA20)
 CHIPSET(0x66A7, VEGA20)
 CHIPSET(0x66AF, VEGA20)

--- a/include/vulkan/vulkan.h
+++ b/include/vulkan/vulkan.h
@@ -24,6 +24,10 @@
 #include "vulkan_android.h"
 #endif

+#ifdef VK_USE_PLATFORM_FUCHSIA
+#include <zircon/types.h>
+#include "vulkan_fuchsia.h"
+#endif

 #ifdef VK_USE_PLATFORM_IOS_MVK
 #include "vulkan_ios.h"
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
--- a/include/vulkan/vulkan_fuchsia.h
+++ b/include/vulkan/vulkan_fuchsia.h
@@ -0,0 +1,58 @@
+#ifndef VULKAN_FUCHSIA_H_
+#define VULKAN_FUCHSIA_H_ 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+** Copyright (c) 2015-2018 The Khronos Group Inc.
+**
+** Licensed under the Apache License, Version 2.0 (the "License");
+** you may not use this file except in compliance with the License.
+** You may obtain a copy of the License at
+**
+**     http://www.apache.org/licenses/LICENSE-2.0
+**
+** Unless required by applicable law or agreed to in writing, software
+** distributed under the License is distributed on an "AS IS" BASIS,
+** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+** See the License for the specific language governing permissions and
+** limitations under the License.
+*/
+
+/*
+** This header is generated from the Khronos Vulkan XML API Registry.
+**
+*/
+
+
+#define VK_FUCHSIA_imagepipe_surface 1
+#define VK_FUCHSIA_IMAGEPIPE_SURFACE_SPEC_VERSION 1
+#define VK_FUCHSIA_IMAGEPIPE_SURFACE_EXTENSION_NAME "VK_FUCHSIA_imagepipe_surface"
+
+typedef VkFlags VkImagePipeSurfaceCreateFlagsFUCHSIA;
+
+typedef struct VkImagePipeSurfaceCreateInfoFUCHSIA {
+    VkStructureType                         sType;
+    const void*                             pNext;
+    VkImagePipeSurfaceCreateFlagsFUCHSIA    flags;
+    zx_handle_t                             imagePipeHandle;
+} VkImagePipeSurfaceCreateInfoFUCHSIA;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkCreateImagePipeSurfaceFUCHSIA)(VkInstance instance, const VkImagePipeSurfaceCreateInfoFUCHSIA* pCreateInfo, const VkAllocationCallbacks* pAllocator, VkSurfaceKHR* pSurface);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkCreateImagePipeSurfaceFUCHSIA(
+    VkInstance                                  instance,
+    const VkImagePipeSurfaceCreateInfoFUCHSIA*  pCreateInfo,
+    const VkAllocationCallbacks*                pAllocator,
+    VkSurfaceKHR*                               pSurface);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/m4/ax_check_python_mako_module.m4
+++ b/m4/ax_check_python_mako_module.m4
@@ -45,13 +45,14 @@ AC_DEFUN([AX_CHECK_PYTHON_MAKO_MODULE],
 try:
    import sys
    import mako
+    import distutils.version
 except ImportError as err:
    sys.exit(err)
 else:
-    ver_req = map(int, '$1'.split('.'))
-    ver_act = map(int, mako.__version__.split('.'))
+    ver_req = distutils.version.LooseVersion('$1')
+    ver_act = distutils.version.LooseVersion(mako.__version__)
    sys.exit(int(ver_req > ver_act))
-    " | $PYTHON2 -
+    " | $PYTHON -

    if test $? -ne 0 ; then
       AC_MSG_RESULT(no)
--- a/meson.build
+++ b/meson.build
@@ -1,4 +1,4 @@
-# Copyright © 2017-2018 Intel Corporation
+# Copyright © 2017-2019 Intel Corporation

 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -25,8 +25,8 @@ project(
    [find_program('python', 'python2', 'python3'), 'bin/meson_get_version.py']
  ).stdout(),
  license : 'MIT',
-  meson_version : '>= 0.44.1',
-  default_options : ['buildtype=debugoptimized', 'c_std=c99', 'cpp_std=c++11']
+  meson_version : '>= 0.45',
+  default_options : ['buildtype=debugoptimized', 'b_ndebug=if-release', 'c_std=c99', 'cpp_std=c++11']
 )

 cc = meson.get_compiler('c')
@@ -54,6 +54,7 @@ with_valgrind = get_option('valgrind')
 with_libunwind = get_option('libunwind')
 with_asm = get_option('asm')
 with_glx_read_only_text = get_option('glx-read-only-text')
+with_glx_direct = get_option('glx-direct')
 with_osmesa = get_option('osmesa')
 with_swr_arches = get_option('swr-arches')
 with_tools = get_option('tools')
@@ -63,11 +64,11 @@ endif

 dri_drivers_path = get_option('dri-drivers-path')
 if dri_drivers_path == ''
-  dri_drivers_path = join_paths(get_option('libdir'), 'dri')
+  dri_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'dri')
 endif
 dri_search_path = get_option('dri-search-path')
 if dri_search_path == ''
-  dri_search_path = join_paths(get_option('prefix'), dri_drivers_path)
+  dri_search_path = dri_drivers_path
 endif

 with_gles1 = get_option('gles1')
@@ -102,13 +103,15 @@ if _drivers.contains('auto')
    elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
      _drivers = []
    else
-      error('Unknown architecture. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.')
+      error('Unknown architecture @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
+            host_machine.cpu_family()))
    endif
  elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
    # only swrast would make sense here, but gallium swrast is a much better default
    _drivers = []
  else
-    error('Unknown OS. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.')
+    error('Unknown OS @0@. Please pass -Ddri-drivers to set driver options. Patches gladly accepted to fix this.'.format(
+          host_machine.system()))
  endif
 endif

@@ -135,12 +138,14 @@ if _drivers.contains('auto')
        'tegra', 'virgl', 'swrast',
      ]
    else
-      error('Unknown architecture. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.')
+      error('Unknown architecture @0@. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.'.format(
+            host_machine.cpu_family()))
    endif
  elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
    _drivers = ['swrast']
  else
-    error('Unknown OS. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.')
+    error('Unknown OS @0@. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.'.format(
+          host_machine.system()))
  endif
 endif
 with_gallium_pl111 = _drivers.contains('pl111')
@@ -160,6 +165,14 @@ with_gallium_svga = _drivers.contains('svga')
 with_gallium_virgl = _drivers.contains('virgl')
 with_gallium_swr = _drivers.contains('swr')

+if cc.get_id() == 'intel'
+  if meson.version().version_compare('< 0.49.0')
+    error('Meson does not have sufficient support of ICC before 0.49.0 to compile mesa')
+  elif with_gallium_swr and meson.version().version_compare('== 0.49.0')
+    warning('Meson as of 0.49.0 is sufficient for compiling mesa with ICC, but there are some caveats with SWR. 0.49.1 should resolve all of these')
+  endif
+endif
+
 with_gallium = _drivers.length() != 0 and _drivers != ['']

 if with_gallium and system_has_kms_drm
@@ -175,14 +188,18 @@ if _vulkan_drivers.contains('auto')
  if system_has_kms_drm
    if host_machine.cpu_family().startswith('x86')
      _vulkan_drivers = ['amd', 'intel']
+    elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
+      _vulkan_drivers = []
    else
-      error('Unknown architecture. Please pass -Dvulkan-drivers to set driver options. Patches gladly accepted to fix this.')
+      error('Unknown architecture @0@. Please pass -Dvulkan-drivers to set driver options. Patches gladly accepted to fix this.'.format(
+            host_machine.cpu_family()))
    endif
  elif ['darwin', 'windows', 'cygwin', 'haiku'].contains(host_machine.system())
    # No vulkan driver supports windows or macOS currently
    _vulkan_drivers = []
  else
-    error('Unknown OS. Please pass -Dvulkan-drivers to set driver options. Patches gladly accepted to fix this.')
+    error('Unknown OS @0@. Please pass -Dvulkan-drivers to set driver options. Patches gladly accepted to fix this.'.format(
+          host_machine.system()))
  endif
 endif

@@ -215,8 +232,6 @@ elif system_has_kms_drm
 else
  # FIXME: haiku doesn't use dri, and xlib doesn't use dri, probably should
  # assert here that one of those cases has been met.
-  # FIXME: GNU (hurd) ends up here as well, but meson doesn't officially
-  # support Hurd at time of writing (2017/11)
  # FIXME: illumos ends up here as well
  with_dri_platform = 'none'
 endif
@@ -230,7 +245,8 @@ if _platforms.contains('auto')
  elif ['haiku'].contains(host_machine.system())
    _platforms = ['haiku']
  else
-    error('Unknown OS. Please pass -Dplatforms to set platforms. Patches gladly accepted to fix this.')
+    error('Unknown OS @0@. Please pass -Dplatforms to set platforms. Patches gladly accepted to fix this.'.format(
+          host_machine.system()))
  endif
 endif

@@ -307,7 +323,7 @@ elif _egl == 'true'
  elif not with_shared_glapi
    error('EGL requires shared-glapi')
  elif not with_platforms
-    error('No platforms specified, consider -Dplatforms=drm,x11 at least')
+    error('No platforms specified, consider -Dplatforms=drm,x11,surfaceless at least')
  elif not ['disabled', 'dri'].contains(with_glx)
    error('EGL requires dri, but a GLX is being built without dri')
  elif ['darwin', 'windows'].contains(host_machine.system())
@@ -361,9 +377,6 @@ if with_glvnd
  endif
 endif

-# TODO: toggle for this
-with_glx_direct = true
-
 if with_vulkan_icd_dir == ''
  with_vulkan_icd_dir = join_paths(get_option('datadir'), 'vulkan/icd.d')
 endif
@@ -379,9 +392,9 @@ endif
 if with_any_vk and (with_platform_x11 and not with_dri3)
  error('Vulkan drivers require dri3 for X11 support')
 endif
-if with_dri or with_gallium
-  if with_glx == 'disabled' and not with_egl and not with_platform_haiku
-    error('building dri or gallium drivers require at least one window system')
+if with_dri
+  if with_glx == 'disabled' and not with_egl and not with_gbm and with_osmesa != 'classic'
+    error('building dri drivers require at least one windowing system or classic osmesa')
  endif
 endif

@@ -602,7 +615,7 @@ with_gallium_xa = _xa != 'false'

 d3d_drivers_path = get_option('d3d-drivers-path')
 if d3d_drivers_path == ''
-  d3d_drivers_path = join_paths(get_option('libdir'), 'd3d')
+  d3d_drivers_path = join_paths(get_option('prefix'), get_option('libdir'), 'd3d')
 endif

 with_gallium_st_nine =  get_option('gallium-nine')
@@ -611,7 +624,7 @@ if with_gallium_st_nine
    error('The nine state tracker requires gallium softpipe/llvmpipe.')
  elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600
            or with_gallium_r300 or with_gallium_svga or with_gallium_i915)
-    error('The nine state tracker requires at least on non-swrast gallium driver.')
+    error('The nine state tracker requires at least one non-swrast gallium driver.')
  endif
  if not with_dri3
    error('Using nine with wine requires dri3')
@@ -619,7 +632,12 @@ if with_gallium_st_nine
 endif

 if get_option('power8') != 'false'
-  if host_machine.cpu_family() == 'ppc64le'
+  # On old versions of meson the cpu family was reported as ppc64le on little
+  # endian power8. This was changed in 0.48 so that the family is always
+  # ppc64 regardless of endianness, and the host_machine.endian() value
+  # should be checked instead. Since we support versions < 0.48 we need to
+  # use startswith.
+  if host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little'
    if cc.get_id() == 'gcc' and cc.version().version_compare('< 4.8')
      error('Altivec is not supported with gcc version < 4.8.')
    endif
@@ -641,6 +659,7 @@ if get_option('power8') != 'false'
 endif

 _opencl = get_option('gallium-opencl')
+clover_cpp_std = []
 if _opencl != 'disabled'
  if not with_gallium
    error('OpenCL Clover implementation requires at least one gallium driver.')
@@ -649,10 +668,18 @@ if _opencl != 'disabled'
  dep_clc = dependency('libclc')
  with_gallium_opencl = true
  with_opencl_icd = _opencl == 'icd'
+
+  if host_machine.cpu_family().startswith('ppc') and cpp.compiles('''
+      #if !defined(__VEC__) || !defined(__ALTIVEC__)
+      #error "AltiVec not enabled"
+      #endif''',
+      name : 'Altivec')
+    clover_cpp_std += ['cpp_std=gnu++11']
+  endif
 else
  dep_clc = null_dep
  with_gallium_opencl = false
-  with_gallium_icd = false
+  with_opencl_icd = false
 endif

 gl_pkgconfig_c_flags = []
@@ -700,10 +727,16 @@ if with_platform_haiku
  pre_args += '-DHAVE_HAIKU_PLATFORM'
 endif

-prog_python2 = find_program('python2')
-has_mako = run_command(prog_python2, '-c', 'import mako')
+prog_python = import('python3').find_python()
+has_mako = run_command(
+  prog_python, '-c',
+  '''
+from distutils.version import StrictVersion
+import mako
+assert StrictVersion(mako.__version__) >= StrictVersion("0.8.0")  # 0.8.0 itself is supported
+  ''')
 if has_mako.returncode() != 0
-  error('Python (2.x) mako module required to build mesa.')
+  error('Python (3.x) mako module >= 0.8.0 required to build mesa.')
 endif

 if cc.get_id() == 'gcc' and cc.version().version_compare('< 4.4.6')
@@ -766,22 +799,26 @@ if cc.compiles('int foo(void) __attribute__((__noreturn__));',
 endif

 # TODO: this is very incomplete
-if ['linux', 'cygwin'].contains(host_machine.system())
+if ['linux', 'cygwin', 'gnu'].contains(host_machine.system())
  pre_args += '-D_GNU_SOURCE'
 endif

 # Check for generic C arguments
 c_args = []
 foreach a : ['-Werror=implicit-function-declaration',
-             '-Werror=missing-prototypes', '-fno-math-errno',
+             '-Werror=missing-prototypes', '-Werror=return-type',
+             '-fno-math-errno',
             '-fno-trapping-math', '-Qunused-arguments']
  if cc.has_argument(a)
    c_args += a
  endif
 endforeach
-if cc.has_argument('-Wmissing-field-initializers')
-  c_args += '-Wno-missing-field-initializers'
-endif
+
+foreach a : ['missing-field-initializers', 'format-truncation']
+  if cc.has_argument('-W' + a)
+    c_args += '-Wno-' + a
+  endif
+endforeach

 c_vis_args = []
 if cc.has_argument('-fvisibility=hidden')
@@ -790,7 +827,8 @@ endif

 # Check for generic C++ arguments
 cpp_args = []
-foreach a : ['-fno-math-errno', '-fno-trapping-math',
+foreach a : ['-Werror=return-type',
+             '-fno-math-errno', '-fno-trapping-math',
             '-Qunused-arguments']
  if cpp.has_argument(a)
    cpp_args += a
@@ -800,7 +838,7 @@ endforeach
 # For some reason, the test for -Wno-foo always succeeds with gcc, even if the
 # option is not supported. Hence, check for -Wfoo instead.

-foreach a : ['non-virtual-dtor', 'missing-field-initializers']
+foreach a : ['non-virtual-dtor', 'missing-field-initializers', 'format-truncation']
  if cpp.has_argument('-W' + a)
    cpp_args += '-Wno-' + a
  endif
@@ -920,7 +958,7 @@ endif
 with_asm_arch = ''
 if with_asm
  if host_machine.cpu_family() == 'x86'
-    if system_has_kms_drm
+    if system_has_kms_drm or host_machine.system() == 'gnu'
      with_asm_arch = 'x86'
      pre_args += ['-DUSE_X86_ASM', '-DUSE_MMX_ASM', '-DUSE_3DNOW_ASM',
                   '-DUSE_SSE_ASM']
@@ -949,7 +987,7 @@ if with_asm
      with_asm_arch = 'sparc'
      pre_args += ['-DUSE_SPARC_ASM']
    endif
-  elif host_machine.cpu_family() == 'ppc64le'
+  elif host_machine.cpu_family().startswith('ppc64') and host_machine.endian() == 'little'
    if system_has_kms_drm
      with_asm_arch = 'ppc64le'
      pre_args += ['-DUSE_PPC64LE_ASM']
@@ -1059,6 +1097,13 @@ pre_args += '-DHAVE_ZLIB'
 dep_thread = dependency('threads')
 if dep_thread.found() and host_machine.system() != 'windows'
  pre_args += '-DHAVE_PTHREAD'
+  if cc.has_function(
+      'pthread_setaffinity_np',
+      dependencies : dep_thread,
+      prefix : '#include <pthread.h>',
+      args : '-D_GNU_SOURCE')
+    pre_args += '-DHAVE_PTHREAD_SETAFFINITY'
+  endif
 endif
 dep_expat = dependency('expat')
 # this only exists on linux so either this is linux and it will be found, or
@@ -1073,14 +1118,12 @@ dep_libdrm_amdgpu = null_dep
 dep_libdrm_radeon = null_dep
 dep_libdrm_nouveau = null_dep
 dep_libdrm_etnaviv = null_dep
-dep_libdrm_freedreno = null_dep
 dep_libdrm_intel = null_dep

-_drm_amdgpu_ver = '2.4.91'
+_drm_amdgpu_ver = '2.4.95'
 _drm_radeon_ver = '2.4.71'
 _drm_nouveau_ver = '2.4.66'
 _drm_etnaviv_ver = '2.4.89'
-_drm_freedreno_ver = '2.4.92'
 _drm_intel_ver = '2.4.75'
 _drm_ver = '2.4.75'

@@ -1091,7 +1134,6 @@ _libdrm_checks = [
              with_gallium_r300 or with_gallium_r600)],
  ['nouveau', (with_gallium_nouveau or with_dri_nouveau)],
  ['etnaviv', with_gallium_etnaviv],
-  ['freedreno', with_gallium_freedreno],
 ]

 # VC4 only needs core libdrm support of this version, not a libdrm_vc4
@@ -1149,45 +1191,40 @@ if with_gallium_opencl
    'all-targets', 'linker', 'coverage', 'instrumentation', 'ipo', 'irreader',
    'lto', 'option', 'objcarcopts', 'profiledata',
  ]
-  llvm_optional_modules += ['coroutines', 'opencl']
+  llvm_optional_modules += ['coroutines']
 endif

-if with_amd_vk or with_gallium_radeonsi or with_gallium_swr
-  _llvm_version = '>= 5.0.0'
+if with_amd_vk or with_gallium_radeonsi
+  _llvm_version = '>= 6.0.0'
+elif with_gallium_swr
+  _llvm_version = '>= 6.0.0'
 elif with_gallium_opencl or with_gallium_r600
  _llvm_version = '>= 3.9.0'
 else
  _llvm_version = '>= 3.3.0'
 endif

+_shared_llvm = get_option('shared-llvm')
+
 _llvm = get_option('llvm')
-if _llvm == 'auto'
+dep_llvm = null_dep
+with_llvm = false
+if _llvm != 'false'
  dep_llvm = dependency(
    'llvm',
    version : _llvm_version,
    modules : llvm_modules,
    optional_modules : llvm_optional_modules,
-    required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or with_gallium_opencl,
+    required : (
+      with_amd_vk or with_gallium_radeonsi or with_gallium_swr or
+      with_gallium_opencl or _llvm == 'true'
+    ),
+    static : not _shared_llvm,
  )
  with_llvm = dep_llvm.found()
-elif _llvm == 'true'
-  dep_llvm = dependency(
-    'llvm',
-    version : _llvm_version,
-    modules : llvm_modules,
-    optional_modules : llvm_optional_modules,
-  )
-  with_llvm = true
-else
-  dep_llvm = null_dep
-  with_llvm = false
 endif
 if with_llvm
  _llvm_version = dep_llvm.version().split('.')
-  # Development versions of LLVM have an 'svn' or 'git' suffix, we don't want
-  # that for our version checks.
-  # svn suffixes are stripped by meson as of 0.43, and git suffixes are
-  # strippped as of 0.44, but we support older meson versions.

  # 3 digits versions in LLVM only started from 3.4.1 on
  if dep_llvm.version().version_compare('>= 3.4.1')
@@ -1196,11 +1233,6 @@ if with_llvm
    _llvm_patch = '0'
  endif

-  if _llvm_patch.endswith('svn')
-    _llvm_patch = _llvm_patch.split('s')[0]
-  elif _llvm_patch.contains('git')
-    _llvm_patch = _llvm_patch.split('g')[0]
-  endif
  pre_args += [
    '-DHAVE_LLVM=0x0@0@0@1@'.format(_llvm_version[0], _llvm_version[1]),
    '-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
@@ -1210,6 +1242,9 @@ if with_llvm
  # programs, so we need to build all C++ code in mesa without rtti as well to
  # ensure that linking works.
  if dep_llvm.get_configtool_variable('has-rtti') == 'NO'
+    if with_gallium_nouveau
+      error('The Nouveau driver requires rtti. You either need to turn off nouveau or use an LLVM built with LLVM_ENABLE_RTTI.')
+    endif
    cpp_args += '-fno-rtti'
  endif
 elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
@@ -1284,7 +1319,13 @@ endif
 # TODO: symbol mangling

 if with_platform_wayland
-  prog_wl_scanner = find_program('wayland-scanner')
+  dep_wl_scanner = dependency('wayland-scanner', native: true)
+  prog_wl_scanner = find_program(dep_wl_scanner.get_pkgconfig_variable('wayland_scanner'))
+  if dep_wl_scanner.version().version_compare('>= 1.15')
+    wl_scanner_arg = 'private-code'
+  else
+    wl_scanner_arg = 'code'
+  endif
  dep_wl_protocols = dependency('wayland-protocols', version : '>= 1.8')
  dep_wayland_client = dependency('wayland-client', version : '>=1.11')
  dep_wayland_server = dependency('wayland-server', version : '>=1.11')
@@ -1330,7 +1371,6 @@ if with_platform_x11
    dep_xdamage = dependency('xdamage', version : '>= 1.1')
    dep_xfixes = dependency('xfixes')
    dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1')
-    dep_xxf86vm = dependency('xxf86vm')
  endif
  if (with_any_vk or with_glx == 'dri' or
       (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or
@@ -1357,6 +1397,7 @@ if with_platform_x11
  if with_glx == 'dri'
    if with_dri_platform == 'drm'
      dep_dri2proto = dependency('dri2proto', version : '>= 2.8')
+      dep_xxf86vm = dependency('xxf86vm')
    endif
    dep_glproto = dependency('glproto', version : '>= 1.4.14')
  endif
@@ -1366,7 +1407,7 @@ if with_platform_x11
    dep_xcb_xfixes = dependency('xcb-xfixes')
  endif
  if with_xlib_lease
-    dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12')
+    dep_xcb_xrandr = dependency('xcb-randr')
    dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3')
  endif
 endif
@@ -1377,7 +1418,7 @@ endif

 _sensors = get_option('lmsensors')
 if _sensors != 'false'
-  dep_lmsensors = cc.find_library('libsensors', required : _sensors == 'true')
+  dep_lmsensors = cc.find_library('sensors', required : _sensors == 'true')
  if dep_lmsensors.found()
    pre_args += '-DHAVE_LIBSENSORS=1'
  endif
@@ -1407,8 +1448,8 @@ elif with_glx == 'dri'
    'xcb-glx >= 1.8.1']
  if with_dri_platform == 'drm'
    gl_priv_reqs += 'xcb-dri2 >= 1.8'
+    gl_priv_reqs += 'xxf86vm'
  endif
-  gl_priv_reqs += 'xxf86vm'
 endif
 if dep_libdrm.found()
  gl_priv_reqs += 'libdrm >= 2.4.75'
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -25,7 +25,7 @@ option(
  choices : [
    '', 'auto', 'x11', 'wayland', 'drm', 'surfaceless', 'haiku', 'android',
  ],
-  description : 'comma separated list of window systems to support. If this is set to auto all platforms applicable to the OS will be enabled.'
+  description : 'window systems to support. If this is set to `auto`, all platforms applicable will be enabled.'
 )
 option(
  'dri3',
@@ -237,6 +237,12 @@ option(
  choices : ['auto', 'true', 'false'],
  description : 'Build with LLVM support.'
 )
+option(
+  'shared-llvm',
+  type : 'boolean',
+  value : true,
+  description : 'Whether to link llvm shared or statically.'
+)
 option(
  'valgrind',
  type : 'combo',
@@ -295,8 +301,8 @@ option(
  'tools',
  type : 'array',
  value : [],
-  choices : ['freedreno', 'glsl', 'intel', 'nir', 'nouveau', 'xvmc', 'all'],
-  description : 'List of tools to build.',
+  choices : ['freedreno', 'glsl', 'intel', 'intel-ui', 'nir', 'nouveau', 'xvmc', 'all'],
+  description : 'List of tools to build. (Note: `intel-ui` selects `intel`)',
 )
 option(
  'power8',
@@ -312,3 +318,9 @@ option(
  choices : ['auto', 'true', 'false'],
  description : 'Enable VK_EXT_acquire_xlib_display.'
 )
+option(
+  'glx-direct',
+  type : 'boolean',
+  value : true,
+  description : 'Enable direct rendering in GLX and EGL for DRI',
+)
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -48,7 +48,12 @@ import source_list
 # a path directly. We want to support both, so we need to detect the SCons version,
 # for which no API is provided by SCons 8-P

-scons_version = tuple(map(int, SCons.__version__.split('.')))
+# The SCons version string has consistently been in this format:
+# MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
+# so keeping only the first three fields covers every release type
+# (stable, alpha or beta).
+# For simplicity the alpha/beta suffix is dropped.
+scons_version = tuple(map(int, SCons.__version__.split('.')[:3]))

 def quietCommandLines(env):
    # Quiet command lines
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -29,6 +29,7 @@ Frontend-tool for Gallium3D architecture.
 # SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #

+from __future__ import print_function

 import distutils.version
 import os
@@ -221,10 +222,6 @@ def generate(env):
    env['suncc'] = env['platform'] == 'sunos' and os.path.basename(env['CC']) == 'cc'
    env['icc'] = 'icc' == os.path.basename(env['CC'])

-    if env['msvc'] and env['toolchain'] == 'default' and env['machine'] == 'x86_64':
-        # MSVC x64 support is broken in earlier versions of scons
-        env.EnsurePythonVersion(2, 0)
-
    # shortcuts
    machine = env['machine']
    platform = env['platform']
@@ -311,7 +308,20 @@ def generate(env):
    if env.GetOption('num_jobs') <= 1:
        env.SetOption('num_jobs', num_jobs())

-    env.Decider('MD5-timestamp')
+    # Speed up dependency checking.  See
+    # - https://github.com/SCons/scons/wiki/GoFastButton
+    # - https://bugs.freedesktop.org/show_bug.cgi?id=109443
+
+    # The SCons version string has consistently been in this format:
+    # MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
+    # so keeping only the first three fields covers every release type
+    # (stable, alpha or beta).
+    # For simplicity the alpha/beta suffix is dropped.
+
+    scons_version = distutils.version.StrictVersion('.'.join(SCons.__version__.split('.')[:3]))
+    if scons_version < distutils.version.StrictVersion('3.0.2') or \
+       scons_version > distutils.version.StrictVersion('3.0.4'):
+        env.Decider('MD5-timestamp')
    env.SetOption('max_drift', 60)

    # C preprocessor options
@@ -679,6 +689,18 @@ def generate(env):
    env.PkgCheckModules('XF86VIDMODE', ['xxf86vm'])
    env.PkgCheckModules('DRM', ['libdrm >= 2.4.75'])

+    if not os.path.exists("src/util/format_srgb.c"):
+        print("Checking for Python Mako module (>= 0.8.0)... ", end='')
+        try:
+            import mako
+        except ImportError:
+            print("no")
+            exit(1)
+        if distutils.version.StrictVersion(mako.__version__) < distutils.version.StrictVersion('0.8.0'):
+            print("no")
+            exit(1)
+        print("yes")
+
    if env['x11']:
        env.Append(CPPPATH = env['X11_CPPPATH'])

--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -99,9 +99,6 @@ def generate(env):
            return

        env.Prepend(CPPPATH = [os.path.join(llvm_dir, 'include')])
-        env.AppendUnique(CPPDEFINES = [
-            'HAVE_STDINT_H',
-        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
        if llvm_version >= distutils.version.LooseVersion('5.0'):
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -20,13 +20,13 @@
 # IN THE SOFTWARE.

 .PHONY: git_sha1.h
-git_sha1.h: $(top_srcdir)/src/git_sha1.h.in
+git_sha1.h:
 	@echo "updating $@"
-	@$(PYTHON2) $(top_srcdir)/bin/git_sha1_gen.py --output $@
+	@$(PYTHON) $(top_srcdir)/bin/git_sha1_gen.py --output $@

 BUILT_SOURCES = git_sha1.h
 CLEANFILES = $(BUILT_SOURCES)
-EXTRA_DIST = git_sha1.h.in meson.build
+EXTRA_DIST = meson.build

 SUBDIRS = . gtest util mapi/glapi/gen mapi

@@ -39,6 +39,7 @@ gl_HEADERS = \
  $(top_srcdir)/include/GL/gl_mangle.h
 endif

+# TODO: When building with glvnd, glvnd itself should be providing gl.pc and the headers
 if HAVE_GLX
 glxdir = $(includedir)/GL
 glx_HEADERS = \
--- a/src/SConscript
+++ b/src/SConscript
@@ -42,10 +42,6 @@ env.Append(CPPPATH = ["#" + env['build_dir']])
 if env['platform'] != 'windows':
    SConscript('loader/SConscript')

-# When env['gles'] is set, the targets defined in mapi/glapi/SConscript are not
-# used.  libgl-xlib and libgl-gdi adapt themselves to use the targets defined
-# in mapi/glapi-shared/SConscript.  mesa/SConscript also adapts itself to
-# enable OpenGL ES support.
 SConscript('mapi/glapi/gen/SConscript')
 SConscript('mapi/glapi/SConscript')

@@ -61,8 +57,5 @@ if not env['embedded']:
    if env['platform'] == 'haiku':
        SConscript('egl/SConscript')

-    if env['gles']:
-        SConscript('mapi/shared-glapi/SConscript')
-
 SConscript('gallium/SConscript')

--- a/src/amd/Makefile.addrlib.am
+++ b/src/amd/Makefile.addrlib.am
@@ -24,6 +24,7 @@ ADDRLIB_LIBS = addrlib/libamdgpu_addrlib.la
 addrlib_libamdgpu_addrlib_la_CPPFLAGS = \
 	$(DEFINES) \
 	-I$(top_srcdir)/src/ \
+	-I$(top_srcdir)/include \
 	-I$(srcdir)/common \
 	-I$(srcdir)/addrlib \
 	-I$(srcdir)/addrlib/core \
--- a/src/amd/Makefile.common.am
+++ b/src/amd/Makefile.common.am
@@ -66,6 +66,6 @@ common_libamd_common_la_LIBADD = $(LIBELF_LIBS)

 common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h
 	$(AM_V_at)$(MKDIR_P) $(@D)
-	$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h > $@
+	$(AM_V_GEN) $(PYTHON) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h > $@

 BUILT_SOURCES = $(AMD_GENERATED_FILES)
--- a/src/amd/addrlib/addrinterface.cpp
+++ b/src/amd/addrlib/addrinterface.cpp
@@ -36,6 +36,8 @@

 #include "addrcommon.h"

+#include "util/macros.h"
+
 using namespace Addr;

 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -832,7 +834,7 @@ BOOL_32 ADDR_API ElemGetExportNorm(
    Addr::Lib* pLib = Lib::GetLib(hLib);
    BOOL_32 enabled = FALSE;

-    ADDR_E_RETURNCODE returnCode = ADDR_OK;
+    MAYBE_UNUSED ADDR_E_RETURNCODE returnCode = ADDR_OK;

    if (pLib != NULL)
    {
--- a/src/amd/addrlib/amdgpu_asic_addr.h
+++ b/src/amd/addrlib/amdgpu_asic_addr.h
@@ -90,6 +90,7 @@
 #define AMDGPU_VEGA20_RANGE     0x28, 0xFF

 #define AMDGPU_RAVEN_RANGE      0x01, 0x81
+#define AMDGPU_RAVEN2_RANGE     0x81, 0xFF

 #define AMDGPU_EXPAND_FIX(x) x
 #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max))
@@ -132,5 +133,6 @@
 #define ASICREV_IS_VEGA20_P(r)         ASICREV_IS(r, VEGA20)

 #define ASICREV_IS_RAVEN(r)            ASICREV_IS(r, RAVEN)
+#define ASICREV_IS_RAVEN2(r)           ASICREV_IS(r, RAVEN2)

 #endif // _AMDGPU_ASIC_ADDR_H
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -37,6 +37,8 @@

 #include "amdgpu_asic_addr.h"

+#include "util/macros.h"
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////////////////////////

@@ -1289,7 +1291,7 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
            break;
        case FAMILY_RV:
            m_settings.isArcticIsland = 1;
-            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
+            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision);

            if (m_settings.isRaven)
            {
@@ -2470,7 +2472,7 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlComputeBlock256Equation(
    // Post validation
    if (ret == ADDR_OK)
    {
-        Dim2d microBlockDim = Block256_2d[elementBytesLog2];
+        MAYBE_UNUSED Dim2d microBlockDim = Block256_2d[elementBytesLog2];
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 0)) ==
                    (microBlockDim.w * (1 << elementBytesLog2)));
        ADDR_ASSERT((2u << GetMaxValidChannelIndex(pEquation->addr, 8, 1)) == microBlockDim.h);
@@ -3876,7 +3878,7 @@ ADDR_E_RETURNCODE Gfx9Lib::ComputeStereoInfo(
            const UINT_32        numBankBits       = GetBankXorBits(blkSizeLog2);
            const UINT_32        bppLog2           = Log2(pIn->bpp >> 3);
            const UINT_32        maxYCoordBlock256 = Log2(Block256_2d[bppLog2].h) - 1;
-            const ADDR_EQUATION *pEqToCheck        = &m_equationTable[eqIndex];
+            MAYBE_UNUSED const ADDR_EQUATION *pEqToCheck = &m_equationTable[eqIndex];

            ADDR_ASSERT(maxYCoordBlock256 ==
                        GetMaxValidChannelIndex(&pEqToCheck->addr[0], GetBlockSizeLog2(ADDR_SW_256B), 1));
--- a/src/amd/addrlib/meson.build
+++ b/src/amd/addrlib/meson.build
@@ -57,7 +57,7 @@ libamdgpu_addrlib = static_library(
    include_directories(
      'core', 'inc/chip/gfx9', 'inc/chip/r800', 'gfx9/chip', 'r800/chip',
    ),
-    inc_amd_common, inc_src,
+    inc_amd_common, inc_common, inc_src,
  ],
  cpp_args : cpp_vis_args,
 )
--- a/src/amd/addrlib/r800/egbaddrlib.cpp
+++ b/src/amd/addrlib/r800/egbaddrlib.cpp
@@ -33,6 +33,8 @@

 #include "egbaddrlib.h"

+#include "util/macros.h"
+
 namespace Addr
 {
 namespace V1
@@ -979,7 +981,7 @@ BOOL_32 EgBasedLib::SanityCheckMacroTiled(
    ) const
 {
    BOOL_32 valid       = TRUE;
-    UINT_32 numPipes    = HwlGetPipes(pTileInfo);
+    MAYBE_UNUSED UINT_32 numPipes = HwlGetPipes(pTileInfo);

    switch (pTileInfo->banks)
    {
@@ -4108,7 +4110,7 @@ UINT_64 EgBasedLib::HwlGetSizeAdjustmentMicroTiled(
    ) const
 {
    UINT_64 logicalSliceSize;
-    UINT_64 physicalSliceSize;
+    MAYBE_UNUSED UINT_64 physicalSliceSize;

    UINT_32 pitch   = *pPitch;
    UINT_32 height  = *pHeight;
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -99,7 +99,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 	struct drm_amdgpu_info_device device_info = {};
 	struct amdgpu_buffer_size_alignments alignment_info = {};
 	struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
-	struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {};
+	struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {}, vcn_jpeg = {};
 	struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
 	struct amdgpu_gds_resource_info gds = {};
 	uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
@@ -186,6 +186,14 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		}
 	}

+	if (info->drm_major == 3 && info->drm_minor >= 27) {
+		r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_JPEG, 0, &vcn_jpeg);
+		if (r) {
+			fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_jpeg) failed.\n");
+			return false;
+		}
+	}
+
 	r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0,
 					&info->me_fw_version,
 					&info->me_fw_feature);
@@ -255,9 +263,6 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		info->gart_size = meminfo.gtt.total_heap_size;
 		info->vram_size = meminfo.vram.total_heap_size;
 		info->vram_vis_size = meminfo.cpu_accessible_vram.total_heap_size;
-
-		info->max_alloc_size = MAX2(meminfo.vram.max_allocation,
-					    meminfo.gtt.max_allocation);
 	} else {
 		/* This is a deprecated interface, which reports usable sizes
 		 * (total minus pinned), but the pinned size computation is
@@ -289,11 +294,6 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		info->gart_size = gtt.heap_size;
 		info->vram_size = vram.heap_size;
 		info->vram_vis_size = vram_vis.heap_size;
-
-		/* The kernel can split large buffers in VRAM but not in GTT, so large
-		 * allocations can fail or cause buffer movement failures in the kernel.
-		 */
-		info->max_alloc_size = MAX2(info->vram_size * 0.9, info->gart_size * 0.7);
 	}

 	/* Set chip identification. */
@@ -301,7 +301,11 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 	info->vce_harvest_config = amdinfo->vce_harvest_config;

 	switch (info->pci_id) {
-#define CHIPSET(pci_id, cfamily) case pci_id: info->family = CHIP_##cfamily; break;
+#define CHIPSET(pci_id, cfamily) \
+	case pci_id: \
+		info->family = CHIP_##cfamily; \
+		info->name = #cfamily; \
+		break;
 #include "pci_ids/radeonsi_pci_ids.h"
 #undef CHIPSET

@@ -310,6 +314,12 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		return false;
 	}

+	/* Raven2 uses the same PCI IDs as Raven1, but different revision IDs. */
+	if (info->family == CHIP_RAVEN && amdinfo->chip_rev >= 0x8) {
+		info->family = CHIP_RAVEN2;
+		info->name = "RAVEN2";
+	}
+
 	if (info->family >= CHIP_VEGA10)
 		info->chip_class = GFX9;
 	else if (info->family >= CHIP_TONGA)
@@ -327,6 +337,14 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 	info->has_dedicated_vram =
 		!(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);

+	/* The kernel can split large buffers in VRAM but not in GTT, so large
+	 * allocations can fail or cause buffer movement failures in the kernel.
+	 */
+	if (info->has_dedicated_vram)
+		info->max_alloc_size = info->vram_size * 0.8;
+	else
+		info->max_alloc_size = info->gart_size * 0.7;
+
 	/* Set hardware information. */
 	info->gds_size = gds.gds_total_size;
 	info->gds_gfx_partition_size = gds.gds_gfx_partition_size;
@@ -336,7 +354,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 	info->max_se = amdinfo->num_shader_engines;
 	info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine;
 	info->has_hw_decode =
-		(uvd.available_rings != 0) || (vcn_dec.available_rings != 0);
+		(uvd.available_rings != 0) || (vcn_dec.available_rings != 0) ||
+		(vcn_jpeg.available_rings != 0);
 	info->uvd_fw_version =
 		uvd.available_rings ? uvd_version : 0;
 	info->vce_fw_version =
@@ -410,6 +429,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		for (j = 0; j < info->max_sh_per_se; j++)
 			info->num_good_compute_units +=
 				util_bitcount(amdinfo->cu_bitmap[i][j]);
+	info->num_good_cu_per_sh = info->num_good_compute_units /
+				   (info->max_se * info->max_sh_per_se);

 	memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
 		sizeof(amdinfo->gb_tile_mode));
@@ -433,6 +454,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 	ib_align = MAX2(ib_align, vce.ib_start_alignment);
 	ib_align = MAX2(ib_align, vcn_dec.ib_start_alignment);
 	ib_align = MAX2(ib_align, vcn_enc.ib_start_alignment);
+	ib_align = MAX2(ib_align, vcn_jpeg.ib_start_alignment);
       assert(ib_align);
 	info->ib_start_alignment = ib_align;

@@ -539,6 +561,7 @@ void ac_print_gpu_info(struct radeon_info *info)
 	printf("Shader core info:\n");
 	printf("    max_shader_clock = %i\n", info->max_shader_clock);
 	printf("    num_good_compute_units = %i\n", info->num_good_compute_units);
+	printf("    num_good_cu_per_sh = %i\n", info->num_good_cu_per_sh);
 	printf("    num_tcc_blocks = %i\n", info->num_tcc_blocks);
 	printf("    max_se = %i\n", info->max_se);
 	printf("    max_sh_per_se = %i\n", info->max_sh_per_se);
@@ -636,9 +659,10 @@ ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family)
 void
 ac_get_raster_config(struct radeon_info *info,
 		     uint32_t *raster_config_p,
-		     uint32_t *raster_config_1_p)
+		     uint32_t *raster_config_1_p,
+		     uint32_t *se_tile_repeat_p)
 {
-	unsigned raster_config, raster_config_1;
+	unsigned raster_config, raster_config_1, se_tile_repeat;

 	switch (info->family) {
 	/* 1 SE / 1 RB */
@@ -715,8 +739,16 @@ ac_get_raster_config(struct radeon_info *info,
 		raster_config_1 = 0x0000002a;
 	}

+	unsigned se_width = 8 << G_028350_SE_XSEL_GFX6(raster_config);
+	unsigned se_height = 8 << G_028350_SE_YSEL_GFX6(raster_config);
+
+	/* I don't know how to calculate this, though this is probably a good guess. */
+	se_tile_repeat = MAX2(se_width, se_height) * info->max_se;
+
 	*raster_config_p = raster_config;
 	*raster_config_1_p = raster_config_1;
+	if (se_tile_repeat_p)
+		*se_tile_repeat_p = se_tile_repeat;
 }

 void
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -47,6 +47,7 @@ struct radeon_info {
 	uint32_t                    pci_func;

 	/* Device info. */
+	const char                  *name;
 	uint32_t                    pci_id;
 	enum radeon_family          family;
 	enum chip_class             chip_class;
@@ -115,6 +116,7 @@ struct radeon_info {
 	uint32_t                    r600_max_quad_pipes; /* wave size / 16 */
 	uint32_t                    max_shader_clock;
 	uint32_t                    num_good_compute_units;
+	uint32_t                    num_good_cu_per_sh;
 	uint32_t                    num_tcc_blocks;
 	uint32_t                    max_se; /* shader engines */
 	uint32_t                    max_sh_per_se; /* shader arrays per shader engine */
@@ -148,7 +150,8 @@ void ac_print_gpu_info(struct radeon_info *info);
 int ac_get_gs_table_depth(enum chip_class chip_class, enum radeon_family family);
 void ac_get_raster_config(struct radeon_info *info,
 			  uint32_t *raster_config_p,
-			  uint32_t *raster_config_1_p);
+			  uint32_t *raster_config_1_p,
+			  uint32_t *se_tile_repeat_p);
 void ac_get_harvested_configs(struct radeon_info *info,
 			      unsigned raster_config,
 			      unsigned *cik_raster_config_1_p,
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -87,6 +87,8 @@ ac_llvm_context_init(struct ac_llvm_context *ctx,
 	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
 	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

+	ctx->i16_0 = LLVMConstInt(ctx->i16, 0, false);
+	ctx->i16_1 = LLVMConstInt(ctx->i16, 1, false);
 	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
 	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
 	ctx->i64_0 = LLVMConstInt(ctx->i64, 0, false);
@@ -182,7 +184,7 @@ ac_get_type_size(LLVMTypeRef type)
 	case LLVMDoubleTypeKind:
 		return 8;
 	case LLVMPointerTypeKind:
-		if (LLVMGetPointerAddressSpace(type) == AC_CONST_32BIT_ADDR_SPACE)
+		if (LLVMGetPointerAddressSpace(type) == AC_ADDR_SPACE_CONST_32BIT)
 			return 4;
 		return 8;
 	case LLVMVectorTypeKind:
@@ -348,6 +350,12 @@ ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
 	return phi;
 }

+/* Emit a call to llvm.amdgcn.s.barrier: void result, no operands, convergent. */
+void ac_build_s_barrier(struct ac_llvm_context *ctx)
+{
+	ac_build_intrinsic(ctx, "llvm.amdgcn.s.barrier", ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
+}
+
 /* Prevent optimizations (at least of memory accesses) across the current
 * point in the program by emitting empty inline assembly that is marked as
 * having side effects.
@@ -562,6 +570,22 @@ LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
 	return ac_build_expand(ctx, value, num_channels, 4);
 }

+/* Call llvm.rint on value; the f16/f32/f64 variant follows its byte size (2, 4, else f64). */
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value)
+{
+	LLVMTypeRef value_type = LLVMTypeOf(value);
+	const char *intrin;
+
+	switch (ac_get_type_size(value_type)) {
+	case 2:  intrin = "llvm.rint.f16"; break;
+	case 4:  intrin = "llvm.rint.f32"; break;
+	default: intrin = "llvm.rint.f64"; break;
+	}
+
+	return ac_build_intrinsic(ctx, intrin, value_type, &value, 1,
+				  AC_FUNC_ATTR_READNONE);
+}
+
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
 	      LLVMValueRef num,
@@ -583,6 +607,67 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 	return ret;
 }

+/* See fast_idiv_by_const.h.
+ * Builds ((zext64(num >> pre_shift) * zext64(multiplier) + zext64(increment)) >> 32) >> post_shift.
+ * Set: increment = util_fast_udiv_info::increment ? multiplier : 0; */
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+				LLVMValueRef num, LLVMValueRef multiplier,
+				LLVMValueRef pre_shift, LLVMValueRef post_shift,
+				LLVMValueRef increment)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef shifted = LLVMBuildLShr(b, num, pre_shift, "");
+
+	/* Widen every operand to i64 before the multiply and the add. */
+	LLVMValueRef wide_num = LLVMBuildZExt(b, shifted, ctx->i64, "");
+	LLVMValueRef wide_mul = LLVMBuildZExt(b, multiplier, ctx->i64, "");
+	LLVMValueRef wide_inc = LLVMBuildZExt(b, increment, ctx->i64, "");
+	LLVMValueRef acc = LLVMBuildAdd(b, LLVMBuildMul(b, wide_num, wide_mul, ""), wide_inc, "");
+
+	/* Keep bits [63:32] of the 64-bit sum as an i32, then apply the post shift. */
+	LLVMValueRef hi = LLVMBuildTrunc(b, LLVMBuildLShr(b, acc, LLVMConstInt(ctx->i64, 32, 0), ""), ctx->i32, "");
+	return LLVMBuildLShr(b, hi, post_shift, "");
+}
+
+/* See fast_idiv_by_const.h. */
+/* If num != UINT_MAX, this more efficient version can be used. */
+/* Set: increment = util_fast_udiv_info::increment; */
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+				    LLVMValueRef num, LLVMValueRef multiplier,
+				    LLVMValueRef pre_shift, LLVMValueRef post_shift,
+				    LLVMValueRef increment)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef const32 = LLVMConstInt(ctx->i64, 32, 0);
+
+	/* The increment is added before widening, using a no-unsigned-wrap add. */
+	LLVMValueRef biased = LLVMBuildNUWAdd(b, LLVMBuildLShr(b, num, pre_shift, ""), increment, "");
+	LLVMValueRef wide_num = LLVMBuildZExt(b, biased, ctx->i64, "");
+	LLVMValueRef wide_mul = LLVMBuildZExt(b, multiplier, ctx->i64, "");
+	LLVMValueRef product = LLVMBuildMul(b, wide_num, wide_mul, "");
+
+	/* Keep bits [63:32] of the 64-bit product as an i32, then apply the post shift. */
+	LLVMValueRef hi = LLVMBuildTrunc(b, LLVMBuildLShr(b, product, const32, ""), ctx->i32, "");
+	return LLVMBuildLShr(b, hi, post_shift, "");
+}
+
+/* See fast_idiv_by_const.h. */
+/* Both operands must fit in 31 bits and the divisor must not be 1. */
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+					      LLVMValueRef num,
+					      LLVMValueRef multiplier,
+					      LLVMValueRef post_shift)
+{
+	LLVMBuilderRef b = ctx->builder;
+	LLVMValueRef wide_num = LLVMBuildZExt(b, num, ctx->i64, "");
+	LLVMValueRef wide_mul = LLVMBuildZExt(b, multiplier, ctx->i64, "");
+	/* 64-bit multiply, keep bits [63:32] as an i32, then apply the post shift. */
+	LLVMValueRef product = LLVMBuildMul(b, wide_num, wide_mul, "");
+	LLVMValueRef hi = LLVMBuildLShr(b, product, LLVMConstInt(ctx->i64, 32, 0), "");
+	hi = LLVMBuildTrunc(b, hi, ctx->i32, "");
+	return LLVMBuildLShr(b, hi, post_shift, "");
+}
+
 /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
 * already multiplied by two. id is the cube face number.
@@ -680,8 +765,7 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 	LLVMValueRef invma;

 	if (is_array && !is_lod) {
-		LLVMValueRef tmp = coords_arg[3];
-		tmp = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &tmp, 1, 0);
+		LLVMValueRef tmp = ac_build_round(ctx, coords_arg[3]);

 		/* Section 8.9 (Texture Functions) of the GLSL 4.50 spec says:
 		 *
@@ -776,8 +860,7 @@ ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 	if (is_array) {
 		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
 		/* coords_arg.w component - array_index for cube arrays */
-		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
-		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
+		coords[2] = ac_build_fmad(ctx, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), coords[2]);
 	}

 	memcpy(coords_arg, coords, sizeof(coords));
@@ -813,6 +896,37 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
 				  ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
 }

+/* Chain llvm.amdgcn.interp.p1.f16 (using i) into llvm.amdgcn.interp.p2.f16 (using j). */
+LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+		       LLVMValueRef llvm_chan,
+		       LLVMValueRef attr_number,
+		       LLVMValueRef params,
+		       LLVMValueRef i,
+		       LLVMValueRef j)
+{
+	/* First call: five operands, f32 result. */
+	LLVMValueRef p1_args[5] = {
+		i,
+		llvm_chan,
+		attr_number,
+		ctx->i1false,
+		params,
+	};
+	LLVMValueRef p1_result =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1.f16",
+				   ctx->f32, p1_args, 5, AC_FUNC_ATTR_READNONE);
+
+	/* Second call: the p1 result and j lead the operand list; the
+	 * returned value is f16. */
+	LLVMValueRef p2_args[6] = {
+		p1_result, j, llvm_chan, attr_number, ctx->i1false, params,
+	};
+
+	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2.f16",
+				  ctx->f16, p2_args, 6, AC_FUNC_ATTR_READNONE);
+}
+
 LLVMValueRef
 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
 		       LLVMValueRef parameter,
@@ -837,11 +951,18 @@ ac_build_gep0(struct ac_llvm_context *ctx,
 	      LLVMValueRef index)
 {
 	LLVMValueRef indices[2] = {
-		LLVMConstInt(ctx->i32, 0, 0),
+		ctx->i32_0,
 		index,
 	};
-	return LLVMBuildGEP(ctx->builder, base_ptr,
-			    indices, 2, "");
+	return LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");
+}
+
+/* Index ptr through ac_build_gep0(), then cast the result back to ptr's own type. */
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+				  LLVMValueRef index)
+{
+	LLVMValueRef element = ac_build_gep0(ctx, ptr, index);
+	return LLVMBuildPointerCast(ctx->builder, element, LLVMTypeOf(ptr), "");
 }

 void
@@ -862,14 +983,39 @@ ac_build_indexed_store(struct ac_llvm_context *ctx,
 * \param uniform   Whether the base_ptr and index can be assumed to be
 *                  dynamically uniform (i.e. load to an SGPR)
 * \param invariant Whether the load is invariant (no other opcodes affect it)
+ * \param no_unsigned_wraparound
+ *    For all possible re-associations and re-distributions of an expression
+ *    "base_ptr + index * elemsize" into "addr + offset" (excluding GEPs
+ *    without inbounds in base_ptr), this parameter is true if "addr + offset"
+ *    does not result in an unsigned integer wraparound. This is used for
+ *    optimal code generation of 32-bit pointer arithmetic.
+ *
+ *    For example, a 32-bit immediate offset that causes a 32-bit unsigned
+ *    integer wraparound can't be an imm offset in s_load_dword, because
+ *    the instruction performs "addr + offset" in 64 bits.
+ *
+ *    Expected usage for bindless textures by chaining GEPs:
+ *      // possible unsigned wraparound, don't use InBounds:
+ *      ptr1 = LLVMBuildGEP(base_ptr, index);
+ *      image = load(ptr1); // becomes "s_load ptr1, 0"
+ *
+ *      ptr2 = LLVMBuildInBoundsGEP(ptr1, 32 / elemsize);
+ *      sampler = load(ptr2); // becomes "s_load ptr1, 32" thanks to InBounds
 */
 static LLVMValueRef
 ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
-		     LLVMValueRef index, bool uniform, bool invariant)
+		     LLVMValueRef index, bool uniform, bool invariant,
+		     bool no_unsigned_wraparound)
 {
 	LLVMValueRef pointer, result;
+	LLVMValueRef indices[2] = {ctx->i32_0, index};
+
+	if (no_unsigned_wraparound &&
+	    LLVMGetPointerAddressSpace(LLVMTypeOf(base_ptr)) == AC_ADDR_SPACE_CONST_32BIT)
+		pointer = LLVMBuildInBoundsGEP(ctx->builder, base_ptr, indices, 2, "");
+	else
+		pointer = LLVMBuildGEP(ctx->builder, base_ptr, indices, 2, "");

-	pointer = ac_build_gep0(ctx, base_ptr, index);
 	if (uniform)
 		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
 	result = LLVMBuildLoad(ctx->builder, pointer, "");
@@ -881,19 +1027,28 @@ ac_build_load_custom(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
 			   LLVMValueRef index)
 {
-	return ac_build_load_custom(ctx, base_ptr, index, false, false);
+	return ac_build_load_custom(ctx, base_ptr, index, false, false, false);
 }

 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
 				     LLVMValueRef base_ptr, LLVMValueRef index)
 {
-	return ac_build_load_custom(ctx, base_ptr, index, false, true);
+	return ac_build_load_custom(ctx, base_ptr, index, false, true, false);
 }

+/* This assumes that there is no unsigned integer wraparound during the address
+ * computation, excluding all GEPs within base_ptr. */
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
 				   LLVMValueRef base_ptr, LLVMValueRef index)
 {
-	return ac_build_load_custom(ctx, base_ptr, index, true, true);
+	return ac_build_load_custom(ctx, base_ptr, index, true, true, true);
+}
+
+/* Like ac_build_load_to_sgpr(), but unsigned wraparound is allowed; see ac_build_load_custom(). */
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+				   LLVMValueRef base_ptr, LLVMValueRef index)
+{
+	return ac_build_load_custom(ctx, base_ptr, index, true, true, false); /* uniform, invariant, !no_unsigned_wraparound */
 }

 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
@@ -952,7 +1107,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 		LLVMValueRef args[] = {
 			ac_to_float(ctx, vdata),
 			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-			LLVMConstInt(ctx->i32, 0, 0),
+			ctx->i32_0,
 			offset,
 			LLVMConstInt(ctx->i1, glc, 0),
 			LLVMConstInt(ctx->i1, slc, 0),
@@ -980,8 +1135,8 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 	LLVMValueRef args[] = {
 		vdata,
 		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-		LLVMConstInt(ctx->i32, 0, 0),
-		voffset ? voffset : LLVMConstInt(ctx->i32, 0, 0),
+		ctx->i32_0,
+		voffset ? voffset : ctx->i32_0,
 		soffset,
 		LLVMConstInt(ctx->i32, inst_offset, 0),
 		LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
@@ -1013,7 +1168,7 @@ ac_build_buffer_load_common(struct ac_llvm_context *ctx,
 {
 	LLVMValueRef args[] = {
 		LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
-		vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
+		vindex ? vindex : ctx->i32_0,
 		voffset,
 		LLVMConstInt(ctx->i1, glc, 0),
 		LLVMConstInt(ctx->i1, slc, 0)
@@ -1108,7 +1263,7 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
                                                  bool can_speculate)
 {
 	LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 2, 0), "");
-	LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, LLVMConstInt(ctx->i32, 1, 0), "");
+	LLVMValueRef stride = LLVMBuildExtractElement(ctx->builder, rsrc, ctx->i32_1, "");
 	stride = LLVMBuildLShr(ctx->builder, stride, LLVMConstInt(ctx->i32, 16, 0), "");

 	LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->builder,
@@ -1129,7 +1284,8 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
 			    LLVMValueRef vindex,
 			    LLVMValueRef voffset,
 				LLVMValueRef soffset,
-				LLVMValueRef immoffset)
+				LLVMValueRef immoffset,
+				LLVMValueRef glc)
 {
 	const char *name = "llvm.amdgcn.tbuffer.load.i32";
 	LLVMTypeRef type = ctx->i32;
@@ -1141,7 +1297,7 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
 				immoffset,
 				LLVMConstInt(ctx->i32, V_008F0C_BUF_DATA_FORMAT_16, false),
 				LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, false),
-				ctx->i1false,
+				glc,
 				ctx->i1false,
 	};
 	LLVMValueRef res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
@@ -1175,7 +1331,7 @@ ac_get_thread_id(struct ac_llvm_context *ctx)

 	LLVMValueRef tid_args[2];
 	tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
-	tid_args[1] = LLVMConstInt(ctx->i32, 0, false);
+	tid_args[1] = ctx->i32_0;
 	tid_args[1] = ac_build_intrinsic(ctx,
 					 "llvm.amdgcn.mbcnt.lo", ctx->i32,
 					 tid_args, 2, AC_FUNC_ATTR_READNONE);
@@ -1342,7 +1498,7 @@ ac_build_imsb(struct ac_llvm_context *ctx,
 	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
 	LLVMValueRef cond = LLVMBuildOr(ctx->builder,
 					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
-						      arg, LLVMConstInt(ctx->i32, 0, 0), ""),
+						      arg, ctx->i32_0, ""),
 					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
 						      arg, all_ones, ""), "");

@@ -1358,17 +1514,31 @@ ac_build_umsb(struct ac_llvm_context *ctx,
 	LLVMTypeRef type;
 	LLVMValueRef highest_bit;
 	LLVMValueRef zero;
+	unsigned bitsize;

-	if (ac_get_elem_bits(ctx, LLVMTypeOf(arg)) == 64) {
+	bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(arg));
+	switch (bitsize) {
+	case 64:
 		intrin_name = "llvm.ctlz.i64";
 		type = ctx->i64;
 		highest_bit = LLVMConstInt(ctx->i64, 63, false);
 		zero = ctx->i64_0;
-	} else {
+		break;
+	case 32:
 		intrin_name = "llvm.ctlz.i32";
 		type = ctx->i32;
 		highest_bit = LLVMConstInt(ctx->i32, 31, false);
 		zero = ctx->i32_0;
+		break;
+	case 16:
+		intrin_name = "llvm.ctlz.i16";
+		type = ctx->i16;
+		highest_bit = LLVMConstInt(ctx->i16, 15, false);
+		zero = ctx->i16_0;
+		break;
+	default:
+		unreachable(!"invalid bitsize");
+		break;
 	}

 	LLVMValueRef params[2] = {
@@ -1874,83 +2044,24 @@ LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
 				  args, 2, AC_FUNC_ATTR_READNONE);
 }

-/* Upper 16 bits must be zero. */
-static LLVMValueRef ac_llvm_pack_two_int16(struct ac_llvm_context *ctx,
-					   LLVMValueRef val[2])
-{
-	return LLVMBuildOr(ctx->builder, val[0],
-			   LLVMBuildShl(ctx->builder, val[1],
-					LLVMConstInt(ctx->i32, 16, 0),
-					""), "");
-}
-
-/* Upper 16 bits are ignored and will be dropped. */
-static LLVMValueRef ac_llvm_pack_two_int32_as_int16(struct ac_llvm_context *ctx,
-						    LLVMValueRef val[2])
-{
-	LLVMValueRef v[2] = {
-		LLVMBuildAnd(ctx->builder, val[0],
-			     LLVMConstInt(ctx->i32, 0xffff, 0), ""),
-		val[1],
-	};
-	return ac_llvm_pack_two_int16(ctx, v);
-}
-
 LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
 				     LLVMValueRef args[2])
 {
-	if (HAVE_LLVM >= 0x0600) {
-		LLVMValueRef res =
-			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
-					   ctx->v2i16, args, 2,
-					   AC_FUNC_ATTR_READNONE);
-		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-	}
-
-	LLVMValueRef val[2];
-
-	for (int chan = 0; chan < 2; chan++) {
-		/* Clamp between [-1, 1]. */
-		val[chan] = ac_build_fmin(ctx, args[chan], ctx->f32_1);
-		val[chan] = ac_build_fmax(ctx, val[chan], LLVMConstReal(ctx->f32, -1));
-		/* Convert to a signed integer in [-32767, 32767]. */
-		val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
-					  LLVMConstReal(ctx->f32, 32767), "");
-		/* If positive, add 0.5, else add -0.5. */
-		val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
-				LLVMBuildSelect(ctx->builder,
-					LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
-						      val[chan], ctx->f32_0, ""),
-					LLVMConstReal(ctx->f32, 0.5),
-					LLVMConstReal(ctx->f32, -0.5), ""), "");
-		val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
-	}
-	return ac_llvm_pack_two_int32_as_int16(ctx, val);
+	LLVMValueRef res =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.i16",
+				   ctx->v2i16, args, 2,
+				   AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
 }

 LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
 				     LLVMValueRef args[2])
 {
-	if (HAVE_LLVM >= 0x0600) {
-		LLVMValueRef res =
-			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
-					   ctx->v2i16, args, 2,
-					   AC_FUNC_ATTR_READNONE);
-		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-	}
-
-	LLVMValueRef val[2];
-
-	for (int chan = 0; chan < 2; chan++) {
-		val[chan] = ac_build_clamp(ctx, args[chan]);
-		val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
-					  LLVMConstReal(ctx->f32, 65535), "");
-		val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
-					  LLVMConstReal(ctx->f32, 0.5), "");
-		val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
-					    ctx->i32, "");
-	}
-	return ac_llvm_pack_two_int32_as_int16(ctx, val);
+	LLVMValueRef res =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pknorm.u16",
+				   ctx->v2i16, args, 2,
+				   AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
 }

 /* The 8-bit and 10-bit clamping is for HW workarounds. */
@@ -1967,10 +2078,9 @@ LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
 		bits != 10 ? max_rgb : ctx->i32_1;
 	LLVMValueRef min_alpha =
 		bits != 10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
-	bool has_intrinsic = HAVE_LLVM >= 0x0600;

 	/* Clamp. */
-	if (!has_intrinsic || bits != 16) {
+	if (bits != 16) {
 		for (int i = 0; i < 2; i++) {
 			bool alpha = hi && i == 1;
 			args[i] = ac_build_imin(ctx, args[i],
@@ -1980,15 +2090,11 @@ LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
 		}
 	}

-	if (has_intrinsic) {
-		LLVMValueRef res =
-			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
-					   ctx->v2i16, args, 2,
-					   AC_FUNC_ATTR_READNONE);
-		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-	}
-
-	return ac_llvm_pack_two_int32_as_int16(ctx, args);
+	LLVMValueRef res =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.i16",
+				   ctx->v2i16, args, 2,
+				   AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
 }

 /* The 8-bit and 10-bit clamping is for HW workarounds. */
@@ -2001,10 +2107,9 @@ LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
 		bits == 8 ? 255 : bits == 10 ? 1023 : 65535, 0);
 	LLVMValueRef max_alpha =
 		bits != 10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
-	bool has_intrinsic = HAVE_LLVM >= 0x0600;

 	/* Clamp. */
-	if (!has_intrinsic || bits != 16) {
+	if (bits != 16) {
 		for (int i = 0; i < 2; i++) {
 			bool alpha = hi && i == 1;
 			args[i] = ac_build_umin(ctx, args[i],
@@ -2012,37 +2117,23 @@ LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
 		}
 	}

-	if (has_intrinsic) {
-		LLVMValueRef res =
-			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
-					   ctx->v2i16, args, 2,
-					   AC_FUNC_ATTR_READNONE);
-		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
-	}
-
-	return ac_llvm_pack_two_int16(ctx, args);
+	LLVMValueRef res =
+		ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pk.u16",
+				   ctx->v2i16, args, 2,
+				   AC_FUNC_ATTR_READNONE);
+	return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
 }

 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
-	assert(HAVE_LLVM >= 0x0600);
 	return ac_build_intrinsic(ctx, "llvm.amdgcn.wqm.vote", ctx->i1,
 				  &i1, 1, AC_FUNC_ATTR_READNONE);
 }

 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1)
 {
-	if (HAVE_LLVM >= 0x0600) {
-		ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
-				   &i1, 1, 0);
-		return;
-	}
-
-	LLVMValueRef value = LLVMBuildSelect(ctx->builder, i1,
-					     LLVMConstReal(ctx->f32, 1),
-					     LLVMConstReal(ctx->f32, -1), "");
-	ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
-			   &value, 1, AC_FUNC_ATTR_LEGACY);
+	ac_build_intrinsic(ctx, "llvm.amdgcn.kill", ctx->voidt,
+			   &i1, 1, 0);
 }

 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
@@ -2062,6 +2153,20 @@ LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
 				  AC_FUNC_ATTR_READNONE);
 }

+/* Integer multiply-add: builds s0 * s1 + s2 as a Mul followed by an Add. */
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, LLVMValueRef s2)
+{
+	LLVMValueRef product = LLVMBuildMul(ctx->builder, s0, s1, "");
+	return LLVMBuildAdd(ctx->builder, product, s2, "");
+}
+
+/* Float multiply-add: builds s0 * s1 + s2 as a separate FMul followed by an FAdd. */
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0, LLVMValueRef s1, LLVMValueRef s2)
+{
+	LLVMValueRef product = LLVMBuildFMul(ctx->builder, s0, s1, "");
+	return LLVMBuildFAdd(ctx->builder, product, s2, "");
+}
+
 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16)
 {
 	LLVMValueRef args[1] = {
@@ -2099,14 +2204,25 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
 	LLVMValueRef cmp, val, zero, one;
 	LLVMTypeRef type;

-	if (bitsize == 32) {
-		type = ctx->i32;
-		zero = ctx->i32_0;
-		one = ctx->i32_1;
-	} else {
+	switch (bitsize) {
+	case 64:
 		type = ctx->i64;
 		zero = ctx->i64_0;
 		one = ctx->i64_1;
+		break;
+	case 32:
+		type = ctx->i32;
+		zero = ctx->i32_0;
+		one = ctx->i32_1;
+		break;
+	case 16:
+		type = ctx->i16;
+		zero = ctx->i16_0;
+		one = ctx->i16_1;
+		break;
+	default:
+		unreachable(!"invalid bitsize");
+		break;
 	}

 	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
@@ -2139,6 +2255,66 @@ LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
 	return val;
 }

+/* Population count (llvm.ctpop) of a 16/32/64-bit src0; the result is always i32. */
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0)
+{
+	LLVMValueRef result;
+	unsigned bitsize = ac_get_elem_bits(ctx, LLVMTypeOf(src0));
+
+	switch (bitsize) {
+	case 64:
+		result = ac_build_intrinsic(ctx, "llvm.ctpop.i64", ctx->i64,
+					    (LLVMValueRef []) { src0 }, 1,
+					    AC_FUNC_ATTR_READNONE);
+		result = LLVMBuildTrunc(ctx->builder, result, ctx->i32, "");
+		break;
+	case 32:
+		result = ac_build_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32,
+					    (LLVMValueRef []) { src0 }, 1,
+					    AC_FUNC_ATTR_READNONE);
+		break;
+	case 16:
+		result = ac_build_intrinsic(ctx, "llvm.ctpop.i16", ctx->i16,
+					    (LLVMValueRef []) { src0 }, 1,
+					    AC_FUNC_ATTR_READNONE);
+		/* Widen to i32, mirroring the 64-bit truncation, so every width returns i32. */
+		result = LLVMBuildZExt(ctx->builder, result, ctx->i32, "");
+		break;
+	default:
+		unreachable(!"invalid bitsize");
+		break;
+	}
+
+	return result;
+}
+
+/* Build llvm.bitreverse for a 16- or 32-bit src0; the result keeps src0's width. */
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+				       LLVMValueRef src0)
+{
+	const char *intrin_name;
+	LLVMTypeRef ret_type;
+
+	switch (ac_get_elem_bits(ctx, LLVMTypeOf(src0))) {
+	case 32:
+		intrin_name = "llvm.bitreverse.i32";
+		ret_type = ctx->i32;
+		break;
+	case 16:
+		intrin_name = "llvm.bitreverse.i16";
+		ret_type = ctx->i16;
+		break;
+	default:
+		unreachable(!"invalid bitsize");
+		break;
+	}
+
+	/* Single operand, passed as a one-element array. */
+	LLVMValueRef operand[1] = { src0 };
+
+	return ac_build_intrinsic(ctx, intrin_name, ret_type, operand, 1, AC_FUNC_ATTR_READNONE);
+}
+
 #define AC_EXP_TARGET		0
 #define AC_EXP_ENABLED_CHANNELS 1
 #define AC_EXP_OUT0		2
@@ -2427,7 +2603,7 @@ void ac_declare_lds_as_pointer(struct ac_llvm_context *ctx)
 {
 	unsigned lds_size = ctx->chip_class >= CIK ? 65536 : 32768;
 	ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32_0,
-				     LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_LOCAL_ADDR_SPACE),
+				     LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), AC_ADDR_SPACE_LDS),
 				     "lds");
 }

@@ -2454,14 +2630,25 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
 	const char *intrin_name;
 	LLVMTypeRef type;
 	LLVMValueRef zero;
-	if (src0_bitsize == 64) {
+
+	switch (src0_bitsize) {
+	case 64:
 		intrin_name = "llvm.cttz.i64";
 		type = ctx->i64;
 		zero = ctx->i64_0;
-	} else {
+		break;
+	case 32:
 		intrin_name = "llvm.cttz.i32";
 		type = ctx->i32;
 		zero = ctx->i32_0;
+		break;
+	case 16:
+		intrin_name = "llvm.cttz.i16";
+		type = ctx->i16;
+		zero = ctx->i16_0;
+		break;
+	default:
+		unreachable(!"invalid bitsize");
 	}

 	LLVMValueRef params[2] = {
@@ -2476,7 +2663,7 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
 		 *
 		 * The hardware already implements the correct behavior.
 		 */
-		LLVMConstInt(ctx->i1, 1, false),
+		ctx->i1true,
 	};

 	LLVMValueRef lsb = ac_build_intrinsic(ctx, intrin_name, type,
@@ -2498,7 +2685,7 @@ LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
 LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type)
 {
 	return LLVMPointerType(LLVMArrayType(elem_type, 0),
-			       AC_CONST_ADDR_SPACE);
+			       AC_ADDR_SPACE_CONST);
 }

 LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
@@ -2507,7 +2694,7 @@ LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type)
 		return ac_array_in_const_addr_space(elem_type);

 	return LLVMPointerType(LLVMArrayType(elem_type, 0),
-			       AC_CONST_32BIT_ADDR_SPACE);
+			       AC_ADDR_SPACE_CONST_32BIT);
 }

 static struct ac_llvm_flow *
@@ -2681,7 +2868,7 @@ void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
 	if_cond_emit(ctx, cond, label_id);
 }

-LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
+LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
 			     const char *name)
 {
 	LLVMBuilderRef builder = ac->builder;
@@ -2699,18 +2886,15 @@ LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
 	}

 	res = LLVMBuildAlloca(first_builder, type, name);
-	LLVMBuildStore(builder, LLVMConstNull(type), res);
-
 	LLVMDisposeBuilder(first_builder);
-
 	return res;
 }

-LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac,
+LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac,
 				   LLVMTypeRef type, const char *name)
 {
-	LLVMValueRef ptr = ac_build_alloca(ac, type, name);
-	LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr);
+	LLVMValueRef ptr = ac_build_alloca_undef(ac, type, name);
+	LLVMBuildStore(ac->builder, LLVMConstNull(type), ptr);
 	return ptr;
 }

@@ -2729,9 +2913,11 @@ LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
 	if (count == num_components)
 		return value;

-	LLVMValueRef masks[] = {
-	    LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
-	    LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
+	LLVMValueRef masks[MAX2(count, 2)];
+	masks[0] = ctx->i32_0;
+	masks[1] = ctx->i32_1;
+	for (unsigned i = 2; i < count; i++)
+		masks[i] = LLVMConstInt(ctx->i32, i, false);

 	if (count == 1)
 		return LLVMBuildExtractElement(ctx->builder, value, masks[0],
@@ -3251,7 +3437,7 @@ ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
 		unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3)
 {
 	unsigned mask = dpp_quad_perm(lane0, lane1, lane2, lane3);
-	if (ctx->chip_class >= VI && HAVE_LLVM >= 0x0600) {
+	if (ctx->chip_class >= VI) {
 		return ac_build_dpp(ctx, src, src, mask, 0xf, 0xf, false);
 	} else {
 		return ac_build_ds_swizzle(ctx, src, (1 << 15) | mask);
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -37,12 +37,20 @@ extern "C" {
 #define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)

 enum {
-	/* CONST is the only address space that selects SMEM loads */
-	AC_CONST_ADDR_SPACE = HAVE_LLVM >= 0x700 ? 4 : 2,
-	AC_LOCAL_ADDR_SPACE = 3,
-	AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */
+	AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
+	AC_ADDR_SPACE_GLOBAL = 1,
+	AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
+	AC_ADDR_SPACE_LDS = 3,
+	AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
+	AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
 };

+/* Combine these with & instead of |. */
+#define NOOP_WAITCNT	0xcf7f
+#define LGKM_CNT	0xc07f
+#define EXP_CNT		0xcf0f
+#define VM_CNT		0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+
 struct ac_llvm_flow;

 struct ac_llvm_context {
@@ -68,6 +76,8 @@ struct ac_llvm_context {
 	LLVMTypeRef v4f32;
 	LLVMTypeRef v8i32;

+	LLVMValueRef i16_0;
+	LLVMValueRef i16_1;
 	LLVMValueRef i32_0;
 	LLVMValueRef i32_1;
 	LLVMValueRef i64_0;
@@ -133,6 +143,7 @@ ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
 	     unsigned count_incoming, LLVMValueRef *values,
 	     LLVMBasicBlockRef *blocks);

+void ac_build_s_barrier(struct ac_llvm_context *ctx);
 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
 				   LLVMValueRef *pvgpr);

@@ -167,12 +178,30 @@ LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
 				     LLVMValueRef value,
 				     unsigned num_channels);
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);

 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
 	      LLVMValueRef num,
 	      LLVMValueRef den);

+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+				LLVMValueRef num,
+				LLVMValueRef multiplier,
+				LLVMValueRef pre_shift,
+				LLVMValueRef post_shift,
+				LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+				    LLVMValueRef num,
+				    LLVMValueRef multiplier,
+				    LLVMValueRef pre_shift,
+				    LLVMValueRef post_shift,
+				    LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+					      LLVMValueRef num,
+					      LLVMValueRef multiplier,
+					      LLVMValueRef post_shift);
+
 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
 		       bool is_deriv, bool is_array, bool is_lod,
@@ -188,6 +217,14 @@ ac_build_fs_interp(struct ac_llvm_context *ctx,
 		   LLVMValueRef i,
 		   LLVMValueRef j);

+LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+		       LLVMValueRef llvm_chan,
+		       LLVMValueRef attr_number,
+		       LLVMValueRef params,
+		       LLVMValueRef i,
+		       LLVMValueRef j);
+
 LLVMValueRef
 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
 		       LLVMValueRef parameter,
@@ -199,6 +236,8 @@ LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
 	      LLVMValueRef base_ptr,
 	      LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+				  LLVMValueRef index);

 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
@@ -211,6 +250,8 @@ LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
 				     LLVMValueRef base_ptr, LLVMValueRef index);
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
 				   LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+				   LLVMValueRef base_ptr, LLVMValueRef index);

 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
@@ -261,7 +302,8 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
 			    LLVMValueRef vindex,
 			    LLVMValueRef voffset,
 				LLVMValueRef soffset,
-				LLVMValueRef immoffset);
+				LLVMValueRef immoffset,
+				LLVMValueRef glc);

 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
@@ -400,6 +442,10 @@ void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
 			  LLVMValueRef offset, LLVMValueRef width,
 			  bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+			   LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+			   LLVMValueRef s1, LLVMValueRef s2);

 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);

@@ -412,6 +458,11 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
 			    unsigned bitsize);

+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
+
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+				       LLVMValueRef src0);
+
 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
 			    LLVMValueRef main_fn,
 			    uint8_t *vs_output_param_offset,
--- a/src/amd/common/ac_llvm_helper.cpp
+++ b/src/amd/common/ac_llvm_helper.cpp
@@ -92,11 +92,7 @@ LLVMBuilderRef ac_create_builder(LLVMContextRef ctx,
 		llvm::unwrap(builder)->setFastMathFlags(flags);
 		break;
 	case AC_FLOAT_MODE_UNSAFE_FP_MATH:
-#if HAVE_LLVM >= 0x0600
 		flags.setFast();
-#else
-		flags.setUnsafeAlgebra();
-#endif
 		llvm::unwrap(builder)->setFastMathFlags(flags);
 		break;
 	}
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -135,6 +135,8 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
 		return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
 	case CHIP_VEGA20:
 		return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902";
+	case CHIP_RAVEN2:
+		return "gfx902"; /* TODO: use gfx909 when it's available */
 	default:
 		return "";
 	}
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -311,9 +311,18 @@ static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
 }

 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
-			     LLVMValueRef src0)
+			     LLVMValueRef src0,
+			     unsigned bitsize)
 {
-	return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
+	LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
+					   LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
+					   "");
+	result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");
+
+	if (bitsize == 32)
+		return result;
+
+	return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
 }

 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
@@ -427,7 +436,7 @@ static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
 		/* FIXME: LLVM 7 returns incorrect result when count is 0.
 		 * https://bugs.freedesktop.org/show_bug.cgi?id=107276
 		 */
-		LLVMValueRef zero = LLVMConstInt(ctx->i32, 0, false);
+		LLVMValueRef zero = ctx->i32_0;
 		LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
 		LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, "");

@@ -486,7 +495,7 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
 					  LLVMValueRef src0)
 {
 	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
-	LLVMValueRef temps[2], result, val;
+	LLVMValueRef temps[2], val;
 	int i;

 	for (i = 0; i < 2; i++) {
@@ -495,12 +504,7 @@ static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
 		val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
 		temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
 	}
-
-	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
-					ctx->i32_0, "");
-	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
-					ctx->i32_1, "");
-	return result;
+	return ac_build_gather_values(ctx, temps, 2);
 }

 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
@@ -841,15 +845,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
 		break;
 	case nir_op_bitfield_reverse:
-		result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
+		result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
 		break;
 	case nir_op_bit_count:
-		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32)
-			result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
-		else {
-			result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE);
-			result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
-		}
+		result = ac_build_bit_count(&ctx->ac, src[0]);
 		break;
 	case nir_op_vec2:
 	case nir_op_vec3:
@@ -942,7 +941,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
 		break;
 	case nir_op_b2f:
-		result = emit_b2f(&ctx->ac, src[0]);
+		result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
 		break;
 	case nir_op_f2b:
 		result = emit_f2b(&ctx->ac, src[0]);
@@ -1004,10 +1003,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)

 	case nir_op_pack_64_2x32_split: {
 		LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
-		tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-					     src[0], ctx->ac.i32_0, "");
-		tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
-					     src[1], ctx->ac.i32_1, "");
+		tmp = ac_build_gather_values(&ctx->ac, src, 2);
 		result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
 		break;
 	}
@@ -1405,7 +1401,7 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
 		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
 		res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
 		LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
-		cond = LLVMBuildTrunc(ctx->ac.builder, cond, LLVMInt1Type(), "");
+		cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
 		LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
 					LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
 					LLVMConstInt(ctx->ac.i32, 4, false)};
@@ -1443,7 +1439,7 @@ static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueR
                                         unsigned start, unsigned count)
 {
 	LLVMValueRef mask[] = {
-	LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
+	ctx->i32_0, ctx->i32_1,
 	LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) };

 	unsigned src_elements = ac_get_llvm_num_components(src);
@@ -1469,6 +1465,11 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 	LLVMValueRef src_data = get_src(ctx, instr->src[0]);
 	int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
 	unsigned writemask = nir_intrinsic_write_mask(instr);
+	enum gl_access_qualifier access = nir_intrinsic_access(instr);
+	LLVMValueRef glc = ctx->ac.i1false;
+
+	if (access & (ACCESS_VOLATILE | ACCESS_COHERENT))
+		glc = ctx->ac.i1true;

 	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
 				        get_src(ctx, instr->src[1]), true);
@@ -1525,7 +1526,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 				ctx->ac.i32_0,
 				LLVMConstInt(ctx->ac.i32, 2, false), // dfmt (= 16bit)
 				LLVMConstInt(ctx->ac.i32, 4, false), // nfmt (= uint)
-				ctx->ac.i1false,
+				glc,
 				ctx->ac.i1false,
 			};
 			ac_build_intrinsic(&ctx->ac, store_name,
@@ -1553,7 +1554,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 				rsrc,
 				ctx->ac.i32_0, /* vindex */
 				offset,
-				ctx->ac.i1false,  /* glc */
+				glc,
 				ctx->ac.i1false,  /* slc */
 			};
 			ac_build_intrinsic(&ctx->ac, store_name,
@@ -1578,7 +1579,7 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
 						 true);
 	params[arg_count++] = ctx->ac.i32_0; /* vindex */
 	params[arg_count++] = get_src(ctx, instr->src[1]);      /* voffset */
-	params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false);  /* slc */
+	params[arg_count++] = ctx->ac.i1false;  /* slc */

 	switch (instr->intrinsic) {
 	case nir_intrinsic_ssbo_atomic_add:
@@ -1621,31 +1622,45 @@ static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
                                      const nir_intrinsic_instr *instr)
 {
-	LLVMValueRef results[2];
-	int load_bytes;
 	int elem_size_bytes = instr->dest.ssa.bit_size / 8;
 	int num_components = instr->num_components;
-	int num_bytes = num_components * elem_size_bytes;
+	enum gl_access_qualifier access = nir_intrinsic_access(instr);
+	LLVMValueRef glc = ctx->ac.i1false;

-	for (int i = 0; i < num_bytes; i += load_bytes) {
-		load_bytes = MIN2(num_bytes - i, 16);
-		const char *load_name;
-		LLVMTypeRef data_type;
-		LLVMValueRef offset = get_src(ctx, instr->src[1]);
-		LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i, false);
-		LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
-							get_src(ctx, instr->src[0]), false);
-		LLVMValueRef vindex = ctx->ac.i32_0;
+	if (access & (ACCESS_VOLATILE | ACCESS_COHERENT))
+		glc = ctx->ac.i1true;

-		int idx = i ? 1 : 0;
+	LLVMValueRef offset = get_src(ctx, instr->src[1]);
+	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
+						get_src(ctx, instr->src[0]), false);
+	LLVMValueRef vindex = ctx->ac.i32_0;
+
+	LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
+	LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
+
+	LLVMValueRef results[4];
+	for (int i = 0; i < num_components;) {
+		int num_elems = num_components - i;
+		if (elem_size_bytes < 4)
+			num_elems = 1;
+		if (num_elems * elem_size_bytes > 16)
+			num_elems = 16 / elem_size_bytes;
+		int load_bytes = num_elems * elem_size_bytes;
+
+		LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
+
+		LLVMValueRef ret;
 		if (load_bytes == 2) {
-			results[idx] = ac_build_tbuffer_load_short(&ctx->ac,
-								   rsrc,
-								   vindex,
-								   offset,
-								   ctx->ac.i32_0,
-								   immoffset);
+			ret = ac_build_tbuffer_load_short(&ctx->ac,
+							  rsrc,
+							  vindex,
+							  offset,
+							  ctx->ac.i32_0,
+							  immoffset,
+							  glc);
 		} else {
+			const char *load_name;
+			LLVMTypeRef data_type;
 			switch (load_bytes) {
 			case 16:
 			case 12:
@@ -1668,36 +1683,26 @@ static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
 				rsrc,
 				vindex,
 				LLVMBuildAdd(ctx->ac.builder, offset, immoffset, ""),
-				ctx->ac.i1false,
+				glc,
 				ctx->ac.i1false,
 			};
-			results[idx] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
-			unsigned num_elems = ac_get_type_size(data_type) / elem_size_bytes;
-			LLVMTypeRef resTy = LLVMVectorType(LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size), num_elems);
-			results[idx] = LLVMBuildBitCast(ctx->ac.builder, results[idx], resTy, "");
-		}
-	}
-
-	assume(results[0]);
-	LLVMValueRef ret = results[0];
-	if (num_bytes > 16 || num_components == 3) {
-		LLVMValueRef masks[] = {
-		        LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
-		        LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
-		};
-
-		if (num_bytes > 16 && num_components == 3) {
-			/* we end up with a v2i64 and i64 but shuffle fails on that */
-			results[1] = ac_build_expand(&ctx->ac, results[1], 1, 2);
+			ret = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
 		}

-		LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
-		ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
-					     results[num_bytes > 16 ? 1 : 0], swizzle, "");
+		LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
+		ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
+		ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
+
+		LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
+		ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
+
+		for (unsigned j = 0; j < num_elems; j++) {
+			results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
+		}
+		i += num_elems;
 	}

-	return LLVMBuildBitCast(ctx->ac.builder, ret,
-	                        get_def_type(ctx, &instr->dest.ssa), "");
+	return ac_build_gather_values(&ctx->ac, results, num_components);
 }

 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
@@ -1722,7 +1727,8 @@ static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
 								 ctx->ac.i32_0,
 								 offset,
 								 ctx->ac.i32_0,
-								 LLVMConstInt(ctx->ac.i32, 2 * i, 0));
+								 LLVMConstInt(ctx->ac.i32, 2 * i, 0),
+								 ctx->ac.i1false);
 		}
 		ret = ac_build_gather_values(&ctx->ac, results, num_components);
 	} else {
@@ -2066,7 +2072,7 @@ visit_store_var(struct ac_nir_context *ctx,
 		int writemask = instr->const_index[0];
 		LLVMValueRef address = get_src(ctx, instr->src[0]);
 		LLVMValueRef val = get_src(ctx, instr->src[1]);
-		if (util_is_power_of_two_nonzero(writemask)) {
+		if (writemask == (1u << ac_get_llvm_num_components(val)) - 1) {
 			val = LLVMBuildBitCast(
 			   ctx->ac.builder, val,
 			   LLVMGetElementType(LLVMTypeOf(address)), "");
@@ -2252,7 +2258,8 @@ static void get_image_coords(struct ac_nir_context *ctx,
 							       fmask_load_address[1],
 							       fmask_load_address[2],
 							       sample_index,
-							       get_image_descriptor(ctx, instr, AC_DESC_FMASK, false));
+							       get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
+										AC_DESC_FMASK, NULL, false, false));
 	}
 	if (count == 1 && !gfx9_1d) {
 		if (instr->src[1].ssa->num_components)
@@ -2350,7 +2357,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 					    glsl_sampler_type_is_array(type));
 		args.dmask = 15;
 		args.attributes = AC_FUNC_ATTR_READONLY;
-		if (var->data.image._volatile || var->data.image.coherent)
+		if (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT))
 			args.cache_policy |= ac_glc;

 		res = ac_build_image_opcode(&ctx->ac, &args);
@@ -2371,17 +2378,27 @@ static void visit_image_store(struct ac_nir_context *ctx,
 		glc = ctx->ac.i1true;

 	if (dim == GLSL_SAMPLER_DIM_BUF) {
+		char name[48];
+		const char *types[] = { "f32", "v2f32", "v4f32" };
 		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
+		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
+		unsigned src_channels = ac_get_llvm_num_components(src);

-		params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3])); /* data */
+		if (src_channels == 3)
+			src = ac_build_expand(&ctx->ac, src, 3, 4);
+
+		params[0] = src; /* data */
 		params[1] = rsrc;
 		params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 						    ctx->ac.i32_0, ""); /* vindex */
 		params[3] = ctx->ac.i32_0; /* voffset */
+		snprintf(name, sizeof(name), "%s.%s",
+		                            "llvm.amdgcn.buffer.store.format",
+		         types[CLAMP(src_channels, 1, 3) - 1]);
+
 		params[4] = glc;  /* glc */
 		params[5] = ctx->ac.i1false;  /* slc */
-		ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
-				   params, 6, 0);
+		ac_build_intrinsic(&ctx->ac, name, ctx->ac.voidt, params, 6, 0);
 	} else {
 		struct ac_image_args args = {};
 		args.opcode = ac_image_store;
@@ -2391,7 +2408,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
 		args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
 					    glsl_sampler_type_is_array(type));
 		args.dmask = 15;
-		if (force_glc || var->data.image._volatile || var->data.image.coherent)
+		if (force_glc || (var->data.image.access & (ACCESS_VOLATILE | ACCESS_COHERENT)))
 			args.cache_policy |= ac_glc;

 		ac_build_image_opcode(&ctx->ac, &args);
@@ -2545,10 +2562,6 @@ static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
 	return res;
 }

-#define NOOP_WAITCNT 0xf7f
-#define LGKM_CNT 0x07f
-#define VM_CNT 0xf70
-
 static void emit_membar(struct ac_llvm_context *ac,
 			const nir_intrinsic_instr *instr)
 {
@@ -2584,8 +2597,7 @@ void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
 		ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
 		return;
 	}
-	ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier",
-			   ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
+	ac_build_s_barrier(ac);
 }

 static void emit_discard(struct ac_nir_context *ctx,
@@ -2599,7 +2611,7 @@ static void emit_discard(struct ac_nir_context *ctx,
 				     ctx->ac.i32_0, "");
 	} else {
 		assert(instr->intrinsic == nir_intrinsic_discard);
-		cond = LLVMConstInt(ctx->ac.i1, false, 0);
+		cond = ctx->ac.i1false;
 	}

 	ctx->abi->emit_kill(ctx->abi, cond);
@@ -2657,7 +2669,7 @@ visit_first_invocation(struct ac_nir_context *ctx)
 	LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);

 	/* The second argument is whether cttz(0) should be defined, but we do not care. */
-	LLVMValueRef args[] = {active_set, LLVMConstInt(ctx->ac.i1, 0, false)};
+	LLVMValueRef args[] = {active_set, ctx->ac.i1false};
 	LLVMValueRef result =  ac_build_intrinsic(&ctx->ac,
 	                                          "llvm.cttz.i64",
 	                                          ctx->ac.i64, args, 2,
@@ -2790,15 +2802,16 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
 				 const nir_intrinsic_instr *instr)
 {
 	LLVMValueRef result[4];
-	LLVMValueRef interp_param, attr_number;
+	LLVMValueRef interp_param;
 	unsigned location;
 	unsigned chan;
 	LLVMValueRef src_c0 = NULL;
 	LLVMValueRef src_c1 = NULL;
 	LLVMValueRef src0 = NULL;

-	nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
-	int input_index = var->data.location - VARYING_SLOT_VAR0;
+	nir_deref_instr *deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+	nir_variable *var = nir_deref_instr_get_variable(deref_instr);
+	int input_base = ctx->abi->fs_input_attr_indices[var->data.location - VARYING_SLOT_VAR0];
 	switch (instr->intrinsic) {
 	case nir_intrinsic_interp_deref_at_centroid:
 		location = INTERP_CENTROID;
@@ -2828,7 +2841,6 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
 		src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
 	}
 	interp_param = ctx->abi->lookup_interp_param(ctx->abi, var->data.interpolation, location);
-	attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);

 	if (location == INTERP_CENTER) {
 		LLVMValueRef ij_out[2];
@@ -2856,11 +2868,8 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
 			interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
 						     ctx->ac.f32, "");

-			temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, src_c0, "");
-			temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, "");
-
-			temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, src_c1, "");
-			temp2 = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, "");
+			temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
+			temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);

 			ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
 						     temp2, ctx->ac.i32, "");
@@ -2869,26 +2878,65 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,

 	}

+	LLVMValueRef array_idx = ctx->ac.i32_0;
+	while(deref_instr->deref_type != nir_deref_type_var) {
+		if (deref_instr->deref_type == nir_deref_type_array) {
+			unsigned array_size = glsl_get_aoa_size(deref_instr->type);
+			if (!array_size)
+				array_size = 1;
+
+			LLVMValueRef offset;
+			nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index);
+			if (const_value) {
+				offset = LLVMConstInt(ctx->ac.i32, array_size * const_value->u32[0], false);
+			} else {
+				LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
+
+				offset = LLVMBuildMul(ctx->ac.builder, indirect,
+						      LLVMConstInt(ctx->ac.i32, array_size, false), "");
+			}
+
+			array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, offset, "");
+			deref_instr = nir_src_as_deref(deref_instr->parent);
+		} else {
+			unreachable("Unsupported deref type");
+		}
+
+	}
+
+	unsigned input_array_size = glsl_get_aoa_size(var->type);
+	if (!input_array_size)
+		input_array_size = 1;
+
 	for (chan = 0; chan < 4; chan++) {
+		LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, input_array_size));
 		LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);

-		if (interp_param) {
-			interp_param = LLVMBuildBitCast(ctx->ac.builder,
-							interp_param, ctx->ac.v2f32, "");
-			LLVMValueRef i = LLVMBuildExtractElement(
-				ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
-			LLVMValueRef j = LLVMBuildExtractElement(
-				ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+		for (unsigned idx = 0; idx < input_array_size; ++idx) {
+			LLVMValueRef v, attr_number;

-			result[chan] = ac_build_fs_interp(&ctx->ac,
-							  llvm_chan, attr_number,
-							  ctx->abi->prim_mask, i, j);
-		} else {
-			result[chan] = ac_build_fs_interp_mov(&ctx->ac,
-							      LLVMConstInt(ctx->ac.i32, 2, false),
-							      llvm_chan, attr_number,
-							      ctx->abi->prim_mask);
+			attr_number = LLVMConstInt(ctx->ac.i32, input_base + idx, false);
+			if (interp_param) {
+				interp_param = LLVMBuildBitCast(ctx->ac.builder,
+							interp_param, ctx->ac.v2f32, "");
+				LLVMValueRef i = LLVMBuildExtractElement(
+					ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
+				LLVMValueRef j = LLVMBuildExtractElement(
+					ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
+
+				v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
+						       ctx->abi->prim_mask, i, j);
+			} else {
+				v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false),
+							   llvm_chan, attr_number, ctx->abi->prim_mask);
+			}
+
+			gather = LLVMBuildInsertElement(ctx->ac.builder, gather, v,
+							LLVMConstInt(ctx->ac.i32, idx, false), "");
 		}
+
+		result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, array_idx, "");
+
 	}
 	return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
 					      var->data.location_frac);
@@ -2984,7 +3032,8 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 			ctx->abi->frag_pos[2],
 			ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
 		};
-		result = ac_build_gather_values(&ctx->ac, values, 4);
+		result = ac_to_integer(&ctx->ac,
+		                       ac_build_gather_values(&ctx->ac, values, 4));
 		break;
 	}
 	case nir_intrinsic_load_front_face:
@@ -3331,7 +3380,7 @@ static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
 				      LLVMValueRef coord)
 {
 	coord = ac_to_float(ctx, coord);
-	coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
+	coord = ac_build_round(ctx, coord);
 	coord = ac_to_integer(ctx, coord);
 	return coord;
 }
@@ -3451,7 +3500,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
 	 * It's unnecessary if the original texture format was
 	 * Z32_FLOAT, but we don't know that here.
 	 */
-	if (args.compare && ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
+	if (args.compare && ctx->ac.chip_class >= VI && ctx->abi->clamp_shadow_reference)
 		args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));

 	/* pack derivatives */
@@ -3643,7 +3692,6 @@ static void visit_post_phi(struct ac_nir_context *ctx,

 static void phi_post_pass(struct ac_nir_context *ctx)
 {
-	struct hash_entry *entry;
 	hash_table_foreach(ctx->phis, entry) {
 		visit_post_phi(ctx, (nir_phi_instr*)entry->key,
 		               (LLVMValueRef)entry->data);
@@ -3843,7 +3891,7 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
 		}
 	}

-	bool is_16bit = glsl_type_is_16bit(variable->type);
+	bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
 	LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
 	for (unsigned i = 0; i < attrib_count; ++i) {
 		for (unsigned chan = 0; chan < 4; chan++) {
@@ -3948,7 +3996,7 @@ setup_shared(struct ac_nir_context *ctx,
 			LLVMAddGlobalInAddressSpace(
 			   ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type),
 			   variable->name ? variable->name : "",
-			   AC_LOCAL_ADDR_SPACE);
+			   AC_ADDR_SPACE_LDS);
 		_mesa_hash_table_insert(ctx->vars, variable, shared);
 	}
 }
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -77,6 +77,9 @@ struct ac_shader_abi {
 	 */
 	LLVMValueRef *inputs;

+	/* Varying -> attribute number mapping. Also NIR-only */
+	unsigned fs_input_attr_indices[MAX_VARYING];
+
 	void (*emit_outputs)(struct ac_shader_abi *abi,
 			     unsigned max_outputs,
 			     LLVMValueRef *addrs);
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -151,6 +151,10 @@ static void addrlib_family_rev_id(enum radeon_family family,
 		*addrlib_family = FAMILY_RV;
 		*addrlib_revid = get_first(AMDGPU_RAVEN_RANGE);
 		break;
+	case CHIP_RAVEN2:
+		*addrlib_family = FAMILY_RV;
+		*addrlib_revid = get_first(AMDGPU_RAVEN2_RANGE);
+		break;
 	default:
 		fprintf(stderr, "amdgpu: Unknown family.\n");
 	}
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -97,6 +97,7 @@ enum radeon_family {
    CHIP_VEGA12,
    CHIP_VEGA20,
    CHIP_RAVEN,
+    CHIP_RAVEN2,
    CHIP_LAST,
 };

--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -4457,6 +4457,9 @@
 #define   S_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((unsigned)(x) & 0x1F) << 2)
 #define   G_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) >> 2) & 0x1F)
 #define   C_028424_OVERWRITE_COMBINER_WATERMARK                       0xFFFFFF83
+#define   S_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((unsigned)(x) & 0x1) << 10) /* Raven2+ */
+#define   G_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((x) >> 10) & 0x1)
+#define   C_028424_DISABLE_CONSTANT_ENCODE_REG                        0xFFFFFBFF
 #define R_02842C_DB_STENCIL_CONTROL                                     0x02842C
 #define   S_02842C_STENCILFAIL(x)                                     (((unsigned)(x) & 0x0F) << 0)
 #define   G_02842C_STENCILFAIL(x)                                     (((x) >> 0) & 0x0F)
--- a/src/amd/common/meson.build
+++ b/src/amd/common/meson.build
@@ -22,7 +22,7 @@ sid_tables_h = custom_target(
  'sid_tables_h',
  input : ['sid_tables.py', 'sid.h', 'gfx9d.h'],
  output : 'sid_tables.h',
-  command : [prog_python2, '@INPUT@'],
+  command : [prog_python, '@INPUT@'],
  capture : true,
 )

--- a/src/amd/common/sid.h
+++ b/src/amd/common/sid.h
@@ -119,6 +119,7 @@
 #define			STRMOUT_OFFSET_FROM_VGT_FILLED_SIZE	1
 #define			STRMOUT_OFFSET_FROM_MEM			2
 #define			STRMOUT_OFFSET_NONE			3
+#define		STRMOUT_DATA_TYPE(x)		(((unsigned)(x) & 0x1) << 7)
 #define		STRMOUT_SELECT_BUFFER(x)	(((unsigned)(x) & 0x3) << 8)
 #define PKT3_DRAW_INDEX_OFFSET_2               0x35
 #define PKT3_WRITE_DATA                        0x37
@@ -144,7 +145,10 @@
 #define PKT3_MPEG_INDEX                        0x3A /* not on CIK */
 #define PKT3_WAIT_REG_MEM                      0x3C
 #define		WAIT_REG_MEM_EQUAL		3
+#define		WAIT_REG_MEM_NOT_EQUAL		4
+#define		WAIT_REG_MEM_GREATER_OR_EQUAL   5
 #define         WAIT_REG_MEM_MEM_SPACE(x)       (((unsigned)(x) & 0x3) << 4)
+#define         WAIT_REG_MEM_PFP		(1 << 8)
 #define PKT3_MEM_WRITE                         0x3D /* not on CIK */
 #define PKT3_INDIRECT_BUFFER_CIK               0x3F /* new on CIK */
 #define   R_3F0_IB_BASE_LO                     0x3F0
@@ -159,20 +163,30 @@
 #define PKT3_COPY_DATA			       0x40
 #define		COPY_DATA_SRC_SEL(x)		((x) & 0xf)
 #define			COPY_DATA_REG		0
-#define			COPY_DATA_MEM		1
+#define			COPY_DATA_SRC_MEM	1 /* only valid as source */
+#define                 COPY_DATA_TC_L2         2
+#define                 COPY_DATA_GDS           3
 #define                 COPY_DATA_PERF          4
 #define                 COPY_DATA_IMM           5
 #define                 COPY_DATA_TIMESTAMP     9
 #define		COPY_DATA_DST_SEL(x)		(((unsigned)(x) & 0xf) << 8)
-#define                 COPY_DATA_MEM_ASYNC     5
+#define                 COPY_DATA_DST_MEM_GRBM	1 /* sync across GRBM, deprecated */
+#define                 COPY_DATA_TC_L2         2
+#define                 COPY_DATA_GDS           3
+#define                 COPY_DATA_PERF          4
+#define                 COPY_DATA_DST_MEM       5
 #define		COPY_DATA_COUNT_SEL		(1 << 16)
 #define		COPY_DATA_WR_CONFIRM		(1 << 20)
+#define		COPY_DATA_ENGINE_PFP		(1 << 30)
 #define PKT3_PFP_SYNC_ME		       0x42
 #define PKT3_SURFACE_SYNC                      0x43 /* deprecated on CIK, use ACQUIRE_MEM */
 #define PKT3_ME_INITIALIZE                     0x44 /* not on CIK */
 #define PKT3_COND_WRITE                        0x45
 #define PKT3_EVENT_WRITE                       0x46
 #define PKT3_EVENT_WRITE_EOP                   0x47 /* not on GFX9 */
+#define         EOP_DST_SEL(x)				((x) << 16)
+#define			EOP_DST_SEL_MEM			0
+#define			EOP_DST_SEL_TC_L2		1
 #define         EOP_INT_SEL(x)                          ((x) << 24)
 #define			EOP_INT_SEL_NONE			0
 #define			EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM	3
@@ -181,6 +195,8 @@
 #define			EOP_DATA_SEL_VALUE_32BIT	1
 #define			EOP_DATA_SEL_VALUE_64BIT	2
 #define			EOP_DATA_SEL_TIMESTAMP		3
+#define			EOP_DATA_SEL_GDS		5
+#define		EOP_DATA_GDS(dw_offset, num_dwords)	((dw_offset) | ((unsigned)(num_dwords) << 16))
 /* CP DMA bug: Any use of CP_DMA.DST_SEL=TC must be avoided when EOS packets
 * are used. Use DST_SEL=MC instead. For prefetch, use SRC_SEL=TC and
 * DST_SEL=MC. Only CIK chips are affected.
@@ -294,11 +310,13 @@
 #define       V_500_GDS			1 /* program SAS to 1 as well */
 #define       V_500_DATA		2
 #define       V_500_SRC_ADDR_TC_L2	3 /* new for CIK */
+#define     S_500_DST_CACHE_POLICY(x)	(((unsigned)(x) & 0x3) << 25) /* CIK+ */
 #define     S_500_DST_SEL(x)		(((unsigned)(x) & 0x3) << 20)
 #define       V_500_DST_ADDR		0
 #define       V_500_GDS			1 /* program DAS to 1 as well */
 #define       V_500_NOWHERE		2 /* new for GFX9 */
 #define       V_500_DST_ADDR_TC_L2	3 /* new for CIK */
+#define     S_500_SRC_CACHE_POLICY(x)	(((unsigned)(x) & 0x3) << 13) /* CIK+ */
 #define     S_500_ENGINE(x)		((x) & 0x1)
 #define       V_500_ME			0
 #define       V_500_PFP			1
@@ -5282,6 +5300,22 @@
 #define   S_02820C_CLIP_RULE(x)                                       (((unsigned)(x) & 0xFFFF) << 0)
 #define   G_02820C_CLIP_RULE(x)                                       (((x) >> 0) & 0xFFFF)
 #define   C_02820C_CLIP_RULE                                          0xFFFF0000
+#define     V_02820C_OUT                                            0x0001
+#define     V_02820C_IN_0                                           0x0002
+#define     V_02820C_IN_1                                           0x0004
+#define     V_02820C_IN_10                                          0x0008
+#define     V_02820C_IN_2                                           0x0010
+#define     V_02820C_IN_20                                          0x0020
+#define     V_02820C_IN_21                                          0x0040
+#define     V_02820C_IN_210                                         0x0080
+#define     V_02820C_IN_3                                           0x0100
+#define     V_02820C_IN_30                                          0x0200
+#define     V_02820C_IN_31                                          0x0400
+#define     V_02820C_IN_310                                         0x0800
+#define     V_02820C_IN_32                                          0x1000
+#define     V_02820C_IN_320                                         0x2000
+#define     V_02820C_IN_321                                         0x4000
+#define     V_02820C_IN_3210                                        0x8000
 #define R_028210_PA_SC_CLIPRECT_0_TL                                    0x028210
 #define   S_028210_TL_X(x)                                            (((unsigned)(x) & 0x7FFF) << 0)
 #define   G_028210_TL_X(x)                                            (((x) >> 0) & 0x7FFF)
@@ -9122,6 +9156,10 @@
 #define    CIK_SDMA_PACKET_TRAP                    0x6
 #define    CIK_SDMA_PACKET_SEMAPHORE               0x7
 #define    CIK_SDMA_PACKET_CONSTANT_FILL           0xb
+#define    CIK_SDMA_OPCODE_TIMESTAMP               0xd
+#define        SDMA_TS_SUB_OPCODE_SET_LOCAL_TIMESTAMP     0x0
+#define        SDMA_TS_SUB_OPCODE_GET_LOCAL_TIMESTAMP     0x1
+#define        SDMA_TS_SUB_OPCODE_GET_GLOBAL_TIMESTAMP    0x2
 #define    CIK_SDMA_PACKET_SRBM_WRITE              0xe
 /* There is apparently an undocumented HW "feature" that
   prevents the HW from copying past 256 bytes of (1 << 22) */
--- a/src/amd/vulkan/Android.mk
+++ b/src/amd/vulkan/Android.mk
@@ -74,7 +74,8 @@ LOCAL_C_INCLUDES := \
 	$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util

 LOCAL_WHOLE_STATIC_LIBRARIES := \
-	libmesa_vulkan_util
+	libmesa_vulkan_util \
+	libmesa_git_sha1

 LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.c
 LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.h
--- a/src/amd/vulkan/Makefile.am
+++ b/src/amd/vulkan/Makefile.am
@@ -140,7 +140,7 @@ vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml

 radv_entrypoints.c: radv_entrypoints_gen.py radv_extensions.py $(vulkan_api_xml)
 	$(MKDIR_GEN)
-	$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py \
+	$(AM_V_GEN)$(PYTHON) $(srcdir)/radv_entrypoints_gen.py \
 		--xml $(vulkan_api_xml) \
 		--outdir $(builddir)
 radv_entrypoints.h: radv_entrypoints.c
@@ -148,7 +148,7 @@ radv_entrypoints.h: radv_entrypoints.c
 radv_extensions.c: radv_extensions.py \
 		 $(vulkan_api_xml)
 	$(MKDIR_GEN)
-	$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_extensions.py \
+	$(AM_V_GEN)$(PYTHON) $(srcdir)/radv_extensions.py \
 		--xml $(vulkan_api_xml) \
 		--out-c radv_extensions.c \
 		--out-h radv_extensions.h
@@ -157,7 +157,7 @@ radv_extensions.h: radv_extensions.c
 vk_format_table.c: vk_format_table.py \
 		   vk_format_parse.py \
                   vk_format_layout.csv
-	$(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@
+	$(PYTHON) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@

 BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json
@@ -190,11 +190,11 @@ icdconf_DATA = radeon_icd.@host_cpu@.json
 noinst_DATA = dev_icd.json

 dev_icd.json : radv_extensions.py radv_icd.py
-	$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_icd.py \
+	$(AM_V_GEN)$(PYTHON) $(srcdir)/radv_icd.py \
 		--lib-path="${abs_top_builddir}/${LIB_DIR}" --out $@

 radeon_icd.@host_cpu@.json : radv_extensions.py radv_icd.py
-	$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_icd.py \
+	$(AM_V_GEN)$(PYTHON) $(srcdir)/radv_icd.py \
 		--lib-path="${libdir}" --out $@

 include $(top_srcdir)/install-lib-links.mk
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -23,7 +23,7 @@ radv_entrypoints = custom_target(
  input : ['radv_entrypoints_gen.py', vk_api_xml],
  output : ['radv_entrypoints.h', 'radv_entrypoints.c'],
  command : [
-    prog_python2, '@INPUT0@', '--xml', '@INPUT1@', '--outdir',
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--outdir',
    meson.current_build_dir()
  ],
  depend_files : files('radv_extensions.py'),
@@ -34,7 +34,7 @@ radv_extensions_c = custom_target(
  input : ['radv_extensions.py', vk_api_xml],
  output : ['radv_extensions.c', 'radv_extensions.h'],
  command : [
-    prog_python2, '@INPUT0@', '--xml', '@INPUT1@', '--out-c', '@OUTPUT0@',
+    prog_python, '@INPUT0@', '--xml', '@INPUT1@', '--out-c', '@OUTPUT0@',
    '--out-h', '@OUTPUT1@'
  ],
 )
@@ -43,7 +43,7 @@ vk_format_table_c = custom_target(
  'vk_format_table.c',
  input : ['vk_format_table.py', 'vk_format_layout.csv'],
  output : 'vk_format_table.c',
-  command : [prog_python2, '@INPUT@'],
+  command : [prog_python, '@INPUT@'],
  depend_files : files('vk_format_parse.py'),
  capture : true,
 )
@@ -129,7 +129,7 @@ endif

 libvulkan_radeon = shared_library(
  'vulkan_radeon',
-  [libradv_files, radv_entrypoints, radv_extensions_c, vk_format_table_c],
+  [libradv_files, radv_entrypoints, radv_extensions_c, vk_format_table_c, sha1_h],
  include_directories : [
    inc_common, inc_amd, inc_amd_common, inc_compiler, inc_vulkan_util,
    inc_vulkan_wsi,
@@ -154,7 +154,7 @@ radeon_icd = custom_target(
  input : 'radv_icd.py',
  output : 'radeon_icd.@0@.json'.format(host_machine.cpu()),
  command : [
-    prog_python2, '@INPUT@',
+    prog_python, '@INPUT@',
    '--lib-path', join_paths(get_option('prefix'), get_option('libdir')),
    '--out', '@OUTPUT@',
  ],
@@ -169,7 +169,7 @@ radv_dev_icd = custom_target(
  input : 'radv_icd.py',
  output : 'dev_icd.json',
  command : [
-    prog_python2, '@INPUT@', '--lib-path', meson.current_build_dir(),
+    prog_python, '@INPUT@', '--lib-path', meson.current_build_dir(),
    '--out', '@OUTPUT@'
  ],
  depend_files : files('radv_extensions.py'),
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -196,6 +196,23 @@ radv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer,
 	cmd_buffer->state.dirty |= dest_mask;
 }

+static void
+radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer,
+			  struct radv_pipeline *pipeline)
+{
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	struct radv_shader_info *info;
+
+	if (!pipeline->streamout_shader)
+		return;
+
+	info = &pipeline->streamout_shader->info.info;
+	for (int i = 0; i < MAX_SO_BUFFERS; i++)
+		so->stride_in_dw[i] = info->so.strides[i];
+
+	so->enabled_stream_buffers_mask = info->so.enabled_stream_buffers_mask;
+}
+
 bool radv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
 {
 	return cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE &&
@@ -321,13 +338,13 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 		unsigned eop_bug_offset;
 		void *fence_ptr;

-		radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0,
+		radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8,
 					     &cmd_buffer->gfx9_fence_offset,
 					     &fence_ptr);
 		cmd_buffer->gfx9_fence_bo = cmd_buffer->upload.upload_bo;

 		/* Allocate a buffer for the EOP bug on GFX9. */
-		radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
+		radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
 					     &eop_bug_offset, &fence_ptr);
 		cmd_buffer->gfx9_eop_bug_va =
 			radv_buffer_get_va(cmd_buffer->upload.upload_bo);
@@ -397,6 +414,8 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
 			     unsigned *out_offset,
 			     void **ptr)
 {
+	assert(util_is_power_of_two_nonzero(alignment));
+
 	uint64_t offset = align(cmd_buffer->upload.offset, alignment);
 	if (offset + size > cmd_buffer->upload.size) {
 		if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
@@ -1325,7 +1344,7 @@ radv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 		++reg_count;

 	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 			COPY_DATA_DST_SEL(COPY_DATA_REG) |
 			(reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
 	radeon_emit(cs, va);
@@ -1455,7 +1474,7 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;

 	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 			COPY_DATA_DST_SEL(COPY_DATA_REG) |
 			COPY_DATA_COUNT_SEL);
 	radeon_emit(cs, va);
@@ -1903,10 +1922,96 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
 	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
 }

+static void
+radv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
+{
+	struct radv_pipeline *pipeline = cmd_buffer->state.pipeline;
+	struct radv_userdata_info *loc;
+	uint32_t base_reg;
+
+	for (unsigned stage = 0; stage < MESA_SHADER_STAGES; ++stage) {
+		if (!radv_get_shader(pipeline, stage))
+			continue;
+
+		loc = radv_lookup_user_sgpr(pipeline, stage,
+					    AC_UD_STREAMOUT_BUFFERS);
+		if (loc->sgpr_idx == -1)
+			continue;
+
+		base_reg = pipeline->user_data_0[stage];
+
+		radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
+					 base_reg + loc->sgpr_idx * 4, va, false);
+	}
+
+	if (pipeline->gs_copy_shader) {
+		loc = &pipeline->gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
+		if (loc->sgpr_idx != -1) {
+			base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
+
+			radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
+						 base_reg + loc->sgpr_idx * 4, va, false);
+		}
+	}
+}
+
+static void
+radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
+{
+	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) {
+		struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+		struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+		unsigned so_offset;
+		void *so_ptr;
+		uint64_t va;
+
+		/* Allocate some descriptor state for streamout buffers. */
+		if (!radv_cmd_buffer_upload_alloc(cmd_buffer,
+						  MAX_SO_BUFFERS * 16, 256,
+						  &so_offset, &so_ptr))
+			return;
+
+		for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) {
+			struct radv_buffer *buffer = sb[i].buffer;
+			uint32_t *desc = &((uint32_t *)so_ptr)[i * 4];
+
+			if (!(so->enabled_mask & (1 << i)))
+				continue;
+
+			va = radv_buffer_get_va(buffer->bo) + buffer->offset;
+
+			va += sb[i].offset;
+
+			/* Set the descriptor.
+			 *
+			 * On VI, the format must be non-INVALID, otherwise
+			 * the buffer will be considered not bound and store
+			 * instructions will be no-ops.
+			 */
+			desc[0] = va;
+			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+			desc[2] = 0xffffffff;
+			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+		}
+
+		va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+		va += so_offset;
+
+		radv_emit_streamout_buffers(cmd_buffer, va);
+	}
+
+	cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+}
+
 static void
 radv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
 {
 	radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty);
+	radv_flush_streamout_descriptors(cmd_buffer);
 	radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
 	radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
 }
@@ -1997,7 +2102,8 @@ static void radv_stage_flush(struct radv_cmd_buffer *cmd_buffer,
 	                             VK_PIPELINE_STAGE_VERTEX_SHADER_BIT |
 				     VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT |
 				     VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT |
-				     VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT)) {
+				     VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT |
+				     VK_PIPELINE_STAGE_TRANSFORM_FEEDBACK_BIT_EXT)) {
 		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
 	}
 }
@@ -2021,6 +2127,8 @@ radv_src_access_flush(struct radv_cmd_buffer *cmd_buffer,
 	for_each_bit(b, src_flags) {
 		switch ((VkAccessFlagBits)(1 << b)) {
 		case VK_ACCESS_SHADER_WRITE_BIT:
+		case VK_ACCESS_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
+		case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
 			flush_bits |= RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
 			break;
 		case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
@@ -2058,6 +2166,7 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
 	bool flush_CB_meta = true, flush_DB_meta = true;
 	enum radv_cmd_flush_bits flush_bits = 0;
 	bool flush_CB = true, flush_DB = true;
+	bool image_is_coherent = false;
 	uint32_t b;

 	if (image) {
@@ -2070,23 +2179,42 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
 			flush_CB_meta = false;
 		if (!radv_image_has_htile(image))
 			flush_DB_meta = false;
+
+		if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
+			if (image->info.samples == 1 &&
+			    (image->usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
+					     VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
+			    !vk_format_is_stencil(image->vk_format)) {
+				/* Single-sample color and single-sample depth
+				 * (not stencil) are coherent with shaders on
+				 * GFX9.
+				 */
+				image_is_coherent = true;
+			}
+		}
 	}

 	for_each_bit(b, dst_flags) {
 		switch ((VkAccessFlagBits)(1 << b)) {
 		case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
 		case VK_ACCESS_INDEX_READ_BIT:
+		case VK_ACCESS_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
 			break;
 		case VK_ACCESS_UNIFORM_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
 			break;
 		case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
-		case VK_ACCESS_SHADER_READ_BIT:
 		case VK_ACCESS_TRANSFER_READ_BIT:
 		case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 |
 			              RADV_CMD_FLAG_INV_GLOBAL_L2;
 			break;
+		case VK_ACCESS_SHADER_READ_BIT:
+			flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
+
+			if (!image_is_coherent)
+				flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
+			break;
 		case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
 			if (flush_CB)
 				flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
@@ -2307,20 +2435,6 @@ VkResult radv_ResetCommandBuffer(
 	return radv_reset_cmd_buffer(cmd_buffer);
 }

-static void emit_gfx_buffer_state(struct radv_cmd_buffer *cmd_buffer)
-{
-	struct radv_device *device = cmd_buffer->device;
-	if (device->gfx_init) {
-		uint64_t va = radv_buffer_get_va(device->gfx_init);
-		radv_cs_add_buffer(device->ws, cmd_buffer->cs, device->gfx_init);
-		radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
-		radeon_emit(cmd_buffer->cs, va);
-		radeon_emit(cmd_buffer->cs, va >> 32);
-		radeon_emit(cmd_buffer->cs, device->gfx_init_size_dw & 0xffff);
-	} else
-		si_init_config(cmd_buffer);
-}
-
 VkResult radv_BeginCommandBuffer(
 	VkCommandBuffer commandBuffer,
 	const VkCommandBufferBeginInfo *pBeginInfo)
@@ -2346,21 +2460,6 @@ VkResult radv_BeginCommandBuffer(
 	cmd_buffer->state.predication_type = -1;
 	cmd_buffer->usage_flags = pBeginInfo->flags;

-	/* setup initial configuration into command buffer */
-	if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
-		switch (cmd_buffer->queue_family_index) {
-		case RADV_QUEUE_GENERAL:
-			emit_gfx_buffer_state(cmd_buffer);
-			break;
-		case RADV_QUEUE_COMPUTE:
-			si_init_compute(cmd_buffer);
-			break;
-		case RADV_QUEUE_TRANSFER:
-		default:
-			break;
-		}
-	}
-
 	if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
 	    (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
 		assert(pBeginInfo->pInheritanceInfo);
@@ -2754,6 +2853,7 @@ void radv_CmdBindPipeline(
 		cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;

 		radv_bind_dynamic_state(cmd_buffer, &pipeline->dynamic_state);
+		radv_bind_streamout_state(cmd_buffer, pipeline);

 		if (pipeline->graphics.esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
 			cmd_buffer->esgs_ring_size_needed = pipeline->graphics.esgs_ring_size;
@@ -3176,12 +3276,13 @@ static void radv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned in

 static void
 radv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer,
-                         uint32_t vertex_count)
+                         uint32_t vertex_count,
+			 bool use_opaque)
 {
 	radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
 	radeon_emit(cmd_buffer->cs, vertex_count);
 	radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX |
-	                            S_0287F0_USE_OPAQUE(0));
+	                            S_0287F0_USE_OPAQUE(use_opaque));
 }

 static void
@@ -3285,6 +3386,12 @@ struct radv_draw_info {
 	 */
 	struct radv_buffer *count_buffer;
 	uint64_t count_buffer_offset;
+
+	/**
+	 * Stream output parameters resource.
+	 */
+	struct radv_buffer *strmout_buffer;
+	uint64_t strmout_buffer_offset;
 };

 static void
@@ -3295,6 +3402,27 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
 	struct radeon_winsys *ws = cmd_buffer->device->ws;
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;

+	if (info->strmout_buffer) {
+		uint64_t va = radv_buffer_get_va(info->strmout_buffer->bo);
+
+		va += info->strmout_buffer->offset +
+		      info->strmout_buffer_offset;
+
+		radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE,
+				       info->stride);
+
+		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
+		radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
+				COPY_DATA_DST_SEL(COPY_DATA_REG) |
+				COPY_DATA_WR_CONFIRM);
+		radeon_emit(cs, va);
+		radeon_emit(cs, va >> 32);
+		radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
+		radeon_emit(cs, 0); /* unused */
+
+		radv_cs_add_buffer(ws, cs, info->strmout_buffer->bo);
+	}
+
 	if (info->indirect) {
 		uint64_t va = radv_buffer_get_va(info->indirect->bo);
 		uint64_t count_va = 0;
@@ -3379,14 +3507,17 @@ radv_emit_draw_packets(struct radv_cmd_buffer *cmd_buffer,
 			}
 		} else {
 			if (!state->subpass->view_mask) {
-				radv_cs_emit_draw_packet(cmd_buffer, info->count);
+				radv_cs_emit_draw_packet(cmd_buffer,
+							 info->count,
+							 !!info->strmout_buffer);
 			} else {
 				unsigned i;
 				for_each_bit(i, state->subpass->view_mask) {
 					radv_emit_view_index(cmd_buffer, i);

 					radv_cs_emit_draw_packet(cmd_buffer,
-								 info->count);
+								 info->count,
+								 !!info->strmout_buffer);
 				}
 			}
 		}
@@ -3419,8 +3550,13 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,

 	uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;

-	/* Index & Vertex buffer don't change context regs, and pipeline is handled later. */
-	used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_PIPELINE);
+	/* Index, vertex and streamout buffers don't change context regs, and
+	 * pipeline is handled later.
+	 */
+	used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
+			 RADV_CMD_DIRTY_VERTEX_BUFFER |
+			 RADV_CMD_DIRTY_STREAMOUT_BUFFER |
+			 RADV_CMD_DIRTY_PIPELINE);

 	/* Assume all state changes except  these two can imply context rolls. */
 	if (cmd_buffer->state.dirty & used_states)
@@ -3480,6 +3616,8 @@ static void
 radv_draw(struct radv_cmd_buffer *cmd_buffer,
 	  const struct radv_draw_info *info)
 {
+	struct radeon_info *rad_info =
+		&cmd_buffer->device->physical_device->rad_info;
 	bool has_prefetch =
 		cmd_buffer->device->physical_device->rad_info.chip_class >= CIK;
 	bool pipeline_is_dirty =
@@ -3549,6 +3687,16 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 		}
 	}

+	/* Workaround for a VGT hang when streamout is enabled.
+	 * It must be done after drawing.
+	 */
+	if (cmd_buffer->state.streamout.streamout_enabled &&
+	    (rad_info->family == CHIP_HAWAII ||
+	     rad_info->family == CHIP_TONGA ||
+	     rad_info->family == CHIP_FIJI)) {
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
+	}
+
 	assert(cmd_buffer->cs->cdw <= cdw_max);
 	radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
 }
@@ -3779,7 +3927,7 @@ radv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
 		if (loc->sgpr_idx != -1) {
 			for (unsigned i = 0; i < 3; ++i) {
 				radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_MEM) |
+				radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
 						COPY_DATA_DST_SEL(COPY_DATA_REG));
 				radeon_emit(cs, (va +  4 * i));
 				radeon_emit(cs, (va + 4 * i) >> 32);
@@ -4097,10 +4245,15 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
 	if (!radv_image_has_htile(image))
 		return;

-	if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
-	           radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
-		/* TODO: merge with the clear if applicable */
-		radv_initialize_htile(cmd_buffer, image, range, 0);
+	if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
+		uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
+
+		if (radv_layout_is_htile_compressed(image, dst_layout,
+						    dst_queue_mask)) {
+			clear_value = 0;
+		}
+
+		radv_initialize_htile(cmd_buffer, image, range, clear_value);
 	} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
 	           radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
 		uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
@@ -4511,6 +4664,8 @@ void radv_CmdBeginConditionalRenderingEXT(
 		draw_visible = false;
 	}

+	si_emit_cache_flush(cmd_buffer);
+
 	/* Enable predication for this command buffer. */
 	si_emit_set_predication_state(cmd_buffer, draw_visible, va);
 	cmd_buffer->state.predicating = true;
@@ -4533,3 +4688,233 @@ void radv_CmdEndConditionalRenderingEXT(
 	cmd_buffer->state.predication_type = -1;
 	cmd_buffer->state.predication_va = 0;
 }
+
+/* VK_EXT_transform_feedback */
+void radv_CmdBindTransformFeedbackBuffersEXT(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    firstBinding,
+    uint32_t                                    bindingCount,
+    const VkBuffer*                             pBuffers,
+    const VkDeviceSize*                         pOffsets,
+    const VkDeviceSize*                         pSizes)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+	uint8_t enabled_mask = 0;
+
+	assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
+	for (uint32_t i = 0; i < bindingCount; i++) {
+		uint32_t idx = firstBinding + i;
+
+		sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
+		sb[idx].offset = pOffsets[i];
+		sb[idx].size = pSizes[i];
+
+		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
+				   sb[idx].buffer->bo);
+
+		enabled_mask |= 1 << idx;
+	}
+
+	cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
+
+	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
+}
+
+static void
+radv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+	radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
+	radeon_emit(cs,
+		    S_028B94_STREAMOUT_0_EN(so->streamout_enabled) |
+		    S_028B94_RAST_STREAM(0) |
+		    S_028B94_STREAMOUT_1_EN(so->streamout_enabled) |
+		    S_028B94_STREAMOUT_2_EN(so->streamout_enabled) |
+		    S_028B94_STREAMOUT_3_EN(so->streamout_enabled));
+	radeon_emit(cs, so->hw_enabled_mask &
+			so->enabled_stream_buffers_mask);
+}
+
+static void
+radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
+{
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	bool old_streamout_enabled = so->streamout_enabled;
+	uint32_t old_hw_enabled_mask = so->hw_enabled_mask;
+
+	so->streamout_enabled = enable;
+
+	so->hw_enabled_mask = so->enabled_mask |
+			      (so->enabled_mask << 4) |
+			      (so->enabled_mask << 8) |
+			      (so->enabled_mask << 12);
+
+	if ((old_streamout_enabled != so->streamout_enabled) ||
+	    (old_hw_enabled_mask != so->hw_enabled_mask))
+		radv_emit_streamout_enable(cmd_buffer);
+}
+
+static void radv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	unsigned reg_strmout_cntl;
+
+	/* The register is at different places on different ASICs. */
+	if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
+		reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
+		radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
+	} else {
+		reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
+		radeon_set_config_reg(cs, reg_strmout_cntl, 0);
+	}
+
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
+
+	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+	radeon_emit(cs, WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
+	radeon_emit(cs, reg_strmout_cntl >> 2);  /* register */
+	radeon_emit(cs, 0);
+	radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
+	radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
+	radeon_emit(cs, 4); /* poll interval */
+}
+
+void radv_CmdBeginTransformFeedbackEXT(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    firstCounterBuffer,
+    uint32_t                                    counterBufferCount,
+    const VkBuffer*                             pCounterBuffers,
+    const VkDeviceSize*                         pCounterBufferOffsets)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t i;
+
+	radv_flush_vgt_streamout(cmd_buffer);
+
+	assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+	for_each_bit(i, so->enabled_mask) {
+		int32_t counter_buffer_idx = i - firstCounterBuffer;
+		if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+			counter_buffer_idx = -1;
+
+		/* SI binds streamout buffers as shader resources.
+		 * VGT only counts primitives and tells the shader through
+		 * SGPRs what to do.
+		 */
+		radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
+		radeon_emit(cs, sb[i].size >> 2);	/* BUFFER_SIZE (in DW) */
+		radeon_emit(cs, so->stride_in_dw[i]);			/* VTX_STRIDE (in DW) */
+
+		if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+			/* The array of counter buffers is optional. */
+			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+			uint64_t va = radv_buffer_get_va(buffer->bo);
+
+			va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx];
+
+			/* Append */
+			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+					STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+					STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, va); /* src address lo */
+			radeon_emit(cs, va >> 32); /* src address hi */
+
+			radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+		} else {
+			/* Start from the beginning. */
+			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+					STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+					STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+			radeon_emit(cs, 0); /* unused */
+		}
+	}
+
+	radv_set_streamout_enable(cmd_buffer, true);
+}
+
+void radv_CmdEndTransformFeedbackEXT(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    firstCounterBuffer,
+    uint32_t                                    counterBufferCount,
+    const VkBuffer*                             pCounterBuffers,
+    const VkDeviceSize*                         pCounterBufferOffsets)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
+	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t i;
+
+	radv_flush_vgt_streamout(cmd_buffer);
+
+	assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
+	for_each_bit(i, so->enabled_mask) {
+		int32_t counter_buffer_idx = i - firstCounterBuffer;
+		if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+			counter_buffer_idx = -1;
+
+		if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
+			/* The array of counter buffers is optional. */
+			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
+			uint64_t va = radv_buffer_get_va(buffer->bo);
+
+			va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx];
+
+			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
+			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
+					STRMOUT_DATA_TYPE(1) | /* offset in bytes */
+					STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
+					STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
+			radeon_emit(cs, va);		/* dst address lo */
+			radeon_emit(cs, va >> 32);	/* dst address hi */
+			radeon_emit(cs, 0);		/* unused */
+			radeon_emit(cs, 0);		/* unused */
+
+			radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
+		}
+
+		/* Deactivate transform feedback by zeroing the buffer size.
+		 * The counters (primitives generated, primitives emitted) may
+		 * be enabled even if there is no buffer bound. This ensures
+		 * that the primitives-emitted query won't increment.
+		 */
+		radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 0);
+	}
+
+	radv_set_streamout_enable(cmd_buffer, false);
+}
+
+void radv_CmdDrawIndirectByteCountEXT(
+    VkCommandBuffer                             commandBuffer,
+    uint32_t                                    instanceCount,
+    uint32_t                                    firstInstance,
+    VkBuffer                                    _counterBuffer,
+    VkDeviceSize                                counterBufferOffset,
+    uint32_t                                    counterOffset,
+    uint32_t                                    vertexStride)
+{
+	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
+	RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
+	struct radv_draw_info info = {};
+
+	info.instance_count = instanceCount;
+	info.first_instance = firstInstance;
+	info.strmout_buffer = counterBuffer;
+	info.strmout_buffer_offset = counterBufferOffset;
+	info.stride = vertexStride;
+
+	radv_draw(cmd_buffer, &info);
+}
--- a/src/amd/vulkan/radv_descriptor_set.c
+++ b/src/amd/vulkan/radv_descriptor_set.c
@@ -84,7 +84,9 @@ VkResult radv_CreateDescriptorSetLayout(
 	uint32_t immutable_sampler_count = 0;
 	for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
 		max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
-		if (pCreateInfo->pBindings[j].pImmutableSamplers)
+		if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+		     pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+		     pCreateInfo->pBindings[j].pImmutableSamplers)
 			immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
 	}

@@ -182,7 +184,9 @@ VkResult radv_CreateDescriptorSetLayout(
 			set_layout->has_variable_descriptors = true;
 		}

-		if (binding->pImmutableSamplers) {
+		if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
+		     binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
+		    binding->pImmutableSamplers) {
 			set_layout->binding[b].immutable_samplers_offset = samplers_offset;
 			set_layout->binding[b].immutable_samplers_equal =
 				has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
@@ -533,7 +537,7 @@ VkResult radv_CreateDescriptorPool(
 {
 	RADV_FROM_HANDLE(radv_device, device, _device);
 	struct radv_descriptor_pool *pool;
-	int size = sizeof(struct radv_descriptor_pool);
+	uint64_t size = sizeof(struct radv_descriptor_pool);
 	uint64_t bo_size = 0, bo_count = 0, range_count = 0;


@@ -961,9 +965,11 @@ void radv_update_descriptor_sets(
 			}
 			src_ptr += src_binding_layout->size / 4;
 			dst_ptr += dst_binding_layout->size / 4;
-			dst_buffer_list[j] = src_buffer_list[j];
-			++src_buffer_list;
-			++dst_buffer_list;
+
+			if (src_binding_layout->type != VK_DESCRIPTOR_TYPE_SAMPLER) {
+				/* Sampler descriptors don't have a buffer list. */
+				dst_buffer_list[j] = src_buffer_list[j];
+			}
 		}
 	}
 }
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -43,6 +43,7 @@
 #include "ac_llvm_util.h"
 #include "vk_format.h"
 #include "sid.h"
+#include "git_sha1.h"
 #include "gfx9d.h"
 #include "addrlib/gfx9/chip/gfx9_enum.h"
 #include "util/build_id.h"
@@ -112,6 +113,7 @@ radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
 	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
 	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
 	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
+	case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
 	default: chip_string = "AMD RADV unknown"; break;
 	}

@@ -255,8 +257,6 @@ radv_physical_device_init(struct radv_physical_device *device,

 	if (strcmp(version->name, "amdgpu")) {
 		drmFreeVersion(version);
-		if (master_fd != -1)
-			close(master_fd);
 		close(fd);

 		if (instance->debug_flags & RADV_DEBUG_STARTUP)
@@ -329,7 +329,7 @@ radv_physical_device_init(struct radv_physical_device *device,
 	    device->rad_info.chip_class > GFX9)
 		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

-	radv_get_driver_uuid(&device->device_uuid);
+	radv_get_driver_uuid(&device->driver_uuid);
 	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

 	if (device->rad_info.family == CHIP_STONEY ||
@@ -337,7 +337,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 		device->has_rbplus = true;
 		device->rbplus_allowed = device->rad_info.family == CHIP_STONEY ||
 					 device->rad_info.family == CHIP_VEGA12 ||
-		                         device->rad_info.family == CHIP_RAVEN;
+		                         device->rad_info.family == CHIP_RAVEN ||
+		                         device->rad_info.family == CHIP_RAVEN2;
 	}

 	/* The mere presence of CLEAR_STATE in the IB causes random GPU hangs
@@ -363,6 +364,15 @@ radv_physical_device_init(struct radv_physical_device *device,
 	radv_physical_device_init_mem_types(device);
 	radv_fill_device_extension_table(device, &device->supported_extensions);

+	device->bus_info = *drm_device->businfo.pci;
+
+	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
+		ac_print_gpu_info(&device->rad_info);
+
+	/* The WSI is structured as a layer on top of the driver, so this has
+	 * to be the last part of initialization (at least until we get other
+	 * semi-layers).
+	 */
 	result = radv_init_wsi(device);
 	if (result != VK_SUCCESS) {
 		device->ws->destroy(device->ws);
@@ -370,9 +380,6 @@ radv_physical_device_init(struct radv_physical_device *device,
 		goto fail;
 	}

-	if ((device->instance->debug_flags & RADV_DEBUG_INFO))
-		ac_print_gpu_info(&device->rad_info);
-
 	return VK_SUCCESS;

 fail:
@@ -518,7 +525,7 @@ VkResult radv_CreateInstance(
 	    pCreateInfo->pApplicationInfo->apiVersion != 0) {
 		client_version = pCreateInfo->pApplicationInfo->apiVersion;
 	} else {
-		radv_EnumerateInstanceVersion(&client_version);
+		client_version = VK_API_VERSION_1_0;
 	}

 	instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
@@ -719,8 +726,7 @@ void radv_GetPhysicalDeviceFeatures(
 		.alphaToOne                               = true,
 		.multiViewport                            = true,
 		.samplerAnisotropy                        = true,
-		.textureCompressionETC2                   = pdevice->rad_info.chip_class >= GFX9 ||
-		                                            pdevice->rad_info.family == CHIP_STONEY,
+		.textureCompressionETC2                   = radv_device_supports_etc(pdevice),
 		.textureCompressionASTC_LDR               = false,
 		.textureCompressionBC                     = true,
 		.occlusionQueryPrecise                    = true,
@@ -741,7 +747,7 @@ void radv_GetPhysicalDeviceFeatures(
 		.shaderCullDistance                       = true,
 		.shaderFloat64                            = true,
 		.shaderInt64                              = true,
-		.shaderInt16                              = false,
+		.shaderInt16                              = pdevice->rad_info.chip_class >= GFX9 && HAVE_LLVM >= 0x700,
 		.sparseBinding                            = true,
 		.variableMultisampleRate                  = true,
 		.inheritedQueries                         = true,
@@ -787,7 +793,7 @@ void radv_GetPhysicalDeviceFeatures2(
 			features->storageBuffer16BitAccess = enabled;
 			features->uniformAndStorageBuffer16BitAccess = enabled;
 			features->storagePushConstant16 = enabled;
-			features->storageInputOutput16 = enabled;
+			features->storageInputOutput16 = enabled && HAVE_LLVM >= 0x900;
 			break;
 		}
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
@@ -835,6 +841,13 @@ void radv_GetPhysicalDeviceFeatures2(
 			features->vertexAttributeInstanceRateZeroDivisor = VK_TRUE;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
+			VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
+				(VkPhysicalDeviceTransformFeedbackFeaturesEXT*)ext;
+			features->transformFeedback = true;
+			features->geometryStreams = true;
+			break;
+		}
 		default:
 			break;
 		}
@@ -922,9 +935,9 @@ void radv_GetPhysicalDeviceProperties(
 			2048,
 			2048
 		},
-		.subPixelPrecisionBits                    = 4 /* FIXME */,
-		.subTexelPrecisionBits                    = 4 /* FIXME */,
-		.mipmapPrecisionBits                      = 4 /* FIXME */,
+		.subPixelPrecisionBits                    = 8,
+		.subTexelPrecisionBits                    = 8,
+		.mipmapPrecisionBits                      = 8,
 		.maxDrawIndexedIndexValue                 = UINT32_MAX,
 		.maxDrawIndirectCount                     = UINT32_MAX,
 		.maxSamplerLodBias                        = 16,
@@ -963,8 +976,8 @@ void radv_GetPhysicalDeviceProperties(
 		.maxClipDistances                         = 8,
 		.maxCullDistances                         = 8,
 		.maxCombinedClipAndCullDistances          = 8,
-		.discreteQueuePriorities                  = 1,
-		.pointSizeRange                           = { 0.125, 255.875 },
+		.discreteQueuePriorities                  = 2,
+		.pointSizeRange                           = { 0.0, 8192.0 },
 		.lineWidthRange                           = { 0.0, 7.9921875 },
 		.pointSizeGranularity                     = (1.0 / 8.0),
 		.lineWidthGranularity                     = (1.0 / 128.0),
@@ -1082,9 +1095,7 @@ void radv_GetPhysicalDeviceProperties2(
 			properties->shaderArraysPerEngineCount =
 				pdevice->rad_info.max_sh_per_se;
 			properties->computeUnitsPerShaderArray =
-				pdevice->rad_info.num_good_compute_units /
-					(pdevice->rad_info.max_se *
-					 pdevice->rad_info.max_sh_per_se);
+				pdevice->rad_info.num_good_cu_per_sh;
 			properties->simdPerComputeUnit = 4;
 			properties->wavefrontsPerSimd =
 				pdevice->rad_info.family == CHIP_TONGA ||
@@ -1154,6 +1165,73 @@ void radv_GetPhysicalDeviceProperties2(
 			properties->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
+			VkPhysicalDeviceProtectedMemoryProperties *properties =
+				(VkPhysicalDeviceProtectedMemoryProperties *)ext;
+			properties->protectedNoFault = false;
+			break;
+		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
+			VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
+				(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
+			properties->primitiveOverestimationSize = 0;
+			properties->maxExtraPrimitiveOverestimationSize = 0;
+			properties->extraPrimitiveOverestimationSizeGranularity = 0;
+			properties->primitiveUnderestimation = VK_FALSE;
+			properties->conservativePointAndLineRasterization = VK_FALSE;
+			properties->degenerateTrianglesRasterized = VK_FALSE;
+			properties->degenerateLinesRasterized = VK_FALSE;
+			properties->fullyCoveredFragmentShaderInputVariable = VK_FALSE;
+			properties->conservativeRasterizationPostDepthCoverage = VK_FALSE;
+			break;
+		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
+			VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
+				(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
+			properties->pciDomain = pdevice->bus_info.domain;
+			properties->pciBus = pdevice->bus_info.bus;
+			properties->pciDevice = pdevice->bus_info.dev;
+			properties->pciFunction = pdevice->bus_info.func;
+			break;
+		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES_KHR: {
+			VkPhysicalDeviceDriverPropertiesKHR *driver_props =
+				(VkPhysicalDeviceDriverPropertiesKHR *) ext;
+
+			driver_props->driverID = VK_DRIVER_ID_MESA_RADV_KHR;
+			memset(driver_props->driverName, 0, VK_MAX_DRIVER_NAME_SIZE_KHR);
+			strcpy(driver_props->driverName, "radv");
+
+			memset(driver_props->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE_KHR);
+			snprintf(driver_props->driverInfo, VK_MAX_DRIVER_INFO_SIZE_KHR,
+				"Mesa " PACKAGE_VERSION MESA_GIT_SHA1
+				" (LLVM %d.%d.%d)",
+				 (HAVE_LLVM >> 8) & 0xff, HAVE_LLVM & 0xff,
+				 MESA_LLVM_VERSION_PATCH);
+
+			driver_props->conformanceVersion = (VkConformanceVersionKHR) {
+				.major = 1,
+				.minor = 1,
+				.subminor = 2,
+				.patch = 0,
+			};
+			break;
+		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
+			VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
+				(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
+			properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
+			properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
+			properties->maxTransformFeedbackBufferSize = UINT32_MAX;
+			properties->maxTransformFeedbackStreamDataSize = 512;
+			properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
+			properties->maxTransformFeedbackBufferDataStride = 512;
+			properties->transformFeedbackQueries = true;
+			properties->transformFeedbackStreamsLinesTriangles = false;
+			properties->transformFeedbackRasterizationStreamSelect = false;
+			properties->transformFeedbackDraw = true;
+			break;
+		}
 		default:
 			break;
 		}
@@ -1437,6 +1515,28 @@ static int radv_get_device_extension_index(const char *name)
 	return -1;
 }

+static int
+radv_get_int_debug_option(const char *name, int default_value)
+{
+	const char *str;
+	int result;
+
+	str = getenv(name);
+	if (!str) {
+		result = default_value;
+	} else {
+		char *endptr;
+
+		result = strtol(str, &endptr, 0);
+		if (str == endptr) {
+			/* No digits found. */
+			result = default_value;
+		}
+	}
+
+	return result;
+}
+
 VkResult radv_CreateDevice(
 	VkPhysicalDevice                            physicalDevice,
 	const VkDeviceCreateInfo*                   pCreateInfo,
@@ -1532,11 +1632,13 @@ VkResult radv_CreateDevice(

 	device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
 			((device->instance->perftest_flags & RADV_PERFTEST_BINNING) ||
-			 device->physical_device->rad_info.family == CHIP_RAVEN);
+			 device->physical_device->rad_info.family == CHIP_RAVEN ||
+			 device->physical_device->rad_info.family == CHIP_RAVEN2);

 	/* Disabled and not implemented for now. */
 	device->dfsm_allowed = device->pbb_allowed &&
-	                       device->physical_device->rad_info.family == CHIP_RAVEN;
+	                       (device->physical_device->rad_info.family == CHIP_RAVEN ||
+	                        device->physical_device->rad_info.family == CHIP_RAVEN2);

 #ifdef ANDROID
 	device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
@@ -1632,6 +1734,13 @@ VkResult radv_CreateDevice(

 	device->mem_cache = radv_pipeline_cache_from_handle(pc);

+	device->force_aniso =
+		MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
+	if (device->force_aniso >= 0) {
+		fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
+			1 << util_logbase2(device->force_aniso));
+	}
+
 	*pDevice = radv_device_to_handle(device);
 	return VK_SUCCESS;

@@ -2092,6 +2201,33 @@ radv_emit_global_shader_pointers(struct radv_queue *queue,
 	}
 }

+static void
+radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
+{
+	struct radv_device *device = queue->device;
+
+	if (device->gfx_init) {
+		uint64_t va = radv_buffer_get_va(device->gfx_init);
+
+		radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
+		radeon_emit(cs, va);
+		radeon_emit(cs, va >> 32);
+		radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
+
+		radv_cs_add_buffer(device->ws, cs, device->gfx_init);
+	} else {
+		struct radv_physical_device *physical_device = device->physical_device;
+		si_emit_graphics(physical_device, cs);
+	}
+}
+
+static void
+radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
+{
+	struct radv_physical_device *physical_device = queue->device->physical_device;
+	si_emit_compute(physical_device, cs);
+}
+
 static VkResult
 radv_get_preamble_cs(struct radv_queue *queue,
                     uint32_t scratch_size,
@@ -2246,6 +2382,18 @@ radv_get_preamble_cs(struct radv_queue *queue,
 		if (scratch_bo)
 			radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

+		/* Emit initial configuration. */
+		switch (queue->queue_family_index) {
+		case RADV_QUEUE_GENERAL:
+			radv_init_graphics_state(cs, queue);
+			break;
+		case RADV_QUEUE_COMPUTE:
+			radv_init_compute_state(cs, queue);
+			break;
+		case RADV_QUEUE_TRANSFER:
+			break;
+		}
+
 		if (descriptor_bo != queue->descriptor_bo) {
 			uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);

@@ -3791,7 +3939,7 @@ radv_init_dcc_control_reg(struct radv_device *device,
 	unsigned max_compressed_block_size;
 	unsigned independent_64b_blocks;

-	if (device->physical_device->rad_info.chip_class < VI)
+	if (!radv_image_has_dcc(iview->image))
 		return 0;

 	if (iview->image->info.samples > 1) {
@@ -4391,13 +4539,26 @@ radv_tex_filter_mode(VkSamplerReductionModeEXT mode)
 	return 0;
 }

+static uint32_t
+radv_get_max_anisotropy(struct radv_device *device,
+			const VkSamplerCreateInfo *pCreateInfo)
+{
+	if (device->force_aniso >= 0)
+		return device->force_aniso;
+
+	if (pCreateInfo->anisotropyEnable &&
+	    pCreateInfo->maxAnisotropy > 1.0f)
+		return (uint32_t)pCreateInfo->maxAnisotropy;
+
+	return 0;
+}
+
 static void
 radv_init_sampler(struct radv_device *device,
 		  struct radv_sampler *sampler,
 		  const VkSamplerCreateInfo *pCreateInfo)
 {
-	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
-					(uint32_t) pCreateInfo->maxAnisotropy : 0;
+	uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
 	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
 	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);
 	unsigned filter_mode = SQ_IMG_FILTER_MODE_BLEND;
@@ -4828,3 +4989,122 @@ radv_GetDeviceGroupPeerMemoryFeatures(
 	                       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
 	                       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
 }
+
+static const VkTimeDomainEXT radv_time_domains[] = {
+	VK_TIME_DOMAIN_DEVICE_EXT,
+	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
+	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
+};
+
+VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
+	VkPhysicalDevice                             physicalDevice,
+	uint32_t                                     *pTimeDomainCount,
+	VkTimeDomainEXT                              *pTimeDomains)
+{
+	int d;
+	VK_OUTARRAY_MAKE(out, pTimeDomains, pTimeDomainCount);
+
+	for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
+		vk_outarray_append(&out, i) {
+			*i = radv_time_domains[d];
+		}
+	}
+
+	return vk_outarray_status(&out);
+}
+
+static uint64_t
+radv_clock_gettime(clockid_t clock_id)
+{
+	struct timespec current;
+	int ret;
+
+	ret = clock_gettime(clock_id, &current);
+	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
+		ret = clock_gettime(CLOCK_MONOTONIC, &current);
+	if (ret < 0)
+		return 0;
+
+	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
+}
+
+VkResult radv_GetCalibratedTimestampsEXT(
+	VkDevice                                     _device,
+	uint32_t                                     timestampCount,
+	const VkCalibratedTimestampInfoEXT           *pTimestampInfos,
+	uint64_t                                     *pTimestamps,
+	uint64_t                                     *pMaxDeviation)
+{
+	RADV_FROM_HANDLE(radv_device, device, _device);
+	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
+	int d;
+	uint64_t begin, end;
+        uint64_t max_clock_period = 0;
+
+	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+	for (d = 0; d < timestampCount; d++) {
+		switch (pTimestampInfos[d].timeDomain) {
+		case VK_TIME_DOMAIN_DEVICE_EXT:
+			pTimestamps[d] = device->ws->query_value(device->ws,
+								 RADEON_TIMESTAMP);
+                        uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
+                        max_clock_period = MAX2(max_clock_period, device_period);
+			break;
+		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
+			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
+                        max_clock_period = MAX2(max_clock_period, 1);
+			break;
+
+		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
+			pTimestamps[d] = begin;
+			break;
+		default:
+			pTimestamps[d] = 0;
+			break;
+		}
+	}
+
+	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
+
+        /*
+         * The maximum deviation is the sum of the interval over which we
+         * perform the sampling and the maximum period of any sampled
+         * clock. That's because the maximum skew between any two sampled
+         * clock edges is when the sampled clock with the largest period is
+         * sampled at the end of that period but right at the beginning of the
+         * sampling interval and some other clock is sampled right at the
+         * beginning of its sampling period and right at the end of the
+         * sampling interval. Let's assume the GPU has the longest clock
+         * period and that the application is sampling GPU and monotonic:
+         *
+         *                               s                 e
+         *			 w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
+         *	Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+         *
+         *                               g
+         *		  0         1         2         3
+         *	GPU       -----_____-----_____-----_____-----_____
+         *
+         *                                                m
+         *					    x y z 0 1 2 3 4 5 6 7 8 9 a b c
+         *	Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
+         *
+         *	Interval                     <----------------->
+         *	Deviation           <-------------------------->
+         *
+         *		s  = read(raw)       2
+         *		g  = read(GPU)       1
+         *		m  = read(monotonic) 2
+         *		e  = read(raw)       b
+         *
+         * We round the sample interval up by one tick to cover sampling error
+         * in the interval clock
+         */
+
+        uint64_t sample_interval = end - begin + 1;
+
+        *pMaxDeviation = sample_interval + max_clock_period;
+
+	return VK_SUCCESS;
+}
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -59,6 +59,7 @@ EXTENSIONS = [
    Extension('VK_KHR_device_group',                      1, True),
    Extension('VK_KHR_device_group_creation',             1, True),
    Extension('VK_KHR_draw_indirect_count',               1, True),
+    Extension('VK_KHR_driver_properties',                 1, True),
    Extension('VK_KHR_external_fence',                    1, 'device->rad_info.has_syncobj_wait_for_submit'),
    Extension('VK_KHR_external_fence_capabilities',       1, True),
    Extension('VK_KHR_external_fence_fd',                 1, 'device->rad_info.has_syncobj_wait_for_submit'),
@@ -92,7 +93,9 @@ EXTENSIONS = [
    Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+    Extension('VK_EXT_calibrated_timestamps',             1, True),
    Extension('VK_EXT_conditional_rendering',             1, True),
+    Extension('VK_EXT_conservative_rasterization',        1, 'device->rad_info.chip_class >= GFX9'),
    Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_display_control',                   1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_debug_report',                      9, True),
@@ -102,9 +105,11 @@ EXTENSIONS = [
    Extension('VK_EXT_external_memory_dma_buf',           1, True),
    Extension('VK_EXT_external_memory_host',              1, 'device->rad_info.has_userptr'),
    Extension('VK_EXT_global_priority',                   1, 'device->rad_info.has_ctx_priority'),
+    Extension('VK_EXT_pci_bus_info',                      1, False),
    Extension('VK_EXT_sampler_filter_minmax',             1, 'device->rad_info.chip_class >= CIK'),
    Extension('VK_EXT_shader_viewport_index_layer',       1, True),
    Extension('VK_EXT_shader_stencil_export',             1, True),
+    Extension('VK_EXT_transform_feedback',                1, True),
    Extension('VK_EXT_vertex_attribute_divisor',          3, True),
    Extension('VK_AMD_draw_indirect_count',               1, True),
    Extension('VK_AMD_gcn_shader',                        1, True),
@@ -112,6 +117,8 @@ EXTENSIONS = [
    Extension('VK_AMD_shader_core_properties',            1, True),
    Extension('VK_AMD_shader_info',                       1, True),
    Extension('VK_AMD_shader_trinary_minmax',             1, True),
+    Extension('VK_GOOGLE_decorate_string',                1, True),
+    Extension('VK_GOOGLE_hlsl_functionality1',            1, True),
 ]

 class VkVersion:
@@ -147,14 +154,15 @@ class VkVersion:
        patch = self.patch if self.patch is not None else 0
        return (self.major << 22) | (self.minor << 12) | patch

-    def __cmp__(self, other):
+    def __gt__(self, other):
        # If only one of them has a patch version, "ignore" it by making
        # other's patch version match self.
        if (self.patch is None) != (other.patch is None):
            other = copy.copy(other)
            other.patch = self.patch

-        return self.__int_ver().__cmp__(other.__int_ver())
+        return self.__int_ver() > other.__int_ver()
+

 MAX_API_VERSION = VkVersion(MAX_API_VERSION)

--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -595,6 +595,14 @@ static bool radv_is_filter_minmax_format_supported(VkFormat format)
 	}
 }

+bool
+radv_device_supports_etc(struct radv_physical_device *physical_device)
+{
+	return physical_device->rad_info.family == CHIP_VEGA10 ||
+	       physical_device->rad_info.family == CHIP_RAVEN ||
+	       physical_device->rad_info.family == CHIP_STONEY;
+}
+
 static void
 radv_physical_device_get_format_properties(struct radv_physical_device *physical_device,
 					   VkFormat format,
@@ -612,9 +620,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 	}

 	if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
-	    physical_device->rad_info.family != CHIP_VEGA10 &&
-	    physical_device->rad_info.family != CHIP_RAVEN &&
-	    physical_device->rad_info.family != CHIP_STONEY) {
+	    !radv_device_supports_etc(physical_device)) {
 		out_properties->linearTilingFeatures = linear;
 		out_properties->optimalTilingFeatures = tiled;
 		out_properties->bufferFeatures = buffer;
@@ -645,9 +651,12 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 			if (radv_is_filter_minmax_format_supported(format))
 				 tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_MINMAX_BIT_EXT;

-			/* GFX9 doesn't support linear depth surfaces */
-			if (physical_device->rad_info.chip_class >= GFX9)
-				linear = 0;
+			/* Don't support blitting surfaces with depth/stencil. */
+			if (vk_format_is_depth(format) && vk_format_is_stencil(format))
+				tiled &= ~VK_FORMAT_FEATURE_BLIT_DST_BIT;
+
+			/* Don't support linear depth surfaces */
+			linear = 0;
 		}
 	} else {
 		bool linear_sampling;
@@ -664,6 +673,13 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
 				linear |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
 				tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT;
 			}
+
+			/* Don't support blitting for R32G32B32 formats. */
+			if (format == VK_FORMAT_R32G32B32_SFLOAT ||
+			    format == VK_FORMAT_R32G32B32_UINT ||
+			    format == VK_FORMAT_R32G32B32_SINT) {
+				linear &= ~VK_FORMAT_FEATURE_BLIT_SRC_BIT;
+			}
 		}
 		if (radv_is_colorbuffer_format_supported(format, &blendable)) {
 			linear |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
@@ -1088,6 +1104,20 @@ static VkResult radv_get_image_format_properties(struct radv_physical_device *ph
 		sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
 	}

+	if (info->tiling == VK_IMAGE_TILING_LINEAR &&
+	    (info->format == VK_FORMAT_R32G32B32_SFLOAT ||
+	     info->format == VK_FORMAT_R32G32B32_SINT ||
+	     info->format == VK_FORMAT_R32G32B32_UINT)) {
+		/* R32G32B32 is a weird format and the driver currently only
+		 * supports the bare minimum.
+		 * TODO: Implement more if we really need to.
+		 */
+		if (info->type == VK_IMAGE_TYPE_3D)
+			goto unsupported;
+		maxArraySize = 1;
+		maxMipLevels = 1;
+	}
+
 	if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
 		if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
 			goto unsupported;
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -72,11 +72,8 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device,
 	if (device->physical_device->rad_info.chip_class < VI)
 		return false;

-	if (pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT)
-		return false;
-
-	if (pCreateInfo->flags & (VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT |
-				  VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR))
+	if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) ||
+	    (pCreateInfo->flags & VK_IMAGE_CREATE_EXTENDED_USAGE_BIT_KHR))
 		return false;

 	if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
@@ -100,6 +97,26 @@ radv_use_tc_compat_htile_for_image(struct radv_device *device,
 	    pCreateInfo->format != VK_FORMAT_D16_UNORM)
 		return false;

+	if (pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
+		const struct VkImageFormatListCreateInfoKHR *format_list =
+			(const struct  VkImageFormatListCreateInfoKHR *)
+				vk_find_struct_const(pCreateInfo->pNext,
+						     IMAGE_FORMAT_LIST_CREATE_INFO_KHR);
+
+		/* We have to ignore the existence of the list if viewFormatCount = 0 */
+		if (format_list && format_list->viewFormatCount) {
+			/* compatibility is transitive, so we only need to check
+			 * one format against all the others.
+			 */
+			for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
+				if (pCreateInfo->format != format_list->pViewFormats[i])
+					return false;
+			}
+		} else {
+			return false;
+		}
+	}
+
 	return true;
 }

@@ -914,7 +931,9 @@ radv_image_can_enable_fmask(struct radv_image *image)
 static inline bool
 radv_image_can_enable_htile(struct radv_image *image)
 {
-	return image->info.levels == 1 && vk_format_is_depth(image->vk_format);
+	return image->info.levels == 1 &&
+	       vk_format_is_depth(image->vk_format) &&
+	       image->info.width * image->info.height >= 8 * 8;
 }

 VkResult
@@ -966,7 +985,7 @@ radv_image_create(VkDevice _device,

 	image->shareable = vk_find_struct_const(pCreateInfo->pNext,
 	                                        EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHR) != NULL;
-	if (!vk_format_is_depth(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
+	if (!vk_format_is_depth_or_stencil(pCreateInfo->format) && !create_info->scanout && !image->shareable) {
 		image->info.surf_index = &device->image_mrt_offset_counter;
 	}

--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -234,15 +234,12 @@ radv_builtin_cache_path(char *path)
 	const char *suffix2 = "/.cache/radv_builtin_shaders";
 	struct passwd pwd, *result;
 	char path2[PATH_MAX + 1]; /* PATH_MAX is not a real max,but suffices here. */
+	int ret;

 	if (xdg_cache_home) {
-
-		if (strlen(xdg_cache_home) + strlen(suffix) > PATH_MAX)
-			return false;
-
-		strcpy(path, xdg_cache_home);
-		strcat(path, suffix);
-		return true;
+		ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
+			       xdg_cache_home, suffix, sizeof(void *) * 8);
+		return ret > 0 && ret < PATH_MAX + 1;
 	}

 	getpwuid_r(getuid(), &pwd, path2, PATH_MAX - strlen(suffix2), &result);
@@ -253,23 +250,25 @@ radv_builtin_cache_path(char *path)
 	strcat(path, "/.cache");
 	mkdir(path, 0755);

-	strcat(path, suffix);
-	return true;
+	ret = snprintf(path, PATH_MAX + 1, "%s%s%zd",
+		       pwd.pw_dir, suffix2, sizeof(void *) * 8);
+	return ret > 0 && ret < PATH_MAX + 1;
 }

-static void
+static bool
 radv_load_meta_pipeline(struct radv_device *device)
 {
 	char path[PATH_MAX + 1];
 	struct stat st;
 	void *data = NULL;
+	bool ret = false;

 	if (!radv_builtin_cache_path(path))
-		return;
+		return false;

 	int fd = open(path, O_RDONLY);
 	if (fd < 0)
-		return;
+		return false;
 	if (fstat(fd, &st))
 		goto fail;
 	data = malloc(st.st_size);
@@ -278,10 +277,11 @@ radv_load_meta_pipeline(struct radv_device *device)
 	if(read(fd, data, st.st_size) == -1)
 		goto fail;

-	radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
+	ret = radv_pipeline_cache_load(&device->meta_state.cache, data, st.st_size);
 fail:
 	free(data);
 	close(fd);
+	return ret;
 }

 static void
@@ -330,6 +330,8 @@ radv_device_init_meta(struct radv_device *device)
 {
 	VkResult result;

+	memset(&device->meta_state, 0, sizeof(device->meta_state));
+
 	device->meta_state.alloc = (VkAllocationCallbacks) {
 		.pUserData = device,
 		.pfnAllocation = meta_alloc,
@@ -339,21 +341,24 @@ radv_device_init_meta(struct radv_device *device)

 	device->meta_state.cache.alloc = device->meta_state.alloc;
 	radv_pipeline_cache_init(&device->meta_state.cache, device);
-	radv_load_meta_pipeline(device);
+	bool loaded_cache = radv_load_meta_pipeline(device);
+	bool on_demand = !loaded_cache;

-	result = radv_device_init_meta_clear_state(device);
+	mtx_init(&device->meta_state.mtx, mtx_plain);
+
+	result = radv_device_init_meta_clear_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_clear;

-	result = radv_device_init_meta_resolve_state(device);
+	result = radv_device_init_meta_resolve_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_resolve;

-	result = radv_device_init_meta_blit_state(device);
+	result = radv_device_init_meta_blit_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_blit;

-	result = radv_device_init_meta_blit2d_state(device);
+	result = radv_device_init_meta_blit2d_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_blit2d;

@@ -361,7 +366,7 @@ radv_device_init_meta(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail_bufimage;

-	result = radv_device_init_meta_depth_decomp_state(device);
+	result = radv_device_init_meta_depth_decomp_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_depth_decomp;

@@ -369,19 +374,19 @@ radv_device_init_meta(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail_buffer;

-	result = radv_device_init_meta_query_state(device);
+	result = radv_device_init_meta_query_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_query;

-	result = radv_device_init_meta_fast_clear_flush_state(device);
+	result = radv_device_init_meta_fast_clear_flush_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_fast_clear;

-	result = radv_device_init_meta_resolve_compute_state(device);
+	result = radv_device_init_meta_resolve_compute_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_resolve_compute;

-	result = radv_device_init_meta_resolve_fragment_state(device);
+	result = radv_device_init_meta_resolve_fragment_state(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail_resolve_fragment;
 	return VK_SUCCESS;
@@ -407,6 +412,7 @@ fail_blit:
 fail_resolve:
 	radv_device_finish_meta_clear_state(device);
 fail_clear:
+	mtx_destroy(&device->meta_state.mtx);
 	radv_pipeline_cache_finish(&device->meta_state.cache);
 	return result;
 }
@@ -428,6 +434,7 @@ radv_device_finish_meta(struct radv_device *device)

 	radv_store_meta_pipeline(device);
 	radv_pipeline_cache_finish(&device->meta_state.cache);
+	mtx_destroy(&device->meta_state.mtx);
 }

 nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2)
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -58,34 +58,34 @@ struct radv_meta_saved_state {
 	VkRect2D render_area;
 };

-VkResult radv_device_init_meta_clear_state(struct radv_device *device);
+VkResult radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_clear_state(struct radv_device *device);

-VkResult radv_device_init_meta_resolve_state(struct radv_device *device);
+VkResult radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_resolve_state(struct radv_device *device);

-VkResult radv_device_init_meta_depth_decomp_state(struct radv_device *device);
+VkResult radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_depth_decomp_state(struct radv_device *device);

-VkResult radv_device_init_meta_fast_clear_flush_state(struct radv_device *device);
+VkResult radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device);

-VkResult radv_device_init_meta_blit_state(struct radv_device *device);
+VkResult radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_blit_state(struct radv_device *device);

-VkResult radv_device_init_meta_blit2d_state(struct radv_device *device);
+VkResult radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_blit2d_state(struct radv_device *device);

 VkResult radv_device_init_meta_buffer_state(struct radv_device *device);
 void radv_device_finish_meta_buffer_state(struct radv_device *device);

-VkResult radv_device_init_meta_query_state(struct radv_device *device);
+VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_query_state(struct radv_device *device);

-VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device);
+VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_resolve_compute_state(struct radv_device *device);

-VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device);
+VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device, bool on_demand);
 void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device);

 void radv_meta_save(struct radv_meta_saved_state *saved_state,
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -31,6 +31,13 @@ struct blit_region {
 	VkExtent3D dest_extent;
 };

+static VkResult
+build_pipeline(struct radv_device *device,
+               VkImageAspectFlagBits aspect,
+               enum glsl_sampler_dim tex_dim,
+               unsigned fs_key,
+               VkPipeline *pipeline);
+
 static nir_shader *
 build_nir_vertex_shader(void)
 {
@@ -273,6 +280,20 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
 	return b.shader;
 }

+static enum glsl_sampler_dim
+translate_sampler_dim(VkImageType type) {
+	switch(type) {
+	case VK_IMAGE_TYPE_1D:
+		return GLSL_SAMPLER_DIM_1D;
+	case VK_IMAGE_TYPE_2D:
+		return GLSL_SAMPLER_DIM_2D;
+	case VK_IMAGE_TYPE_3D:
+		return GLSL_SAMPLER_DIM_3D;
+	default:
+		unreachable("Unhandled image type");
+	}
+}
+
 static void
 meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               struct radv_image *src_image,
@@ -286,7 +307,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
               VkOffset2D dest_offset_0,
               VkOffset2D dest_offset_1,
               VkRect2D dest_box,
-               VkFilter blit_filter)
+               VkSampler sampler)
 {
 	struct radv_device *device = cmd_buffer->device;
 	uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
@@ -310,17 +331,6 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 			      VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
 			      vertex_push_constants);

-	VkSampler sampler;
-	radv_CreateSampler(radv_device_to_handle(device),
-				 &(VkSamplerCreateInfo) {
-					 .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
-						 .magFilter = blit_filter,
-						 .minFilter = blit_filter,
-						 .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-						 .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-						 .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
-						 }, &cmd_buffer->pool->alloc, &sampler);
-
 	VkFramebuffer fb;
 	radv_CreateFramebuffer(radv_device_to_handle(device),
 			       &(VkFramebufferCreateInfo) {
@@ -333,11 +343,12 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 				       .height = dst_height,
 				       .layers = 1,
 				}, &cmd_buffer->pool->alloc, &fb);
-	VkPipeline pipeline;
+	VkPipeline* pipeline = NULL;
+	unsigned fs_key = 0;
 	switch (src_iview->aspect_mask) {
 	case VK_IMAGE_ASPECT_COLOR_BIT: {
-		unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
 		unsigned dst_layout = radv_meta_dst_layout_from_layout(dest_image_layout);
+		fs_key = radv_format_meta_fs_key(dest_image->vk_format);

 		radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 					      &(VkRenderPassBeginInfo) {
@@ -353,16 +364,16 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 						       }, VK_SUBPASS_CONTENTS_INLINE);
 		switch (src_image->type) {
 		case VK_IMAGE_TYPE_1D:
-			pipeline = device->meta_state.blit.pipeline_1d_src[fs_key];
+			pipeline = &device->meta_state.blit.pipeline_1d_src[fs_key];
 			break;
 		case VK_IMAGE_TYPE_2D:
-			pipeline = device->meta_state.blit.pipeline_2d_src[fs_key];
+			pipeline = &device->meta_state.blit.pipeline_2d_src[fs_key];
 			break;
 		case VK_IMAGE_TYPE_3D:
-			pipeline = device->meta_state.blit.pipeline_3d_src[fs_key];
+			pipeline = &device->meta_state.blit.pipeline_3d_src[fs_key];
 			break;
 		default:
-			unreachable(!"bad VkImageType");
+			unreachable("bad VkImageType");
 		}
 		break;
 	}
@@ -382,16 +393,16 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 						       }, VK_SUBPASS_CONTENTS_INLINE);
 		switch (src_image->type) {
 		case VK_IMAGE_TYPE_1D:
-			pipeline = device->meta_state.blit.depth_only_1d_pipeline;
+			pipeline = &device->meta_state.blit.depth_only_1d_pipeline;
 			break;
 		case VK_IMAGE_TYPE_2D:
-			pipeline = device->meta_state.blit.depth_only_2d_pipeline;
+			pipeline = &device->meta_state.blit.depth_only_2d_pipeline;
 			break;
 		case VK_IMAGE_TYPE_3D:
-			pipeline = device->meta_state.blit.depth_only_3d_pipeline;
+			pipeline = &device->meta_state.blit.depth_only_3d_pipeline;
 			break;
 		default:
-			unreachable(!"bad VkImageType");
+			unreachable("bad VkImageType");
 		}
 		break;
 	}
@@ -411,25 +422,33 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 						       }, VK_SUBPASS_CONTENTS_INLINE);
 		switch (src_image->type) {
 		case VK_IMAGE_TYPE_1D:
-			pipeline = device->meta_state.blit.stencil_only_1d_pipeline;
+			pipeline = &device->meta_state.blit.stencil_only_1d_pipeline;
 			break;
 		case VK_IMAGE_TYPE_2D:
-			pipeline = device->meta_state.blit.stencil_only_2d_pipeline;
+			pipeline = &device->meta_state.blit.stencil_only_2d_pipeline;
 			break;
 		case VK_IMAGE_TYPE_3D:
-			pipeline = device->meta_state.blit.stencil_only_3d_pipeline;
+			pipeline = &device->meta_state.blit.stencil_only_3d_pipeline;
 			break;
 		default:
-			unreachable(!"bad VkImageType");
+			unreachable("bad VkImageType");
 		}
 		break;
 	}
 	default:
-		unreachable(!"bad VkImageType");
+		unreachable("bad VkImageType");
+	}
+
+	if (!*pipeline) {
+		VkResult ret = build_pipeline(device, src_iview->aspect_mask, translate_sampler_dim(src_image->type), fs_key, pipeline);
+		if (ret != VK_SUCCESS) {
+			cmd_buffer->record_result = ret;
+			goto fail_pipeline;
+		}
 	}

 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
-			     VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
+			     VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

 	radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
 			              device->meta_state.blit.pipeline_layout,
@@ -471,6 +490,7 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,

 	radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);

+fail_pipeline:
 	radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));

 	/* At the point where we emit the draw call, all data from the
@@ -479,8 +499,6 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
 	/* TODO: above comment is not valid for at least descriptor sets/pools,
 	 * as we may not free them till after execution finishes. Check others. */

-	radv_DestroySampler(radv_device_to_handle(device), sampler,
-			    &cmd_buffer->pool->alloc);
 	radv_DestroyFramebuffer(radv_device_to_handle(device), fb,
 				&cmd_buffer->pool->alloc);
 }
@@ -519,8 +537,10 @@ void radv_CmdBlitImage(
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_image, src_image, srcImage);
 	RADV_FROM_HANDLE(radv_image, dest_image, destImage);
+	struct radv_device *device = cmd_buffer->device;
 	struct radv_meta_saved_state saved_state;
 	bool old_predicating;
+	VkSampler sampler;

 	/* From the Vulkan 1.0 spec:
 	 *
@@ -530,6 +550,16 @@ void radv_CmdBlitImage(
 	assert(src_image->info.samples == 1);
 	assert(dest_image->info.samples == 1);

+	radv_CreateSampler(radv_device_to_handle(device),
+			   &(VkSamplerCreateInfo) {
+				.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
+				.magFilter = filter,
+				.minFilter = filter,
+				.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+				.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+				.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
+			   }, &cmd_buffer->pool->alloc, &sampler);
+
 	radv_meta_save(&saved_state, cmd_buffer,
 		       RADV_META_SAVE_GRAPHICS_PIPELINE |
 		       RADV_META_SAVE_CONSTANTS |
@@ -651,7 +681,7 @@ void radv_CmdBlitImage(
 				       dest_image, &dest_iview, destImageLayout,
 				       dest_offset_0, dest_offset_1,
 				       dest_box,
-				       filter);
+				       sampler);
 		}
 	}

@@ -659,6 +689,9 @@ void radv_CmdBlitImage(
 	cmd_buffer->state.predicating = old_predicating;

 	radv_meta_restore(&saved_state, cmd_buffer);
+
+	radv_DestroySampler(radv_device_to_handle(device), sampler,
+			    &cmd_buffer->pool->alloc);
 }

 void
@@ -714,34 +747,188 @@ radv_device_finish_meta_blit_state(struct radv_device *device)
 					state->blit.ds_layout, &state->alloc);
 }

-static VkFormat pipeline_formats[] = {
-   VK_FORMAT_R8G8B8A8_UNORM,
-   VK_FORMAT_R8G8B8A8_UINT,
-   VK_FORMAT_R8G8B8A8_SINT,
-   VK_FORMAT_A2R10G10B10_UINT_PACK32,
-   VK_FORMAT_A2R10G10B10_SINT_PACK32,
-   VK_FORMAT_R16G16B16A16_UNORM,
-   VK_FORMAT_R16G16B16A16_SNORM,
-   VK_FORMAT_R16G16B16A16_UINT,
-   VK_FORMAT_R16G16B16A16_SINT,
-   VK_FORMAT_R32_SFLOAT,
-   VK_FORMAT_R32G32_SFLOAT,
-   VK_FORMAT_R32G32B32A32_SFLOAT
-};
+static VkResult
+build_pipeline(struct radv_device *device,
+               VkImageAspectFlagBits aspect,
+               enum glsl_sampler_dim tex_dim,
+               unsigned fs_key,
+               VkPipeline *pipeline)
+{
+	VkResult result = VK_SUCCESS;
+
+	mtx_lock(&device->meta_state.mtx);
+
+	if (*pipeline) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
+	struct radv_shader_module fs = {0};
+	struct radv_shader_module vs = {.nir = build_nir_vertex_shader()};
+	VkRenderPass rp;
+
+	switch(aspect) {
+	case VK_IMAGE_ASPECT_COLOR_BIT:
+		fs.nir = build_nir_copy_fragment_shader(tex_dim);
+		rp = device->meta_state.blit.render_pass[fs_key][0];
+		break;
+	case VK_IMAGE_ASPECT_DEPTH_BIT:
+		fs.nir = build_nir_copy_fragment_shader_depth(tex_dim);
+		rp = device->meta_state.blit.depth_only_rp[0];
+		break;
+	case VK_IMAGE_ASPECT_STENCIL_BIT:
+		fs.nir = build_nir_copy_fragment_shader_stencil(tex_dim);
+		rp = device->meta_state.blit.stencil_only_rp[0];
+		break;
+	default:
+		unreachable("Unhandled aspect");
+	}
+	VkPipelineVertexInputStateCreateInfo vi_create_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
+		.vertexBindingDescriptionCount = 0,
+		.vertexAttributeDescriptionCount = 0,
+	};
+
+	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
+		{
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_VERTEX_BIT,
+			.module = radv_shader_module_to_handle(&vs),
+			.pName = "main",
+			.pSpecializationInfo = NULL
+		}, {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
+			.module = radv_shader_module_to_handle(&fs),
+			.pName = "main",
+			.pSpecializationInfo = NULL
+		},
+	};
+
+	VkGraphicsPipelineCreateInfo vk_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
+		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
+		.pStages = pipeline_shader_stages,
+		.pVertexInputState = &vi_create_info,
+		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
+			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
+			.primitiveRestartEnable = false,
+		},
+		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
+			.viewportCount = 1,
+			.scissorCount = 1,
+		},
+		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
+			.rasterizerDiscardEnable = false,
+			.polygonMode = VK_POLYGON_MODE_FILL,
+			.cullMode = VK_CULL_MODE_NONE,
+			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
+		},
+		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
+			.rasterizationSamples = 1,
+			.sampleShadingEnable = false,
+			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
+		},
+		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
+			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
+			.dynamicStateCount = 4,
+			.pDynamicStates = (VkDynamicState[]) {
+				VK_DYNAMIC_STATE_VIEWPORT,
+				VK_DYNAMIC_STATE_SCISSOR,
+				VK_DYNAMIC_STATE_LINE_WIDTH,
+				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
+			},
+		},
+		.flags = 0,
+		.layout = device->meta_state.blit.pipeline_layout,
+		.renderPass = rp,
+		.subpass = 0,
+	};
+
+	VkPipelineColorBlendStateCreateInfo color_blend_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
+		.attachmentCount = 1,
+		.pAttachments = (VkPipelineColorBlendAttachmentState []) {
+			{
+				.colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
+						  VK_COLOR_COMPONENT_R_BIT |
+						  VK_COLOR_COMPONENT_G_BIT |
+						  VK_COLOR_COMPONENT_B_BIT },
+			}
+		};
+
+	VkPipelineDepthStencilStateCreateInfo depth_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+		.depthTestEnable = true,
+		.depthWriteEnable = true,
+		.depthCompareOp = VK_COMPARE_OP_ALWAYS,
+	};
+
+	VkPipelineDepthStencilStateCreateInfo stencil_info = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
+		.depthTestEnable = false,
+		.depthWriteEnable = false,
+		.stencilTestEnable = true,
+		.front = {
+			.failOp = VK_STENCIL_OP_REPLACE,
+			.passOp = VK_STENCIL_OP_REPLACE,
+			.depthFailOp = VK_STENCIL_OP_REPLACE,
+			.compareOp = VK_COMPARE_OP_ALWAYS,
+			.compareMask = 0xff,
+			.writeMask = 0xff,
+			.reference = 0
+		},
+		.back = {
+			.failOp = VK_STENCIL_OP_REPLACE,
+			.passOp = VK_STENCIL_OP_REPLACE,
+			.depthFailOp = VK_STENCIL_OP_REPLACE,
+			.compareOp = VK_COMPARE_OP_ALWAYS,
+			.compareMask = 0xff,
+			.writeMask = 0xff,
+			.reference = 0
+		},
+		.depthCompareOp = VK_COMPARE_OP_ALWAYS,
+	};
+
+	switch(aspect) {
+	case VK_IMAGE_ASPECT_COLOR_BIT:
+		vk_pipeline_info.pColorBlendState = &color_blend_info;
+		break;
+	case VK_IMAGE_ASPECT_DEPTH_BIT:
+		vk_pipeline_info.pDepthStencilState = &depth_info;
+		break;
+	case VK_IMAGE_ASPECT_STENCIL_BIT:
+		vk_pipeline_info.pDepthStencilState = &stencil_info;
+		break;
+	default:
+		unreachable("Unhandled aspect");
+	}
+
+	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
+		.use_rectlist = true
+	};
+
+	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
+	                                       radv_pipeline_cache_to_handle(&device->meta_state.cache),
+	                                       &vk_pipeline_info, &radv_pipeline_info,
+	                                       &device->meta_state.alloc, pipeline);
+	ralloc_free(vs.nir);
+	ralloc_free(fs.nir);
+	mtx_unlock(&device->meta_state.mtx);
+	return result;
+}

 static VkResult
-radv_device_init_meta_blit_color(struct radv_device *device,
-				 struct radv_shader_module *vs)
+radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
 {
-	struct radv_shader_module fs_1d = {0}, fs_2d = {0}, fs_3d = {0};
 	VkResult result;

-	fs_1d.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_1D);
-	fs_2d.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_2D);
-	fs_3d.nir = build_nir_copy_fragment_shader(GLSL_SAMPLER_DIM_3D);
-
-	for (unsigned i = 0; i < ARRAY_SIZE(pipeline_formats); ++i) {
-		unsigned key = radv_format_meta_fs_key(pipeline_formats[i]);
+	for (unsigned i = 0; i < NUM_META_FS_KEYS; ++i) {
+		unsigned key = radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]);
 		for(unsigned j = 0; j < RADV_META_DST_LAYOUT_COUNT; ++j) {
 			VkImageLayout layout = radv_meta_dst_layout_to_layout(j);
 			result = radv_CreateRenderPass(radv_device_to_handle(device),
@@ -749,7 +936,7 @@ radv_device_init_meta_blit_color(struct radv_device *device,
 							.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 								.attachmentCount = 1,
 								.pAttachments = &(VkAttachmentDescription) {
-								.format = pipeline_formats[i],
+								.format = radv_fs_key_format_exemplars[i],
 								.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
 								.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
 								.initialLayout = layout,
@@ -778,108 +965,18 @@ radv_device_init_meta_blit_color(struct radv_device *device,
 				goto fail;
 		}

-		VkPipelineVertexInputStateCreateInfo vi_create_info = {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-			.vertexBindingDescriptionCount = 0,
-			.vertexAttributeDescriptionCount = 0,
-		};
+		if (on_demand)
+			continue;

-		VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
-			{
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-				.stage = VK_SHADER_STAGE_VERTEX_BIT,
-				.module = radv_shader_module_to_handle(vs),
-				.pName = "main",
-				.pSpecializationInfo = NULL
-			}, {
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-				.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-				.module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
-				.pName = "main",
-				.pSpecializationInfo = NULL
-			},
-		};
-
-		const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
-			.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-			.stageCount = ARRAY_SIZE(pipeline_shader_stages),
-			.pStages = pipeline_shader_stages,
-			.pVertexInputState = &vi_create_info,
-			.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
-				.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
-				.primitiveRestartEnable = false,
-			},
-			.pViewportState = &(VkPipelineViewportStateCreateInfo) {
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-				.viewportCount = 1,
-				.scissorCount = 1,
-			},
-			.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-				.rasterizerDiscardEnable = false,
-				.polygonMode = VK_POLYGON_MODE_FILL,
-				.cullMode = VK_CULL_MODE_NONE,
-				.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
-			},
-			.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-				.rasterizationSamples = 1,
-				.sampleShadingEnable = false,
-				.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
-			},
-			.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-				.attachmentCount = 1,
-				.pAttachments = (VkPipelineColorBlendAttachmentState []) {
-					{ .colorWriteMask =
-					VK_COLOR_COMPONENT_A_BIT |
-					VK_COLOR_COMPONENT_R_BIT |
-					VK_COLOR_COMPONENT_G_BIT |
-					VK_COLOR_COMPONENT_B_BIT },
-				}
-			},
-			.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
-				.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-				.dynamicStateCount = 4,
-				.pDynamicStates = (VkDynamicState[]) {
-					VK_DYNAMIC_STATE_VIEWPORT,
-					VK_DYNAMIC_STATE_SCISSOR,
-					VK_DYNAMIC_STATE_LINE_WIDTH,
-					VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-				},
-			},
-			.flags = 0,
-			.layout = device->meta_state.blit.pipeline_layout,
-			.renderPass = device->meta_state.blit.render_pass[key][0],
-			.subpass = 0,
-		};
-
-		const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
-			.use_rectlist = true
-		};
-
-		pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_1d);
-		result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-						radv_pipeline_cache_to_handle(&device->meta_state.cache),
-						&vk_pipeline_info, &radv_pipeline_info,
-						&device->meta_state.alloc, &device->meta_state.blit.pipeline_1d_src[key]);
+		result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_1D, key, &device->meta_state.blit.pipeline_1d_src[key]);
 		if (result != VK_SUCCESS)
 			goto fail;

-		pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_2d);
-		result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-						radv_pipeline_cache_to_handle(&device->meta_state.cache),
-						&vk_pipeline_info, &radv_pipeline_info,
-						&device->meta_state.alloc, &device->meta_state.blit.pipeline_2d_src[key]);
+		result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_2D, key, &device->meta_state.blit.pipeline_2d_src[key]);
 		if (result != VK_SUCCESS)
 			goto fail;

-		pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_3d);
-		result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-						radv_pipeline_cache_to_handle(&device->meta_state.cache),
-						&vk_pipeline_info, &radv_pipeline_info,
-						&device->meta_state.alloc, &device->meta_state.blit.pipeline_3d_src[key]);
+		result = build_pipeline(device, VK_IMAGE_ASPECT_COLOR_BIT, GLSL_SAMPLER_DIM_3D, key, &device->meta_state.blit.pipeline_3d_src[key]);
 		if (result != VK_SUCCESS)
 			goto fail;

@@ -887,23 +984,14 @@ radv_device_init_meta_blit_color(struct radv_device *device,

 	result = VK_SUCCESS;
 fail:
-	ralloc_free(fs_1d.nir);
-	ralloc_free(fs_2d.nir);
-	ralloc_free(fs_3d.nir);
 	return result;
 }

 static VkResult
-radv_device_init_meta_blit_depth(struct radv_device *device,
-				 struct radv_shader_module *vs)
+radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand)
 {
-	struct radv_shader_module fs_1d = {0}, fs_2d = {0}, fs_3d = {0};
 	VkResult result;

-	fs_1d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_1D);
-	fs_2d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_2D);
-	fs_3d.nir = build_nir_copy_fragment_shader_depth(GLSL_SAMPLER_DIM_3D);
-
 	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
 		VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
 		result = radv_CreateRenderPass(radv_device_to_handle(device),
@@ -937,134 +1025,30 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
 			goto fail;
 	}

-	VkPipelineVertexInputStateCreateInfo vi_create_info = {
-		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-		.vertexBindingDescriptionCount = 0,
-		.vertexAttributeDescriptionCount = 0,
-	};
+	if (on_demand)
+		return VK_SUCCESS;

-	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
-		{
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-			.stage = VK_SHADER_STAGE_VERTEX_BIT,
-			.module = radv_shader_module_to_handle(vs),
-			.pName = "main",
-			.pSpecializationInfo = NULL
-		}, {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-			.module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
-			.pName = "main",
-			.pSpecializationInfo = NULL
-		},
-	};
-
-	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
-		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
-		.pStages = pipeline_shader_stages,
-		.pVertexInputState = &vi_create_info,
-		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
-			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
-			.primitiveRestartEnable = false,
-		},
-		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-			.viewportCount = 1,
-			.scissorCount = 1,
-		},
-		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-			.rasterizerDiscardEnable = false,
-			.polygonMode = VK_POLYGON_MODE_FILL,
-			.cullMode = VK_CULL_MODE_NONE,
-			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
-		},
-		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-			.rasterizationSamples = 1,
-			.sampleShadingEnable = false,
-			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
-		},
-		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-			.attachmentCount = 0,
-			.pAttachments = NULL,
-		},
-		.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
-			.depthTestEnable = true,
-			.depthWriteEnable = true,
-			.depthCompareOp = VK_COMPARE_OP_ALWAYS,
-		},
-		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-			.dynamicStateCount = 9,
-			.pDynamicStates = (VkDynamicState[]) {
-				VK_DYNAMIC_STATE_VIEWPORT,
-				VK_DYNAMIC_STATE_SCISSOR,
-				VK_DYNAMIC_STATE_LINE_WIDTH,
-				VK_DYNAMIC_STATE_DEPTH_BIAS,
-				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-				VK_DYNAMIC_STATE_DEPTH_BOUNDS,
-				VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
-				VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
-				VK_DYNAMIC_STATE_STENCIL_REFERENCE,
-			},
-		},
-		.flags = 0,
-		.layout = device->meta_state.blit.pipeline_layout,
-		.renderPass = device->meta_state.blit.depth_only_rp[0],
-		.subpass = 0,
-	};
-
-	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
-		.use_rectlist = true
-	};
-
-	pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_1d);
-	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
-					       &vk_pipeline_info, &radv_pipeline_info,
-					       &device->meta_state.alloc, &device->meta_state.blit.depth_only_1d_pipeline);
+	result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.depth_only_1d_pipeline);
 	if (result != VK_SUCCESS)
 		goto fail;

-	pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_2d);
-	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
-					       &vk_pipeline_info, &radv_pipeline_info,
-					       &device->meta_state.alloc, &device->meta_state.blit.depth_only_2d_pipeline);
+	result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.depth_only_2d_pipeline);
 	if (result != VK_SUCCESS)
 		goto fail;

-	pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_3d);
-	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
-					       &vk_pipeline_info, &radv_pipeline_info,
-					       &device->meta_state.alloc, &device->meta_state.blit.depth_only_3d_pipeline);
+	result = build_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.depth_only_3d_pipeline);
 	if (result != VK_SUCCESS)
 		goto fail;

 fail:
-	ralloc_free(fs_1d.nir);
-	ralloc_free(fs_2d.nir);
-	ralloc_free(fs_3d.nir);
 	return result;
 }

 static VkResult
-radv_device_init_meta_blit_stencil(struct radv_device *device,
-				   struct radv_shader_module *vs)
+radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand)
 {
-	struct radv_shader_module fs_1d = {0}, fs_2d = {0}, fs_3d = {0};
 	VkResult result;

-	fs_1d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_1D);
-	fs_2d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_2D);
-	fs_3d.nir = build_nir_copy_fragment_shader_stencil(GLSL_SAMPLER_DIM_3D);
-
 	for (enum radv_blit_ds_layout ds_layout = RADV_BLIT_DS_LAYOUT_TILE_ENABLE; ds_layout < RADV_BLIT_DS_LAYOUT_COUNT; ds_layout++) {
 		VkImageLayout layout = radv_meta_blit_ds_to_layout(ds_layout);
 		result = radv_CreateRenderPass(radv_device_to_handle(device),
@@ -1098,145 +1082,30 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
 	if (result != VK_SUCCESS)
 		goto fail;

-	VkPipelineVertexInputStateCreateInfo vi_create_info = {
-		.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
-		.vertexBindingDescriptionCount = 0,
-		.vertexAttributeDescriptionCount = 0,
-	};
+	if (on_demand)
+		return VK_SUCCESS;

-	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
-		{
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-			.stage = VK_SHADER_STAGE_VERTEX_BIT,
-			.module = radv_shader_module_to_handle(vs),
-			.pName = "main",
-			.pSpecializationInfo = NULL
-		}, {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
-			.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
-			.module = VK_NULL_HANDLE, /* TEMPLATE VALUE! FILL ME IN! */
-			.pName = "main",
-			.pSpecializationInfo = NULL
-		},
-	};
-
-	const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
-		.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
-		.stageCount = ARRAY_SIZE(pipeline_shader_stages),
-		.pStages = pipeline_shader_stages,
-		.pVertexInputState = &vi_create_info,
-		.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
-			.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
-			.primitiveRestartEnable = false,
-		},
-		.pViewportState = &(VkPipelineViewportStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
-			.viewportCount = 1,
-			.scissorCount = 1,
-		},
-		.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
-			.rasterizerDiscardEnable = false,
-			.polygonMode = VK_POLYGON_MODE_FILL,
-			.cullMode = VK_CULL_MODE_NONE,
-			.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
-		},
-		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-			.rasterizationSamples = 1,
-			.sampleShadingEnable = false,
-			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
-		},
-		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
-			.attachmentCount = 0,
-			.pAttachments = NULL,
-		},
-		.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
-			.depthTestEnable = false,
-			.depthWriteEnable = false,
-			.stencilTestEnable = true,
-			.front = {
-				.failOp = VK_STENCIL_OP_REPLACE,
-				.passOp = VK_STENCIL_OP_REPLACE,
-				.depthFailOp = VK_STENCIL_OP_REPLACE,
-				.compareOp = VK_COMPARE_OP_ALWAYS,
-				.compareMask = 0xff,
-				.writeMask = 0xff,
-				.reference = 0
-			},
-			.back = {
-				.failOp = VK_STENCIL_OP_REPLACE,
-				.passOp = VK_STENCIL_OP_REPLACE,
-				.depthFailOp = VK_STENCIL_OP_REPLACE,
-				.compareOp = VK_COMPARE_OP_ALWAYS,
-				.compareMask = 0xff,
-				.writeMask = 0xff,
-				.reference = 0
-			},
-			.depthCompareOp = VK_COMPARE_OP_ALWAYS,
-		},
-		.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
-			.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
-			.dynamicStateCount = 6,
-			.pDynamicStates = (VkDynamicState[]) {
-				VK_DYNAMIC_STATE_VIEWPORT,
-				VK_DYNAMIC_STATE_SCISSOR,
-				VK_DYNAMIC_STATE_LINE_WIDTH,
-				VK_DYNAMIC_STATE_DEPTH_BIAS,
-				VK_DYNAMIC_STATE_BLEND_CONSTANTS,
-				VK_DYNAMIC_STATE_DEPTH_BOUNDS,
-			},
-		},
-		.flags = 0,
-		.layout = device->meta_state.blit.pipeline_layout,
-		.renderPass = device->meta_state.blit.stencil_only_rp[0],
-		.subpass = 0,
-	};
-
-	const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
-		.use_rectlist = true
-	};
-
-	pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_1d);
-	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
-					       &vk_pipeline_info, &radv_pipeline_info,
-					       &device->meta_state.alloc, &device->meta_state.blit.stencil_only_1d_pipeline);
+	result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_1D, 0, &device->meta_state.blit.stencil_only_1d_pipeline);
 	if (result != VK_SUCCESS)
 		goto fail;

-	pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_2d);
-	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
-					       &vk_pipeline_info, &radv_pipeline_info,
-					       &device->meta_state.alloc, &device->meta_state.blit.stencil_only_2d_pipeline);
+	result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_2D, 0, &device->meta_state.blit.stencil_only_2d_pipeline);
 	if (result != VK_SUCCESS)
 		goto fail;

-	pipeline_shader_stages[1].module = radv_shader_module_to_handle(&fs_3d);
-	result = radv_graphics_pipeline_create(radv_device_to_handle(device),
-					       radv_pipeline_cache_to_handle(&device->meta_state.cache),
-					       &vk_pipeline_info, &radv_pipeline_info,
-					       &device->meta_state.alloc, &device->meta_state.blit.stencil_only_3d_pipeline);
+	result = build_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, GLSL_SAMPLER_DIM_3D, 0, &device->meta_state.blit.stencil_only_3d_pipeline);
 	if (result != VK_SUCCESS)
 		goto fail;


 fail:
-	ralloc_free(fs_1d.nir);
-	ralloc_free(fs_2d.nir);
-	ralloc_free(fs_3d.nir);
 	return result;
 }

 VkResult
-radv_device_init_meta_blit_state(struct radv_device *device)
+radv_device_init_meta_blit_state(struct radv_device *device, bool on_demand)
 {
 	VkResult result;
-	struct radv_shader_module vs = {0};

 	VkDescriptorSetLayoutCreateInfo ds_layout_info = {
 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -1273,20 +1142,17 @@ radv_device_init_meta_blit_state(struct radv_device *device)
 	if (result != VK_SUCCESS)
 		goto fail;

-	vs.nir = build_nir_vertex_shader();
-
-	result = radv_device_init_meta_blit_color(device, &vs);
+	result = radv_device_init_meta_blit_color(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail;

-	result = radv_device_init_meta_blit_depth(device, &vs);
+	result = radv_device_init_meta_blit_depth(device, on_demand);
 	if (result != VK_SUCCESS)
 		goto fail;

-	result = radv_device_init_meta_blit_stencil(device, &vs);
+	result = radv_device_init_meta_blit_stencil(device, on_demand);

 fail:
-	ralloc_free(vs.nir);
 	if (result != VK_SUCCESS)
 		radv_device_finish_meta_blit_state(device);
 	return result;
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -35,6 +35,22 @@ enum blit2d_src_type {
 	BLIT2D_NUM_SRC_TYPES,
 };

+static VkResult
+blit2d_init_color_pipeline(struct radv_device *device,
+			   enum blit2d_src_type src_type,
+			   VkFormat format,
+			   uint32_t log2_samples);
+
+static VkResult
+blit2d_init_depth_only_pipeline(struct radv_device *device,
+				enum blit2d_src_type src_type,
+				uint32_t log2_samples);
+
+static VkResult
+blit2d_init_stencil_only_pipeline(struct radv_device *device,
+				  enum blit2d_src_type src_type,
+				  uint32_t log2_samples);
+
 static void
 create_iview(struct radv_cmd_buffer *cmd_buffer,
             struct radv_meta_blit2d_surf *surf,
@@ -268,6 +284,14 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 				unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
 				unsigned dst_layout = radv_meta_dst_layout_from_layout(dst->current_layout);

+				if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key] == VK_NULL_HANDLE) {
+					VkResult ret = blit2d_init_color_pipeline(device, src_type, radv_fs_key_format_exemplars[fs_key], log2_samples);
+					if (ret != VK_SUCCESS) {
+						cmd_buffer->record_result = ret;
+						goto fail_pipeline;
+					}
+				}
+
 				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 							&(VkRenderPassBeginInfo) {
 								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -285,6 +309,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
 				bind_pipeline(cmd_buffer, src_type, fs_key, log2_samples);
 			} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
 				enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+				if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type] == VK_NULL_HANDLE) {
+					VkResult ret = blit2d_init_depth_only_pipeline(device, src_type, log2_samples);
+					if (ret != VK_SUCCESS) {
+						cmd_buffer->record_result = ret;
+						goto fail_pipeline;
+					}
+				}
+
 				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 							&(VkRenderPassBeginInfo) {
 								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -303,6 +336,15 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,

 			} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
 				enum radv_blit_ds_layout ds_layout = radv_meta_blit_ds_to_type(dst->current_layout);
+
+				if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type] == VK_NULL_HANDLE) {
+					VkResult ret = blit2d_init_stencil_only_pipeline(device, src_type, log2_samples);
+					if (ret != VK_SUCCESS) {
+						cmd_buffer->record_result = ret;
+						goto fail_pipeline;
+					}
+				}
+
 				radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
 							&(VkRenderPassBeginInfo) {
 								.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
@@ -337,26 +379,10 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,



-			if (log2_samples > 0) {
-				for (uint32_t sample = 0; sample < src_img->image->info.samples; sample++) {
-					uint32_t sample_mask = 1 << sample;
-					radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-							      device->meta_state.blit2d[log2_samples].p_layouts[src_type],
-							      VK_SHADER_STAGE_FRAGMENT_BIT, 20, 4,
-							      &sample);
-
-					radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
-							      device->meta_state.blit2d[log2_samples].p_layouts[src_type],
-							      VK_SHADER_STAGE_FRAGMENT_BIT, 24, 4,
-							      &sample_mask);
-
-					radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
-				}
-			}
-			else
-				radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
+			radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
 			radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));

+fail_pipeline:
 			/* At the point where we emit the draw call, all data from the
 			* descriptor sets, etc. has been used.  We are free to delete it.
 			*/
@@ -477,10 +503,7 @@ build_nir_texel_fetch(struct nir_builder *b, struct radv_device *device,
 		tex_pos_3d = nir_vec(b, chans, 3);
 	}
 	if (is_multisampled) {
-		sample_idx = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
-		nir_intrinsic_set_base(sample_idx, 20);
-		nir_intrinsic_set_range(sample_idx, 4);
-		sample_idx->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+		sample_idx = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_sample_id);
 		sample_idx->num_components = 1;
 		nir_ssa_dest_init(&sample_idx->instr, &sample_idx->dest, 1, 32, "sample_idx");
 		nir_builder_instr_insert(b, &sample_idx->instr);
@@ -535,7 +558,6 @@ build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
 	nir_ssa_def *pos_y = nir_channel(b, tex_pos, 1);
 	pos_y = nir_imul(b, pos_y, &width->dest.ssa);
 	pos_x = nir_iadd(b, pos_x, pos_y);
-	//pos_x = nir_iadd(b, pos_x, nir_imm_int(b, 100000));

 	nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;

@@ -562,27 +584,6 @@ static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
 	.vertexAttributeDescriptionCount = 0,
 };

-static void
-build_nir_store_sample_mask(struct nir_builder *b)
-{
-	nir_intrinsic_instr *sample_mask = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
-	nir_intrinsic_set_base(sample_mask, 24);
-	nir_intrinsic_set_range(sample_mask, 4);
-	sample_mask->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
-	sample_mask->num_components = 1;
-	nir_ssa_dest_init(&sample_mask->instr, &sample_mask->dest, 1, 32, "sample_mask");
-	nir_builder_instr_insert(b, &sample_mask->instr);
-
-	const struct glsl_type *sample_mask_out_type = glsl_uint_type();
-
-	nir_variable *sample_mask_out =
-		nir_variable_create(b->shader, nir_var_shader_out,
-				    sample_mask_out_type, "sample_mask_out");
-	sample_mask_out->data.location = FRAG_RESULT_SAMPLE_MASK;
-
-	nir_store_var(b, sample_mask_out, &sample_mask->dest.ssa, 0x1);
-}
-
 static nir_shader *
 build_nir_copy_fragment_shader(struct radv_device *device,
                               texel_fetch_build_func txf_func, const char* name, bool is_3d,
@@ -603,10 +604,6 @@ build_nir_copy_fragment_shader(struct radv_device *device,
 						      vec4, "f_color");
 	color_out->data.location = FRAG_RESULT_DATA0;

-	if (is_multisampled) {
-		build_nir_store_sample_mask(&b);
-	}
-
 	nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
 	nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);

@@ -636,10 +633,6 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
 						      vec4, "f_color");
 	color_out->data.location = FRAG_RESULT_DEPTH;

-	if (is_multisampled) {
-		build_nir_store_sample_mask(&b);
-	}
-
 	nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
 	nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);

@@ -669,10 +662,6 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
 						      vec4, "f_color");
 	color_out->data.location = FRAG_RESULT_STENCIL;

-	if (is_multisampled) {
-		build_nir_store_sample_mask(&b);
-	}
-
 	nir_ssa_def *pos_int = nir_f2i32(&b, nir_load_var(&b, tex_pos_in));
 	nir_ssa_def *tex_pos = nir_channels(&b, pos_int, 0x3);

@@ -737,6 +726,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
 	unsigned fs_key = radv_format_meta_fs_key(format);
 	const char *name;

+	mtx_lock(&device->meta_state.mtx);
+	if (device->meta_state.blit2d[log2_samples].pipelines[src_type][fs_key]) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
 	texel_fetch_build_func src_func;
 	switch(src_type) {
 	case BLIT2D_SRC_TYPE_IMAGE:
@@ -845,7 +840,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
 		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
 			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
 			.rasterizationSamples = 1 << log2_samples,
-			.sampleShadingEnable = false,
+			.sampleShadingEnable = log2_samples > 1,
+			.minSampleShading = 1.0,
 			.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
 		},
 		.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
@@ -894,6 +890,7 @@ blit2d_init_color_pipeline(struct radv_device *device,
 	ralloc_free(vs.nir);
 	ralloc_free(fs.nir);

+	mtx_unlock(&device->meta_state.mtx);
 	return result;
 }

@@ -905,6 +902,12 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 	VkResult result;
 	const char *name;

+	mtx_lock(&device->meta_state.mtx);
+	if (device->meta_state.blit2d[log2_samples].depth_only_pipeline[src_type]) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
 	texel_fetch_build_func src_func;
 	switch(src_type) {
 	case BLIT2D_SRC_TYPE_IMAGE:
@@ -1057,6 +1060,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 	ralloc_free(vs.nir);
 	ralloc_free(fs.nir);

+	mtx_unlock(&device->meta_state.mtx);
 	return result;
 }

@@ -1068,6 +1072,12 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 	VkResult result;
 	const char *name;

+	mtx_lock(&device->meta_state.mtx);
+	if (device->meta_state.blit2d[log2_samples].stencil_only_pipeline[src_type]) {
+		mtx_unlock(&device->meta_state.mtx);
+		return VK_SUCCESS;
+	}
+
 	texel_fetch_build_func src_func;
 	switch(src_type) {
 	case BLIT2D_SRC_TYPE_IMAGE:
@@ -1236,24 +1246,10 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 	ralloc_free(vs.nir);
 	ralloc_free(fs.nir);

+	mtx_unlock(&device->meta_state.mtx);
 	return result;
 }

-static VkFormat pipeline_formats[] = {
-   VK_FORMAT_R8G8B8A8_UNORM,
-   VK_FORMAT_R8G8B8A8_UINT,
-   VK_FORMAT_R8G8B8A8_SINT,
-   VK_FORMAT_A2R10G10B10_UINT_PACK32,
-   VK_FORMAT_A2R10G10B10_SINT_PACK32,
-   VK_FORMAT_R16G16B16A16_UNORM,
-   VK_FORMAT_R16G16B16A16_SNORM,
-   VK_FORMAT_R16G16B16A16_UINT,
-   VK_FORMAT_R16G16B16A16_SINT,
-   VK_FORMAT_R32_SFLOAT,
-   VK_FORMAT_R32G32_SFLOAT,
-   VK_FORMAT_R32G32B32A32_SFLOAT
-};
-
 static VkResult
 meta_blit2d_create_pipe_layout(struct radv_device *device,
 			       int idx,
@@ -1263,7 +1259,7 @@ meta_blit2d_create_pipe_layout(struct radv_device *device,
 	VkDescriptorType desc_type = (idx == BLIT2D_SRC_TYPE_BUFFER) ? VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER : VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
 	const VkPushConstantRange push_constant_ranges[] = {
 		{VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
-		{VK_SHADER_STAGE_FRAGMENT_BIT, 16, 12},
+		{VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
 	};
 	int num_push_constant_range = (idx != BLIT2D_SRC_TYPE_IMAGE || log2_samples > 0) ? 2 : 1;

@@ -1302,7 +1298,7 @@ fail:
 }

 VkResult
-radv_device_init_meta_blit2d_state(struct radv_device *device)
+radv_device_init_meta_blit2d_state(struct radv_device *device, bool on_demand)
 {
 	VkResult result;
 	bool create_3d = device->physical_device->rad_info.chip_class >= GFX9;
@@ -1320,8 +1316,11 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
 			if (result != VK_SUCCESS)
 				goto fail;

-			for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
-				result = blit2d_init_color_pipeline(device, src, pipeline_formats[j], log2_samples);
+			if (on_demand)
+				continue;
+
+			for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
+				result = blit2d_init_color_pipeline(device, src, radv_fs_key_format_exemplars[j], log2_samples);
 				if (result != VK_SUCCESS)
 					goto fail;
 			}
--- a/src/amd/vulkan/radv_meta_buffer.c
+++ b/src/amd/vulkan/radv_meta_buffer.c
@@ -513,7 +513,7 @@ void radv_CmdUpdateBuffer(
 	if (!dataSize)
 		return;

-	if (dataSize < RADV_BUFFER_OPS_CS_THRESHOLD) {
+	if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
 		si_emit_cache_flush(cmd_buffer);

 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
--- a/src/amd/vulkan/radv_meta_bufimage.c
+++ b/src/amd/vulkan/radv_meta_bufimage.c
--- a/Show More
+++ b/Show More
@@ -1 +1 @@
 .2.7
 .3.6