nir/spirv: return after emitting a branch in block

When emitting a branch in a block, it does not make sense to continue
processing further instructions, as they will not be reachable.

This fixes a nasty case of a loop containing a branch whose then-part and
else-part both exit the loop:

    %1 = OpLabel
         OpLoopMerge %2 %3 None
         OpBranchConditional %false %2 %2
    %3 = OpLabel
         OpBranch %1
    %2 = OpLabel
         [...]

We know that block %1 will always branch to block %2, which is the merge
block for the loop, and thus a break is emitted. If we continue processing
further instructions, we will process the branch conditional and thus emit
the proper NIR conditional, which leads to instructions after the break.

This fixes dEQP-VK.graphicsfuzz.continue-and-merge.

CC: Jason Ekstrand <jason@jlekstrand.net>
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
egl/android: replace magic 0=CbCr,1=CrCb with simple enum
2019-02-28 09:47:06 +01:00 · 2019-02-28 07:44:46 +00:00 · 2019-02-27 22:18:24 -08:00 · 2019-02-28 14:23:02 +10:00 · 2019-02-28 11:54:06 +11:00 · 2019-02-28 11:47:37 +11:00
806 changed files with 96402 additions and 27283 deletions
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -0,0 +1,499 @@
+# This is the tag of the docker image used for the build jobs. If the
+# image doesn't exist yet, the containers-build stage generates it.
+#
+# In order to generate a new image, one should generally change the tag.
+# While removing the image from the registry would also work, that's not
+# recommended except for ephemeral images during development: Replacing
+# an image after a significant amount of time might pull in newer
+# versions of gcc/clang or other packages, which might break the build
+# with older commits using the same tag.
+#
+# After merging a change resulting in generating a new image to the
+# main repository, it's recommended to remove the image from the source
+# repository's container registry, so that the image from the main
+# repository's registry will be used there as well.
+#
+# The format of the tag is "%Y-%m-%d-${counter}" where ${counter} stays
+# at "01" unless you have multiple updates on the same day :)
+variables:
+  UBUNTU_TAG: 2019-02-12-01
+  UBUNTU_IMAGE: "$CI_REGISTRY_IMAGE/ubuntu:$UBUNTU_TAG"
+  UBUNTU_IMAGE_MAIN: "registry.freedesktop.org/mesa/mesa/ubuntu:$UBUNTU_TAG"
+
+cache:
+  paths:
+    - ccache
+
+stages:
+  - containers-build
+  - build+test
+
+
+# When to automatically run the CI
+.ci-run-policy:
+  only:
+    - master
+    - merge_requests
+    - /^ci([-/].*)?$/
+
+
+# CONTAINERS
+
+containers:ubuntu:
+  extends: .ci-run-policy
+  stage: containers-build
+  image: docker:stable
+  services:
+    - docker:dind
+  variables:
+    DOCKER_HOST: tcp://docker:2375
+    DOCKER_DRIVER: overlay2
+  script:
+    # Enable experimental features such as `docker manifest inspect`
+    - mkdir -p ~/.docker
+    - "echo '{\"experimental\": \"enabled\"}' > ~/.docker/config.json"
+    - docker login -u gitlab-ci-token -p $CI_JOB_TOKEN $CI_REGISTRY
+    # Check if the image (with the specific tag) already exists
+    - docker manifest inspect $UBUNTU_IMAGE && exit || true
+    # Try to re-use the image from the main repository's registry
+    - docker image pull $UBUNTU_IMAGE_MAIN &&
+      docker image tag $UBUNTU_IMAGE_MAIN $UBUNTU_IMAGE &&
+      docker image push $UBUNTU_IMAGE && exit || true
+    - docker build -t $UBUNTU_IMAGE -f .gitlab-ci/Dockerfile.ubuntu .
+    - docker push $UBUNTU_IMAGE
+
+
+# BUILD
+
+.build:
+  extends: .ci-run-policy
+  image: $UBUNTU_IMAGE
+  stage: build+test
+  artifacts:
+    when: on_failure
+    untracked: true
+  # Use ccache transparently, and print stats before/after
+  before_script:
+    - export PATH="/usr/lib/ccache:$PATH"
+    - export CCACHE_BASEDIR="$PWD"
+    - export CCACHE_DIR="$PWD/ccache"
+    - export CCACHE_COMPILERCHECK=content
+    - ccache --zero-stats || true
+    - ccache --show-stats || true
+  after_script:
+    - export CCACHE_DIR="$PWD/ccache"
+    - ccache --show-stats
+
+.meson-build:
+  extends: .build
+  script:
+    # We need to control the version of llvm-config we're using, so we'll
+    # generate a native file to do so. This requires meson >=0.49
+    - if test -n "$LLVM_VERSION"; then
+        LLVM_CONFIG="llvm-config-${LLVM_VERSION}";
+        echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > native.file;
+        $LLVM_CONFIG --version;
+      else
+        touch native.file;
+      fi
+    - meson --version
+    - meson _build
+            --native-file=native.file
+            -D build-tests=true
+            -D libunwind=${UNWIND}
+            ${DRI_LOADERS}
+            -D dri-drivers=${DRI_DRIVERS:-[]}
+            ${GALLIUM_ST}
+            -D gallium-drivers=${GALLIUM_DRIVERS:-[]}
+            -D vulkan-drivers=${VULKAN_DRIVERS:-[]}
+    - cd _build
+    - meson configure
+    - ninja -j4
+    - ninja test
+
+.make-build:
+  extends: .build
+  variables:
+    MAKEFLAGS: "-j4"
+  script:
+    - if test -n "$LLVM_VERSION"; then
+        export LLVM_CONFIG="llvm-config-${LLVM_VERSION}";
+      fi
+    - mkdir build
+    - cd build
+    - ../autogen.sh
+        --enable-autotools
+        --enable-debug
+        $LIBUNWIND_FLAGS
+        $DRI_LOADERS
+        --with-dri-drivers=$DRI_DRIVERS
+        $GALLIUM_ST
+        --with-gallium-drivers=$GALLIUM_DRIVERS
+        --with-vulkan-drivers=$VULKAN_DRIVERS
+        --disable-llvm-shared-libs
+    - make
+    - eval $MAKE_CHECK_COMMAND
+
+.scons-build:
+  extends: .build
+  variables:
+    SCONSFLAGS: "-j4"
+  script:
+    - if test -n "$LLVM_VERSION"; then
+        export LLVM_CONFIG="llvm-config-${LLVM_VERSION}";
+      fi
+    - scons $SCONS_TARGET
+    - eval $SCONS_CHECK_COMMAND
+
+build:meson-vulkan:
+  extends: .meson-build
+  variables:
+    UNWIND: "false"
+    DRI_LOADERS: >
+      -D glx=disabled
+      -D gbm=false
+      -D egl=false
+      -D platforms=x11,wayland,drm
+      -D osmesa=none
+    GALLIUM_ST: >
+      -D dri3=true
+      -D gallium-vdpau=false
+      -D gallium-xvmc=false
+      -D gallium-omx=disabled
+      -D gallium-va=false
+      -D gallium-xa=false
+      -D gallium-nine=false
+      -D gallium-opencl=disabled
+    VULKAN_DRIVERS: intel,amd
+    LLVM_VERSION: "7"
+
+build:meson-loader-classic-dri:
+  extends: .meson-build
+  variables:
+    UNWIND: "false"
+    DRI_LOADERS: >
+      -D glx=dri
+      -D gbm=true
+      -D egl=true
+      -D platforms=x11,wayland,drm,surfaceless
+      -D osmesa=classic
+    DRI_DRIVERS: "i915,i965,r100,r200,swrast,nouveau"
+    GALLIUM_ST: >
+      -D dri3=true
+      -D gallium-vdpau=false
+      -D gallium-xvmc=false
+      -D gallium-omx=disabled
+      -D gallium-va=false
+      -D gallium-xa=false
+      -D gallium-nine=false
+      -D gallium-opencl=disabled
+
+build:meson-glvnd:
+  extends: .meson-build
+  variables:
+    UNWIND: "true"
+    DRI_LOADERS: >
+      -D glvnd=true
+      -D egl=true
+      -D gbm=true
+      -D glx=dri
+    DRI_DRIVERS: "i965"
+    GALLIUM_ST: >
+      -D gallium-vdpau=false
+      -D gallium-xvmc=false
+      -D gallium-omx=disabled
+      -D gallium-va=false
+      -D gallium-xa=false
+      -D gallium-nine=false
+      -D gallium-opencl=disabled
+
+# NOTE: Building SWR is 2x (yes two) times slower than all the other
+# gallium drivers combined.
+# Start this early so that it doesn't hinder the run time.
+build:meson-gallium-swr:
+  extends: .meson-build
+  variables:
+    UNWIND: "true"
+    DRI_LOADERS: >
+      -D glx=disabled
+      -D egl=false
+      -D gbm=false
+    GALLIUM_ST: >
+      -D dri3=false
+      -D gallium-vdpau=false
+      -D gallium-xvmc=false
+      -D gallium-omx=disabled
+      -D gallium-va=false
+      -D gallium-xa=false
+      -D gallium-nine=false
+      -D gallium-opencl=disabled
+    GALLIUM_DRIVERS: "swr"
+    LLVM_VERSION: "6.0"
+
+build:meson-gallium-radeonsi:
+  extends: .meson-build
+  variables:
+    UNWIND: "true"
+    DRI_LOADERS: >
+      -D glx=disabled
+      -D egl=false
+      -D gbm=false
+    GALLIUM_ST: >
+      -D dri3=false
+      -D gallium-vdpau=false
+      -D gallium-xvmc=false
+      -D gallium-omx=disabled
+      -D gallium-va=false
+      -D gallium-xa=false
+      -D gallium-nine=false
+      -D gallium-opencl=disabled
+    GALLIUM_DRIVERS: "radeonsi"
+    LLVM_VERSION: "7"
+
+build:meson-gallium-drivers-other:
+  extends: .meson-build
+  variables:
+    UNWIND: "true"
+    DRI_LOADERS: >
+      -D glx=disabled
+      -D egl=false
+      -D gbm=false
+    GALLIUM_ST: >
+      -D dri3=false
+      -D gallium-vdpau=false
+      -D gallium-xvmc=false
+      -D gallium-omx=disabled
+      -D gallium-va=false
+      -D gallium-xa=false
+      -D gallium-nine=false
+      -D gallium-opencl=disabled
+    GALLIUM_DRIVERS: "i915,iris,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv"
+    LLVM_VERSION: "5.0"
+
+build:meson-gallium-clover-llvm5:
+  extends: .meson-build
+  variables:
+    UNWIND: "true"
+    DRI_LOADERS: >
+      -D glx=disabled
+      -D egl=false
+      -D gbm=false
+    GALLIUM_ST: >
+      -D dri3=false
+      -D gallium-vdpau=false
+      -D gallium-xvmc=false
+      -D gallium-omx=disabled
+      -D gallium-va=false
+      -D gallium-xa=false
+      -D gallium-nine=false
+      -D gallium-opencl=icd
+    GALLIUM_DRIVERS: "r600"
+    LLVM_VERSION: "5.0"
+
+build:meson-gallium-clover-llvm6:
+  extends: build:meson-gallium-clover-llvm5
+  variables:
+    LLVM_VERSION: "6.0"
+
+build:meson-gallium-clover-llvm7:
+  extends: build:meson-gallium-clover-llvm5
+  variables:
+    GALLIUM_DRIVERS: "r600,radeonsi"
+    LLVM_VERSION: "7"
+
+build:meson-gallium-st-other:
+  extends: .meson-build
+  variables:
+    UNWIND: "true"
+    DRI_LOADERS: >
+      -D glx=disabled
+      -D egl=false
+      -D gbm=false
+    GALLIUM_ST: >
+      -D dri3=true
+      -D gallium-vdpau=true
+      -D gallium-xvmc=true
+      -D gallium-omx=bellagio
+      -D gallium-va=true
+      -D gallium-xa=true
+      -D gallium-nine=true
+      -D gallium-opencl=disabled
+      -D osmesa=gallium
+    GALLIUM_DRIVERS: "nouveau,swrast"
+    LLVM_VERSION: "5.0"
+
+build:make-vulkan:
+  extends: .make-build
+  variables:
+    MAKE_CHECK_COMMAND: "make -C src/gtest check && make -C src/intel check"
+    LLVM_VERSION: "7"
+    DRI_LOADERS: >
+      --disable-glx
+      --disable-gbm
+      --disable-egl
+      --with-platforms=x11,wayland,drm
+    DRI_DRIVERS: ""
+    GALLIUM_ST: >
+      --enable-dri
+      --enable-dri3
+      --disable-opencl
+      --disable-xa
+      --disable-nine
+      --disable-xvmc
+      --disable-vdpau
+      --disable-va
+      --disable-omx-bellagio
+      --disable-gallium-osmesa
+    VULKAN_DRIVERS: intel,radeon
+    LIBUNWIND_FLAGS: --disable-libunwind
+
+build:make-loader-classic-dri:
+  extends: .make-build
+  variables:
+    MAKE_CHECK_COMMAND: "make check"
+    DRI_LOADERS: >
+      --enable-glx
+      --enable-gbm
+      --enable-egl
+      --with-platforms=x11,wayland,drm,surfaceless
+      --enable-osmesa
+    DRI_DRIVERS: "i915,i965,radeon,r200,swrast,nouveau"
+    GALLIUM_ST: >
+      --enable-dri
+      --disable-opencl
+      --disable-xa
+      --disable-nine
+      --disable-xvmc
+      --disable-vdpau
+      --disable-va
+      --disable-omx-bellagio
+      --disable-gallium-osmesa
+    LIBUNWIND_FLAGS: --disable-libunwind
+
+# NOTE: Building SWR is 2x (yes two) times slower than all the other
+# gallium drivers combined.
+# Start this early so that it doesn't hinder the run time.
+build:make-gallium-drivers-swr:
+  extends: .make-build
+  variables:
+    MAKE_CHECK_COMMAND: "true"
+    LLVM_VERSION: "6.0"
+    DRI_LOADERS: >
+      --disable-glx
+      --disable-gbm
+      --disable-egl
+    GALLIUM_ST: >
+      --enable-dri
+      --disable-opencl
+      --disable-xa
+      --disable-nine
+      --disable-xvmc
+      --disable-vdpau
+      --disable-va
+      --disable-omx-bellagio
+      --disable-gallium-osmesa
+    GALLIUM_DRIVERS: "swr"
+    LIBUNWIND_FLAGS: --enable-libunwind
+
+build:make-gallium-drivers-radeonsi:
+  extends: build:make-gallium-drivers-swr
+  variables:
+    LLVM_VERSION: "7"
+    GALLIUM_DRIVERS: "radeonsi"
+
+build:make-gallium-drivers-other:
+  extends: build:make-gallium-drivers-swr
+  variables:
+    LLVM_VERSION: "3.9"
+    GALLIUM_DRIVERS: "i915,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv"
+
+build:make-gallium-st-clover-llvm-39:
+  extends: .make-build
+  variables:
+    MAKE_CHECK_COMMAND: "true"
+    LLVM_VERSION: "3.9"
+    DRI_LOADERS: >
+      --disable-glx
+      --disable-gbm
+      --disable-egl
+    GALLIUM_ST: >
+      --disable-dri
+      --enable-opencl
+      --enable-opencl-icd
+      --enable-llvm
+      --disable-xa
+      --disable-nine
+      --disable-xvmc
+      --disable-vdpau
+      --disable-va
+      --disable-omx-bellagio
+      --disable-gallium-osmesa
+    GALLIUM_DRIVERS: "r600"
+    LIBUNWIND_FLAGS: --enable-libunwind
+
+build:make-gallium-st-clover-llvm-4:
+  extends: build:make-gallium-st-clover-llvm-39
+  variables:
+    LLVM_VERSION: "4.0"
+
+build:make-gallium-st-clover-llvm-5:
+  extends: build:make-gallium-st-clover-llvm-39
+  variables:
+    LLVM_VERSION: "5.0"
+
+build:make-gallium-st-clover-llvm-6:
+  extends: build:make-gallium-st-clover-llvm-39
+  variables:
+    LLVM_VERSION: "6.0"
+
+build:make-gallium-st-clover-llvm-7:
+  extends: build:make-gallium-st-clover-llvm-39
+  variables:
+    LLVM_VERSION: "7"
+    GALLIUM_DRIVERS: "r600,radeonsi"
+
+build:make-gallium-st-other:
+  extends: .make-build
+  variables:
+    MAKE_CHECK_COMMAND: "true"
+    # We should be testing 3.3, but 3.9 is the oldest that still exists in ubuntu
+    LLVM_VERSION: "3.9"
+    DRI_LOADERS: >
+      --disable-glx
+      --disable-gbm
+      --disable-egl
+    GALLIUM_ST: >
+      --enable-dri
+      --disable-opencl
+      --enable-xa
+      --enable-nine
+      --enable-xvmc
+      --enable-vdpau
+      --enable-va
+      --enable-omx-bellagio
+      --enable-gallium-osmesa
+    # We need swrast for osmesa and nine.
+    # i915 most likely doesn't work with most ST.
+    # Regardless - we're doing a quick build test here.
+    GALLIUM_DRIVERS: "i915,swrast"
+    LIBUNWIND_FLAGS: --enable-libunwind
+
+build:scons-nollvm:
+  extends: .scons-build
+  variables:
+    SCONS_TARGET: "llvm=0"
+    SCONS_CHECK_COMMAND: "scons llvm=0 check"
+
+build:scons-llvm:
+  extends: .scons-build
+  variables:
+    SCONS_TARGET: "llvm=1"
+    SCONS_CHECK_COMMAND: "scons llvm=1 check"
+    LLVM_VERSION: "3.9"
+
+build:scons-swr:
+  extends: .scons-build
+  variables:
+    SCONS_TARGET: "swr=1"
+    SCONS_CHECK_COMMAND: "true"
+    LLVM_VERSION: "6.0"
--- a/.gitlab-ci/Dockerfile.ubuntu
+++ b/.gitlab-ci/Dockerfile.ubuntu
@@ -0,0 +1,165 @@
+FROM ubuntu:bionic
+
+RUN apt-get update
+RUN apt-get upgrade -y
+RUN apt-get install -y \
+      curl \
+      wget \
+      gnupg \
+      software-properties-common
+
+RUN curl -fsSL https://apt.llvm.org/llvm-snapshot.gpg.key | apt-key add -
+RUN add-apt-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-7 main"
+
+RUN apt-get update
+RUN apt-get install -y \
+      pkg-config \
+      libdrm-dev \
+      libpciaccess-dev \
+      libxrandr-dev \
+      libxdamage-dev \
+      libxfixes-dev \
+      libxshmfence-dev \
+      libxxf86vm-dev \
+      libvdpau-dev \
+      libva-dev \
+      llvm-3.9-dev \
+      libclang-3.9-dev \
+      llvm-4.0-dev \
+      libclang-4.0-dev \
+      llvm-5.0-dev \
+      llvm-6.0-dev \
+      llvm-7-dev \
+      clang-5.0 \
+      libclang-5.0-dev \
+      clang-6.0 \
+      libclang-6.0-dev \
+      clang-7 \
+      libclang-7-dev \
+      libclc-dev \
+      libxvmc-dev \
+      libomxil-bellagio-dev \
+      xz-utils \
+      libexpat1-dev \
+      libx11-xcb-dev \
+      x11proto-xf86vidmode-dev \
+      libelf-dev \
+      libunwind8-dev \
+      libglvnd-dev \
+      python2.7 \
+      python-pip \
+      python-setuptools \
+      python3.5 \
+      python3-pip \
+      python3-setuptools
+
+RUN apt-get install -y \
+      libxcb-randr0
+
+# autotools build deps
+RUN apt-get install -y \
+      autoconf \
+      automake \
+      xutils-dev \
+      libtool \
+      bison \
+      flex \
+      gettext \
+      make
+
+# dependencies where we want a specific version
+ENV XORG_RELEASES              https://xorg.freedesktop.org/releases/individual
+ENV XCB_RELEASES               https://xcb.freedesktop.org/dist
+ENV WAYLAND_RELEASES           https://wayland.freedesktop.org/releases
+
+ENV XORGMACROS_VERSION         util-macros-1.19.0
+ENV GLPROTO_VERSION            glproto-1.4.17
+ENV DRI2PROTO_VERSION          dri2proto-2.8
+ENV LIBPCIACCESS_VERSION       libpciaccess-0.13.4
+ENV LIBDRM_VERSION             libdrm-2.4.97
+ENV XCBPROTO_VERSION           xcb-proto-1.13
+ENV RANDRPROTO_VERSION         randrproto-1.3.0
+ENV LIBXRANDR_VERSION          libXrandr-1.3.0
+ENV LIBXCB_VERSION             libxcb-1.13
+ENV LIBXSHMFENCE_VERSION       libxshmfence-1.3
+ENV LIBVDPAU_VERSION           libvdpau-1.1
+ENV LIBVA_VERSION              libva-1.7.0
+ENV LIBWAYLAND_VERSION         wayland-1.15.0
+ENV WAYLAND_PROTOCOLS_VERSION  wayland-protocols-1.8
+
+RUN wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2
+RUN tar -xvf $XORGMACROS_VERSION.tar.bz2 && rm $XORGMACROS_VERSION.tar.bz2
+RUN (cd $XORGMACROS_VERSION && ./configure && make install) && rm -rf $XORGMACROS_VERSION
+
+RUN wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2
+RUN tar -xvf $GLPROTO_VERSION.tar.bz2 && rm $GLPROTO_VERSION.tar.bz2
+RUN (cd $GLPROTO_VERSION && ./configure && make install) && rm -rf $GLPROTO_VERSION
+
+RUN wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2
+RUN tar -xvf $DRI2PROTO_VERSION.tar.bz2 && rm $DRI2PROTO_VERSION.tar.bz2
+RUN (cd $DRI2PROTO_VERSION && ./configure && make install) && rm -rf $DRI2PROTO_VERSION
+
+RUN wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2
+RUN tar -xvf $XCBPROTO_VERSION.tar.bz2 && rm $XCBPROTO_VERSION.tar.bz2
+RUN (cd $XCBPROTO_VERSION && ./configure && make install) && rm -rf $XCBPROTO_VERSION
+
+RUN wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2
+RUN tar -xvf $LIBXCB_VERSION.tar.bz2 && rm $LIBXCB_VERSION.tar.bz2
+RUN (cd $LIBXCB_VERSION && ./configure && make install) && rm -rf $LIBXCB_VERSION
+
+RUN wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2
+RUN tar -xvf $LIBPCIACCESS_VERSION.tar.bz2 && rm $LIBPCIACCESS_VERSION.tar.bz2
+RUN (cd $LIBPCIACCESS_VERSION && ./configure && make install) && rm -rf $LIBPCIACCESS_VERSION
+
+RUN wget https://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
+RUN tar -xvf $LIBDRM_VERSION.tar.bz2 && rm $LIBDRM_VERSION.tar.bz2
+RUN (cd $LIBDRM_VERSION && ./configure --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install) && rm -rf $LIBDRM_VERSION
+
+RUN wget $XORG_RELEASES/proto/$RANDRPROTO_VERSION.tar.bz2
+RUN tar -xvf $RANDRPROTO_VERSION.tar.bz2 && rm $RANDRPROTO_VERSION.tar.bz2
+RUN (cd $RANDRPROTO_VERSION && ./configure && make install) && rm -rf $RANDRPROTO_VERSION
+
+RUN wget $XORG_RELEASES/lib/$LIBXRANDR_VERSION.tar.bz2
+RUN tar -xvf $LIBXRANDR_VERSION.tar.bz2 && rm $LIBXRANDR_VERSION.tar.bz2
+RUN (cd $LIBXRANDR_VERSION && ./configure && make install) && rm -rf $LIBXRANDR_VERSION
+
+RUN wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
+RUN tar -xvf $LIBXSHMFENCE_VERSION.tar.bz2 && rm $LIBXSHMFENCE_VERSION.tar.bz2
+RUN (cd $LIBXSHMFENCE_VERSION && ./configure && make install) && rm -rf $LIBXSHMFENCE_VERSION
+
+RUN wget https://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
+RUN tar -xvf $LIBVDPAU_VERSION.tar.bz2 && rm $LIBVDPAU_VERSION.tar.bz2
+RUN (cd $LIBVDPAU_VERSION && ./configure && make install) && rm -rf $LIBVDPAU_VERSION
+
+RUN wget https://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
+RUN tar -xvf $LIBVA_VERSION.tar.bz2 && rm $LIBVA_VERSION.tar.bz2
+RUN (cd $LIBVA_VERSION && ./configure --disable-wayland --disable-dummy-driver && make install) && rm -rf $LIBVA_VERSION
+
+RUN wget $WAYLAND_RELEASES/$LIBWAYLAND_VERSION.tar.xz
+RUN tar -xvf $LIBWAYLAND_VERSION.tar.xz && rm $LIBWAYLAND_VERSION.tar.xz
+RUN (cd $LIBWAYLAND_VERSION && ./configure --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install) && rm -rf $LIBWAYLAND_VERSION
+
+RUN wget $WAYLAND_RELEASES/$WAYLAND_PROTOCOLS_VERSION.tar.xz
+RUN tar -xvf $WAYLAND_PROTOCOLS_VERSION.tar.xz && rm $WAYLAND_PROTOCOLS_VERSION.tar.xz
+RUN (cd $WAYLAND_PROTOCOLS_VERSION && ./configure && make install) && rm -rf $WAYLAND_PROTOCOLS_VERSION
+
+
+RUN apt-get install -y unzip
+
+# Meson requires ninja >= 1.6, but xenial has 1.3.x (NOTE(review): this image is based on bionic -- confirm this manual install is still needed)
+RUN wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip
+RUN unzip ninja-linux.zip && rm ninja-linux.zip
+RUN mv ninja /usr/bin/
+
+
+RUN pip3 install 'meson>=0.49'
+RUN pip2 install 'scons>=2.4'
+
+RUN pip2 install mako
+RUN pip3 install mako
+
+# Use ccache to speed up builds
+RUN apt-get install -y ccache
+
+# Cleanup workdir
+WORKDIR /
--- a/.mailmap
+++ b/.mailmap
@@ -265,6 +265,9 @@ Kristian Høgsberg <krh@bitplanet.net> <krh@hinata.boston.redhat.com>
 Kristian Høgsberg <krh@bitplanet.net> <krh@sasori.boston.redhat.com>
 Kristian Høgsberg <krh@bitplanet.net> <krh@temari.boston.redhat.com>
 Kristian Høgsberg <krh@bitplanet.net> <kristian.h.kristensen@intel.com>
+Kristian Høgsberg <krh@bitplanet.net> <hoegsberg@chromium.org>
+Kristian Høgsberg <krh@bitplanet.net> <hoegsberg@google.com>
+Kristian Høgsberg <krh@bitplanet.net> <hoegsberg@gmail.com>

 Krzesimir Nowak <qdlacz@gmail.com> <krzesimir@kinvolk.io>

--- a/.travis.yml
+++ b/.travis.yml
@@ -3,643 +3,14 @@ language: c
 dist: xenial

 cache:
-  apt: true
  ccache: true

 env:
  global:
-    - XORG_RELEASES=https://xorg.freedesktop.org/releases/individual
-    - XCB_RELEASES=https://xcb.freedesktop.org/dist
-    - WAYLAND_RELEASES=https://wayland.freedesktop.org/releases
-    - XORGMACROS_VERSION=util-macros-1.19.0
-    - GLPROTO_VERSION=glproto-1.4.17
-    - DRI2PROTO_VERSION=dri2proto-2.8
-    - LIBPCIACCESS_VERSION=libpciaccess-0.13.4
-    - LIBDRM_VERSION=libdrm-2.4.97
-    - XCBPROTO_VERSION=xcb-proto-1.13
-    - RANDRPROTO_VERSION=randrproto-1.3.0
-    - LIBXRANDR_VERSION=libXrandr-1.3.0
-    - LIBXCB_VERSION=libxcb-1.13
-    - LIBXSHMFENCE_VERSION=libxshmfence-1.2
-    - LIBVDPAU_VERSION=libvdpau-1.1
-    - LIBVA_VERSION=libva-1.7.0
-    - LIBWAYLAND_VERSION=wayland-1.15.0
-    - WAYLAND_PROTOCOLS_VERSION=wayland-protocols-1.8
-    - PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig:$HOME/prefix/share/pkgconfig
-    - LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH"
-    - PATH="$HOME/prefix/bin:$PATH"
+    - PKG_CONFIG_PATH="$PKG_CONFIG_PATH"

 matrix:
  include:
-    - env:
-        - LABEL="meson Vulkan"
-        - BUILD=meson
-        - UNWIND="false"
-        - DRI_LOADERS="-Dglx=disabled -Dgbm=false -Degl=false -Dplatforms=x11,wayland,drm -Dosmesa=none"
-        - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
-        - VULKAN_DRIVERS="intel,amd"
-        - LLVM_VERSION=7
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          sources:
-            - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main'
-              key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
-          packages:
-            - llvm-7-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="meson loaders/classic DRI"
-        - BUILD=meson
-        - UNWIND="false"
-        - DRI_LOADERS="-Dglx=dri -Dgbm=true -Degl=true -Dplatforms=x11,wayland,drm,surfaceless -Dosmesa=classic"
-        - DRI_DRIVERS="i915,i965,r100,r200,swrast,nouveau"
-        - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
-      addons:
-        apt:
-          packages:
-            - xz-utils
-            - x11proto-xf86vidmode-dev
-            - libxxf86vm-dev
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libxdamage-dev
-            - libxfixes-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make loaders/classic DRI"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="make check"
-        - DRI_LOADERS="--enable-glx --enable-gbm --enable-egl --with-platforms=x11,drm,surfaceless,wayland --enable-osmesa"
-        - DRI_DRIVERS="i915,i965,radeon,r200,swrast,nouveau"
-        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS=""
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--disable-libunwind"
-      addons:
-        apt:
-          packages:
-            - xz-utils
-            - x11proto-xf86vidmode-dev
-            - libxxf86vm-dev
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libxdamage-dev
-            - libxfixes-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        # NOTE: Building SWR is 2x (yes two) times slower than all the other
-        # gallium drivers combined.
-        # Start this early so that it doesn't hunder the run time.
-        - LABEL="meson Gallium Drivers SWR"
-        - BUILD=meson
-        - UNWIND="true"
-        - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
-        - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
-        - GALLIUM_DRIVERS="swr"
-        - LLVM_VERSION=6.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          packages:
-            - llvm-6.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="meson Gallium Drivers RadeonSI"
-        - BUILD=meson
-        - UNWIND="true"
-        - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
-        - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
-        - GALLIUM_DRIVERS="radeonsi"
-        - LLVM_VERSION=7
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          sources:
-            - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main'
-              key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
-          packages:
-            # From sources above
-            - llvm-7-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="meson Gallium Drivers Other"
-        - BUILD=meson
-        - UNWIND="true"
-        - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
-        - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=disabled"
-        - GALLIUM_DRIVERS="i915,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv"
-        - LLVM_VERSION=5.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            - llvm-5.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="meson Gallium ST Clover LLVM-5.0"
-        - BUILD=meson
-        - UNWIND="true"
-        - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
-        - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd"
-        - GALLIUM_DRIVERS="r600"
-        - LLVM_VERSION=5.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          packages:
-            - libclc-dev
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            - llvm-5.0-dev
-            - clang-5.0
-            - libclang-5.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="meson Gallium ST Clover LLVM-6.0"
-        - BUILD=meson
-        - UNWIND="true"
-        - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
-        - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd"
-        - GALLIUM_DRIVERS="r600"
-        - LLVM_VERSION=6.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          packages:
-            - libclc-dev
-            - llvm-6.0-dev
-            - clang-6.0
-            - libclang-6.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="meson Gallium ST Clover LLVM-7"
-        - BUILD=meson
-        - UNWIND="true"
-        - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
-        - GALLIUM_ST="-Ddri3=false -Dgallium-vdpau=false -Dgallium-xvmc=false -Dgallium-omx=disabled -Dgallium-va=false -Dgallium-xa=false -Dgallium-nine=false -Dgallium-opencl=icd"
-        - GALLIUM_DRIVERS="r600,radeonsi"
-        - LLVM_VERSION=7
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          sources:
-            - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main'
-              key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
-          packages:
-            - libclc-dev
-            # From sources above
-            - llvm-7-dev
-            - clang-7
-            - libclang-7-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="meson Gallium ST Other"
-        - BUILD=meson
-        - UNWIND="true"
-        - DRI_LOADERS="-Dglx=disabled -Degl=false -Dgbm=false"
-        - GALLIUM_ST="-Ddri3=true -Dgallium-vdpau=true -Dgallium-xvmc=true -Dgallium-omx=bellagio -Dgallium-va=true -Dgallium-xa=true -Dgallium-nine=true -Dgallium-opencl=disabled -Dosmesa=gallium"
-        # We need swrast for osmesa and nine.
-        # Nouveau supports, or builds at least against all ST.
-        - GALLIUM_DRIVERS="nouveau,swrast"
-        - LLVM_VERSION=5.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          packages:
-            - llvm-5.0-dev
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            # Nine requires gcc 4.6... which is the one we have right ?
-            - libxvmc-dev
-            # Build locally, for now.
-            #- libvdpau-dev
-            #- libva-dev
-            - libomxil-bellagio-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3.5
-            - python3-pip
-            - python3-setuptools
-    - env:
-        # NOTE: Building SWR is 2x (yes two) times slower than all the other
-        # gallium drivers combined.
-        # Start this early so that it doesn't hunder the run time.
-        - LABEL="make Gallium Drivers SWR"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=6.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="swr"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          packages:
-            - llvm-6.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Gallium Drivers RadeonSI"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=7
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="radeonsi"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          sources:
-            - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main'
-              key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
-          packages:
-            # From sources above
-            - llvm-7-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Gallium Drivers Other"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=3.9
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="i915,nouveau,kmsro,r300,r600,freedreno,svga,swrast,v3d,vc4,virgl,etnaviv"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            - llvm-3.9-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Gallium ST Clover LLVM-3.9"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=3.9
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="r600"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          packages:
-            - libclc-dev
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            - llvm-3.9-dev
-            - clang-3.9
-            - libclang-3.9-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Gallium ST Clover LLVM-4.0"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=4.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="r600"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          packages:
-            - libclc-dev
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            - llvm-4.0-dev
-            - clang-4.0
-            - libclang-4.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Gallium ST Clover LLVM-5.0"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=5.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="r600"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          packages:
-            - libclc-dev
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            - llvm-5.0-dev
-            - clang-5.0
-            - libclang-5.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Gallium ST Clover LLVM-6.0"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=6.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="r600"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          packages:
-            - libclc-dev
-            - llvm-6.0-dev
-            - clang-6.0
-            - libclang-6.0-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Gallium ST Clover LLVM-7"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=7
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS="r600,radeonsi"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          sources:
-            - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main'
-              key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
-          packages:
-            - libclc-dev
-            # From sources above
-            - llvm-7-dev
-            - clang-7
-            - libclang-7-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-    - env:
-        - LABEL="make Gallium ST Other"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="true"
-        - LLVM_VERSION=3.5
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--enable-dri --disable-opencl --enable-xa --enable-nine --enable-xvmc --enable-vdpau --enable-va --enable-omx-bellagio --enable-gallium-osmesa"
-        # We need swrast for osmesa and nine.
-        # i915 most likely doesn't work with most ST.
-        # Regardless - we're doing a quick build test here.
-        - GALLIUM_DRIVERS="i915,swrast"
-        - VULKAN_DRIVERS=""
-        - LIBUNWIND_FLAGS="--enable-libunwind"
-      addons:
-        apt:
-          packages:
-            # We actually want to test against llvm-3.3, yet 3.5 is available
-            - llvm-3.5-dev
-            # Nine requires gcc 4.6... which is the one we have right ?
-            - libxvmc-dev
-            # Build locally, for now.
-            #- libvdpau-dev
-            #- libva-dev
-            - libomxil-bellagio-dev
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - libunwind8-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="make Vulkan"
-        - BUILD=make
-        - MAKEFLAGS="-j4"
-        - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
-        - LLVM_VERSION=7
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland"
-        - DRI_DRIVERS=""
-        - GALLIUM_ST="--enable-dri --enable-dri3 --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx-bellagio --disable-gallium-osmesa"
-        - GALLIUM_DRIVERS=""
-        - VULKAN_DRIVERS="intel,radeon"
-        - LIBUNWIND_FLAGS="--disable-libunwind"
-      addons:
-        apt:
-          sources:
-            - sourceline: 'deb http://apt.llvm.org/xenial/ llvm-toolchain-xenial-7 main'
-              key_url: https://apt.llvm.org/llvm-snapshot.gpg.key
-          packages:
-            # From sources above
-            - llvm-7-dev
-            # Common
-            - xz-utils
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-            - python3-pip
-            - python3-setuptools
-    - env:
-        - LABEL="scons"
-        - BUILD=scons
-        - SCONSFLAGS="-j4"
-        # Explicitly disable.
-        - SCONS_TARGET="llvm=0"
-        # Keep it symmetrical to the make build.
-        - SCONS_CHECK_COMMAND="scons llvm=0 check"
-      addons:
-        apt:
-          packages:
-            # Common
-            - xz-utils
-            - x11proto-xf86vidmode-dev
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-    - env:
-        - LABEL="scons LLVM"
-        - BUILD=scons
-        - SCONSFLAGS="-j4"
-        - SCONS_TARGET="llvm=1"
-        # Keep it symmetrical to the make build.
-        - SCONS_CHECK_COMMAND="scons llvm=1 check"
-        - LLVM_VERSION=3.5
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-      addons:
-        apt:
-          packages:
-            # LLVM packaging is broken and misses these dependencies
-            - libedit-dev
-            # We actually want to test against llvm-3.3, yet 3.5 is available
-            - llvm-3.5-dev
-            # Common
-            - xz-utils
-            - x11proto-xf86vidmode-dev
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
-    - env:
-        - LABEL="scons SWR"
-        - BUILD=scons
-        - SCONSFLAGS="-j4"
-        - SCONS_TARGET="swr=1"
-        - LLVM_VERSION=6.0
-        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
-        # Keep it symmetrical to the make build. There's no actual SWR, yet.
-        - SCONS_CHECK_COMMAND="true"
-      addons:
-        apt:
-          packages:
-            - llvm-6.0-dev
-            # Common
-            - xz-utils
-            - x11proto-xf86vidmode-dev
-            - libexpat1-dev
-            - libx11-xcb-dev
-            - libelf-dev
    - env:
        - LABEL="macOS make"
        - BUILD=make
@@ -691,114 +62,9 @@ install:
      pip2 install --user mako;
    fi

-  # Install a more modern scons from pip.
-  - if test "x$BUILD" = xscons; then
-      pip2 install --user "scons>=2.4";
-      pip2 install --user mako;
-    fi
-
  # Install dependencies where we require specific versions (or where
  # disallowed by Travis CI's package whitelisting).

-  - |
-    if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
-      wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2
-      tar -jxvf $XORGMACROS_VERSION.tar.bz2
-      (cd $XORGMACROS_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2
-      tar -jxvf $GLPROTO_VERSION.tar.bz2
-      (cd $GLPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2
-      tar -jxvf $DRI2PROTO_VERSION.tar.bz2
-      (cd $DRI2PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2
-      tar -jxvf $XCBPROTO_VERSION.tar.bz2
-      (cd $XCBPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2
-      tar -jxvf $LIBXCB_VERSION.tar.bz2
-      (cd $LIBXCB_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2
-      tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2
-      (cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget https://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
-      tar -jxvf $LIBDRM_VERSION.tar.bz2
-      (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install)
-
-      wget $XORG_RELEASES/proto/$RANDRPROTO_VERSION.tar.bz2
-      tar -jxvf $RANDRPROTO_VERSION.tar.bz2
-      (cd $RANDRPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget $XORG_RELEASES/lib/$LIBXRANDR_VERSION.tar.bz2
-      tar -jxvf $LIBXRANDR_VERSION.tar.bz2
-      (cd $LIBXRANDR_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
-      tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
-      (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget https://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
-      tar -jxvf $LIBVDPAU_VERSION.tar.bz2
-      (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      wget https://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
-      tar -jxvf $LIBVA_VERSION.tar.bz2
-      (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)
-
-      wget $WAYLAND_RELEASES/$LIBWAYLAND_VERSION.tar.xz
-      tar -axvf $LIBWAYLAND_VERSION.tar.xz
-      (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)
-
-      wget $WAYLAND_RELEASES/$WAYLAND_PROTOCOLS_VERSION.tar.xz
-      tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz
-      (cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install)
-
-      # Meson requires ninja >= 1.6, but xenial has 1.3.x
-      wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip
-      unzip ninja-linux.zip
-      mv ninja $HOME/prefix/bin/
-
-      # Generate this header since one is missing on the Travis instance
-      mkdir -p linux
-      printf "%s\n" \
-           "#ifndef _LINUX_MEMFD_H" \
-           "#define _LINUX_MEMFD_H" \
-           "" \
-           "#define MFD_CLOEXEC             0x0001U" \
-           "#define MFD_ALLOW_SEALING       0x0002U" \
-           "" \
-           "#endif /* _LINUX_MEMFD_H */" > linux/memfd.h
-
-      # Generate this header, including the missing SYS_memfd_create
-      # macro, which is not provided by the header in the Travis
-      # instance
-      mkdir -p sys
-      printf "%s\n" \
-           "#ifndef _SYSCALL_H" \
-           "#define _SYSCALL_H      1" \
-           "" \
-           "#include <asm/unistd.h>" \
-           "" \
-           "#ifndef _LIBC" \
-           "# include <bits/syscall.h>" \
-           "#endif" \
-           "" \
-           "#ifndef __NR_memfd_create" \
-           "# define __NR_memfd_create 319 /* Taken from <asm/unistd_64.h> */" \
-           "#endif" \
-           "" \
-           "#ifndef SYS_memfd_create" \
-           "# define SYS_memfd_create __NR_memfd_create" \
-           "#endif" \
-           "" \
-           "#endif" > sys/syscall.h
-    fi
-
 script:
  - if test "x$BUILD" = xmake; then
      export CFLAGS="$CFLAGS -isystem`pwd`";
@@ -819,10 +85,6 @@ script:
      make && eval $MAKE_CHECK_COMMAND;
    fi

-  - if test "x$BUILD" = xscons; then
-      scons $SCONS_TARGET && eval $SCONS_CHECK_COMMAND;
-    fi
-
  - |
    if test "x$BUILD" = xmeson; then
      if test -n "$LLVM_CONFIG"; then
--- a/Android.mk
+++ b/Android.mk
@@ -24,7 +24,7 @@
 # BOARD_GPU_DRIVERS should be defined.  The valid values are
 #
 #   classic drivers: i915 i965
-#   gallium drivers: swrast freedreno i915g nouveau kmsro r300g r600g radeonsi vc4 virgl vmwgfx etnaviv
+#   gallium drivers: swrast freedreno i915g nouveau kmsro r300g r600g radeonsi vc4 virgl vmwgfx etnaviv iris
 #
 # The main target is libGLES_mesa.  For each classic driver enabled, a DRI
 # module will also be built.  DRI modules will be loaded by libGLES_mesa.
@@ -59,7 +59,8 @@ gallium_drivers := \
 	vmwgfx.HAVE_GALLIUM_VMWGFX \
 	vc4.HAVE_GALLIUM_VC4 \
 	virgl.HAVE_GALLIUM_VIRGL \
-	etnaviv.HAVE_GALLIUM_ETNAVIV
+	etnaviv.HAVE_GALLIUM_ETNAVIV \
+	iris.HAVE_GALLIUM_IRIS

 ifeq ($(BOARD_GPU_DRIVERS),all)
 MESA_BUILD_CLASSIC := $(filter HAVE_%, $(subst ., , $(classic_drivers)))
--- a/README.rst
+++ b/README.rst
@@ -9,25 +9,6 @@ This repository lives at https://gitlab.freedesktop.org/mesa/mesa.
 Other repositories are likely forks, and code found there is not supported.


-Build status
------------
-
-Travis:
-
-.. image:: https://travis-ci.org/mesa3d/mesa.svg?branch=master
-    :target: https://travis-ci.org/mesa3d/mesa
-
-Appveyor:
-
-.. image:: https://img.shields.io/appveyor/ci/mesa3d/mesa.svg
-    :target: https://ci.appveyor.com/project/mesa3d/mesa
-
-Coverity:
-
-.. image:: https://scan.coverity.com/projects/139/badge.svg?flat=1
-    :target: https://scan.coverity.com/projects/mesa
-
-
 Build & install
 ---------------

--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-19.0.4
+19.1.0-devel
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -1,33 +0,0 @@
-# Both of these were already merged with different shas
-da48cba61ef6fefb799bf96e6364b70dbf4ec712
-c812c740e60c14060eb89db66039111881a0f42f
-
-# The commit these fix was reverted from 19.0, but fixed for 19.1 due
-# to the number of fixes required to make that commit work
-8d8f80af3a17354508f2ec9d6559c915d5be351d
-0c0c69729b6d72a5297122856c8fe48510e90764
-0881e90c09965818b02e359474a6f7446b41d647
-b031c643491a92a5574c7a4bd659df33f2d89bb6
-
-# These were manually rebased by Jason, thanks!
-8ab95b849e66f3221d80a67eef2ec6e3730901a8
-5c30fffeec1732c21d600c036f95f8cdb1bb5487
-
-# This doesn't actually appliy to 19.0
-29179f58c6ba8099859ea25900214dbbd3814a92
-
-# This was superceeded by a manual backport from ken
-6981069fc805da1afc867ca3c905075d146d7ff9
-
-# This was manually backported
-0bc1942c9ddce4e796322a7561f06af5dec0decd
-
-# This doesn't need to be applied, it already seems to exist in stable.
-80dc78407d0d1e03ceddf8889b217e8fd113568d
-
-# This was backported manually
-4f18c43d1df64135e8968a7d4fbfd2c9918b76ae
-
-# These were de-nominated since they don't apply nicley
-88105375c978f9de82af8c654051e5aa16d61614
-c9358621276ae49162e58d4a16fe37abda6a347f
--- a/bin/install_megadrivers.py
+++ b/bin/install_megadrivers.py
@@ -35,11 +35,7 @@ def main():
    args = parser.parse_args()

    if os.path.isabs(args.libdir):
-        destdir = os.environ.get('DESTDIR')
-        if destdir:
-            to = os.path.join(destdir, args.libdir[1:])
-        else:
-            to = args.libdir
+        to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
    else:
        to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)

@@ -49,6 +45,7 @@ def main():
        if os.path.lexists(to):
            os.unlink(to)
        os.makedirs(to)
+    shutil.copy(args.megadriver, master)

    for driver in args.drivers:
        abs_driver = os.path.join(to, driver)
@@ -70,14 +67,7 @@ def main():
                name, ext = os.path.splitext(name)
        finally:
            os.chdir(ret)
-
-    # Remove meson-created master .so and symlinks
    os.unlink(master)
-    name, ext = os.path.splitext(master)
-    while ext != '.so':
-        if os.path.lexists(name):
-            os.unlink(name)
-        name, ext = os.path.splitext(name)


 if __name__ == '__main__':
--- a/bin/meson-options.py
+++ b/bin/meson-options.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+from shutil import get_terminal_size  # unlike os', falls back to 80 columns when stdout is not a tty
+from textwrap import wrap
+from mesonbuild import coredata
+from mesonbuild import optinterpreter
+
+(COLUMNS, _) = get_terminal_size()  # terminal width, used to wrap the help text
+
+def describe_option(option_name: str, option_default_value: str,
+                    option_type: str, option_message: str) -> None:
+    """Print one option record: name, default, type, wrapped help text, then '---'."""
+    print('name:    ' + option_name)
+    print('default: ' + option_default_value)
+    print('type:    ' + option_type)
+    print(''.join('         ' + row + '\n' for row in wrap(option_message, width=COLUMNS - 9)), end='')
+    print('---')
+
+oi = optinterpreter.OptionInterpreter('')
+oi.process('meson_options.txt')  # relative path: resolved against the current directory
+
+for (name, value) in oi.options.items():  # one printed record per declared option
+    if isinstance(value, coredata.UserStringOption):
+        describe_option(name,
+                        value.value,
+                        'string',
+                        "You can type what you want, but make sure it makes sense")
+    elif isinstance(value, coredata.UserBooleanOption):
+        describe_option(name,
+                        'true' if value.value else 'false',
+                        'boolean',
+                        "You can set it to 'true' or 'false'")
+    elif isinstance(value, coredata.UserUmaskOption):  # must precede the integer test: umask subclasses it
+        describe_option(name,
+                        str(value.value),
+                        'umask',
+                        "You can set it to 'preserve' or a value between '0000' and '0777'")
+    elif isinstance(value, coredata.UserIntegerOption):
+        describe_option(name,
+                        str(value.value),
+                        'integer',
+                        "You can set it to any integer value between '{}' and '{}'".format(value.min_value, value.max_value))
+    elif isinstance(value, coredata.UserComboOption):
+        choices = '[' + ', '.join(["'" + v + "'" for v in value.choices]) + ']'
+        describe_option(name,
+                        value.value,
+                        'combo',
+                        "You can set it to any one of those values: " + choices)
+    elif isinstance(value, coredata.UserArrayOption):
+        choices = '[' + ', '.join(["'" + v + "'" for v in value.choices]) + ']'
+        value = '[' + ', '.join(["'" + v + "'" for v in value.value]) + ']'
+        describe_option(name,
+                        value,
+                        'array',
+                        "You can set it to one or more of those values: " + choices)
+    elif isinstance(value, coredata.UserFeatureOption):
+        describe_option(name,
+                        value.value,
+                        'feature',
+                        "You can set it to 'auto', 'enabled', or 'disabled'")
+    else:
+        print(name + ' is an option of a type unknown to this script')
+        print('---')
--- a/configure.ac
+++ b/configure.ac
@@ -122,7 +122,7 @@ LLVM_REQUIRED_OPENCL=3.9.0
 LLVM_REQUIRED_R600=3.9.0
 LLVM_REQUIRED_RADEONSI=7.0.0
 LLVM_REQUIRED_RADV=7.0.0
-LLVM_REQUIRED_SWR=7.0.0
+LLVM_REQUIRED_SWR=6.0.0

 dnl Check for progs
 AC_PROG_CPP
@@ -2357,7 +2357,7 @@ if test "x$enable_xvmc" = xyes -o \
        "x$enable_omx_tizonia" = xyes -o \
        "x$enable_va" = xyes; then
    if echo $platforms | grep -q "x11"; then
-        PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED libdrm >= $LIBDRM_REQUIRED])
+        PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
    fi
    need_gallium_vl_winsys=yes
 fi
@@ -2845,8 +2845,8 @@ if test -n "$with_gallium_drivers"; then
 fi

 # XXX: Keep in sync with LLVM_REQUIRED_SWR
-AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x7.0.0 -a \
-                                              "x$LLVM_VERSION" != x7.0.1)
+AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x6.0.0 -a \
+                                              "x$LLVM_VERSION" != x6.0.1)

 if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
    llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
--- a/docs/bugs.html
+++ b/docs/bugs.html
@@ -14,7 +14,7 @@
 <iframe src="contents.html"></iframe>
 <div class="content">

-<h1>Bug Database</h1>
+<h1>Report a bug</h1>

 <p>
 The Mesa bug database is hosted on
--- a/docs/contents.html
+++ b/docs/contents.html
@@ -49,10 +49,10 @@
 <li><a href="precompiled.html" target="_parent">Precompiled Libraries</a>
 </ul>

-<b>Resources</b>
+<b>Need help?</b>
 <ul>
 <li><a href="lists.html" target="_parent">Mailing Lists</a>
-<li><a href="bugs.html" target="_parent">Bug Database</a>
+<li><a href="bugs.html" target="_parent">Report a bug</a>
 <li><a href="webmaster.html" target="_parent">Webmaster</a>
 <li><a href="https://dri.freedesktop.org/" target="_parent">Mesa/DRI Wiki</a>
 </ul>
--- a/docs/envvars.html
+++ b/docs/envvars.html
@@ -338,9 +338,6 @@ See src/mesa/state_tracker/st_debug.c for other options.
 for details.
 <li>SVGA_EXTRA_LOGGING - if set, enables extra logging to the vmware.log file,
 such as the OpenGL program's name and command line arguments.
-<li>SVGA_NO_LOGGING - if set, disables logging to the vmware.log file.
-This is useful when using Valgrind because it otherwise crashes when
-initializing the host log feature.
 <li>See the driver code for other, lesser-used variables.
 </ul>

--- a/docs/features.txt
+++ b/docs/features.txt
@@ -204,7 +204,7 @@ GL 4.4, GLSL 4.40 -- all DONE: i965/gen8+, nvc0, r600, radeonsi
  - specified transform/feedback layout                 DONE
  - input/output block locations                        DONE
  GL_ARB_multi_bind                                     DONE (all drivers)
-  GL_ARB_query_buffer_object                            DONE (i965/hsw+)
+  GL_ARB_query_buffer_object                            DONE (i965/hsw+, virgl)
  GL_ARB_texture_mirror_clamp_to_edge                   DONE (i965, nv50, llvmpipe, softpipe, swr, virgl)
  GL_ARB_texture_stencil8                               DONE (freedreno, i965/hsw+, nv50, llvmpipe, softpipe, swr, virgl)
  GL_ARB_vertex_type_10f_11f_11f_rev                    DONE (i965, nv50, llvmpipe, softpipe, swr, virgl)
--- a/docs/index.html
+++ b/docs/index.html
@@ -15,6 +15,18 @@
 <div class="content">

 <h1>News</h1>
+<h2>February 18, 2019</h2>
+<p>
+<a href="relnotes/18.3.4.html">Mesa 18.3.4</a> is released.
+This is a bug-fix release.
+</p>
+
+<h2>January 31, 2019</h2>
+<p>
+<a href="relnotes/18.3.3.html">Mesa 18.3.3</a> is released.
+This is a bug-fix release.
+</p>
+
 <h2>January 17, 2019</h2>
 <p>
 <a href="relnotes/18.3.2.html">Mesa 18.3.2</a> is released.
--- a/docs/meson.html
+++ b/docs/meson.html
@@ -58,7 +58,9 @@ and your local settings.
 <p>
 Meson does not currently support listing options before configure a build
 directory, but this feature is being discussed upstream.
-For now, the only way to see what options exist is to look at the
+For now, we have a <code>bin/meson-options.py</code> script that prints
+the options for you.
+If that script doesn't work for some reason, you can always look in the
 <code>meson_options.txt</code> file at the root of the project.
 </p>

--- a/docs/release-calendar.html
+++ b/docs/release-calendar.html
@@ -49,19 +49,7 @@ if you'd like to nominate a patch in the next stable release.
 <th>Notes</th>
 </tr>
 <tr>
-<td rowspan="4">18.3</td>
-<td>2019-01-30</td>
-<td>18.3.3</td>
-<td>Emil Velikov</td>
-<td>
-</tr>
-<tr>
-<td>2019-02-13</td>
-<td>18.3.4</td>
-<td>Emil Velikov</td>
-<td>
-</tr>
-<tr>
+<td rowspan="2">18.3</td>
 <td>2019-02-27</td>
 <td>18.3.5</td>
 <td>Emil Velikov</td>
--- a/docs/relnotes.html
+++ b/docs/relnotes.html
@@ -21,6 +21,8 @@ The release notes summarize what's new or changed in each Mesa release.
 </p>

 <ul>
+<li><a href="relnotes/18.3.4.html">18.3.4 release notes</a>
+<li><a href="relnotes/18.3.3.html">18.3.3 release notes</a>
 <li><a href="relnotes/18.3.2.html">18.3.2 release notes</a>
 <li><a href="relnotes/18.2.8.html">18.2.8 release notes</a>
 <li><a href="relnotes/18.2.7.html">18.2.7 release notes</a>
--- a/docs/relnotes/18.3.3.html
+++ b/docs/relnotes/18.3.3.html
@@ -0,0 +1,208 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.3 Release Notes / January 31, 2019</h1>
+
+<p>
+Mesa 18.3.3 is a bug fix release which fixes bugs found since the 18.3.2 release.
+</p>
+<p>
+Mesa 18.3.3 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+6b9893942fe8011c7736d51448deb6ef80ece2257e0fac27b02e997a6605d5e4  mesa-18.3.3.tar.gz
+2ab6886a6966c532ccbcc3b240925e681464b658244f0cbed752615af3936299  mesa-18.3.3.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108877">Bug 108877</a> - OpenGL CTS gl43 test cases were interrupted due to segment fault</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109023">Bug 109023</a> - error: inlining failed in call to always_inline ‘__m512 _mm512_and_ps(__m512, __m512)’: target specific option mismatch</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109129">Bug 109129</a> - format_types.h:1220: undefined reference to `_mm256_cvtps_ph'</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109229">Bug 109229</a> - glLinkProgram locks up for ~30 seconds</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109242">Bug 109242</a> - [RADV] The Witcher 3 system freeze</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109488">Bug 109488</a> - Mesa 18.3.2 crash on a specific fragment shader (assert triggered) / already fixed on the master branch.</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Andres Gomez (2):</p>
+<ul>
+  <li>bin/get-pick-list.sh: fix the oneline printing</li>
+  <li>bin/get-pick-list.sh: fix redirection in sh</li>
+</ul>
+
+<p>Axel Davy (1):</p>
+<ul>
+  <li>st/nine: Immediately upload user provided textures</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (3):</p>
+<ul>
+  <li>radv: Only use 32 KiB per threadgroup on Stoney.</li>
+  <li>radv: Set partial_vs_wave for pipelines with just GS, not tess.</li>
+  <li>nir: Account for atomics in copy propagation.</li>
+</ul>
+
+<p>Bruce Cherniak (1):</p>
+<ul>
+  <li>gallium/swr: Fix multi-context sync fence deadlock.</li>
+</ul>
+
+<p>Carsten Haitzler (Rasterman) (2):</p>
+<ul>
+  <li>vc4: Use named parameters for the NEON inline asm.</li>
+  <li>vc4: Declare the cpu pointers as being modified in NEON asm.</li>
+</ul>
+
+<p>Danylo Piliaiev (1):</p>
+<ul>
+  <li>glsl: Fix copying function's out to temp if dereferenced by array</li>
+</ul>
+
+<p>Dave Airlie (3):</p>
+<ul>
+  <li>dri_interface: add put shm image2 (v2)</li>
+  <li>glx: add support for putimageshm2 path (v2)</li>
+  <li>gallium: use put image shm2 path (v2)</li>
+</ul>
+
+<p>Dylan Baker (4):</p>
+<ul>
+  <li>meson: allow building dri driver without window system if osmesa is classic</li>
+  <li>meson: fix swr KNL build</li>
+  <li>meson: Fix compiler checks for SWR with ICC</li>
+  <li>meson: Add warnings and errors when using ICC</li>
+</ul>
+
+<p>Emil Velikov (4):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.2</li>
+  <li>cherry-ignore: radv: Fix multiview depth clears</li>
+  <li>cherry-ignore: spirv: Handle arbitrary bit sizes for deref array indices</li>
+  <li>cherry-ignore: WARNING: Commit XXX lists invalid sha</li>
+</ul>
+
+<p>Eric Anholt (2):</p>
+<ul>
+  <li>vc4: Don't leak the GPU fd for renderonly usage.</li>
+  <li>vc4: Enable NEON asm on meson cross-builds.</li>
+</ul>
+
+<p>Eric Engestrom (2):</p>
+<ul>
+  <li>configure: EGL requirements only apply if EGL is built</li>
+  <li>meson/vdpau: add missing soversion</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>anv/device: fix maximum number of images supported</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>anv/nir: Rework arguments to apply_pipeline_layout</li>
+  <li>anv: Only parse pImmutableSamplers if the descriptor has samplers</li>
+  <li>nir/xfb: Fix offset accounting for dvec3/4</li>
+</ul>
+
+<p>Karol Herbst (2):</p>
+<ul>
+  <li>nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise instructions</li>
+  <li>glsl/lower_output_reads: set invariant and precise flags on temporaries</li>
+</ul>
+
+<p>Lionel Landwerlin (1):</p>
+<ul>
+  <li>anv: fix invalid binding table index computation</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>radeonsi: also apply the GS hang workaround to draws without tessellation</li>
+  <li>radeonsi: fix a u_blitter crash after a shader with FBFETCH</li>
+  <li>radeonsi: fix rendering to tiny viewports where the viewport center is &gt; 8K</li>
+  <li>st/mesa: purge framebuffers when unbinding a context</li>
+</ul>
+
+<p>Niklas Haas (1):</p>
+<ul>
+  <li>radv: correctly use vulkan 1.0 by default</li>
+</ul>
+
+<p>Pierre Moreau (1):</p>
+<ul>
+  <li>meson: Fix with_gallium_icd to with_opencl_icd</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>loader: fix the no-modifiers case</li>
+</ul>
+
+<p>Samuel Pitoiset (1):</p>
+<ul>
+  <li>radv: clean up setting partial_es_wave for distributed tess on VI</li>
+</ul>
+
+<p>Timothy Arceri (5):</p>
+<ul>
+  <li>ac/nir_to_llvm: fix interpolateAt* for arrays</li>
+  <li>ac/nir_to_llvm: fix clamp shadow reference for more hardware</li>
+  <li>radv/ac: fix some fp16 handling</li>
+  <li>glsl: use remap location when serialising uniform program resource data</li>
+  <li>glsl: Copy function out to temp if we don't directly ref a variable</li>
+</ul>
+
+<p>Tomeu Vizoso (1):</p>
+<ul>
+  <li>etnaviv: Consolidate buffer references from framebuffers</li>
+</ul>
+
+<p>Vinson Lee (1):</p>
+<ul>
+  <li>meson: Fix typo.</li>
+</ul>
+
+
+
+</div>
+</body>
+</html>
+
--- a/docs/relnotes/18.3.4.html
+++ b/docs/relnotes/18.3.4.html
@@ -0,0 +1,180 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 18.3.4 Release Notes / February 18, 2019</h1>
+
+<p>
+Mesa 18.3.4 is a bug fix release which fixes bugs found since the 18.3.3 release.
+</p>
+<p>
+Mesa 18.3.4 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+
+<h2>SHA256 checksums</h2>
+<pre>
+e22e6fe4c3aca80fe872a0a7285b6c5523e0cfc0bfb57ffcc3b3d66d292593e4  mesa-18.3.4.tar.gz
+32314da4365d37f80d84f599bd9625b00161c273c39600ba63b45002d500bb07  mesa-18.3.4.tar.xz
+</pre>
+
+
+<h2>New features</h2>
+<p>None</p>
+
+
+<h2>Bug fixes</h2>
+
+<ul>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109107">Bug 109107</a> - gallium/st/va: change va max_profiles when using Radeon VCN Hardware</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109401">Bug 109401</a> - [DXVK] Project Cars rendering problems</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109543">Bug 109543</a> - After upgrade mesa to 19.0.0~rc1 all vulkan based application stop working [&quot;vulkan-cube&quot; received SIGSEGV in radv_pipeline_init_blend_state at ../src/amd/vulkan/radv_pipeline.c:699]</li>
+
+<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109603">Bug 109603</a> - nir_instr_as_deref: Assertion `parent &amp;&amp; parent-&gt;type == nir_instr_type_deref' failed.</li>
+
+</ul>
+
+
+<h2>Changes</h2>
+
+<p>Bart Oldeman (1):</p>
+<ul>
+  <li>gallium-xlib: query MIT-SHM before using it.</li>
+</ul>
+
+<p>Bas Nieuwenhuizen (2):</p>
+<ul>
+  <li>radv: Only look at pImmutableSamples if the descriptor has a sampler.</li>
+  <li>amd/common: Use correct writemask for shared memory stores.</li>
+</ul>
+
+<p>Dylan Baker (2):</p>
+<ul>
+  <li>get-pick-list: Add --pretty=medium to the arguments for Cc patches</li>
+  <li>meson: Add dependency on genxml to anvil</li>
+</ul>
+
+<p>Emil Velikov (5):</p>
+<ul>
+  <li>docs: add sha256 checksums for 18.3.3</li>
+  <li>cherry-ignore: nv50,nvc0: add explicit settings for recent caps</li>
+  <li>cherry-ignore: add more 19.0 only nominations from Ilia</li>
+  <li>cherry-ignore: radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8</li>
+  <li>Update version to 18.3.4</li>
+</ul>
+
+<p>Eric Anholt (1):</p>
+<ul>
+  <li>vc4: Fix copy-and-paste fail in backport of NEON asm fixes.</li>
+</ul>
+
+<p>Eric Engestrom (2):</p>
+<ul>
+  <li>xvmc: fix string comparison</li>
+  <li>xvmc: fix string comparison</li>
+</ul>
+
+<p>Ernestas Kulik (2):</p>
+<ul>
+  <li>vc4: Fix leak in HW queries error path</li>
+  <li>v3d: Fix leak in resource setup error path</li>
+</ul>
+
+<p>Iago Toral Quiroga (1):</p>
+<ul>
+  <li>intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments</li>
+</ul>
+
+<p>Ilia Mirkin (1):</p>
+<ul>
+  <li>nvc0: we have 16k-sized framebuffers, fix default scissors</li>
+</ul>
+
+<p>Jason Ekstrand (3):</p>
+<ul>
+  <li>intel/fs: Handle IMAGE_SIZE in size_read() and is_send_from_grf()</li>
+  <li>intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode</li>
+  <li>nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks</li>
+</ul>
+
+<p>Juan A. Suarez Romero (1):</p>
+<ul>
+  <li>anv/cmd_buffer: check for NULL framebuffer</li>
+</ul>
+
+<p>Kenneth Graunke (1):</p>
+<ul>
+  <li>st/mesa: Limit GL_MAX_[NATIVE_]PROGRAM_PARAMETERS_ARB to 2048</li>
+</ul>
+
+<p>Kristian H. Kristensen (1):</p>
+<ul>
+  <li>freedreno/a6xx: Emit blitter dst with OUT_RELOCW</li>
+</ul>
+
+<p>Leo Liu (2):</p>
+<ul>
+  <li>st/va: fix the incorrect max profiles report</li>
+  <li>st/va/vp9: set max reference as default of VP9 reference number</li>
+</ul>
+
+<p>Marek Olšák (4):</p>
+<ul>
+  <li>meson: drop the xcb-xrandr version requirement</li>
+  <li>gallium/u_threaded: fix EXPLICIT_FLUSH for flush offsets &gt; 0</li>
+  <li>radeonsi: fix EXPLICIT_FLUSH for flush offsets &gt; 0</li>
+  <li>winsys/amdgpu: don't drop manually added fence dependencies</li>
+</ul>
+
+<p>Mario Kleiner (2):</p>
+<ul>
+  <li>egl/wayland: Allow client-&gt;server format conversion for PRIME offload. (v2)</li>
+  <li>egl/wayland-drm: Only announce formats via wl_drm which the driver supports.</li>
+</ul>
+
+<p>Oscar Blumberg (1):</p>
+<ul>
+  <li>radeonsi: Fix guardband computation for large render targets</li>
+</ul>
+
+<p>Rob Clark (1):</p>
+<ul>
+  <li>freedreno: stop frob'ing pipe_resource::nr_samples</li>
+</ul>
+
+<p>Rodrigo Vivi (1):</p>
+<ul>
+  <li>intel: Add more PCI Device IDs for Coffee Lake and Ice Lake.</li>
+</ul>
+
+<p>Samuel Pitoiset (2):</p>
+<ul>
+  <li>radv: fix compiler issues with GCC 9</li>
+  <li>radv: always export gl_SampleMask when the fragment shader uses it</li>
+</ul>
+
+
+
+</div>
+</body>
+</html>
--- a/docs/relnotes/19.0.0.html
+++ b/docs/relnotes/19.0.0.html
--- a/docs/relnotes/19.0.1.html
+++ b/docs/relnotes/19.0.1.html
@@ -1,159 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 19.0.1 Release Notes / March 27, 2019</h1>
-
-<p>
-Mesa 19.0.1 is a bug fix release which fixes bugs found since the 19.0.0 release.
-</p>
-<p>
-Mesa 19.0.1 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
-4.5 is <strong>only</strong> available if requested at context creation.
-Compatibility contexts may report a lower version depending on each driver.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-f1dd1980ed628edea3935eed7974fbc5d8353e9578c562728b880d63ac613dbd  mesa-19.0.1.tar.gz
-6884163c0ea9e4c98378ab8fecd72fe7b5f437713a14471beda378df247999d4  mesa-19.0.1.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-<p>None</p>
-
-
-<h2>Bug fixes</h2>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100316">Bug 100316</a> - Linking GLSL 1.30 shaders with invariant and deprecated variables triggers an 'mismatching invariant qualifiers' error</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=107563">Bug 107563</a> - [RADV] Broken rendering in Unity demos</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109698">Bug 109698</a> - dri.pc contents invalid when built with meson</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109980">Bug 109980</a> - [i915 CI][HSW] spec&#64;arb_fragment_shader_interlock&#64;arb_fragment_shader_interlock-image-load-store - fail</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110211">Bug 110211</a> - If DESTDIR is set to an empty string, the dri drivers are not installed</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110221">Bug 110221</a> - build error with meson</li>
-
-</ul>
-
-<h2>Changes</h2>
-
-<p>Andres Gomez (4):</p>
-<ul>
-  <li>glsl: correctly validate component layout qualifier for dvec{3,4}</li>
-  <li>glsl/linker: don't fail non static used inputs without matching outputs</li>
-  <li>glsl/linker: simplify xfb_offset vs xfb_stride overflow check</li>
-  <li>Revert "glsl: relax input-&gt;output validation for SSO programs"</li>
-</ul>
-
-<p>Bas Nieuwenhuizen (2):</p>
-<ul>
-  <li>radv: Use correct image view comparison for fast clears.</li>
-  <li>ac/nir: Return frag_coord as integer.</li>
-</ul>
-
-<p>Danylo Piliaiev (2):</p>
-<ul>
-  <li>anv: Treat zero size XFB buffer as disabled</li>
-  <li>glsl: Cross validate variable's invariance by explicit invariance only</li>
-</ul>
-
-<p>Dave Airlie (1):</p>
-<ul>
-  <li>softpipe: fix texture view crashes</li>
-</ul>
-
-<p>Dylan Baker (5):</p>
-<ul>
-  <li>docs: Add SHA256 sums for 19.0.0</li>
-  <li>cherry-ignore: Add commit that doesn't apply</li>
-  <li>bin/install_megadrivers.py: Correctly handle DESTDIR=''</li>
-  <li>bin/install_megadrivers.py: Fix regression for set DESTDIR</li>
-  <li>bump version for 19.0.1</li>
-</ul>
-
-<p>Eric Anholt (1):</p>
-<ul>
-  <li>v3d: Fix leak of the renderonly struct on screen destruction.</li>
-</ul>
-
-<p>Jason Ekstrand (6):</p>
-<ul>
-  <li>glsl/lower_vector_derefs: Don't use a temporary for TCS outputs</li>
-  <li>glsl/list: Add a list variant of insert_after</li>
-  <li>anv/pass: Flag the need for a RT flush for resolve attachments</li>
-  <li>nir/builder: Add a vector extract helper</li>
-  <li>nir: Add a new pass to lower array dereferences on vectors</li>
-  <li>intel/nir: Lower array-deref-of-vector UBO and SSBO loads</li>
-</ul>
-
-<p>Józef Kucia (2):</p>
-<ul>
-  <li>radv: Fix driverUUID</li>
-  <li>mesa: Fix GL_NUM_DEVICE_UUIDS_EXT</li>
-</ul>
-
-<p>Kenneth Graunke (1):</p>
-<ul>
-  <li>intel/fs: Fix opt_peephole_csel to not throw away saturates.</li>
-</ul>
-
-<p>Kevin Strasser (1):</p>
-<ul>
-  <li>egl/dri: Avoid out of bounds array access</li>
-</ul>
-
-<p>Mark Janes (1):</p>
-<ul>
-  <li>mesa: properly report the length of truncated log messages</li>
-</ul>
-
-<p>Plamena Manolova (1):</p>
-<ul>
-  <li>i965: Disable ARB_fragment_shader_interlock for platforms prior to GEN9</li>
-</ul>
-
-<p>Samuel Pitoiset (3):</p>
-<ul>
-  <li>radv: set the maximum number of IBs per submit to 192</li>
-  <li>radv: always initialize HTILE when the src layout is UNDEFINED</li>
-  <li>radv: fix binding transform feedback buffers</li>
-</ul>
-
-<p>Sergii Romantsov (1):</p>
-<ul>
-  <li>d3d: meson: do not prefix user provided d3d-drivers-path</li>
-</ul>
-
-<p>Tapani Pälli (2):</p>
-<ul>
-  <li>isl: fix automake build when sse41 is not supported</li>
-  <li>anv/radv: release memory allocated by glsl types during spirv_to_nir</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/19.0.2.html
+++ b/docs/relnotes/19.0.2.html
@@ -1,122 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 19.0.2 Release Notes / April 10, 2019</h1>
-
-<p>
-Mesa 19.0.2 is a bug fix release which fixes bugs found since the 19.0.1 release.
-</p>
-<p>
-Mesa 19.0.2 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
-4.5 is <strong>only</strong> available if requested at context creation.
-Compatibility contexts may report a lower version depending on each driver.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-SHA256: eb972fc11d4e1261d34ec0b91a701f158d4870c0428fb108353ae7eab64b1118  mesa-19.0.2.tar.gz
-SHA256: 1a2edc3ce56906a676c91e6851298db45903df1f5cb9827395a922c1452db802  mesa-19.0.2.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-
-
-<h2>Bug fixes</h2>
-
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108766">Bug 108766</a> - Mesa built with meson has RPATH entries</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109648">Bug 109648</a> - AMD Raven hang during va-api decoding</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110257">Bug 110257</a> - Major artifacts in mpeg2 vaapi hw decoding</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110259">Bug 110259</a> - radv: Sampling depth-stencil image in GENERAL layout returns nothing but zero (regression, bisected)</li>
-
-</ul>
-
-<h2>Changes</h2>
-
-
-<p>Boyuan Zhang (1):</p>
-<ul>
-  <li>st/va: reverse qt matrix back to its original order</li>
-</ul>
-
-<p>Caio Marcelo de Oliveira Filho (1):</p>
-<ul>
-  <li>nir: Take if_uses into account when repairing SSA</li>
-</ul>
-
-<p>Dylan Baker (2):</p>
-<ul>
-  <li>docs: Add SHA256 sums for mesa 19.0.1</li>
-  <li>VERSION: bump version for 19.0.2</li>
-</ul>
-
-<p>Eric Anholt (3):</p>
-<ul>
-  <li>dri3: Return the current swap interval from glXGetSwapIntervalMESA().</li>
-  <li>v3d: Bump the maximum texture size to 4k for V3D 4.x.</li>
-  <li>v3d: Don't try to use the TFU blit path if a scissor is enabled.</li>
-</ul>
-
-<p>Eric Engestrom (1):</p>
-<ul>
-  <li>meson: strip rpath from megadrivers</li>
-</ul>
-
-<p>Jason Ekstrand (1):</p>
-<ul>
-  <li>Revert "anv/radv: release memory allocated by glsl types during spirv_to_nir"</li>
-</ul>
-
-<p>Karol Herbst (1):</p>
-<ul>
-  <li>nir/print: fix printing the image_array intrinsic index</li>
-</ul>
-
-<p>Leo Liu (2):</p>
-<ul>
-  <li>radeon/vcn: add H.264 constrained baseline support</li>
-  <li>radeon/vcn/vp9: search the render target from the whole list</li>
-</ul>
-
-<p>Lionel Landwerlin (1):</p>
-<ul>
-  <li>intel: add dependency on genxml generated files</li>
-</ul>
-
-<p>Marek Olšák (1):</p>
-<ul>
-  <li>radeonsi: fix assertion failure by using the correct type</li>
-</ul>
-
-<p>Samuel Pitoiset (2):</p>
-<ul>
-  <li>radv: skip updating depth/color metadata for conditional rendering</li>
-  <li>radv: do not always initialize HTILE in compressed state</li>
-</ul>
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/19.0.3.html
+++ b/docs/relnotes/19.0.3.html
@@ -1,148 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 19.0.3 Release Notes / April 24, 2019</h1>
-
-<p>
-Mesa 19.0.3 is a bug fix release which fixes bugs found since the l9.0.2 release.
-</p>
-<p>
-Mesa 19.0.3 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
-4.5 is <strong>only</strong> available if requested at context creation.
-Compatibility contexts may report a lower version depending on each driver.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-59543ec3c9f8c72990e77887f13d1678cb6739e5d5f56abc21ebf9e772389c5e  mesa-19.0.3.tar.gz
-f027244e38dc309a4c12db45ef79be81ab62c797a50a88d566e4edb6159fc4d5  mesa-19.0.3.tar.xz
-</pre>
-
-
-<h2>New features</h2>
-
-<p>N/A</p>
-
-<h2>Bug fixes</h2>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108879">Bug 108879</a> - [CIK] [regression] All opencl apps hangs indefinitely in si_create_context</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110201">Bug 110201</a> - [ivb] mesa 19.0.0 breaks rendering in kitty</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110356">Bug 110356</a> - install_megadrivers.py creates new dangling symlink [bisected]</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110441">Bug 110441</a> - [llvmpipe] complex-loop-analysis-bug regression</li>
-
-</ul>
-
-<h2>Changes</h2>
-
-<p>Andres Gomez (1):</p>
-<ul>
-  <li>glsl/linker: location aliasing requires types to have the same width</li>
-</ul>
-
-<p>Bas Nieuwenhuizen (1):</p>
-<ul>
-  <li>ac: Move has_local_buffers disable to radeonsi.</li>
-</ul>
-
-<p>Chia-I Wu (1):</p>
-<ul>
-  <li>virgl: fix fence fd version check</li>
-</ul>
-
-<p>Danylo Piliaiev (1):</p>
-<ul>
-  <li>intel/compiler: Do not reswizzle dst if instruction writes to flag register</li>
-</ul>
-
-<p>Dylan Baker (2):</p>
-<ul>
-  <li>docs: Add sha256 sums for 19.0.2</li>
-  <li>Bump version for 19.0.3</li>
-</ul>
-
-<p>Eric Anholt (1):</p>
-<ul>
-  <li>nir: Fix deref offset calculation for structs.</li>
-</ul>
-
-<p>Eric Engestrom (1):</p>
-<ul>
-  <li>meson: remove meson-created megadrivers symlinks</li>
-</ul>
-
-<p>Jason Ekstrand (2):</p>
-<ul>
-  <li>anv/pipeline: Fix MEDIA_VFE_STATE::PerThreadScratchSpace on gen7</li>
-  <li>anv: Add a #define for the max binding table size</li>
-</ul>
-
-<p>Juan A. Suarez Romero (1):</p>
-<ul>
-  <li>meson: Add dependency on genxml to anvil genfiles</li>
-</ul>
-
-<p>Kenneth Graunke (2):</p>
-<ul>
-  <li>glsl: Set location on structure-split sampler uniform variables</li>
-  <li>Revert "glsl: Set location on structure-split sampler uniform variables"</li>
-</ul>
-
-<p>Lionel Landwerlin (2):</p>
-<ul>
-  <li>anv: fix uninitialized pthread cond clock domain</li>
-  <li>intel/devinfo: fix missing num_thread_per_eu on ICL</li>
-</ul>
-
-<p>Lubomir Rintel (2):</p>
-<ul>
-  <li>gallivm: guess CPU features also on ARM</li>
-  <li>gallivm: disable NEON instructions if they are not supported</li>
-</ul>
-
-<p>Marek Olšák (1):</p>
-<ul>
-  <li>radeonsi: use CP DMA for the null const buffer clear on CIK</li>
-</ul>
-
-<p>Rhys Perry (1):</p>
-<ul>
-  <li>nir,ac/nir: fix cube_face_coord</li>
-</ul>
-
-<p>Roland Scheidegger (1):</p>
-<ul>
-  <li>gallivm: fix bogus assert in get_indirect_index</li>
-</ul>
-
-<p>Samuel Pitoiset (2):</p>
-<ul>
-  <li>ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+</li>
-  <li>radv: do not load vertex attributes that are not provided by the pipeline</li>
-</ul>
-
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/19.0.4.html
+++ b/docs/relnotes/19.0.4.html
@@ -1,242 +0,0 @@
-<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
-<html lang="en">
-<head>
-  <meta http-equiv="content-type" content="text/html; charset=utf-8">
-  <title>Mesa Release Notes</title>
-  <link rel="stylesheet" type="text/css" href="../mesa.css">
-</head>
-<body>
-
-<div class="header">
-  <h1>The Mesa 3D Graphics Library</h1>
-</div>
-
-<iframe src="../contents.html"></iframe>
-<div class="content">
-
-<h1>Mesa 19.0.4 Release Notes / May 9, 2019</h1>
-
-<p>
-Mesa 19.0.4 is a bug fix release which fixes bugs found since the 19.0.3 release.
-</p>
-<p>
-Mesa 19.0.4 implements the OpenGL 4.5 API, but the version reported by
-glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
-glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
-Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
-4.5 is <strong>only</strong> available if requested at context creation.
-Compatibility contexts may report a lower version depending on each driver.
-</p>
-
-
-<h2>SHA256 checksums</h2>
-<pre>
-TBD
-</pre>
-
-
-<h2>New features</h2>
-
-<p>N/A</p>
-
-<h2>Bug fixes</h2>
-
-<ul>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99781">Bug 99781</a> - Some Unity games fail assertion on startup in glXCreateContextAttribsARB</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100239">Bug 100239</a> - Incorrect rendering in CS:GO</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108540">Bug 108540</a> - vkAcquireNextImageKHR blocks when timeout=0 in Wayland</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110143">Bug 110143</a> - Doom 3: BFG Edition - Steam and GOG.com - white flickering screen</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110291">Bug 110291</a> - Vega 64 GPU hang running Space Engineers</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110355">Bug 110355</a> - radeonsi: GTK elements become invisible in some applications (GIMP, LibreOffice)</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110573">Bug 110573</a> - Mesa vulkan-radeon 19.0.3 system freeze and visual artifacts (RADV)</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110590">Bug 110590</a> - [Regression][Bisected] GTAⅣ under wine fails with GLXBadFBConfig</li>
-
-<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110632">Bug 110632</a> - &quot;glx: Fix synthetic error generation in __glXSendError&quot; broke wine games on 32-bit</li>
-
-</ul>
-
-<h2>Changes</h2>
-
-<p>Alejandro Piñeiro (1):</p>
-<ul>
-  <li>docs: document MESA_GLSL=errors keyword</li>
-</ul>
-
-<p>Andrii Simiklit (1):</p>
-<ul>
-  <li>egl: return correct error code for a case req ver &lt; 3 with forward-compatible</li>
-</ul>
-
-<p>Axel Davy (1):</p>
-<ul>
-  <li>st/nine: Fix D3DWindowBuffer_release for old wine nine support</li>
-</ul>
-
-<p>Bas Nieuwenhuizen (1):</p>
-<ul>
-  <li>radv: Disable VK_EXT_descriptor_indexing.</li>
-</ul>
-
-<p>Brian Paul (1):</p>
-<ul>
-  <li>svga: add SVGA_NO_LOGGING env var (v2)</li>
-</ul>
-
-<p>Caio Marcelo de Oliveira Filho (1):</p>
-<ul>
-  <li>spirv: Handle SpvOpDecorateId</li>
-</ul>
-
-<p>Charmaine Lee (1):</p>
-<ul>
-  <li>svga: move host logging to winsys</li>
-</ul>
-
-<p>Chuck Atkins (1):</p>
-<ul>
-  <li>meson: Fix missing glproto dependency for gallium-glx</li>
-</ul>
-
-<p>Daniel Stone (1):</p>
-<ul>
-  <li>vulkan/wsi/wayland: Respect non-blocking AcquireNextImage</li>
-</ul>
-
-<p>Dave Airlie (2):</p>
-<ul>
-  <li>r600: reset tex array override even when no view bound</li>
-  <li>util/bitset: fix bitset range mask calculations.</li>
-</ul>
-
-<p>Dylan Baker (7):</p>
-<ul>
-  <li>docs: Add SHA256 sums for mesa 19.0.3</li>
-  <li>cherry-ignore: Add a patch that was manually backported</li>
-  <li>cherry-ignore: Add more backported patches</li>
-  <li>cherry-ignore: Add another patch</li>
-  <li>cherry-ignore: Add more patches</li>
-  <li>meson: Force the use of config-tool for llvm</li>
-  <li>VERSION: bump for 19.0.4 release</li>
-</ul>
-
-<p>Emil Velikov (3):</p>
-<ul>
-  <li>vulkan/wsi: check if the display_fd given is master</li>
-  <li>vulkan/wsi: don't use DUMB_CLOSE for normal GEM handles</li>
-  <li>configure.ac: check for libdrm when using VL with X11</li>
-</ul>
-
-<p>Erik Faye-Lund (2):</p>
-<ul>
-  <li>softpipe: setup pixel_offset for all primitive types</li>
-  <li>draw: flush when setting stream-out targets</li>
-</ul>
-
-<p>Francisco Jerez (2):</p>
-<ul>
-  <li>intel/fs: Lower integer multiply correctly when destination stride equals 4.</li>
-  <li>intel/fs: Cap dst-aligned region stride to maximum representable hstride value.</li>
-</ul>
-
-<p>Hal Gentz (1):</p>
-<ul>
-  <li>glx: Fix synthetic error generation in __glXSendError</li>
-</ul>
-
-<p>Ian Romanick (2):</p>
-<ul>
-  <li>glsl: Silence may unused parameter warnings in glsl/ir.h</li>
-  <li>mesa: Add missing display list support for GL_FOG_COORDINATE_SOURCE</li>
-</ul>
-
-<p>Jason Ekstrand (1):</p>
-<ul>
-  <li>anv/descriptor_set: Destroy sets before pool finalization</li>
-</ul>
-
-<p>Jon Turney (1):</p>
-<ul>
-  <li>meson: Force '.so' extension for DRI drivers</li>
-</ul>
-
-<p>Juan A. Suarez Romero (2):</p>
-<ul>
-  <li>spirv: add missing SPV_EXT_descriptor_indexing capabilities</li>
-  <li>radv: enable descriptor indexing capabilities</li>
-</ul>
-
-<p>Kenneth Graunke (6):</p>
-<ul>
-  <li>glsl: Allow gl_nir_lower_samplers*() without a gl_shader_program</li>
-  <li>glsl: Don't look at sampler uniform storage for internal vars</li>
-  <li>i965: Ignore uniform storage for samplers or images, use binding info</li>
-  <li>i965: Fix BRW_MEMZONE_LOW_4G heap size.</li>
-  <li>i965: Force VMA alignment to be a multiple of the page size.</li>
-  <li>i965: leave the top 4Gb of the high heap VMA unused</li>
-</ul>
-
-<p>Lionel Landwerlin (4):</p>
-<ul>
-  <li>anv: store heap address bounds when initializing physical device</li>
-  <li>anv: leave the top 4Gb of the high heap VMA unused</li>
-  <li>anv: fix argument name for vkCmdEndQuery</li>
-  <li>anv: rework queries writes to ensure ordering memory writes</li>
-</ul>
-
-<p>Marek Olšák (2):</p>
-<ul>
-  <li>radeonsi/gfx9: set that window_rectangles always roll the context</li>
-  <li>radeonsi/gfx9: rework the gfx9 scissor bug workaround (v2)</li>
-</ul>
-
-<p>Nicolai Hähnle (1):</p>
-<ul>
-  <li>radeonsi: add si_debug_options for convenient adding/removing of options</li>
-</ul>
-
-<p>Rhys Perry (1):</p>
-<ul>
-  <li>radv: fix set_output_usage_mask() with composite and 64-bit types</li>
-</ul>
-
-<p>Ross Burton (1):</p>
-<ul>
-  <li>Revert "meson: drop GLESv1 .so version back to 1.0.0"</li>
-</ul>
-
-<p>Samuel Pitoiset (8):</p>
-<ul>
-  <li>radv: add missing VEGA20 chip in radv_get_device_name()</li>
-  <li>radv: do not need to force emit the TCS regs on Vega20</li>
-  <li>radv: fix color conversions for normalized uint/sint formats</li>
-  <li>radv: implement a workaround for VK_EXT_conditional_rendering</li>
-  <li>radv: set WD_SWITCH_ON_EOP=1 when drawing primitives from a stream output buffer</li>
-  <li>radv: only need to force emit the TCS regs on Vega10 and Raven1</li>
-  <li>radv: apply the indexing workaround for atomic buffer operations on GFX9</li>
-  <li>radv: fix setting the number of rectangles when it's dyanmic</li>
-</ul>
-
-<p>Tapani Pälli (1):</p>
-<ul>
-  <li>anv: expose VK_EXT_queue_family_foreign on Android</li>
-</ul>
-
-<p>Timothy Arceri (4):</p>
-<ul>
-  <li>nir: fix nir_remove_unused_varyings()</li>
-  <li>util/drirc: add workarounds for bugs in Doom 3: BFG</li>
-  <li>radeonsi: add config entry for Counter-Strike Global Offensive</li>
-  <li>Revert "glx: Fix synthetic error generation in __glXSendError"</li>
-</ul>
-
-</div>
-</body>
-</html>
--- a/docs/relnotes/19.1.0.html
+++ b/docs/relnotes/19.1.0.html
@@ -0,0 +1,60 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
+<html lang="en">
+<head>
+  <meta http-equiv="content-type" content="text/html; charset=utf-8">
+  <title>Mesa Release Notes</title>
+  <link rel="stylesheet" type="text/css" href="../mesa.css">
+</head>
+<body>
+
+<div class="header">
+  <h1>The Mesa 3D Graphics Library</h1>
+</div>
+
+<iframe src="../contents.html"></iframe>
+<div class="content">
+
+<h1>Mesa 19.1.0 Release Notes / TBD</h1>
+
+<p>
+Mesa 19.1.0 is a new development release. People who are concerned
+with stability and reliability should stick with a previous release or
+wait for Mesa 19.1.1.
+</p>
+<p>
+Mesa 19.1.0 implements the OpenGL 4.5 API, but the version reported by
+glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
+glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
+Some drivers don't support all the features required in OpenGL 4.5.  OpenGL
+4.5 is <strong>only</strong> available if requested at context creation.
+Compatibility contexts may report a lower version depending on each driver.
+</p>
+
+<h2>SHA256 checksums</h2>
+<pre>
+TBD.
+</pre>
+
+
+<h2>New features</h2>
+
+<ul>
+<li>GL_EXT_texture_compression_s3tc_srgb on Gallium drivers and i965 (ES extension).</li>
+<li>VK_EXT_buffer_device_address on Intel and RADV.</li>
+</ul>
+
+<h2>Bug fixes</h2>
+
+<ul>
+<li>TBD</li>
+</ul>
+
+<h2>Changes</h2>
+
+<ul>
+<li>TBD</li>
+</ul>
+
+</div>
+</body>
+</html>
--- a/docs/shading.html
+++ b/docs/shading.html
@@ -59,7 +59,6 @@ execution.  These are generally used for debugging.
 <li><b>nopfrag</b> - force fragment shader to be a simple shader that passes
    through the color attribute.
 <li><b>useprog</b> - log glUseProgram calls to stderr
-<li><b>errors</b> - GLSL compilation and link errors will be reported to stderr.
 </ul>
 <p>
 Example:  export MESA_GLSL=dump,nopt
--- a/docs/submittingpatches.html
+++ b/docs/submittingpatches.html
@@ -236,6 +236,11 @@ your email administrator for this.)
    <li>Other tag examples: gallium, util
  </ul>
 </p>
+<p>
+  Tick the following when creating the MR. It allows developers to
+  rebase your work on top of master.
+  <pre>Allow commits from members who can merge to the target branch</pre>
+</p>
 <p>
  If you revise your patches based on code review and push an update
  to your branch, you should maintain a <strong>clean</strong> history
--- a/include/CL/cl.h
+++ b/include/CL/cl.h
--- a/include/CL/cl.hpp
+++ b/include/CL/cl.hpp
--- a/include/CL/cl2.hpp
+++ b/include/CL/cl2.hpp
--- a/include/CL/cl_d3d10.h
+++ b/include/CL/cl_d3d10.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2015 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/include/CL/cl_d3d11.h
+++ b/include/CL/cl_d3d11.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2015 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
--- a/include/CL/cl_dx9_media_sharing.h
+++ b/include/CL/cl_dx9_media_sharing.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2015 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -33,7 +38,7 @@
 extern "C" {
 #endif

-/******************************************************************************
+/******************************************************************************/
 /* cl_khr_dx9_media_sharing                                                   */
 #define cl_khr_dx9_media_sharing 1

--- a/include/CL/cl_dx9_media_sharing_intel.h
+++ b/include/CL/cl_dx9_media_sharing_intel.h
@@ -0,0 +1,182 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2019 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+/*****************************************************************************\
+
+Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
+
+THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
+MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+File Name: cl_dx9_media_sharing_intel.h
+
+Abstract:
+
+Notes:
+
+\*****************************************************************************/
+
+#ifndef __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
+#define __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H
+
+#include <CL/cl.h>
+#include <CL/cl_platform.h>
+#include <d3d9.h>
+#include <dxvahd.h>
+#include <wtypes.h>
+#include <d3d9types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************
+* cl_intel_dx9_media_sharing extension *
+****************************************/
+
+#define cl_intel_dx9_media_sharing 1
+
+typedef cl_uint cl_dx9_device_source_intel;
+typedef cl_uint cl_dx9_device_set_intel;
+
+/* error codes */
+#define CL_INVALID_DX9_DEVICE_INTEL                   -1010
+#define CL_INVALID_DX9_RESOURCE_INTEL                 -1011
+#define CL_DX9_RESOURCE_ALREADY_ACQUIRED_INTEL        -1012
+#define CL_DX9_RESOURCE_NOT_ACQUIRED_INTEL            -1013
+
+/* cl_dx9_device_source_intel */
+#define CL_D3D9_DEVICE_INTEL                          0x4022
+#define CL_D3D9EX_DEVICE_INTEL                        0x4070
+#define CL_DXVA_DEVICE_INTEL                          0x4071
+
+/* cl_dx9_device_set_intel */
+#define CL_PREFERRED_DEVICES_FOR_DX9_INTEL            0x4024
+#define CL_ALL_DEVICES_FOR_DX9_INTEL                  0x4025
+
+/* cl_context_info */
+#define CL_CONTEXT_D3D9_DEVICE_INTEL                  0x4026
+#define CL_CONTEXT_D3D9EX_DEVICE_INTEL                0x4072
+#define CL_CONTEXT_DXVA_DEVICE_INTEL                  0x4073
+
+/* cl_mem_info */
+#define CL_MEM_DX9_RESOURCE_INTEL                     0x4027
+#define CL_MEM_DX9_SHARED_HANDLE_INTEL                0x4074
+
+/* cl_image_info */
+#define CL_IMAGE_DX9_PLANE_INTEL                      0x4075
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_DX9_OBJECTS_INTEL          0x402A
+#define CL_COMMAND_RELEASE_DX9_OBJECTS_INTEL          0x402B
+/******************************************************************************/
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromDX9INTEL(
+    cl_platform_id              platform,
+    cl_dx9_device_source_intel  dx9_device_source,
+    void*                       dx9_object,
+    cl_dx9_device_set_intel     dx9_device_set,
+    cl_uint                     num_entries,
+    cl_device_id*               devices,
+    cl_uint*                    num_devices) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL* clGetDeviceIDsFromDX9INTEL_fn)(
+    cl_platform_id              platform,
+    cl_dx9_device_source_intel  dx9_device_source,
+    void*                       dx9_object,
+    cl_dx9_device_set_intel     dx9_device_set,
+    cl_uint                     num_entries,
+    cl_device_id*               devices,
+    cl_uint*                    num_devices) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromDX9MediaSurfaceINTEL(
+    cl_context                  context,
+    cl_mem_flags                flags,
+    IDirect3DSurface9*          resource,
+    HANDLE                      sharedHandle,
+    UINT                        plane,
+    cl_int*                     errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceINTEL_fn)(
+    cl_context                  context,
+    cl_mem_flags                flags,
+    IDirect3DSurface9*          resource,
+    HANDLE                      sharedHandle,
+    UINT                        plane,
+    cl_int*                     errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireDX9ObjectsINTEL(
+    cl_command_queue            command_queue,
+    cl_uint                     num_objects,
+    const cl_mem*               mem_objects,
+    cl_uint                     num_events_in_wait_list,
+    const cl_event*             event_wait_list,
+    cl_event*                   event) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9ObjectsINTEL_fn)(
+    cl_command_queue            command_queue,
+    cl_uint                     num_objects,
+    const cl_mem*               mem_objects,
+    cl_uint                     num_events_in_wait_list,
+    const cl_event*             event_wait_list,
+    cl_event*                   event) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseDX9ObjectsINTEL(
+    cl_command_queue            command_queue,
+    cl_uint                     num_objects,
+    cl_mem*                     mem_objects,
+    cl_uint                     num_events_in_wait_list,
+    const cl_event*             event_wait_list,
+    cl_event*                   event) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseDX9ObjectsINTEL_fn)(
+    cl_command_queue            command_queue,
+    cl_uint                     num_objects,
+    cl_mem*                     mem_objects,
+    cl_uint                     num_events_in_wait_list,
+    const cl_event*             event_wait_list,
+    cl_event*                   event) CL_EXT_SUFFIX__VERSION_1_1;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __OPENCL_CL_DX9_MEDIA_SHARING_INTEL_H */
+
--- a/include/CL/cl_egl.h
+++ b/include/CL/cl_egl.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2008-2010 The Khronos Group Inc.
+ * Copyright (c) 2008-2019 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -24,13 +29,7 @@
 #ifndef __OPENCL_CL_EGL_H
 #define __OPENCL_CL_EGL_H

-#ifdef __APPLE__
-
-#else
 #include <CL/cl.h>
-#include <EGL/egl.h>
-#include <EGL/eglext.h>
-#endif  

 #ifdef __cplusplus
 extern "C" {
@@ -62,69 +61,69 @@ typedef intptr_t cl_egl_image_properties_khr;
 #define cl_khr_egl_image 1

 extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromEGLImageKHR(cl_context                  /* context */,
-                        CLeglDisplayKHR             /* egldisplay */,
-                        CLeglImageKHR               /* eglimage */,
-                        cl_mem_flags                /* flags */,
-                        const cl_egl_image_properties_khr * /* properties */,
-                        cl_int *                    /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+clCreateFromEGLImageKHR(cl_context                  context,
+                        CLeglDisplayKHR             egldisplay,
+                        CLeglImageKHR               eglimage,
+                        cl_mem_flags                flags,
+                        const cl_egl_image_properties_khr * properties,
+                        cl_int *                    errcode_ret) CL_API_SUFFIX__VERSION_1_0;

 typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromEGLImageKHR_fn)(
-	cl_context                  context,
-	CLeglDisplayKHR             egldisplay,
-	CLeglImageKHR               eglimage,
-	cl_mem_flags                flags,
-	const cl_egl_image_properties_khr * properties,
-	cl_int *                    errcode_ret);
+    cl_context                  context,
+    CLeglDisplayKHR             egldisplay,
+    CLeglImageKHR               eglimage,
+    cl_mem_flags                flags,
+    const cl_egl_image_properties_khr * properties,
+    cl_int *                    errcode_ret);


 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireEGLObjectsKHR(cl_command_queue /* command_queue */,
-                              cl_uint          /* num_objects */,
-                              const cl_mem *   /* mem_objects */,
-                              cl_uint          /* num_events_in_wait_list */,
-                              const cl_event * /* event_wait_list */,
-                              cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+clEnqueueAcquireEGLObjectsKHR(cl_command_queue command_queue,
+                              cl_uint          num_objects,
+                              const cl_mem *   mem_objects,
+                              cl_uint          num_events_in_wait_list,
+                              const cl_event * event_wait_list,
+                              cl_event *       event) CL_API_SUFFIX__VERSION_1_0;

 typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireEGLObjectsKHR_fn)(
-	cl_command_queue command_queue,
-	cl_uint          num_objects,
-	const cl_mem *   mem_objects,
-	cl_uint          num_events_in_wait_list,
-	const cl_event * event_wait_list,
-	cl_event *       event);
+    cl_command_queue command_queue,
+    cl_uint          num_objects,
+    const cl_mem *   mem_objects,
+    cl_uint          num_events_in_wait_list,
+    const cl_event * event_wait_list,
+    cl_event *       event);


 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseEGLObjectsKHR(cl_command_queue /* command_queue */,
-                              cl_uint          /* num_objects */,
-                              const cl_mem *   /* mem_objects */,
-                              cl_uint          /* num_events_in_wait_list */,
-                              const cl_event * /* event_wait_list */,
-                              cl_event *       /* event */) CL_API_SUFFIX__VERSION_1_0;
+clEnqueueReleaseEGLObjectsKHR(cl_command_queue command_queue,
+                              cl_uint          num_objects,
+                              const cl_mem *   mem_objects,
+                              cl_uint          num_events_in_wait_list,
+                              const cl_event * event_wait_list,
+                              cl_event *       event) CL_API_SUFFIX__VERSION_1_0;

 typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseEGLObjectsKHR_fn)(
-	cl_command_queue command_queue,
-	cl_uint          num_objects,
-	const cl_mem *   mem_objects,
-	cl_uint          num_events_in_wait_list,
-	const cl_event * event_wait_list,
-	cl_event *       event);
+    cl_command_queue command_queue,
+    cl_uint          num_objects,
+    const cl_mem *   mem_objects,
+    cl_uint          num_events_in_wait_list,
+    const cl_event * event_wait_list,
+    cl_event *       event);


 #define cl_khr_egl_event 1

 extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateEventFromEGLSyncKHR(cl_context      /* context */,
-                            CLeglSyncKHR    /* sync */,
-                            CLeglDisplayKHR /* display */,
-                            cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+clCreateEventFromEGLSyncKHR(cl_context      context,
+                            CLeglSyncKHR    sync,
+                            CLeglDisplayKHR display,
+                            cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_0;

 typedef CL_API_ENTRY cl_event (CL_API_CALL *clCreateEventFromEGLSyncKHR_fn)(
-	cl_context      context,
-	CLeglSyncKHR    sync,
-	CLeglDisplayKHR display,
-	cl_int *        errcode_ret);
+    cl_context      context,
+    CLeglSyncKHR    sync,
+    CLeglDisplayKHR display,
+    cl_int *        errcode_ret);

 #ifdef __cplusplus
 }
--- a/include/CL/cl_ext.h
+++ b/include/CL/cl_ext.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2008-2013 The Khronos Group Inc.
+ * Copyright (c) 2008-2019 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -21,8 +26,6 @@
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 ******************************************************************************/

-/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
-
 /* cl_ext.h contains OpenCL extensions which don't have external */
 /* (OpenGL, D3D) dependencies.                                   */

@@ -33,11 +36,13 @@
 extern "C" {
 #endif

-#ifdef __APPLE__
-        #include <OpenCL/cl.h>
-    #include <AvailabilityMacros.h>
-#else
-        #include <CL/cl.h>
+#include <CL/cl.h>
+
+/* cl_khr_fp64 extension - no extension #define since it has no functions  */
+/* CL_DEVICE_DOUBLE_FP_CONFIG is defined in cl.h for OpenCL >= 120 */
+
+#if CL_TARGET_OPENCL_VERSION <= 110
+#define CL_DEVICE_DOUBLE_FP_CONFIG                       0x1032
 #endif

 /* cl_khr_fp16 extension - no extension #define since it has no functions  */
@@ -47,12 +52,12 @@ extern "C" {
 *
 * Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
 *
- * Registers a user callback function that will be called when the memory object is deleted and its resources 
- * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback 
- * stack associated with memobj. The registered user callback functions are called in the reverse order in 
- * which they were registered. The user callback functions are called and then the memory object is deleted 
- * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be 
- * notified when the memory referenced by host_ptr, specified when the memory object is created and used as 
+ * Registers a user callback function that will be called when the memory object is deleted and its resources
+ * freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
+ * stack associated with memobj. The registered user callback functions are called in the reverse order in
+ * which they were registered. The user callback functions are called and then the memory object is deleted
+ * and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
+ * notified when the memory referenced by host_ptr, specified when the memory object is created and used as
 * the storage bits for the memory object, can be reused or freed.
 *
 * The application may not call CL api's with the cl_mem object passed to the pfn_notify.
@@ -61,9 +66,9 @@ extern "C" {
 * before using.
 */
 #define cl_APPLE_SetMemObjectDestructor 1
-cl_int  CL_API_ENTRY clSetMemObjectDestructorAPPLE(  cl_mem /* memobj */, 
-                                        void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/), 
-                                        void * /*user_data */ )             CL_EXT_SUFFIX__VERSION_1_0;  
+cl_int  CL_API_ENTRY clSetMemObjectDestructorAPPLE(  cl_mem memobj,
+                                        void (* pfn_notify)(cl_mem memobj, void * user_data),
+                                        void * user_data)             CL_EXT_SUFFIX__VERSION_1_0;


 /* Context Logging Functions
@@ -72,29 +77,29 @@ cl_int  CL_API_ENTRY clSetMemObjectDestructorAPPLE(  cl_mem /* memobj */,
 * Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
 * before using.
 *
- * clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger 
+ * clLogMessagesToSystemLog forwards on all log messages to the Apple System Logger
 */
 #define cl_APPLE_ContextLoggingFunctions 1
-extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(  const char * /* errstr */, 
-                                            const void * /* private_info */, 
-                                            size_t       /* cb */, 
-                                            void *       /* user_data */ )  CL_EXT_SUFFIX__VERSION_1_0;
+extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE(  const char * errstr,
+                                            const void * private_info,
+                                            size_t       cb,
+                                            void *       user_data)  CL_EXT_SUFFIX__VERSION_1_0;

 /* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
-extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(   const char * /* errstr */, 
-                                          const void * /* private_info */, 
-                                          size_t       /* cb */, 
-                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
+extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE(   const char * errstr,
+                                          const void * private_info,
+                                          size_t       cb,
+                                          void *       user_data)    CL_EXT_SUFFIX__VERSION_1_0;

 /* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
-extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(   const char * /* errstr */, 
-                                          const void * /* private_info */, 
-                                          size_t       /* cb */, 
-                                          void *       /* user_data */ )    CL_EXT_SUFFIX__VERSION_1_0;
+extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(   const char * errstr,
+                                          const void * private_info,
+                                          size_t       cb,
+                                          void *       user_data)    CL_EXT_SUFFIX__VERSION_1_0;


-/************************ 
-* cl_khr_icd extension *                                                  
+/************************
+* cl_khr_icd extension *
 ************************/
 #define cl_khr_icd 1

@@ -105,16 +110,43 @@ extern void CL_API_ENTRY clLogMessagesToStderrAPPLE(   const char * /* errstr */
 #define CL_PLATFORM_NOT_FOUND_KHR                   -1001

 extern CL_API_ENTRY cl_int CL_API_CALL
-clIcdGetPlatformIDsKHR(cl_uint          /* num_entries */,
-                       cl_platform_id * /* platforms */,
-                       cl_uint *        /* num_platforms */);
+clIcdGetPlatformIDsKHR(cl_uint          num_entries,
+                       cl_platform_id * platforms,
+                       cl_uint *        num_platforms);

-typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
-    cl_uint          /* num_entries */,
-    cl_platform_id * /* platforms */,
-    cl_uint *        /* num_platforms */);
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(cl_uint          num_entries,
+                                         cl_platform_id * platforms,
+                                         cl_uint *        num_platforms);


+/*******************************
+ * cl_khr_il_program extension *
+ *******************************/
+#define cl_khr_il_program 1
+
+/* New property to clGetDeviceInfo for retrieving supported intermediate
+ * languages
+ */
+#define CL_DEVICE_IL_VERSION_KHR                    0x105B
+
+/* New property to clGetProgramInfo for retrieving the IL of a
+ * program
+ */
+#define CL_PROGRAM_IL_KHR                           0x1169
+
+extern CL_API_ENTRY cl_program CL_API_CALL
+clCreateProgramWithILKHR(cl_context   context,
+                         const void * il,
+                         size_t       length,
+                         cl_int *     errcode_ret);
+
+typedef CL_API_ENTRY cl_program
+(CL_API_CALL *clCreateProgramWithILKHR_fn)(cl_context   context,
+                                           const void * il,
+                                           size_t       length,
+                                           cl_int *     errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
 /* Extension: cl_khr_image2D_buffer
 *
 * This extension allows a 2D image to be created from a cl_mem buffer without a copy.
@@ -129,31 +161,33 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
 * The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
 * The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
 */
-    
-/*************************************
- * cl_khr_initalize_memory extension *
- *************************************/
-    
-#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x200E
-    
-    
+
+/**************************************
+ * cl_khr_initialize_memory extension *
+ **************************************/
+
+#define CL_CONTEXT_MEMORY_INITIALIZE_KHR            0x2030
+
+
 /**************************************
 * cl_khr_terminate_context extension *
 **************************************/
-    
-#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x200F
-#define CL_CONTEXT_TERMINATE_KHR                    0x2010
+
+#define CL_DEVICE_TERMINATE_CAPABILITY_KHR          0x2031
+#define CL_CONTEXT_TERMINATE_KHR                    0x2032

 #define cl_khr_terminate_context 1
-extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
+extern CL_API_ENTRY cl_int CL_API_CALL
+clTerminateContextKHR(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clTerminateContextKHR_fn)(cl_context context) CL_EXT_SUFFIX__VERSION_1_2;
+

-typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
-    
-    
 /*
 * Extension: cl_khr_spir
 *
- * This extension adds support to create an OpenCL program object from a 
+ * This extension adds support to create an OpenCL program object from a
 * Standard Portable Intermediate Representation (SPIR) instance
 */

@@ -161,9 +195,30 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /
 #define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE         0x40E1


+/*****************************************
+ * cl_khr_create_command_queue extension *
+ *****************************************/
+#define cl_khr_create_command_queue 1
+
+typedef cl_bitfield cl_queue_properties_khr;
+
+extern CL_API_ENTRY cl_command_queue CL_API_CALL
+clCreateCommandQueueWithPropertiesKHR(cl_context context,
+                                      cl_device_id device,
+                                      const cl_queue_properties_khr* properties,
+                                      cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_command_queue
+(CL_API_CALL *clCreateCommandQueueWithPropertiesKHR_fn)(cl_context context,
+                                                        cl_device_id device,
+                                                        const cl_queue_properties_khr* properties,
+                                                        cl_int* errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+
 /******************************************
 * cl_nv_device_attribute_query extension *
 ******************************************/
+
 /* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
 #define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV       0x4000
 #define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV       0x4001
@@ -173,88 +228,124 @@ typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /
 #define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV            0x4005
 #define CL_DEVICE_INTEGRATED_MEMORY_NV              0x4006

+
 /*********************************
 * cl_amd_device_attribute_query *
 *********************************/
+
 #define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD        0x4036

+
 /*********************************
 * cl_arm_printf extension
 *********************************/
+
 #define CL_PRINTF_CALLBACK_ARM                      0x40B0
 #define CL_PRINTF_BUFFERSIZE_ARM                    0x40B1

-#ifdef CL_VERSION_1_1
-   /***********************************
-    * cl_ext_device_fission extension *
-    ***********************************/
-    #define cl_ext_device_fission   1
-    
-    extern CL_API_ENTRY cl_int CL_API_CALL
-    clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
-    
-    typedef CL_API_ENTRY cl_int 
-    (CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;

-    extern CL_API_ENTRY cl_int CL_API_CALL
-    clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1; 
-    
-    typedef CL_API_ENTRY cl_int 
-    (CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+/***********************************
+* cl_ext_device_fission extension
+***********************************/
+#define cl_ext_device_fission   1

-    typedef cl_ulong  cl_device_partition_property_ext;
-    extern CL_API_ENTRY cl_int CL_API_CALL
-    clCreateSubDevicesEXT(  cl_device_id /*in_device*/,
-                            const cl_device_partition_property_ext * /* properties */,
-                            cl_uint /*num_entries*/,
-                            cl_device_id * /*out_devices*/,
-                            cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;

-    typedef CL_API_ENTRY cl_int 
-    ( CL_API_CALL * clCreateSubDevicesEXT_fn)(  cl_device_id /*in_device*/,
-                                                const cl_device_partition_property_ext * /* properties */,
-                                                cl_uint /*num_entries*/,
-                                                cl_device_id * /*out_devices*/,
-                                                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clReleaseDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainDeviceEXT(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clRetainDeviceEXT_fn)(cl_device_id device) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef cl_ulong  cl_device_partition_property_ext;
+extern CL_API_ENTRY cl_int CL_API_CALL
+clCreateSubDevicesEXT(cl_device_id   in_device,
+                      const cl_device_partition_property_ext * properties,
+                      cl_uint        num_entries,
+                      cl_device_id * out_devices,
+                      cl_uint *      num_devices) CL_EXT_SUFFIX__VERSION_1_1;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clCreateSubDevicesEXT_fn)(cl_device_id   in_device,
+                                         const cl_device_partition_property_ext * properties,
+                                         cl_uint        num_entries,
+                                         cl_device_id * out_devices,
+                                         cl_uint *      num_devices) CL_EXT_SUFFIX__VERSION_1_1;
+
+/* cl_device_partition_property_ext */
+#define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
+#define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
+#define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
+#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
+
+/* clGetDeviceInfo selectors */
+#define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
+#define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
+#define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
+#define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
+#define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
+
+/* error codes */
+#define CL_DEVICE_PARTITION_FAILED_EXT              -1057
+#define CL_INVALID_PARTITION_COUNT_EXT              -1058
+#define CL_INVALID_PARTITION_NAME_EXT               -1059
+
+/* CL_AFFINITY_DOMAINs */
+#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
+#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
+#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
+#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
+#define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
+#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
+
+/* cl_device_partition_property_ext list terminators */
+#define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
+#define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
+#define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)
+
+
+/***********************************
+ * cl_ext_migrate_memobject extension definitions
+ ***********************************/
+#define cl_ext_migrate_memobject 1
+
+typedef cl_bitfield cl_mem_migration_flags_ext;
+
+#define CL_MIGRATE_MEM_OBJECT_HOST_EXT              0x1
+
+#define CL_COMMAND_MIGRATE_MEM_OBJECT_EXT           0x4040
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueMigrateMemObjectEXT(cl_command_queue command_queue,
+                             cl_uint          num_mem_objects,
+                             const cl_mem *   mem_objects,
+                             cl_mem_migration_flags_ext flags,
+                             cl_uint          num_events_in_wait_list,
+                             const cl_event * event_wait_list,
+                             cl_event *       event);
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL *clEnqueueMigrateMemObjectEXT_fn)(cl_command_queue command_queue,
+                                               cl_uint          num_mem_objects,
+                                               const cl_mem *   mem_objects,
+                                               cl_mem_migration_flags_ext flags,
+                                               cl_uint          num_events_in_wait_list,
+                                               const cl_event * event_wait_list,
+                                               cl_event *       event);

-    /* cl_device_partition_property_ext */
-    #define CL_DEVICE_PARTITION_EQUALLY_EXT             0x4050
-    #define CL_DEVICE_PARTITION_BY_COUNTS_EXT           0x4051
-    #define CL_DEVICE_PARTITION_BY_NAMES_EXT            0x4052
-    #define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT  0x4053
-    
-    /* clDeviceGetInfo selectors */
-    #define CL_DEVICE_PARENT_DEVICE_EXT                 0x4054
-    #define CL_DEVICE_PARTITION_TYPES_EXT               0x4055
-    #define CL_DEVICE_AFFINITY_DOMAINS_EXT              0x4056
-    #define CL_DEVICE_REFERENCE_COUNT_EXT               0x4057
-    #define CL_DEVICE_PARTITION_STYLE_EXT               0x4058
-    
-    /* error codes */
-    #define CL_DEVICE_PARTITION_FAILED_EXT              -1057
-    #define CL_INVALID_PARTITION_COUNT_EXT              -1058
-    #define CL_INVALID_PARTITION_NAME_EXT               -1059
-    
-    /* CL_AFFINITY_DOMAINs */
-    #define CL_AFFINITY_DOMAIN_L1_CACHE_EXT             0x1
-    #define CL_AFFINITY_DOMAIN_L2_CACHE_EXT             0x2
-    #define CL_AFFINITY_DOMAIN_L3_CACHE_EXT             0x3
-    #define CL_AFFINITY_DOMAIN_L4_CACHE_EXT             0x4
-    #define CL_AFFINITY_DOMAIN_NUMA_EXT                 0x10
-    #define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT     0x100
-    
-    /* cl_device_partition_property_ext list terminators */
-    #define CL_PROPERTIES_LIST_END_EXT                  ((cl_device_partition_property_ext) 0)
-    #define CL_PARTITION_BY_COUNTS_LIST_END_EXT         ((cl_device_partition_property_ext) 0)
-    #define CL_PARTITION_BY_NAMES_LIST_END_EXT          ((cl_device_partition_property_ext) 0 - 1)

 /*********************************
 * cl_qcom_ext_host_ptr extension
 *********************************/
+#define cl_qcom_ext_host_ptr 1

 #define CL_MEM_EXT_HOST_PTR_QCOM                  (1 << 29)

-#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM   0x40A0      
+#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM   0x40A0
 #define CL_DEVICE_PAGE_SIZE_QCOM                  0x40A1
 #define CL_IMAGE_ROW_ALIGNMENT_QCOM               0x40A2
 #define CL_IMAGE_SLICE_ALIGNMENT_QCOM             0x40A3
@@ -280,12 +371,21 @@ typedef struct _cl_mem_ext_host_ptr
    /* Type of external memory allocation. */
    /* Legal values will be defined in layered extensions. */
    cl_uint  allocation_type;
-            
-	/* Host cache policy for this external memory allocation. */
+
+    /* Host cache policy for this external memory allocation. */
    cl_uint  host_cache_policy;

 } cl_mem_ext_host_ptr;

+
+/*******************************************
+* cl_qcom_ext_host_ptr_iocoherent extension
+********************************************/
+
+/* Cache policy specifying io-coherence */
+#define CL_MEM_HOST_IOCOHERENT_QCOM               0x40A9
+
+
 /*********************************
 * cl_qcom_ion_host_ptr extension
 *********************************/
@@ -300,13 +400,339 @@ typedef struct _cl_mem_ion_host_ptr

    /* ION file descriptor */
    int                  ion_filedesc;
-            
+
    /* Host pointer to the ION allocated memory */
    void*                ion_hostptr;

 } cl_mem_ion_host_ptr;

-#endif /* CL_VERSION_1_1 */
+
+/*********************************
+* cl_qcom_android_native_buffer_host_ptr extension
+*********************************/
+
+#define CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM                  0x40C6
+
+typedef struct _cl_mem_android_native_buffer_host_ptr
+{
+    /* Type of external memory allocation. */
+    /* Must be CL_MEM_ANDROID_NATIVE_BUFFER_HOST_PTR_QCOM for Android native buffers. */
+    cl_mem_ext_host_ptr  ext_host_ptr;
+
+    /* Virtual pointer to the android native buffer */
+    void*                anb_ptr;
+
+} cl_mem_android_native_buffer_host_ptr;
+
+
+/******************************************
+ * cl_img_yuv_image extension *
+ ******************************************/
+
+/* Image formats used in clCreateImage */
+#define CL_NV21_IMG                                 0x40D0
+#define CL_YV12_IMG                                 0x40D1
+
+
+/******************************************
+ * cl_img_cached_allocations extension *
+ ******************************************/
+
+/* Flag values used by clCreateBuffer */
+#define CL_MEM_USE_UNCACHED_CPU_MEMORY_IMG          (1 << 26)
+#define CL_MEM_USE_CACHED_CPU_MEMORY_IMG            (1 << 27)
+
+
+/******************************************
+ * cl_img_use_gralloc_ptr extension *
+ ******************************************/
+#define cl_img_use_gralloc_ptr 1
+
+/* Flag values used by clCreateBuffer */
+#define CL_MEM_USE_GRALLOC_PTR_IMG                  (1 << 28)
+
+/* To be used by clGetEventInfo: */
+#define CL_COMMAND_ACQUIRE_GRALLOC_OBJECTS_IMG      0x40D2
+#define CL_COMMAND_RELEASE_GRALLOC_OBJECTS_IMG      0x40D3
+
+/* Error code from clEnqueueReleaseGrallocObjectsIMG */
+#define CL_GRALLOC_RESOURCE_NOT_ACQUIRED_IMG        0x40D4
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireGrallocObjectsIMG(cl_command_queue      command_queue,
+                                  cl_uint               num_objects,
+                                  const cl_mem *        mem_objects,
+                                  cl_uint               num_events_in_wait_list,
+                                  const cl_event *      event_wait_list,
+                                  cl_event *            event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseGrallocObjectsIMG(cl_command_queue      command_queue,
+                                  cl_uint               num_objects,
+                                  const cl_mem *        mem_objects,
+                                  cl_uint               num_events_in_wait_list,
+                                  const cl_event *      event_wait_list,
+                                  cl_event *            event) CL_EXT_SUFFIX__VERSION_1_2;
+
+
+/*********************************
+* cl_khr_subgroups extension
+*********************************/
+#define cl_khr_subgroups 1
+
+#if !defined(CL_VERSION_2_1)
+/* For OpenCL 2.1 and newer, cl_kernel_sub_group_info is declared in CL/cl.h.
+   In hindsight, there should have been a khr suffix on this type for
+   the extension, but keeping it un-suffixed to maintain backwards
+   compatibility. */
+typedef cl_uint             cl_kernel_sub_group_info;
+#endif
+
+/* cl_kernel_sub_group_info */
+#define CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR    0x2033
+#define CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR       0x2034
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetKernelSubGroupInfoKHR(cl_kernel    in_kernel,
+                           cl_device_id in_device,
+                           cl_kernel_sub_group_info param_name,
+                           size_t       input_value_size,
+                           const void * input_value,
+                           size_t       param_value_size,
+                           void *       param_value,
+                           size_t *     param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
+
+typedef CL_API_ENTRY cl_int
+(CL_API_CALL * clGetKernelSubGroupInfoKHR_fn)(cl_kernel    in_kernel,
+                                              cl_device_id in_device,
+                                              cl_kernel_sub_group_info param_name,
+                                              size_t       input_value_size,
+                                              const void * input_value,
+                                              size_t       param_value_size,
+                                              void *       param_value,
+                                              size_t *     param_value_size_ret) CL_EXT_SUFFIX__VERSION_2_0_DEPRECATED;
+
+
+/*********************************
+* cl_khr_mipmap_image extension
+*********************************/
+
+/* cl_sampler_properties */
+#define CL_SAMPLER_MIP_FILTER_MODE_KHR              0x1155
+#define CL_SAMPLER_LOD_MIN_KHR                      0x1156
+#define CL_SAMPLER_LOD_MAX_KHR                      0x1157
+
+
+/*********************************
+* cl_khr_priority_hints extension
+*********************************/
+/* This extension define is for backwards compatibility.
+   It shouldn't be required since this extension has no new functions. */
+#define cl_khr_priority_hints 1
+
+typedef cl_uint  cl_queue_priority_khr;
+
+/* cl_command_queue_properties */
+#define CL_QUEUE_PRIORITY_KHR 0x1096
+
+/* cl_queue_priority_khr */
+#define CL_QUEUE_PRIORITY_HIGH_KHR (1<<0)
+#define CL_QUEUE_PRIORITY_MED_KHR (1<<1)
+#define CL_QUEUE_PRIORITY_LOW_KHR (1<<2)
+
+
+/*********************************
+* cl_khr_throttle_hints extension
+*********************************/
+/* This extension define is for backwards compatibility.
+   It shouldn't be required since this extension has no new functions. */
+#define cl_khr_throttle_hints 1
+
+typedef cl_uint  cl_queue_throttle_khr;
+
+/* cl_command_queue_properties */
+#define CL_QUEUE_THROTTLE_KHR 0x1097
+
+/* cl_queue_throttle_khr */
+#define CL_QUEUE_THROTTLE_HIGH_KHR (1<<0)
+#define CL_QUEUE_THROTTLE_MED_KHR (1<<1)
+#define CL_QUEUE_THROTTLE_LOW_KHR (1<<2)
+
+
+/*********************************
+* cl_khr_subgroup_named_barrier
+*********************************/
+/* This extension define is for backwards compatibility.
+   It shouldn't be required since this extension has no new functions. */
+#define cl_khr_subgroup_named_barrier 1
+
+/* cl_device_info */
+#define CL_DEVICE_MAX_NAMED_BARRIER_COUNT_KHR       0x2035
+
+
+/**********************************
+ * cl_arm_import_memory extension *
+ **********************************/
+#define cl_arm_import_memory 1
+
+typedef intptr_t cl_import_properties_arm;
+
+/* Default and valid properties name for cl_arm_import_memory */
+#define CL_IMPORT_TYPE_ARM                        0x40B2
+
+/* Host process memory type default value for CL_IMPORT_TYPE_ARM property */
+#define CL_IMPORT_TYPE_HOST_ARM                   0x40B3
+
+/* DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
+#define CL_IMPORT_TYPE_DMA_BUF_ARM                0x40B4
+
+/* Protected DMA BUF memory type value for CL_IMPORT_TYPE_ARM property */
+#define CL_IMPORT_TYPE_PROTECTED_ARM              0x40B5
+
+/* This extension adds a new function that allows for direct memory import into
+ * OpenCL via the clImportMemoryARM function.
+ *
+ * Memory imported through this interface will be mapped into the device's page
+ * tables directly, providing zero copy access. It will never fall back to copy
+ * operations and aliased buffers.
+ *
+ * Types of memory supported for import are specified as additional extension
+ * strings.
+ *
+ * This extension produces cl_mem allocations which are compatible with all other
+ * users of cl_mem in the standard API.
+ *
+ * This extension maps pages with the same properties as the normal buffer creation
+ * function clCreateBuffer.
+ */
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clImportMemoryARM( cl_context context,
+                   cl_mem_flags flags,
+                   const cl_import_properties_arm *properties,
+                   void *memory,
+                   size_t size,
+                   cl_int *errcode_ret) CL_EXT_SUFFIX__VERSION_1_0;
+
+
+/******************************************
+ * cl_arm_shared_virtual_memory extension *
+ ******************************************/
+#define cl_arm_shared_virtual_memory 1
+
+/* Used by clGetDeviceInfo */
+#define CL_DEVICE_SVM_CAPABILITIES_ARM                  0x40B6
+
+/* Used by clGetMemObjectInfo */
+#define CL_MEM_USES_SVM_POINTER_ARM                     0x40B7
+
+/* Used by clSetKernelExecInfoARM: */
+#define CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM                0x40B8
+#define CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM   0x40B9
+
+/* To be used by clGetEventInfo: */
+#define CL_COMMAND_SVM_FREE_ARM                         0x40BA
+#define CL_COMMAND_SVM_MEMCPY_ARM                       0x40BB
+#define CL_COMMAND_SVM_MEMFILL_ARM                      0x40BC
+#define CL_COMMAND_SVM_MAP_ARM                          0x40BD
+#define CL_COMMAND_SVM_UNMAP_ARM                        0x40BE
+
+/* Flag values returned by clGetDeviceInfo with CL_DEVICE_SVM_CAPABILITIES_ARM as the param_name. */
+#define CL_DEVICE_SVM_COARSE_GRAIN_BUFFER_ARM           (1 << 0)
+#define CL_DEVICE_SVM_FINE_GRAIN_BUFFER_ARM             (1 << 1)
+#define CL_DEVICE_SVM_FINE_GRAIN_SYSTEM_ARM             (1 << 2)
+#define CL_DEVICE_SVM_ATOMICS_ARM                       (1 << 3)
+
+/* Flag values used by clSVMAllocARM: */
+#define CL_MEM_SVM_FINE_GRAIN_BUFFER_ARM                (1 << 10)
+#define CL_MEM_SVM_ATOMICS_ARM                          (1 << 11)
+
+typedef cl_bitfield cl_svm_mem_flags_arm;
+typedef cl_uint     cl_kernel_exec_info_arm;
+typedef cl_bitfield cl_device_svm_capabilities_arm;
+
+extern CL_API_ENTRY void * CL_API_CALL
+clSVMAllocARM(cl_context       context,
+              cl_svm_mem_flags_arm flags,
+              size_t           size,
+              cl_uint          alignment) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY void CL_API_CALL
+clSVMFreeARM(cl_context        context,
+             void *            svm_pointer) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMFreeARM(cl_command_queue  command_queue,
+                    cl_uint           num_svm_pointers,
+                    void *            svm_pointers[],
+                    void (CL_CALLBACK * pfn_free_func)(cl_command_queue queue,
+                                                       cl_uint          num_svm_pointers,
+                                                       void *           svm_pointers[],
+                                                       void *           user_data),
+                    void *            user_data,
+                    cl_uint           num_events_in_wait_list,
+                    const cl_event *  event_wait_list,
+                    cl_event *        event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMMemcpyARM(cl_command_queue  command_queue,
+                      cl_bool           blocking_copy,
+                      void *            dst_ptr,
+                      const void *      src_ptr,
+                      size_t            size,
+                      cl_uint           num_events_in_wait_list,
+                      const cl_event *  event_wait_list,
+                      cl_event *        event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMMemFillARM(cl_command_queue  command_queue,
+                       void *            svm_ptr,
+                       const void *      pattern,
+                       size_t            pattern_size,
+                       size_t            size,
+                       cl_uint           num_events_in_wait_list,
+                       const cl_event *  event_wait_list,
+                       cl_event *        event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMMapARM(cl_command_queue  command_queue,
+                   cl_bool           blocking_map,
+                   cl_map_flags      flags,
+                   void *            svm_ptr,
+                   size_t            size,
+                   cl_uint           num_events_in_wait_list,
+                   const cl_event *  event_wait_list,
+                   cl_event *        event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueSVMUnmapARM(cl_command_queue  command_queue,
+                     void *            svm_ptr,
+                     cl_uint           num_events_in_wait_list,
+                     const cl_event *  event_wait_list,
+                     cl_event *        event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelArgSVMPointerARM(cl_kernel    kernel,
+                            cl_uint      arg_index,
+                            const void * arg_value) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clSetKernelExecInfoARM(cl_kernel            kernel,
+                       cl_kernel_exec_info_arm  param_name,
+                       size_t               param_value_size,
+                       const void *         param_value) CL_EXT_SUFFIX__VERSION_1_2;
+
+/********************************
+ * cl_arm_get_core_id extension *
+ ********************************/
+
+#ifdef CL_VERSION_1_2
+
+#define cl_arm_get_core_id 1
+
+/* Device info property for bitfield of cores present */
+#define CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM      0x40BF
+
+#endif  /* CL_VERSION_1_2 */

 #ifdef __cplusplus
 }
--- a/include/CL/cl_ext_intel.h
+++ b/include/CL/cl_ext_intel.h
@@ -0,0 +1,423 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2019 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+/*****************************************************************************\
+
+Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
+
+THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
+MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+File Name: cl_ext_intel.h
+
+Abstract:
+
+Notes:
+
+\*****************************************************************************/
+
+#ifndef __CL_EXT_INTEL_H
+#define __CL_EXT_INTEL_H
+
+#include <CL/cl.h>
+#include <CL/cl_platform.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/***************************************
+* cl_intel_thread_local_exec extension *
+****************************************/
+
+#define cl_intel_thread_local_exec 1
+
+#define CL_QUEUE_THREAD_LOCAL_EXEC_ENABLE_INTEL      (((cl_bitfield)1) << 31)
+
+/***********************************************
+* cl_intel_device_partition_by_names extension *
+************************************************/
+
+#define cl_intel_device_partition_by_names 1
+
+#define CL_DEVICE_PARTITION_BY_NAMES_INTEL          0x4052
+#define CL_PARTITION_BY_NAMES_LIST_END_INTEL        -1
+
+/************************************************
+* cl_intel_accelerator extension                *
+* cl_intel_motion_estimation extension          *
+* cl_intel_advanced_motion_estimation extension *
+*************************************************/
+
+#define cl_intel_accelerator 1
+#define cl_intel_motion_estimation 1
+#define cl_intel_advanced_motion_estimation 1
+
+typedef struct _cl_accelerator_intel* cl_accelerator_intel;
+typedef cl_uint cl_accelerator_type_intel;
+typedef cl_uint cl_accelerator_info_intel;
+
+typedef struct _cl_motion_estimation_desc_intel {
+    cl_uint mb_block_type;
+    cl_uint subpixel_mode;
+    cl_uint sad_adjust_mode;
+    cl_uint search_path_type;
+} cl_motion_estimation_desc_intel;
+
+/* error codes */
+#define CL_INVALID_ACCELERATOR_INTEL                              -1094
+#define CL_INVALID_ACCELERATOR_TYPE_INTEL                         -1095
+#define CL_INVALID_ACCELERATOR_DESCRIPTOR_INTEL                   -1096
+#define CL_ACCELERATOR_TYPE_NOT_SUPPORTED_INTEL                   -1097
+
+/* cl_accelerator_type_intel */
+#define CL_ACCELERATOR_TYPE_MOTION_ESTIMATION_INTEL               0x0
+
+/* cl_accelerator_info_intel */
+#define CL_ACCELERATOR_DESCRIPTOR_INTEL                           0x4090
+#define CL_ACCELERATOR_REFERENCE_COUNT_INTEL                      0x4091
+#define CL_ACCELERATOR_CONTEXT_INTEL                              0x4092
+#define CL_ACCELERATOR_TYPE_INTEL                                 0x4093
+
+/* cl_motion_estimation_desc_intel flags */
+#define CL_ME_MB_TYPE_16x16_INTEL                                 0x0
+#define CL_ME_MB_TYPE_8x8_INTEL                                   0x1
+#define CL_ME_MB_TYPE_4x4_INTEL                                   0x2
+
+#define CL_ME_SUBPIXEL_MODE_INTEGER_INTEL                         0x0
+#define CL_ME_SUBPIXEL_MODE_HPEL_INTEL                            0x1
+#define CL_ME_SUBPIXEL_MODE_QPEL_INTEL                            0x2
+
+#define CL_ME_SAD_ADJUST_MODE_NONE_INTEL                          0x0
+#define CL_ME_SAD_ADJUST_MODE_HAAR_INTEL                          0x1
+
+#define CL_ME_SEARCH_PATH_RADIUS_2_2_INTEL                        0x0
+#define CL_ME_SEARCH_PATH_RADIUS_4_4_INTEL                        0x1
+#define CL_ME_SEARCH_PATH_RADIUS_16_12_INTEL                      0x5
+
+#define CL_ME_SKIP_BLOCK_TYPE_16x16_INTEL                         0x0
+#define CL_ME_CHROMA_INTRA_PREDICT_ENABLED_INTEL                  0x1
+#define CL_ME_LUMA_INTRA_PREDICT_ENABLED_INTEL                    0x2
+#define CL_ME_SKIP_BLOCK_TYPE_8x8_INTEL                           0x4
+
+#define CL_ME_FORWARD_INPUT_MODE_INTEL                            0x1
+#define CL_ME_BACKWARD_INPUT_MODE_INTEL                           0x2
+#define CL_ME_BIDIRECTION_INPUT_MODE_INTEL                        0x3
+
+#define CL_ME_BIDIR_WEIGHT_QUARTER_INTEL                          16
+#define CL_ME_BIDIR_WEIGHT_THIRD_INTEL                            21
+#define CL_ME_BIDIR_WEIGHT_HALF_INTEL                             32
+#define CL_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL                        43
+#define CL_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL                    48
+
+#define CL_ME_COST_PENALTY_NONE_INTEL                             0x0
+#define CL_ME_COST_PENALTY_LOW_INTEL                              0x1
+#define CL_ME_COST_PENALTY_NORMAL_INTEL                           0x2
+#define CL_ME_COST_PENALTY_HIGH_INTEL                             0x3
+
+#define CL_ME_COST_PRECISION_QPEL_INTEL                           0x0
+#define CL_ME_COST_PRECISION_HPEL_INTEL                           0x1
+#define CL_ME_COST_PRECISION_PEL_INTEL                            0x2
+#define CL_ME_COST_PRECISION_DPEL_INTEL                           0x3
+
+#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL                  0x0
+#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL                0x1
+#define CL_ME_LUMA_PREDICTOR_MODE_DC_INTEL                        0x2
+#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL        0x3
+
+#define CL_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL       0x4
+#define CL_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL                     0x4
+#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL            0x5
+#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL           0x6
+#define CL_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL             0x7
+#define CL_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL             0x8
+
+#define CL_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                      0x0
+#define CL_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL              0x1
+#define CL_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL                0x2
+#define CL_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL                   0x3
+
+/* cl_device_info */
+#define CL_DEVICE_ME_VERSION_INTEL                                0x407E
+
+#define CL_ME_VERSION_LEGACY_INTEL                                0x0
+#define CL_ME_VERSION_ADVANCED_VER_1_INTEL                        0x1
+#define CL_ME_VERSION_ADVANCED_VER_2_INTEL                        0x2
+
+extern CL_API_ENTRY cl_accelerator_intel CL_API_CALL
+clCreateAcceleratorINTEL(
+    cl_context                   context,
+    cl_accelerator_type_intel    accelerator_type,
+    size_t                       descriptor_size,
+    const void*                  descriptor,
+    cl_int*                      errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_accelerator_intel (CL_API_CALL *clCreateAcceleratorINTEL_fn)(
+    cl_context                   context,
+    cl_accelerator_type_intel    accelerator_type,
+    size_t                       descriptor_size,
+    const void*                  descriptor,
+    cl_int*                      errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetAcceleratorInfoINTEL(
+    cl_accelerator_intel         accelerator,
+    cl_accelerator_info_intel    param_name,
+    size_t                       param_value_size,
+    void*                        param_value,
+    size_t*                      param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetAcceleratorInfoINTEL_fn)(
+    cl_accelerator_intel         accelerator,
+    cl_accelerator_info_intel    param_name,
+    size_t                       param_value_size,
+    void*                        param_value,
+    size_t*                      param_value_size_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clRetainAcceleratorINTEL(
+    cl_accelerator_intel         accelerator) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clRetainAcceleratorINTEL_fn)(
+    cl_accelerator_intel         accelerator) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clReleaseAcceleratorINTEL(
+    cl_accelerator_intel         accelerator) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clReleaseAcceleratorINTEL_fn)(
+    cl_accelerator_intel         accelerator) CL_EXT_SUFFIX__VERSION_1_2;
+
+/******************************************
+* cl_intel_simultaneous_sharing extension *
+*******************************************/
+
+#define cl_intel_simultaneous_sharing 1
+
+#define CL_DEVICE_SIMULTANEOUS_INTEROPS_INTEL            0x4104
+#define CL_DEVICE_NUM_SIMULTANEOUS_INTEROPS_INTEL        0x4105
+
+/***********************************
+* cl_intel_egl_image_yuv extension *
+************************************/
+
+#define cl_intel_egl_image_yuv 1
+
+#define CL_EGL_YUV_PLANE_INTEL                           0x4107
+
+/********************************
+* cl_intel_packed_yuv extension *
+*********************************/
+
+#define cl_intel_packed_yuv 1
+
+#define CL_YUYV_INTEL                                    0x4076
+#define CL_UYVY_INTEL                                    0x4077
+#define CL_YVYU_INTEL                                    0x4078
+#define CL_VYUY_INTEL                                    0x4079
+
+/********************************************
+* cl_intel_required_subgroup_size extension *
+*********************************************/
+
+#define cl_intel_required_subgroup_size 1
+
+#define CL_DEVICE_SUB_GROUP_SIZES_INTEL                  0x4108
+#define CL_KERNEL_SPILL_MEM_SIZE_INTEL                   0x4109
+#define CL_KERNEL_COMPILE_SUB_GROUP_SIZE_INTEL           0x410A
+
+/****************************************
+* cl_intel_driver_diagnostics extension *
+*****************************************/
+
+#define cl_intel_driver_diagnostics 1
+
+typedef cl_uint cl_diagnostics_verbose_level;
+
+#define CL_CONTEXT_SHOW_DIAGNOSTICS_INTEL                0x4106
+
+#define CL_CONTEXT_DIAGNOSTICS_LEVEL_ALL_INTEL           ( 0xff )   /* low eight bits set; covers GOOD, BAD and NEUTRAL */
+#define CL_CONTEXT_DIAGNOSTICS_LEVEL_GOOD_INTEL          ( 1 )      /* bit 0 */
+#define CL_CONTEXT_DIAGNOSTICS_LEVEL_BAD_INTEL           ( 1 << 1 ) /* bit 1 */
+#define CL_CONTEXT_DIAGNOSTICS_LEVEL_NEUTRAL_INTEL       ( 1 << 2 ) /* bit 2 */
+
+/********************************
+* cl_intel_planar_yuv extension *
+*********************************/
+
+#define CL_NV12_INTEL                                       0x410E
+
+#define CL_MEM_NO_ACCESS_INTEL                              ( 1 << 24 )
+#define CL_MEM_ACCESS_FLAGS_UNRESTRICTED_INTEL              ( 1 << 25 )
+
+#define CL_DEVICE_PLANAR_YUV_MAX_WIDTH_INTEL                0x417E
+#define CL_DEVICE_PLANAR_YUV_MAX_HEIGHT_INTEL               0x417F
+
+/*******************************************************
+* cl_intel_device_side_avc_motion_estimation extension *
+********************************************************/
+
+#define CL_DEVICE_AVC_ME_VERSION_INTEL                      0x410B
+#define CL_DEVICE_AVC_ME_SUPPORTS_TEXTURE_SAMPLER_USE_INTEL 0x410C
+#define CL_DEVICE_AVC_ME_SUPPORTS_PREEMPTION_INTEL          0x410D
+
+#define CL_AVC_ME_VERSION_0_INTEL                           0x0   /* No support. */
+#define CL_AVC_ME_VERSION_1_INTEL                           0x1   /* First supported version. */
+
+#define CL_AVC_ME_MAJOR_16x16_INTEL                         0x0
+#define CL_AVC_ME_MAJOR_16x8_INTEL                          0x1
+#define CL_AVC_ME_MAJOR_8x16_INTEL                          0x2
+#define CL_AVC_ME_MAJOR_8x8_INTEL                           0x3
+
+#define CL_AVC_ME_MINOR_8x8_INTEL                           0x0
+#define CL_AVC_ME_MINOR_8x4_INTEL                           0x1
+#define CL_AVC_ME_MINOR_4x8_INTEL                           0x2
+#define CL_AVC_ME_MINOR_4x4_INTEL                           0x3
+
+#define CL_AVC_ME_MAJOR_FORWARD_INTEL                       0x0
+#define CL_AVC_ME_MAJOR_BACKWARD_INTEL                      0x1
+#define CL_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL                 0x2
+
+#define CL_AVC_ME_PARTITION_MASK_ALL_INTEL                  0x0   /* no bits set; NOTE(review): a set bit presumably masks out one partition shape - confirm */
+#define CL_AVC_ME_PARTITION_MASK_16x16_INTEL                0x7E  /* 0x7F with bit 0 cleared */
+#define CL_AVC_ME_PARTITION_MASK_16x8_INTEL                 0x7D  /* 0x7F with bit 1 cleared */
+#define CL_AVC_ME_PARTITION_MASK_8x16_INTEL                 0x7B  /* 0x7F with bit 2 cleared */
+#define CL_AVC_ME_PARTITION_MASK_8x8_INTEL                  0x77  /* 0x7F with bit 3 cleared */
+#define CL_AVC_ME_PARTITION_MASK_8x4_INTEL                  0x6F  /* 0x7F with bit 4 cleared */
+#define CL_AVC_ME_PARTITION_MASK_4x8_INTEL                  0x5F  /* 0x7F with bit 5 cleared */
+#define CL_AVC_ME_PARTITION_MASK_4x4_INTEL                  0x3F  /* 0x7F with bit 6 cleared */
+
+#define CL_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL            0x0
+#define CL_AVC_ME_SEARCH_WINDOW_SMALL_INTEL                 0x1
+#define CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL                  0x2
+#define CL_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL            0x3
+#define CL_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL               0x4
+#define CL_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL         0x5
+#define CL_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL             0x6
+#define CL_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL             0x7
+#define CL_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL                0x8
+#define CL_AVC_ME_SEARCH_WINDOW_16x12_RADIUS_INTEL          0x9
+#define CL_AVC_ME_SEARCH_WINDOW_4x4_RADIUS_INTEL            0x2 /* NOTE(review): same value as CL_AVC_ME_SEARCH_WINDOW_TINY_INTEL; confirm the alias is intended */
+#define CL_AVC_ME_SEARCH_WINDOW_2x2_RADIUS_INTEL            0xa
+
+#define CL_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL                0x0
+#define CL_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL                0x2
+
+#define CL_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL               0x0
+#define CL_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL                  0x1
+#define CL_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL                  0x3
+
+#define CL_AVC_ME_COST_PRECISION_QPEL_INTEL                 0x0
+#define CL_AVC_ME_COST_PRECISION_HPEL_INTEL                 0x1
+#define CL_AVC_ME_COST_PRECISION_PEL_INTEL                  0x2
+#define CL_AVC_ME_COST_PRECISION_DPEL_INTEL                 0x3
+
+#define CL_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL                0x10
+#define CL_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL                  0x15
+#define CL_AVC_ME_BIDIR_WEIGHT_HALF_INTEL                   0x20
+#define CL_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL              0x2B
+#define CL_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL          0x30
+
+#define CL_AVC_ME_BORDER_REACHED_LEFT_INTEL                 0x0 /* NOTE(review): zero, unlike the single-bit values below, so a bitwise AND can never match it - confirm against the extension spec */
+#define CL_AVC_ME_BORDER_REACHED_RIGHT_INTEL                0x2 /* bit 1 */
+#define CL_AVC_ME_BORDER_REACHED_TOP_INTEL                  0x4 /* bit 2 */
+#define CL_AVC_ME_BORDER_REACHED_BOTTOM_INTEL               0x8 /* bit 3 */
+
+#define CL_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL          0x0
+#define CL_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL            0x4000
+
+#define CL_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL     ( 0x1u << 24 )  /* unsigned operands throughout: shifting a signed int into bit 31 is undefined */
+#define CL_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL    ( 0x2u << 24 )
+#define CL_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL        ( 0x3u << 24 )  /* 16x16 FORWARD | BACKWARD */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL       ( 0x55u << 24 ) /* OR of the four 8x8_n_FORWARD bits (24, 26, 28, 30) */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL      ( 0xAAu << 24 ) /* OR of the four 8x8_n_BACKWARD bits (25, 27, 29, 31) */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL          ( 0xFFu << 24 ) /* 8x8 FORWARD | BACKWARD */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL     ( 0x1u << 24 )  /* bit 24 */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL    ( 0x2u << 24 )  /* bit 25 */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL     ( 0x1u << 26 )  /* bit 26 */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL    ( 0x2u << 26 )  /* bit 27 */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL     ( 0x1u << 28 )  /* bit 28 */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL    ( 0x2u << 28 )  /* bit 29 */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL     ( 0x1u << 30 )  /* bit 30 */
+#define CL_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL    ( 0x2u << 30 )  /* bit 31 */
+
+#define CL_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL                0x00
+#define CL_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL                0x80
+
+#define CL_AVC_ME_INTRA_16x16_INTEL                         0x0
+#define CL_AVC_ME_INTRA_8x8_INTEL                           0x1
+#define CL_AVC_ME_INTRA_4x4_INTEL                           0x2
+
+#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL     0x6 /* 0x7 with bit 0 cleared */
+#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL       0x5 /* 0x7 with bit 1 cleared */
+#define CL_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL       0x3 /* 0x7 with bit 2 cleared */
+
+#define CL_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL         0x60
+#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL        0x10
+#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL  0x8
+#define CL_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL   0x4
+
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL            0x0
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL          0x1
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL                  0x2
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL  0x3
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL               0x4 /* NOTE(review): shares 0x4 with DIAGONAL_DOWN_RIGHT; confirm the alias is intended */
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL      0x5
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL     0x6
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL       0x7
+#define CL_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL       0x8
+#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL                0x0
+#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL        0x1
+#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL          0x2
+#define CL_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL             0x3
+
+#define CL_AVC_ME_FRAME_FORWARD_INTEL                       0x1
+#define CL_AVC_ME_FRAME_BACKWARD_INTEL                      0x2
+#define CL_AVC_ME_FRAME_DUAL_INTEL                          0x3
+
+#define CL_AVC_ME_SLICE_TYPE_PRED_INTEL                     0x0
+#define CL_AVC_ME_SLICE_TYPE_BPRED_INTEL                    0x1
+#define CL_AVC_ME_SLICE_TYPE_INTRA_INTEL                    0x2
+
+#define CL_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL           0x0
+#define CL_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL        0x1
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CL_EXT_INTEL_H */
--- a/include/CL/cl_gl.h
+++ b/include/CL/cl_gl.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008 - 2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2019 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -24,11 +29,7 @@
 #ifndef __OPENCL_CL_GL_H
 #define __OPENCL_CL_GL_H

-#ifdef __APPLE__
-#include <OpenCL/cl.h>
-#else
 #include <CL/cl.h>
-#endif	

 #ifdef __cplusplus
 extern "C" {
@@ -44,110 +45,118 @@ typedef struct __GLsync *cl_GLsync;
 #define CL_GL_OBJECT_TEXTURE2D                  0x2001
 #define CL_GL_OBJECT_TEXTURE3D                  0x2002
 #define CL_GL_OBJECT_RENDERBUFFER               0x2003
+#ifdef CL_VERSION_1_2
 #define CL_GL_OBJECT_TEXTURE2D_ARRAY            0x200E
 #define CL_GL_OBJECT_TEXTURE1D                  0x200F
 #define CL_GL_OBJECT_TEXTURE1D_ARRAY            0x2010
 #define CL_GL_OBJECT_TEXTURE_BUFFER             0x2011
+#endif

 /* cl_gl_texture_info           */
 #define CL_GL_TEXTURE_TARGET                    0x2004
 #define CL_GL_MIPMAP_LEVEL                      0x2005
+#ifdef CL_VERSION_1_2
 #define CL_GL_NUM_SAMPLES                       0x2012
+#endif


 extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLBuffer(cl_context     /* context */,
-                     cl_mem_flags   /* flags */,
-                     cl_GLuint      /* bufobj */,
-                     int *          /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+clCreateFromGLBuffer(cl_context     context,
+                     cl_mem_flags   flags,
+                     cl_GLuint      bufobj,
+                     cl_int *       errcode_ret) CL_API_SUFFIX__VERSION_1_0;
+
+#ifdef CL_VERSION_1_2

 extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLTexture(cl_context      /* context */,
-                      cl_mem_flags    /* flags */,
-                      cl_GLenum       /* target */,
-                      cl_GLint        /* miplevel */,
-                      cl_GLuint       /* texture */,
-                      cl_int *        /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
-    
+clCreateFromGLTexture(cl_context      context,
+                      cl_mem_flags    flags,
+                      cl_GLenum       target,
+                      cl_GLint        miplevel,
+                      cl_GLuint       texture,
+                      cl_int *        errcode_ret) CL_API_SUFFIX__VERSION_1_2;
+
+#endif
+
 extern CL_API_ENTRY cl_mem CL_API_CALL
-clCreateFromGLRenderbuffer(cl_context   /* context */,
-                           cl_mem_flags /* flags */,
-                           cl_GLuint    /* renderbuffer */,
-                           cl_int *     /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
+clCreateFromGLRenderbuffer(cl_context   context,
+                           cl_mem_flags flags,
+                           cl_GLuint    renderbuffer,
+                           cl_int *     errcode_ret) CL_API_SUFFIX__VERSION_1_0;

 extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLObjectInfo(cl_mem                /* memobj */,
-                  cl_gl_object_type *   /* gl_object_type */,
-                  cl_GLuint *           /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
-                  
-extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLTextureInfo(cl_mem               /* memobj */,
-                   cl_gl_texture_info   /* param_name */,
-                   size_t               /* param_value_size */,
-                   void *               /* param_value */,
-                   size_t *             /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
+clGetGLObjectInfo(cl_mem                memobj,
+                  cl_gl_object_type *   gl_object_type,
+                  cl_GLuint *           gl_object_name) CL_API_SUFFIX__VERSION_1_0;

 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueAcquireGLObjects(cl_command_queue      /* command_queue */,
-                          cl_uint               /* num_objects */,
-                          const cl_mem *        /* mem_objects */,
-                          cl_uint               /* num_events_in_wait_list */,
-                          const cl_event *      /* event_wait_list */,
-                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+clGetGLTextureInfo(cl_mem               memobj,
+                   cl_gl_texture_info   param_name,
+                   size_t               param_value_size,
+                   void *               param_value,
+                   size_t *             param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;

 extern CL_API_ENTRY cl_int CL_API_CALL
-clEnqueueReleaseGLObjects(cl_command_queue      /* command_queue */,
-                          cl_uint               /* num_objects */,
-                          const cl_mem *        /* mem_objects */,
-                          cl_uint               /* num_events_in_wait_list */,
-                          const cl_event *      /* event_wait_list */,
-                          cl_event *            /* event */) CL_API_SUFFIX__VERSION_1_0;
+clEnqueueAcquireGLObjects(cl_command_queue      command_queue,
+                          cl_uint               num_objects,
+                          const cl_mem *        mem_objects,
+                          cl_uint               num_events_in_wait_list,
+                          const cl_event *      event_wait_list,
+                          cl_event *            event) CL_API_SUFFIX__VERSION_1_0;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseGLObjects(cl_command_queue      command_queue,
+                          cl_uint               num_objects,
+                          const cl_mem *        mem_objects,
+                          cl_uint               num_events_in_wait_list,
+                          const cl_event *      event_wait_list,
+                          cl_event *            event) CL_API_SUFFIX__VERSION_1_0;


 /* Deprecated OpenCL 1.1 APIs */
 extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateFromGLTexture2D(cl_context      /* context */,
-                        cl_mem_flags    /* flags */,
-                        cl_GLenum       /* target */,
-                        cl_GLint        /* miplevel */,
-                        cl_GLuint       /* texture */,
-                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
+clCreateFromGLTexture2D(cl_context      context,
+                        cl_mem_flags    flags,
+                        cl_GLenum       target,
+                        cl_GLint        miplevel,
+                        cl_GLuint       texture,
+                        cl_int *        errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
 extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
-clCreateFromGLTexture3D(cl_context      /* context */,
-                        cl_mem_flags    /* flags */,
-                        cl_GLenum       /* target */,
-                        cl_GLint        /* miplevel */,
-                        cl_GLuint       /* texture */,
-                        cl_int *        /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
-    
+clCreateFromGLTexture3D(cl_context      context,
+                        cl_mem_flags    flags,
+                        cl_GLenum       target,
+                        cl_GLint        miplevel,
+                        cl_GLuint       texture,
+                        cl_int *        errcode_ret) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+
 /* cl_khr_gl_sharing extension  */
-    
+
 #define cl_khr_gl_sharing 1
-    
+
 typedef cl_uint     cl_gl_context_info;
-    
+
 /* Additional Error Codes  */
 #define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR  -1000
-    
+
 /* cl_gl_context_info  */
 #define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR    0x2006
 #define CL_DEVICES_FOR_GL_CONTEXT_KHR           0x2007
-    
+
 /* Additional cl_context_properties  */
 #define CL_GL_CONTEXT_KHR                       0x2008
 #define CL_EGL_DISPLAY_KHR                      0x2009
 #define CL_GLX_DISPLAY_KHR                      0x200A
 #define CL_WGL_HDC_KHR                          0x200B
 #define CL_CGL_SHAREGROUP_KHR                   0x200C
-    
+
 extern CL_API_ENTRY cl_int CL_API_CALL
-clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
-                      cl_gl_context_info            /* param_name */,
-                      size_t                        /* param_value_size */,
-                      void *                        /* param_value */,
-                      size_t *                      /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
-    
+clGetGLContextInfoKHR(const cl_context_properties * properties,
+                      cl_gl_context_info            param_name,
+                      size_t                        param_value_size,
+                      void *                        param_value,
+                      size_t *                      param_value_size_ret) CL_API_SUFFIX__VERSION_1_0;
+
 typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
    const cl_context_properties * properties,
    cl_gl_context_info            param_name,
--- a/include/CL/cl_gl_ext.h
+++ b/include/CL/cl_gl_ext.h
@@ -1,5 +1,5 @@
 /**********************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2019 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -21,11 +26,6 @@
 * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
 **********************************************************************************/

-/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
-
-/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have           */
-/* OpenGL dependencies.                                                         */
-
 #ifndef __OPENCL_CL_GL_EXT_H
 #define __OPENCL_CL_GL_EXT_H

@@ -33,34 +33,17 @@
 extern "C" {
 #endif

-#ifdef __APPLE__
-    #include <OpenCL/cl_gl.h>
-#else
-    #include <CL/cl_gl.h>
-#endif
-
-/*
- * For each extension, follow this template
- *  cl_VEN_extname extension  */
-/* #define cl_VEN_extname 1
- * ... define new types, if any
- * ... define new tokens, if any
- * ... define new APIs, if any
- *
- *  If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
- *  This allows us to avoid having to decide whether to include GL headers or GLES here.
- */
+#include <CL/cl_gl.h>

 /* 
- *  cl_khr_gl_event  extension
- *  See section 9.9 in the OpenCL 1.1 spec for more information
+ *  cl_khr_gl_event extension
 */
 #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR     0x200D

 extern CL_API_ENTRY cl_event CL_API_CALL
-clCreateEventFromGLsyncKHR(cl_context           /* context */,
-                           cl_GLsync            /* cl_GLsync */,
-                           cl_int *             /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
+clCreateEventFromGLsyncKHR(cl_context context,
+                           cl_GLsync  sync,          /* named 'sync' so the parameter does not hide the cl_GLsync type */
+                           cl_int *   errcode_ret) CL_EXT_SUFFIX__VERSION_1_1;

 #ifdef __cplusplus
 }
--- a/include/CL/cl_platform.h
+++ b/include/CL/cl_platform.h
--- a/include/CL/cl_va_api_media_sharing_intel.h
+++ b/include/CL/cl_va_api_media_sharing_intel.h
@@ -0,0 +1,172 @@
+/**********************************************************************************
+ * Copyright (c) 2008-2019 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ **********************************************************************************/
+/*****************************************************************************\
+
+Copyright (c) 2013-2019 Intel Corporation All Rights Reserved.
+
+THESE MATERIALS ARE PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THESE
+MATERIALS, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+File Name: cl_va_api_media_sharing_intel.h
+
+Abstract:
+
+Notes:
+
+\*****************************************************************************/
+
+
+#ifndef __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
+#define __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H
+
+#include <CL/cl.h>
+#include <CL/cl_platform.h>
+#include <va/va.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/******************************************
+* cl_intel_va_api_media_sharing extension *
+*******************************************/
+
+#define cl_intel_va_api_media_sharing 1
+
+/* error codes */
+#define CL_INVALID_VA_API_MEDIA_ADAPTER_INTEL               -1098
+#define CL_INVALID_VA_API_MEDIA_SURFACE_INTEL               -1099
+#define CL_VA_API_MEDIA_SURFACE_ALREADY_ACQUIRED_INTEL      -1100
+#define CL_VA_API_MEDIA_SURFACE_NOT_ACQUIRED_INTEL          -1101
+
+/* cl_va_api_device_source_intel */
+#define CL_VA_API_DISPLAY_INTEL                             0x4094
+
+/* cl_va_api_device_set_intel */
+#define CL_PREFERRED_DEVICES_FOR_VA_API_INTEL               0x4095
+#define CL_ALL_DEVICES_FOR_VA_API_INTEL                     0x4096
+
+/* cl_context_info */
+#define CL_CONTEXT_VA_API_DISPLAY_INTEL                     0x4097
+
+/* cl_mem_info */
+#define CL_MEM_VA_API_MEDIA_SURFACE_INTEL                   0x4098
+
+/* cl_image_info */
+#define CL_IMAGE_VA_API_PLANE_INTEL                         0x4099
+
+/* cl_command_type */
+#define CL_COMMAND_ACQUIRE_VA_API_MEDIA_SURFACES_INTEL      0x409A
+#define CL_COMMAND_RELEASE_VA_API_MEDIA_SURFACES_INTEL      0x409B
+
+typedef cl_uint cl_va_api_device_source_intel;
+typedef cl_uint cl_va_api_device_set_intel;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clGetDeviceIDsFromVA_APIMediaAdapterINTEL(
+    cl_platform_id                platform,
+    cl_va_api_device_source_intel media_adapter_type,
+    void*                         media_adapter,
+    cl_va_api_device_set_intel    media_adapter_set,
+    cl_uint                       num_entries,
+    cl_device_id*                 devices,
+    cl_uint*                      num_devices) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL * clGetDeviceIDsFromVA_APIMediaAdapterINTEL_fn)(
+    cl_platform_id                platform,
+    cl_va_api_device_source_intel media_adapter_type,
+    void*                         media_adapter,
+    cl_va_api_device_set_intel    media_adapter_set,
+    cl_uint                       num_entries,
+    cl_device_id*                 devices,
+    cl_uint*                      num_devices) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_mem CL_API_CALL
+clCreateFromVA_APIMediaSurfaceINTEL(
+    cl_context                    context,
+    cl_mem_flags                  flags,
+    VASurfaceID*                  surface,
+    cl_uint                       plane,
+    cl_int*                       errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_mem (CL_API_CALL * clCreateFromVA_APIMediaSurfaceINTEL_fn)(
+    cl_context                    context,
+    cl_mem_flags                  flags,
+    VASurfaceID*                  surface,
+    cl_uint                       plane,
+    cl_int*                       errcode_ret) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueAcquireVA_APIMediaSurfacesINTEL(
+    cl_command_queue              command_queue,
+    cl_uint                       num_objects,
+    const cl_mem*                 mem_objects,
+    cl_uint                       num_events_in_wait_list,
+    const cl_event*               event_wait_list,
+    cl_event*                     event) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireVA_APIMediaSurfacesINTEL_fn)(
+    cl_command_queue              command_queue,
+    cl_uint                       num_objects,
+    const cl_mem*                 mem_objects,
+    cl_uint                       num_events_in_wait_list,
+    const cl_event*               event_wait_list,
+    cl_event*                     event) CL_EXT_SUFFIX__VERSION_1_2;
+
+extern CL_API_ENTRY cl_int CL_API_CALL
+clEnqueueReleaseVA_APIMediaSurfacesINTEL(
+    cl_command_queue              command_queue,
+    cl_uint                       num_objects,
+    const cl_mem*                 mem_objects,
+    cl_uint                       num_events_in_wait_list,
+    const cl_event*               event_wait_list,
+    cl_event*                     event) CL_EXT_SUFFIX__VERSION_1_2;
+
+typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueReleaseVA_APIMediaSurfacesINTEL_fn)(
+    cl_command_queue              command_queue,
+    cl_uint                       num_objects,
+    const cl_mem*                 mem_objects,
+    cl_uint                       num_events_in_wait_list,
+    const cl_event*               event_wait_list,
+    cl_event*                     event) CL_EXT_SUFFIX__VERSION_1_2;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  /* __OPENCL_CL_VA_API_MEDIA_SHARING_INTEL_H */
+
--- a/include/CL/cl_version.h
+++ b/include/CL/cl_version.h
@@ -0,0 +1,86 @@
+/*******************************************************************************
+ * Copyright (c) 2018 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+#ifndef __CL_VERSION_H
+#define __CL_VERSION_H
+
+/* Detect which version to target */
+#if !defined(CL_TARGET_OPENCL_VERSION)
+#pragma message("cl_version.h: CL_TARGET_OPENCL_VERSION is not defined. Defaulting to 220 (OpenCL 2.2)")
+#define CL_TARGET_OPENCL_VERSION 220
+#endif
+#if CL_TARGET_OPENCL_VERSION != 100 && \
+    CL_TARGET_OPENCL_VERSION != 110 && \
+    CL_TARGET_OPENCL_VERSION != 120 && \
+    CL_TARGET_OPENCL_VERSION != 200 && \
+    CL_TARGET_OPENCL_VERSION != 210 && \
+    CL_TARGET_OPENCL_VERSION != 220
+#pragma message("cl_version: CL_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220). Defaulting to 220 (OpenCL 2.2)")
+#undef CL_TARGET_OPENCL_VERSION
+#define CL_TARGET_OPENCL_VERSION 220
+#endif
+
+
+/* OpenCL Version */
+#if CL_TARGET_OPENCL_VERSION >= 220 && !defined(CL_VERSION_2_2)
+#define CL_VERSION_2_2  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 210 && !defined(CL_VERSION_2_1)
+#define CL_VERSION_2_1  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 200 && !defined(CL_VERSION_2_0)
+#define CL_VERSION_2_0  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 120 && !defined(CL_VERSION_1_2)
+#define CL_VERSION_1_2  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 110 && !defined(CL_VERSION_1_1)
+#define CL_VERSION_1_1  1
+#endif
+#if CL_TARGET_OPENCL_VERSION >= 100 && !defined(CL_VERSION_1_0)
+#define CL_VERSION_1_0  1
+#endif
+
+/* Allow deprecated APIs for older OpenCL versions. */
+#if CL_TARGET_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS)
+#define CL_USE_DEPRECATED_OPENCL_2_1_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS)
+#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS)
+#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
+#endif
+#if CL_TARGET_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS)
+#define CL_USE_DEPRECATED_OPENCL_1_0_APIS
+#endif
+
+#endif  /* __CL_VERSION_H */
--- a/include/CL/opencl.h
+++ b/include/CL/opencl.h
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2008-2012 The Khronos Group Inc.
+ * Copyright (c) 2008-2015 The Khronos Group Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and/or associated documentation files (the
@@ -12,6 +12,11 @@
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Materials.
 *
+ * MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS
+ * KHRONOS STANDARDS. THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS
+ * SPECIFICATIONS AND HEADER INFORMATION ARE LOCATED AT
+ *    https://www.khronos.org/registry/
+ *
 * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
@@ -30,25 +35,13 @@
 extern "C" {
 #endif

-#ifdef __APPLE__
-
-#include <OpenCL/cl.h>
-#include <OpenCL/cl_gl.h>
-#include <OpenCL/cl_gl_ext.h>
-#include <OpenCL/cl_ext.h>
-
-#else
-
 #include <CL/cl.h>
 #include <CL/cl_gl.h>
 #include <CL/cl_gl_ext.h>
 #include <CL/cl_ext.h>

-#endif
-
 #ifdef __cplusplus
 }
 #endif

 #endif  /* __OPENCL_H   */
-
--- a/include/GL/internal/dri_interface.h
+++ b/include/GL/internal/dri_interface.h
@@ -48,6 +48,8 @@ typedef unsigned int drm_drawable_t;
 typedef struct drm_clip_rect drm_clip_rect_t;
 #endif

+#include <GL/gl.h>
+
 #include <stdint.h>

 /**
@@ -1345,6 +1347,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FOURCC_YUYV		0x56595559
 #define __DRI_IMAGE_FOURCC_UYVY		0x59565955
 #define __DRI_IMAGE_FOURCC_AYUV		0x56555941
+#define __DRI_IMAGE_FOURCC_XYUV8888	0x56555958

 #define __DRI_IMAGE_FOURCC_YVU410	0x39555659
 #define __DRI_IMAGE_FOURCC_YVU411	0x31315659
@@ -1352,6 +1355,10 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_FOURCC_YVU422	0x36315659
 #define __DRI_IMAGE_FOURCC_YVU444	0x34325659

+#define __DRI_IMAGE_FOURCC_P010		0x30313050
+#define __DRI_IMAGE_FOURCC_P012		0x32313050
+#define __DRI_IMAGE_FOURCC_P016		0x36313050
+
 /**
 * Queryable on images created by createImageFromNames.
 *
@@ -1372,6 +1379,7 @@ struct __DRIdri2ExtensionRec {
 #define __DRI_IMAGE_COMPONENTS_Y_XUXV	0x3005
 #define __DRI_IMAGE_COMPONENTS_Y_UXVX	0x3008
 #define __DRI_IMAGE_COMPONENTS_AYUV	0x3009
+#define __DRI_IMAGE_COMPONENTS_XYUV	0x300A
 #define __DRI_IMAGE_COMPONENTS_R	0x3006
 #define __DRI_IMAGE_COMPONENTS_RG	0x3007

--- a/include/drm-uapi/README
+++ b/include/drm-uapi/README
@@ -1,6 +1,6 @@
 This directory contains a copy of the installed kernel headers
-required by the anv & i965 drivers to communicate with the kernel.
-Whenever either of those driver needs new definitions for new kernel
+required by several drivers to communicate with the kernel.
+Whenever one of those drivers needs new definitions for new kernel
 APIs, these files should be updated.

 These files in master should only be updated once the changes have landed
@@ -13,9 +13,9 @@ $ make headers_install INSTALL_HDR_PATH=/path/to/install

 The last update was done at the following kernel commit :

-commit 78230c46ec0a91dd4256c9e54934b3c7095a7ee3
-Merge: b65bd4031156 037f03155b7d
+commit a5f2fafece141ef3509e686cea576366d55cabb6
+Merge: 71f4e45a4ed3 860433ed2a55
 Author: Dave Airlie <airlied@redhat.com>
-Date:   Wed Mar 21 14:07:03 2018 +1000
+Date:   Wed Feb 20 12:16:30 2019 +1000

-    Merge tag 'omapdrm-4.17' of git://git.kernel.org/pub/scm/linux/kernel/git/tomba/linux into drm-next
+    Merge https://gitlab.freedesktop.org/drm/msm into drm-next
--- a/include/drm-uapi/drm.h
+++ b/include/drm-uapi/drm.h
@@ -674,6 +674,22 @@ struct drm_get_cap {
 */
 #define DRM_CLIENT_CAP_ATOMIC	3

+/**
+ * DRM_CLIENT_CAP_ASPECT_RATIO
+ *
+ * If set to 1, the DRM core will provide aspect ratio information in modes.
+ */
+#define DRM_CLIENT_CAP_ASPECT_RATIO    4
+
+/**
+ * DRM_CLIENT_CAP_WRITEBACK_CONNECTORS
+ *
+ * If set to 1, the DRM core will expose special connectors to be used for
+ * writing back to memory the scene setup in the commit. Depends on client
+ * also supporting DRM_CLIENT_CAP_ATOMIC
+ */
+#define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS	5
+
 /** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
 	__u64 capability;
--- a/include/drm-uapi/drm_fourcc.h
+++ b/include/drm-uapi/drm_fourcc.h
@@ -30,11 +30,50 @@
 extern "C" {
 #endif

+/**
+ * DOC: overview
+ *
+ * In the DRM subsystem, framebuffer pixel formats are described using the
+ * fourcc codes defined in `include/uapi/drm/drm_fourcc.h`. In addition to the
+ * fourcc code, a Format Modifier may optionally be provided, in order to
+ * further describe the buffer's format - for example tiling or compression.
+ *
+ * Format Modifiers
+ * ----------------
+ *
+ * Format modifiers are used in conjunction with a fourcc code, forming a
+ * unique fourcc:modifier pair. This format:modifier pair must fully define the
+ * format and data layout of the buffer, and should be the only way to describe
+ * that particular buffer.
+ *
+ * Having multiple fourcc:modifier pairs which describe the same layout should
+ * be avoided, as such aliases run the risk of different drivers exposing
+ * different names for the same data format, forcing userspace to understand
+ * that they are aliases.
+ *
+ * Format modifiers may change any property of the buffer, including the number
+ * of planes and/or the required allocation size. Format modifiers are
+ * vendor-namespaced, and as such the relationship between a fourcc code and a
+ * modifier is specific to the modifier being used. For example, some modifiers
+ * may preserve meaning - such as number of planes - from the fourcc code,
+ * whereas others may not.
+ *
+ * Vendors should document their modifier usage in as much detail as
+ * possible, to ensure maximum compatibility across devices, drivers and
+ * applications.
+ *
+ * The authoritative list of format modifier codes is found in
+ * `include/uapi/drm/drm_fourcc.h`
+ */
+
 #define fourcc_code(a, b, c, d) ((__u32)(a) | ((__u32)(b) << 8) | \
 				 ((__u32)(c) << 16) | ((__u32)(d) << 24))

 #define DRM_FORMAT_BIG_ENDIAN (1<<31) /* format is big endian instead of little endian */

+/* Reserve 0 for the invalid format specifier */
+#define DRM_FORMAT_INVALID	0
+
 /* color index */
 #define DRM_FORMAT_C8		fourcc_code('C', '8', ' ', ' ') /* [7:0] C */

@@ -112,6 +151,21 @@ extern "C" {
 #define DRM_FORMAT_VYUY		fourcc_code('V', 'Y', 'U', 'Y') /* [31:0] Y1:Cb0:Y0:Cr0 8:8:8:8 little endian */

 #define DRM_FORMAT_AYUV		fourcc_code('A', 'Y', 'U', 'V') /* [31:0] A:Y:Cb:Cr 8:8:8:8 little endian */
+#define DRM_FORMAT_XYUV8888		fourcc_code('X', 'Y', 'U', 'V') /* [31:0] X:Y:Cb:Cr 8:8:8:8 little endian */
+
+/*
+ * packed YCbCr420 2x2 tiled formats
+ * first 64 bits will contain Y,Cb,Cr components for a 2x2 tile
+ */
+/* [63:0]   A3:A2:Y3:0:Cr0:0:Y2:0:A1:A0:Y1:0:Cb0:0:Y0:0  1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */
+#define DRM_FORMAT_Y0L0		fourcc_code('Y', '0', 'L', '0')
+/* [63:0]   X3:X2:Y3:0:Cr0:0:Y2:0:X1:X0:Y1:0:Cb0:0:Y0:0  1:1:8:2:8:2:8:2:1:1:8:2:8:2:8:2 little endian */
+#define DRM_FORMAT_X0L0		fourcc_code('X', '0', 'L', '0')
+
+/* [63:0]   A3:A2:Y3:Cr0:Y2:A1:A0:Y1:Cb0:Y0  1:1:10:10:10:1:1:10:10:10 little endian */
+#define DRM_FORMAT_Y0L2		fourcc_code('Y', '0', 'L', '2')
+/* [63:0]   X3:X2:Y3:Cr0:Y2:X1:X0:Y1:Cb0:Y0  1:1:10:10:10:1:1:10:10:10 little endian */
+#define DRM_FORMAT_X0L2		fourcc_code('X', '0', 'L', '2')

 /*
 * 2 plane RGB + A
@@ -141,6 +195,27 @@ extern "C" {
 #define DRM_FORMAT_NV24		fourcc_code('N', 'V', '2', '4') /* non-subsampled Cr:Cb plane */
 #define DRM_FORMAT_NV42		fourcc_code('N', 'V', '4', '2') /* non-subsampled Cb:Cr plane */

+/*
+ * 2 plane YCbCr MSB aligned
+ * index 0 = Y plane, [15:0] Y:x [10:6] little endian
+ * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [10:6:10:6] little endian
+ */
+#define DRM_FORMAT_P010		fourcc_code('P', '0', '1', '0') /* 2x2 subsampled Cr:Cb plane 10 bits per channel */
+
+/*
+ * 2 plane YCbCr MSB aligned
+ * index 0 = Y plane, [15:0] Y:x [12:4] little endian
+ * index 1 = Cr:Cb plane, [31:0] Cr:x:Cb:x [12:4:12:4] little endian
+ */
+#define DRM_FORMAT_P012		fourcc_code('P', '0', '1', '2') /* 2x2 subsampled Cr:Cb plane 12 bits per channel */
+
+/*
+ * 2 plane YCbCr MSB aligned
+ * index 0 = Y plane, [15:0] Y little endian
+ * index 1 = Cr:Cb plane, [31:0] Cr:Cb [16:16] little endian
+ */
+#define DRM_FORMAT_P016		fourcc_code('P', '0', '1', '6') /* 2x2 subsampled Cr:Cb plane 16 bits per channel */
+
 /*
 * 3 plane YCbCr
 * index 0: Y plane, [7:0] Y
@@ -183,6 +258,9 @@ extern "C" {
 #define DRM_FORMAT_MOD_VENDOR_QCOM    0x05
 #define DRM_FORMAT_MOD_VENDOR_VIVANTE 0x06
 #define DRM_FORMAT_MOD_VENDOR_BROADCOM 0x07
+#define DRM_FORMAT_MOD_VENDOR_ARM     0x08
+#define DRM_FORMAT_MOD_VENDOR_ALLWINNER 0x09
+
 /* add more to the end as needed */

 #define DRM_FORMAT_RESERVED	      ((1ULL << 56) - 1)
@@ -298,6 +376,15 @@ extern "C" {
 */
 #define DRM_FORMAT_MOD_SAMSUNG_64_32_TILE	fourcc_mod_code(SAMSUNG, 1)

+/*
+ * Tiled, 16 (pixels) x 16 (lines) - sized macroblocks
+ *
+ * This is a simple tiled layout using tiles of 16x16 pixels in a row-major
+ * layout. For YCbCr formats Cb/Cr components are taken in such a way that
+ * they correspond to their 16x16 luma block.
+ */
+#define DRM_FORMAT_MOD_SAMSUNG_16_16_TILE	fourcc_mod_code(SAMSUNG, 2)
+
 /*
 * Qualcomm Compressed Format
 *
@@ -309,7 +396,7 @@ extern "C" {
 * Pixel data height is aligned with macrotile height.
 * Entire pixel data buffer is aligned with 4k(bytes).
 */
-#define DRM_FORMAT_MOD_QCOM_COMPRESSED  fourcc_mod_code(QCOM, 1)
+#define DRM_FORMAT_MOD_QCOM_COMPRESSED	fourcc_mod_code(QCOM, 1)

 /* Vivante framebuffer modifiers */

@@ -498,6 +585,128 @@ extern "C" {
 */
 #define DRM_FORMAT_MOD_BROADCOM_UIF fourcc_mod_code(BROADCOM, 6)

+/*
+ * Arm Framebuffer Compression (AFBC) modifiers
+ *
+ * AFBC is a proprietary lossless image compression protocol and format.
+ * It provides fine-grained random access and minimizes the amount of data
+ * transferred between IP blocks.
+ *
+ * AFBC has several features which may be supported and/or used, which are
+ * represented using bits in the modifier. Not all combinations are valid,
+ * and different devices or use-cases may support different combinations.
+ *
+ * Further information on the use of AFBC modifiers can be found in
+ * Documentation/gpu/afbc.rst
+ */
+#define DRM_FORMAT_MOD_ARM_AFBC(__afbc_mode)	fourcc_mod_code(ARM, __afbc_mode)
+
+/*
+ * AFBC superblock size
+ *
+ * Indicates the superblock size(s) used for the AFBC buffer. The buffer
+ * size (in pixels) must be aligned to a multiple of the superblock size.
+ * The four least significant bits (LSBs) are reserved for block size.
+ *
+ * Where one superblock size is specified, it applies to all planes of the
+ * buffer (e.g. 16x16, 32x8). When multiple superblock sizes are specified,
+ * the first applies to the Luma plane and the second applies to the Chroma
+ * plane(s). e.g. (32x8_64x4 means 32x8 Luma, with 64x4 Chroma).
+ * Multiple superblock sizes are only valid for multi-plane YCbCr formats.
+ */
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_MASK      0xf
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_16x16     (1ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8      (2ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_64x4      (3ULL)
+#define AFBC_FORMAT_MOD_BLOCK_SIZE_32x8_64x4 (4ULL)
+
+/*
+ * AFBC lossless colorspace transform
+ *
+ * Indicates that the buffer makes use of the AFBC lossless colorspace
+ * transform.
+ */
+#define AFBC_FORMAT_MOD_YTR     (1ULL <<  4)
+
+/*
+ * AFBC block-split
+ *
+ * Indicates that the payload of each superblock is split. The second
+ * half of the payload is positioned at a predefined offset from the start
+ * of the superblock payload.
+ */
+#define AFBC_FORMAT_MOD_SPLIT   (1ULL <<  5)
+
+/*
+ * AFBC sparse layout
+ *
+ * This flag indicates that the payload of each superblock must be stored at a
+ * predefined position relative to the other superblocks in the same AFBC
+ * buffer. This order is the same order used by the header buffer. In this mode
+ * each superblock is given the same amount of space as an uncompressed
+ * superblock of the particular format would require, rounding up to the next
+ * multiple of 128 bytes in size.
+ */
+#define AFBC_FORMAT_MOD_SPARSE  (1ULL <<  6)
+
+/*
+ * AFBC copy-block restrict
+ *
+ * Buffers with this flag must obey the copy-block restriction. The restriction
+ * is such that there are no copy-blocks referring across the border of 8x8
+ * blocks. For the subsampled data the 8x8 limitation is also subsampled.
+ */
+#define AFBC_FORMAT_MOD_CBR     (1ULL <<  7)
+
+/*
+ * AFBC tiled layout
+ *
+ * The tiled layout groups superblocks in 8x8 or 4x4 tiles, where all
+ * superblocks inside a tile are stored together in memory. 8x8 tiles are used
+ * for pixel formats up to and including 32 bpp while 4x4 tiles are used for
+ * larger bpp formats. The order between the tiles is scan line.
+ * When the tiled layout is used, the buffer size (in pixels) must be aligned
+ * to the tile size.
+ */
+#define AFBC_FORMAT_MOD_TILED   (1ULL <<  8)
+
+/*
+ * AFBC solid color blocks
+ *
+ * Indicates that the buffer makes use of solid-color blocks, whereby bandwidth
+ * can be reduced if a whole superblock is a single color.
+ */
+#define AFBC_FORMAT_MOD_SC      (1ULL <<  9)
+
+/*
+ * AFBC double-buffer
+ *
+ * Indicates that the buffer is allocated in a layout safe for front-buffer
+ * rendering.
+ */
+#define AFBC_FORMAT_MOD_DB      (1ULL << 10)
+
+/*
+ * AFBC buffer content hints
+ *
+ * Indicates that the buffer includes per-superblock content hints.
+ */
+#define AFBC_FORMAT_MOD_BCH     (1ULL << 11)
+
+/*
+ * Allwinner tiled modifier
+ *
+ * This tiling mode is implemented by the VPU found on all Allwinner platforms,
+ * codenamed sunxi. It is associated with a YUV format that uses either 2 or 3
+ * planes.
+ *
+ * With this tiling, the luminance samples are disposed in tiles representing
+ * 32x32 pixels and the chrominance samples in tiles representing 32x64 pixels.
+ * The pixel order in each tile is linear and the tiles are disposed linearly,
+ * both in row-major order.
+ */
+#define DRM_FORMAT_MOD_ALLWINNER_TILED fourcc_mod_code(ALLWINNER, 1)
+
 #if defined(__cplusplus)
 }
 #endif
--- a/include/drm-uapi/drm_mode.h
+++ b/include/drm-uapi/drm_mode.h
@@ -93,6 +93,15 @@ extern "C" {
 #define DRM_MODE_PICTURE_ASPECT_NONE		0
 #define DRM_MODE_PICTURE_ASPECT_4_3		1
 #define DRM_MODE_PICTURE_ASPECT_16_9		2
+#define DRM_MODE_PICTURE_ASPECT_64_27		3
+#define DRM_MODE_PICTURE_ASPECT_256_135		4
+
+/* Content type options */
+#define DRM_MODE_CONTENT_TYPE_NO_DATA		0
+#define DRM_MODE_CONTENT_TYPE_GRAPHICS		1
+#define DRM_MODE_CONTENT_TYPE_PHOTO		2
+#define DRM_MODE_CONTENT_TYPE_CINEMA		3
+#define DRM_MODE_CONTENT_TYPE_GAME		4

 /* Aspect ratio flag bitmask (4 bits 22:19) */
 #define DRM_MODE_FLAG_PIC_AR_MASK		(0x0F<<19)
@@ -102,6 +111,10 @@ extern "C" {
 			(DRM_MODE_PICTURE_ASPECT_4_3<<19)
 #define  DRM_MODE_FLAG_PIC_AR_16_9 \
 			(DRM_MODE_PICTURE_ASPECT_16_9<<19)
+#define  DRM_MODE_FLAG_PIC_AR_64_27 \
+			(DRM_MODE_PICTURE_ASPECT_64_27<<19)
+#define  DRM_MODE_FLAG_PIC_AR_256_135 \
+			(DRM_MODE_PICTURE_ASPECT_256_135<<19)

 #define  DRM_MODE_FLAG_ALL	(DRM_MODE_FLAG_PHSYNC |		\
 				 DRM_MODE_FLAG_NHSYNC |		\
@@ -173,8 +186,9 @@ extern "C" {
 /*
 * DRM_MODE_REFLECT_<axis>
 *
- * Signals that the contents of a drm plane is reflected in the <axis> axis,
+ * Signals that the contents of a drm plane is reflected along the <axis> axis,
 * in the same way as mirroring.
+ * See kerneldoc chapter "Plane Composition Properties" for more details.
 *
 * This define is provided as a convenience, looking up the property id
 * using the name->prop id lookup is the preferred method.
@@ -338,6 +352,7 @@ enum drm_mode_subconnector {
 #define DRM_MODE_CONNECTOR_VIRTUAL      15
 #define DRM_MODE_CONNECTOR_DSI		16
 #define DRM_MODE_CONNECTOR_DPI		17
+#define DRM_MODE_CONNECTOR_WRITEBACK	18

 struct drm_mode_get_connector {

@@ -873,6 +888,25 @@ struct drm_mode_revoke_lease {
 	__u32 lessee_id;
 };

+/**
+ * struct drm_mode_rect - Two dimensional rectangle.
+ * @x1: Horizontal starting coordinate (inclusive).
+ * @y1: Vertical starting coordinate (inclusive).
+ * @x2: Horizontal ending coordinate (exclusive).
+ * @y2: Vertical ending coordinate (exclusive).
+ *
+ * The drm subsystem uses struct drm_rect to manage rectangular areas; this
+ * struct exports that representation to user-space.
+ *
+ * Currently used by drm_mode_atomic blob property FB_DAMAGE_CLIPS.
+ */
+struct drm_mode_rect {
+	__s32 x1;
+	__s32 y1;
+	__s32 x2;
+	__s32 y2;
+};
+
 #if defined(__cplusplus)
 }
 #endif
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -412,6 +412,14 @@ typedef struct drm_i915_irq_wait {
 	int irq_seq;
 } drm_i915_irq_wait_t;

+/*
+ * Different modes of per-process Graphics Translation Table,
+ * see I915_PARAM_HAS_ALIASING_PPGTT
+ */
+#define I915_GEM_PPGTT_NONE	0
+#define I915_GEM_PPGTT_ALIASING	1
+#define I915_GEM_PPGTT_FULL	2
+
 /* Ioctl to query kernel params:
 */
 #define I915_PARAM_IRQ_ACTIVE            1
@@ -529,6 +537,28 @@ typedef struct drm_i915_irq_wait {
 */
 #define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51

+/*
+ * Once upon a time we supposed that writes through the GGTT would be
+ * immediately in physical memory (once flushed out of the CPU path). However,
+ * on a few different processors and chipsets, this is not necessarily the case
+ * as the writes appear to be buffered internally. Thus a read of the backing
+ * storage (physical memory) via a different path (with different physical tags
+ * to the indirect write via the GGTT) will see stale values from before
+ * the GGTT write. Inside the kernel, we can for the most part keep track of
+ * the different read/write domains in use (e.g. set-domain), but the assumption
+ * of coherency is baked into the ABI, hence reporting its true state in this
+ * parameter.
+ *
+ * Reports true when writes via mmap_gtt are immediately visible following an
+ * lfence to flush the WCB.
+ *
+ * Reports false when writes via mmap_gtt are indeterminately delayed in an
+ * internal buffer and are _not_ immediately visible to third parties accessing
+ * directly via mmap_cpu/mmap_wc. Use of mmap_gtt as part of an IPC
+ * communications channel when reporting false is strongly discouraged.
+ */
+#define I915_PARAM_MMAP_GTT_COHERENT	52
+
 typedef struct drm_i915_getparam {
 	__s32 param;
 	/*
@@ -1456,9 +1486,73 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU		0x7
 	__u64 value;
 };

+/**
+ * Context SSEU programming
+ *
+ * It may be necessary for either functional or performance reason to configure
+ * a context to run with a reduced number of SSEU (where SSEU stands for Slice/
+ * Sub-slice/EU).
+ *
+ * This is done by configuring SSEU configuration using the below
+ * @struct drm_i915_gem_context_param_sseu for every supported engine which
+ * userspace intends to use.
+ *
+ * Not all GPUs or engines support this functionality in which case an error
+ * code -ENODEV will be returned.
+ *
+ * Also, flexibility of possible SSEU configuration permutations varies between
+ * GPU generations and software imposed limitations. Requesting such a
+ * combination will return an error code of -EINVAL.
+ *
+ * NOTE: When perf/OA is active the context's SSEU configuration is ignored in
+ * favour of a single global setting.
+ */
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 engine_class;
+	__u16 engine_instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 flags;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned for I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be less than or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1,	    /* HSW only */
 	I915_OA_FORMAT_A29,	    /* HSW only */
--- a/include/drm-uapi/tegra_drm.h
+++ b/include/drm-uapi/tegra_drm.h
@@ -32,143 +32,615 @@ extern "C" {
 #define DRM_TEGRA_GEM_CREATE_TILED     (1 << 0)
 #define DRM_TEGRA_GEM_CREATE_BOTTOM_UP (1 << 1)

+/**
+ * struct drm_tegra_gem_create - parameters for the GEM object creation IOCTL
+ */
 struct drm_tegra_gem_create {
+	/**
+	 * @size:
+	 *
+	 * The size, in bytes, of the buffer object to be created.
+	 */
 	__u64 size;
+
+	/**
+	 * @flags:
+	 *
+	 * A bitmask of flags that influence the creation of GEM objects:
+	 *
+	 * DRM_TEGRA_GEM_CREATE_TILED
+	 *   Use the 16x16 tiling format for this buffer.
+	 *
+	 * DRM_TEGRA_GEM_CREATE_BOTTOM_UP
+	 *   The buffer has a bottom-up layout.
+	 */
 	__u32 flags;
+
+	/**
+	 * @handle:
+	 *
+	 * The handle of the created GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 handle;
 };

+/**
+ * struct drm_tegra_gem_mmap - parameters for the GEM mmap IOCTL
+ */
 struct drm_tegra_gem_mmap {
+	/**
+	 * @handle:
+	 *
+	 * Handle of the GEM object to obtain an mmap offset for.
+	 */
 	__u32 handle;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @offset:
+	 *
+	 * The mmap offset for the given GEM object. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u64 offset;
 };

+/**
+ * struct drm_tegra_syncpt_read - parameters for the read syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_read {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to read the current value from.
+	 */
 	__u32 id;
+
+	/**
+	 * @value:
+	 *
+	 * The current syncpoint value. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 value;
 };

+/**
+ * struct drm_tegra_syncpt_incr - parameters for the increment syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_incr {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to increment.
+	 */
 	__u32 id;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };

+/**
+ * struct drm_tegra_syncpt_wait - parameters for the wait syncpoint IOCTL
+ */
 struct drm_tegra_syncpt_wait {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to wait on.
+	 */
 	__u32 id;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to wait.
+	 */
 	__u32 thresh;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, to wait.
+	 */
 	__u32 timeout;
+
+	/**
+	 * @value:
+	 *
+	 * The new syncpoint value after the wait. Set by the kernel upon
+	 * successful completion of the IOCTL.
+	 */
 	__u32 value;
 };

 #define DRM_TEGRA_NO_TIMEOUT	(0xffffffff)

+/**
+ * struct drm_tegra_open_channel - parameters for the open channel IOCTL
+ */
 struct drm_tegra_open_channel {
+	/**
+	 * @client:
+	 *
+	 * The client ID for this channel.
+	 */
 	__u32 client;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
+
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. Set by the kernel upon
+	 * successful completion of the IOCTL. This context needs to be passed
+	 * to the DRM_TEGRA_CHANNEL_CLOSE or the DRM_TEGRA_SUBMIT IOCTLs.
+	 */
 	__u64 context;
 };

+/**
+ * struct drm_tegra_close_channel - parameters for the close channel IOCTL
+ */
 struct drm_tegra_close_channel {
+	/**
+	 * @context:
+	 *
+	 * The application context of this channel. This is obtained from the
+	 * DRM_TEGRA_OPEN_CHANNEL IOCTL.
+	 */
 	__u64 context;
 };

+/**
+ * struct drm_tegra_get_syncpt - parameters for the get syncpoint IOCTL
+ */
 struct drm_tegra_get_syncpt {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel for which to obtain
+	 * the syncpoint ID.
+	 */
 	__u64 context;
+
+	/**
+	 * @index:
+	 *
+	 * Index of the client syncpoint for which to obtain the ID.
+	 */
 	__u32 index;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the given syncpoint. Set by the kernel upon successful
+	 * completion of the IOCTL.
+	 */
 	__u32 id;
 };

+/**
+ * struct drm_tegra_get_syncpt_base - parameters for the get wait base IOCTL
+ */
 struct drm_tegra_get_syncpt_base {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying for which channel to obtain the
+	 * wait base.
+	 */
 	__u64 context;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint for which to obtain the wait base.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @id:
+	 *
+	 * The ID of the wait base corresponding to the client syncpoint. Set
+	 * by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 id;
 };

+/**
+ * struct drm_tegra_syncpt - syncpoint increment operation
+ */
 struct drm_tegra_syncpt {
+	/**
+	 * @id:
+	 *
+	 * ID of the syncpoint to operate on.
+	 */
 	__u32 id;
+
+	/**
+	 * @incrs:
+	 *
+	 * Number of increments to perform for the syncpoint.
+	 */
 	__u32 incrs;
 };

+/**
+ * struct drm_tegra_cmdbuf - structure describing a command buffer
+ */
 struct drm_tegra_cmdbuf {
+	/**
+	 * @handle:
+	 *
+	 * Handle to a GEM object containing the command buffer.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, into the GEM object identified by @handle at
+	 * which the command buffer starts.
+	 */
 	__u32 offset;
+
+	/**
+	 * @words:
+	 *
+	 * Number of 32-bit words in this command buffer.
+	 */
 	__u32 words;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };

+/**
+ * struct drm_tegra_reloc - GEM object relocation structure
+ */
 struct drm_tegra_reloc {
 	struct {
+		/**
+		 * @cmdbuf.handle:
+		 *
+		 * Handle to the GEM object containing the command buffer for
+		 * which to perform this GEM object relocation.
+		 */
 		__u32 handle;
+
+		/**
+		 * @cmdbuf.offset:
+		 *
+		 * Offset, in bytes, into the command buffer at which to
+		 * insert the relocated address.
+		 */
 		__u32 offset;
 	} cmdbuf;
 	struct {
+		/**
+		 * @target.handle:
+		 *
+		 * Handle to the GEM object to be relocated.
+		 */
 		__u32 handle;
+
+		/**
+		 * @target.offset:
+		 *
+		 * Offset, in bytes, into the target GEM object at which the
+		 * relocated data starts.
+		 */
 		__u32 offset;
 	} target;
+
+	/**
+	 * @shift:
+	 *
+	 * The number of bits by which to shift relocated addresses.
+	 */
 	__u32 shift;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };

+/**
+ * struct drm_tegra_waitchk - wait check structure
+ */
 struct drm_tegra_waitchk {
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object containing a command stream on which to
+	 * perform the wait check.
+	 */
 	__u32 handle;
+
+	/**
+	 * @offset:
+	 *
+	 * Offset, in bytes, of the location in the command stream to perform
+	 * the wait check on.
+	 */
 	__u32 offset;
+
+	/**
+	 * @syncpt:
+	 *
+	 * ID of the syncpoint to wait check.
+	 */
 	__u32 syncpt;
+
+	/**
+	 * @thresh:
+	 *
+	 * Threshold value for which to check.
+	 */
 	__u32 thresh;
 };

+/**
+ * struct drm_tegra_submit - job submission structure
+ */
 struct drm_tegra_submit {
+	/**
+	 * @context:
+	 *
+	 * The application context identifying the channel to use for the
+	 * execution of this job.
+	 */
 	__u64 context;
-	__u32 num_syncpts;
-	__u32 num_cmdbufs;
-	__u32 num_relocs;
-	__u32 num_waitchks;
-	__u32 waitchk_mask;
-	__u32 timeout;
-	__u64 syncpts;
-	__u64 cmdbufs;
-	__u64 relocs;
-	__u64 waitchks;
-	__u32 fence;		/* Return value */

-	__u32 reserved[5];	/* future expansion */
+	/**
+	 * @num_syncpts:
+	 *
+	 * The number of syncpoints operated on by this job. This defines the
+	 * length of the array pointed to by @syncpts.
+	 */
+	__u32 num_syncpts;
+
+	/**
+	 * @num_cmdbufs:
+	 *
+	 * The number of command buffers to execute as part of this job. This
+	 * defines the length of the array pointed to by @cmdbufs.
+	 */
+	__u32 num_cmdbufs;
+
+	/**
+	 * @num_relocs:
+	 *
+	 * The number of relocations to perform before executing this job.
+	 * This defines the length of the array pointed to by @relocs.
+	 */
+	__u32 num_relocs;
+
+	/**
+	 * @num_waitchks:
+	 *
+	 * The number of wait checks to perform as part of this job. This
+	 * defines the length of the array pointed to by @waitchks.
+	 */
+	__u32 num_waitchks;
+
+	/**
+	 * @waitchk_mask:
+	 *
+	 * Bitmask of valid wait checks.
+	 */
+	__u32 waitchk_mask;
+
+	/**
+	 * @timeout:
+	 *
+	 * Timeout, in milliseconds, before this job is cancelled.
+	 */
+	__u32 timeout;
+
+	/**
+	 * @syncpts:
+	 *
+	 * A pointer to an array of &struct drm_tegra_syncpt structures that
+	 * specify the syncpoint operations performed as part of this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_syncpts.
+	 */
+	__u64 syncpts;
+
+	/**
+	 * @cmdbufs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_cmdbuf structures that
+	 * define the command buffers to execute as part of this job. The
+	 * number of elements in the array must be equal to the value given
+	 * by @num_cmdbufs.
+	 */
+	__u64 cmdbufs;
+
+	/**
+	 * @relocs:
+	 *
+	 * A pointer to an array of &struct drm_tegra_reloc structures that
+	 * specify the relocations that need to be performed before executing
+	 * this job. The number of elements in the array must be equal to the
+	 * value given by @num_relocs.
+	 */
+	__u64 relocs;
+
+	/**
+	 * @waitchks:
+	 *
+	 * A pointer to an array of &struct drm_tegra_waitchk structures that
+	 * specify the wait checks to be performed while executing this job.
+	 * The number of elements in the array must be equal to the value
+	 * given by @num_waitchks.
+	 */
+	__u64 waitchks;
+
+	/**
+	 * @fence:
+	 *
+	 * The threshold of the syncpoint associated with this job after it
+	 * has been completed. Set by the kernel upon successful completion of
+	 * the IOCTL. This can be used with the DRM_TEGRA_SYNCPT_WAIT IOCTL to
+	 * wait for this job to be finished.
+	 */
+	__u32 fence;
+
+	/**
+	 * @reserved:
+	 *
+	 * This field is reserved for future use. Must be 0.
+	 */
+	__u32 reserved[5];
 };

 #define DRM_TEGRA_GEM_TILING_MODE_PITCH 0
 #define DRM_TEGRA_GEM_TILING_MODE_TILED 1
 #define DRM_TEGRA_GEM_TILING_MODE_BLOCK 2

+/**
+ * struct drm_tegra_gem_set_tiling - parameters for the set tiling IOCTL
+ */
 struct drm_tegra_gem_set_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the tiling parameters.
+	 */
 	__u32 handle;
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode to set. Must be one of:
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_PITCH
+	 *   pitch linear format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_TILED
+	 *   16x16 tiling format
+	 *
+	 * DRM_TEGRA_GEM_TILING_MODE_BLOCK
+	 *   16Bx2 tiling format
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The value to set for the tiling mode parameter.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };

+/**
+ * struct drm_tegra_gem_get_tiling - parameters for the get tiling IOCTL
+ */
 struct drm_tegra_gem_get_tiling {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the tiling parameters.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @mode:
+	 *
+	 * The tiling mode currently associated with the GEM object. Set by
+	 * the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 mode;
+
+	/**
+	 * @value:
+	 *
+	 * The tiling mode parameter currently associated with the GEM object.
+	 * Set by the kernel upon successful completion of the IOCTL.
+	 */
 	__u32 value;
+
+	/**
+	 * @pad:
+	 *
+	 * Structure padding that may be used in the future. Must be 0.
+	 */
 	__u32 pad;
 };

 #define DRM_TEGRA_GEM_BOTTOM_UP		(1 << 0)
 #define DRM_TEGRA_GEM_FLAGS		(DRM_TEGRA_GEM_BOTTOM_UP)

+/**
+ * struct drm_tegra_gem_set_flags - parameters for the set flags IOCTL
+ */
 struct drm_tegra_gem_set_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to set the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags to set for the GEM object.
+	 */
 	__u32 flags;
 };

+/**
+ * struct drm_tegra_gem_get_flags - parameters for the get flags IOCTL
+ */
 struct drm_tegra_gem_get_flags {
-	/* input */
+	/**
+	 * @handle:
+	 *
+	 * Handle to the GEM object for which to query the flags.
+	 */
 	__u32 handle;
-	/* output */
+
+	/**
+	 * @flags:
+	 *
+	 * The flags currently associated with the GEM object. Set by the
+	 * kernel upon successful completion of the IOCTL.
+	 */
 	__u32 flags;
 };

@@ -193,7 +665,7 @@ struct drm_tegra_gem_get_flags {
 #define DRM_IOCTL_TEGRA_SYNCPT_INCR DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_INCR, struct drm_tegra_syncpt_incr)
 #define DRM_IOCTL_TEGRA_SYNCPT_WAIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SYNCPT_WAIT, struct drm_tegra_syncpt_wait)
 #define DRM_IOCTL_TEGRA_OPEN_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_OPEN_CHANNEL, struct drm_tegra_open_channel)
-#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_open_channel)
+#define DRM_IOCTL_TEGRA_CLOSE_CHANNEL DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_CLOSE_CHANNEL, struct drm_tegra_close_channel)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT, struct drm_tegra_get_syncpt)
 #define DRM_IOCTL_TEGRA_SUBMIT DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_SUBMIT, struct drm_tegra_submit)
 #define DRM_IOCTL_TEGRA_GET_SYNCPT_BASE DRM_IOWR(DRM_COMMAND_BASE + DRM_TEGRA_GET_SYNCPT_BASE, struct drm_tegra_get_syncpt_base)
--- a/include/drm-uapi/v3d_drm.h
+++ b/include/drm-uapi/v3d_drm.h
@@ -52,6 +52,14 @@ extern "C" {
 *
 * This asks the kernel to have the GPU execute an optional binner
 * command list, and a render command list.
+ *
+ * The L1T, slice, L2C, L2T, and GCA caches will be flushed before
+ * each CL executes.  The VCD cache should be flushed (if necessary)
+ * by the submitted CLs.  The TLB writes are guaranteed to have been
+ * flushed by the time the render done IRQ happens, which is the
+ * trigger for out_sync.  Any dirtying of cachelines by the job (only
+ * possible using TMU writes) must be flushed by the caller using the
+ * CL's cache flush commands.
 */
 struct drm_v3d_submit_cl {
 	/* Pointer to the binner command list.
--- a/include/meson.build
+++ b/include/meson.build
@@ -18,7 +18,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.

-inc_drm_uapi = include_directories('drm-uapi')
 inc_vulkan = include_directories('vulkan')
 inc_d3d9 = include_directories('D3D9')
 inc_gl_internal = include_directories('GL/internal')
@@ -94,14 +93,19 @@ if with_gallium_opencl and not with_opencl_icd
  install_headers(
    'CL/cl.h',
    'CL/cl.hpp',
+    'CL/cl2.hpp',
    'CL/cl_d3d10.h',
    'CL/cl_d3d11.h',
    'CL/cl_dx9_media_sharing.h',
+    'CL/cl_dx9_media_sharing_intel.h',
    'CL/cl_egl.h',
    'CL/cl_ext.h',
+    'CL/cl_ext_intel.h',
    'CL/cl_gl.h',
    'CL/cl_gl_ext.h',
    'CL/cl_platform.h',
+    'CL/cl_va_api_media_sharing_intel.h',
+    'CL/cl_version.h',
    'CL/opencl.h',
    subdir: 'CL'
  )
--- a/include/pci_ids/i965_pci_ids.h
+++ b/include/pci_ids/i965_pci_ids.h
@@ -1,3 +1,4 @@
+#ifndef IRIS
 CHIPSET(0x29A2, i965,    "Intel(R) 965G")
 CHIPSET(0x2992, i965,    "Intel(R) 965Q")
 CHIPSET(0x2982, i965,    "Intel(R) 965G")
@@ -91,6 +92,11 @@ CHIPSET(0x0F32, byt,     "Intel(R) Bay Trail")
 CHIPSET(0x0F33, byt,     "Intel(R) Bay Trail")
 CHIPSET(0x0157, byt,     "Intel(R) Bay Trail")
 CHIPSET(0x0155, byt,     "Intel(R) Bay Trail")
+CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
+CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
+CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
+CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
+#endif
 CHIPSET(0x1602, bdw_gt1, "Intel(R) Broadwell GT1")
 CHIPSET(0x1606, bdw_gt1, "Intel(R) Broadwell GT1")
 CHIPSET(0x160A, bdw_gt1, "Intel(R) Broadwell GT1")
@@ -109,10 +115,6 @@ CHIPSET(0x162A, bdw_gt3, "Intel(R) Iris Pro P6300 (Broadwell GT3e)")
 CHIPSET(0x162B, bdw_gt3, "Intel(R) Iris 6100 (Broadwell GT3)")
 CHIPSET(0x162D, bdw_gt3, "Intel(R) Broadwell GT3")
 CHIPSET(0x162E, bdw_gt3, "Intel(R) Broadwell GT3")
-CHIPSET(0x22B0, chv,     "Intel(R) HD Graphics (Cherrytrail)")
-CHIPSET(0x22B1, chv,     "Intel(R) HD Graphics XXX (Braswell)") /* Overridden in brw_get_renderer_string */
-CHIPSET(0x22B2, chv,     "Intel(R) HD Graphics (Cherryview)")
-CHIPSET(0x22B3, chv,     "Intel(R) HD Graphics (Cherryview)")
 CHIPSET(0x1902, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x1906, skl_gt1, "Intel(R) HD Graphics 510 (Skylake GT1)")
 CHIPSET(0x190A, skl_gt1, "Intel(R) Skylake GT1")
--- a/include/vulkan/vulkan_core.h
+++ b/include/vulkan/vulkan_core.h
@@ -43,7 +43,7 @@ extern "C" {
 #define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
 #define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
 // Version of this file
-#define VK_HEADER_VERSION 97
+#define VK_HEADER_VERSION 101


 #define VK_NULL_HANDLE 0
@@ -349,6 +349,8 @@ typedef enum VkStructureType {
    VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001,
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT = 1000101000,
    VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT = 1000101001,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT = 1000102000,
+    VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT = 1000102001,
    VK_STRUCTURE_TYPE_HDR_METADATA_EXT = 1000105000,
    VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR = 1000109000,
    VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR = 1000109001,
@@ -431,6 +433,8 @@ typedef enum VkStructureType {
    VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_INFO_NV = 1000165012,
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_REPRESENTATIVE_FRAGMENT_TEST_FEATURES_NV = 1000166000,
    VK_STRUCTURE_TYPE_PIPELINE_REPRESENTATIVE_FRAGMENT_TEST_STATE_CREATE_INFO_NV = 1000166001,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_IMAGE_FORMAT_INFO_EXT = 1000170000,
+    VK_STRUCTURE_TYPE_FILTER_CUBIC_IMAGE_VIEW_IMAGE_FORMAT_PROPERTIES_EXT = 1000170001,
    VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT = 1000174000,
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR = 1000177000,
    VK_STRUCTURE_TYPE_IMPORT_MEMORY_HOST_POINTER_INFO_EXT = 1000178000,
@@ -466,11 +470,15 @@ typedef enum VkStructureType {
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT = 1000237000,
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT = 1000238000,
    VK_STRUCTURE_TYPE_MEMORY_PRIORITY_ALLOCATE_INFO_EXT = 1000238001,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEDICATED_ALLOCATION_IMAGE_ALIASING_FEATURES_NV = 1000240000,
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT = 1000244000,
    VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO_EXT = 1000244001,
    VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_CREATE_INFO_EXT = 1000244002,
    VK_STRUCTURE_TYPE_IMAGE_STENCIL_USAGE_CREATE_INFO_EXT = 1000246000,
    VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT = 1000247000,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_FEATURES_NV = 1000249000,
+    VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_NV = 1000249001,
+    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COOPERATIVE_MATRIX_PROPERTIES_NV = 1000249002,
    VK_STRUCTURE_TYPE_DEBUG_REPORT_CREATE_INFO_EXT = VK_STRUCTURE_TYPE_DEBUG_REPORT_CALLBACK_CREATE_INFO_EXT,
    VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO_KHR = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO,
    VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES_KHR = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES,
@@ -1141,6 +1149,7 @@ typedef enum VkFilter {
    VK_FILTER_NEAREST = 0,
    VK_FILTER_LINEAR = 1,
    VK_FILTER_CUBIC_IMG = 1000015000,
+    VK_FILTER_CUBIC_EXT = VK_FILTER_CUBIC_IMG,
    VK_FILTER_BEGIN_RANGE = VK_FILTER_NEAREST,
    VK_FILTER_END_RANGE = VK_FILTER_LINEAR,
    VK_FILTER_RANGE_SIZE = (VK_FILTER_LINEAR - VK_FILTER_NEAREST + 1),
@@ -1352,6 +1361,7 @@ typedef enum VkFormatFeatureFlagBits {
    VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT_KHR = VK_FORMAT_FEATURE_SAMPLED_IMAGE_YCBCR_CONVERSION_CHROMA_RECONSTRUCTION_EXPLICIT_FORCEABLE_BIT,
    VK_FORMAT_FEATURE_DISJOINT_BIT_KHR = VK_FORMAT_FEATURE_DISJOINT_BIT,
    VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT_KHR = VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT,
+    VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_EXT = VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_CUBIC_BIT_IMG,
    VK_FORMAT_FEATURE_FLAG_BITS_MAX_ENUM = 0x7FFFFFFF
 } VkFormatFeatureFlagBits;
 typedef VkFlags VkFormatFeatureFlags;
@@ -6244,7 +6254,7 @@ typedef struct VkPhysicalDeviceDepthStencilResolvePropertiesKHR {


 #define VK_KHR_vulkan_memory_model 1
-#define VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION 2
+#define VK_KHR_VULKAN_MEMORY_MODEL_SPEC_VERSION 3
 #define VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME "VK_KHR_vulkan_memory_model"

 typedef struct VkPhysicalDeviceVulkanMemoryModelFeaturesKHR {
@@ -6252,6 +6262,7 @@ typedef struct VkPhysicalDeviceVulkanMemoryModelFeaturesKHR {
    void*              pNext;
    VkBool32           vulkanMemoryModel;
    VkBool32           vulkanMemoryModelDeviceScope;
+    VkBool32           vulkanMemoryModelAvailabilityVisibilityChains;
 } VkPhysicalDeviceVulkanMemoryModelFeaturesKHR;


@@ -7456,6 +7467,27 @@ typedef struct VkPipelineRasterizationConservativeStateCreateInfoEXT {



+#define VK_EXT_depth_clip_enable 1
+#define VK_EXT_DEPTH_CLIP_ENABLE_SPEC_VERSION 1
+#define VK_EXT_DEPTH_CLIP_ENABLE_EXTENSION_NAME "VK_EXT_depth_clip_enable"
+
+typedef VkFlags VkPipelineRasterizationDepthClipStateCreateFlagsEXT;
+
+typedef struct VkPhysicalDeviceDepthClipEnableFeaturesEXT {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           depthClipEnable;
+} VkPhysicalDeviceDepthClipEnableFeaturesEXT;
+
+typedef struct VkPipelineRasterizationDepthClipStateCreateInfoEXT {
+    VkStructureType                                        sType;
+    const void*                                            pNext;
+    VkPipelineRasterizationDepthClipStateCreateFlagsEXT    flags;
+    VkBool32                                               depthClipEnable;
+} VkPipelineRasterizationDepthClipStateCreateInfoEXT;
+
+
+
 #define VK_EXT_swapchain_colorspace 1
 #define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 3
 #define VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME "VK_EXT_swapchain_colorspace"
@@ -8551,6 +8583,25 @@ typedef struct VkPipelineRepresentativeFragmentTestStateCreateInfoNV {



+#define VK_EXT_filter_cubic 1
+#define VK_EXT_FILTER_CUBIC_SPEC_VERSION  1
+#define VK_EXT_FILTER_CUBIC_EXTENSION_NAME "VK_EXT_filter_cubic"
+
+typedef struct VkPhysicalDeviceImageViewImageFormatInfoEXT {
+    VkStructureType    sType;
+    void*              pNext;
+    VkImageViewType    imageViewType;
+} VkPhysicalDeviceImageViewImageFormatInfoEXT;
+
+typedef struct VkFilterCubicImageViewImageFormatPropertiesEXT {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           filterCubic;
+    VkBool32           filterCubicMinmax ;
+} VkFilterCubicImageViewImageFormatPropertiesEXT;
+
+
+
 #define VK_EXT_global_priority 1
 #define VK_EXT_GLOBAL_PRIORITY_SPEC_VERSION 2
 #define VK_EXT_GLOBAL_PRIORITY_EXTENSION_NAME "VK_EXT_global_priority"
@@ -9003,6 +9054,18 @@ typedef struct VkMemoryPriorityAllocateInfoEXT {



+#define VK_NV_dedicated_allocation_image_aliasing 1
+#define VK_NV_DEDICATED_ALLOCATION_IMAGE_ALIASING_SPEC_VERSION 1
+#define VK_NV_DEDICATED_ALLOCATION_IMAGE_ALIASING_EXTENSION_NAME "VK_NV_dedicated_allocation_image_aliasing"
+
+typedef struct VkPhysicalDeviceDedicatedAllocationImageAliasingFeaturesNV {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           dedicatedAllocationImageAliasing;
+} VkPhysicalDeviceDedicatedAllocationImageAliasingFeaturesNV;
+
+
+
 #define VK_EXT_buffer_device_address 1
 typedef uint64_t VkDeviceAddress;

@@ -9089,6 +9152,76 @@ typedef struct VkValidationFeaturesEXT {



+#define VK_NV_cooperative_matrix 1
+#define VK_NV_COOPERATIVE_MATRIX_SPEC_VERSION 1
+#define VK_NV_COOPERATIVE_MATRIX_EXTENSION_NAME "VK_NV_cooperative_matrix"
+
+
+typedef enum VkComponentTypeNV {
+    VK_COMPONENT_TYPE_FLOAT16_NV = 0,
+    VK_COMPONENT_TYPE_FLOAT32_NV = 1,
+    VK_COMPONENT_TYPE_FLOAT64_NV = 2,
+    VK_COMPONENT_TYPE_SINT8_NV = 3,
+    VK_COMPONENT_TYPE_SINT16_NV = 4,
+    VK_COMPONENT_TYPE_SINT32_NV = 5,
+    VK_COMPONENT_TYPE_SINT64_NV = 6,
+    VK_COMPONENT_TYPE_UINT8_NV = 7,
+    VK_COMPONENT_TYPE_UINT16_NV = 8,
+    VK_COMPONENT_TYPE_UINT32_NV = 9,
+    VK_COMPONENT_TYPE_UINT64_NV = 10,
+    VK_COMPONENT_TYPE_BEGIN_RANGE_NV = VK_COMPONENT_TYPE_FLOAT16_NV,
+    VK_COMPONENT_TYPE_END_RANGE_NV = VK_COMPONENT_TYPE_UINT64_NV,
+    VK_COMPONENT_TYPE_RANGE_SIZE_NV = (VK_COMPONENT_TYPE_UINT64_NV - VK_COMPONENT_TYPE_FLOAT16_NV + 1),
+    VK_COMPONENT_TYPE_MAX_ENUM_NV = 0x7FFFFFFF
+} VkComponentTypeNV;
+
+typedef enum VkScopeNV {
+    VK_SCOPE_DEVICE_NV = 1,
+    VK_SCOPE_WORKGROUP_NV = 2,
+    VK_SCOPE_SUBGROUP_NV = 3,
+    VK_SCOPE_QUEUE_FAMILY_NV = 5,
+    VK_SCOPE_BEGIN_RANGE_NV = VK_SCOPE_DEVICE_NV,
+    VK_SCOPE_END_RANGE_NV = VK_SCOPE_QUEUE_FAMILY_NV,
+    VK_SCOPE_RANGE_SIZE_NV = (VK_SCOPE_QUEUE_FAMILY_NV - VK_SCOPE_DEVICE_NV + 1),
+    VK_SCOPE_MAX_ENUM_NV = 0x7FFFFFFF
+} VkScopeNV;
+
+typedef struct VkCooperativeMatrixPropertiesNV {
+    VkStructureType      sType;
+    void*                pNext;
+    uint32_t             MSize;
+    uint32_t             NSize;
+    uint32_t             KSize;
+    VkComponentTypeNV    AType;
+    VkComponentTypeNV    BType;
+    VkComponentTypeNV    CType;
+    VkComponentTypeNV    DType;
+    VkScopeNV            scope;
+} VkCooperativeMatrixPropertiesNV;
+
+typedef struct VkPhysicalDeviceCooperativeMatrixFeaturesNV {
+    VkStructureType    sType;
+    void*              pNext;
+    VkBool32           cooperativeMatrix;
+    VkBool32           cooperativeMatrixRobustBufferAccess;
+} VkPhysicalDeviceCooperativeMatrixFeaturesNV;
+
+typedef struct VkPhysicalDeviceCooperativeMatrixPropertiesNV {
+    VkStructureType       sType;
+    void*                 pNext;
+    VkShaderStageFlags    cooperativeMatrixSupportedStages;
+} VkPhysicalDeviceCooperativeMatrixPropertiesNV;
+
+
+typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceCooperativeMatrixPropertiesNV)(VkPhysicalDevice physicalDevice, uint32_t* pPropertyCount, VkCooperativeMatrixPropertiesNV* pProperties);
+
+#ifndef VK_NO_PROTOTYPES
+VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceCooperativeMatrixPropertiesNV(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t*                                   pPropertyCount,
+    VkCooperativeMatrixPropertiesNV*            pProperties);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
--- a/meson.build
+++ b/meson.build
@@ -132,7 +132,7 @@ if _drivers.contains('auto')
    elif ['arm', 'aarch64'].contains(host_machine.cpu_family())
      _drivers = [
        'kmsro', 'v3d', 'vc4', 'freedreno', 'etnaviv', 'nouveau',
-        'tegra', 'virgl', 'swrast',
+        'tegra', 'virgl', 'swrast'
      ]
    else
      error('Unknown architecture @0@. Please pass -Dgallium-drivers to set driver options. Patches gladly accepted to fix this.'.format(
@@ -154,8 +154,10 @@ with_gallium_freedreno = _drivers.contains('freedreno')
 with_gallium_softpipe = _drivers.contains('swrast')
 with_gallium_vc4 = _drivers.contains('vc4')
 with_gallium_v3d = _drivers.contains('v3d')
+with_gallium_panfrost = _drivers.contains('panfrost')
 with_gallium_etnaviv = _drivers.contains('etnaviv')
 with_gallium_tegra = _drivers.contains('tegra')
+with_gallium_iris = _drivers.contains('iris')
 with_gallium_i915 = _drivers.contains('i915')
 with_gallium_svga = _drivers.contains('svga')
 with_gallium_virgl = _drivers.contains('virgl')
@@ -209,8 +211,8 @@ endif
 if with_dri_i915 and with_gallium_i915
  error('Only one i915 provider can be built')
 endif
-if with_gallium_kmsro and not (with_gallium_vc4 or with_gallium_etnaviv or with_gallium_freedreno)
-  error('kmsro driver requires one or more renderonly drivers (vc4, etnaviv, freedreno)')
+if with_gallium_kmsro and not (with_gallium_vc4 or with_gallium_etnaviv or with_gallium_freedreno or with_gallium_panfrost)
+  error('kmsro driver requires one or more renderonly drivers (vc4, etnaviv, freedreno, panfrost)')
 endif
 if with_gallium_tegra and not with_gallium_nouveau
  error('tegra driver requires nouveau driver')
@@ -616,7 +618,8 @@ if with_gallium_st_nine
  if not with_gallium_softpipe
    error('The nine state tracker requires gallium softpipe/llvmpipe.')
  elif not (with_gallium_radeonsi or with_gallium_nouveau or with_gallium_r600
-            or with_gallium_r300 or with_gallium_svga or with_gallium_i915)
+            or with_gallium_r300 or with_gallium_svga or with_gallium_i915
+            or with_gallium_iris)
    error('The nine state tracker requires at least one non-swrast gallium driver.')
  endif
  if not with_dri3
@@ -1213,7 +1216,6 @@ if _llvm != 'false'
      with_gallium_opencl or _llvm == 'true'
    ),
    static : not _shared_llvm,
-    method : 'config-tool',
  )
  with_llvm = dep_llvm.found()
 endif
@@ -1388,14 +1390,12 @@ if with_platform_x11
      dep_xshmfence = dependency('xshmfence', version : '>= 1.1')
    endif
  endif
-  if with_glx == 'dri' or with_glx == 'gallium-xlib'
-    dep_glproto = dependency('glproto', version : '>= 1.4.14')
-  endif
-  if with_glx == 'dri' 
+  if with_glx == 'dri'
    if with_dri_platform == 'drm'
      dep_dri2proto = dependency('dri2proto', version : '>= 2.8')
      dep_xxf86vm = dependency('xxf86vm')
    endif
+    dep_glproto = dependency('glproto', version : '>= 1.4.14')
  endif
  if (with_egl or (
      with_gallium_vdpau or with_gallium_xvmc or with_gallium_xa or
@@ -1467,6 +1467,10 @@ pkg = import('pkgconfig')
 env_test = environment()
 env_test.set('NM', find_program('nm').path())

+# This quirk needs to be applied to sources with functions defined in assembly
+# as GCC LTO drops them. See: https://bugs.freedesktop.org/show_bug.cgi?id=109391
+gcc_lto_quirk = (cc.get_id() == 'gcc') ? ['-fno-lto'] : []
+
 subdir('include')
 subdir('bin')
 subdir('src')
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -60,7 +60,7 @@ option(
  choices : [
    '', 'auto', 'kmsro', 'radeonsi', 'r300', 'r600', 'nouveau', 'freedreno',
    'swrast', 'v3d', 'vc4', 'etnaviv', 'tegra', 'i915', 'svga', 'virgl',
-    'swr',
+    'swr', 'panfrost', 'iris'
  ],
  description : 'List of gallium drivers to build. If this is set to auto all drivers applicable to the target OS/architecture will be built'
 )
@@ -167,6 +167,12 @@ option(
  value : '',
  description : 'Location relative to prefix to put vulkan icds on install. Default: $datadir/vulkan/icd.d'
 )
+option(
+  'vulkan-overlay-layer',
+  type : 'boolean',
+  value : false,
+  description : 'Whether to build the vulkan overlay layer'
+)
 option(
  'shared-glapi',
  type : 'boolean',
--- a/scons/custom.py
+++ b/scons/custom.py
@@ -48,12 +48,7 @@ import source_list
 # a path directly. We want to support both, so we need to detect the SCons version,
 # for which no API is provided by SCons 8-P

-# Scons version string has consistently been in this format:
-# MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
-# so this formula should cover all versions regardless of type
-# stable, alpha or beta.
-# For simplicity alpha and beta flags are removed.
-scons_version = tuple(map(int, SCons.__version__.split('.')[:3]))
+scons_version = tuple(map(int, SCons.__version__.split('.')))

 def quietCommandLines(env):
    # Quiet command lines
--- a/scons/gallium.py
+++ b/scons/gallium.py
@@ -308,20 +308,7 @@ def generate(env):
    if env.GetOption('num_jobs') <= 1:
        env.SetOption('num_jobs', num_jobs())

-    # Speed up dependency checking.  See
-    # - https://github.com/SCons/scons/wiki/GoFastButton
-    # - https://bugs.freedesktop.org/show_bug.cgi?id=109443
-
-    # Scons version string has consistently been in this format:
-    # MajorVersion.MinorVersion.Patch[.alpha/beta.yyyymmdd]
-    # so this formula should cover all versions regardless of type
-    # stable, alpha or beta.
-    # For simplicity alpha and beta flags are removed.
-
-    scons_version = distutils.version.StrictVersion('.'.join(SCons.__version__.split('.')[:3]))
-    if scons_version < distutils.version.StrictVersion('3.0.2') or \
-       scons_version > distutils.version.StrictVersion('3.0.4'):
-        env.Decider('MD5-timestamp')
+    env.Decider('MD5-timestamp')
    env.SetOption('max_drift', 60)

    # C preprocessor options
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -136,3 +136,18 @@ libglsl_util_la_SOURCES = \
 	mesa/program/prog_parameter.c \
 	mesa/program/symbol_table.c \
 	mesa/program/dummy_errors.c
+
+EXTRA_DIST += \
+	tools/imgui/imconfig.h \
+	tools/imgui/imgui.cpp \
+	tools/imgui/imgui.h \
+	tools/imgui/imgui_draw.cpp \
+	tools/imgui/imgui_demo.cpp \
+	tools/imgui/imgui_internal.h \
+	tools/imgui/imgui_memory_editor.h \
+	tools/imgui/stb_rect_pack.h \
+	tools/imgui/stb_textedit.h \
+	tools/imgui/stb_truetype.h \
+	tools/imgui/README \
+	tools/imgui/LICENSE.txt \
+	tools/imgui/meson.build
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -367,7 +367,9 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 	info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
 	info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
 	info->has_ctx_priority = info->drm_minor >= 22;
-	info->has_local_buffers = info->drm_minor >= 20;
+	/* TODO: Enable this once the kernel handles it efficiently. */
+	info->has_local_buffers = info->drm_minor >= 20 &&
+				  !info->has_dedicated_vram;
 	info->kernel_flushes_hdp_before_ib = true;
 	info->htile_cmask_support_1d_tiling = true;
 	info->si_TA_CS_BC_BASE_ADDR_allowed = true;
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -172,6 +172,12 @@ static inline unsigned ac_get_max_simd_waves(enum radeon_family family)
 	}
 }

+static inline uint32_t
+ac_get_num_physical_sgprs(enum chip_class chip_class)
+{
+	return chip_class >= VI ? 800 : 512;
+}
+
 #ifdef __cplusplus
 }
 #endif
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -219,6 +219,16 @@ ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
 		return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
 		                      LLVMGetVectorSize(t));
 	}
+	if (LLVMGetTypeKind(t) == LLVMPointerTypeKind) {
+		switch (LLVMGetPointerAddressSpace(t)) {
+		case AC_ADDR_SPACE_GLOBAL:
+			return ctx->i64;
+		case AC_ADDR_SPACE_LDS:
+			return ctx->i32;
+		default:
+			unreachable("unhandled address space");
+		}
+	}
 	return to_integer_type_scalar(ctx, t);
 }

@@ -226,6 +236,9 @@ LLVMValueRef
 ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
 {
 	LLVMTypeRef type = LLVMTypeOf(v);
+	if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
+		return LLVMBuildPtrToInt(ctx->builder, v, ac_to_integer_type(ctx, type), "");
+	}
 	return LLVMBuildBitCast(ctx->builder, v, ac_to_integer_type(ctx, type), "");
 }

@@ -535,10 +548,11 @@ ac_build_gather_values(struct ac_llvm_context *ctx,
 /* Expand a scalar or vector to <dst_channels x type> by filling the remaining
 * channels with undef. Extract at most src_channels components from the input.
 */
-LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
-			     LLVMValueRef value,
-			     unsigned src_channels,
-			     unsigned dst_channels)
+static LLVMValueRef
+ac_build_expand(struct ac_llvm_context *ctx,
+		LLVMValueRef value,
+		unsigned src_channels,
+		unsigned dst_channels)
 {
 	LLVMTypeRef elemtype;
 	LLVMValueRef chan[dst_channels];
@@ -606,7 +620,7 @@ ac_build_fdiv(struct ac_llvm_context *ctx,
 	 * If we do (num * (1 / den)), LLVM does:
 	 *    return num * v_rcp_f32(den);
 	 */
-	LLVMValueRef one = LLVMTypeOf(num) == ctx->f64 ? ctx->f64_1 : ctx->f32_1;
+	LLVMValueRef one = LLVMConstReal(LLVMTypeOf(num), 1.0);
 	LLVMValueRef rcp = LLVMBuildFDiv(ctx->builder, one, den, "");
 	LLVMValueRef ret = LLVMBuildFMul(ctx->builder, num, rcp, "");

@@ -1364,23 +1378,74 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
 				LLVMValueRef immoffset,
 				LLVMValueRef glc)
 {
-	const char *name = "llvm.amdgcn.tbuffer.load.i32";
-	LLVMTypeRef type = ctx->i32;
-	LLVMValueRef params[] = {
-				rsrc,
-				vindex,
-				voffset,
-				soffset,
-				immoffset,
-				LLVMConstInt(ctx->i32, V_008F0C_BUF_DATA_FORMAT_16, false),
-				LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, false),
-				glc,
-				ctx->i1false,
-	};
-	LLVMValueRef res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
+	unsigned dfmt = V_008F0C_BUF_DATA_FORMAT_16;
+	unsigned nfmt = V_008F0C_BUF_NUM_FORMAT_UINT;
+	LLVMValueRef res;
+
+	if (HAVE_LLVM >= 0x0800) {
+		voffset = LLVMBuildAdd(ctx->builder, voffset, immoffset, "");
+
+		res = ac_build_llvm8_tbuffer_load(ctx, rsrc, vindex, voffset,
+						  soffset, 1, dfmt, nfmt, glc,
+						  false, true, true);
+	} else {
+		const char *name = "llvm.amdgcn.tbuffer.load.i32";
+		LLVMTypeRef type = ctx->i32;
+		LLVMValueRef params[] = {
+					rsrc,
+					vindex,
+					voffset,
+					soffset,
+					immoffset,
+					LLVMConstInt(ctx->i32, dfmt, false),
+					LLVMConstInt(ctx->i32, nfmt, false),
+					glc,
+					ctx->i1false,
+		};
+		res = ac_build_intrinsic(ctx, name, type, params, 9, 0);
+	}
+
 	return LLVMBuildTrunc(ctx->builder, res, ctx->i16, "");
 }

+LLVMValueRef
+ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
+			    LLVMValueRef rsrc,
+			    LLVMValueRef vindex,
+			    LLVMValueRef voffset,
+			    LLVMValueRef soffset,
+			    unsigned num_channels,
+			    unsigned dfmt,
+			    unsigned nfmt,
+			    bool glc,
+			    bool slc,
+			    bool can_speculate,
+			    bool structurized)
+{
+	LLVMValueRef args[6];
+	int idx = 0;
+	args[idx++] = LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, "");
+	if (structurized)
+		args[idx++] = vindex ? vindex : ctx->i32_0;
+	args[idx++] = voffset ? voffset : ctx->i32_0;
+	args[idx++] = soffset ? soffset : ctx->i32_0;
+	args[idx++] = LLVMConstInt(ctx->i32, dfmt | (nfmt << 4), 0);
+	args[idx++] = LLVMConstInt(ctx->i32, (glc ? 1 : 0) + (slc ? 2 : 0), 0);
+	unsigned func = CLAMP(num_channels, 1, 3) - 1;
+
+	LLVMTypeRef types[] = {ctx->i32, ctx->v2i32, ctx->v4i32};
+	const char *type_names[] = {"i32", "v2i32", "v4i32"};
+	const char *indexing_kind = structurized ? "struct" : "raw";
+	char name[256];
+
+	snprintf(name, sizeof(name), "llvm.amdgcn.%s.tbuffer.load.%s",
+		 indexing_kind, type_names[func]);
+
+	return ac_build_intrinsic(ctx, name, types[func], args,
+				  idx,
+				  ac_get_load_intr_attribs(can_speculate));
+}
+
 /**
 * Set range metadata on an instruction.  This can only be used on load and
 * call instructions.  If you know an instruction can only produce the values
@@ -1570,16 +1635,20 @@ ac_build_umsb(struct ac_llvm_context *ctx,
 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
 			   LLVMValueRef b)
 {
+	char name[64]; /* holds "llvm.minnum.f<bits>" */
+	snprintf(name, sizeof(name), "llvm.minnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a))); /* suffix comes from ac_get_elem_bits() on a's type */
 	LLVMValueRef args[2] = {a, b};
-	return ac_build_intrinsic(ctx, "llvm.minnum.f32", ctx->f32, args, 2,
+	return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, /* result type follows a instead of being fixed to f32 */
 				  AC_FUNC_ATTR_READNONE);
 }

 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
 			   LLVMValueRef b)
 {
+	char name[64]; /* holds "llvm.maxnum.f<bits>" */
+	snprintf(name, sizeof(name), "llvm.maxnum.f%d", ac_get_elem_bits(ctx, LLVMTypeOf(a))); /* suffix comes from ac_get_elem_bits() on a's type */
 	LLVMValueRef args[2] = {a, b};
-	return ac_build_intrinsic(ctx, "llvm.maxnum.f32", ctx->f32, args, 2,
+	return ac_build_intrinsic(ctx, name, LLVMTypeOf(a), args, 2, /* result type follows a instead of being fixed to f32 */
 				  AC_FUNC_ATTR_READNONE);
 }

@@ -1606,8 +1675,9 @@ LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a,

 LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
 {
-	return ac_build_fmin(ctx, ac_build_fmax(ctx, value, ctx->f32_0),
-			     ctx->f32_1);
+	LLVMTypeRef t = LLVMTypeOf(value); /* build the 0.0 and 1.0 bounds in value's own type */
+	return ac_build_fmin(ctx, ac_build_fmax(ctx, value, LLVMConstReal(t, 0.0)),
+			     LLVMConstReal(t, 1.0)); /* i.e. fmin(fmax(value, 0.0), 1.0) */
 }

 void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
@@ -2039,30 +2109,11 @@ LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
 LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
 			    unsigned bitsize)
 {
-	LLVMValueRef cmp, val, zero, one;
-	LLVMTypeRef type;
-
-	switch (bitsize) {
-	case 64:
-		type = ctx->i64;
-		zero = ctx->i64_0;
-		one = ctx->i64_1;
-		break;
-	case 32:
-		type = ctx->i32;
-		zero = ctx->i32_0;
-		one = ctx->i32_1;
-		break;
-	case 16:
-		type = ctx->i16;
-		zero = ctx->i16_0;
-		one = ctx->i16_1;
-		break;
-	default:
-		unreachable(!"invalid bitsize");
-		break;
-	}
+	LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, bitsize);
+	LLVMValueRef zero = LLVMConstInt(type, 0, false);
+	LLVMValueRef one = LLVMConstInt(type, 1, false);

+	LLVMValueRef cmp, val;
 	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
 	val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
 	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, "");
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -171,9 +171,6 @@ LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
 		       LLVMValueRef *values,
 		       unsigned value_count);
-LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
-			     LLVMValueRef value,
-			     unsigned src_channels, unsigned dst_channels);
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
 				     LLVMValueRef value,
 				     unsigned num_channels);
@@ -309,6 +306,20 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
 				LLVMValueRef immoffset,
 				LLVMValueRef glc);

+LLVMValueRef
+ac_build_llvm8_tbuffer_load(struct ac_llvm_context *ctx,
+			    LLVMValueRef rsrc,
+			    LLVMValueRef vindex,
+			    LLVMValueRef voffset,
+			    LLVMValueRef soffset,
+			    unsigned num_channels,
+			    unsigned dfmt,
+			    unsigned nfmt,
+			    bool glc,
+			    bool slc,
+			    bool can_speculate,
+			    bool structurized);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);

--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -657,8 +657,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		break;
 	case nir_op_frcp:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
-		result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
-				       src[0]);
+		result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
 		break;
 	case nir_op_iand:
 		result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
@@ -789,8 +788,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 	case nir_op_frsq:
 		result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
 		                              ac_to_float_type(&ctx->ac, def_type), src[0]);
-		result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
-				       result);
+		result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
 		break;
 	case nir_op_frexp_exp:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
@@ -803,6 +801,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64",
 					    ctx->ac.f64, src, 1, AC_FUNC_ATTR_READNONE);
 		break;
+	case nir_op_fpow:
+		result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
+		                              ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
+		break;
 	case nir_op_fmax:
 		result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
 		                              ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
@@ -831,8 +833,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		break;
 	case nir_op_ldexp:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
-		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32)
+		if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
 			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
+		else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
+			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
 		else
 			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
 		break;
@@ -884,6 +888,8 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		break;
 	case nir_op_f2f16_rtz:
 		src[0] = ac_to_float(&ctx->ac, src[0]);
+		if (LLVMTypeOf(src[0]) == ctx->ac.f64)
+			src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
 		LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
 		result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
 		result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
@@ -1019,17 +1025,10 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		LLVMValueRef in[3];
 		for (unsigned chan = 0; chan < 3; chan++)
 			in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
-		results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
+		results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
 						ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
-		results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
+		results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
 						ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
-		LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
-						     ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
-		results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
-		results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
-		LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
-		results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
-		results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
 		result = ac_build_gather_values(&ctx->ac, results, 2);
 		break;
 	}
@@ -1121,6 +1120,10 @@ static void visit_load_const(struct ac_nir_context *ctx,

 	for (unsigned i = 0; i < instr->def.num_components; ++i) {
 		switch (instr->def.bit_size) {
+		case 8:
+			values[i] = LLVMConstInt(element_type,
+			                         instr->value.u8[i], false);
+			break;
 		case 16:
 			values[i] = LLVMConstInt(element_type,
 			                         instr->value.u16[i], false);
@@ -1399,10 +1402,31 @@ static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
                                             nir_intrinsic_instr *instr)
 {
 	LLVMValueRef ptr, addr;
+	LLVMValueRef src0 = get_src(ctx, instr->src[0]);
+	unsigned index = nir_intrinsic_base(instr);

-	addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
-	addr = LLVMBuildAdd(ctx->ac.builder, addr,
-			    get_src(ctx, instr->src[0]), "");
+	addr = LLVMConstInt(ctx->ac.i32, index, 0);
+	addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
+
+	/* Load constant values from user SGPRs when possible; otherwise
+	 * fall back to the default path that loads directly from memory.
+	 */
+	if (LLVMIsConstant(src0) &&
+	    instr->dest.ssa.bit_size == 32) {
+		unsigned count = instr->dest.ssa.num_components;
+		unsigned offset = index;
+
+		offset += LLVMConstIntGetZExtValue(src0);
+		offset /= 4;
+
+		offset -= ctx->abi->base_inline_push_consts;
+
+		if (offset + count <= ctx->abi->num_inline_push_consts) {
+			return ac_build_gather_values(&ctx->ac,
+						      ctx->abi->inline_push_consts + offset,
+						      count);
+		}
+	}

 	ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);

@@ -1885,10 +1909,19 @@ static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
 	return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
 }

+static unsigned /* size in bytes: 4 for booleans, otherwise glsl_get_bit_size(type) / 8 */
+type_scalar_size_bytes(const struct glsl_type *type)
+{
+   assert(glsl_type_is_vector_or_scalar(type) ||
+          glsl_type_is_matrix(type)); /* only vector, scalar and matrix types are accepted */
+   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
+}
+
 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 				   nir_intrinsic_instr *instr)
 {
-	nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
+	nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
+	nir_variable *var = nir_deref_instr_get_variable(deref);

 	LLVMValueRef values[8];
 	int idx = 0;
@@ -1898,7 +1931,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 	LLVMValueRef ret;
 	unsigned const_index;
 	unsigned stride = 4;
-	int mode = nir_var_mem_shared;
+	int mode = deref->mode;
 	
 	if (var) {
 		bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
@@ -1907,7 +1940,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 		comp = var->data.location_frac;
 		mode = var->data.mode;

-		get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), vs_in, NULL, NULL,
+		get_deref_offset(ctx, deref, vs_in, NULL, NULL,
 				 &const_index, &indir_index);

 		if (var->data.compact) {
@@ -1917,7 +1950,10 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 		}
 	}

-	if (instr->dest.ssa.bit_size == 64)
+	if (instr->dest.ssa.bit_size == 64 &&
+	    (deref->mode == nir_var_shader_in ||
+	     deref->mode == nir_var_shader_out ||
+	     deref->mode == nir_var_function_temp))
 		ve *= 2;

 	switch (mode) {
@@ -1931,8 +1967,8 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 			LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
 			LLVMValueRef indir_index;
 			unsigned const_index, vertex_index;
-			get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
-			                 false, &vertex_index, NULL, &const_index, &indir_index);
+			get_deref_offset(ctx, deref, false, &vertex_index, NULL,
+			                 &const_index, &indir_index);

 			return ctx->abi->load_inputs(ctx->abi, var->data.location,
 						     var->data.driver_location,
@@ -2006,6 +2042,32 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
 			}
 		}
 		break;
+	case nir_var_mem_global:  {
+		LLVMValueRef address = get_src(ctx, instr->src[0]);
+		unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+		unsigned natural_stride = type_scalar_size_bytes(deref->type);
+		unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+
+		LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
+		if (stride != natural_stride) {
+			LLVMTypeRef ptr_type =  LLVMPointerType(LLVMGetElementType(result_type),
+			                                        LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+
+			for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
+				LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
+				values[i] = LLVMBuildLoad(ctx->ac.builder,
+				                          ac_build_gep_ptr(&ctx->ac, address, offset), "");
+			}
+			return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
+		} else {
+			LLVMTypeRef ptr_type =  LLVMPointerType(result_type,
+			                                        LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+			LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
+			return val;
+		}
+	}
 	default:
 		unreachable("unhandle variable mode");
 	}
@@ -2040,7 +2102,9 @@ visit_store_var(struct ac_nir_context *ctx,
 		}
 	}

-	if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
+	if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
+	    (deref->mode == nir_var_shader_out ||
+	     deref->mode == nir_var_function_temp)) {

 		src = LLVMBuildBitCast(ctx->ac.builder, src,
 		                       LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
@@ -2124,33 +2188,52 @@ visit_store_var(struct ac_nir_context *ctx,
 			}
 		}
 		break;
+
+	case nir_var_mem_global:
 	case nir_var_mem_shared: {
 		int writemask = instr->const_index[0];
 		LLVMValueRef address = get_src(ctx, instr->src[0]);
 		LLVMValueRef val = get_src(ctx, instr->src[1]);
-		if (writemask == (1u << ac_get_llvm_num_components(val)) - 1) {
-			val = LLVMBuildBitCast(
-			   ctx->ac.builder, val,
-			   LLVMGetElementType(LLVMTypeOf(address)), "");
+
+		unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
+		unsigned natural_stride = type_scalar_size_bytes(deref->type);
+		unsigned stride = explicit_stride ? explicit_stride : natural_stride;
+
+		LLVMTypeRef ptr_type =  LLVMPointerType(LLVMTypeOf(val),
+							LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+		address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+
+		if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
+		    stride == natural_stride) {
+			LLVMTypeRef ptr_type =  LLVMPointerType(LLVMTypeOf(val),
+			                                        LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
+
+			val = LLVMBuildBitCast(ctx->ac.builder, val,
+			                       LLVMGetElementType(LLVMTypeOf(address)), "");
 			LLVMBuildStore(ctx->ac.builder, val, address);
 		} else {
+			LLVMTypeRef ptr_type =  LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
+			                                        LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
+			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
 			for (unsigned chan = 0; chan < 4; chan++) {
 				if (!(writemask & (1 << chan)))
 					continue;
-				LLVMValueRef ptr =
-					LLVMBuildStructGEP(ctx->ac.builder,
-							   address, chan, "");
+
+				LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);
+
+				LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
 				LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
 									chan);
-				src = LLVMBuildBitCast(
-				   ctx->ac.builder, src,
-				   LLVMGetElementType(LLVMTypeOf(ptr)), "");
+				src = LLVMBuildBitCast(ctx->ac.builder, src,
+				                       LLVMGetElementType(LLVMTypeOf(ptr)), "");
 				LLVMBuildStore(ctx->ac.builder, src, ptr);
 			}
 		}
 		break;
 	}
 	default:
+		abort();
 		break;
 	}
 }
@@ -2359,12 +2442,10 @@ static void get_image_coords(struct ac_nir_context *ctx,
 }

 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
-                                                const nir_intrinsic_instr *instr,
-						bool write, bool atomic)
+                                                const nir_intrinsic_instr *instr, bool write)
 {
 	LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
-	if (ctx->abi->gfx9_stride_size_workaround ||
-	    (ctx->abi->gfx9_stride_size_workaround_for_atomic && atomic)) {
+	if (ctx->abi->gfx9_stride_size_workaround) {
 		LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
 		LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
 		stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
@@ -2397,7 +2478,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
 		unsigned num_channels = util_last_bit(mask);
 		LLVMValueRef rsrc, vindex;

-		rsrc = get_image_buffer_descriptor(ctx, instr, false, false);
+		rsrc = get_image_buffer_descriptor(ctx, instr, false);
 		vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 						 ctx->ac.i32_0, "");

@@ -2441,12 +2522,12 @@ static void visit_image_store(struct ac_nir_context *ctx,
 	if (dim == GLSL_SAMPLER_DIM_BUF) {
 		char name[48];
 		const char *types[] = { "f32", "v2f32", "v4f32" };
-		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true, false);
+		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
 		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
 		unsigned src_channels = ac_get_llvm_num_components(src);

 		if (src_channels == 3)
-			src = ac_build_expand(&ctx->ac, src, 3, 4);
+			src = ac_build_expand_to_vec4(&ctx->ac, src, 3);

 		params[0] = src; /* data */
 		params[1] = rsrc;
@@ -2537,14 +2618,11 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
 	params[param_count++] = get_src(ctx, instr->src[3]);

 	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
-		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true, true);
+		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
 		params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
 								ctx->ac.i32_0, ""); /* vindex */
 		params[param_count++] = ctx->ac.i32_0; /* voffset */
-		if (HAVE_LLVM >= 0x900) {
-			/* XXX: The new raw/struct atomic intrinsics are buggy
-			 * with LLVM 8, see r358579.
-			 */
+		if (HAVE_LLVM >= 0x800) {
 			params[param_count++] = ctx->ac.i32_0; /* soffset */
 			params[param_count++] = ctx->ac.i32_0;  /* slc */

@@ -3105,8 +3183,7 @@ static void visit_intrinsic(struct ac_nir_context *ctx,
 			ctx->abi->frag_pos[2],
 			ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
 		};
-		result = ac_to_integer(&ctx->ac,
-		                       ac_build_gather_values(&ctx->ac, values, 4));
+		result = ac_build_gather_values(&ctx->ac, values, 4);
 		break;
 	}
 	case nir_intrinsic_load_front_face:
@@ -3915,7 +3992,8 @@ glsl_to_llvm_type(struct ac_llvm_context *ac,
 static void visit_deref(struct ac_nir_context *ctx,
                        nir_deref_instr *instr)
 {
-	if (instr->mode != nir_var_mem_shared)
+	if (instr->mode != nir_var_mem_shared &&
+	    instr->mode != nir_var_mem_global)
 		return;

 	LLVMValueRef result = NULL;
@@ -3926,22 +4004,79 @@ static void visit_deref(struct ac_nir_context *ctx,
 		break;
 	}
 	case nir_deref_type_struct:
-		result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
-		                       LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
+		if (instr->mode == nir_var_mem_global) {
+			nir_deref_instr *parent = nir_deref_instr_parent(instr);
+			uint64_t offset = glsl_get_struct_field_offset(parent->type,
+                                                                       instr->strct.index);
+			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
+			                       LLVMConstInt(ctx->ac.i32, offset, 0));
+		} else {
+			result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
+			                       LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
+		}
 		break;
 	case nir_deref_type_array:
-		result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
-		                       get_src(ctx, instr->arr.index));
+		if (instr->mode == nir_var_mem_global) {
+			nir_deref_instr *parent = nir_deref_instr_parent(instr);
+			unsigned stride = glsl_get_explicit_stride(parent->type);
+
+			if ((glsl_type_is_matrix(parent->type) &&
+			     glsl_matrix_type_is_row_major(parent->type)) ||
+			    (glsl_type_is_vector(parent->type) && stride == 0))
+				stride = type_scalar_size_bytes(parent->type);
+
+			assert(stride > 0);
+			LLVMValueRef index = get_src(ctx, instr->arr.index);
+			if (LLVMTypeOf(index) != ctx->ac.i64)
+				index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
+
+			LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
+
+			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
+		} else {
+			result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
+			                       get_src(ctx, instr->arr.index));
+		}
 		break;
 	case nir_deref_type_ptr_as_array:
-		result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
-		                          get_src(ctx, instr->arr.index));
+		if (instr->mode == nir_var_mem_global) {
+			unsigned stride = nir_deref_instr_ptr_as_array_stride(instr);
+
+			LLVMValueRef index = get_src(ctx, instr->arr.index);
+			if (LLVMTypeOf(index) != ctx->ac.i64)
+				index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
+
+			LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
+
+			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
+		} else {
+			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
+			                       get_src(ctx, instr->arr.index));
+		}
 		break;
 	case nir_deref_type_cast: {
 		result = get_src(ctx, instr->parent);

-		LLVMTypeRef pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
-		LLVMTypeRef type = LLVMPointerType(pointee_type, AC_ADDR_SPACE_LDS);
+		/* We can't use the structs from LLVM because the shader
+		 * specifies its own offsets. */
+		LLVMTypeRef pointee_type = ctx->ac.i8;
+		if (instr->mode == nir_var_mem_shared)
+			pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
+
+		unsigned address_space;
+
+		switch(instr->mode) {
+		case nir_var_mem_shared:
+			address_space = AC_ADDR_SPACE_LDS;
+			break;
+		case nir_var_mem_global:
+			address_space = AC_ADDR_SPACE_GLOBAL;
+			break;
+		default:
+			unreachable("Unhandled address space");
+		}
+
+		LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

 		if (LLVMTypeOf(result) != type) {
 			if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
--- a/src/amd/common/ac_shader_abi.h
+++ b/src/amd/common/ac_shader_abi.h
@@ -32,6 +32,8 @@ struct nir_variable;

 #define AC_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)

+#define AC_MAX_INLINE_PUSH_CONSTS 8
+
 enum ac_descriptor_type {
 	AC_DESC_IMAGE,
 	AC_DESC_FMASK,
@@ -66,6 +68,9 @@ struct ac_shader_abi {

 	/* Vulkan only */
 	LLVMValueRef push_constants;
+	LLVMValueRef inline_push_consts[AC_MAX_INLINE_PUSH_CONSTS];
+	unsigned num_inline_push_consts;
+	unsigned base_inline_push_consts;
 	LLVMValueRef view_index;

 	LLVMValueRef outputs[AC_LLVM_MAX_OUTPUTS * 4];
@@ -195,7 +200,6 @@ struct ac_shader_abi {
 	/* Whether to workaround GFX9 ignoring the stride for the buffer size if IDXEN=0
 	* and LLVM optimizes an indexed load with constant index to IDXEN=0. */
 	bool gfx9_stride_size_workaround;
-	bool gfx9_stride_size_workaround_for_atomic;
 };

 #endif /* AC_SHADER_ABI_H */
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -338,15 +338,14 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 		unsigned fence_offset, eop_bug_offset;
 		void *fence_ptr;

-		radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 8, &fence_offset,
+		radv_cmd_buffer_upload_alloc(cmd_buffer, 8, 0, &fence_offset,
 					     &fence_ptr);
-
 		cmd_buffer->gfx9_fence_va =
 			radv_buffer_get_va(cmd_buffer->upload.upload_bo);
 		cmd_buffer->gfx9_fence_va += fence_offset;

 		/* Allocate a buffer for the EOP bug on GFX9. */
-		radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 8,
+		radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, 0,
 					     &eop_bug_offset, &fence_ptr);
 		cmd_buffer->gfx9_eop_bug_va =
 			radv_buffer_get_va(cmd_buffer->upload.upload_bo);
@@ -417,8 +416,6 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
 			     unsigned *out_offset,
 			     void **ptr)
 {
-	assert(util_is_power_of_two_nonzero(alignment));
-
 	uint64_t offset = align(cmd_buffer->upload.offset, alignment);
 	if (offset + size > cmd_buffer->upload.size) {
 		if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
@@ -631,6 +628,23 @@ radv_emit_descriptor_pointers(struct radv_cmd_buffer *cmd_buffer,
 	}
 }

+static void /* emits `count` entries of `values` at the user-SGPR location looked up for (stage, idx) */
+radv_emit_inline_push_consts(struct radv_cmd_buffer *cmd_buffer,
+			     struct radv_pipeline *pipeline,
+			     gl_shader_stage stage,
+			     int idx, int count, uint32_t *values)
+{
+	struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
+	uint32_t base_reg = pipeline->user_data_0[stage];
+	if (loc->sgpr_idx == -1) /* an sgpr_idx of -1 means nothing is emitted for this stage */
+		return;
+	/* The looked-up location must hold exactly as many SGPRs as values being written. */
+	assert(loc->num_sgprs == count);
+
+	radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, count); /* register offset advances by 4 per SGPR index */
+	radeon_emit_array(cmd_buffer->cs, values, count);
+}
+
 static void
 radv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
 			      struct radv_pipeline *pipeline)
@@ -1208,10 +1222,10 @@ radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
 	if (!framebuffer || !subpass)
 		return;

-	att_idx = subpass->depth_stencil_attachment.attachment;
-	if (att_idx == VK_ATTACHMENT_UNUSED)
+	if (!subpass->depth_stencil_attachment)
 		return;

+	att_idx = subpass->depth_stencil_attachment->attachment;
 	att = &framebuffer->attachments[att_idx];
 	if (att->attachment->image != image)
 		return;
@@ -1225,7 +1239,7 @@ radv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
 	 */
 	if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
 	    ds_clear_value.depth == 0.0) {
-		VkImageLayout layout = subpass->depth_stencil_attachment.layout;
+		VkImageLayout layout = subpass->depth_stencil_attachment->layout;

 		radv_update_zrange_precision(cmd_buffer, &att->ds, image,
 					     layout, false);
@@ -1258,7 +1272,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
 		++reg_count;

-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, cmd_buffer->state.predicating));
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
 	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 			S_370_WR_CONFIRM(1) |
 			S_370_ENGINE_SEL(V_370_PFP));
@@ -1282,7 +1296,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
 	uint64_t va = radv_buffer_get_va(image->bo);
 	va += image->offset + image->tc_compat_zrange_offset;

-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
 	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 			S_370_WR_CONFIRM(1) |
 			S_370_ENGINE_SEL(V_370_PFP));
@@ -1476,7 +1490,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,

 	assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));

-	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, cmd_buffer->state.predicating));
+	radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
 	radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
 			S_370_WR_CONFIRM(1) |
 			S_370_ENGINE_SEL(V_370_PFP));
@@ -1577,9 +1591,9 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
 			num_bpp64_colorbufs++;
 	}

-	if(subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
-		int idx = subpass->depth_stencil_attachment.attachment;
-		VkImageLayout layout = subpass->depth_stencil_attachment.layout;
+	if (subpass->depth_stencil_attachment) {
+		int idx = subpass->depth_stencil_attachment->attachment;
+		VkImageLayout layout = subpass->depth_stencil_attachment->layout;
 		struct radv_attachment_info *att = &framebuffer->attachments[idx];
 		struct radv_image *image = att->attachment->image;
 		radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, att->attachment->bo);
@@ -1903,6 +1917,7 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 		radv_get_descriptors_state(cmd_buffer, bind_point);
 	struct radv_pipeline_layout *layout = pipeline->layout;
 	struct radv_shader_variant *shader, *prev_shader;
+	bool need_push_constants = false;
 	unsigned offset;
 	void *ptr;
 	uint64_t va;
@@ -1912,37 +1927,56 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
 	    (!layout->push_constant_size && !layout->dynamic_offset_count))
 		return;

-	if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
-					  16 * layout->dynamic_offset_count,
-					  256, &offset, &ptr))
-		return;
-
-	memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
-	memcpy((char*)ptr + layout->push_constant_size,
-	       descriptors_state->dynamic_buffers,
-	       16 * layout->dynamic_offset_count);
-
-	va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
-	va += offset;
-
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-	                                                   cmd_buffer->cs, MESA_SHADER_STAGES * 4);
-
-	prev_shader = NULL;
 	radv_foreach_stage(stage, stages) {
-		shader = radv_get_shader(pipeline, stage);
+		if (!pipeline->shaders[stage])
+			continue;

-		/* Avoid redundantly emitting the address for merged stages. */
-		if (shader && shader != prev_shader) {
-			radv_emit_userdata_address(cmd_buffer, pipeline, stage,
-						   AC_UD_PUSH_CONSTANTS, va);
+		need_push_constants |= pipeline->shaders[stage]->info.info.loads_push_constants;
+		need_push_constants |= pipeline->shaders[stage]->info.info.loads_dynamic_offsets;

-			prev_shader = shader;
+		uint8_t base = pipeline->shaders[stage]->info.info.base_inline_push_consts;
+		uint8_t count = pipeline->shaders[stage]->info.info.num_inline_push_consts;
+
+		radv_emit_inline_push_consts(cmd_buffer, pipeline, stage,
+					     AC_UD_INLINE_PUSH_CONSTANTS,
+					     count,
+					     (uint32_t *)&cmd_buffer->push_constants[base * 4]);
+	}
+
+	if (need_push_constants) {
+		if (!radv_cmd_buffer_upload_alloc(cmd_buffer, layout->push_constant_size +
+						  16 * layout->dynamic_offset_count,
+						  256, &offset, &ptr))
+			return;
+
+		memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
+		memcpy((char*)ptr + layout->push_constant_size,
+		       descriptors_state->dynamic_buffers,
+		       16 * layout->dynamic_offset_count);
+
+		va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
+		va += offset;
+
+		MAYBE_UNUSED unsigned cdw_max =
+			radeon_check_space(cmd_buffer->device->ws,
+	                                   cmd_buffer->cs, MESA_SHADER_STAGES * 4);
+
+		prev_shader = NULL;
+		radv_foreach_stage(stage, stages) {
+			shader = radv_get_shader(pipeline, stage);
+
+			/* Avoid redundantly emitting the address for merged stages. */
+			if (shader && shader != prev_shader) {
+				radv_emit_userdata_address(cmd_buffer, pipeline, stage,
+							   AC_UD_PUSH_CONSTANTS, va);
+
+				prev_shader = shader;
+			}
 		}
+		assert(cmd_buffer->cs->cdw <= cdw_max);
 	}

 	cmd_buffer->push_constant_stages &= ~stages;
-	assert(cmd_buffer->cs->cdw <= cdw_max);
 }

 static void
@@ -2157,7 +2191,6 @@ radv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer,
 	ia_multi_vgt_param =
 		si_get_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1,
 					  draw_info->indirect,
-					  !!draw_info->strmout_buffer,
 					  draw_info->indirect ? 0 : draw_info->count);

 	if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
@@ -2428,28 +2461,8 @@ static void radv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buf

 void
 radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
-			    const struct radv_subpass *subpass, bool transitions)
+			    const struct radv_subpass *subpass)
 {
-	if (transitions) {
-		radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
-
-		for (unsigned i = 0; i < subpass->color_count; ++i) {
-			if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
-				radv_handle_subpass_image_transition(cmd_buffer,
-				                                     subpass->color_attachments[i]);
-		}
-
-		for (unsigned i = 0; i < subpass->input_count; ++i) {
-			radv_handle_subpass_image_transition(cmd_buffer,
-							subpass->input_attachments[i]);
-		}
-
-		if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
-			radv_handle_subpass_image_transition(cmd_buffer,
-							subpass->depth_stencil_attachment);
-		}
-	}
-
 	cmd_buffer->state.subpass = subpass;

 	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
@@ -2632,7 +2645,7 @@ VkResult radv_BeginCommandBuffer(
 		if (result != VK_SUCCESS)
 			return result;

-		radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
+		radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
 	}

 	if (unlikely(cmd_buffer->device->trace_bo)) {
@@ -3412,6 +3425,69 @@ void radv_TrimCommandPool(
 	}
 }

+static uint32_t
+radv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_cmd_state *state = &cmd_buffer->state;
+	uint32_t subpass_id = state->subpass - state->pass->subpasses;
+
+	/* Index of the current subpass in state->pass->subpasses; meaningful only
+	 * while state->subpass points into that array (at most subpass_count - 1).
+	 */
+	assert(subpass_id < state->pass->subpass_count);
+	return subpass_id;
+}
+
+static void
+radv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer,
+			      uint32_t subpass_id)
+{
+	struct radv_cmd_state *state = &cmd_buffer->state;
+	struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
+	/* Start subpass subpass_id: start barrier, layout transitions, bind it, then clear it. */
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
+							   cmd_buffer->cs, 2048);
+
+	radv_subpass_barrier(cmd_buffer, &subpass->start_barrier);
+	/* Transition each used attachment to the layout recorded in subpass->attachments[i]. */
+	for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+		const uint32_t a = subpass->attachments[i].attachment;
+		if (a == VK_ATTACHMENT_UNUSED)
+			continue;
+
+		radv_handle_subpass_image_transition(cmd_buffer,
+						     subpass->attachments[i]);
+	}
+
+	radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
+	radv_cmd_buffer_clear_subpass(cmd_buffer);
+	/* Everything emitted above must fit in the space reserved by radeon_check_space(). */
+	assert(cmd_buffer->cs->cdw <= cdw_max);
+}
+
+static void
+radv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_cmd_state *state = &cmd_buffer->state;
+	const struct radv_subpass *subpass = state->subpass;
+	uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
+	/* End the current subpass: resolve it, then apply final layouts where due. */
+	radv_cmd_buffer_resolve_subpass(cmd_buffer);
+
+	for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
+		const uint32_t a = subpass->attachments[i].attachment;
+		if (a == VK_ATTACHMENT_UNUSED)
+			continue;
+		/* Only attachments whose last_subpass_idx is this subpass are transitioned here. */
+		if (state->pass->attachments[a].last_subpass_idx != subpass_id)
+			continue;
+
+		VkImageLayout layout = state->pass->attachments[a].final_layout;
+		radv_handle_subpass_image_transition(cmd_buffer,
+		                      (struct radv_subpass_attachment){a, layout});
+	}
+}
+
 void radv_CmdBeginRenderPass(
 	VkCommandBuffer                             commandBuffer,
 	const VkRenderPassBeginInfo*                pRenderPassBegin,
@@ -3420,10 +3496,7 @@ void radv_CmdBeginRenderPass(
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBegin->renderPass);
 	RADV_FROM_HANDLE(radv_framebuffer, framebuffer, pRenderPassBegin->framebuffer);
-
-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
-							   cmd_buffer->cs, 2048);
-	MAYBE_UNUSED VkResult result;
+	VkResult result;

 	cmd_buffer->state.framebuffer = framebuffer;
 	cmd_buffer->state.pass = pass;
@@ -3433,10 +3506,7 @@ void radv_CmdBeginRenderPass(
 	if (result != VK_SUCCESS)
 		return;

-	radv_cmd_buffer_set_subpass(cmd_buffer, pass->subpasses, true);
-	assert(cmd_buffer->cs->cdw <= cdw_max);
-
-	radv_cmd_buffer_clear_subpass(cmd_buffer);
+	radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
 }

 void radv_CmdBeginRenderPass2KHR(
@@ -3454,13 +3524,9 @@ void radv_CmdNextSubpass(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

-	radv_cmd_buffer_resolve_subpass(cmd_buffer);
-
-	radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
-					      2048);
-
-	radv_cmd_buffer_set_subpass(cmd_buffer, cmd_buffer->state.subpass + 1, true);
-	radv_cmd_buffer_clear_subpass(cmd_buffer);
+	uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
+	radv_cmd_buffer_end_subpass(cmd_buffer);
+	radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
 }

 void radv_CmdNextSubpass2KHR(
@@ -4326,16 +4392,10 @@ void radv_CmdEndRenderPass(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

+	radv_cmd_buffer_end_subpass(cmd_buffer);
+
 	radv_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);

-	radv_cmd_buffer_resolve_subpass(cmd_buffer);
-
-	for (unsigned i = 0; i < cmd_buffer->state.framebuffer->attachment_count; ++i) {
-		VkImageLayout layout = cmd_buffer->state.pass->attachments[i].final_layout;
-		radv_handle_subpass_image_transition(cmd_buffer,
-		                      (struct radv_subpass_attachment){i, layout});
-	}
-
 	vk_free(&cmd_buffer->pool->alloc, cmd_buffer->state.attachments);

 	cmd_buffer->state.pass = NULL;
@@ -4407,15 +4467,10 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
 	if (!radv_image_has_htile(image))
 		return;

-	if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
-		uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
-
-		if (radv_layout_is_htile_compressed(image, dst_layout,
-						    dst_queue_mask)) {
-			clear_value = 0;
-		}
-
-		radv_initialize_htile(cmd_buffer, image, range, clear_value);
+	if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
+	           radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
+		/* TODO: merge with the clear if applicable */
+		radv_initialize_htile(cmd_buffer, image, range, 0);
 	} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
 	           radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
 		uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
@@ -4600,6 +4655,9 @@ static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
 			return;
 	}

+	if (src_layout == dst_layout)
+		return;
+
 	unsigned src_queue_mask =
 		radv_image_queue_family_mask(image, src_family,
 					     cmd_buffer->queue_family_index);
@@ -4624,6 +4682,7 @@ struct radv_barrier_info {
 	uint32_t eventCount;
 	const VkEvent *pEvents;
 	VkPipelineStageFlags srcStageMask;
+	VkPipelineStageFlags dstStageMask; /* radv_barrier() skips the source-stage flush when this is exactly BOTTOM_OF_PIPE */
 };

 static void
@@ -4675,7 +4734,19 @@ radv_barrier(struct radv_cmd_buffer *cmd_buffer,
 		                                        image);
 	}

-	radv_stage_flush(cmd_buffer, info->srcStageMask);
+	/* The Vulkan spec 1.1.98 says:
+	 *
+	 * "An execution dependency with only
+	 *  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT in the destination stage mask
+	 *  will only prevent that stage from executing in subsequently
+	 *  submitted commands. As this stage does not perform any actual
+	 *  execution, this is not observable - in effect, it does not delay
+	 *  processing of subsequent commands. Similarly an execution dependency
+	 *  with only VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT in the source stage mask
+	 *  will effectively not wait for any prior commands to complete."
+	 */
+	if (info->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+		radv_stage_flush(cmd_buffer, info->srcStageMask);
 	cmd_buffer->state.flush_bits |= src_flush_bits;

 	for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) {
@@ -4716,6 +4787,7 @@ void radv_CmdPipelineBarrier(
 	info.eventCount = 0;
 	info.pEvents = NULL;
 	info.srcStageMask = srcStageMask;
+	info.dstStageMask = destStageMask;

 	radv_barrier(cmd_buffer, memoryBarrierCount, pMemoryBarriers,
 		     bufferMemoryBarrierCount, pBufferMemoryBarriers,
@@ -4845,11 +4917,8 @@ void radv_CmdBeginConditionalRenderingEXT(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
-	struct radeon_cmdbuf *cs = cmd_buffer->cs;
 	bool draw_visible = true;
-	uint64_t pred_value = 0;
-	uint64_t va, new_va;
-	unsigned pred_offset;
+	uint64_t va;

 	va = radv_buffer_get_va(buffer->bo) + pConditionalRenderingBegin->offset;

@@ -4865,51 +4934,13 @@ void radv_CmdBeginConditionalRenderingEXT(

 	si_emit_cache_flush(cmd_buffer);

-	/* From the Vulkan spec 1.1.107:
-	 *
-	 * "If the 32-bit value at offset in buffer memory is zero, then the
-	 *  rendering commands are discarded, otherwise they are executed as
-	 *  normal. If the value of the predicate in buffer memory changes while
-	 *  conditional rendering is active, the rendering commands may be
-	 *  discarded in an implementation-dependent way. Some implementations
-	 *  may latch the value of the predicate upon beginning conditional
-	 *  rendering while others may read it before every rendering command."
-	 *
-	 * But, the AMD hardware treats the predicate as a 64-bit value which
-	 * means we need a workaround in the driver. Luckily, it's not required
-	 * to support if the value changes when predication is active.
-	 *
-	 * The workaround is as follows:
-	 * 1) allocate a 64-value in the upload BO and initialize it to 0
-	 * 2) copy the 32-bit predicate value to the upload BO
-	 * 3) use the new allocated VA address for predication
-	 *
-	 * Based on the conditionalrender demo, it's faster to do the COPY_DATA
-	 * in ME  (+ sync PFP) instead of PFP.
-	 */
-	radv_cmd_buffer_upload_data(cmd_buffer, 8, 16, &pred_value, &pred_offset);
-
-	new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
-
-	radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-	radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
-			COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
-			COPY_DATA_WR_CONFIRM);
-	radeon_emit(cs, va);
-	radeon_emit(cs, va >> 32);
-	radeon_emit(cs, new_va);
-	radeon_emit(cs, new_va >> 32);
-
-	radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
-	radeon_emit(cs, 0);
-
 	/* Enable predication for this command buffer. */
-	si_emit_set_predication_state(cmd_buffer, draw_visible, new_va);
+	si_emit_set_predication_state(cmd_buffer, draw_visible, va);
 	cmd_buffer->state.predicating = true;

 	/* Store conditional rendering user info. */
 	cmd_buffer->state.predication_type = draw_visible;
-	cmd_buffer->state.predication_va = new_va;
+	cmd_buffer->state.predication_va = va;
 }

 void radv_CmdEndConditionalRenderingEXT(
@@ -4953,7 +4984,7 @@ void radv_CmdBindTransformFeedbackBuffersEXT(
 		enabled_mask |= 1 << idx;
 	}

-	cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
+	cmd_buffer->state.streamout.enabled_mask |= enabled_mask; /* accumulate: earlier bindings outside this range stay enabled */

 	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
 }
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -111,7 +111,6 @@ radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
 	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
 	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
 	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
-	case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
 	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
 	case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
 	default: chip_string = "AMD RADV unknown"; break;
@@ -338,7 +337,7 @@ radv_physical_device_init(struct radv_physical_device *device,
 	    device->rad_info.chip_class > GFX9)
 		fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

-	radv_get_driver_uuid(&device->driver_uuid);
+	radv_get_driver_uuid(&device->driver_uuid);
 	radv_get_device_uuid(&device->rad_info, &device->device_uuid);

 	if (device->rad_info.family == CHIP_STONEY ||
@@ -874,6 +873,20 @@ void radv_GetPhysicalDeviceFeatures2(
 			features->memoryPriority = VK_TRUE;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_ADDRESS_FEATURES_EXT: {
+			VkPhysicalDeviceBufferAddressFeaturesEXT *features =
+				(VkPhysicalDeviceBufferAddressFeaturesEXT *)ext;
+			features->bufferDeviceAddress = true;
+			features->bufferDeviceAddressCaptureReplay = false;
+			features->bufferDeviceAddressMultiDevice = false;
+			break;
+		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
+			VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
+				(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
+			features->depthClipEnable = true;
+			break;
+		}
 		default:
 			break;
 		}
@@ -931,8 +944,8 @@ void radv_GetPhysicalDeviceProperties(
 		.maxDescriptorSetSampledImages            = max_descriptor_set_size,
 		.maxDescriptorSetStorageImages            = max_descriptor_set_size,
 		.maxDescriptorSetInputAttachments         = max_descriptor_set_size,
-		.maxVertexInputAttributes                 = 32,
-		.maxVertexInputBindings                   = 32,
+		.maxVertexInputAttributes                 = MAX_VERTEX_ATTRIBS,
+		.maxVertexInputBindings                   = MAX_VBS,
 		.maxVertexInputAttributeOffset            = 2047,
 		.maxVertexInputBindingStride              = 2048,
 		.maxVertexOutputComponents                = 128,
@@ -1003,7 +1016,7 @@ void radv_GetPhysicalDeviceProperties(
 		.maxCullDistances                         = 8,
 		.maxCombinedClipAndCullDistances          = 8,
 		.discreteQueuePriorities                  = 2,
-		.pointSizeRange                           = { 0.0, 8192.0 },
+		.pointSizeRange                           = { 0.125, 255.875 },
 		.lineWidthRange                           = { 0.0, 7.9921875 },
 		.pointSizeGranularity                     = (1.0 / 8.0),
 		.lineWidthGranularity                     = (1.0 / 128.0),
@@ -1134,7 +1147,7 @@ void radv_GetPhysicalDeviceProperties2(

 			/* SGPR. */
 			properties->sgprsPerSimd =
-				radv_get_num_physical_sgprs(pdevice);
+				ac_get_num_physical_sgprs(pdevice->rad_info.chip_class);
 			properties->minSgprAllocation =
 				pdevice->rad_info.chip_class >= VI ? 16 : 8;
 			properties->maxSgprAllocation =
@@ -1556,6 +1569,9 @@ static VkResult radv_bo_list_add(struct radv_device *device,
 {
 	struct radv_bo_list *bo_list = &device->bo_list;

+	if (bo->is_local)
+		return VK_SUCCESS;
+
 	if (unlikely(!device->use_global_bo_list))
 		return VK_SUCCESS;

@@ -1583,6 +1599,9 @@ static void radv_bo_list_remove(struct radv_device *device,
 {
 	struct radv_bo_list *bo_list = &device->bo_list;

+	if (bo->is_local)
+		return;
+
 	if (unlikely(!device->use_global_bo_list))
 		return;

@@ -1693,7 +1712,8 @@ VkResult radv_CreateDevice(
 	 * from the descriptor set anymore, so we have to use a global BO list.
 	 */
 	device->use_global_bo_list =
-		device->enabled_extensions.EXT_descriptor_indexing;
+		device->enabled_extensions.EXT_descriptor_indexing ||
+		device->enabled_extensions.EXT_buffer_device_address;

 	mtx_init(&device->shader_slab_mutex, mtx_plain);
 	list_inithead(&device->shader_slabs);
@@ -2795,7 +2815,7 @@ VkResult radv_QueueSubmit(
 	struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL;
 	struct radeon_winsys_ctx *ctx = queue->hw_ctx;
 	int ret;
-	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
+	uint32_t max_cs_submission = queue->device->trace_bo ? 1 : UINT32_MAX;
 	uint32_t scratch_size = 0;
 	uint32_t compute_scratch_size = 0;
 	uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
@@ -4031,6 +4051,15 @@ void radv_DestroyBuffer(
 	vk_free2(&device->alloc, pAllocator, buffer);
 }

+VkDeviceAddress radv_GetBufferDeviceAddressEXT(
+	VkDevice                                    device, /* not read by this implementation */
+	const VkBufferDeviceAddressInfoEXT*         pInfo)
+{
+	RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
+	return radv_buffer_get_va(buffer->bo) + buffer->offset; /* VA of the backing BO plus the buffer's offset */
+}
+
+
 static inline unsigned
 si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
 {
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -93,14 +93,16 @@ EXTENSIONS = [
    Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
+    Extension('VK_EXT_buffer_device_address',             1, True),
    Extension('VK_EXT_calibrated_timestamps',             1, True),
    Extension('VK_EXT_conditional_rendering',             1, True),
    Extension('VK_EXT_conservative_rasterization',        1, 'device->rad_info.chip_class >= GFX9'),
    Extension('VK_EXT_display_surface_counter',           1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_display_control',                   1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
    Extension('VK_EXT_debug_report',                      9, True),
+    Extension('VK_EXT_depth_clip_enable',                 1, True),
    Extension('VK_EXT_depth_range_unrestricted',          1, True),
-    Extension('VK_EXT_descriptor_indexing',               2, False),
+    Extension('VK_EXT_descriptor_indexing',               2, True),
    Extension('VK_EXT_discard_rectangles',                1, True),
    Extension('VK_EXT_external_memory_dma_buf',           1, True),
    Extension('VK_EXT_external_memory_host',              1, 'device->rad_info.has_userptr'),
--- a/src/amd/vulkan/radv_formats.c
+++ b/src/amd/vulkan/radv_formats.c
@@ -990,22 +990,10 @@ bool radv_format_pack_clear_color(VkFormat format,
 				assert(channel->size == 8);

 				v = util_format_linear_float_to_srgb_8unorm(value->float32[c]);
-			} else {
-				float f = MIN2(value->float32[c], 1.0f);
-
-				if (channel->type == VK_FORMAT_TYPE_UNSIGNED) {
-					f = MAX2(f, 0.0f) * ((1ULL << channel->size) - 1);
-				} else {
-					f = MAX2(f, -1.0f) * ((1ULL << (channel->size - 1)) - 1);
-				}
-
-				/* The hardware rounds before conversion. */
-				if (f > 0)
-					f += 0.5f;
-				else
-					f -= 0.5f;
-
-				v = (uint64_t)f;
+			} else if (channel->type == VK_FORMAT_TYPE_UNSIGNED) {
+				v = MAX2(MIN2(value->float32[c], 1.0f), 0.0f) * ((1ULL << channel->size) - 1);
+			} else  {
+				v = MAX2(MIN2(value->float32[c], 1.0f), -1.0f) * ((1ULL << (channel->size - 1)) - 1);
 			}
 		} else if (channel->type == VK_FORMAT_TYPE_FLOAT) {
 			if (channel->size == 32) {
--- a/src/amd/vulkan/radv_llvm_helper.cpp
+++ b/src/amd/vulkan/radv_llvm_helper.cpp
@@ -29,7 +29,7 @@ class radv_llvm_per_thread_info {
 public:
 	radv_llvm_per_thread_info(enum radeon_family arg_family,
 				enum ac_target_machine_options arg_tm_options)
-		: family(arg_family), tm_options(arg_tm_options) {}
+		: family(arg_family), tm_options(arg_tm_options), passes(NULL) {}

 	~radv_llvm_per_thread_info()
 	{
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -956,8 +956,8 @@ radv_device_init_meta_blit_color(struct radv_device *device, bool on_demand)
 									.attachment = VK_ATTACHMENT_UNUSED,
 									.layout = VK_IMAGE_LAYOUT_GENERAL,
 								},
-								.preserveAttachmentCount = 1,
-								.pPreserveAttachments = (uint32_t[]) { 0 },
+								.preserveAttachmentCount = 0,
+								.pPreserveAttachments = NULL,
 							},
 							.dependencyCount = 0,
 						}, &device->meta_state.alloc, &device->meta_state.blit.render_pass[key][j]);
@@ -1016,8 +1016,8 @@ radv_device_init_meta_blit_depth(struct radv_device *device, bool on_demand)
 								       .attachment = 0,
 								       .layout = layout,
 								},
-							       .preserveAttachmentCount = 1,
-							       .pPreserveAttachments = (uint32_t[]) { 0 },
+							       .preserveAttachmentCount = 0,
+							       .pPreserveAttachments = NULL,
 							},
 						        .dependencyCount = 0,
 						}, &device->meta_state.alloc, &device->meta_state.blit.depth_only_rp[ds_layout]);
@@ -1073,8 +1073,8 @@ radv_device_init_meta_blit_stencil(struct radv_device *device, bool on_demand)
 								       .attachment = 0,
 								       .layout = layout,
 							       },
-							       .preserveAttachmentCount = 1,
-							       .pPreserveAttachments = (uint32_t[]) { 0 },
+							       .preserveAttachmentCount = 0,
+							       .pPreserveAttachments = NULL,
 						       },
 						       .dependencyCount = 0,
 					 }, &device->meta_state.alloc, &device->meta_state.blit.stencil_only_rp[ds_layout]);
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -807,8 +807,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
 							.attachment = VK_ATTACHMENT_UNUSED,
 							.layout = layout,
 						},
-						.preserveAttachmentCount = 1,
-						.pPreserveAttachments = (uint32_t[]) { 0 },
+						.preserveAttachmentCount = 0,
+						.pPreserveAttachments = NULL,
 						},
 						.dependencyCount = 0,
 					}, &device->meta_state.alloc, &device->meta_state.blit2d_render_passes[fs_key][dst_layout]);
@@ -978,8 +978,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
 									       .attachment = 0,
 									       .layout = layout,
 								       },
-								       .preserveAttachmentCount = 1,
-								       .pPreserveAttachments = (uint32_t[]) { 0 },
+								       .preserveAttachmentCount = 0,
+								       .pPreserveAttachments = NULL,
 							       },
 							       .dependencyCount = 0,
 							}, &device->meta_state.alloc, &device->meta_state.blit2d_depth_only_rp[ds_layout]);
@@ -1148,8 +1148,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
 									       .attachment = 0,
 									       .layout = layout,
 								       },
-								       .preserveAttachmentCount = 1,
-								       .pPreserveAttachments = (uint32_t[]) { 0 },
+								       .preserveAttachmentCount = 0,
+								       .pPreserveAttachments = NULL,
 							       },
 							       .dependencyCount = 0,
 						       }, &device->meta_state.alloc, &device->meta_state.blit2d_stencil_only_rp[ds_layout]);
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -232,8 +232,8 @@ create_color_renderpass(struct radv_device *device,
 							       .attachment = VK_ATTACHMENT_UNUSED,
 							       .layout = VK_IMAGE_LAYOUT_GENERAL,
 						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
+						       .preserveAttachmentCount = 0,
+						       .pPreserveAttachments = NULL,
 					       },
 								.dependencyCount = 0,
 									 }, &device->meta_state.alloc, pass);
@@ -438,10 +438,10 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 		.color_attachments = (struct radv_subpass_attachment[]) {
 			subpass->color_attachments[clear_att->colorAttachment]
 		},
-		.depth_stencil_attachment = (struct radv_subpass_attachment) { VK_ATTACHMENT_UNUSED, VK_IMAGE_LAYOUT_UNDEFINED }
+		.depth_stencil_attachment = NULL,
 	};

-	radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass, false);
+	radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);

 	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
 			     pipeline);
@@ -465,7 +465,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
 		radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
 	}

-	radv_cmd_buffer_set_subpass(cmd_buffer, subpass, false);
+	radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
 }


@@ -547,8 +547,8 @@ create_depthstencil_renderpass(struct radv_device *device,
 							       .attachment = 0,
 							       .layout = VK_IMAGE_LAYOUT_GENERAL,
 						       },
-						       .preserveAttachmentCount = 1,
-						       .pPreserveAttachments = (uint32_t[]) { 0 },
+						       .preserveAttachmentCount = 0,
+						       .pPreserveAttachments = NULL,
 					       },
 								.dependencyCount = 0,
 									 }, &device->meta_state.alloc, render_pass);
@@ -651,7 +651,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
 	    iview->base_mip == 0 &&
 	    iview->base_layer == 0 &&
 	    radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
-	    radv_image_extent_compare(iview->image, &iview->extent))
+	    !radv_image_extent_compare(iview->image, &iview->extent))
 		return true;
 	return false;
 }
@@ -720,7 +720,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_meta_state *meta_state = &device->meta_state;
 	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
 	const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
-	const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
+	const uint32_t pass_att = subpass->depth_stencil_attachment->attachment;
 	VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
 	VkImageAspectFlags aspects = clear_att->aspectMask;
 	const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
@@ -760,7 +760,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 							 iview,
 							 samples_log2,
 							 aspects,
-							 subpass->depth_stencil_attachment.layout,
+							 subpass->depth_stencil_attachment->layout,
 							 clear_rect,
 							 clear_value);
 	if (!pipeline)
@@ -770,7 +770,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
 			     pipeline);

 	if (depth_view_can_fast_clear(cmd_buffer, iview, aspects,
-	                              subpass->depth_stencil_attachment.layout,
+	                              subpass->depth_stencil_attachment->layout,
 	                              clear_rect, clear_value))
 		radv_update_ds_clear_metadata(cmd_buffer, iview->image,
 					      clear_value, aspects);
@@ -1320,6 +1320,7 @@ radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer,
 				image->cmask.size, value);
 }

+
 uint32_t
 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer,
 		 struct radv_image *image, uint32_t value)
@@ -1554,7 +1555,11 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,

 	if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
 		const uint32_t subpass_att = clear_att->colorAttachment;
+		assert(subpass_att < subpass->color_count);
 		const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
+		if (pass_att == VK_ATTACHMENT_UNUSED)
+			return;
+
 		VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
 		const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
 		VkClearColorValue clear_value = clear_att->clearValue.color;
@@ -1568,11 +1573,11 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
 			emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
 		}
 	} else {
-		const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
+		const uint32_t pass_att = subpass->depth_stencil_attachment ? subpass->depth_stencil_attachment->attachment : VK_ATTACHMENT_UNUSED; /* pointer is NULL when the subpass has no DS attachment */
 		if (pass_att == VK_ATTACHMENT_UNUSED)
 			return;

-		VkImageLayout image_layout = subpass->depth_stencil_attachment.layout;
+		VkImageLayout image_layout = subpass->depth_stencil_attachment->layout;
 		const struct radv_image_view *iview = fb ? fb->attachments[pass_att].attachment : NULL;
 		VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;

@@ -1615,7 +1620,10 @@ radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
 			return true;
 	}

-	a = cmd_state->subpass->depth_stencil_attachment.attachment;
+	if (!cmd_state->subpass->depth_stencil_attachment)
+		return false;
+
+	a = cmd_state->subpass->depth_stencil_attachment->attachment;
 	return radv_attachment_needs_clear(cmd_state, a);
 }

@@ -1684,17 +1692,19 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
 					      &post_flush);
 	}

-	uint32_t ds = cmd_state->subpass->depth_stencil_attachment.attachment;
-	if (radv_attachment_needs_clear(cmd_state, ds)) {
-		VkClearAttachment clear_att = {
-			.aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
-			.clearValue = cmd_state->attachments[ds].clear_value,
-		};
+	if (cmd_state->subpass->depth_stencil_attachment) {
+		uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
+		if (radv_attachment_needs_clear(cmd_state, ds)) {
+			VkClearAttachment clear_att = {
+				.aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
+				.clearValue = cmd_state->attachments[ds].clear_value,
+			};

-		radv_subpass_clear_attachment(cmd_buffer,
-					      &cmd_state->attachments[ds],
-					      &clear_att, &pre_flush,
-					      &post_flush);
+			radv_subpass_clear_attachment(cmd_buffer,
+						      &cmd_state->attachments[ds],
+						      &clear_att, &pre_flush,
+						      &post_flush);
+		}
 	}

 	radv_meta_restore(&saved_state, cmd_buffer);
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -633,8 +633,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_subpass_attachment src_att = subpass->color_attachments[i];
 		struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];

-		if (src_att.attachment == VK_ATTACHMENT_UNUSED ||
-		    dest_att.attachment == VK_ATTACHMENT_UNUSED)
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
 			continue;

 		struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
@@ -661,8 +660,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_subpass_attachment src_att = subpass->color_attachments[i];
 		struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];

-		if (src_att.attachment == VK_ATTACHMENT_UNUSED ||
-		    dest_att.attachment == VK_ATTACHMENT_UNUSED)
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
 			continue;

 		struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
@@ -675,10 +673,10 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_subpass resolve_subpass = {
 			.color_count = 2,
 			.color_attachments = (struct radv_subpass_attachment[]) { src_att, dest_att },
-			.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
+			.depth_stencil_attachment = NULL,
 		};

-		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
+		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);

 		VkResult ret = build_resolve_pipeline(cmd_buffer->device, radv_format_meta_fs_key(dst_img->vk_format));
 		if (ret != VK_SUCCESS) {
@@ -710,8 +708,7 @@ radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_subpass_attachment src_att = subpass->color_attachments[i];
 		struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];

-		if (src_att.attachment == VK_ATTACHMENT_UNUSED ||
-		    dest_att.attachment == VK_ATTACHMENT_UNUSED)
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
 			continue;

 		struct radv_image *src_image =
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -232,8 +232,8 @@ create_resolve_pipeline(struct radv_device *device,
 							.attachment = VK_ATTACHMENT_UNUSED,
 							.layout = VK_IMAGE_LAYOUT_GENERAL,
 						},
-						.preserveAttachmentCount = 1,
-						.pPreserveAttachments = (uint32_t[]) { 0 },
+						.preserveAttachmentCount = 0,
+						.pPreserveAttachments = NULL,
 					},
 					.dependencyCount = 0,
 				}, &device->meta_state.alloc, rp + dst_layout);
@@ -610,8 +610,7 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_subpass_attachment src_att = subpass->color_attachments[i];
 		struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];

-		if (src_att.attachment == VK_ATTACHMENT_UNUSED ||
-		    dest_att.attachment == VK_ATTACHMENT_UNUSED)
+		if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
 			continue;

 		struct radv_image_view *dest_iview = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
@@ -620,10 +619,10 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
 		struct radv_subpass resolve_subpass = {
 			.color_count = 1,
 			.color_attachments = (struct radv_subpass_attachment[]) { dest_att },
-			.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
+			.depth_stencil_attachment = NULL,
 		};

-		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
+		radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);

 		emit_resolve(cmd_buffer,
 			     src_iview,
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -589,6 +589,7 @@ set_loc_desc(struct radv_shader_context *ctx, int idx, uint8_t *sgpr_idx)
 struct user_sgpr_info {
 	bool need_ring_offsets;
 	bool indirect_all_descriptor_sets;
+	uint8_t remaining_sgprs;
 };

 static bool needs_view_index_sgpr(struct radv_shader_context *ctx,
@@ -627,6 +628,50 @@ count_vs_user_sgprs(struct radv_shader_context *ctx)
 	return count;
 }

+static void allocate_inline_push_consts(struct radv_shader_context *ctx,
+					struct user_sgpr_info *user_sgpr_info)
+{
+	uint8_t remaining_sgprs = user_sgpr_info->remaining_sgprs;
+
+	/* Only supported if shaders use push constants. */
+	if (ctx->shader_info->info.min_push_constant_used == UINT8_MAX)
+		return;
+
+	/* Only supported if shaders don't have indirect push constants. */
+	if (ctx->shader_info->info.has_indirect_push_constants)
+		return;
+
+	/* Only supported for 32-bit push constants. */
+	if (!ctx->shader_info->info.has_only_32bit_push_constants)
+		return;
+
+	uint8_t num_push_consts =
+		(ctx->shader_info->info.max_push_constant_used -
+		 ctx->shader_info->info.min_push_constant_used) / 4;
+
+	/* Check if the number of user SGPRs is large enough. */
+	if (num_push_consts < remaining_sgprs) {
+		ctx->shader_info->info.num_inline_push_consts = num_push_consts;
+	} else {
+		ctx->shader_info->info.num_inline_push_consts = remaining_sgprs;
+	}
+
+	/* Clamp to the maximum number of allowed inlined push constants. */
+	if (ctx->shader_info->info.num_inline_push_consts > AC_MAX_INLINE_PUSH_CONSTS)
+		ctx->shader_info->info.num_inline_push_consts = AC_MAX_INLINE_PUSH_CONSTS;
+
+	if (ctx->shader_info->info.num_inline_push_consts == num_push_consts &&
+	    !ctx->shader_info->info.loads_dynamic_offsets) {
+		/* Disable the default push constants path if all constants are
+		 * inlined and if shaders don't use dynamic descriptors.
+		 */
+		ctx->shader_info->info.loads_push_constants = false;
+	}
+
+	ctx->shader_info->info.base_inline_push_consts =
+		ctx->shader_info->info.min_push_constant_used / 4;
+}
+
 static void allocate_user_sgprs(struct radv_shader_context *ctx,
 				gl_shader_stage stage,
 				bool has_previous_stage,
@@ -702,7 +747,12 @@ static void allocate_user_sgprs(struct radv_shader_context *ctx,

 	if (remaining_sgprs < num_desc_set) {
 		user_sgpr_info->indirect_all_descriptor_sets = true;
+		user_sgpr_info->remaining_sgprs = remaining_sgprs - 1;
+	} else {
+		user_sgpr_info->remaining_sgprs = remaining_sgprs - num_desc_set;
 	}
+
+	allocate_inline_push_consts(ctx, user_sgpr_info);
 }

 static void
@@ -732,6 +782,13 @@ declare_global_input_sgprs(struct radv_shader_context *ctx,
 		add_arg(args, ARG_SGPR, type, &ctx->abi.push_constants);
 	}

+	for (unsigned i = 0; i < ctx->shader_info->info.num_inline_push_consts; i++) {
+		add_arg(args, ARG_SGPR, ctx->ac.i32,
+			&ctx->abi.inline_push_consts[i]);
+	}
+	ctx->abi.num_inline_push_consts = ctx->shader_info->info.num_inline_push_consts;
+	ctx->abi.base_inline_push_consts = ctx->shader_info->info.base_inline_push_consts;
+
 	if (ctx->shader_info->info.so.num_outputs) {
 		add_arg(args, ARG_SGPR,
 			ac_array_in_const32_addr_space(ctx->ac.v4i32),
@@ -850,6 +907,11 @@ set_global_input_locs(struct radv_shader_context *ctx,
 		set_loc_shader_ptr(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx);
 	}

+	if (ctx->shader_info->info.num_inline_push_consts) {
+		set_loc_shader(ctx, AC_UD_INLINE_PUSH_CONSTANTS, user_sgpr_idx,
+			       ctx->shader_info->info.num_inline_push_consts);
+	}
+
 	if (ctx->streamout_buffers) {
 		set_loc_shader_ptr(ctx, AC_UD_STREAMOUT_BUFFERS,
 			       user_sgpr_idx);
@@ -1976,6 +2038,70 @@ adjust_vertex_fetch_alpha(struct radv_shader_context *ctx,
 	return alpha;
 }

+static unsigned
+get_num_channels_from_data_format(unsigned data_format)
+{
+	switch (data_format) {
+	case V_008F0C_BUF_DATA_FORMAT_8:
+	case V_008F0C_BUF_DATA_FORMAT_16:
+	case V_008F0C_BUF_DATA_FORMAT_32:
+		return 1;
+	case V_008F0C_BUF_DATA_FORMAT_8_8:
+	case V_008F0C_BUF_DATA_FORMAT_16_16:
+	case V_008F0C_BUF_DATA_FORMAT_32_32:
+		return 2;
+	case V_008F0C_BUF_DATA_FORMAT_10_11_11:
+	case V_008F0C_BUF_DATA_FORMAT_11_11_10:
+	case V_008F0C_BUF_DATA_FORMAT_32_32_32:
+		return 3;
+	case V_008F0C_BUF_DATA_FORMAT_8_8_8_8:
+	case V_008F0C_BUF_DATA_FORMAT_10_10_10_2:
+	case V_008F0C_BUF_DATA_FORMAT_2_10_10_10:
+	case V_008F0C_BUF_DATA_FORMAT_16_16_16_16:
+	case V_008F0C_BUF_DATA_FORMAT_32_32_32_32:
+		return 4;
+	default:
+		break;
+	}
+
+	return 4;
+}
+
+static LLVMValueRef
+radv_fixup_vertex_input_fetches(struct radv_shader_context *ctx,
+				LLVMValueRef value,
+				unsigned num_channels,
+				bool is_float)
+{
+	LLVMValueRef zero = is_float ? ctx->ac.f32_0 : ctx->ac.i32_0;
+	LLVMValueRef one = is_float ? ctx->ac.f32_1 : ctx->ac.i32_1;
+	LLVMValueRef chan[4];
+
+	if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMVectorTypeKind) {
+		unsigned vec_size = LLVMGetVectorSize(LLVMTypeOf(value));
+
+		if (num_channels == 4 && num_channels == vec_size)
+			return value;
+
+		num_channels = MIN2(num_channels, vec_size);
+
+		for (unsigned i = 0; i < num_channels; i++)
+			chan[i] = ac_llvm_extract_elem(&ctx->ac, value, i);
+	} else {
+		if (num_channels) {
+			assert(num_channels == 1);
+			chan[0] = value;
+		}
+	}
+
+	for (unsigned i = num_channels; i < 4; i++) {
+		chan[i] = i == 3 ? one : zero;
+		chan[i] = ac_to_float(&ctx->ac, chan[i]);
+	}
+
+	return ac_build_gather_values(&ctx->ac, chan, 4);
+}
+
 static void
 handle_vs_input_decl(struct radv_shader_context *ctx,
 		     struct nir_variable *variable)
@@ -1988,7 +2114,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 	unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
 	uint8_t input_usage_mask =
 		ctx->shader_info->info.vs.input_usage_mask[variable->data.location];
-	unsigned num_channels = util_last_bit(input_usage_mask);
+	unsigned num_input_channels = util_last_bit(input_usage_mask);

 	variable->data.driver_location = variable->data.location * 4;

@@ -1996,6 +2122,11 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
 	for (unsigned i = 0; i < attrib_count; ++i) {
 		LLVMValueRef output[4];
 		unsigned attrib_index = variable->data.location + i - VERT_ATTRIB_GENERIC0;
+		unsigned attrib_format = ctx->options->key.vs.vertex_attribute_formats[attrib_index];
+		unsigned data_format = attrib_format & 0x0f;
+		unsigned num_format = (attrib_format >> 4) & 0x07;
+		bool is_float = num_format != V_008F0C_BUF_NUM_FORMAT_UINT &&
+		                num_format != V_008F0C_BUF_NUM_FORMAT_SINT;

 		if (ctx->options->key.vs.instance_rate_inputs & (1u << attrib_index)) {
 			uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
@@ -2027,34 +2158,19 @@ handle_vs_input_decl(struct radv_shader_context *ctx,

 		t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);

-		if (ctx->options->key.vs.vertex_attribute_provided & (1u << attrib_index)) {
-			input = ac_build_buffer_load_format(&ctx->ac, t_list,
-							    buffer_index,
-							    ctx->ac.i32_0,
-							    num_channels, false, true);
-		} else {
-			/* Per the Vulkan spec, it's invalid to consume vertex
-			 * attributes that are not provided by the pipeline but
-			 * some (invalid) apps appear to do that. Fill the
-			 * input array with (eg. (0, 0, 0, 1)) to workaround
-			 * the problem and to avoid possible GPU hangs.
-			 */
-			LLVMValueRef chan[4];
+		/* Adjust the number of channels to load based on the vertex
+		 * attribute format.
+		 */
+		unsigned num_format_channels = get_num_channels_from_data_format(data_format);
+		unsigned num_channels = MIN2(num_input_channels, num_format_channels);

-			/* The input_usage mask might be 0 if input variables
-			 * are not removed by the compiler.
-			 */
-			num_channels = CLAMP(num_channels, 1, 4);
+		input = ac_build_buffer_load_format(&ctx->ac, t_list,
+						    buffer_index,
+						    ctx->ac.i32_0,
+						    num_channels, false, true);

-			for (unsigned i = 0; i < num_channels; i++) {
-				chan[i] = i == 3 ? ctx->ac.f32_1 : ctx->ac.f32_0;
-				chan[i] = ac_to_float(&ctx->ac, chan[i]);
-			}
-
-			input = ac_build_gather_values(&ctx->ac, chan, num_channels);
-		}
-
-		input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels);
+		input = radv_fixup_vertex_input_fetches(ctx, input, num_channels,
+							is_float);

 		for (unsigned chan = 0; chan < 4; chan++) {
 			LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
@@ -3402,9 +3518,9 @@ ac_setup_rings(struct radv_shader_context *ctx)
 	}
 }

-static unsigned
-ac_nir_get_max_workgroup_size(enum chip_class chip_class,
-			      const struct nir_shader *nir)
+unsigned
+radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+				const struct nir_shader *nir)
 {
 	switch (nir->info.stage) {
 	case MESA_SHADER_TESS_CTRL:
@@ -3469,6 +3585,8 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,

 	memset(shader_info, 0, sizeof(*shader_info));

+	radv_nir_shader_info_init(&shader_info->info);
+
 	for(int i = 0; i < shader_count; ++i)
 		radv_nir_shader_info_pass(shaders[i], options, &shader_info->info);

@@ -3480,7 +3598,7 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 	ctx.max_workgroup_size = 0;
 	for (int i = 0; i < shader_count; ++i) {
 		ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
-		                              ac_nir_get_max_workgroup_size(ctx.options->chip_class,
+		                              radv_nir_get_max_workgroup_size(ctx.options->chip_class,
 		                                                            shaders[i]));
 	}

@@ -3497,17 +3615,10 @@ LLVMModuleRef ac_translate_nir_to_llvm(struct ac_llvm_compiler *ac_llvm,
 	ctx.abi.clamp_shadow_reference = false;
 	ctx.abi.gfx9_stride_size_workaround = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x800;

-	/* Because the new raw/struct atomic intrinsics are buggy with LLVM 8,
-	 * we fallback to the old intrinsics for atomic buffer image operations
-	 * and thus we need to apply the indexing workaround...
-	 */
-	ctx.abi.gfx9_stride_size_workaround_for_atomic = ctx.ac.chip_class == GFX9 && HAVE_LLVM < 0x900;
-
 	if (shader_count >= 2)
 		ac_init_exec_full_mask(&ctx.ac);

-	if ((ctx.ac.family == CHIP_VEGA10 ||
-	     ctx.ac.family == CHIP_RAVEN) &&
+	if (ctx.ac.chip_class == GFX9 &&
 	    shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
 		ac_nir_fixup_ls_hs_input_vgprs(&ctx);

--- a/src/amd/vulkan/radv_pass.c
+++ b/src/amd/vulkan/radv_pass.c
@@ -28,6 +28,116 @@

 #include "vk_util.h"

+static void
+radv_render_pass_add_subpass_dep(struct radv_render_pass *pass,
+				 const VkSubpassDependency2KHR *dep)
+{
+	uint32_t src = dep->srcSubpass;
+	uint32_t dst = dep->dstSubpass;
+
+	/* Ignore subpass self-dependencies as they allow the app to call
+	 * vkCmdPipelineBarrier() inside the render pass and the driver should
+	 * only do the barrier when called, not when starting the render pass.
+	 */
+	if (src == dst)
+		return;
+
+	/* Accumulate all incoming external dependencies to the first subpass. */
+	if (src == VK_SUBPASS_EXTERNAL)
+		dst = 0;
+
+	if (dst == VK_SUBPASS_EXTERNAL) {
+		if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+			pass->end_barrier.src_stage_mask |= dep->srcStageMask;
+		pass->end_barrier.src_access_mask |= dep->srcAccessMask;
+		pass->end_barrier.dst_access_mask |= dep->dstAccessMask;
+	} else {
+		if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
+			pass->subpasses[dst].start_barrier.src_stage_mask |= dep->srcStageMask;
+		pass->subpasses[dst].start_barrier.src_access_mask |= dep->srcAccessMask;
+		pass->subpasses[dst].start_barrier.dst_access_mask |= dep->dstAccessMask;
+	}
+}
+
+static void
+radv_render_pass_compile(struct radv_render_pass *pass)
+{
+	for (uint32_t i = 0; i < pass->subpass_count; i++) {
+		struct radv_subpass *subpass = &pass->subpasses[i];
+		uint32_t color_sample_count = 1, depth_sample_count = 1;
+
+		/* We don't allow depth_stencil_attachment to be non-NULL and
+		 * be VK_ATTACHMENT_UNUSED.  This way callers can just check
+		 * for NULL and be guaranteed that a non-NULL pointer refers
+		 * to a valid attachment.
+		 */
+		if (subpass->depth_stencil_attachment &&
+		    subpass->depth_stencil_attachment->attachment == VK_ATTACHMENT_UNUSED)
+			subpass->depth_stencil_attachment = NULL;
+
+		for (uint32_t j = 0; j < subpass->attachment_count; j++) {
+			struct radv_subpass_attachment *subpass_att =
+				&subpass->attachments[j];
+			if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+				continue;
+
+			struct radv_render_pass_attachment *pass_att =
+				&pass->attachments[subpass_att->attachment];
+
+			pass_att->last_subpass_idx = i;
+		}
+
+		subpass->has_color_att = false;
+		for (uint32_t j = 0; j < subpass->color_count; j++) {
+			struct radv_subpass_attachment *subpass_att =
+				&subpass->color_attachments[j];
+			if (subpass_att->attachment == VK_ATTACHMENT_UNUSED)
+				continue;
+
+			subpass->has_color_att = true;
+
+			struct radv_render_pass_attachment *pass_att =
+				&pass->attachments[subpass_att->attachment];
+
+			color_sample_count = pass_att->samples;
+		}
+
+		if (subpass->depth_stencil_attachment) {
+			const uint32_t a =
+				subpass->depth_stencil_attachment->attachment;
+			struct radv_render_pass_attachment *pass_att =
+				&pass->attachments[a];
+			depth_sample_count = pass_att->samples;
+		}
+
+		subpass->max_sample_count = MAX2(color_sample_count,
+						 depth_sample_count);
+
+		/* We have to handle resolve attachments specially */
+		subpass->has_resolve = false;
+		if (subpass->resolve_attachments) {
+			for (uint32_t j = 0; j < subpass->color_count; j++) {
+				struct radv_subpass_attachment *resolve_att =
+					&subpass->resolve_attachments[j];
+
+				if (resolve_att->attachment == VK_ATTACHMENT_UNUSED)
+					continue;
+
+				subpass->has_resolve = true;
+			}
+		}
+	}
+}
+
+static unsigned
+radv_num_subpass_attachments(const VkSubpassDescription *desc)
+{
+	return desc->inputAttachmentCount +
+	       desc->colorAttachmentCount +
+	       (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
+	       (desc->pDepthStencilAttachment != NULL);
+}
+
 VkResult radv_CreateRenderPass(
 	VkDevice                                    _device,
 	const VkRenderPassCreateInfo*               pCreateInfo,
@@ -82,13 +192,8 @@ VkResult radv_CreateRenderPass(
 	uint32_t subpass_attachment_count = 0;
 	struct radv_subpass_attachment *p;
 	for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
-		const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
-
 		subpass_attachment_count +=
-			desc->inputAttachmentCount +
-			desc->colorAttachmentCount +
-			(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
-			(desc->pDepthStencilAttachment != NULL);
+			radv_num_subpass_attachments(&pCreateInfo->pSubpasses[i]);
 	}

 	if (subpass_attachment_count) {
@@ -106,11 +211,13 @@ VkResult radv_CreateRenderPass(
 	p = pass->subpass_attachments;
 	for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
 		const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
-		uint32_t color_sample_count = 1, depth_sample_count = 1;
 		struct radv_subpass *subpass = &pass->subpasses[i];

 		subpass->input_count = desc->inputAttachmentCount;
 		subpass->color_count = desc->colorAttachmentCount;
+		subpass->attachment_count = radv_num_subpass_attachments(desc);
+		subpass->attachments = p;
+
 		if (multiview_info)
 			subpass->view_mask = multiview_info->pViewMasks[i];

@@ -123,8 +230,6 @@ VkResult radv_CreateRenderPass(
 					.attachment = desc->pInputAttachments[j].attachment,
 					.layout = desc->pInputAttachments[j].layout,
 				};
-				if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
-					pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask;
 			}
 		}

@@ -137,76 +242,61 @@ VkResult radv_CreateRenderPass(
 					.attachment = desc->pColorAttachments[j].attachment,
 					.layout = desc->pColorAttachments[j].layout,
 				};
-				if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) {
-					pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask;
-					color_sample_count = pCreateInfo->pAttachments[desc->pColorAttachments[j].attachment].samples;
-				}
 			}
 		}

-		subpass->has_resolve = false;
 		if (desc->pResolveAttachments) {
 			subpass->resolve_attachments = p;
 			p += desc->colorAttachmentCount;

 			for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
-				uint32_t a = desc->pResolveAttachments[j].attachment;
 				subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
 					.attachment = desc->pResolveAttachments[j].attachment,
 					.layout = desc->pResolveAttachments[j].layout,
 				};
-				if (a != VK_ATTACHMENT_UNUSED) {
-					subpass->has_resolve = true;
-					pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask;
-				}
 			}
 		}

 		if (desc->pDepthStencilAttachment) {
-			subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
+			subpass->depth_stencil_attachment = p++;
+
+			*subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
 				.attachment = desc->pDepthStencilAttachment->attachment,
 				.layout = desc->pDepthStencilAttachment->layout,
 			};
-			if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
-				pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask;
-				depth_sample_count = pCreateInfo->pAttachments[desc->pDepthStencilAttachment->attachment].samples;
-			}
-		} else {
-			subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
 		}
-
-		subpass->max_sample_count = MAX2(color_sample_count,
-						 depth_sample_count);
 	}

 	for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
-		uint32_t src = pCreateInfo->pDependencies[i].srcSubpass;
-		uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass;
-
-		/* Ignore subpass self-dependencies as they allow the app to
-		 * call vkCmdPipelineBarrier() inside the render pass and the
-		 * driver should only do the barrier when called, not when
-		 * starting the render pass.
-		 */
-		if (src == dst)
-			continue;
-
-		if (dst == VK_SUBPASS_EXTERNAL) {
-			pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask;
-			pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask;
-			pass->end_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask;
-		} else {
-			pass->subpasses[dst].start_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask;
-			pass->subpasses[dst].start_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask;
-			pass->subpasses[dst].start_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask;
-		}
+		/* Convert to a Dependency2KHR */
+		struct VkSubpassDependency2KHR dep2 = {
+			.srcSubpass       = pCreateInfo->pDependencies[i].srcSubpass,
+			.dstSubpass       = pCreateInfo->pDependencies[i].dstSubpass,
+			.srcStageMask     = pCreateInfo->pDependencies[i].srcStageMask,
+			.dstStageMask     = pCreateInfo->pDependencies[i].dstStageMask,
+			.srcAccessMask    = pCreateInfo->pDependencies[i].srcAccessMask,
+			.dstAccessMask    = pCreateInfo->pDependencies[i].dstAccessMask,
+			.dependencyFlags  = pCreateInfo->pDependencies[i].dependencyFlags,
+		};
+		radv_render_pass_add_subpass_dep(pass, &dep2);
 	}

+	radv_render_pass_compile(pass);
+
 	*pRenderPass = radv_render_pass_to_handle(pass);

 	return VK_SUCCESS;
 }

+static unsigned
+radv_num_subpass_attachments2(const VkSubpassDescription2KHR *desc)
+{
+	return desc->inputAttachmentCount +
+	       desc->colorAttachmentCount +
+	       (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
+	       (desc->pDepthStencilAttachment != NULL);
+}
+
 VkResult radv_CreateRenderPass2KHR(
    VkDevice                                    _device,
    const VkRenderPassCreateInfo2KHR*           pCreateInfo,
@@ -250,13 +340,8 @@ VkResult radv_CreateRenderPass2KHR(
 	uint32_t subpass_attachment_count = 0;
 	struct radv_subpass_attachment *p;
 	for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
-		const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
-
 		subpass_attachment_count +=
-			desc->inputAttachmentCount +
-			desc->colorAttachmentCount +
-			(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
-			(desc->pDepthStencilAttachment != NULL);
+			radv_num_subpass_attachments2(&pCreateInfo->pSubpasses[i]);
 	}

 	if (subpass_attachment_count) {
@@ -274,11 +359,12 @@ VkResult radv_CreateRenderPass2KHR(
 	p = pass->subpass_attachments;
 	for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
 		const VkSubpassDescription2KHR *desc = &pCreateInfo->pSubpasses[i];
-		uint32_t color_sample_count = 1, depth_sample_count = 1;
 		struct radv_subpass *subpass = &pass->subpasses[i];

 		subpass->input_count = desc->inputAttachmentCount;
 		subpass->color_count = desc->colorAttachmentCount;
+		subpass->attachment_count = radv_num_subpass_attachments2(desc);
+		subpass->attachments = p;
 		subpass->view_mask = desc->viewMask;

 		if (desc->inputAttachmentCount > 0) {
@@ -290,8 +376,6 @@ VkResult radv_CreateRenderPass2KHR(
 					.attachment = desc->pInputAttachments[j].attachment,
 					.layout = desc->pInputAttachments[j].layout,
 				};
-				if (desc->pInputAttachments[j].attachment != VK_ATTACHMENT_UNUSED)
-					pass->attachments[desc->pInputAttachments[j].attachment].view_mask |= subpass->view_mask;
 			}
 		}

@@ -304,71 +388,38 @@ VkResult radv_CreateRenderPass2KHR(
 					.attachment = desc->pColorAttachments[j].attachment,
 					.layout = desc->pColorAttachments[j].layout,
 				};
-				if (desc->pColorAttachments[j].attachment != VK_ATTACHMENT_UNUSED) {
-					pass->attachments[desc->pColorAttachments[j].attachment].view_mask |= subpass->view_mask;
-					color_sample_count = pCreateInfo->pAttachments[desc->pColorAttachments[j].attachment].samples;
-				}
 			}
 		}

-		subpass->has_resolve = false;
 		if (desc->pResolveAttachments) {
 			subpass->resolve_attachments = p;
 			p += desc->colorAttachmentCount;

 			for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
-				uint32_t a = desc->pResolveAttachments[j].attachment;
 				subpass->resolve_attachments[j] = (struct radv_subpass_attachment) {
 					.attachment = desc->pResolveAttachments[j].attachment,
 					.layout = desc->pResolveAttachments[j].layout,
 				};
-				if (a != VK_ATTACHMENT_UNUSED) {
-					subpass->has_resolve = true;
-					pass->attachments[desc->pResolveAttachments[j].attachment].view_mask |= subpass->view_mask;
-				}
 			}
 		}

 		if (desc->pDepthStencilAttachment) {
-			subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
+			subpass->depth_stencil_attachment = p++;
+
+			*subpass->depth_stencil_attachment = (struct radv_subpass_attachment) {
 				.attachment = desc->pDepthStencilAttachment->attachment,
 				.layout = desc->pDepthStencilAttachment->layout,
 			};
-			if (desc->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
-				pass->attachments[desc->pDepthStencilAttachment->attachment].view_mask |= subpass->view_mask;
-				depth_sample_count = pCreateInfo->pAttachments[desc->pDepthStencilAttachment->attachment].samples;
-			}
-		} else {
-			subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
 		}
-
-		subpass->max_sample_count = MAX2(color_sample_count,
-						 depth_sample_count);
 	}

 	for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
-		uint32_t src = pCreateInfo->pDependencies[i].srcSubpass;
-		uint32_t dst = pCreateInfo->pDependencies[i].dstSubpass;
-
-		/* Ignore subpass self-dependencies as they allow the app to
-		 * call vkCmdPipelineBarrier() inside the render pass and the
-		 * driver should only do the barrier when called, not when
-		 * starting the render pass.
-		 */
-		if (src == dst)
-			continue;
-
-		if (dst == VK_SUBPASS_EXTERNAL) {
-			pass->end_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask;
-			pass->end_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask;
-			pass->end_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask;
-		} else {
-			pass->subpasses[dst].start_barrier.src_stage_mask = pCreateInfo->pDependencies[i].srcStageMask;
-			pass->subpasses[dst].start_barrier.src_access_mask = pCreateInfo->pDependencies[i].srcAccessMask;
-			pass->subpasses[dst].start_barrier.dst_access_mask = pCreateInfo->pDependencies[i].dstAccessMask;
-		}
+		radv_render_pass_add_subpass_dep(pass,
+						 &pCreateInfo->pDependencies[i]);
 	}

+	radv_render_pass_compile(pass);
+
 	*pRenderPass = radv_render_pass_to_handle(pass);

 	return VK_SUCCESS;
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -975,11 +975,11 @@ radv_pipeline_out_of_order_rast(struct radv_pipeline *pipeline,
 	};

 	if (pCreateInfo->pDepthStencilState &&
-	    subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+	    subpass->depth_stencil_attachment) {
 		const VkPipelineDepthStencilStateCreateInfo *vkds =
 			pCreateInfo->pDepthStencilState;
 		struct radv_render_pass_attachment *attachment =
-			pass->attachments + subpass->depth_stencil_attachment.attachment;
+			pass->attachments + subpass->depth_stencil_attachment->attachment;
 		bool has_stencil = vk_format_is_stencil(attachment->format);
 		struct radv_dsa_order_invariance order_invariance[2];
 		struct radv_shader_variant *ps =
@@ -1384,15 +1384,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	 *    disabled or if the subpass of the render pass the pipeline is
 	 *    created against does not use any color attachments.
 	 */
-	bool uses_color_att = false;
-	for (unsigned i = 0; i < subpass->color_count; ++i) {
-		if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
-			uses_color_att = true;
-			break;
-		}
-	}
-
-	if (uses_color_att && states & RADV_DYNAMIC_BLEND_CONSTANTS) {
+	if (subpass->has_color_att && states & RADV_DYNAMIC_BLEND_CONSTANTS) {
 		assert(pCreateInfo->pColorBlendState);
 		typed_memcpy(dynamic->blend_constants,
 			     pCreateInfo->pColorBlendState->blendConstants, 4);
@@ -1410,8 +1402,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
 	 *    disabled or if the subpass of the render pass the pipeline is created
 	 *    against does not use a depth/stencil attachment.
 	 */
-	if (needed_states &&
-	    subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
+	if (needed_states && subpass->depth_stencil_attachment) {
 		assert(pCreateInfo->pDepthStencilState);

 		if (states & RADV_DYNAMIC_DEPTH_BOUNDS) {
@@ -1445,13 +1436,11 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,

 	const  VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
 			vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
-	if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
+	if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
 		dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
-		if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
-			typed_memcpy(dynamic->discard_rectangle.rectangles,
-			             discard_rectangle_info->pDiscardRectangles,
-			             discard_rectangle_info->discardRectangleCount);
-		}
+		typed_memcpy(dynamic->discard_rectangle.rectangles,
+		             discard_rectangle_info->pDiscardRectangles,
+		             discard_rectangle_info->discardRectangleCount);
 	}

 	pipeline->dynamic_state.mask = states;
@@ -1894,13 +1883,27 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
 	}

 	for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
-		unsigned location = input_state->pVertexAttributeDescriptions[i].location;
-		unsigned binding = input_state->pVertexAttributeDescriptions[i].binding;
+		const VkVertexInputAttributeDescription *desc =
+			&input_state->pVertexAttributeDescriptions[i];
+		const struct vk_format_description *format_desc;
+		unsigned location = desc->location;
+		unsigned binding = desc->binding;
+		unsigned num_format, data_format;
+		int first_non_void;
+
 		if (binding_input_rate & (1u << binding)) {
 			key.instance_rate_inputs |= 1u << location;
 			key.instance_rate_divisors[location] = instance_rate_divisors[binding];
 		}

+		format_desc = vk_format_description(desc->format);
+		first_non_void = vk_format_get_first_non_void_channel(desc->format);
+
+		num_format = radv_translate_buffer_numformat(format_desc, first_non_void);
+		data_format = radv_translate_buffer_dataformat(format_desc, first_non_void);
+
+		key.vertex_attribute_formats[location] = data_format | (num_format << 4);
+
 		if (pipeline->device->physical_device->rad_info.chip_class <= VI &&
 		    pipeline->device->physical_device->rad_info.family != CHIP_STONEY) {
 			VkFormat format = input_state->pVertexAttributeDescriptions[i].format;
@@ -1924,8 +1927,6 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
 			}
 			key.vertex_alpha_adjust |= adjust << (2 * location);
 		}
-
-		key.vertex_attribute_provided |= 1 << location;
 	}

 	if (pCreateInfo->pTessellationState)
@@ -1954,9 +1955,10 @@ radv_fill_shader_keys(struct radv_shader_variant_key *keys,
 {
 	keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
 	keys[MESA_SHADER_VERTEX].vs.alpha_adjust = key->vertex_alpha_adjust;
-	keys[MESA_SHADER_VERTEX].vs.vertex_attribute_provided = key->vertex_attribute_provided;
-	for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i)
+	for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i) {
 		keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
+		keys[MESA_SHADER_VERTEX].vs.vertex_attribute_formats[i] = key->vertex_attribute_formats[i];
+	}

 	if (nir[MESA_SHADER_TESS_CTRL]) {
 		keys[MESA_SHADER_VERTEX].vs.as_ls = true;
@@ -2520,8 +2522,8 @@ radv_compute_bin_size(struct radv_pipeline *pipeline, const VkGraphicsPipelineCr

 	extent = color_entry->extent;

-	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
-		struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
+	if (subpass->depth_stencil_attachment) {
+		struct radv_render_pass_attachment *attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;

 		/* Coefficients taken from AMDVLK */
 		unsigned depth_coeff = vk_format_is_depth(attachment->format) ? 5 : 0;
@@ -2612,8 +2614,8 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
 	uint32_t db_render_control = 0, db_render_override2 = 0;
 	uint32_t db_render_override = 0;

-	if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED)
-		attachment = pass->attachments + subpass->depth_stencil_attachment.attachment;
+	if (subpass->depth_stencil_attachment)
+		attachment = pass->attachments + subpass->depth_stencil_attachment->attachment;

 	bool has_depth_attachment = attachment && vk_format_is_depth(attachment->format);
 	bool has_stencil_attachment = attachment && vk_format_is_stencil(attachment->format);
@@ -2655,8 +2657,7 @@ radv_pipeline_generate_depth_stencil_state(struct radeon_cmdbuf *ctx_cs,
 	db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
 			      S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);

-	if (pipeline->device->enabled_extensions.EXT_depth_range_unrestricted &&
-	    !pCreateInfo->pRasterizationState->depthClampEnable &&
+	if (!pCreateInfo->pRasterizationState->depthClampEnable &&
 	    ps->info.info.ps.writes_z) {
 		/* From VK_EXT_depth_range_unrestricted spec:
 		 *
@@ -2725,11 +2726,18 @@ radv_pipeline_generate_raster_state(struct radeon_cmdbuf *ctx_cs,
 	const VkConservativeRasterizationModeEXT mode =
 		radv_get_conservative_raster_mode(vkraster);
 	uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1);
+	bool depth_clip_disable = vkraster->depthClampEnable;
+
+	const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state =
+		vk_find_struct_const(vkraster->pNext, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
+	if (depth_clip_state) {
+		depth_clip_disable = !depth_clip_state->depthClipEnable;
+	}

 	radeon_set_context_reg(ctx_cs, R_028810_PA_CL_CLIP_CNTL,
 	                       S_028810_DX_CLIP_SPACE_DEF(1) | // vulkan uses DX conventions.
-	                       S_028810_ZCLIP_NEAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
-	                       S_028810_ZCLIP_FAR_DISABLE(vkraster->depthClampEnable ? 1 : 0) |
+	                       S_028810_ZCLIP_NEAR_DISABLE(depth_clip_disable ? 1 : 0) |
+	                       S_028810_ZCLIP_FAR_DISABLE(depth_clip_disable ? 1 : 0) |
 	                       S_028810_DX_RASTERIZATION_KILL(vkraster->rasterizerDiscardEnable ? 1 : 0) |
 	                       S_028810_DX_LINEAR_ATTR_CLIP_ENA(1));

@@ -3202,7 +3210,6 @@ radv_compute_db_shader_control(const struct radv_device *device,
 			       const struct radv_pipeline *pipeline,
                               const struct radv_shader_variant *ps)
 {
-	const struct radv_multisample_state *ms = &pipeline->graphics.ms;
 	unsigned z_order;
 	if (ps->info.fs.early_fragment_test || !ps->info.info.ps.writes_memory)
 		z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
@@ -3572,8 +3579,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 		   struct radv_device *device,
 		   struct radv_pipeline_cache *cache,
 		   const VkGraphicsPipelineCreateInfo *pCreateInfo,
-		   const struct radv_graphics_pipeline_create_info *extra,
-		   const VkAllocationCallbacks *alloc)
+		   const struct radv_graphics_pipeline_create_info *extra)
 {
 	VkResult result;
 	bool has_view_index = false;
@@ -3582,8 +3588,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
 	if (subpass->view_mask)
 		has_view_index = true;
-	if (alloc == NULL)
-		alloc = &device->alloc;

 	pipeline->device = device;
 	pipeline->layout = radv_pipeline_layout_from_handle(pCreateInfo->layout);
@@ -3711,7 +3715,7 @@ radv_graphics_pipeline_create(
 		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

 	result = radv_pipeline_init(pipeline, device, cache,
-				    pCreateInfo, extra, pAllocator);
+				    pCreateInfo, extra);
 	if (result != VK_SUCCESS) {
 		radv_pipeline_destroy(device, pipeline, pAllocator);
 		return result;
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -365,7 +365,7 @@ struct radv_pipeline_cache {
 struct radv_pipeline_key {
 	uint32_t instance_rate_inputs;
 	uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
-	uint32_t vertex_attribute_provided;
+	uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
 	uint64_t vertex_alpha_adjust;
 	unsigned tess_input_vertices;
 	uint32_t col_format;
@@ -1148,7 +1148,6 @@ void si_write_scissors(struct radeon_cmdbuf *cs, int first,
 		       const VkViewport *viewports, bool can_use_guardband);
 uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 				   bool instanced_draw, bool indirect_draw,
-				   bool count_from_stream_output,
 				   uint32_t draw_vertex_count);
 void si_cs_emit_write_event_eop(struct radeon_cmdbuf *cs,
 				enum chip_class chip_class,
@@ -1188,8 +1187,7 @@ radv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer,
 			     void **ptr);
 void
 radv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer,
-			    const struct radv_subpass *subpass,
-			    bool transitions);
+			    const struct radv_subpass *subpass);
 bool
 radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer,
 			    unsigned size, unsigned alignmnet,
@@ -1822,16 +1820,22 @@ struct radv_subpass_attachment {
 };

 struct radv_subpass {
+	uint32_t                                     attachment_count;
+	struct radv_subpass_attachment *             attachments;
+
 	uint32_t                                     input_count;
 	uint32_t                                     color_count;
 	struct radv_subpass_attachment *             input_attachments;
 	struct radv_subpass_attachment *             color_attachments;
 	struct radv_subpass_attachment *             resolve_attachments;
-	struct radv_subpass_attachment               depth_stencil_attachment;
+	struct radv_subpass_attachment *             depth_stencil_attachment;

 	/** Subpass has at least one resolve attachment */
 	bool                                         has_resolve;

+	/** Subpass has at least one color attachment */
+	bool                                         has_color_att;
+
 	struct radv_subpass_barrier                  start_barrier;

 	uint32_t                                     view_mask;
@@ -1845,7 +1849,9 @@ struct radv_render_pass_attachment {
 	VkAttachmentLoadOp                           stencil_load_op;
 	VkImageLayout                                initial_layout;
 	VkImageLayout                                final_layout;
-	uint32_t                                     view_mask;
+
+	/* The subpass id in which the attachment will be used last. */
+	uint32_t                                     last_subpass_idx;
 };

 struct radv_render_pass {
@@ -1940,6 +1946,9 @@ void radv_compile_nir_shader(struct ac_llvm_compiler *ac_llvm,
 			     int nir_count,
 			     const struct radv_nir_compiler_options *options);

+unsigned radv_nir_get_max_workgroup_size(enum chip_class chip_class,
+					 const struct nir_shader *nir);
+
 /* radv_shader_info.h */
 struct radv_shader_info;

@@ -1947,6 +1956,8 @@ void radv_nir_shader_info_pass(const struct nir_shader *nir,
 			       const struct radv_nir_compiler_options *options,
 			       struct radv_shader_info *info);

+void radv_nir_shader_info_init(struct radv_shader_info *info);
+
 struct radeon_winsys_sem;

 #define RADV_DEFINE_HANDLE_CASTS(__radv_type, __VkType)		\
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -159,7 +159,7 @@ radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively,
                NIR_PASS(progress, shader, nir_opt_if);
                NIR_PASS(progress, shader, nir_opt_dead_cf);
                NIR_PASS(progress, shader, nir_opt_cse);
-                NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true);
+                NIR_PASS(progress, shader, nir_opt_peephole_select, 8, true, true);
                NIR_PASS(progress, shader, nir_opt_algebraic);
                NIR_PASS(progress, shader, nir_opt_constant_folding);
                NIR_PASS(progress, shader, nir_opt_undef);
@@ -222,8 +222,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
 			.lower_ubo_ssbo_access_to_offsets = true,
 			.caps = {
 				.descriptor_array_dynamic_indexing = true,
-				.descriptor_array_non_uniform_indexing = true,
-				.descriptor_indexing = true,
 				.device_group = true,
 				.draw_parameters = true,
 				.float64 = true,
@@ -234,6 +232,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 				.int16 = true,
 				.int64 = true,
 				.multiview = true,
+				.physical_storage_buffer_address = true,
 				.runtime_descriptor_array = true,
 				.shader_viewport_index_layer = true,
 				.stencil_export = true,
@@ -252,6 +251,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 			},
 			.ubo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
 			.ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT, 2),
+			.phys_ssbo_ptr_type = glsl_vector_type(GLSL_TYPE_UINT64, 1),
 			.push_const_ptr_type = glsl_uint_type(),
 			.shared_ptr_type = glsl_uint_type(),
 		};
@@ -735,7 +735,8 @@ generate_shader_stats(struct radv_device *device,
 		      gl_shader_stage stage,
 		      struct _mesa_string_buffer *buf)
 {
-	unsigned lds_increment = device->physical_device->rad_info.chip_class >= CIK ? 512 : 256;
+	enum chip_class chip_class = device->physical_device->rad_info.chip_class;
+	unsigned lds_increment = chip_class >= CIK ? 512 : 256;
 	struct ac_shader_config *conf;
 	unsigned max_simd_waves;
 	unsigned lds_per_wave = 0;
@@ -748,12 +749,17 @@ generate_shader_stats(struct radv_device *device,
 		lds_per_wave = conf->lds_size * lds_increment +
 			       align(variant->info.fs.num_interp * 48,
 				     lds_increment);
+	} else if (stage == MESA_SHADER_COMPUTE) {
+		unsigned max_workgroup_size =
+				radv_nir_get_max_workgroup_size(chip_class, variant->nir);
+		lds_per_wave = (conf->lds_size * lds_increment) /
+			       DIV_ROUND_UP(max_workgroup_size, 64);
 	}

 	if (conf->num_sgprs)
 		max_simd_waves =
 			MIN2(max_simd_waves,
-			     radv_get_num_physical_sgprs(device->physical_device) / conf->num_sgprs);
+			     ac_get_num_physical_sgprs(chip_class) / conf->num_sgprs);

 	if (conf->num_vgprs)
 		max_simd_waves =
@@ -838,7 +844,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
 			VkShaderStatisticsInfoAMD statistics = {};
 			statistics.shaderStageMask = shaderStage;
 			statistics.numPhysicalVgprs = RADV_NUM_PHYSICAL_VGPRS;
-			statistics.numPhysicalSgprs = radv_get_num_physical_sgprs(device->physical_device);
+			statistics.numPhysicalSgprs = ac_get_num_physical_sgprs(device->physical_device->rad_info.chip_class);
 			statistics.numAvailableSgprs = statistics.numPhysicalSgprs;

 			if (stage == MESA_SHADER_COMPUTE) {
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -65,9 +65,7 @@ enum {
 struct radv_vs_variant_key {
 	uint32_t instance_rate_inputs;
 	uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
-
-	/* Mask of vertex attributes that are provided by the pipeline. */
-	uint32_t vertex_attribute_provided;
+	uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];

 	/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
 	 * so we may need to fix it up. */
@@ -132,10 +130,11 @@ struct radv_nir_compiler_options {
 enum radv_ud_index {
 	AC_UD_SCRATCH_RING_OFFSETS = 0,
 	AC_UD_PUSH_CONSTANTS = 1,
-	AC_UD_INDIRECT_DESCRIPTOR_SETS = 2,
-	AC_UD_VIEW_INDEX = 3,
-	AC_UD_STREAMOUT_BUFFERS = 4,
-	AC_UD_SHADER_START = 5,
+	AC_UD_INLINE_PUSH_CONSTANTS = 2,
+	AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
+	AC_UD_VIEW_INDEX = 4,
+	AC_UD_STREAMOUT_BUFFERS = 5,
+	AC_UD_SHADER_START = 6,
 	AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
 	AC_UD_VS_BASE_VERTEX_START_INSTANCE,
 	AC_UD_VS_MAX_UD,
@@ -165,6 +164,13 @@ struct radv_streamout_info {

 struct radv_shader_info {
 	bool loads_push_constants;
+	bool loads_dynamic_offsets;
+	uint8_t min_push_constant_used;
+	uint8_t max_push_constant_used;
+	bool has_only_32bit_push_constants;
+	bool has_indirect_push_constants;
+	uint8_t num_inline_push_consts;
+	uint8_t base_inline_push_consts;
 	uint32_t desc_set_used_mask;
 	bool needs_multiview_view_index;
 	bool uses_invocation_id;
@@ -413,10 +419,4 @@ static inline unsigned shader_io_get_unique_index(gl_varying_slot slot)
 	unreachable("illegal slot in get unique index\n");
 }

-static inline uint32_t
-radv_get_num_physical_sgprs(struct radv_physical_device *physical_device)
-{
-	return physical_device->rad_info.chip_class >= VI ? 800 : 512;
-}
-
 #endif
--- a/src/amd/vulkan/radv_shader_info.c
+++ b/src/amd/vulkan/radv_shader_info.c
@@ -115,15 +115,6 @@ gather_intrinsic_load_deref_info(const nir_shader *nir,
 	}
 }

-static uint32_t
-widen_writemask(uint32_t wrmask)
-{
-	uint32_t new_wrmask = 0;
-	for(unsigned i = 0; i < 4; i++)
-		new_wrmask |= (wrmask & (1 << i) ? 0x3 : 0x0) << (i * 2);
-	return new_wrmask;
-}
-
 static void
 set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
 		      uint8_t *output_usage_mask)
@@ -131,7 +122,7 @@ set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
 	nir_deref_instr *deref_instr =
 		nir_instr_as_deref(instr->src[0].ssa->parent_instr);
 	nir_variable *var = nir_deref_instr_get_variable(deref_instr);
-	unsigned attrib_count = glsl_count_attribute_slots(deref_instr->type, false);
+	unsigned attrib_count = glsl_count_attribute_slots(var->type, false);
 	unsigned idx = var->data.location;
 	unsigned comp = var->data.location_frac;
 	unsigned const_offset = 0;
@@ -139,19 +130,15 @@ set_output_usage_mask(const nir_shader *nir, const nir_intrinsic_instr *instr,
 	get_deref_offset(deref_instr, &const_offset);

 	if (var->data.compact) {
-		assert(!glsl_type_is_64bit(deref_instr->type));
 		const_offset += comp;
 		output_usage_mask[idx + const_offset / 4] |= 1 << (const_offset % 4);
 		return;
 	}

-	uint32_t wrmask = nir_intrinsic_write_mask(instr);
-	if (glsl_type_is_64bit(deref_instr->type))
-		wrmask = widen_writemask(wrmask);
-
-	for (unsigned i = 0; i < attrib_count; i++)
+	for (unsigned i = 0; i < attrib_count; i++) {
 		output_usage_mask[idx + i + const_offset] |=
-			((wrmask >> (i * 4)) & 0xf) << comp;
+			instr->const_index[0] << comp;
+	}
 }

 static void
@@ -197,6 +184,32 @@ gather_intrinsic_store_deref_info(const nir_shader *nir,
 	}
 }

+/* Gather push constant usage (range, indirect access, non-32-bit loads) from one load. */
+static void
+gather_push_constant_info(const nir_shader *nir,
+			  const nir_intrinsic_instr *instr,
+			  struct radv_shader_info *info)
+{
+	nir_const_value *offset = nir_src_as_const_value(instr->src[0]);
+	int base = nir_intrinsic_base(instr);
+
+	info->loads_push_constants = true;
+	if (instr->dest.ssa.bit_size != 32)
+		info->has_only_32bit_push_constants = false;
+
+	if (!offset) {
+		/* No constant value for the offset source: no range is recorded. */
+		info->has_indirect_push_constants = true;
+		return;
+	}
+
+	/* Range is [start, end), computed with 4 units per component. */
+	uint32_t start = base + offset->u32[0];
+	uint32_t end = start + instr->num_components * 4;
+	info->max_push_constant_used = MAX2(end, info->max_push_constant_used);
+	info->min_push_constant_used = MIN2(start, info->min_push_constant_used);
+}
+
 static void
 gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
 		      struct radv_shader_info *info)
@@ -250,7 +263,7 @@ gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
 		info->uses_prim_id = true;
 		break;
 	case nir_intrinsic_load_push_constant:
-		info->loads_push_constants = true;
+		gather_push_constant_info(nir, instr, info);
 		break;
 	case nir_intrinsic_vulkan_resource_index:
 		info->desc_set_used_mask |= (1 << nir_intrinsic_desc_set(instr));
@@ -512,6 +525,14 @@ gather_xfb_info(const nir_shader *nir, struct radv_shader_info *info)
 	ralloc_free(xfb);
 }

+void
+radv_nir_shader_info_init(struct radv_shader_info *info)
+{
+	/* Defaults: only 32-bit push constants assumed, minimum used offset at its largest value. */
+	info->has_only_32bit_push_constants = true;
+	info->min_push_constant_used = UINT8_MAX;
+}
+
 void
 radv_nir_shader_info_pass(const struct nir_shader *nir,
 			  const struct radv_nir_compiler_options *options,
@@ -523,6 +544,7 @@ radv_nir_shader_info_pass(const struct nir_shader *nir,
 	if (options->layout && options->layout->dynamic_offset_count &&
 	    (options->layout->dynamic_shader_stages & mesa_to_vk_shader_stage(nir->info.stage))) {
 		info->loads_push_constants = true;
+		info->loads_dynamic_offsets = true;
 	}

 	nir_foreach_variable(variable, &nir->inputs)
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -561,7 +561,6 @@ radv_prims_for_vertices(struct radv_prim_vertex_count *info, unsigned num)
 uint32_t
 si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 			  bool instanced_draw, bool indirect_draw,
-			  bool count_from_stream_output,
 			  uint32_t draw_vertex_count)
 {
 	enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
@@ -623,12 +622,6 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 		    (instanced_draw || indirect_draw))
 			partial_vs_wave = true;

-		/* Hardware requirement when drawing primitives from a stream
-		 * output buffer.
-		 */
-		if (count_from_stream_output)
-			wd_switch_on_eop = true;
-
 		/* If the WD switch is false, the IA switch must be false too. */
 		assert(wd_switch_on_eop || !ia_switch_on_eop);
 	}
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys_public.h
@@ -29,13 +29,6 @@
 #ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
 #define RADV_AMDGPU_WINSYS_PUBLIC_H

-/* The number of IBs per submit isn't infinite, it depends on the ring type
- * (ie. some initial setup needed for a submit) and the number of IBs (4 DW).
- * This limit is arbitrary but should be safe for now.  Ideally, we should get
- * this limit from the KMD.
-*/
-#define RADV_MAX_IBS_PER_SUBMIT 192
-
 struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
 						uint64_t perftest_flags);

--- a/src/broadcom/cle/v3d_decoder.c
+++ b/src/broadcom/cle/v3d_decoder.c
@@ -651,7 +651,8 @@ v3d_spec_load(const struct v3d_device_info *devinfo)
        struct parser_context ctx;
        void *buf;
        uint8_t *text_data = NULL;
-        uint32_t text_offset = 0, text_length = 0, total_length;
+        uint32_t text_offset = 0, text_length = 0;
+        MAYBE_UNUSED uint32_t total_length;

        for (int i = 0; i < ARRAY_SIZE(genxml_files_table); i++) {
                if (i != 0) {
--- a/src/broadcom/cle/v3d_packet_v33.xml
+++ b/src/broadcom/cle/v3d_packet_v33.xml
@@ -820,8 +820,8 @@

  <packet code="120" name="Tile Binning Mode Cfg" min_ver="41">

-    <field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
-    <field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
+    <field name="Height (in pixels)" size="12" start="48" type="uint" minus_one="true"/>
+    <field name="Width (in pixels)" size="12" start="32" type="uint" minus_one="true"/>

    <field name="Double-buffer in non-ms mode" size="1" start="15" type="bool"/>
    <field name="Multisample Mode (4x)" size="1" start="14" type="bool"/>
--- a/src/broadcom/common/v3d_limits.h
+++ b/src/broadcom/common/v3d_limits.h
@@ -32,8 +32,7 @@
 */
 #define V3D_MAX_TEXTURE_SAMPLERS 16

-/* The HW can do 16384 (15), but we run into hangs when we expose that. */
-#define V3D_MAX_MIP_LEVELS 13
+#define V3D_MAX_MIP_LEVELS 12

 #define V3D_MAX_SAMPLES 4

--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -283,8 +283,10 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                           instr->num_components - 2);
        }

-        if (c->execute.file != QFILE_NULL)
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        if (vir_in_nonuniform_control_flow(c)) {
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
+        }

        struct qreg dest;
        if (config == ~0)
@@ -307,7 +309,7 @@ ntq_emit_tmu_general(struct v3d_compile *c, nir_intrinsic_instr *instr,
                        vir_uniform_ui(c, config);
        }

-        if (c->execute.file != QFILE_NULL)
+        if (vir_in_nonuniform_control_flow(c))
                vir_set_cond(tmu, V3D_QPU_COND_IFA);

        vir_emit_thrsw(c);
@@ -392,13 +394,14 @@ ntq_store_dest(struct v3d_compile *c, nir_dest *dest, int chan,
                /* If we're in control flow, then make this update of the reg
                 * conditional on the execution mask.
                 */
-                if (c->execute.file != QFILE_NULL) {
+                if (vir_in_nonuniform_control_flow(c)) {
                        last_inst->dst.index = qregs[chan].index;

                        /* Set the flags to the current exec mask.
                         */
                        c->cursor = vir_before_inst(last_inst);
-                        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                                   V3D_QPU_PF_PUSHZ);
                        c->cursor = vir_after_inst(last_inst);

                        vir_set_cond(last_inst, V3D_QPU_COND_IFA);
@@ -540,26 +543,13 @@ ntq_fsign(struct v3d_compile *c, struct qreg src)
        struct qreg t = vir_get_temp(c);

        vir_MOV_dest(c, t, vir_uniform_f(c, 0.0));
-        vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_FMOV_dest(c, vir_nop_reg(), src), V3D_QPU_PF_PUSHZ);
        vir_MOV_cond(c, V3D_QPU_COND_IFNA, t, vir_uniform_f(c, 1.0));
-        vir_PF(c, vir_FMOV(c, src), V3D_QPU_PF_PUSHN);
+        vir_set_pf(vir_FMOV_dest(c, vir_nop_reg(), src), V3D_QPU_PF_PUSHN);
        vir_MOV_cond(c, V3D_QPU_COND_IFA, t, vir_uniform_f(c, -1.0));
        return vir_MOV(c, t);
 }

-static struct qreg
-ntq_isign(struct v3d_compile *c, struct qreg src)
-{
-        struct qreg t = vir_get_temp(c);
-
-        vir_MOV_dest(c, t, vir_uniform_ui(c, 0));
-        vir_PF(c, vir_MOV(c, src), V3D_QPU_PF_PUSHZ);
-        vir_MOV_cond(c, V3D_QPU_COND_IFNA, t, vir_uniform_ui(c, 1));
-        vir_PF(c, vir_MOV(c, src), V3D_QPU_PF_PUSHN);
-        vir_MOV_cond(c, V3D_QPU_COND_IFA, t, vir_uniform_ui(c, -1));
-        return vir_MOV(c, t);
-}
-
 static void
 emit_fragcoord_input(struct v3d_compile *c, int attr)
 {
@@ -711,7 +701,7 @@ ntq_emit_comparison(struct v3d_compile *c,
        if (nir_op_infos[compare_instr->op].num_inputs > 1)
                src1 = ntq_get_alu_src(c, compare_instr, 1);
        bool cond_invert = false;
-        struct qreg nop = vir_reg(QFILE_NULL, 0);
+        struct qreg nop = vir_nop_reg();

        switch (compare_instr->op) {
        case nir_op_feq32:
@@ -756,6 +746,16 @@ ntq_emit_comparison(struct v3d_compile *c,
                vir_set_pf(vir_SUB_dest(c, nop, src0, src1), V3D_QPU_PF_PUSHC);
                break;

+        case nir_op_i2b32:
+                vir_set_pf(vir_MOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ);
+                cond_invert = true;
+                break;
+
+        case nir_op_f2b32:
+                vir_set_pf(vir_FMOV_dest(c, nop, src0), V3D_QPU_PF_PUSHZ);
+                cond_invert = true;
+                break;
+
        default:
                return false;
        }
@@ -789,28 +789,24 @@ ntq_get_alu_parent(nir_src src)
        return instr;
 }

-/**
- * Attempts to fold a comparison generating a boolean result into the
- * condition code for selecting between two values, instead of comparing the
- * boolean result against 0 to generate the condition code.
- */
-static struct qreg ntq_emit_bcsel(struct v3d_compile *c, nir_alu_instr *instr,
-                                  struct qreg *src)
+/* Turns a NIR bool into a condition code to predicate on. */
+static enum v3d_qpu_cond
+ntq_emit_bool_to_cond(struct v3d_compile *c, nir_src src)
 {
-        nir_alu_instr *compare = ntq_get_alu_parent(instr->src[0].src);
+        nir_alu_instr *compare = ntq_get_alu_parent(src);
        if (!compare)
                goto out;

        enum v3d_qpu_cond cond;
        if (ntq_emit_comparison(c, compare, &cond))
-                return vir_MOV(c, vir_SEL(c, cond, src[1], src[2]));
+                return cond;

 out:
-        vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
-        return vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA, src[1], src[2]));
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), ntq_get_src(c, src, 0)),
+                   V3D_QPU_PF_PUSHZ);
+        return V3D_QPU_COND_IFNA;
 }

-
 static void
 ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
 {
@@ -889,13 +885,6 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
        case nir_op_b2i32:
                result = vir_AND(c, src[0], vir_uniform_ui(c, 1));
                break;
-        case nir_op_i2b32:
-        case nir_op_f2b32:
-                vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
-                result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
-                                            vir_uniform_ui(c, ~0),
-                                            vir_uniform_ui(c, 0)));
-                break;

        case nir_op_iadd:
                result = vir_ADD(c, src[0], src[1]);
@@ -958,6 +947,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                break;
        }

+        case nir_op_i2b32:
+        case nir_op_f2b32:
        case nir_op_feq32:
        case nir_op_fne32:
        case nir_op_fge32:
@@ -978,10 +969,15 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
        }

        case nir_op_b32csel:
-                result = ntq_emit_bcsel(c, instr, src);
+                result = vir_MOV(c,
+                                 vir_SEL(c,
+                                         ntq_emit_bool_to_cond(c, instr->src[0].src),
+                                         src[1], src[2]));
                break;
+
        case nir_op_fcsel:
-                vir_PF(c, src[0], V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), src[0]),
+                           V3D_QPU_PF_PUSHZ);
                result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFNA,
                                            src[1], src[2]));
                break;
@@ -1011,9 +1007,6 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
        case nir_op_ftrunc:
                result = vir_FTRUNC(c, src[0]);
                break;
-        case nir_op_ffract:
-                result = vir_FSUB(c, src[0], vir_FFLOOR(c, src[0]));
-                break;

        case nir_op_fsin:
                result = ntq_fsincos(c, src[0], false);
@@ -1025,9 +1018,6 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
        case nir_op_fsign:
                result = ntq_fsign(c, src[0]);
                break;
-        case nir_op_isign:
-                result = ntq_isign(c, src[0]);
-                break;

        case nir_op_fabs: {
                result = vir_FMOV(c, src[0]);
@@ -1036,8 +1026,7 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
        }

        case nir_op_iabs:
-                result = vir_MAX(c, src[0],
-                                vir_SUB(c, vir_uniform_ui(c, 0), src[0]));
+                result = vir_MAX(c, src[0], vir_NEG(c, src[0]));
                break;

        case nir_op_fddx:
@@ -1053,7 +1042,8 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                break;

        case nir_op_uadd_carry:
-                vir_PF(c, vir_ADD(c, src[0], src[1]), V3D_QPU_PF_PUSHC);
+                vir_set_pf(vir_ADD_dest(c, vir_nop_reg(), src[0], src[1]),
+                           V3D_QPU_PF_PUSHC);
                result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
                                            vir_uniform_ui(c, ~0),
                                            vir_uniform_ui(c, 0)));
@@ -1064,9 +1054,6 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
                break;

        case nir_op_unpack_half_2x16_split_x:
-                /* XXX perf: It would be good to be able to merge this unpack
-                 * with whatever uses our result.
-                 */
                result = vir_FMOV(c, src[0]);
                vir_set_unpack(c->defs[result.index], 0, V3D_QPU_UNPACK_L);
                break;
@@ -1129,8 +1116,8 @@ emit_frag_end(struct v3d_compile *c)
        */

        bool has_any_tlb_color_write = false;
-        for (int rt = 0; rt < c->fs_key->nr_cbufs; rt++) {
-                if (c->output_color_var[rt])
+        for (int rt = 0; rt < V3D_MAX_DRAW_BUFFERS; rt++) {
+                if (c->fs_key->cbufs & (1 << rt) && c->output_color_var[rt])
                        has_any_tlb_color_write = true;
        }

@@ -1138,7 +1125,7 @@ emit_frag_end(struct v3d_compile *c)
                struct nir_variable *var = c->output_color_var[0];
                struct qreg *color = &c->outputs[var->data.driver_location * 4];

-                vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                vir_SETMSF_dest(c, vir_nop_reg(),
                                vir_AND(c,
                                        vir_MSF(c),
                                        vir_FTOC(c, color[3])));
@@ -1175,7 +1162,7 @@ emit_frag_end(struct v3d_compile *c)

                struct qinst *inst = vir_MOV_dest(c,
                                                  vir_reg(QFILE_TLBU, 0),
-                                                  vir_reg(QFILE_NULL, 0));
+                                                  vir_nop_reg());
                uint8_t tlb_specifier = TLB_TYPE_DEPTH;

                if (c->devinfo->ver >= 42) {
@@ -1197,8 +1184,8 @@ emit_frag_end(struct v3d_compile *c)
         * uniform setup
         */

-        for (int rt = 0; rt < c->fs_key->nr_cbufs; rt++) {
-                if (!c->output_color_var[rt])
+        for (int rt = 0; rt < V3D_MAX_DRAW_BUFFERS; rt++) {
+                if (!(c->fs_key->cbufs & (1 << rt)) || !c->output_color_var[rt])
                        continue;

                nir_variable *var = c->output_color_var[rt];
@@ -1458,7 +1445,7 @@ v3d_optimize_nir(struct nir_shader *s)
                NIR_PASS(progress, s, nir_opt_dce);
                NIR_PASS(progress, s, nir_opt_dead_cf);
                NIR_PASS(progress, s, nir_opt_cse);
-                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true);
+                NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
                NIR_PASS(progress, s, nir_opt_algebraic);
                NIR_PASS(progress, s, nir_opt_constant_folding);
                NIR_PASS(progress, s, nir_opt_undef);
@@ -1492,7 +1479,6 @@ ntq_emit_vpm_read(struct v3d_compile *c,

        if (*num_components_queued != 0) {
                (*num_components_queued)--;
-                c->num_inputs++;
                return vir_MOV(c, vpm);
        }

@@ -1502,7 +1488,6 @@ ntq_emit_vpm_read(struct v3d_compile *c,

        *num_components_queued = num_components - 1;
        *remaining -= num_components;
-        c->num_inputs++;

        return vir_MOV(c, vpm);
 }
@@ -1550,6 +1535,12 @@ ntq_setup_vpm_inputs(struct v3d_compile *c)
                                           &num_components, ~0);
        }

+        /* The actual loads will happen directly in nir_intrinsic_load_input
+         * on newer versions.
+         */
+        if (c->devinfo->ver >= 40)
+                return;
+
        for (int loc = 0; loc < ARRAY_SIZE(c->vattr_sizes); loc++) {
                resize_qreg_array(c, &c->inputs, &c->inputs_array_size,
                                  (loc + 1) * 4);
@@ -1855,7 +1846,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                break;

        case nir_intrinsic_load_helper_invocation:
-                vir_PF(c, vir_MSF(c), V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MSF_dest(c, vir_nop_reg()), V3D_QPU_PF_PUSHZ);
                ntq_store_dest(c, &instr->dest, 0,
                               vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
                                                  vir_uniform_ui(c, ~0),
@@ -1881,12 +1872,43 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                break;

        case nir_intrinsic_load_input:
-                for (int i = 0; i < instr->num_components; i++) {
-                        offset = (nir_intrinsic_base(instr) +
-                                  nir_src_as_uint(instr->src[0]));
-                        int comp = nir_intrinsic_component(instr) + i;
-                        ntq_store_dest(c, &instr->dest, i,
-                                       vir_MOV(c, c->inputs[offset * 4 + comp]));
+                offset = (nir_intrinsic_base(instr) +
+                          nir_src_as_uint(instr->src[0]));
+                if (c->s->info.stage != MESA_SHADER_FRAGMENT &&
+                    c->devinfo->ver >= 40) {
+                        /* Emit the LDVPM directly now, rather than at the top
+                         * of the shader like we did for V3D 3.x (which needs
+                         * vpmsetup when not just taking the next offset).
+                         *
+                         * Note that delaying like this may introduce stalls,
+                         * as LDVPMV takes a minimum of 1 instruction but may
+                         * be slower if the VPM unit is busy with another QPU.
+                         */
+                        int index = 0;
+                        if (c->s->info.system_values_read &
+                            (1ull << SYSTEM_VALUE_INSTANCE_ID)) {
+                                index++;
+                        }
+                        if (c->s->info.system_values_read &
+                            (1ull << SYSTEM_VALUE_VERTEX_ID)) {
+                                index++;
+                        }
+                        for (int i = 0; i < offset; i++)
+                                index += c->vattr_sizes[i];
+                        index += nir_intrinsic_component(instr);
+                        for (int i = 0; i < instr->num_components; i++) {
+                                struct qreg vpm_offset =
+                                        vir_uniform_ui(c, index++);
+                                ntq_store_dest(c, &instr->dest, i,
+                                               vir_LDVPMV_IN(c, vpm_offset));
+                        }
+                } else {
+                        for (int i = 0; i < instr->num_components; i++) {
+                                int comp = nir_intrinsic_component(instr) + i;
+                                ntq_store_dest(c, &instr->dest, i,
+                                               vir_MOV(c, c->inputs[offset * 4 +
+                                                                    comp]));
+                        }
                }
                break;

@@ -1908,38 +1930,35 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
                break;

        case nir_intrinsic_discard:
-                if (c->execute.file != QFILE_NULL) {
-                        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                if (vir_in_nonuniform_control_flow(c)) {
+                        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                                   V3D_QPU_PF_PUSHZ);
+                        vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(),
                                                     vir_uniform_ui(c, 0)),
                                V3D_QPU_COND_IFA);
                } else {
-                        vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
+                        vir_SETMSF_dest(c, vir_nop_reg(),
                                        vir_uniform_ui(c, 0));
                }
                break;

        case nir_intrinsic_discard_if: {
-                /* true (~0) if we're discarding */
-                struct qreg cond = ntq_get_src(c, instr->src[0], 0);
+                enum v3d_qpu_cond cond = ntq_emit_bool_to_cond(c, instr->src[0]);

-                if (c->execute.file != QFILE_NULL) {
-                        /* execute == 0 means the channel is active.  Invert
-                         * the condition so that we can use zero as "executing
-                         * and discarding."
-                         */
-                        vir_PF(c, vir_OR(c, c->execute, vir_NOT(c, cond)),
-                               V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
-                                                     vir_uniform_ui(c, 0)),
-                                     V3D_QPU_COND_IFA);
-                } else {
-                        vir_PF(c, cond, V3D_QPU_PF_PUSHZ);
-                        vir_set_cond(vir_SETMSF_dest(c, vir_reg(QFILE_NULL, 0),
-                                                     vir_uniform_ui(c, 0)),
-                                     V3D_QPU_COND_IFNA);
+                if (vir_in_nonuniform_control_flow(c)) {
+                        struct qinst *exec_flag = vir_MOV_dest(c, vir_nop_reg(),
+                                                               c->execute);
+                        if (cond == V3D_QPU_COND_IFA) {
+                                vir_set_uf(exec_flag, V3D_QPU_UF_ANDZ);
+                        } else {
+                                vir_set_uf(exec_flag, V3D_QPU_UF_NORNZ);
+                                cond = V3D_QPU_COND_IFA;
+                        }
                }

+                vir_set_cond(vir_SETMSF_dest(c, vir_nop_reg(),
+                                             vir_uniform_ui(c, 0)), cond);
+
                break;
        }

@@ -2030,7 +2049,7 @@ ntq_emit_intrinsic(struct v3d_compile *c, nir_intrinsic_instr *instr)
 static void
 ntq_activate_execute_for_block(struct v3d_compile *c)
 {
-        vir_set_pf(vir_XOR_dest(c, vir_reg(QFILE_NULL, 0),
+        vir_set_pf(vir_XOR_dest(c, vir_nop_reg(),
                                c->execute, vir_uniform_ui(c, c->cur_block->index)),
                   V3D_QPU_PF_PUSHZ);

@@ -2054,14 +2073,7 @@ ntq_emit_uniform_if(struct v3d_compile *c, nir_if *if_stmt)
                else_block = vir_new_block(c);

        /* Set up the flags for the IF condition (taking the THEN branch). */
-        nir_alu_instr *if_condition_alu = ntq_get_alu_parent(if_stmt->condition);
-        enum v3d_qpu_cond cond;
-        if (!if_condition_alu ||
-            !ntq_emit_comparison(c, if_condition_alu, &cond)) {
-                vir_PF(c, ntq_get_src(c, if_stmt->condition, 0),
-                       V3D_QPU_PF_PUSHZ);
-                cond = V3D_QPU_COND_IFNA;
-        }
+        enum v3d_qpu_cond cond = ntq_emit_bool_to_cond(c, if_stmt->condition);

        /* Jump to ELSE. */
        vir_BRANCH(c, cond == V3D_QPU_COND_IFA ?
@@ -2107,20 +2119,13 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
                else_block = vir_new_block(c);

        bool was_uniform_control_flow = false;
-        if (c->execute.file == QFILE_NULL) {
+        if (!vir_in_nonuniform_control_flow(c)) {
                c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
                was_uniform_control_flow = true;
        }

        /* Set up the flags for the IF condition (taking the THEN branch). */
-        nir_alu_instr *if_condition_alu = ntq_get_alu_parent(if_stmt->condition);
-        enum v3d_qpu_cond cond;
-        if (!if_condition_alu ||
-            !ntq_emit_comparison(c, if_condition_alu, &cond)) {
-                vir_PF(c, ntq_get_src(c, if_stmt->condition, 0),
-                       V3D_QPU_PF_PUSHZ);
-                cond = V3D_QPU_COND_IFNA;
-        }
+        enum v3d_qpu_cond cond = ntq_emit_bool_to_cond(c, if_stmt->condition);

        /* Update the flags+cond to mean "Taking the ELSE branch (!cond) and
         * was previously active (execute Z) for updating the exec flags.
@@ -2128,8 +2133,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
        if (was_uniform_control_flow) {
                cond = v3d_qpu_cond_invert(cond);
        } else {
-                struct qinst *inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0),
-                                                  c->execute);
+                struct qinst *inst = vir_MOV_dest(c, vir_nop_reg(), c->execute);
                if (cond == V3D_QPU_COND_IFA) {
                        vir_set_uf(inst, V3D_QPU_UF_NORNZ);
                } else {
@@ -2145,7 +2149,7 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
        /* Jump to ELSE if nothing is active for THEN, otherwise fall
         * through.
         */
-        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ);
        vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLNA);
        vir_link_blocks(c->cur_block, else_block);
        vir_link_blocks(c->cur_block, then_block);
@@ -2159,14 +2163,16 @@ ntq_emit_nonuniform_if(struct v3d_compile *c, nir_if *if_stmt)
                 * active channels update their execute flags to point to
                 * ENDIF
                 */
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
                vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
                             vir_uniform_ui(c, after_block->index));

                /* If everything points at ENDIF, then jump there immediately. */
-                vir_PF(c, vir_XOR(c, c->execute,
-                                  vir_uniform_ui(c, after_block->index)),
-                       V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_XOR_dest(c, vir_nop_reg(),
+                                        c->execute,
+                                        vir_uniform_ui(c, after_block->index)),
+                           V3D_QPU_PF_PUSHZ);
                vir_BRANCH(c, V3D_QPU_BRANCH_COND_ALLA);
                vir_link_blocks(c->cur_block, after_block);
                vir_link_blocks(c->cur_block, else_block);
@@ -2190,7 +2196,7 @@ ntq_emit_if(struct v3d_compile *c, nir_if *nif)
 {
        bool was_in_control_flow = c->in_control_flow;
        c->in_control_flow = true;
-        if (c->execute.file == QFILE_NULL &&
+        if (!vir_in_nonuniform_control_flow(c) &&
            nir_src_is_dynamically_uniform(nif->condition)) {
                ntq_emit_uniform_if(c, nif);
        } else {
@@ -2204,13 +2210,15 @@ ntq_emit_jump(struct v3d_compile *c, nir_jump_instr *jump)
 {
        switch (jump->type) {
        case nir_jump_break:
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
                vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
                             vir_uniform_ui(c, c->loop_break_block->index));
                break;

        case nir_jump_continue:
-                vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+                vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute),
+                           V3D_QPU_PF_PUSHZ);
                vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute,
                             vir_uniform_ui(c, c->loop_cont_block->index));
                break;
@@ -2277,7 +2285,7 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
        c->in_control_flow = true;

        bool was_uniform_control_flow = false;
-        if (c->execute.file == QFILE_NULL) {
+        if (!vir_in_nonuniform_control_flow(c)) {
                c->execute = vir_MOV(c, vir_uniform_ui(c, 0));
                was_uniform_control_flow = true;
        }
@@ -2299,13 +2307,14 @@ ntq_emit_loop(struct v3d_compile *c, nir_loop *loop)
         *
         * XXX: Use the .ORZ flags update, instead.
         */
-        vir_PF(c, vir_XOR(c,
-                          c->execute,
-                          vir_uniform_ui(c, c->loop_cont_block->index)),
-               V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_XOR_dest(c,
+                                vir_nop_reg(),
+                                c->execute,
+                                vir_uniform_ui(c, c->loop_cont_block->index)),
+                   V3D_QPU_PF_PUSHZ);
        vir_MOV_cond(c, V3D_QPU_COND_IFA, c->execute, vir_uniform_ui(c, 0));

-        vir_PF(c, c->execute, V3D_QPU_PF_PUSHZ);
+        vir_set_pf(vir_MOV_dest(c, vir_nop_reg(), c->execute), V3D_QPU_PF_PUSHZ);

        struct qinst *branch = vir_BRANCH(c, V3D_QPU_BRANCH_COND_ANYA);
        /* Pixels that were not dispatched or have been discarded should not
@@ -2471,6 +2480,7 @@ const nir_shader_compiler_options v3d_nir_options = {
        .lower_bitfield_reverse = true,
        .lower_bit_count = true,
        .lower_cs_local_id_from_index = true,
+        .lower_ffract = true,
        .lower_pack_unorm_2x16 = true,
        .lower_pack_snorm_2x16 = true,
        .lower_pack_unorm_4x8 = true,
@@ -2487,6 +2497,7 @@ const nir_shader_compiler_options v3d_nir_options = {
        .lower_fsat = true,
        .lower_fsqrt = true,
        .lower_ifind_msb = true,
+        .lower_isign = true,
        .lower_ldexp = true,
        .lower_mul_high = true,
        .lower_wpos_pntc = true,
@@ -2659,5 +2670,15 @@ v3d_nir_to_vir(struct v3d_compile *c)
                        vir_remove_thrsw(c);
        }

+        if (c->spill_size &&
+            (V3D_DEBUG & (V3D_DEBUG_VIR |
+                          v3d_debug_flag_for_shader_stage(c->s->info.stage)))) {
+                fprintf(stderr, "%s prog %d/%d spilled VIR:\n",
+                        vir_get_stage_name(c),
+                        c->program_id, c->variant_id);
+                vir_dump(c);
+                fprintf(stderr, "\n");
+        }
+
        v3d_vir_to_qpu(c, temp_registers);
 }
--- a/src/broadcom/compiler/qpu_schedule.c
+++ b/src/broadcom/compiler/qpu_schedule.c
@@ -1028,7 +1028,7 @@ insert_scheduled_instruction(struct v3d_compile *c,
 static struct qinst *
 vir_nop()
 {
-        struct qreg undef = { QFILE_NULL, 0 };
+        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
--- a/src/broadcom/compiler/v3d_compiler.h
+++ b/src/broadcom/compiler/v3d_compiler.h
@@ -105,6 +105,11 @@ static inline struct qreg vir_reg(enum qfile file, uint32_t index)
        return (struct qreg){file, index};
 }

+static inline struct qreg vir_nop_reg(void)
+{
+        return vir_reg(QFILE_NULL, 0); /* file QFILE_NULL, index 0 */
+}
+
 /**
 * A reference to an actual register at the QPU level, for register
 * allocation.
@@ -357,7 +362,8 @@ struct v3d_fs_key {
        bool sample_alpha_to_one;
        bool clamp_color;
        bool shade_model_flat;
-        uint8_t nr_cbufs;
+        /* Mask of which color render targets are present. */
+        uint8_t cbufs;
        uint8_t swap_color_rb;
        /* Mask of which render targets need to be written as 32-bit floats */
        uint8_t f32_color_rb;
@@ -669,7 +675,6 @@ struct v3d_prog_data {
        uint32_t ubo_size;
        uint32_t spill_size;

-        uint8_t num_inputs;
        uint8_t threads;

        /* For threads > 1, whether the program should be dispatched in the
@@ -717,6 +722,7 @@ struct v3d_fs_prog_data {

        uint32_t centroid_flags[((V3D_MAX_FS_INPUTS - 1) / 24) + 1];

+        uint8_t num_inputs;
        bool writes_z;
        bool disable_ez;
        bool uses_center_w;
@@ -788,7 +794,6 @@ bool vir_is_raw_mov(struct qinst *inst);
 bool vir_is_tex(struct qinst *inst);
 bool vir_is_add(struct qinst *inst);
 bool vir_is_mul(struct qinst *inst);
-bool vir_is_float_input(struct qinst *inst);
 bool vir_writes_r3(const struct v3d_device_info *devinfo, struct qinst *inst);
 bool vir_writes_r4(const struct v3d_device_info *devinfo, struct qinst *inst);
 struct qreg vir_follow_movs(struct v3d_compile *c, struct qreg reg);
@@ -833,8 +838,6 @@ bool vir_init_reg_sets(struct v3d_compiler *compiler);

 bool v3d_gl_format_is_return_32(GLenum format);

-void vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf);
-
 static inline bool
 quniform_contents_is_texture_p0(enum quniform_contents contents)
 {
@@ -843,6 +846,12 @@ quniform_contents_is_texture_p0(enum quniform_contents contents)
                            V3D_MAX_TEXTURE_SAMPLERS));
 }

+/* True while c->execute refers to a register, i.e. is not QFILE_NULL. */
+static inline bool vir_in_nonuniform_control_flow(struct v3d_compile *c)
+{
+        return !(c->execute.file == QFILE_NULL);
+}
+
 static inline struct qreg
 vir_uniform_ui(struct v3d_compile *c, uint32_t ui)
 {
@@ -1143,4 +1152,8 @@ vir_BRANCH(struct v3d_compile *c, enum v3d_qpu_branch_cond cond)
        vir_for_each_block(_block, c)                                   \
                vir_for_each_inst(inst, _block)

+#define vir_for_each_inst_inorder_safe(inst, c)                         \
+        vir_for_each_block(_block, c)                                   \
+                vir_for_each_inst_safe(inst, _block)
+
 #endif /* V3D_COMPILER_H */
--- a/src/broadcom/compiler/vir.c
+++ b/src/broadcom/compiler/vir.c
@@ -132,38 +132,6 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
        return false;
 }

-bool
-vir_is_float_input(struct qinst *inst)
-{
-        /* XXX: More instrs */
-        switch (inst->qpu.type) {
-        case V3D_QPU_INSTR_TYPE_BRANCH:
-                return false;
-        case V3D_QPU_INSTR_TYPE_ALU:
-                switch (inst->qpu.alu.add.op) {
-                case V3D_QPU_A_FADD:
-                case V3D_QPU_A_FSUB:
-                case V3D_QPU_A_FMIN:
-                case V3D_QPU_A_FMAX:
-                case V3D_QPU_A_FTOIN:
-                        return true;
-                default:
-                        break;
-                }
-
-                switch (inst->qpu.alu.mul.op) {
-                case V3D_QPU_M_FMOV:
-                case V3D_QPU_M_VFMUL:
-                case V3D_QPU_M_FMUL:
-                        return true;
-                default:
-                        break;
-                }
-        }
-
-        return false;
-}
-
 bool
 vir_is_raw_mov(struct qinst *inst)
 {
@@ -178,6 +146,13 @@ vir_is_raw_mov(struct qinst *inst)
                return false;
        }

+        if (inst->qpu.alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
+            inst->qpu.alu.add.b_unpack != V3D_QPU_UNPACK_NONE ||
+            inst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||
+            inst->qpu.alu.mul.b_unpack != V3D_QPU_UNPACK_NONE) {
+                return false;
+        }
+
        if (inst->qpu.flags.ac != V3D_QPU_COND_NONE ||
            inst->qpu.flags.mc != V3D_QPU_COND_NONE)
                return false;
@@ -433,7 +408,7 @@ vir_branch_inst(enum v3d_qpu_branch_cond cond, struct qreg src)
        inst->qpu.branch.ub = true;
        inst->qpu.branch.bdu = V3D_QPU_BRANCH_DEST_REL;

-        inst->dst = vir_reg(QFILE_NULL, 0);
+        inst->dst = vir_nop_reg();
        inst->src[0] = src;
        inst->uniform = ~0;

@@ -697,8 +672,6 @@ static void
 v3d_vs_set_prog_data(struct v3d_compile *c,
                     struct v3d_vs_prog_data *prog_data)
 {
-        prog_data->base.num_inputs = c->num_inputs;
-
        /* The vertex data gets format converted by the VPM so that
         * each attribute channel takes up a VPM column.  Precompute
         * the sizes for the shader record.
@@ -754,7 +727,7 @@ static void
 v3d_set_fs_prog_data_inputs(struct v3d_compile *c,
                            struct v3d_fs_prog_data *prog_data)
 {
-        prog_data->base.num_inputs = c->num_inputs;
+        prog_data->num_inputs = c->num_inputs;
        memcpy(prog_data->input_slots, c->input_slots,
               c->num_inputs * sizeof(*c->input_slots));

@@ -1091,51 +1064,6 @@ vir_uniform(struct v3d_compile *c,
        return vir_reg(QFILE_UNIF, uniform);
 }

-static bool
-vir_can_set_flags(struct v3d_compile *c, struct qinst *inst)
-{
-        if (c->devinfo->ver >= 40 && (v3d_qpu_reads_vpm(&inst->qpu) ||
-                                      v3d_qpu_uses_sfu(&inst->qpu))) {
-                return false;
-        }
-
-        if (inst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||
-            (inst->qpu.alu.add.op == V3D_QPU_A_NOP &&
-             inst->qpu.alu.mul.op == V3D_QPU_M_NOP)) {
-               return false;
-        }
-
-        return true;
-}
-
-void
-vir_PF(struct v3d_compile *c, struct qreg src, enum v3d_qpu_pf pf)
-{
-        struct qinst *last_inst = NULL;
-
-        if (!list_empty(&c->cur_block->instructions)) {
-                last_inst = (struct qinst *)c->cur_block->instructions.prev;
-
-                /* Can't stuff the PF into the last last inst if our cursor
-                 * isn't pointing after it.
-                 */
-                struct vir_cursor after_inst = vir_after_inst(last_inst);
-                if (c->cursor.mode != after_inst.mode ||
-                    c->cursor.link != after_inst.link)
-                        last_inst = NULL;
-        }
-
-        if (src.file != QFILE_TEMP ||
-            !c->defs[src.index] ||
-            last_inst != c->defs[src.index] ||
-            !vir_can_set_flags(c, last_inst)) {
-                /* XXX: Make the MOV be the appropriate type */
-                last_inst = vir_MOV_dest(c, vir_reg(QFILE_NULL, 0), src);
-        }
-
-        vir_set_pf(last_inst, pf);
-}
-
 #define OPTPASS(func)                                                   \
        do {                                                            \
                bool stage_progress = func(c);                          \
--- a/src/broadcom/compiler/vir_dump.c
+++ b/src/broadcom/compiler/vir_dump.c
@@ -30,6 +30,7 @@ vir_dump_uniform(enum quniform_contents contents,
                 uint32_t data)
 {
        static const char *quniform_names[] = {
+                [QUNIFORM_ALPHA_REF] = "alpha_ref",
                [QUNIFORM_VIEWPORT_X_SCALE] = "vp_x_scale",
                [QUNIFORM_VIEWPORT_Y_SCALE] = "vp_y_scale",
                [QUNIFORM_VIEWPORT_Z_OFFSET] = "vp_z_offset",
@@ -118,7 +119,8 @@ vir_dump_uniform(enum quniform_contents contents,
                        fprintf(stderr, "tex[%d].p0: 0x%08x",
                                contents - QUNIFORM_TEXTURE_CONFIG_P0_0,
                                data);
-                } else if (contents < ARRAY_SIZE(quniform_names)) {
+                } else if (contents < ARRAY_SIZE(quniform_names) &&
+                           quniform_names[contents]) {
                        fprintf(stderr, "%s",
                                quniform_names[contents]);
                } else {
--- a/src/broadcom/compiler/vir_opt_copy_propagate.c
+++ b/src/broadcom/compiler/vir_opt_copy_propagate.c
@@ -151,13 +151,36 @@ try_copy_prop(struct v3d_compile *c, struct qinst *inst, struct qinst **movs)
                         * would be the same between the two
                         * instructions.
                         */
-                        if (vir_is_float_input(inst) !=
-                            vir_is_float_input(mov)) {
+                        if (v3d_qpu_unpacks_f32(&inst->qpu) !=
+                            v3d_qpu_unpacks_f32(&mov->qpu) ||
+                            v3d_qpu_unpacks_f16(&inst->qpu) !=
+                            v3d_qpu_unpacks_f16(&mov->qpu)) {
                                continue;
                        }
+
                        /* No composing the unpacks. */
                        if (vir_has_unpack(inst, i))
-                            continue;
+                                continue;
+
+                        /* these ops can't represent abs. */
+                        if (mov->qpu.alu.mul.a_unpack == V3D_QPU_UNPACK_ABS) {
+                                switch (inst->qpu.alu.add.op) {
+                                case V3D_QPU_A_VFPACK:
+                                case V3D_QPU_A_FROUND:
+                                case V3D_QPU_A_FTRUNC:
+                                case V3D_QPU_A_FFLOOR:
+                                case V3D_QPU_A_FCEIL:
+                                case V3D_QPU_A_FDX:
+                                case V3D_QPU_A_FDY:
+                                case V3D_QPU_A_FTOIN:
+                                case V3D_QPU_A_FTOIZ:
+                                case V3D_QPU_A_FTOUZ:
+                                case V3D_QPU_A_FTOC:
+                                        continue;
+                                default:
+                                        break;
+                                }
+                        }
                }

                if (debug) {
--- a/src/broadcom/compiler/vir_opt_dead_code.c
+++ b/src/broadcom/compiler/vir_opt_dead_code.c
@@ -194,7 +194,6 @@ vir_opt_dead_code(struct v3d_compile *c)
                                uint32_t offset = (inst->src[i].index % 4);

                                if (c->vattr_sizes[attr] == offset) {
-                                        c->num_inputs--;
                                        c->vattr_sizes[attr]--;
                                }
                        }
--- a/src/broadcom/compiler/vir_register_allocate.c
+++ b/src/broadcom/compiler/vir_register_allocate.c
@@ -47,10 +47,21 @@ is_last_ldtmu(struct qinst *inst, struct qblock *block)
        return true;
 }

+/* True if c->defs[temp] is a raw MOV whose first source is a uniform. */
+static bool
+vir_is_mov_uniform(struct v3d_compile *c, int temp)
+{
+        struct qinst *mov = c->defs[temp];
+        if (!mov || !vir_is_raw_mov(mov))
+                return false;
+        return mov->src[0].file == QFILE_UNIF;
+}
+
 static int
 v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                      uint32_t *temp_to_node)
 {
+        const float tmu_scale = 5;
        float block_scale = 1.0;
        float spill_costs[c->num_temps];
        bool in_tmu_operation = false;
@@ -75,22 +86,28 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                                        continue;

                                int temp = inst->src[i].index;
-                                if (no_spilling) {
-                                        BITSET_CLEAR(c->spillable,
-                                                     temp);
-                                } else {
+                                if (vir_is_mov_uniform(c, temp)) {
                                        spill_costs[temp] += block_scale;
+                                } else if (!no_spilling) {
+                                        spill_costs[temp] += (block_scale *
+                                                              tmu_scale);
+                                } else {
+                                        BITSET_CLEAR(c->spillable, temp);
                                }
                        }

                        if (inst->dst.file == QFILE_TEMP) {
                                int temp = inst->dst.index;

-                                if (no_spilling) {
-                                        BITSET_CLEAR(c->spillable,
-                                                     temp);
+                                if (vir_is_mov_uniform(c, temp)) {
+                                        /* We just rematerialize the uniform
+                                         * later.
+                                         */
+                                } else if (!no_spilling) {
+                                        spill_costs[temp] += (block_scale *
+                                                              tmu_scale);
                                } else {
-                                        spill_costs[temp] += block_scale;
+                                        BITSET_CLEAR(c->spillable, temp);
                                }
                        }

@@ -184,18 +201,28 @@ v3d_emit_spill_tmua(struct v3d_compile *c, uint32_t spill_offset)
 static void
 v3d_spill_reg(struct v3d_compile *c, int spill_temp)
 {
-        uint32_t spill_offset = c->spill_size;
-        c->spill_size += 16 * sizeof(uint32_t);
+        bool is_uniform = vir_is_mov_uniform(c, spill_temp);

-        if (spill_offset == 0)
-                v3d_setup_spill_base(c);
+        /* Only non-uniform temps get a scratch slot; uniforms are re-MOVed. */
+        uint32_t spill_offset = 0;
+        if (!is_uniform) {
+                /* Assign (not redeclare) so fills/spills below see the slot. */
+                spill_offset = c->spill_size;
+                c->spill_size += 16 * sizeof(uint32_t);
+                if (spill_offset == 0)
+                        v3d_setup_spill_base(c);
+        }

        struct qinst *last_thrsw = c->last_thrsw;
        assert(!last_thrsw || last_thrsw->is_last_thrsw);

        int start_num_temps = c->num_temps;

-        vir_for_each_inst_inorder(inst, c) {
+        struct qreg uniform_src = c->undef;
+        if (is_uniform)
+                uniform_src = c->defs[spill_temp]->src[0];
+
+        vir_for_each_inst_inorder_safe(inst, c) {
                for (int i = 0; i < vir_get_nsrc(inst); i++) {
                        if (inst->src[i].file != QFILE_TEMP ||
                            inst->src[i].index != spill_temp) {
@@ -204,23 +231,33 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)

                        c->cursor = vir_before_inst(inst);

-                        v3d_emit_spill_tmua(c, spill_offset);
-                        vir_emit_thrsw(c);
-                        inst->src[i] = vir_LDTMU(c);
-                        c->fills++;
+                        if (is_uniform) {
+                                inst->src[i] = vir_MOV(c, uniform_src);
+                        } else {
+                                v3d_emit_spill_tmua(c, spill_offset);
+                                vir_emit_thrsw(c);
+                                inst->src[i] = vir_LDTMU(c);
+                                c->fills++;
+                        }
                }

                if (inst->dst.file == QFILE_TEMP &&
                    inst->dst.index == spill_temp) {
-                        c->cursor = vir_after_inst(inst);
+                        if (is_uniform) {
+                                c->cursor.link = NULL;
+                                vir_remove_instruction(c, inst);
+                        } else {
+                                c->cursor = vir_after_inst(inst);

-                        inst->dst.index = c->num_temps++;
-                        vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_TMUD),
-                                     inst->dst);
-                        v3d_emit_spill_tmua(c, spill_offset);
-                        vir_emit_thrsw(c);
-                        vir_TMUWT(c);
-                        c->spills++;
+                                inst->dst.index = c->num_temps++;
+                                vir_MOV_dest(c, vir_reg(QFILE_MAGIC,
+                                                        V3D_QPU_WADDR_TMUD),
+                                             inst->dst);
+                                v3d_emit_spill_tmua(c, spill_offset);
+                                vir_emit_thrsw(c);
+                                vir_TMUWT(c);
+                                c->spills++;
+                        }
                }

                /* If we didn't have a last-thrsw inserted by nir_to_vir and
@@ -228,7 +265,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                 * right before we start the vpm/tlb sequence for the last
                 * thread segment.
                 */
-                if (!last_thrsw && c->last_thrsw &&
+                if (!is_uniform && !last_thrsw && c->last_thrsw &&
                    (v3d_qpu_writes_vpm(&inst->qpu) ||
                     v3d_qpu_uses_tlb(&inst->qpu))) {
                        c->cursor = vir_before_inst(inst);
--- a/src/broadcom/compiler/vir_to_qpu.c
+++ b/src/broadcom/compiler/vir_to_qpu.c
@@ -76,7 +76,7 @@ v3d_qpu_nop(void)
 static struct qinst *
 vir_nop(void)
 {
-        struct qreg undef = { QFILE_NULL, 0 };
+        struct qreg undef = vir_nop_reg();
        struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);

        return qinst;
--- a/src/broadcom/qpu/qpu_instr.c
+++ b/src/broadcom/qpu/qpu_instr.c
@@ -751,9 +751,6 @@ bool
 v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
                  const struct v3d_qpu_instr *inst)
 {
-        if (inst->sig.ldtmu)
-                return true;
-
        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                if (inst->alu.add.magic_write &&
                    (inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||
@@ -768,8 +765,10 @@ v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
                }
        }

-        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&
-            inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4) {
+        if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {
+                if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)
+                        return true;
+        } else if (inst->sig.ldtmu) {
                return true;
        }

@@ -867,3 +866,70 @@ v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)

        return false;
 }
+
+/**
+ * Reports whether an ALU instruction carries an op from the f32 list below.
+ *
+ * Non-ALU instructions always yield false.  Otherwise the result is true
+ * when the add op matches one of the cases in the switch, or when the mul
+ * op is FMOV or FMUL.
+ */
+bool
+v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)
+{
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        switch (inst->alu.add.op) {
+        case V3D_QPU_A_FADD:
+        case V3D_QPU_A_FADDNF:
+        case V3D_QPU_A_FSUB:
+        case V3D_QPU_A_FMIN:
+        case V3D_QPU_A_FMAX:
+        case V3D_QPU_A_FCMP:
+        case V3D_QPU_A_FROUND:
+        case V3D_QPU_A_FTRUNC:
+        case V3D_QPU_A_FFLOOR:
+        case V3D_QPU_A_FCEIL:
+        case V3D_QPU_A_FDX:
+        case V3D_QPU_A_FDY:
+        case V3D_QPU_A_FTOIN:
+        case V3D_QPU_A_FTOIZ:
+        case V3D_QPU_A_FTOUZ:
+        case V3D_QPU_A_FTOC:
+        case V3D_QPU_A_VFPACK:
+                return true;
+        default:
+                break;
+        }
+
+        /* The add op did not match, so the mul op decides. */
+        return inst->alu.mul.op == V3D_QPU_M_FMOV ||
+               inst->alu.mul.op == V3D_QPU_M_FMUL;
+}
+
+/**
+ * Reports whether an ALU instruction carries an op from the f16 list below.
+ *
+ * Non-ALU instructions always yield false.  Otherwise the result is true
+ * when the add op is VFMIN or VFMAX, or when the mul op is VFMUL; both
+ * sides are evaluated and either one matching is enough.
+ */
+bool
+v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)
+{
+        bool add_matches;
+        bool mul_matches;
+
+        if (inst->type != V3D_QPU_INSTR_TYPE_ALU)
+                return false;
+
+        /* Add side: VFMIN or VFMAX. */
+        add_matches = inst->alu.add.op == V3D_QPU_A_VFMIN ||
+                      inst->alu.add.op == V3D_QPU_A_VFMAX;
+
+        /* Mul side: VFMUL only. */
+        mul_matches = inst->alu.mul.op == V3D_QPU_M_VFMUL;
+
+        return add_matches || mul_matches;
+}
--- a/src/broadcom/qpu/qpu_instr.h
+++ b/src/broadcom/qpu/qpu_instr.h
@@ -464,5 +464,7 @@ bool v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
                                const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
+bool v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;
+bool v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst) ATTRIBUTE_CONST;

 #endif
--- a/Show More
+++ b/Show More