Update version to 17.1.0-rc4

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
radv: apply the tess+GS hang workaround to Polaris12 as well
2017-05-08 11:40:34 +01:00 · 2017-05-08 11:37:10 +01:00 · 2017-05-08 11:36:26 +01:00 · 2017-05-08 11:33:00 +01:00 · 2017-05-08 11:32:57 +01:00 · 2017-05-08 11:32:49 +01:00
80 changed files with 1051 additions and 339 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,24 +1,11 @@
 language: c

-sudo: required
+sudo: false
 dist: trusty

 cache:
-  directories:
-    - $HOME/.ccache
-
-addons:
-  apt:
-    packages:
-      - libdrm-dev
-      - x11proto-xf86vidmode-dev
-      - libexpat1-dev
-      - libxcb-dri2-0-dev
-      - libx11-xcb-dev
-      # LLVM packaging is broken and misses these dependencies
-      - libedit-dev
-      - libelf-dev
-      - scons
+  apt: true
+  ccache: true

 env:
  global:
@@ -32,17 +19,260 @@ env:
    - XCBPROTO_VERSION=xcb-proto-1.11
    - LIBXCB_VERSION=libxcb-1.11
    - LIBXSHMFENCE_VERSION=libxshmfence-1.2
-    - LLVM_VERSION=3.9
-    - LLVM_PACKAGE="llvm-${LLVM_VERSION} llvm-${LLVM_VERSION}-dev"
-    - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+    - LIBTXC_DXTN_VERSION=libtxc_dxtn-1.0.1
+    - LIBVDPAU_VERSION=libvdpau-1.1
+    - LIBVA_VERSION=libva-1.6.2
+    - LIBWAYLAND_VERSION=wayland-1.11.1
    - PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig
-    - MAKEFLAGS=-j2
-  matrix:
-    - BUILD=make
-    - BUILD=scons
+    - LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH"
+
+matrix:
+  include:
+    - env:
+        - LABEL="make loaders/classic DRI"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="make check"
+        - DRI_LOADERS="--enable-glx --enable-gbm --enable-egl --with-platforms=x11,drm,surfaceless,wayland --enable-osmesa"
+        - DRI_DRIVERS="i915,i965,radeon,r200,swrast,nouveau"
+        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS=""
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          packages:
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+    - env:
+        # NOTE: Building SWR is 2x (yes two) times slower than all the other
+        # gallium drivers combined.
+        # Start this early so that it doesn't hunder the run time.
+        - LABEL="make Gallium Drivers SWR"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - OVERRIDE_CC="gcc-5"
+        - OVERRIDE_CXX="g++-5"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS="swr"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - llvm-toolchain-trusty-3.9
+          packages:
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - g++-5
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="make Gallium Drivers Other"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS="i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-3.9
+          packages:
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        # NOTE: Analogous to SWR above, building Clover is quite slow.
+        - LABEL="make Gallium ST Clover"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - LLVM_VERSION=3.6
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        - OVERRIDE_CC=gcc-4.7
+        - OVERRIDE_CXX=g++-4.7
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        # i915 most likely doesn't work with OpenCL.
+        # Regardless - we're doing a quick build test here.
+        - GALLIUM_DRIVERS="i915"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-3.6
+          packages:
+            - libclc-dev
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            - g++-4.7
+            # From sources above
+            - llvm-3.6-dev
+            - clang-3.6
+            - libclang-3.6-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="make Gallium ST Other"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="true"
+        - DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
+        - DRI_DRIVERS=""
+        - GALLIUM_ST="--enable-dri --disable-opencl --enable-xa --enable-nine --enable-xvmc --enable-vdpau --enable-va --enable-omx --enable-gallium-osmesa"
+        # We need swrast for osmesa and nine.
+        # i915 most likely doesn't work with most ST.
+        # Regardless - we're doing a quick build test here.
+        - GALLIUM_DRIVERS="i915,swrast"
+        - VULKAN_DRIVERS=""
+      addons:
+        apt:
+          packages:
+            # Nine requires gcc 4.6... which is the one we have right ?
+            - libxvmc-dev
+            # Build locally, for now.
+            #- libvdpau-dev
+            #- libva-dev
+            - libomxil-bellagio-dev
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="make Vulkan"
+        - BUILD=make
+        - MAKEFLAGS="-j4"
+        - MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        # XXX: we want to test the WSI, but those are enabled via the EGL toggles
+        # XXX: Platform X11 dependencies are checked when --enable-glx is set
+        - DRI_LOADERS="--enable-glx --disable-gbm --enable-egl --with-platforms=x11,wayland"
+        - DRI_DRIVERS=""
+        # XXX: enable DRI for EGL above
+        - GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
+        - GALLIUM_DRIVERS=""
+        - VULKAN_DRIVERS="intel,radeon"
+      addons:
+        apt:
+          sources:
+            - llvm-toolchain-trusty-3.9
+          packages:
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="scons"
+        - BUILD=scons
+        - SCONSFLAGS="-j4"
+        # Explicitly disable.
+        - SCONS_TARGET="llvm=0"
+        # Keep it symmetrical to the make build.
+        - SCONS_CHECK_COMMAND="scons llvm=0 check"
+      addons:
+        apt:
+          packages:
+            - scons
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="scons LLVM"
+        - BUILD=scons
+        - SCONSFLAGS="-j4"
+        - SCONS_TARGET="llvm=1"
+        # Keep it symmetrical to the make build.
+        - SCONS_CHECK_COMMAND="scons llvm=1 check"
+        - LLVM_VERSION=3.3
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+      addons:
+        apt:
+          packages:
+            - scons
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            - llvm-3.3-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev
+    - env:
+        - LABEL="scons SWR"
+        - BUILD=scons
+        - SCONSFLAGS="-j4"
+        - SCONS_TARGET="swr=1"
+        - LLVM_VERSION=3.9
+        - LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
+        # Keep it symmetrical to the make build. There's no actual SWR, yet.
+        - SCONS_CHECK_COMMAND="true"
+        - OVERRIDE_CC="gcc-5"
+        - OVERRIDE_CXX="g++-5"
+      addons:
+        apt:
+          sources:
+            - ubuntu-toolchain-r-test
+            - llvm-toolchain-trusty-3.9
+          packages:
+            - scons
+            # LLVM packaging is broken and misses these dependencies
+            - libedit-dev
+            # From sources above
+            - g++-5
+            - llvm-3.9-dev
+            # Common
+            - xz-utils
+            - x11proto-xf86vidmode-dev
+            - libexpat1-dev
+            - libx11-xcb-dev
+            - libelf-dev

 install:
-  - export PATH="/usr/lib/ccache:$PATH"
  - pip install --user mako

  # Since libdrm gets updated in configure.ac regularly, try to pick up the
@@ -90,25 +320,64 @@ install:
  - tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
  - (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)

-  # Install LLVM directly via apt-get (not Travis-CI's apt addon)
-  # See https://github.com/travis-ci/apt-source-whitelist/pull/205#issuecomment-216054237
+  # libtxc-dxtn uses the patented S3 Texture Compression
+  # algorithm. Therefore, we don't want to use this library but it is
+  # still possible through setting the USE_TXC_DXTN variable to yes in
+  # the travis web UI.
+  #
+  # According to Wikipedia, the patent expires on October 2, 2017:
+  # https://en.wikipedia.org/wiki/S3_Texture_Compression#Patent
+  - if test "x$USE_TXC_DXTN" = xyes; then
+      wget https://people.freedesktop.org/~cbrill/libtxc_dxtn/$LIBTXC_DXTN_VERSION.tar.bz2;
+      tar -jxvf $LIBTXC_DXTN_VERSION.tar.bz2;
+      (cd $LIBTXC_DXTN_VERSION && ./configure --prefix=$HOME/prefix && make install);
+    fi

-  - wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
-  - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
-  - sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty main'
-  - sudo apt-get update -qq
-  - sudo apt-get install -qq -y $LLVM_PACKAGE
+  - wget http://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
+  - tar -jxvf $LIBVDPAU_VERSION.tar.bz2
+  - (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)
+
+  - wget http://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
+  - tar -jxvf $LIBVA_VERSION.tar.bz2
+  - (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)
+
+  - wget http://wayland.freedesktop.org/releases/$LIBWAYLAND_VERSION.tar.xz
+  - tar -axvf $LIBWAYLAND_VERSION.tar.xz
+  - (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)
+
+  # Generate the header since one is missing on the Travis instance
+  - mkdir -p linux
+  - printf "%s\n" \
+           "#ifndef _LINUX_MEMFD_H" \
+           "#define _LINUX_MEMFD_H" \
+           "" \
+           "#define __NR_memfd_create 319" \
+           "#define SYS_memfd_create __NR_memfd_create" \
+           "" \
+           "#define MFD_CLOEXEC             0x0001U" \
+           "#define MFD_ALLOW_SEALING       0x0002U" \
+           "" \
+           "#endif /* _LINUX_MEMFD_H */" > linux/memfd.h

 script:
  - if test "x$BUILD" = xmake; then
+      test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
+      test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
+      export CC="$CC -isystem`pwd`";
+
      ./autogen.sh --enable-debug
-        --with-platforms=x11,drm
-        --with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
-        --with-gallium-drivers=i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx
-        --with-vulkan-drivers=radeon
+        $DRI_LOADERS
+        --with-dri-drivers=$DRI_DRIVERS
+        $GALLIUM_ST
+        --with-gallium-drivers=$GALLIUM_DRIVERS
+        --with-vulkan-drivers=$VULKAN_DRIVERS
        --disable-llvm-shared-libs
-        ;
-      make && make check;
-    elif test x$BUILD = xscons; then
-      scons llvm=1 && scons llvm=1 check;
+        &&
+      make && eval $MAKE_CHECK_COMMAND;
+    fi
+
+  - if test "x$BUILD" = xscons; then
+      test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
+      test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
+      scons $SCONS_TARGET && eval $SCONS_CHECK_COMMAND;
    fi
--- a/2
+++ b/2
@@ -1 +1 @@
-17.1.0-devel
+17.1.0-rc4
--- a/configure.ac
+++ b/configure.ac
@@ -724,7 +724,7 @@ dnl Arch/platform-specific settings
 dnl
 AC_ARG_ENABLE([asm],
    [AS_HELP_STRING([--disable-asm],
-        [disable assembly usage @<:@default=enabled on supported plaforms@:>@])],
+        [disable assembly usage @<:@default=enabled on supported platforms@:>@])],
    [enable_asm="$enableval"],
    [enable_asm=yes]
 )
@@ -1367,7 +1367,7 @@ if test "x$enable_libglvnd" = xyes ; then
    esac

    PKG_CHECK_MODULES([GLVND], libglvnd >= 0.2.0)
-    PKG_CHECK_VAR(LIBGLVND_DATADIR, libglvnd, datadir)
+    LIBGLVND_DATADIR=`$PKG_CONFIG --variable=datadir libglvnd`
    AC_SUBST([LIBGLVND_DATADIR])

    DEFINES="${DEFINES} -DUSE_LIBGLVND=1"
@@ -2146,12 +2146,11 @@ dnl DEPRECATED: EGL Platforms configuration
 dnl
 AC_ARG_WITH([egl-platforms],
    [AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
-        [DEPRECATED: use --with-plaforms instead@<:@default=auto@:>@])],
+        [DEPRECATED: use --with-platforms instead@<:@default=auto@:>@])],
    [with_egl_platforms="$withval"],
    [with_egl_platforms=auto])

 if test "x$with_egl_platforms" = xauto; then
-    AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-plaforms instead.])
    if test "x$enable_egl" = xyes; then
        if test "x$enable_gbm" = xyes; then
           with_egl_platforms="x11,drm"
@@ -2161,6 +2160,8 @@ if test "x$with_egl_platforms" = xauto; then
    else
        with_egl_platforms=""
    fi
+else
+    AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-platforms instead.])
 fi

 dnl
@@ -2465,7 +2466,9 @@ if test -n "$with_gallium_drivers"; then
        xvirgl)
            HAVE_GALLIUM_VIRGL=yes
            require_libdrm "virgl"
-            require_basic_egl "virgl"
+            if test "x$enable_egl" = xyes; then
+                require_basic_egl "virgl"
+            fi
            ;;
        *)
            AC_MSG_ERROR([Unknown Gallium driver: $driver])
--- a/scons/llvm.py
+++ b/scons/llvm.py
@@ -103,8 +103,26 @@ def generate(env):
            'HAVE_STDINT_H',
        ])
        env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
-        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
-        if llvm_version >= distutils.version.LooseVersion('3.9'):
+        # LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
+        if llvm_version >= distutils.version.LooseVersion('4.0'):
+            env.Prepend(LIBS = [
+                'LLVMX86Disassembler', 'LLVMX86AsmParser',
+                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
+                'LLVMDebugInfoCodeView', 'LLVMCodeGen',
+                'LLVMScalarOpts', 'LLVMInstCombine',
+                'LLVMTransformUtils',
+                'LLVMBitWriter', 'LLVMX86Desc',
+                'LLVMMCDisassembler', 'LLVMX86Info',
+                'LLVMX86AsmPrinter', 'LLVMX86Utils',
+                'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
+                'LLVMAnalysis', 'LLVMProfileData',
+                'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
+                'LLVMBitReader', 'LLVMMC', 'LLVMCore',
+                'LLVMSupport',
+                'LLVMIRReader', 'LLVMAsmParser',
+                'LLVMDemangle', 'LLVMGlobalISel', 'LLVMDebugInfoMSF',
+            ])
+        elif llvm_version >= distutils.version.LooseVersion('3.9'):
            env.Prepend(LIBS = [
                'LLVMX86Disassembler', 'LLVMX86AsmParser',
                'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -114,6 +114,7 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
 	case CHIP_POLARIS10:
 		return "polaris10";
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		return "polaris11";
 #endif
 	default:
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -4074,6 +4074,10 @@
 #define   S_028060_PUNCHOUT_MODE(x)                                   (((unsigned)(x) & 0x03) << 0)
 #define   G_028060_PUNCHOUT_MODE(x)                                   (((x) >> 0) & 0x03)
 #define   C_028060_PUNCHOUT_MODE                                      0xFFFFFFFC
+#define     V_028060_AUTO						0
+#define     V_028060_FORCE_ON						1
+#define     V_028060_FORCE_OFF						2
+#define     V_028060_RESERVED						3
 #define   S_028060_POPS_DRAIN_PS_ON_OVERLAP(x)                        (((unsigned)(x) & 0x1) << 2)
 #define   G_028060_POPS_DRAIN_PS_ON_OVERLAP(x)                        (((x) >> 2) & 0x1)
 #define   C_028060_POPS_DRAIN_PS_ON_OVERLAP                           0xFFFFFFFB
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1186,6 +1186,15 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
 {
 	struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;

+	if (G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.raster.pa_cl_clip_cntl))
+		return;
+
+	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
+		radv_emit_viewport(cmd_buffer);
+
+	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
+		radv_emit_scissor(cmd_buffer);
+
 	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
 		unsigned width = cmd_buffer->state.dynamic.line_width * 8;
 		radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
@@ -1474,12 +1483,6 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
 	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RENDER_TARGETS)
 		radv_emit_framebuffer_state(cmd_buffer);

-	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
-		radv_emit_viewport(cmd_buffer);
-
-	if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
-		radv_emit_scissor(cmd_buffer);
-
 	ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, draw_vertex_count);
 	if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
 		if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
@@ -1900,6 +1903,8 @@ void radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radeon_winsys *ws = cmd_buffer->device->ws;

+	assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
+
 	cmd_buffer->state.descriptors[idx] = set;
 	cmd_buffer->state.descriptors_dirty |= (1 << idx);
 	if (!set)
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -401,7 +401,7 @@ radv_enumerate_devices(struct radv_instance *instance)

 	instance->physicalDeviceCount = 0;

-	max_devices = drmGetDevices2(0, devices, sizeof(devices));
+	max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
 	if (max_devices < 1)
 		return VK_ERROR_INCOMPATIBLE_DRIVER;

@@ -417,9 +417,11 @@ radv_enumerate_devices(struct radv_instance *instance)
 			if (result == VK_SUCCESS)
 				++instance->physicalDeviceCount;
 			else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
-				return result;
+				break;
 		}
 	}
+	drmFreeDevices(devices, max_devices);
+
 	return result;
 }

@@ -915,6 +917,7 @@ radv_device_init_gs_info(struct radv_device *device)
 	case CHIP_FIJI:
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		device->gs_table_depth = 32;
 		return;
 	default:
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -52,7 +52,9 @@ radv_meta_restore(const struct radv_meta_saved_state *state,
 		  struct radv_cmd_buffer *cmd_buffer)
 {
 	cmd_buffer->state.pipeline = state->old_pipeline;
-	radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
+
+	cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+	cmd_buffer->state.descriptors_dirty |= (1u << 0);
 	memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
 	       sizeof(state->old_vertex_bindings));

@@ -110,7 +112,9 @@ radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
 {
 	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
 			     radv_pipeline_to_handle(state->old_pipeline));
-	radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
+
+	cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
+	cmd_buffer->state.descriptors_dirty |= (1u << 0);

 	if (push_constant_size) {
 		memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size);
--- a/src/amd/vulkan/si_cmd_buffer.c
+++ b/src/amd/vulkan/si_cmd_buffer.c
@@ -297,6 +297,7 @@ si_emit_config(struct radv_physical_device *physical_device,
 		raster_config_1 = 0x0000002a;
 		break;
 	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
 		raster_config = 0x16000012;
 		raster_config_1 = 0x00000000;
 		break;
@@ -671,7 +672,8 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
 				if (family == CHIP_TONGA ||
 				    family == CHIP_FIJI ||
 				    family == CHIP_POLARIS10 ||
-				    family == CHIP_POLARIS11)
+				    family == CHIP_POLARIS11 ||
+				    family == CHIP_POLARIS12)
 					partial_vs_wave = true;
 			} else {
 				partial_vs_wave = true;
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -107,6 +107,7 @@ get_chip_name(enum radeon_family family)
 	case CHIP_FIJI: return "AMD RADV FIJI";
 	case CHIP_POLARIS10: return "AMD RADV POLARIS10";
 	case CHIP_POLARIS11: return "AMD RADV POLARIS11";
+	case CHIP_POLARIS12: return "AMD RADV POLARIS12";
 	case CHIP_STONEY: return "AMD RADV STONEY";
 	default: return "AMD RADV unknown";
 	}
@@ -271,6 +272,10 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
 		ws->family = FAMILY_VI;
 		ws->rev_id = VI_POLARIS11_M_A0;
 		break;
+	case CHIP_POLARIS12:
+		ws->family = FAMILY_VI;
+		ws->rev_id = VI_POLARIS12_V_A0;
+		break;
 	default:
 		fprintf(stderr, "amdgpu: Unknown family.\n");
 		goto fail;
--- a/src/compiler/nir/nir_lower_bitmap.c
+++ b/src/compiler/nir/nir_lower_bitmap.c
@@ -96,7 +96,9 @@ lower_bitmap(nir_shader *shader, nir_builder *b,
   tex->texture_index = options->sampler;
   tex->dest_type = nir_type_float;
   tex->src[0].src_type = nir_tex_src_coord;
-   tex->src[0].src = nir_src_for_ssa(texcoord);
+   tex->src[0].src =
+      nir_src_for_ssa(nir_channels(b, texcoord,
+                                   (1 << tex->coord_components) - 1));

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &tex->instr);
--- a/src/compiler/nir/nir_lower_drawpixels.c
+++ b/src/compiler/nir/nir_lower_drawpixels.c
@@ -135,7 +135,9 @@ lower_color(lower_drawpixels_state *state, nir_intrinsic_instr *intr)
   tex->texture_index = state->options->drawpix_sampler;
   tex->dest_type = nir_type_float;
   tex->src[0].src_type = nir_tex_src_coord;
-   tex->src[0].src = nir_src_for_ssa(texcoord);
+   tex->src[0].src =
+      nir_src_for_ssa(nir_channels(b, texcoord,
+                                   (1 << tex->coord_components) - 1));

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
   nir_builder_instr_insert(b, &tex->instr);
--- a/src/egl/drivers/dri2/platform_android.c
+++ b/src/egl/drivers/dri2/platform_android.c
@@ -264,10 +264,15 @@ droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_sur
 }

 static void
-droid_window_cancel_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
+droid_window_cancel_buffer(struct dri2_egl_surface *dri2_surf)
 {
-   /* no cancel buffer? */
-   droid_window_enqueue_buffer(disp, dri2_surf);
+   int ret;
+
+   ret = dri2_surf->window->cancelBuffer(dri2_surf->window, dri2_surf->buffer, -1);
+   if (ret < 0) {
+      _eglLog(_EGL_WARNING, "ANativeWindow::cancelBuffer failed");
+      dri2_surf->base.Lost = EGL_TRUE;
+   }
 }

 static __DRIbuffer *
@@ -399,7 +404,7 @@ droid_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)

   if (dri2_surf->base.Type == EGL_WINDOW_BIT) {
      if (dri2_surf->buffer)
-         droid_window_cancel_buffer(disp, dri2_surf);
+         droid_window_cancel_buffer(dri2_surf);

      dri2_surf->window->common.decRef(&dri2_surf->window->common);
   }
@@ -426,12 +431,16 @@ droid_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
 static int
 update_buffers(struct dri2_egl_surface *dri2_surf)
 {
+   if (dri2_surf->base.Lost)
+      return -1;
+
   if (dri2_surf->base.Type != EGL_WINDOW_BIT)
      return 0;

   /* try to dequeue the next back buffer */
   if (!dri2_surf->buffer && !droid_window_dequeue_buffer(dri2_surf)) {
      _eglLog(_EGL_WARNING, "Could not dequeue buffer from native window");
+      dri2_surf->base.Lost = EGL_TRUE;
      return -1;
   }

@@ -631,6 +640,12 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)

   dri2_flush_drawable_for_swapbuffers(disp, draw);

+   /* dri2_surf->buffer can be null even when no error has occured. For
+    * example, if the user has called no GL rendering commands since the
+    * previous eglSwapBuffers, then the driver may have not triggered
+    * a callback to ANativeWindow::dequeueBuffer, in which case
+    * dri2_surf->buffer remains null.
+    */
   if (dri2_surf->buffer)
      droid_window_enqueue_buffer(disp, dri2_surf);

--- a/src/egl/drivers/dri2/platform_drm.c
+++ b/src/egl/drivers/dri2/platform_drm.c
@@ -689,12 +689,12 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
         fd = loader_open_device(buf);
      if (fd < 0)
         fd = loader_open_device("/dev/dri/card0");
-      dri2_dpy->own_device = 1;
      gbm = gbm_create_device(fd);
      if (gbm == NULL) {
         err = "DRI2: failed to create gbm device";
         goto cleanup;
      }
+      dri2_dpy->own_device = 1;
   } else {
      fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
      if (fd < 0) {
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -828,6 +828,33 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,
         RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE);
   }

+   _EGLThreadInfo *t =_eglGetCurrentThread();
+   _EGLContext *old_ctx = t->CurrentContext;
+   _EGLSurface *old_draw_surf = old_ctx ? old_ctx->DrawSurface : NULL;
+   _EGLSurface *old_read_surf = old_ctx ? old_ctx->ReadSurface : NULL;
+
+   /* From the EGL 1.5 spec, Section 3.7.3 Binding Context and Drawables:
+    *
+    *    If the previous context of the calling thread has unflushed commands,
+    *    and the previous surface is no longer valid, an
+    *    EGL_BAD_CURRENT_SURFACE error is generated.
+    *
+    * It's difficult to check if the context has unflushed commands, but it's
+    * easy to check if the surface is no longer valid.
+    */
+   if (old_draw_surf && old_draw_surf->Lost)
+      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
+   if (old_read_surf && old_read_surf->Lost)
+      RETURN_EGL_ERROR(disp, EGL_BAD_CURRENT_SURFACE, EGL_FALSE);
+
+   /*    If a native window underlying either draw or read is no longer valid,
+    *    an EGL_BAD_NATIVE_WINDOW error is generated.
+    */
+   if (draw_surf && draw_surf->Lost)
+      RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_FALSE);
+   if (read_surf && read_surf->Lost)
+      RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_FALSE);
+
   ret = drv->API.MakeCurrent(drv, disp, draw_surf, read_surf, context);

   RETURN_EGL_EVAL(disp, ret);
@@ -1215,6 +1242,15 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface surface)
      RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
   #endif

+   /* From the EGL 1.5 spec:
+    *
+    *    If eglSwapBuffers is called and the native window associated with
+    *    surface is no longer valid, an EGL_BAD_NATIVE_WINDOW error is
+    *    generated.
+    */
+   if (surf->Lost)
+      RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_FALSE);
+
   ret = drv->API.SwapBuffers(drv, disp, surf);

   RETURN_EGL_EVAL(disp, ret);
--- a/src/egl/main/eglsurface.c
+++ b/src/egl/main/eglsurface.c
@@ -295,6 +295,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
   _eglInitResource(&surf->Resource, sizeof(*surf), dpy);
   surf->Type = type;
   surf->Config = conf;
+   surf->Lost = EGL_FALSE;

   surf->Width = 0;
   surf->Height = 0;
--- a/src/egl/main/eglsurface.h
+++ b/src/egl/main/eglsurface.h
@@ -56,6 +56,11 @@ struct _egl_surface

   EGLint Type; /* one of EGL_WINDOW_BIT, EGL_PIXMAP_BIT or EGL_PBUFFER_BIT */

+   /* The native surface is lost. The EGL spec requires certain functions
+    * to generate EGL_BAD_NATIVE_WINDOW when given this surface.
+    */
+   EGLBoolean Lost;
+
   /* attributes set by attribute list */
   EGLint Width, Height;
   EGLenum TextureFormat;
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -64,13 +64,13 @@ static const struct pipe_loader_ops pipe_loader_drm_ops;

 #ifdef GALLIUM_STATIC_TARGETS
 static const struct drm_conf_ret throttle_ret = {
-   DRM_CONF_INT,
-   {2},
+   .type = DRM_CONF_INT,
+   .val.val_int = 2,
 };

 static const struct drm_conf_ret share_fd_ret = {
-   DRM_CONF_BOOL,
-   {true},
+   .type = DRM_CONF_BOOL,
+   .val.val_bool = true,
 };

 static inline const struct drm_conf_ret *
--- a/src/gallium/auxiliary/renderonly/renderonly.c
+++ b/src/gallium/auxiliary/renderonly/renderonly.c
@@ -29,11 +29,11 @@
 #include <errno.h>
 #include <fcntl.h>
 #include <stdio.h>
-#include <sys/ioctl.h>
 #include <xf86drm.h>

 #include "state_tracker/drm_driver.h"
 #include "pipe/p_screen.h"
+#include "util/u_inlines.h"
 #include "util/u_memory.h"

 struct renderonly *
@@ -65,8 +65,16 @@ renderonly_scanout_for_prime(struct pipe_resource *rsc, struct renderonly *ro)
 }

 void
-renderonly_scanout_destroy(struct renderonly_scanout *scanout)
+renderonly_scanout_destroy(struct renderonly_scanout *scanout,
+			   struct renderonly *ro)
 {
+   struct drm_mode_destroy_dumb destroy_dumb = { };
+
+   pipe_resource_reference(&scanout->prime, NULL);
+   if (ro->kms_fd != -1) {
+      destroy_dumb.handle = scanout->handle;
+      drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);
+   }
   FREE(scanout);
 }

@@ -90,7 +98,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
      return NULL;

   /* create dumb buffer at scanout GPU */
-   err = ioctl(ro->kms_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb);
+   err = drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb);
   if (err < 0) {
      fprintf(stderr, "DRM_IOCTL_MODE_CREATE_DUMB failed: %s\n",
            strerror(errno));
@@ -116,6 +124,8 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
   scanout->prime = screen->resource_from_handle(screen, rsc,
         &handle, PIPE_HANDLE_USAGE_READ_WRITE);

+   close(prime_fd);
+
   if (!scanout->prime) {
      fprintf(stderr, "failed to create resource_from_handle: %s\n", strerror(errno));
      goto free_dumb;
@@ -125,7 +135,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,

 free_dumb:
   destroy_dumb.handle = scanout->handle;
-   ioctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);
+   drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);

 free_scanout:
   FREE(scanout);
--- a/src/gallium/auxiliary/renderonly/renderonly.h
+++ b/src/gallium/auxiliary/renderonly/renderonly.h
@@ -77,7 +77,8 @@ struct renderonly_scanout *
 renderonly_scanout_for_prime(struct pipe_resource *rsc, struct renderonly *ro);

 void
-renderonly_scanout_destroy(struct renderonly_scanout *scanout);
+renderonly_scanout_destroy(struct renderonly_scanout *scanout,
+			   struct renderonly *ro);

 static inline boolean
 renderonly_get_handle(struct renderonly_scanout *scanout,
--- a/src/gallium/drivers/etnaviv/etnaviv_format.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_format.c
@@ -103,6 +103,8 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
   _T(B4G4R4A4_UNORM, A4R4G4B4, A4R4G4B4),
   _T(B4G4R4X4_UNORM, X4R4G4B4, X4R4G4B4),

+   _T(L8A8_UNORM, A8L8, NONE),
+
   _T(Z16_UNORM,      D16,      A4R4G4B4),
   _T(B5G6R5_UNORM,   R5G6B5,   R5G6B5),
   _T(B5G5R5A1_UNORM, A1R5G5B5, A1R5G5B5),
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -308,7 +308,7 @@ etna_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
      etna_bo_del(rsc->ts_bo);

   if (rsc->scanout)
-      renderonly_scanout_destroy(rsc->scanout);
+      renderonly_scanout_destroy(rsc->scanout, etna_screen(pscreen)->ro);

   list_delinit(&rsc->list);

--- a/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_gmem.c
@@ -149,6 +149,9 @@ use_hw_binning(struct fd_batch *batch)
 	if (gmem->minx || gmem->miny)
 		return false;

+	if ((gmem->maxpw * gmem->maxph) > 32)
+		return false;
+
 	return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
 }

--- a/src/gallium/drivers/freedreno/freedreno_context.c
+++ b/src/gallium/drivers/freedreno/freedreno_context.c
@@ -112,7 +112,7 @@ fd_context_destroy(struct pipe_context *pctx)

 	DBG("");

-	if (ctx->screen->reorder)
+	if (ctx->screen->reorder && util_queue_is_initialized(&ctx->flush_queue))
 		util_queue_destroy(&ctx->flush_queue);

 	fd_batch_reference(&ctx->batch, NULL);  /* unref current batch */
--- a/src/gallium/drivers/freedreno/freedreno_gmem.c
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.c
@@ -219,6 +219,9 @@ calculate_tiles(struct fd_batch *batch)
 			div_round_up(nbins_x, tpp_x)) > 8)
 		tpp_x += 1;

+	gmem->maxpw = tpp_x;
+	gmem->maxph = tpp_y;
+
 	/* configure pipes: */
 	xoff = yoff = 0;
 	for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
--- a/src/gallium/drivers/freedreno/freedreno_gmem.h
+++ b/src/gallium/drivers/freedreno/freedreno_gmem.h
@@ -57,6 +57,7 @@ struct fd_gmem_stateobj {
 	uint16_t bin_w, nbins_x;
 	uint16_t minx, miny;
 	uint16_t width, height;
+	uint16_t maxpw, maxph;   /* maximum pipe width/height */
 };

 struct fd_batch;
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -765,7 +765,7 @@ static void si_set_shader_image(struct si_context *ctx,
 		static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
 		struct r600_texture *tex = (struct r600_texture *)res;
 		unsigned level = view->u.tex.level;
-		unsigned width, height, depth;
+		unsigned width, height, depth, hw_level;
 		bool uses_dcc = vi_dcc_enabled(tex, level);

 		assert(!tex->is_depth);
@@ -794,20 +794,31 @@ static void si_set_shader_image(struct si_context *ctx,
 		    p_atomic_read(&tex->framebuffers_bound))
 			ctx->need_check_render_feedback = true;

-		/* Always force the base level to the selected level.
-		 *
-		 * This is required for 3D textures, where otherwise
-		 * selecting a single slice for non-layered bindings
-		 * fails. It doesn't hurt the other targets.
-		 */
-		width = u_minify(res->b.b.width0, level);
-		height = u_minify(res->b.b.height0, level);
-		depth = u_minify(res->b.b.depth0, level);
+		if (ctx->b.chip_class >= GFX9) {
+			/* Always set the base address. The swizzle modes don't
+			 * allow setting mipmap level offsets as the base.
+			 */
+			width = res->b.b.width0;
+			height = res->b.b.height0;
+			depth = res->b.b.depth0;
+			hw_level = level;
+		} else {
+			/* Always force the base level to the selected level.
+			 *
+			 * This is required for 3D textures, where otherwise
+			 * selecting a single slice for non-layered bindings
+			 * fails. It doesn't hurt the other targets.
+			 */
+			width = u_minify(res->b.b.width0, level);
+			height = u_minify(res->b.b.height0, level);
+			depth = u_minify(res->b.b.depth0, level);
+			hw_level = 0;
+		}

 		si_make_texture_descriptor(screen, tex,
 					   false, res->b.b.target,
 					   view->format, swizzle,
-					   0, 0,
+					   hw_level, hw_level,
 					   view->u.tex.first_layer,
 					   view->u.tex.last_layer,
 					   width, height, depth,
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2344,8 +2344,8 @@ handle_semantic:
 	    shader->selector->info.writes_layer) {
 		pos_args[1].enabled_channels = shader->selector->info.writes_psize |
 					       (shader->selector->info.writes_edgeflag << 1) |
-					       (shader->selector->info.writes_layer << 2) |
-					       (shader->selector->info.writes_viewport_index << 3);
+					       (shader->selector->info.writes_layer << 2);
+
 		pos_args[1].valid_mask = 0; /* EXEC mask */
 		pos_args[1].done = 0; /* last export? */
 		pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
@@ -2374,11 +2374,34 @@ handle_semantic:
 							  ctx->f32, "");
 		}

-		if (shader->selector->info.writes_layer)
-			pos_args[1].out[2] = layer_value;
+		if (ctx->screen->b.chip_class >= GFX9) {
+			/* GFX9 has the layer in out.z[10:0] and the viewport
+			 * index in out.z[19:16].
+			 */
+			if (shader->selector->info.writes_layer)
+				pos_args[1].out[2] = layer_value;

-		if (shader->selector->info.writes_viewport_index)
-			pos_args[1].out[3] = viewport_index_value;
+			if (shader->selector->info.writes_viewport_index) {
+				LLVMValueRef v = viewport_index_value;
+
+				v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, v);
+				v = LLVMBuildShl(ctx->gallivm.builder, v,
+						 LLVMConstInt(ctx->i32, 16, 0), "");
+				v = LLVMBuildOr(ctx->gallivm.builder, v,
+						bitcast(bld_base, TGSI_TYPE_UNSIGNED,
+						        pos_args[1].out[2]), "");
+				pos_args[1].out[2] = bitcast(bld_base, TGSI_TYPE_FLOAT, v);
+				pos_args[1].enabled_channels |= 1 << 2;
+			}
+		} else {
+			if (shader->selector->info.writes_layer)
+				pos_args[1].out[2] = layer_value;
+
+			if (shader->selector->info.writes_viewport_index) {
+				pos_args[1].out[3] = viewport_index_value;
+				pos_args[1].enabled_channels |= 1 << 3;
+			}
+		}
 	}

 	for (i = 0; i < 4; i++)
@@ -3400,7 +3423,7 @@ image_fetch_rsrc(
 static LLVMValueRef image_fetch_coords(
 		struct lp_build_tgsi_context *bld_base,
 		const struct tgsi_full_instruction *inst,
-		unsigned src)
+		unsigned src, LLVMValueRef desc)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
 	struct gallivm_state *gallivm = &ctx->gallivm;
@@ -3417,14 +3440,30 @@ static LLVMValueRef image_fetch_coords(
 		coords[chan] = tmp;
 	}

-	/* 1D textures are allocated and used as 2D on GFX9. */
 	if (ctx->screen->b.chip_class >= GFX9) {
+		/* 1D textures are allocated and used as 2D on GFX9. */
 		if (target == TGSI_TEXTURE_1D) {
 			coords[1] = ctx->i32_0;
 			num_coords++;
 		} else if (target == TGSI_TEXTURE_1D_ARRAY) {
 			coords[2] = coords[1];
 			coords[1] = ctx->i32_0;
+			num_coords++;
+		} else if (target == TGSI_TEXTURE_2D) {
+			/* The hw can't bind a slice of a 3D image as a 2D
+			 * image, because it ignores BASE_ARRAY if the target
+			 * is 3D. The workaround is to read BASE_ARRAY and set
+			 * it as the 3rd address operand for all 2D images.
+			 */
+			LLVMValueRef first_layer, const5, mask;
+
+			const5 = LLVMConstInt(ctx->i32, 5, 0);
+			mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
+			first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
+			first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
+
+			coords[2] = first_layer;
+			num_coords++;
 		}
 	}

@@ -3539,7 +3578,7 @@ static void load_fetch_args(
 		LLVMValueRef coords;

 		image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 1);
+		coords = image_fetch_coords(bld_base, inst, 1, rsrc);

 		if (target == TGSI_TEXTURE_BUFFER) {
 			buffer_append_args(ctx, emit_data, rsrc, coords,
@@ -3814,16 +3853,15 @@ static void store_fetch_args(
 		 */
 		bool force_glc = ctx->screen->b.chip_class == SI;

-		coords = image_fetch_coords(bld_base, inst, 0);
+		image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
+		coords = image_fetch_coords(bld_base, inst, 0, rsrc);

 		if (target == TGSI_TEXTURE_BUFFER) {
-			image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
 			buffer_append_args(ctx, emit_data, rsrc, coords,
 					   ctx->i32_0, false, force_glc);
 		} else {
 			emit_data->args[1] = coords;
-			image_fetch_rsrc(bld_base, &memory, true, target,
-					 &emit_data->args[2]);
+			emit_data->args[2] = rsrc;
 			emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
 			emit_data->arg_count = 4;

@@ -4027,7 +4065,7 @@ static void atomic_fetch_args(
 		LLVMValueRef coords;

 		image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
-		coords = image_fetch_coords(bld_base, inst, 1);
+		coords = image_fetch_coords(bld_base, inst, 1, rsrc);

 		if (target == TGSI_TEXTURE_BUFFER) {
 			buffer_append_args(ctx, emit_data, rsrc, coords,
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2976,7 +2976,40 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
 	num_records = size / stride;
 	num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);

-	if (screen->b.chip_class == VI)
+	/* The NUM_RECORDS field has a different meaning depending on the chip,
+	 * instruction type, STRIDE, and SWIZZLE_ENABLE.
+	 *
+	 * SI-CIK:
+	 * - If STRIDE == 0, it's in byte units.
+	 * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
+	 *
+	 * VI:
+	 * - For SMEM and STRIDE == 0, it's in byte units.
+	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
+	 * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
+	 * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
+	 * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
+	 *       ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
+	 *       using SMEM. This can be done in the shader by clearing STRIDE with s_and.
+	 *       That way the same descriptor can be used by both SMEM and VMEM.
+	 *
+	 * GFX9:
+	 * - For SMEM and STRIDE == 0, it's in byte units.
+	 * - For SMEM and STRIDE != 0, it's in units of STRIDE.
+	 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
+	 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
+	 */
+	if (screen->b.chip_class >= GFX9)
+		/* When vindex == 0, LLVM sets IDXEN = 0, thus changing units
+		 * from STRIDE to bytes. This works around it by setting
+		 * NUM_RECORDS to at least the size of one element, so that
+		 * the first element is readable when IDXEN == 0.
+		 *
+		 * TODO: Fix this in LLVM, but do we need a new intrinsic where
+		 *       IDXEN is enforced?
+		 */
+		num_records = num_records ? MAX2(num_records, stride) : 0;
+	else if (screen->b.chip_class == VI)
 		num_records *= stride;

 	state[4] = 0;
@@ -3156,7 +3189,8 @@ si_make_texture_descriptor(struct si_screen *screen,
 	if (!sampler &&
 	    (res->target == PIPE_TEXTURE_CUBE ||
 	     res->target == PIPE_TEXTURE_CUBE_ARRAY ||
-	     res->target == PIPE_TEXTURE_3D)) {
+	     (screen->b.chip_class <= VI &&
+	      res->target == PIPE_TEXTURE_3D))) {
 		/* For the purpose of shader images, treat cube maps and 3D
 		 * textures as 2D arrays. For 3D textures, the address
 		 * calculations for mipmaps are different, so we rely on the
@@ -4527,15 +4561,30 @@ static void si_init_config(struct si_context *sctx)
 		      RADEON_PRIO_BORDER_COLORS);

 	if (sctx->b.chip_class >= GFX9) {
-		si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 0);
+		unsigned num_se = sscreen->b.info.max_se;
+		unsigned pc_lines = 0;
+
+		switch (sctx->b.family) {
+		case CHIP_VEGA10:
+			pc_lines = 4096;
+			break;
+		default:
+			assert(0);
+		}
+
+		si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL,
+			       S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
 		si_pm4_set_reg(pm4, R_028064_DB_RENDER_FILTER, 0);
 		/* TODO: We can use this to disable RBs for rendering to GART: */
 		si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
 		si_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
 		/* TODO: Enable the binner: */
 		si_pm4_set_reg(pm4, R_028C44_PA_SC_BINNER_CNTL_0,
-			       S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC));
-		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 0);
+			       S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
+			       S_028C44_DISABLE_START_OF_PRIM(1));
+		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
+			       S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
+			       S_028C48_MAX_PRIM_PER_BATCH(1023));
 		si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
 			       S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
 		si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -291,13 +291,15 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
 		/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
 		if (sscreen->has_distributed_tess) {
 			if (key->u.uses_gs) {
-				partial_es_wave = true;
+				if (sscreen->b.chip_class <= VI)
+					partial_es_wave = true;

 				/* GPU hang workaround. */
 				if (sscreen->b.family == CHIP_TONGA ||
 				    sscreen->b.family == CHIP_FIJI ||
 				    sscreen->b.family == CHIP_POLARIS10 ||
-				    sscreen->b.family == CHIP_POLARIS11)
+				    sscreen->b.family == CHIP_POLARIS11 ||
+				    sscreen->b.family == CHIP_POLARIS12)
 					partial_vs_wave = true;
 			} else {
 				partial_vs_wave = true;
@@ -371,7 +373,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
 	}

 	/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
-	if (ia_switch_on_eoi)
+	if (sscreen->b.chip_class <= VI && ia_switch_on_eoi)
 		partial_es_wave = true;

 	return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
@@ -379,7 +381,8 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
 		S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
 		S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
 		S_028AA8_WD_SWITCH_ON_EOP(sscreen->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
-		S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class >= VI ?
+		/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
+		S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class == VI ?
 					     max_primgroup_in_wave : 0) |
 		S_030960_EN_INST_OPT_BASIC(sscreen->b.chip_class >= GFX9) |
 		S_030960_EN_INST_OPT_ADV(sscreen->b.chip_class >= GFX9);
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -655,8 +655,17 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
 	 * not sent again.
 	 */
 	if (!gs) {
-		si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
-			       S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
+		unsigned mode = 0;
+
+		/* PrimID needs GS scenario A.
+		 * GFX9 also needs it when ViewportIndex is enabled.
+		 */
+		if (enable_prim_id ||
+		    (sscreen->b.chip_class >= GFX9 &&
+		     shader->selector->info.writes_viewport_index))
+			mode = V_028A40_GS_SCENARIO_A;
+
+		si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, S_028A40_MODE(mode));
 		si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
 	} else {
 		si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs));
@@ -2115,7 +2124,10 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
 	unsigned num_se = sctx->screen->b.info.max_se;
 	unsigned wave_size = 64;
 	unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
-	unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register (per SE) */
+	/* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
+	 * On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
+	 */
+	unsigned gs_vertex_reuse = (sctx->b.chip_class >= VI ? 32 : 16) * num_se;
 	unsigned alignment = 256 * num_se;
 	/* The maximum size is 63.999 MB per SE. */
 	unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
@@ -2542,6 +2554,9 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
 			          S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
 		}

+		if (sctx->b.chip_class >= GFX9)
+			stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
+
 		si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
 	}
 	si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
--- a/src/gallium/drivers/vc4/vc4_tiling_lt.c
+++ b/src/gallium/drivers/vc4/vc4_tiling_lt.c
@@ -61,7 +61,7 @@ static void
 vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp)
 {
        uint32_t gpu_stride = vc4_utile_stride(cpp);
-#if defined(VC4_BUILD_NEON) && defined(__ARM_ARCH)
+#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
        if (gpu_stride == 8) {
                __asm__ volatile (
                        /* Load from the GPU in one shot, no interleave, to
@@ -118,7 +118,7 @@ vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp)
 {
        uint32_t gpu_stride = vc4_utile_stride(cpp);

-#if defined(VC4_BUILD_NEON) && defined(__ARM_ARCH)
+#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
        if (gpu_stride == 8) {
                __asm__ volatile (
                        /* Load each 8-byte line from cpu-side source,
--- a/src/gallium/state_trackers/clover/llvm/codegen.hpp
+++ b/src/gallium/state_trackers/clover/llvm/codegen.hpp
@@ -49,7 +49,7 @@ namespace clover {
      build_module_library(const ::llvm::Module &mod,
                           enum module::section::type section_type);

-      std::unique_ptr<::llvm::Module>
+      std::unique_ptr< ::llvm::Module>
      parse_module_library(const module &m, ::llvm::LLVMContext &ctx,
                           std::string &r_log);

--- a/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
+++ b/src/gallium/state_trackers/clover/llvm/codegen/bitcode.cpp
@@ -94,7 +94,7 @@ clover::llvm::build_module_library(const ::llvm::Module &mod,
   return m;
 }

-std::unique_ptr<::llvm::Module>
+std::unique_ptr< ::llvm::Module>
 clover::llvm::parse_module_library(const module &m, ::llvm::LLVMContext &ctx,
                                   std::string &r_log) {
   auto mod = ::llvm::parseBitcodeFile(::llvm::MemoryBufferRef(
@@ -104,5 +104,5 @@ clover::llvm::parse_module_library(const module &m, ::llvm::LLVMContext &ctx,
         fail(r_log, error(CL_INVALID_PROGRAM), s);
      });

-   return std::unique_ptr<::llvm::Module>(std::move(*mod));
+   return std::unique_ptr< ::llvm::Module>(std::move(*mod));
 }
--- a/src/gallium/state_trackers/clover/llvm/codegen/common.cpp
+++ b/src/gallium/state_trackers/clover/llvm/codegen/common.cpp
@@ -121,12 +121,12 @@ namespace {
         } else {
            // Other types.
            const auto actual_type =
-               isa<::llvm::PointerType>(arg_type) && arg.hasByValAttr() ?
-               cast<::llvm::PointerType>(arg_type)->getElementType() : arg_type;
+               isa< ::llvm::PointerType>(arg_type) && arg.hasByValAttr() ?
+               cast< ::llvm::PointerType>(arg_type)->getElementType() : arg_type;

            if (actual_type->isPointerTy()) {
               const unsigned address_space =
-                  cast<::llvm::PointerType>(actual_type)->getAddressSpace();
+                  cast< ::llvm::PointerType>(actual_type)->getAddressSpace();

               if (address_space == address_spaces[clang::LangAS::opencl_local
                                                   - compat::lang_as_offset]) {
--- a/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
+++ b/src/gallium/state_trackers/clover/llvm/codegen/native.cpp
@@ -156,7 +156,7 @@ clover::llvm::print_module_native(const ::llvm::Module &mod,
                                  const target &target) {
   std::string log;
   try {
-      std::unique_ptr<::llvm::Module> cmod { CloneModule(&mod) };
+      std::unique_ptr< ::llvm::Module> cmod { CloneModule(&mod) };
      return as_string(emit_code(*cmod, target,
                                 TargetMachine::CGFT_AssemblyFile, log));
   } catch (...) {
--- a/src/gallium/state_trackers/clover/llvm/compat.hpp
+++ b/src/gallium/state_trackers/clover/llvm/compat.hpp
@@ -132,18 +132,18 @@ namespace clover {
 #endif
         }

-         inline std::unique_ptr<::llvm::Linker>
+         inline std::unique_ptr< ::llvm::Linker>
         create_linker(::llvm::Module &mod) {
 #if HAVE_LLVM >= 0x0308
-            return std::unique_ptr<::llvm::Linker>(new ::llvm::Linker(mod));
+            return std::unique_ptr< ::llvm::Linker>(new ::llvm::Linker(mod));
 #else
-            return std::unique_ptr<::llvm::Linker>(new ::llvm::Linker(&mod));
+            return std::unique_ptr< ::llvm::Linker>(new ::llvm::Linker(&mod));
 #endif
         }

         inline bool
         link_in_module(::llvm::Linker &linker,
-                        std::unique_ptr<::llvm::Module> mod) {
+                        std::unique_ptr< ::llvm::Module> mod) {
 #if HAVE_LLVM >= 0x0308
            return linker.linkInModule(std::move(mod));
 #else
--- a/src/gallium/state_trackers/clover/llvm/metadata.hpp
+++ b/src/gallium/state_trackers/clover/llvm/metadata.hpp
@@ -51,7 +51,7 @@ namespace clover {
         is_kernel_node_for(const ::llvm::Function &f) {
            return [&](const ::llvm::MDNode *n) {
               using ::llvm::mdconst::dyn_extract;
-               return &f == dyn_extract<::llvm::Function>(n->getOperand(0));
+               return &f == dyn_extract< ::llvm::Function>(n->getOperand(0));
            };
         }

@@ -65,7 +65,7 @@ namespace clover {
 #endif
         }

-         inline iterator_range<::llvm::MDNode::op_iterator>
+         inline iterator_range< ::llvm::MDNode::op_iterator>
         get_kernel_metadata_operands(const ::llvm::Function &f,
                                      const std::string &name) {
 #if HAVE_LLVM >= 0x0309
@@ -79,11 +79,11 @@ namespace clover {
            const auto kernel_node = find(is_kernel_node_for(f),
                                          get_kernel_nodes(*f.getParent()));

-            const auto data_node = cast<::llvm::MDNode>(
+            const auto data_node = cast< ::llvm::MDNode>(
               find([&](const ::llvm::MDOperand &op) {
-                     if (auto m = dyn_cast<::llvm::MDNode>(op))
+                     if (auto m = dyn_cast< ::llvm::MDNode>(op))
                        if (m->getNumOperands())
-                           if (auto m_name = dyn_cast<::llvm::MDString>(
+                           if (auto m_name = dyn_cast< ::llvm::MDString>(
                                  m->getOperand(0).get()))
                              return m_name->getString() == name;

@@ -106,7 +106,7 @@ namespace clover {
      get_argument_metadata(const ::llvm::Function &f,
                            const ::llvm::Argument &arg,
                            const std::string &name) {
-         return ::llvm::cast<::llvm::MDString>(
+         return ::llvm::cast< ::llvm::MDString>(
               detail::get_kernel_metadata_operands(f, name)[arg.getArgNo()])
            ->getString();
      }
--- a/src/gallium/targets/dri/Makefile.am
+++ b/src/gallium/targets/dri/Makefile.am
@@ -1,10 +1,8 @@
 include $(top_srcdir)/src/gallium/Automake.inc

-if HAVE_ANDROID
 if HAVE_SHARED_GLAPI
 SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
 endif
-endif

 AM_CFLAGS = \
 	-I$(top_srcdir)/src/mapi \
@@ -31,8 +29,10 @@ gallium_dri_la_LDFLAGS = \
 	-shared \
 	-shrext .so \
 	-module \
+	-no-undefined \
 	-avoid-version \
-	$(GC_SECTIONS)
+	$(GC_SECTIONS) \
+	$(LD_NO_UNDEFINED)

 if HAVE_LD_VERSION_SCRIPT
 gallium_dri_la_LDFLAGS += \
--- a/src/gallium/targets/pipe-loader/pipe_i915.c
+++ b/src/gallium/targets/pipe-loader/pipe_i915.c
@@ -30,7 +30,7 @@ static const struct drm_conf_ret throttle_ret = {

 static const struct drm_conf_ret share_fd_ret = {
   .type = DRM_CONF_BOOL,
-   .val.val_int = true,
+   .val.val_bool = true,
 };

 static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
--- a/src/gallium/targets/pipe-loader/pipe_msm.c
+++ b/src/gallium/targets/pipe-loader/pipe_msm.c
@@ -24,7 +24,7 @@ static const struct drm_conf_ret throttle_ret = {

 static const struct drm_conf_ret share_fd_ret = {
   .type = DRM_CONF_BOOL,
-   .val.val_int = true,
+   .val.val_bool = true,
 };

 static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
--- a/src/gallium/targets/pipe-loader/pipe_nouveau.c
+++ b/src/gallium/targets/pipe-loader/pipe_nouveau.c
@@ -24,7 +24,7 @@ static const struct drm_conf_ret throttle_ret = {

 static const struct drm_conf_ret share_fd_ret = {
   .type = DRM_CONF_BOOL,
-   .val.val_int = true,
+   .val.val_bool = true,
 };

 static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
--- a/src/gallium/targets/pipe-loader/pipe_r300.c
+++ b/src/gallium/targets/pipe-loader/pipe_r300.c
@@ -20,7 +20,7 @@ static const struct drm_conf_ret throttle_ret = {

 static const struct drm_conf_ret share_fd_ret = {
   .type = DRM_CONF_BOOL,
-   .val.val_int = true,
+   .val.val_bool = true,
 };

 static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
--- a/src/gallium/targets/pipe-loader/pipe_r600.c
+++ b/src/gallium/targets/pipe-loader/pipe_r600.c
@@ -20,7 +20,7 @@ static const struct drm_conf_ret throttle_ret = {

 static const struct drm_conf_ret share_fd_ret = {
   .type = DRM_CONF_BOOL,
-   .val.val_int = true,
+   .val.val_bool = true,
 };

 static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
--- a/src/gallium/targets/pipe-loader/pipe_radeonsi.c
+++ b/src/gallium/targets/pipe-loader/pipe_radeonsi.c
@@ -26,7 +26,7 @@ static const struct drm_conf_ret throttle_ret = {

 static const struct drm_conf_ret share_fd_ret = {
   .type = DRM_CONF_BOOL,
-   .val.val_int = true,
+   .val.val_bool = true,
 };

 static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
--- a/src/gallium/targets/pipe-loader/pipe_vmwgfx.c
+++ b/src/gallium/targets/pipe-loader/pipe_vmwgfx.c
@@ -30,7 +30,7 @@ static const struct drm_conf_ret throttle_ret = {

 static const struct drm_conf_ret share_fd_ret = {
   .type = DRM_CONF_BOOL,
-   .val.val_int = true,
+   .val.val_bool = true,
 };

 static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -310,6 +310,7 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
   case CHIP_POLARIS12:
      ws->family = FAMILY_VI;
      ws->rev_id = VI_POLARIS12_V_A0;
+      break;
   case CHIP_VEGA10:
      ws->family = FAMILY_AI;
      ws->rev_id = AI_VEGA10_P_A0;
--- a/src/gallium/winsys/sw/dri/dri_sw_winsys.c
+++ b/src/gallium/winsys/sw/dri/dri_sw_winsys.c
@@ -200,7 +200,7 @@ dri_sw_displaytarget_display(struct sw_winsys *ws,

   if (box) {
       void *data;
-       data = dri_sw_dt->data + (dri_sw_dt->stride * box->y) + box->x * blsize;
+       data = (char *)dri_sw_dt->data + (dri_sw_dt->stride * box->y) + box->x * blsize;
       dri_sw_ws->lf->put_image2(dri_drawable, data,
                                 box->x, box->y, box->width, box->height, dri_sw_dt->stride);
   } else {
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -2651,7 +2651,7 @@ _GLX_PUBLIC void (*glXGetProcAddressARB(const GLubyte * procName)) (void)
   f = (gl_function) get_glx_proc_address((const char *) procName);
   if ((f == NULL) && (procName[0] == 'g') && (procName[1] == 'l')
       && (procName[2] != 'X')) {
-#ifdef GLX_SHARED_GLAPI
+#ifdef GLX_INDIRECT_RENDERING
      f = (gl_function) __indirect_get_proc_address((const char *) procName);
 #endif
      if (!f)
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -822,12 +822,11 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
   foreach_block_and_inst(block, fs_inst, inst, cfg) {
      for (unsigned int i = 0; i < inst->sources; i++) {
 	 if (inst->src[i].file == VGRF)
-            spill_costs[inst->src[i].nr] += block_scale;
+            spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale;
      }

      if (inst->dst.file == VGRF)
-         spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE)
-                                      * block_scale;
+         spill_costs[inst->dst.nr] += regs_written(inst) * block_scale;

      switch (inst->opcode) {

--- a/src/intel/compiler/brw_vec4.cpp
+++ b/src/intel/compiler/brw_vec4.cpp
@@ -1071,6 +1071,13 @@ vec4_instruction::can_reswizzle(const struct gen_device_info *devinfo,
   if (devinfo->gen == 6 && is_math() && swizzle != BRW_SWIZZLE_XYZW)
      return false;

+   /* We can't swizzle implicit accumulator access.  We'd have to
+    * reswizzle the producer of the accumulator value in addition
+    * to the consumer (i.e. both MUL and MACH).  Just skip this.
+    */
+   if (reads_accumulator_implicitly())
+      return false;
+
   if (!can_do_writemask(devinfo) && dst_writemask != WRITEMASK_XYZW)
      return false;

@@ -1941,6 +1948,24 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
   inst->mlen = 2;
 }

+static bool
+is_align1_df(vec4_instruction *inst)
+{
+   switch (inst->opcode) {
+   case VEC4_OPCODE_DOUBLE_TO_F32:
+   case VEC4_OPCODE_DOUBLE_TO_D32:
+   case VEC4_OPCODE_DOUBLE_TO_U32:
+   case VEC4_OPCODE_TO_DOUBLE:
+   case VEC4_OPCODE_PICK_LOW_32BIT:
+   case VEC4_OPCODE_PICK_HIGH_32BIT:
+   case VEC4_OPCODE_SET_LOW_32BIT:
+   case VEC4_OPCODE_SET_HIGH_32BIT:
+      return true;
+   default:
+      return false;
+   }
+}
+
 void
 vec4_visitor::convert_to_hw_regs()
 {
@@ -1950,9 +1975,7 @@ vec4_visitor::convert_to_hw_regs()
         struct brw_reg reg;
         switch (src.file) {
         case VGRF: {
-            const unsigned type_size = type_sz(src.type);
-            const unsigned width = REG_SIZE / 2 / MAX2(4, type_size);
-            reg = byte_offset(brw_vecn_grf(width, src.nr, 0), src.offset);
+            reg = byte_offset(brw_vecn_grf(4, src.nr, 0), src.offset);
            reg.type = src.type;
            reg.abs = src.abs;
            reg.negate = src.negate;
@@ -1960,12 +1983,11 @@ vec4_visitor::convert_to_hw_regs()
         }

         case UNIFORM: {
-            const unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(src.type));
            reg = stride(byte_offset(brw_vec4_grf(
                                        prog_data->base.dispatch_grf_start_reg +
                                        src.nr / 2, src.nr % 2 * 4),
                                     src.offset),
-                         0, width, 1);
+                         0, 4, 1);
            reg.type = src.type;
            reg.abs = src.abs;
            reg.negate = src.negate;
@@ -1998,6 +2020,20 @@ vec4_visitor::convert_to_hw_regs()

         apply_logical_swizzle(&reg, inst, i);
         src = reg;
+
+         /* From IVB PRM, vol4, part3, "General Restrictions on Regioning
+          * Parameters":
+          *
+          *   "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set
+          *    to Width * HorzStride."
+          *
+          * We can break this rule with DF sources on DF align1
+          * instructions, because the exec_size would be 4 and width is 4.
+          * As we know we are not accessing to next GRF, it is safe to
+          * set vstride to the formula given by the rule itself.
+          */
+         if (is_align1_df(inst) && (cvt(inst->exec_size) - 1) == src.width)
+            src.vstride = src.width + src.hstride;
      }

      if (inst->is_3src(devinfo)) {
@@ -2255,24 +2291,6 @@ vec4_visitor::lower_simd_width()
   return progress;
 }

-static bool
-is_align1_df(vec4_instruction *inst)
-{
-   switch (inst->opcode) {
-   case VEC4_OPCODE_DOUBLE_TO_F32:
-   case VEC4_OPCODE_DOUBLE_TO_D32:
-   case VEC4_OPCODE_DOUBLE_TO_U32:
-   case VEC4_OPCODE_TO_DOUBLE:
-   case VEC4_OPCODE_PICK_LOW_32BIT:
-   case VEC4_OPCODE_PICK_HIGH_32BIT:
-   case VEC4_OPCODE_SET_LOW_32BIT:
-   case VEC4_OPCODE_SET_HIGH_32BIT:
-      return true;
-   default:
-      return false;
-   }
-}
-
 static brw_predicate
 scalarize_predicate(brw_predicate predicate, unsigned writemask)
 {
@@ -2506,6 +2524,11 @@ vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
   assert(brw_is_single_value_swizzle(reg.swizzle) ||
          is_supported_64bit_region(inst, arg));

+   /* Apply the region <2, 2, 1> for GRF or <0, 2, 1> for uniforms, as align16
+    * HW can only do 32-bit swizzle channels.
+    */
+   hw_reg->width = BRW_WIDTH_2;
+
   if (is_supported_64bit_region(inst, arg) &&
       !is_gen7_supported_64bit_swizzle(inst, arg)) {
      /* Supported 64-bit swizzles are those such that their first two
--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1980,8 +1980,6 @@ generate_code(struct brw_codegen *p,
         else
            spread_dst = stride(dst, 8, 4, 2);

-         src[0].vstride = BRW_VERTICAL_STRIDE_4;
-         src[0].width = BRW_WIDTH_4;
         brw_MOV(p, spread_dst, src[0]);

         brw_set_default_access_mode(p, BRW_ALIGN_16);
@@ -2016,9 +2014,7 @@ generate_code(struct brw_codegen *p,
         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
         if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT)
            src[0] = suboffset(src[0], 1);
-         src[0].vstride = BRW_VERTICAL_STRIDE_8;
-         src[0].width = BRW_WIDTH_4;
-         src[0].hstride = BRW_HORIZONTAL_STRIDE_2;
+         src[0] = spread(src[0], 2);
         brw_MOV(p, dst, src[0]);

         brw_set_default_access_mode(p, BRW_ALIGN_16);
@@ -2041,9 +2037,6 @@ generate_code(struct brw_codegen *p,
         dst.hstride = BRW_HORIZONTAL_STRIDE_2;

         src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
-         src[0].vstride = BRW_VERTICAL_STRIDE_4;
-         src[0].width = BRW_WIDTH_4;
-         src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
         brw_MOV(p, dst, src[0]);

         brw_set_default_access_mode(p, BRW_ALIGN_16);
--- a/src/intel/vulkan/anv_allocator.c
+++ b/src/intel/vulkan/anv_allocator.c
@@ -884,7 +884,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
   assert(new_bo.size == pow2_size);

   new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
-   if (new_bo.map == NULL) {
+   if (new_bo.map == MAP_FAILED) {
      anv_gem_close(pool->device, new_bo.gem_handle);
      return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
   }
@@ -993,6 +993,25 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,

   anv_bo_init_new(&bo->bo, device, size);

+   /* Even though the Scratch base pointers in 3DSTATE_*S are 64 bits, they
+    * are still relative to the general state base address.  When we emit
+    * STATE_BASE_ADDRESS, we set general state base address to 0 and the size
+    * to the maximum (1 page under 4GB).  This allows us to just place the
+    * scratch buffers anywhere we wish in the bottom 32 bits of address space
+    * and just set the scratch base pointer in 3DSTATE_*S using a relocation.
+    * However, in order to do so, we need to ensure that the kernel does not
+    * place the scratch BO above the 32-bit boundary.
+    *
+    * NOTE: Technically, it can't go "anywhere" because the top page is off
+    * limits.  However, when EXEC_OBJECT_SUPPORTS_48B_ADDRESS is set, the
+    * kernel allocates space using
+    *
+    *    end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
+    *
+    * so nothing will ever touch the top page.
+    */
+   bo->bo.flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
   /* Set the exists last because it may be read by other threads */
   __sync_synchronize();
   bo->exists = true;
--- a/src/intel/vulkan/anv_batch_chain.c
+++ b/src/intel/vulkan/anv_batch_chain.c
@@ -1265,7 +1265,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
                                      cmd_buffer->last_ss_pool_center);
   VkResult result =
      anv_execbuf_add_bo(&execbuf, &ss_pool->bo, &cmd_buffer->surface_relocs,
-                         &cmd_buffer->pool->alloc);
+                         &device->alloc);
   if (result != VK_SUCCESS)
      return result;

@@ -1278,7 +1278,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
                                       cmd_buffer->last_ss_pool_center);

      result = anv_execbuf_add_bo(&execbuf, &(*bbo)->bo, &(*bbo)->relocs,
-                                  &cmd_buffer->pool->alloc);
+                                  &device->alloc);
      if (result != VK_SUCCESS)
         return result;
   }
@@ -1387,7 +1387,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,

   result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);

-   anv_execbuf_finish(&execbuf, &cmd_buffer->pool->alloc);
+   anv_execbuf_finish(&execbuf, &device->alloc);

   return result;
 }
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -701,10 +701,10 @@ void anv_CmdUpdateBuffer(
      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

-      anv_state_flush(cmd_buffer->device, tmp_data);
-
      memcpy(tmp_data.map, pData, copy_size);

+      anv_state_flush(cmd_buffer->device, tmp_data);
+
      int bs = 16;
      bs = gcd_pow2_u64(bs, dstOffset);
      bs = gcd_pow2_u64(bs, copy_size);
@@ -1398,7 +1398,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
    * still hot in the cache.
    */
   bool found_draw = false;
-   bool self_dep = false;
   enum anv_subpass_usage usage = 0;
   for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
      usage |= pass->attachments[att].subpass_usage[s];
@@ -1408,8 +1407,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
          * wait to resolve until then.
          */
         found_draw = true;
-         if (pass->attachments[att].subpass_usage[s] & ANV_SUBPASS_USAGE_INPUT)
-            self_dep = true;
         break;
      }
   }
@@ -1468,14 +1465,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
          *    binding this surface to Sampler."
          */
         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
-      } else if (cmd_buffer->device->info.gen == 8 && self_dep &&
-                 att_state->input_aux_usage == ISL_AUX_USAGE_CCS_D) {
-         /* On Broadwell we still need to do resolves when there is a
-          * self-dependency because HW could not see fast-clears and works
-          * on the render cache as if there was regular non-fast-clear surface.
-          * To avoid any inconsistency, we force the resolve.
-          */
-         resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
      }
   }

--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -26,6 +26,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <fcntl.h>
+#include <sys/mman.h>

 #include "anv_private.h"
 #include "util/debug.h"
@@ -347,7 +348,7 @@ VkResult anv_BindImageMemory(
   if (image->aux_surface.isl.size > 0) {

      /* The offset and size must be a multiple of 4K or else the
-       * anv_gem_mmap call below will return NULL.
+       * anv_gem_mmap call below will fail.
       */
      assert((image->offset + image->aux_surface.offset) % 4096 == 0);
      assert(image->aux_surface.isl.size % 4096 == 0);
@@ -363,10 +364,7 @@ VkResult anv_BindImageMemory(
                               image->aux_surface.isl.size,
                               device->info.has_llc ? 0 : I915_MMAP_WC);

-      /* If anv_gem_mmap returns NULL, it's likely that the kernel was
-       * not able to find space on the host to create a proper mapping.
-       */
-      if (map == NULL)
+      if (map == MAP_FAILED)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

      memset(map, 0, image->aux_surface.isl.size);
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -291,27 +291,21 @@ color_attachment_compute_aux_usage(struct anv_device *device,
      att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
   } else if (att_state->fast_clear) {
      att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
-      if (GEN_GEN >= 9 &&
-          !isl_format_supports_ccs_e(&device->info, iview->isl.format)) {
-         /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
-          *
-          *    "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
-          *    setting is only allowed if Surface Format supported for Fast
-          *    Clear. In addition, if the surface is bound to the sampling
-          *    engine, Surface Format must be supported for Render Target
-          *    Compression for surfaces bound to the sampling engine."
-          *
-          * In other words, we can't sample from a fast-cleared image if it
-          * doesn't also support color compression.
-          */
-         att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
-      } else if (GEN_GEN >= 8) {
-         /* Broadwell/Skylake can sample from fast-cleared images */
+      /* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
+       *
+       *    "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
+       *    setting is only allowed if Surface Format supported for Fast
+       *    Clear. In addition, if the surface is bound to the sampling
+       *    engine, Surface Format must be supported for Render Target
+       *    Compression for surfaces bound to the sampling engine."
+       *
+       * In other words, we can only sample from a fast-cleared image if it
+       * also supports color compression.
+       */
+      if (isl_format_supports_ccs_e(&device->info, iview->isl.format))
         att_state->input_aux_usage = ISL_AUX_USAGE_CCS_D;
-      } else {
-         /* Ivy Bridge and Haswell cannot */
+      else
         att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
-      }
   } else {
      att_state->aux_usage = ISL_AUX_USAGE_NONE;
      att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
--- a/src/mapi/glapi/gen/glX_proto_send.py
+++ b/src/mapi/glapi/gen/glX_proto_send.py
@@ -382,7 +382,7 @@ const GLuint __glXDefaultPixelStore[9] = { 0, 0, 0, 0, 0, 0, 0, 0, 1 };
                    procs[n] = func.static_glx_name(n)

        print """
-#ifdef GLX_SHARED_GLAPI
+#ifdef GLX_INDIRECT_RENDERING

 static const struct proc_pair
 {
@@ -418,7 +418,7 @@ __indirect_get_proc_address(const char *name)
   return (pair) ? pair->proc : NULL;
 }

-#endif /* GLX_SHARED_GLAPI */
+#endif /* GLX_INDIRECT_RENDERING */
 """
        return

@@ -1113,7 +1113,7 @@ extern _X_HIDDEN NOINLINE FASTCALL GLubyte * __glXSetupVendorRequest(
                    break

        print ''
-        print '#ifdef GLX_SHARED_GLAPI'
+        print '#ifdef GLX_INDIRECT_RENDERING'
        print 'extern _X_HIDDEN void (*__indirect_get_proc_address(const char *name))(void);'
        print '#endif'

--- a/src/mapi/glapi/gen/gl_API.xml
+++ b/src/mapi/glapi/gen/gl_API.xml
@@ -10219,7 +10219,7 @@
 </category>

 <category name="GL_EXT_multi_draw_arrays" number="148">
-    <function name="MultiDrawArraysEXT" es1="1.0" es2="2.0" alias="MultiDrawArrays">
+    <function name="MultiDrawArraysEXT" es1="1.0" es2="2.0" exec="dynamic" alias="MultiDrawArrays">
        <param name="mode" type="GLenum"/>
        <param name="first" type="const GLint *"/>
        <param name="count" type="const GLsizei *"/>
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -96,6 +96,7 @@ endif

 AM_CPPFLAGS = $(DEFINES) $(INCLUDE_DIRS)
 AM_CFLAGS = \
+	$(VDPAU_CFLAGS) \
 	$(LLVM_CFLAGS) \
 	$(VISIBILITY_CFLAGS) \
 	$(MSVC2013_COMPAT_CFLAGS)
--- a/src/mesa/drivers/dri/Makefile.am
+++ b/src/mesa/drivers/dri/Makefile.am
@@ -6,12 +6,9 @@ MEGADRIVERS_DEPS =

 SUBDIRS+=common

-# On Android, we need to explicitly link to libglapi.so.
-if HAVE_ANDROID
 if HAVE_SHARED_GLAPI
 SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
 endif
-endif

 if HAVE_I915_DRI
 SUBDIRS += i915
@@ -59,10 +56,15 @@ driinclude_HEADERS = \
 nodist_EXTRA_mesa_dri_drivers_la_SOURCES = dummy.cpp
 mesa_dri_drivers_la_SOURCES =
 mesa_dri_drivers_la_LDFLAGS = \
-        -module -avoid-version -shared -shrext .so \
-        $(BSYMBOLIC) \
-        $(GC_SECTIONS) \
-        $()
+	-shared \
+	-shrext .so \
+	-module \
+	-no-undefined \
+	-avoid-version \
+	$(BSYMBOLIC) \
+	$(GC_SECTIONS) \
+	$(LD_NO_UNDEFINED)
+
 mesa_dri_drivers_la_LIBADD = \
        ../../libmesa.la \
        common/libmegadriver_stub.la \
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -485,10 +485,13 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)

   DBG("%s\n", __func__);

-   if (brw->has_llc)
-      brw_bo_unmap(cache->bo);
-   brw_bo_unreference(cache->bo);
-   cache->bo = NULL;
+   /* This can be NULL if context creation failed early on */
+   if (cache->bo) {
+      if (brw->has_llc)
+         brw_bo_unmap(cache->bo);
+      brw_bo_unreference(cache->bo);
+      cache->bo = NULL;
+   }
   brw_clear_cache(brw, cache);
   free(cache->items);
   cache->items = NULL;
--- a/src/mesa/drivers/dri/i965/brw_sync.c
+++ b/src/mesa/drivers/dri/i965/brw_sync.c
@@ -470,7 +470,7 @@ brw_dri_create_fence_fd(__DRIcontext *dri_ctx, int fd)
         goto fail;
   } else {
      /* Import the sync fd as an in-fence. */
-      fence->sync_fd = fd;
+      fence->sync_fd = dup(fd);
   }

   assert(fence->sync_fd != -1);
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -290,6 +290,43 @@ static struct intel_image_format intel_image_formats[] = {
       { 0, 1, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } }
 };

+static const struct {
+   uint32_t tiling;
+   uint64_t modifier;
+} tiling_modifier_map[] = {
+   { .tiling = I915_TILING_NONE, .modifier = DRM_FORMAT_MOD_LINEAR },
+   { .tiling = I915_TILING_X, .modifier = I915_FORMAT_MOD_X_TILED },
+   { .tiling = I915_TILING_Y, .modifier = I915_FORMAT_MOD_Y_TILED },
+};
+
+static uint32_t
+modifier_to_tiling(uint64_t modifier)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) {
+      if (tiling_modifier_map[i].modifier == modifier)
+         return tiling_modifier_map[i].tiling;
+   }
+
+   assert(0 && "modifier_to_tiling should only receive known modifiers");
+   unreachable();
+}
+
+static uint64_t
+tiling_to_modifier(uint32_t tiling)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(tiling_modifier_map); i++) {
+      if (tiling_modifier_map[i].tiling == tiling)
+         return tiling_modifier_map[i].modifier;
+   }
+
+   assert(0 && "tiling_to_modifier received unknown tiling mode");
+   unreachable();
+}
+
 static void
 intel_image_warn_if_unaligned(__DRIimage *image, const char *func)
 {
@@ -407,6 +444,7 @@ intel_create_image_from_name(__DRIscreen *dri_screen,
       free(image);
       return NULL;
    }
+    image->modifier = tiling_to_modifier(image->bo->tiling_mode);

    return image;
 }
@@ -435,6 +473,7 @@ intel_create_image_from_renderbuffer(__DRIcontext *context,

   image->internal_format = rb->InternalFormat;
   image->format = rb->Format;
+   image->modifier = tiling_to_modifier(irb->mt->tiling);
   image->offset = 0;
   image->data = loaderPrivate;
   brw_bo_unreference(image->bo);
@@ -496,6 +535,7 @@ intel_create_image_from_texture(__DRIcontext *context, int target,

   image->internal_format = obj->Image[face][level]->InternalFormat;
   image->format = obj->Image[face][level]->TexFormat;
+   image->modifier = tiling_to_modifier(iobj->mt->tiling);
   image->data = loaderPrivate;
   intel_setup_image_from_mipmap_tree(brw, image, iobj->mt, level, zoffset);
   image->dri_format = driGLFormatToImageFormat(image->format);
@@ -568,10 +608,7 @@ intel_create_image_common(__DRIscreen *dri_screen,
 {
   __DRIimage *image;
   struct intel_screen *screen = dri_screen->driverPrivate;
-   /* Historically, X-tiled was the default, and so lack of modifier means
-    * X-tiled.
-    */
-   uint32_t tiling = I915_TILING_X;
+   uint32_t tiling;
   int cpp;

   /* Callers of this may specify a modifier, or a dri usage, but not both. The
@@ -581,21 +618,18 @@ intel_create_image_common(__DRIscreen *dri_screen,
   assert(!(use && count));

   uint64_t modifier = select_best_modifier(&screen->devinfo, modifiers, count);
-   switch (modifier) {
-   case I915_FORMAT_MOD_X_TILED:
-      assert(tiling == I915_TILING_X);
-      break;
-   case DRM_FORMAT_MOD_LINEAR:
-      tiling = I915_TILING_NONE;
-      break;
-   case I915_FORMAT_MOD_Y_TILED:
-      tiling = I915_TILING_Y;
-      break;
-   case DRM_FORMAT_MOD_INVALID:
+   if (modifier == DRM_FORMAT_MOD_INVALID) {
+      /* User requested specific modifiers, none of which work */
      if (modifiers)
         return NULL;
-   default:
-         break;
+
+      /* Historically, X-tiled was the default, and so lack of modifier means
+       * X-tiled.
+       */
+      tiling = I915_TILING_X;
+   } else {
+      /* select_best_modifier has found a modifier we support */
+      tiling = modifier_to_tiling(modifier);
   }

   if (use & __DRI_IMAGE_USE_CURSOR) {
@@ -710,6 +744,7 @@ intel_dup_image(__DRIimage *orig_image, void *loaderPrivate)
   image->planar_format   = orig_image->planar_format;
   image->dri_format      = orig_image->dri_format;
   image->format          = orig_image->format;
+   image->modifier        = orig_image->modifier;
   image->offset          = orig_image->offset;
   image->width           = orig_image->width;
   image->height          = orig_image->height;
@@ -828,6 +863,7 @@ intel_create_image_from_fds(__DRIscreen *dri_screen,
      free(image);
      return NULL;
   }
+   image->modifier = tiling_to_modifier(image->bo->tiling_mode);

   if (f->nplanes == 1) {
      image->offset = image->offsets[0];
@@ -915,6 +951,7 @@ intel_from_planar(__DRIimage *parent, int plane, void *loaderPrivate)

    image->bo = parent->bo;
    brw_bo_reference(parent->bo);
+    image->modifier = parent->modifier;

    image->width = width;
    image->height = height;
--- a/src/mesa/main/api_validate.c
+++ b/src/mesa/main/api_validate.c
@@ -816,25 +816,10 @@ _mesa_validate_DrawRangeElements(struct gl_context *ctx, GLenum mode,
                                       "glDrawRangeElements");
 }

+
 static bool
-validate_draw_arrays(struct gl_context *ctx, const char *func,
-                     GLenum mode, GLsizei count, GLsizei numInstances)
+need_xfb_remaining_prims_check(const struct gl_context *ctx)
 {
-   struct gl_transform_feedback_object *xfb_obj
-      = ctx->TransformFeedback.CurrentObject;
-   FLUSH_CURRENT(ctx, 0);
-
-   if (count < 0) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "%s(count)", func);
-      return false;
-   }
-
-   if (!_mesa_valid_prim_mode(ctx, mode, func))
-      return false;
-
-   if (!check_valid_to_render(ctx, func))
-      return false;
-
   /* From the GLES3 specification, section 2.14.2 (Transform Feedback
    * Primitive Capture):
    *
@@ -862,10 +847,33 @@ validate_draw_arrays(struct gl_context *ctx, const char *func,
    *     is removed and replaced with the GL behavior (primitives are not
    *     written and the corresponding counter is not updated)..."
    */
-   if (_mesa_is_gles3(ctx) && _mesa_is_xfb_active_and_unpaused(ctx) &&
-       !_mesa_has_OES_geometry_shader(ctx) &&
-       !_mesa_has_OES_tessellation_shader(ctx)) {
-      size_t prim_count = vbo_count_tessellated_primitives(mode, count, 1);
+   return _mesa_is_gles3(ctx) && _mesa_is_xfb_active_and_unpaused(ctx) &&
+          !_mesa_has_OES_geometry_shader(ctx) &&
+          !_mesa_has_OES_tessellation_shader(ctx);
+}
+
+
+static bool
+validate_draw_arrays(struct gl_context *ctx, const char *func,
+                     GLenum mode, GLsizei count, GLsizei numInstances)
+{
+   FLUSH_CURRENT(ctx, 0);
+
+   if (count < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(count)", func);
+      return false;
+   }
+
+   if (!_mesa_valid_prim_mode(ctx, mode, func))
+      return false;
+
+   if (!check_valid_to_render(ctx, func))
+      return false;
+
+   if (need_xfb_remaining_prims_check(ctx)) {
+      struct gl_transform_feedback_object *xfb_obj
+         = ctx->TransformFeedback.CurrentObject;
+      size_t prim_count = vbo_count_tessellated_primitives(mode, count, numInstances);
      if (xfb_obj->GlesRemainingPrims < prim_count) {
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "%s(exceeds transform feedback size)", func);
@@ -913,6 +921,60 @@ _mesa_validate_DrawArraysInstanced(struct gl_context *ctx, GLenum mode, GLint fi
 }


+/**
+ * Called to error check the function parameters.
+ *
+ * Note that glMultiDrawArrays is not part of GLES, so there's limited scope
+ * for sharing code with the validation of glDrawArrays.
+ */
+bool
+_mesa_validate_MultiDrawArrays(struct gl_context *ctx, GLenum mode,
+                               const GLsizei *count, GLsizei primcount)
+{
+   int i;
+
+   FLUSH_CURRENT(ctx, 0);
+
+   if (!_mesa_valid_prim_mode(ctx, mode, "glMultiDrawArrays"))
+      return false;
+
+   if (!check_valid_to_render(ctx, "glMultiDrawArrays"))
+      return false;
+
+   if (primcount < 0) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "glMultiDrawArrays(primcount=%d)",
+                  primcount);
+      return false;
+   }
+
+   for (i = 0; i < primcount; ++i) {
+      if (count[i] < 0) {
+         _mesa_error(ctx, GL_INVALID_VALUE, "glMultiDrawArrays(count[%d]=%d)",
+                     i, count[i]);
+         return false;
+      }
+   }
+
+   if (need_xfb_remaining_prims_check(ctx)) {
+      struct gl_transform_feedback_object *xfb_obj
+         = ctx->TransformFeedback.CurrentObject;
+      size_t xfb_prim_count = 0;
+
+      for (i = 0; i < primcount; ++i)
+         xfb_prim_count += vbo_count_tessellated_primitives(mode, count[i], 1);
+
+      if (xfb_obj->GlesRemainingPrims < xfb_prim_count) {
+         _mesa_error(ctx, GL_INVALID_OPERATION,
+                     "glMultiDrawArrays(exceeds transform feedback size)");
+         return false;
+      }
+      xfb_obj->GlesRemainingPrims -= xfb_prim_count;
+   }
+
+   return true;
+}
+
+
 GLboolean
 _mesa_validate_DrawElementsInstanced(struct gl_context *ctx,
                                     GLenum mode, GLsizei count, GLenum type,
--- a/src/mesa/main/api_validate.h
+++ b/src/mesa/main/api_validate.h
@@ -48,6 +48,10 @@ _mesa_valid_prim_mode(struct gl_context *ctx, GLenum mode, const char *name);
 extern GLboolean
 _mesa_validate_DrawArrays(struct gl_context *ctx, GLenum mode, GLsizei count);

+extern bool
+_mesa_validate_MultiDrawArrays(struct gl_context *ctx, GLenum mode,
+                               const GLsizei *count, GLsizei primcount);
+
 extern GLboolean
 _mesa_validate_DrawElements(struct gl_context *ctx,
 			    GLenum mode, GLsizei count, GLenum type,
--- a/src/mesa/main/copyimage.c
+++ b/src/mesa/main/copyimage.c
@@ -149,30 +149,9 @@ prepare_target(struct gl_context *ctx, GLuint name, GLenum target,
         return false;
      }

-      /* The ARB_copy_image specification says:
-       *
-       *    "INVALID_OPERATION is generated if either object is a texture and
-       *     the texture is not complete (as defined in section 3.9.14)"
-       *
-       * The cited section says:
-       *
-       *    "Using the preceding definitions, a texture is complete unless any
-       *     of the following conditions hold true: [...]
-       *
-       *     * The minification filter requires a mipmap (is neither NEAREST
-       *       nor LINEAR), and the texture is not mipmap complete."
-       *
-       * This imposes the bizarre restriction that glCopyImageSubData requires
-       * mipmap completion at times, which dEQP mandates, and other drivers
-       * appear to implement.  We don't have any texture units here, so we
-       * can't look at any bound separate sampler objects...it appears that
-       * you're supposed to use the sampler object which is built-in to the
-       * texture object.
-       *
-       * See https://cvs.khronos.org/bugzilla/show_bug.cgi?id=16224.
-       */
      _mesa_test_texobj_completeness(ctx, texObj);
-      if (!_mesa_is_texture_complete(texObj, &texObj->Sampler)) {
+      if (!texObj->_BaseComplete ||
+          (level != 0 && !texObj->_MipmapComplete)) {
         _mesa_error(ctx, GL_INVALID_OPERATION,
                     "glCopyImageSubData(%sName incomplete)", dbg_prefix);
         return false;
--- a/src/mesa/main/glthread.c
+++ b/src/mesa/main/glthread.c
@@ -265,7 +265,7 @@ _mesa_glthread_finish(struct gl_context *ctx)
    * dri interface entrypoints), in which case we don't need to actually
    * synchronize against ourself.
    */
-   if (pthread_self() == glthread->thread)
+   if (pthread_equal(pthread_self(), glthread->thread))
      return;

   pthread_mutex_lock(&glthread->mutex);
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -945,6 +945,8 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
    */
   if (uni->type->is_sampler()) {
      bool flushed = false;
+      shProg->SamplersValidated = GL_TRUE;
+
      for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 	 struct gl_linked_shader *const sh = shProg->_LinkedShaders[i];

--- a/src/mesa/main/uniforms.c
+++ b/src/mesa/main/uniforms.c
@@ -67,11 +67,15 @@ void
 _mesa_update_shader_textures_used(struct gl_shader_program *shProg,
                                  struct gl_program *prog)
 {
+   GLbitfield mask = prog->SamplersUsed;
+   gl_shader_stage prog_stage =
+      _mesa_program_enum_to_shader_stage(prog->Target);
+   struct gl_linked_shader *shader = shProg->_LinkedShaders[prog_stage];
+
+   assert(shader);
+
   memset(prog->TexturesUsed, 0, sizeof(prog->TexturesUsed));

-   shProg->SamplersValidated = GL_TRUE;
-
-   GLbitfield mask = prog->SamplersUsed;
   while (mask) {
      const int s = u_bit_scan(&mask);
      GLuint unit = prog->SamplerUnits[s];
@@ -87,8 +91,20 @@ _mesa_update_shader_textures_used(struct gl_shader_program *shProg,
       *     types pointing to the same texture image unit within a program
       *     object."
       */
-      if (prog->TexturesUsed[unit] & ~(1 << tgt))
-         shProg->SamplersValidated = GL_FALSE;
+      unsigned stages_mask = shProg->data->linked_stages;
+      while (stages_mask) {
+         const int stage = u_bit_scan(&stages_mask);
+
+         /* Skip validation if we are yet to update textures used in this
+          * stage.
+          */
+         if (prog_stage < stage)
+            break;
+
+         struct gl_program *glprog = shProg->_LinkedShaders[stage]->Program;
+         if (glprog->TexturesUsed[unit] & ~(1 << tgt))
+            shProg->SamplersValidated = GL_FALSE;
+      }

      prog->TexturesUsed[unit] |= (1 << tgt);
   }
--- a/src/mesa/main/varray.c
+++ b/src/mesa/main/varray.c
@@ -1539,24 +1539,6 @@ _mesa_UnlockArraysEXT( void )
 }


-/* GL_EXT_multi_draw_arrays */
-void GLAPIENTRY
-_mesa_MultiDrawArrays( GLenum mode, const GLint *first,
-                          const GLsizei *count, GLsizei primcount )
-{
-   GET_CURRENT_CONTEXT(ctx);
-   GLint i;
-
-   FLUSH_VERTICES(ctx, 0);
-
-   for (i = 0; i < primcount; i++) {
-      if (count[i] > 0) {
-         CALL_DrawArrays(ctx->CurrentClientDispatch, (mode, first[i], count[i]));
-      }
-   }
-}
-
-
 /* GL_IBM_multimode_draw_arrays */
 void GLAPIENTRY
 _mesa_MultiModeDrawArraysIBM( const GLenum * mode, const GLint * first,
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -3117,6 +3117,11 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
   }

   if (prog->data->LinkStatus) {
+      /* Reset sampler validated to true, validation happens via the
+       * LinkShader call below.
+       */
+      prog->SamplersValidated = GL_TRUE;
+
      if (!ctx->Driver.LinkShader(ctx, prog)) {
         prog->data->LinkStatus = linking_failure;
      }
--- a/src/mesa/state_tracker/st_cb_fbo.h
+++ b/src/mesa/state_tracker/st_cb_fbo.h
@@ -85,7 +85,8 @@ static inline struct st_framebuffer *
 st_ws_framebuffer(struct gl_framebuffer *fb)
 {
   /* FBO cannot be casted.  See st_new_framebuffer */
-   if (fb && _mesa_is_winsys_fbo(fb))
+   if (fb && _mesa_is_winsys_fbo(fb) &&
+       fb != _mesa_get_incomplete_framebuffer())
      return (struct st_framebuffer *) fb;
   return NULL;
 }
--- a/src/mesa/state_tracker/st_draw.c
+++ b/src/mesa/state_tracker/st_draw.c
@@ -282,6 +282,8 @@ st_indirect_draw_vbo(struct gl_context *ctx,
   assert(ctx->NewState == 0x0);
   assert(stride);

+   st_invalidate_readpix_cache(st);
+
   /* Validate state. */
   if ((st->dirty | ctx->NewDriverState) & ST_PIPELINE_RENDER_STATE_MASK ||
       st->gfx_shaders_may_be_dirty) {
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -844,13 +844,14 @@ st_manager_flush_frontbuffer(struct st_context *st)
   struct st_framebuffer *stfb = st_ws_framebuffer(st->ctx->DrawBuffer);
   struct st_renderbuffer *strb = NULL;

+   assert(st->ctx->DrawBuffer != _mesa_get_incomplete_framebuffer());
+
   if (stfb)
      strb = st_renderbuffer(stfb->Base.Attachment[BUFFER_FRONT_LEFT].Renderbuffer);
   if (!strb)
      return;

   /* never a dummy fb */
-   assert(&stfb->Base != _mesa_get_incomplete_framebuffer());
   stfb->iface->flush_front(&st->iface, stfb->iface, ST_ATTACHMENT_FRONT_LEFT);
 }

--- a/src/mesa/vbo/vbo_exec_array.c
+++ b/src/mesa/vbo/vbo_exec_array.c
@@ -404,7 +404,8 @@ vbo_bind_arrays(struct gl_context *ctx)
 */
 static void
 vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
-                GLsizei count, GLuint numInstances, GLuint baseInstance)
+                GLsizei count, GLuint numInstances, GLuint baseInstance,
+                GLuint drawID)
 {
   struct vbo_context *vbo = vbo_context(ctx);
   struct _mesa_prim prim[2];
@@ -420,6 +421,7 @@ vbo_draw_arrays(struct gl_context *ctx, GLenum mode, GLint start,
   prim[0].mode = mode;
   prim[0].num_instances = numInstances;
   prim[0].base_instance = baseInstance;
+   prim[0].draw_id = drawID;
   prim[0].is_indirect = 0;
   prim[0].start = start;
   prim[0].count = count;
@@ -572,7 +574,7 @@ vbo_exec_DrawArrays(GLenum mode, GLint start, GLsizei count)
   if (0)
      check_draw_arrays_data(ctx, start, count);

-   vbo_draw_arrays(ctx, mode, start, count, 1, 0);
+   vbo_draw_arrays(ctx, mode, start, count, 1, 0, 0);

   if (0)
      print_draw_arrays(ctx, mode, start, count);
@@ -600,7 +602,7 @@ vbo_exec_DrawArraysInstanced(GLenum mode, GLint start, GLsizei count,
   if (0)
      check_draw_arrays_data(ctx, start, count);

-   vbo_draw_arrays(ctx, mode, start, count, numInstances, 0);
+   vbo_draw_arrays(ctx, mode, start, count, numInstances, 0, 0);

   if (0)
      print_draw_arrays(ctx, mode, start, count);
@@ -630,13 +632,52 @@ vbo_exec_DrawArraysInstancedBaseInstance(GLenum mode, GLint first,
   if (0)
      check_draw_arrays_data(ctx, first, count);

-   vbo_draw_arrays(ctx, mode, first, count, numInstances, baseInstance);
+   vbo_draw_arrays(ctx, mode, first, count, numInstances, baseInstance, 0);

   if (0)
      print_draw_arrays(ctx, mode, first, count);
 }


+/**
+ * Called from glMultiDrawArrays when in immediate mode.
+ */
+static void GLAPIENTRY
+vbo_exec_MultiDrawArrays(GLenum mode, const GLint *first,
+                         const GLsizei *count, GLsizei primcount)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLint i;
+
+   if (MESA_VERBOSE & VERBOSE_DRAW)
+      _mesa_debug(ctx,
+                  "glMultiDrawArrays(%s, %p, %p, %d)\n",
+                  _mesa_enum_to_string(mode), first, count, primcount);
+
+   if (!_mesa_validate_MultiDrawArrays(ctx, mode, count, primcount))
+      return;
+
+   for (i = 0; i < primcount; i++) {
+      if (count[i] > 0) {
+         if (0)
+            check_draw_arrays_data(ctx, first[i], count[i]);
+
+         /* The GL_ARB_shader_draw_parameters spec adds the following after the
+          * pseudo-code describing glMultiDrawArrays:
+          *
+          *    "The index of the draw (<i> in the above pseudo-code) may be
+          *     read by a vertex shader as <gl_DrawIDARB>, as described in
+          *     Section 11.1.3.9."
+          */
+         vbo_draw_arrays(ctx, mode, first[i], count[i], 1, 0, i);
+
+         if (0)
+            print_draw_arrays(ctx, mode, first[i], count[i]);
+      }
+   }
+}
+
+

 /**
 * Map GL_ELEMENT_ARRAY_BUFFER and print contents.
@@ -1230,7 +1271,7 @@ vbo_draw_transform_feedback(struct gl_context *ctx, GLenum mode,
        !_mesa_all_varyings_in_vbos(ctx->Array.VAO))) {
      GLsizei n =
         ctx->Driver.GetTransformFeedbackVertexCount(ctx, obj, stream);
-      vbo_draw_arrays(ctx, mode, 0, n, numInstances, 0);
+      vbo_draw_arrays(ctx, mode, 0, n, numInstances, 0, 0);
      return;
   }

@@ -1641,6 +1682,7 @@ vbo_initialize_exec_dispatch(const struct gl_context *ctx,
      SET_DrawRangeElements(exec, vbo_exec_DrawRangeElements);
   }

+   SET_MultiDrawArrays(exec, vbo_exec_MultiDrawArrays);
   SET_MultiDrawElementsEXT(exec, vbo_exec_MultiDrawElements);

   if (ctx->API == API_OPENGL_COMPAT) {
--- a/src/mesa/vbo/vbo_save_api.c
+++ b/src/mesa/vbo/vbo_save_api.c
@@ -1176,6 +1176,40 @@ _save_OBE_DrawArrays(GLenum mode, GLint start, GLsizei count)
 }


+static void GLAPIENTRY
+_save_OBE_MultiDrawArrays(GLenum mode, const GLint *first,
+                          const GLsizei *count, GLsizei primcount)
+{
+   GET_CURRENT_CONTEXT(ctx);
+   GLint i;
+
+   if (!_mesa_is_valid_prim_mode(ctx, mode)) {
+      _mesa_compile_error(ctx, GL_INVALID_ENUM, "glMultiDrawArrays(mode)");
+      return;
+   }
+
+   if (primcount < 0) {
+      _mesa_compile_error(ctx, GL_INVALID_VALUE,
+                          "glMultiDrawArrays(primcount<0)");
+      return;
+   }
+
+   for (i = 0; i < primcount; i++) {
+      if (count[i] < 0) {
+         _mesa_compile_error(ctx, GL_INVALID_VALUE,
+                             "glMultiDrawArrays(count[i]<0)");
+         return;
+      }
+   }
+
+   for (i = 0; i < primcount; i++) {
+      if (count[i] > 0) {
+         _save_OBE_DrawArrays(mode, first[i], count[i]);
+      }
+   }
+}
+
+
 /* Could do better by copying the arrays and element list intact and
 * then emitting an indexed prim at runtime.
 */
@@ -1484,6 +1518,7 @@ vbo_initialize_save_dispatch(const struct gl_context *ctx,
                             struct _glapi_table *exec)
 {
   SET_DrawArrays(exec, _save_OBE_DrawArrays);
+   SET_MultiDrawArrays(exec, _save_OBE_MultiDrawArrays);
   SET_DrawElements(exec, _save_OBE_DrawElements);
   SET_DrawElementsBaseVertex(exec, _save_OBE_DrawElementsBaseVertex);
   SET_DrawRangeElements(exec, _save_OBE_DrawRangeElements);
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -31,7 +31,6 @@
 #include <sys/file.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <sys/statvfs.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <fcntl.h>
@@ -47,7 +46,6 @@
 #include "util/mesa-sha1.h"
 #include "util/ralloc.h"
 #include "main/errors.h"
-#include "util/macros.h"

 #include "disk_cache.h"

@@ -171,7 +169,6 @@ disk_cache_create(const char *gpu_name, const char *timestamp)
   uint64_t max_size;
   int fd = -1;
   struct stat sb;
-   struct statvfs vfs = { 0 };
   size_t size;

   /* If running as a users other than the real user disable cache */
@@ -331,10 +328,9 @@ disk_cache_create(const char *gpu_name, const char *timestamp)
      }
   }

-   /* Default to 1GB or 10% of filesystem for maximum cache size. */
+   /* Default to 1GB for maximum cache size. */
   if (max_size == 0) {
-      statvfs(path, &vfs);
-      max_size = MAX2(1024*1024*1024, vfs.f_blocks * vfs.f_bsize / 10);
+      max_size = 1024*1024*1024;
   }

   cache->max_size = max_size;
@@ -532,7 +528,7 @@ unlink_lru_file_from_directory(const char *path)
   unlink(filename);
   free (filename);

-   return sb.st_size;
+   return sb.st_blocks * 512;
 }

 /* Is entry a directory with a two-character name, (and not the
@@ -637,8 +633,8 @@ disk_cache_remove(struct disk_cache *cache, const cache_key key)
   unlink(filename);
   free(filename);

-   if (sb.st_size)
-      p_atomic_add(cache->size, - (uint64_t)sb.st_size);
+   if (sb.st_blocks)
+      p_atomic_add(cache->size, - (uint64_t)sb.st_blocks * 512);
 }

 static ssize_t
@@ -880,8 +876,14 @@ cache_put(void *job, int thread_index)
      goto done;
   }

-   file_size += cf_data_size + dc_job->cache->driver_keys_blob_size;
-   p_atomic_add(dc_job->cache->size, file_size);
+   struct stat sb;
+   if (stat(filename, &sb) == -1) {
+      /* Something went wrong remove the file */
+      unlink(filename);
+      goto done;
+   }
+
+   p_atomic_add(dc_job->cache->size, sb.st_blocks * 512);

 done:
   if (fd_final != -1)
--- a/src/util/u_queue.c
+++ b/src/util/u_queue.c
@@ -298,9 +298,18 @@ util_queue_add_job(struct util_queue *queue,
   struct util_queue_job *ptr;

   assert(fence->signalled);
-   fence->signalled = false;

   mtx_lock(&queue->lock);
+   if (queue->kill_threads) {
+      mtx_unlock(&queue->lock);
+      /* well no good option here, but any leaks will be
+       * short-lived as things are shutting down..
+       */
+      return;
+   }
+
+   fence->signalled = false;
+
   assert(queue->num_queued >= 0 && queue->num_queued <= queue->max_jobs);

   /* if the queue is full, wait until there is space */