Update version to 18.3.0-rc5

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
cherry-ignore: egl/wayland: rather obvious build fix
2018-11-29 11:56:27 +00:00 · 2018-11-28 18:05:05 +00:00 · 2018-11-28 18:05:05 +00:00 · 2018-11-28 18:05:05 +00:00 · 2018-11-28 17:41:53 +00:00 · 2018-11-28 17:03:40 +00:00
68 changed files with 886 additions and 405 deletions
--- a/.travis.yml
+++ b/.travis.yml
@@ -52,6 +52,7 @@ matrix:
            # Common
            - xz-utils
            - libexpat1-dev
+            - libx11-xcb-dev
            - libelf-dev
            - python3.5
            - python3-pip
@@ -120,7 +121,6 @@ matrix:
            - llvm-6.0-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -150,7 +150,6 @@ matrix:
            - llvm-6.0-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -183,7 +182,6 @@ matrix:
            - llvm-3.9-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -222,7 +220,6 @@ matrix:
            - libclang-3.9-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -258,7 +255,6 @@ matrix:
            - libclang-4.0-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -294,7 +290,6 @@ matrix:
            - libclang-5.0-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -327,7 +322,6 @@ matrix:
            - libclang-6.0-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -361,7 +355,6 @@ matrix:
            - libclang-7-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -397,7 +390,6 @@ matrix:
            - libedit-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
@@ -427,7 +419,6 @@ matrix:
            - llvm-6.0-dev
            # Common
            - xz-utils
-            - x11proto-xf86vidmode-dev
            - libexpat1-dev
            - libx11-xcb-dev
            - libelf-dev
--- a/2
+++ b/2
@@ -1 +1 @@
-18.3.0-devel
+18.3.0-rc5
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -0,0 +1,2 @@
+# fixes: Commit was squashed into the respective offenders
+c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix
--- a/bin/get-fixes-pick-list.sh
+++ b/bin/get-fixes-pick-list.sh
@@ -1,81 +0,0 @@
-#!/bin/sh
-
-# Script for generating a list of candidates [referenced by a Fixes tag] for
-# cherry-picking to a stable branch
-#
-# Usage examples:
-#
-# $ bin/get-fixes-pick-list.sh
-# $ bin/get-fixes-pick-list.sh > picklist
-# $ bin/get-fixes-pick-list.sh | tee picklist
-
-# Use the last branchpoint as our limit for the search
-latest_branchpoint=`git merge-base origin/master HEAD`
-
-# List all the commits between day 1 and the branch point...
-git log --reverse --pretty=%H $latest_branchpoint > already_landed
-
-# ... and the ones cherry-picked.
-git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
-	grep "cherry picked from commit" |\
-	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//'  > already_picked
-
-# Grep for commits with Fixes tag
-git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
-while read sha
-do
-	# Check to see whether the patch is on the ignore list ...
-	if [ -f bin/.cherry-ignore ] ; then
-		if grep -q ^$sha bin/.cherry-ignore ; then
-			continue
-		fi
-	fi
-
-	# Skip if it has been already cherry-picked.
-	if grep -q ^$sha already_picked ; then
-		continue
-	fi
-
-	# Place every "fixes:" tag on its own line and join with the next word
-	# on its line or a later one.
-	fixes=`git show --pretty=medium -s $sha | tr -d "\n" | sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'`
-
-	# For each one try to extract the tag
-	fixes_count=`echo "$fixes" | wc -l`
-	warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0`
-	while [ $fixes_count -gt 0 ] ; do
-		# Treat only the current line
-		id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
-		fixes_count=$(($fixes_count-1))
-
-		# Bail out if we cannot find suitable id.
-		# Any specific validation the $id is valid and not some junk, is
-		# implied with the follow up code
-		if [ "x$id" = x ] ; then
-			continue
-		fi
-
-		# Check if the offending commit is in branch.
-
-		# Be that cherry-picked ...
-		# ... or landed before the branchpoint.
-		if grep -q ^$id already_picked ||
-		   grep -q ^$id already_landed ; then
-
-			printf "Commit \"%s\" fixes %s\n" \
-			       "`git log -n1 --pretty=oneline $sha`" \
-			       "$id"
-			warn=$(($warn-1))
-		fi
-
-	done
-
-	if [ $warn -gt 0 ] ; then
-		printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
-		       "`git log -n1 --pretty=oneline $sha`"
-	fi
-
-done
-
-rm -f already_picked
-rm -f already_landed
--- a/bin/get-pick-list.sh
+++ b/bin/get-pick-list.sh
@@ -7,21 +7,92 @@
 # $ bin/get-pick-list.sh
 # $ bin/get-pick-list.sh > picklist
 # $ bin/get-pick-list.sh | tee picklist
+#
+# The output is as follows:
+# [nomination_type] commit_sha commit summary
+
+is_stable_nomination()
+{
+	git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
+}
+
+is_typod_nomination()
+{
+	git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
+}
+
+# Helper to handle various typos of the fixes tag. $1 is the commit sha; the tag
+# pattern is passed as $2 and normalised to "fixes:" within.
+is_sha_nomination()
+{
+	fixes=`git show --pretty=medium -s $1 | tr -d "\n" | \
+		sed -e 's/'"$2"'/\nfixes:/Ig' | \
+		grep -Eo 'fixes:[a-f0-9]{8,40}'`
+	# No sha, no nomination. "echo | wc -l" is never 0, so test $fixes itself.
+	if test -z "$fixes"; then
+		return 1
+	fi
+	fixes_count=`echo "$fixes" | wc -l`
+	while test $fixes_count -gt 0; do
+		# Treat only the current line
+		id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
+		fixes_count=$(($fixes_count-1))
+
+		# Bail out if we cannot find suitable id.
+		# Any specific validation the $id is valid and not some junk, is
+		# implied with the follow up code
+		if test "x$id" = x; then
+			continue
+		fi
+
+		# Check if the offending commit is in branch.
+
+		# Be that cherry-picked ...
+		# ... or landed before the branchpoint.
+		if grep -q ^$id already_picked ||
+		   grep -q ^$id already_landed ; then
+			return 0
+		fi
+	done
+	return 1
+}
+
+is_fixes_nomination()
+{
+	is_sha_nomination "$1" "fixes:[[:space:]]*"
+	if test $? -eq 0; then
+		return 0
+	fi
+	is_sha_nomination "$1" "fixes[[:space:]]\+"
+}
+
+is_brokenby_nomination()
+{
+	is_sha_nomination "$1" "broken by"
+}
+
+is_revert_nomination()
+{
+	is_sha_nomination "$1" "This reverts commit "
+}

 # Use the last branchpoint as our limit for the search
 latest_branchpoint=`git merge-base origin/master HEAD`

-# Grep for commits with "cherry picked from commit" in the commit message.
+# List all the commits between day 1 and the branch point...
+git log --reverse --pretty=%H $latest_branchpoint > already_landed
+
+# ... and the ones cherry-picked.
 git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
 	grep "cherry picked from commit" |\
 	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked

-# Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable' $latest_branchpoint..origin/master |\
+# Grep for potential candidates
+git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>\|This reverts commit' $latest_branchpoint..origin/master |\
 while read sha
 do
 	# Check to see whether the patch is on the ignore list.
-	if [ -f bin/.cherry-ignore ] ; then
+	if test -f bin/.cherry-ignore; then
 		if grep -q ^$sha bin/.cherry-ignore ; then
 			continue
 		fi
@@ -32,7 +103,23 @@ do
 		continue
 	fi

-	git log -n1 --pretty=oneline $sha | cat
+	if is_stable_nomination "$sha"; then
+		tag=stable
+	elif is_typod_nomination "$sha"; then
+		tag=typod
+	elif is_fixes_nomination "$sha"; then
+		tag=fixes
+	elif is_brokenby_nomination "$sha"; then
+		tag=brokenby
+	elif is_revert_nomination "$sha"; then
+		tag=revert
+	else
+		continue
+	fi
+
+	printf "[ %8s ] " "$tag"
+	git --no-pager show --summary --oneline $sha
 done

 rm -f already_picked
+rm -f already_landed
--- a/bin/get-typod-pick-list.sh
+++ b/bin/get-typod-pick-list.sh
@@ -1,42 +0,0 @@
-#!/bin/sh
-
-# Script for generating a list of candidates which have typos in the nomination line
-#
-# Usage examples:
-#
-# $ bin/get-typod-pick-list.sh
-# $ bin/get-typod-pick-list.sh > picklist
-# $ bin/get-typod-pick-list.sh | tee picklist
-
-# NB:
-# This script intentionally _never_ checks for specific version tag
-# Should we consider folding it with the original get-pick-list.sh
-
-# Use the last branchpoint as our limit for the search
-latest_branchpoint=`git merge-base origin/master HEAD`
-
-# Grep for commits with "cherry picked from commit" in the commit message.
-git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
-	grep "cherry picked from commit" |\
-	sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
-
-# Grep for commits that were marked as a candidate for the stable tree.
-git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' $latest_branchpoint..origin/master |\
-while read sha
-do
-	# Check to see whether the patch is on the ignore list.
-	if [ -f bin/.cherry-ignore ] ; then
-		if grep -q ^$sha bin/.cherry-ignore ; then
-			continue
-		fi
-	fi
-
-	# Check to see if it has already been picked over.
-	if grep -q ^$sha already_picked ; then
-		continue
-	fi
-
-	git log -n1 --pretty=oneline $sha | cat
-done
-
-rm -f already_picked
--- a/configure.ac
+++ b/configure.ac
@@ -1725,11 +1725,7 @@ xdri)
        fi
    fi

-    # add xf86vidmode if available
-    PKG_CHECK_MODULES([XF86VIDMODE], [xxf86vm], HAVE_XF86VIDMODE=yes, HAVE_XF86VIDMODE=no)
-    if test "$HAVE_XF86VIDMODE" = yes ; then
-        dri_modules="$dri_modules xxf86vm"
-    fi
+    dri_modules="$dri_modules xxf86vm"

    PKG_CHECK_MODULES([DRIGL], [$dri_modules])
    GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV $dri_modules"
@@ -1742,10 +1738,6 @@ xdri)
    ;;
 esac

-# This is outside the case (above) so that it is invoked even for non-GLX
-# builds.
-AM_CONDITIONAL(HAVE_XF86VIDMODE, test "x$HAVE_XF86VIDMODE" = xyes)
-
 GLESv1_CM_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
 GLESv1_CM_PC_LIB_PRIV="-lm $PTHREAD_LIBS $DLOPEN_LIBS"
 GLESv2_LIB_DEPS="$LIBDRM_LIBS -lm $PTHREAD_LIBS $DLOPEN_LIBS"
@@ -1762,8 +1754,6 @@ AC_SUBST([GLESv1_CM_PC_LIB_PRIV])
 AC_SUBST([GLESv2_LIB_DEPS])
 AC_SUBST([GLESv2_PC_LIB_PRIV])

-AC_SUBST([HAVE_XF86VIDMODE])
-
 dnl
 dnl More GLX setup
 dnl
--- a/docs/releasing.html
+++ b/docs/releasing.html
@@ -21,6 +21,7 @@
 <li><a href="#overview">Overview</a>
 <li><a href="#schedule">Release schedule</a>
 <li><a href="#pickntest">Cherry-pick and test</a>
+<li><a href="#stagingbranch">Staging branch</a>
 <li><a href="#branch">Making a branchpoint</a>
 <li><a href="#prerelease">Pre-release announcement</a>
 <li><a href="#release">Making a new release</a>
@@ -209,6 +210,25 @@ system and making some every day's use until the release may be a good
 idea too.
 </p>

+<h1 id="stagingbranch">Staging branch</h1>
+
+<p>
+A live branch, which contains the currently merged/rejected patches, is available
+in the main repository under <code>staging/X.Y</code>. For example:
+</p>
+<pre>
+	staging/18.1 - WIP branch for the 18.1 series
+	staging/18.2 - WIP branch for the 18.2 series
+</pre>
+
+<p>
+Notes:
+</p>
+<ul>
+<li>People are encouraged to test the branch and report regressions.</li>
+<li>The branch history is not stable and it <strong>will</strong> be rebased.</li>
+</ul>
+

 <h1 id="branch">Making a branchpoint</h1>

--- a/docs/submittingpatches.html
+++ b/docs/submittingpatches.html
@@ -251,6 +251,9 @@ If you are not the author of the original patch, please Cc: them in your
 nomination request.
 </p>

+<p>
+The current patch status can be observed in the <a href="releasing.html#stagingbranch">staging branch</a>.
+</p>

 <h3 id="thetag">The stable tag</h3>

--- a/meson.build
+++ b/meson.build
@@ -787,7 +787,7 @@ endif

 # Check for generic C arguments
 c_args = []
-foreach a : ['-Wall', '-Werror=implicit-function-declaration',
+foreach a : ['-Werror=implicit-function-declaration',
             '-Werror=missing-prototypes', '-Werror=return-type',
             '-fno-math-errno',
             '-fno-trapping-math', '-Qunused-arguments']
@@ -809,7 +809,7 @@ endif

 # Check for generic C++ arguments
 cpp_args = []
-foreach a : ['-Wall', '-Werror=return-type',
+foreach a : ['-Werror=return-type',
             '-fno-math-errno', '-fno-trapping-math',
             '-Qunused-arguments']
  if cpp.has_argument(a)
@@ -905,8 +905,9 @@ if not cc.links('''#include <stdint.h>
                   int main() {
                     return __sync_add_and_fetch(&v, (uint64_t)1);
                   }''',
+                dependencies : dep_atomic,
                name : 'GCC 64bit atomics')
-  pre_args += '-DMISSING_64_BIT_ATOMICS'
+  pre_args += '-DMISSING_64BIT_ATOMICS'
 endif

 # TODO: shared/static? Is this even worth doing?
@@ -1317,13 +1318,6 @@ if with_platform_wayland
    'linux-dmabuf', 'linux-dmabuf-unstable-v1.xml'
  )
  pre_args += ['-DHAVE_WAYLAND_PLATFORM', '-DWL_HIDE_DEPRECATED']
-else
-  prog_wl_scanner = []
-  wl_scanner_arg = ''
-  dep_wl_protocols = null_dep
-  dep_wayland_client = null_dep
-  dep_wayland_server = null_dep
-  wayland_dmabuf_xml = ''
 endif

 dep_x11 = null_dep
@@ -1356,7 +1350,7 @@ if with_platform_x11
    dep_xdamage = dependency('xdamage', version : '>= 1.1')
    dep_xfixes = dependency('xfixes')
    dep_xcb_glx = dependency('xcb-glx', version : '>= 1.8.1')
-    dep_xxf86vm = dependency('xxf86vm', required : false)
+    dep_xxf86vm = dependency('xxf86vm')
  endif
  if (with_any_vk or with_glx == 'dri' or
       (with_gallium_vdpau or with_gallium_xvmc or with_gallium_va or
@@ -1434,13 +1428,11 @@ elif with_glx == 'dri'
  if with_dri_platform == 'drm'
    gl_priv_reqs += 'xcb-dri2 >= 1.8'
  endif
+  gl_priv_reqs += 'xxf86vm'
 endif
 if dep_libdrm.found()
  gl_priv_reqs += 'libdrm >= 2.4.75'
 endif
-if dep_xxf86vm.found()
-  gl_priv_reqs += 'xxf86vm'
-endif

 gl_priv_libs = []
 if dep_thread.found()
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -311,9 +311,18 @@ static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
 }

 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
-			     LLVMValueRef src0)
+			     LLVMValueRef src0,
+			     unsigned bitsize)
 {
-	return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
+	LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
+					   LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
+					   "");
+	result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");
+
+	if (bitsize == 32)
+		return result;
+
+	return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
 }

 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
@@ -932,7 +941,7 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
 		result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
 		break;
 	case nir_op_b2f:
-		result = emit_b2f(&ctx->ac, src[0]);
+		result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
 		break;
 	case nir_op_f2b:
 		result = emit_f2b(&ctx->ac, src[0]);
--- a/src/amd/vulkan/Android.mk
+++ b/src/amd/vulkan/Android.mk
@@ -74,7 +74,8 @@ LOCAL_C_INCLUDES := \
 	$(call generated-sources-dir-for,STATIC_LIBRARIES,libmesa_vulkan_util,,)/util

 LOCAL_WHOLE_STATIC_LIBRARIES := \
-	libmesa_vulkan_util
+	libmesa_vulkan_util \
+	libmesa_git_sha1

 LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.c
 LOCAL_GENERATED_SOURCES += $(intermediates)/radv_entrypoints.h
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -1950,6 +1950,8 @@ radv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)

 			va = radv_buffer_get_va(buffer->bo) + buffer->offset;

+			va += sb[i].offset;
+
 			/* Set the descriptor.
 			 *
 			 * On VI, the format must be non-INVALID, otherwise
@@ -3518,8 +3520,13 @@ static bool radv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,

 	uint32_t used_states = cmd_buffer->state.pipeline->graphics.needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;

-	/* Index & Vertex buffer don't change context regs, and pipeline is handled later. */
-	used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | RADV_CMD_DIRTY_PIPELINE);
+	/* Index, vertex and streamout buffers don't change context regs, and
+	 * pipeline is handled later.
+	 */
+	used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER |
+			 RADV_CMD_DIRTY_VERTEX_BUFFER |
+			 RADV_CMD_DIRTY_STREAMOUT_BUFFER |
+			 RADV_CMD_DIRTY_PIPELINE);

 	/* Assume all state changes except  these two can imply context rolls. */
 	if (cmd_buffer->state.dirty & used_states)
@@ -4741,28 +4748,30 @@ void radv_CmdBeginTransformFeedbackEXT(
 	struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
 	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t i;

 	radv_flush_vgt_streamout(cmd_buffer);

 	assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
-	for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) {
-		if (!(so->enabled_mask & (1 << i)))
-			continue;
+	for_each_bit(i, so->enabled_mask) {
+		int32_t counter_buffer_idx = i - firstCounterBuffer;
+		if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+			counter_buffer_idx = -1; /* binding has no counter buffer */

 		/* SI binds streamout buffers as shader resources.
 		 * VGT only counts primitives and tells the shader through
 		 * SGPRs what to do.
 		 */
 		radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16*i, 2);
-		radeon_emit(cs, (sb[i].offset + sb[i].size) >> 2);	/* BUFFER_SIZE (in DW) */
+		radeon_emit(cs, sb[i].size >> 2);	/* BUFFER_SIZE (in DW) */
 		radeon_emit(cs, so->stride_in_dw[i]);			/* VTX_STRIDE (in DW) */

-		if (pCounterBuffers && pCounterBuffers[i]) {
+		if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
 			/* The array of counter buffers is optional. */
-			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]);
+			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
 			uint64_t va = radv_buffer_get_va(buffer->bo);

-			va += buffer->offset + pCounterBufferOffsets[i];
+			va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx];

 			/* Append */
 			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
@@ -4783,7 +4792,7 @@ void radv_CmdBeginTransformFeedbackEXT(
 					STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
 			radeon_emit(cs, 0); /* unused */
 			radeon_emit(cs, 0); /* unused */
-			radeon_emit(cs, sb[i].offset >> 2); /* buffer offset in DW */
+			radeon_emit(cs, 0); /* unused */
 			radeon_emit(cs, 0); /* unused */
 		}
 	}
@@ -4801,20 +4810,22 @@ void radv_CmdEndTransformFeedbackEXT(
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
 	struct radeon_cmdbuf *cs = cmd_buffer->cs;
+	uint32_t i;

 	radv_flush_vgt_streamout(cmd_buffer);

 	assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
-	for (uint32_t i = firstCounterBuffer; i < counterBufferCount; i++) {
-		if (!(so->enabled_mask & (1 << i)))
-			continue;
+	for_each_bit(i, so->enabled_mask) {
+		int32_t counter_buffer_idx = i - firstCounterBuffer;
+		if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
+			counter_buffer_idx = -1; /* binding has no counter buffer */

-		if (pCounterBuffers && pCounterBuffers[i]) {
+		if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
 			/* The array of counters buffer is optional. */
-			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[i]);
+			RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
 			uint64_t va = radv_buffer_get_va(buffer->bo);

-			va += buffer->offset + pCounterBufferOffsets[i];
+			va += buffer->offset + pCounterBufferOffsets[counter_buffer_idx];

 			radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
 			radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) |
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1054,16 +1054,14 @@ void radv_GetPhysicalDeviceProperties2(
 			    (VkPhysicalDeviceSubgroupProperties*)ext;
 			properties->subgroupSize = 64;
 			properties->supportedStages = VK_SHADER_STAGE_ALL;
-			/* TODO: Enable VK_SUBGROUP_FEATURE_VOTE_BIT when wwm
-			 * is fixed in LLVM.
-			 */
 			properties->supportedOperations =
-							VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
 							VK_SUBGROUP_FEATURE_BASIC_BIT |
 							VK_SUBGROUP_FEATURE_BALLOT_BIT |
-							VK_SUBGROUP_FEATURE_QUAD_BIT;
+							VK_SUBGROUP_FEATURE_QUAD_BIT |
+							VK_SUBGROUP_FEATURE_VOTE_BIT;
 			if (pdevice->rad_info.chip_class >= VI) {
 				properties->supportedOperations |=
+							VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
 							VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
 							VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
 			}
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -691,7 +691,7 @@ radv_query_opaque_metadata(struct radv_device *device,
 	si_make_texture_descriptor(device, image, false,
 				   (VkImageViewType)image->type, image->vk_format,
 				   &fixedmapping, 0, image->info.levels - 1, 0,
-				   image->info.array_size,
+				   image->info.array_size - 1,
 				   image->info.width, image->info.height,
 				   image->info.depth,
 				   desc, NULL);
@@ -1175,8 +1175,6 @@ radv_image_view_init(struct radv_image_view *iview,
 		 if (device->physical_device->rad_info.chip_class >= GFX9 &&
 		     vk_format_is_compressed(image->vk_format) &&
 		     !vk_format_is_compressed(iview->vk_format)) {
-			 unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
-			 unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
 			 unsigned lvl_width  = radv_minify(image->info.width , range->baseMipLevel);
 			 unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

@@ -1186,8 +1184,8 @@ radv_image_view_init(struct radv_image_view *iview,
 			 lvl_width <<= range->baseMipLevel;
 			 lvl_height <<= range->baseMipLevel;

-			 iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
-			 iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
+			 iview->extent.width = CLAMP(lvl_width, iview->extent.width, iview->image->surface.u.gfx9.surf_pitch);
+			 iview->extent.height = CLAMP(lvl_height, iview->extent.height, iview->image->surface.u.gfx9.surf_height);
 		 }
 	}

--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -595,6 +595,7 @@ struct radv_meta_state {
 		VkPipelineLayout p_layout;
 		VkPipeline occlusion_query_pipeline;
 		VkPipeline pipeline_statistics_query_pipeline;
+		VkPipeline tfb_query_pipeline;
 	} query;
 };

--- a/src/amd/vulkan/radv_query.c
+++ b/src/amd/vulkan/radv_query.c
@@ -512,11 +512,233 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
 	return b.shader;
 }

+static nir_shader *
+build_tfb_query_shader(struct radv_device *device)
+{
+	/* the shader this builds is roughly
+	 *
+	 * uint32_t src_stride = 32;
+	 *
+	 * location(binding = 0) buffer dst_buf;
+	 * location(binding = 1) buffer src_buf;
+	 *
+	 * void main() {
+	 *	uint64_t result[2] = {};
+	 *	bool available = false;
+	 *	uint64_t src_offset = src_stride * global_id.x;
+	 * 	uint64_t dst_offset = dst_stride * global_id.x;
+	 * 	uint64_t *src_data = src_buf[src_offset];
+	 *	uint32_t avail = (src_data[0] >> 32) &
+	 *			 (src_data[1] >> 32) &
+	 *			 (src_data[2] >> 32) &
+	 *			 (src_data[3] >> 32);
+	 *	if (avail & 0x80000000) {
+	 *		result[0] = src_data[3] - src_data[1];
+	 *		result[1] = src_data[2] - src_data[0];
+	 *		available = true;
+	 *	}
+	 * 	uint32_t result_size = flags & VK_QUERY_RESULT_64_BIT ? 16 : 8;
+	 * 	if ((flags & VK_QUERY_RESULT_PARTIAL_BIT) || available) {
+	 *		if (flags & VK_QUERY_RESULT_64_BIT) {
+	 *			dst_buf[dst_offset] = result;
+	 *		} else {
+	 *			dst_buf[dst_offset] = (uint32_t)result;
+	 *		}
+	 *	}
+	 *	if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
+	 *		dst_buf[dst_offset + result_size] = available;
+	 * 	}
+	 * }
+	 */
+	nir_builder b;
+	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
+	b.shader->info.name = ralloc_strdup(b.shader, "tfb_query");
+	b.shader->info.cs.local_size[0] = 64;
+	b.shader->info.cs.local_size[1] = 1;
+	b.shader->info.cs.local_size[2] = 1;
+
+	/* Create and initialize local variables. */
+	nir_variable *result =
+		nir_local_variable_create(b.impl,
+					  glsl_vector_type(GLSL_TYPE_UINT64, 2),
+					  "result");
+	nir_variable *available =
+		nir_local_variable_create(b.impl, glsl_int_type(), "available");
+
+	nir_store_var(&b, result,
+		      nir_vec2(&b, nir_imm_int64(&b, 0),
+				   nir_imm_int64(&b, 0)), 0x3);
+	nir_store_var(&b, available, nir_imm_int(&b, 0), 0x1);
+
+	nir_ssa_def *flags = radv_load_push_int(&b, 0, "flags");
+
+	/* Load resources. */
+	nir_intrinsic_instr *dst_buf = nir_intrinsic_instr_create(b.shader,
+	                                                          nir_intrinsic_vulkan_resource_index);
+	dst_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	nir_intrinsic_set_desc_set(dst_buf, 0);
+	nir_intrinsic_set_binding(dst_buf, 0);
+	nir_ssa_dest_init(&dst_buf->instr, &dst_buf->dest, 1, 32, NULL);
+	nir_builder_instr_insert(&b, &dst_buf->instr);
+
+	nir_intrinsic_instr *src_buf = nir_intrinsic_instr_create(b.shader,
+	                                                          nir_intrinsic_vulkan_resource_index);
+	src_buf->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
+	nir_intrinsic_set_desc_set(src_buf, 0);
+	nir_intrinsic_set_binding(src_buf, 1);
+	nir_ssa_dest_init(&src_buf->instr, &src_buf->dest, 1, 32, NULL);
+	nir_builder_instr_insert(&b, &src_buf->instr);
+
+	/* Compute global ID. */
+	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
+	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
+	nir_ssa_def *block_size = nir_imm_ivec4(&b,
+	                                        b.shader->info.cs.local_size[0],
+	                                        b.shader->info.cs.local_size[1],
+	                                        b.shader->info.cs.local_size[2], 0);
+	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
+	global_id = nir_channel(&b, global_id, 0); // We only care about x here.
+
+	/* Compute src/dst strides. */
+	nir_ssa_def *input_stride = nir_imm_int(&b, 32);
+	nir_ssa_def *input_base = nir_imul(&b, input_stride, global_id);
+	nir_ssa_def *output_stride = radv_load_push_int(&b, 4, "output_stride");
+	nir_ssa_def *output_base = nir_imul(&b, output_stride, global_id);
+
+	/* Load data from the query pool. */
+	nir_intrinsic_instr *load1 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
+	load1->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
+	load1->src[1] = nir_src_for_ssa(input_base);
+	nir_ssa_dest_init(&load1->instr, &load1->dest, 4, 32, NULL);
+	load1->num_components = 4;
+	nir_builder_instr_insert(&b, &load1->instr);
+
+	nir_intrinsic_instr *load2 = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_ssbo);
+	load2->src[0] = nir_src_for_ssa(&src_buf->dest.ssa);
+	load2->src[1] = nir_src_for_ssa(nir_iadd(&b, input_base, nir_imm_int(&b, 16)));
+	nir_ssa_dest_init(&load2->instr, &load2->dest, 4, 32, NULL);
+	load2->num_components = 4;
+	nir_builder_instr_insert(&b, &load2->instr);
+
+	/* Check if result is available. */
+	nir_ssa_def *avails[2];
+	avails[0] = nir_iand(&b, nir_channel(&b, &load1->dest.ssa, 1),
+				 nir_channel(&b, &load1->dest.ssa, 3));
+	avails[1] = nir_iand(&b, nir_channel(&b, &load2->dest.ssa, 1),
+				 nir_channel(&b, &load2->dest.ssa, 3));
+	nir_ssa_def *result_is_available =
+		nir_iand(&b, nir_iand(&b, avails[0], avails[1]),
+			     nir_imm_int(&b, 0x80000000));
+
+	/* Only compute result if available. */
+	nir_if *available_if = nir_if_create(b.shader);
+	available_if->condition = nir_src_for_ssa(result_is_available);
+	nir_cf_node_insert(b.cursor, &available_if->cf_node);
+
+	b.cursor = nir_after_cf_list(&available_if->then_list);
+
+	/* Pack values. */
+	nir_ssa_def *packed64[4];
+	packed64[0] = nir_pack_64_2x32(&b, nir_vec2(&b,
+						    nir_channel(&b, &load1->dest.ssa, 0),
+						    nir_channel(&b, &load1->dest.ssa, 1)));
+	packed64[1] = nir_pack_64_2x32(&b, nir_vec2(&b,
+						    nir_channel(&b, &load1->dest.ssa, 2),
+						    nir_channel(&b, &load1->dest.ssa, 3)));
+	packed64[2] = nir_pack_64_2x32(&b, nir_vec2(&b,
+						    nir_channel(&b, &load2->dest.ssa, 0),
+						    nir_channel(&b, &load2->dest.ssa, 1)));
+	packed64[3] = nir_pack_64_2x32(&b, nir_vec2(&b,
+						    nir_channel(&b, &load2->dest.ssa, 2),
+						    nir_channel(&b, &load2->dest.ssa, 3)));
+
+	/* Compute result. */
+	nir_ssa_def *num_primitive_written =
+		nir_isub(&b, packed64[3], packed64[1]);
+	nir_ssa_def *primitive_storage_needed =
+		nir_isub(&b, packed64[2], packed64[0]);
+
+	nir_store_var(&b, result,
+		      nir_vec2(&b, num_primitive_written,
+				   primitive_storage_needed), 0x3);
+	nir_store_var(&b, available, nir_imm_int(&b, 1), 0x1);
+
+	b.cursor = nir_after_cf_node(&available_if->cf_node);
+
+	/* Determine if result is 64 or 32 bit. */
+	nir_ssa_def *result_is_64bit =
+		nir_iand(&b, flags, nir_imm_int(&b, VK_QUERY_RESULT_64_BIT));
+	nir_ssa_def *result_size =
+		nir_bcsel(&b, result_is_64bit, nir_imm_int(&b, 16),
+			  nir_imm_int(&b, 8));
+
+	/* Store the result if complete or partial results have been requested. */
+	nir_if *store_if = nir_if_create(b.shader);
+	store_if->condition =
+		nir_src_for_ssa(nir_ior(&b, nir_iand(&b, flags,
+						     nir_imm_int(&b, VK_QUERY_RESULT_PARTIAL_BIT)),
+					nir_load_var(&b, available)));
+	nir_cf_node_insert(b.cursor, &store_if->cf_node);
+
+	b.cursor = nir_after_cf_list(&store_if->then_list);
+
+	/* Store result. */
+	nir_if *store_64bit_if = nir_if_create(b.shader);
+	store_64bit_if->condition = nir_src_for_ssa(result_is_64bit);
+	nir_cf_node_insert(b.cursor, &store_64bit_if->cf_node);
+
+	b.cursor = nir_after_cf_list(&store_64bit_if->then_list);
+
+	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+	store->src[0] = nir_src_for_ssa(nir_load_var(&b, result));
+	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+	store->src[2] = nir_src_for_ssa(output_base);
+	nir_intrinsic_set_write_mask(store, 0x3);
+	store->num_components = 2;
+	nir_builder_instr_insert(&b, &store->instr);
+
+	b.cursor = nir_after_cf_list(&store_64bit_if->else_list);
+
+	store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+	store->src[0] = nir_src_for_ssa(nir_u2u32(&b, nir_load_var(&b, result)));
+	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+	store->src[2] = nir_src_for_ssa(output_base);
+	nir_intrinsic_set_write_mask(store, 0x3);
+	store->num_components = 2;
+	nir_builder_instr_insert(&b, &store->instr);
+
+	b.cursor = nir_after_cf_node(&store_64bit_if->cf_node);
+
+	b.cursor = nir_after_cf_node(&store_if->cf_node);
+
+	/* Store the availability bit if requested. */
+	nir_if *availability_if = nir_if_create(b.shader);
+	availability_if->condition =
+		nir_src_for_ssa(nir_iand(&b, flags,
+					 nir_imm_int(&b, VK_QUERY_RESULT_WITH_AVAILABILITY_BIT)));
+	nir_cf_node_insert(b.cursor, &availability_if->cf_node);
+
+	b.cursor = nir_after_cf_list(&availability_if->then_list);
+
+	store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_store_ssbo);
+	store->src[0] = nir_src_for_ssa(nir_load_var(&b, available));
+	store->src[1] = nir_src_for_ssa(&dst_buf->dest.ssa);
+	store->src[2] = nir_src_for_ssa(nir_iadd(&b, result_size, output_base));
+	nir_intrinsic_set_write_mask(store, 0x1);
+	store->num_components = 1;
+	nir_builder_instr_insert(&b, &store->instr);
+
+	b.cursor = nir_after_cf_node(&availability_if->cf_node);
+
+	return b.shader;
+}
+
 static VkResult radv_device_init_meta_query_state_internal(struct radv_device *device)
 {
 	VkResult result;
 	struct radv_shader_module occlusion_cs = { .nir = NULL };
 	struct radv_shader_module pipeline_statistics_cs = { .nir = NULL };
+	struct radv_shader_module tfb_cs = { .nir = NULL };

 	mtx_lock(&device->meta_state.mtx);
 	if (device->meta_state.query.pipeline_statistics_query_pipeline) {
@@ -525,6 +747,7 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d
 	}
 	occlusion_cs.nir = build_occlusion_query_shader(device);
 	pipeline_statistics_cs.nir = build_pipeline_statistics_query_shader(device);
+	tfb_cs.nir = build_tfb_query_shader(device);

 	VkDescriptorSetLayoutCreateInfo occlusion_ds_create_info = {
 		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -611,12 +834,34 @@ static VkResult radv_device_init_meta_query_state_internal(struct radv_device *d
 					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
 					     1, &pipeline_statistics_vk_pipeline_info, NULL,
 					     &device->meta_state.query.pipeline_statistics_query_pipeline);
+	if (result != VK_SUCCESS)
+		goto fail;

+	VkPipelineShaderStageCreateInfo tfb_pipeline_shader_stage = {
+		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
+		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
+		.module = radv_shader_module_to_handle(&tfb_cs),
+		.pName = "main",
+		.pSpecializationInfo = NULL,
+	};
+
+	VkComputePipelineCreateInfo tfb_pipeline_info = {
+		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
+		.stage = tfb_pipeline_shader_stage,
+		.flags = 0,
+		.layout = device->meta_state.query.p_layout,
+	};
+
+	result = radv_CreateComputePipelines(radv_device_to_handle(device),
+					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
+					     1, &tfb_pipeline_info, NULL,
+					     &device->meta_state.query.tfb_query_pipeline);
 fail:
 	if (result != VK_SUCCESS)
 		radv_device_finish_meta_query_state(device);
 	ralloc_free(occlusion_cs.nir);
 	ralloc_free(pipeline_statistics_cs.nir);
+	ralloc_free(tfb_cs.nir);
 	mtx_unlock(&device->meta_state.mtx);
 	return result;
 }
@@ -631,6 +876,11 @@ VkResult radv_device_init_meta_query_state(struct radv_device *device, bool on_d

 void radv_device_finish_meta_query_state(struct radv_device *device)
 {
+	if (device->meta_state.query.tfb_query_pipeline)
+		radv_DestroyPipeline(radv_device_to_handle(device),
+				     device->meta_state.query.tfb_query_pipeline,
+				     &device->meta_state.alloc);
+
 	if (device->meta_state.query.pipeline_statistics_query_pipeline)
 		radv_DestroyPipeline(radv_device_to_handle(device),
 				     device->meta_state.query.pipeline_statistics_query_pipeline,
@@ -663,6 +913,7 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 {
 	struct radv_device *device = cmd_buffer->device;
 	struct radv_meta_saved_state saved_state;
+	bool old_predicating;

 	if (!*pipeline) {
 		VkResult ret = radv_device_init_meta_query_state_internal(device);
@@ -677,6 +928,12 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 		       RADV_META_SAVE_CONSTANTS |
 		       RADV_META_SAVE_DESCRIPTORS);

+	/* VK_EXT_conditional_rendering says that copy commands should not be
+	 * affected by conditional rendering.
+	 */
+	old_predicating = cmd_buffer->state.predicating;
+	cmd_buffer->state.predicating = false;
+
 	struct radv_buffer dst_buffer = {
 		.bo = dst_bo,
 		.offset = dst_offset,
@@ -758,6 +1015,8 @@ static void radv_query_shader(struct radv_cmd_buffer *cmd_buffer,
 	cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2 |
 	                                RADV_CMD_FLAG_INV_VMEM_L1 |
 	                                RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
+	/* Restore conditional rendering. */
+	cmd_buffer->state.predicating = old_predicating;

 	radv_meta_restore(&saved_state, cmd_buffer);
 }
@@ -1115,6 +1374,33 @@ void radv_CmdCopyQueryPoolResults(
 			assert(cs->cdw <= cdw_max);
 		}
 		break;
+	case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
+		if (flags & VK_QUERY_RESULT_WAIT_BIT) {
+			for(unsigned i = 0; i < queryCount; i++) {
+				unsigned query = firstQuery + i;
+				uint64_t src_va = va + query * pool->stride;
+
+				/* Wait on the upper word of all results. */
+				for (unsigned j = 0; j < 4; j++, src_va += 8) {
+					radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
+					radeon_emit(cs, WAIT_REG_MEM_GREATER_OR_EQUAL |
+							WAIT_REG_MEM_MEM_SPACE(1));
+					radeon_emit(cs, (src_va + 4));
+					radeon_emit(cs, (src_va + 4) >> 32);
+					radeon_emit(cs, 0x80000000); /* reference value */
+					radeon_emit(cs, 0xffffffff); /* mask */
+					radeon_emit(cs, 4); /* poll interval */
+				}
+			}
+		}
+
+		radv_query_shader(cmd_buffer, &cmd_buffer->device->meta_state.query.tfb_query_pipeline,
+		                  pool->bo, dst_buffer->bo,
+				  firstQuery * pool->stride,
+		                  dst_buffer->offset + dstOffset,
+		                  pool->stride, stride,
+				  queryCount, flags, 0, 0);
+		break;
 	default:
 		unreachable("trying to get results of unhandled query type");
 	}
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_bo.c
@@ -304,8 +304,12 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
 		return NULL;
 	}

+	unsigned virt_alignment = alignment;
+	if (size >= ws->info.pte_fragment_size)
+		virt_alignment = MAX2(virt_alignment, ws->info.pte_fragment_size);
+
 	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
-				  size, alignment, 0, &va, &va_handle,
+				  size, virt_alignment, 0, &va, &va_handle,
 				  (flags & RADEON_FLAG_32BIT ? AMDGPU_VA_RANGE_32_BIT : 0) |
 				   AMDGPU_VA_RANGE_HIGH);
 	if (r)
--- a/src/compiler/glsl/ast_to_hir.cpp
+++ b/src/compiler/glsl/ast_to_hir.cpp
@@ -892,7 +892,8 @@ validate_assignment(struct _mesa_glsl_parse_state *state,
   }
   if (unsized_array) {
      if (is_initializer) {
-         return rhs;
+         if (rhs->type->get_scalar_type() == lhs->type->get_scalar_type())
+            return rhs;
      } else {
         _mesa_glsl_error(&loc, state,
                          "implicitly sized arrays cannot be assigned");
--- a/src/compiler/glsl/serialize.cpp
+++ b/src/compiler/glsl/serialize.cpp
@@ -360,13 +360,20 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg)
   if (xfb_stage == ~0u)
      return;

+   if (shProg->TransformFeedback.VaryingNames)  {
+      for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; ++i)
+         free(shProg->TransformFeedback.VaryingNames[i]);
+   }
+
   /* Data set by glTransformFeedbackVaryings. */
   shProg->TransformFeedback.BufferMode = blob_read_uint32(metadata);
   blob_copy_bytes(metadata, &shProg->TransformFeedback.BufferStride,
                   sizeof(shProg->TransformFeedback.BufferStride));
   shProg->TransformFeedback.NumVarying = blob_read_uint32(metadata);
+
   shProg->TransformFeedback.VaryingNames = (char **)
-      malloc(shProg->TransformFeedback.NumVarying * sizeof(GLchar *));
+      realloc(shProg->TransformFeedback.VaryingNames,
+             shProg->TransformFeedback.NumVarying * sizeof(GLchar *));
   /* Note, malloc used with VaryingNames. */
   for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++)
      shProg->TransformFeedback.VaryingNames[i] =
--- a/src/compiler/nir/nir_linking_helpers.c
+++ b/src/compiler/nir/nir_linking_helpers.c
@@ -195,9 +195,12 @@ nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
 }

 static uint8_t
-get_interp_type(nir_variable *var, bool default_to_smooth_interp)
+get_interp_type(nir_variable *var, const struct glsl_type *type,
+                bool default_to_smooth_interp)
 {
-   if (var->data.interpolation != INTERP_MODE_NONE)
+   if (glsl_type_is_integer(type))
+      return INTERP_MODE_FLAT;
+   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
@@ -252,7 +255,7 @@ get_slot_component_masks_and_interp_types(struct exec_list *var_list,
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            interp_type[location + i] =
-               get_interp_type(var, default_to_smooth_interp);
+               get_interp_type(var, type, default_to_smooth_interp);
            interp_loc[location + i] = get_interp_loc(var);

            if (dual_slot) {
@@ -424,7 +427,7 @@ compact_components(nir_shader *producer, nir_shader *consumer, uint8_t *comps,
            continue;

         bool found_new_offset = false;
-         uint8_t interp = get_interp_type(var, default_to_smooth_interp);
+         uint8_t interp = get_interp_type(var, type, default_to_smooth_interp);
         for (; cursor[interp] < 32; cursor[interp]++) {
            uint8_t cursor_used_comps = comps[cursor[interp]];

--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -194,6 +194,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
   }

   case nir_op_unpack_64_2x32:
+   case nir_op_unpack_32_2x16:
      return false;

      LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
--- a/src/compiler/nir/nir_opt_if.c
+++ b/src/compiler/nir/nir_opt_if.c
@@ -391,6 +391,34 @@ evaluate_if_condition(nir_if *nif, nir_cursor cursor, bool *value)
   }
 }

+/* Create a copy of `alu` whose i-th source reads src_defs[i], insert it
+ * through builder `b`, and return the SSA def of the new instruction. */
+static nir_ssa_def *
+clone_alu_and_replace_src_defs(nir_builder *b, const nir_alu_instr *alu,
+                               nir_ssa_def **src_defs)
+{
+   nir_alu_instr *nalu = nir_alu_instr_create(b->shader, alu->op);
+   nalu->exact = alu->exact;
+
+   nir_ssa_dest_init(&nalu->instr, &nalu->dest.dest,
+                     alu->dest.dest.ssa.num_components,
+                     alu->dest.dest.ssa.bit_size, alu->dest.dest.ssa.name);
+   nalu->dest.saturate = alu->dest.saturate;
+   nalu->dest.write_mask = alu->dest.write_mask;
+
+   /* src_defs must hold one def per input of alu->op; the per-source
+    * modifiers and swizzles are carried over from the original. */
+   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
+      assert(alu->src[i].src.is_ssa);
+      nalu->src[i].src = nir_src_for_ssa(src_defs[i]);
+      nalu->src[i].negate = alu->src[i].negate;
+      nalu->src[i].abs = alu->src[i].abs;
+      memcpy(nalu->src[i].swizzle, alu->src[i].swizzle,
+             sizeof(nalu->src[i].swizzle));
+   }
+   nir_builder_instr_insert(b, &nalu->instr);
+   return &nalu->dest.dest.ssa;
+}
+
 /*
 * This propagates if condition evaluation down the chain of some alu
 * instructions. For example by checking the use of some of the following alu
@@ -448,7 +476,7 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
   if (!evaluate_if_condition(nif, b->cursor, &bool_value))
      return false;

-   nir_ssa_def *def[2] = {0};
+   nir_ssa_def *def[4] = {0};
   for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
      if (alu->src[i].src.ssa == use_src->ssa) {
         def[i] = nir_imm_bool(b, bool_value);
@@ -456,7 +484,8 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
         def[i] = alu->src[i].src.ssa;
      }
   }
-   nir_ssa_def *nalu = nir_build_alu(b, alu->op, def[0], def[1], NULL, NULL);
+
+   nir_ssa_def *nalu = clone_alu_and_replace_src_defs(b, alu, def);

   /* Rewrite use to use new alu instruction */
   nir_src new_src = nir_src_for_ssa(nalu);
@@ -472,14 +501,21 @@ propagate_condition_eval(nir_builder *b, nir_if *nif, nir_src *use_src,
 static bool
 can_propagate_through_alu(nir_src *src)
 {
-   if (src->parent_instr->type == nir_instr_type_alu &&
-       (nir_instr_as_alu(src->parent_instr)->op == nir_op_ior ||
-        nir_instr_as_alu(src->parent_instr)->op == nir_op_iand ||
-        nir_instr_as_alu(src->parent_instr)->op == nir_op_inot ||
-        nir_instr_as_alu(src->parent_instr)->op == nir_op_b2i))
-      return true;
+   if (src->parent_instr->type != nir_instr_type_alu)
+      return false;
 
-   return false;
+   nir_alu_instr *user = nir_instr_as_alu(src->parent_instr);
+   switch (user->op) {
+   case nir_op_ior:
+   case nir_op_iand:
+   case nir_op_inot:
+   case nir_op_b2i:
+      return true;
+   case nir_op_bcsel: /* only when src is source 0 of the bcsel */
+      return src == &user->src[0].src;
+   default:
+      return false;
+   }
 }

 static bool
--- a/src/compiler/nir_types.cpp
+++ b/src/compiler/nir_types.cpp
@@ -301,6 +301,11 @@ glsl_type_is_boolean(const struct glsl_type *type)
 {
   return type->is_boolean();
 }
+bool
+glsl_type_is_integer(const struct glsl_type *type)
+{
+   return type->is_integer();
+}

 const glsl_type *
 glsl_void_type(void)
--- a/src/compiler/nir_types.h
+++ b/src/compiler/nir_types.h
@@ -142,6 +142,7 @@ bool glsl_type_is_image(const struct glsl_type *type);
 bool glsl_type_is_dual_slot(const struct glsl_type *type);
 bool glsl_type_is_numeric(const struct glsl_type *type);
 bool glsl_type_is_boolean(const struct glsl_type *type);
+bool glsl_type_is_integer(const struct glsl_type *type);
 bool glsl_sampler_type_is_shadow(const struct glsl_type *type);
 bool glsl_sampler_type_is_array(const struct glsl_type *type);
 bool glsl_contains_atomic(const struct glsl_type *type);
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -1811,6 +1811,26 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
            src[j] = src_val->constant->values[0];
         }

+         /* fix up fixed size sources */
+         switch (op) {
+         case nir_op_ishl:
+         case nir_op_ishr:
+         case nir_op_ushr: {
+            if (bit_size == 32)
+               break;
+            for (unsigned i = 0; i < num_components; ++i) {
+               switch (bit_size) {
+               case 64: src[1].u32[i] = src[1].u64[i]; break;
+               case 16: src[1].u32[i] = src[1].u16[i]; break;
+               case  8: src[1].u32[i] = src[1].u8[i];  break;
+               }
+            }
+            break;
+         }
+         default:
+            break;
+         }
+
         val->constant->values[0] =
            nir_eval_const_opcode(op, num_components, bit_size, src);
         break;
--- a/src/compiler/spirv/vtn_alu.c
+++ b/src/compiler/spirv/vtn_alu.c
@@ -696,6 +696,17 @@ vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
         src[1] = tmp;
      }

+      switch (op) {
+      case nir_op_ishl:
+      case nir_op_ishr:
+      case nir_op_ushr:
+         if (src[1]->bit_size != 32)
+            src[1] = nir_u2u32(&b->nb, src[1]);
+         break;
+      default:
+         break;
+      }
+
      val->ssa->def = nir_build_alu(&b->nb, op, src[0], src[1], src[2], src[3]);
      break;
   } /* default */
--- a/src/egl/drivers/dri2/egl_dri2.c
+++ b/src/egl/drivers/dri2/egl_dri2.c
@@ -2309,7 +2309,7 @@ dri2_check_dma_buf_format(const _EGLImageAttribs *attrs)
 {
   unsigned plane_n = dri2_num_fourcc_format_planes(attrs->DMABufFourCC.Value);
   if (plane_n == 0) {
-      _eglError(EGL_BAD_ATTRIBUTE, "invalid format");
+      _eglError(EGL_BAD_MATCH, "unknown drm fourcc format");
      return 0;
   }

--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1127,13 +1127,22 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device)
   if (dri2_dpy->fd == -1) {
      _eglLog(_EGL_WARNING, "wayland-egl: could not open %s (%s)",
              dri2_dpy->device_name, strerror(errno));
+      free(dri2_dpy->device_name);
+      dri2_dpy->device_name = NULL;
      return;
   }

   if (drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER) {
      dri2_dpy->authenticated = true;
   } else {
-      drmGetMagic(dri2_dpy->fd, &magic);
+      if (drmGetMagic(dri2_dpy->fd, &magic)) {
+         close(dri2_dpy->fd);
+         dri2_dpy->fd = -1;
+         free(dri2_dpy->device_name);
+         dri2_dpy->device_name = NULL;
+         _eglLog(_EGL_WARNING, "wayland-egl: drmGetMagic failed");
+         return;
+      }
      wl_drm_authenticate(dri2_dpy->wl_drm, magic);
   }
 }
@@ -1661,8 +1670,8 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
   if (dri2_surf->back)
      return 0;

-   if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
-       dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {
+   if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
+       dri2_surf->base.Height != dri2_surf->wl_win->height) {

      dri2_wl_release_buffers(dri2_surf);

--- a/src/egl/generate/eglFunctionList.py
+++ b/src/egl/generate/eglFunctionList.py
@@ -196,8 +196,18 @@ EGL_FUNCTIONS = (
    # EGL_ANDROID_native_fence_sync
    _eglFunc("eglDupNativeFenceFDANDROID",           "display"),

+    # EGL_ANDROID_blob_cache
+    _eglFunc("eglSetBlobCacheFuncsANDROID",          "display"),
+
    # EGL_EXT_image_dma_buf_import_modifiers
    _eglFunc("eglQueryDmaBufFormatsEXT",             "display"),
    _eglFunc("eglQueryDmaBufModifiersEXT",           "display"),
+
+    # EGL_EXT_device_base
+    _eglFunc("eglQueryDeviceAttribEXT",              "device"),
+    _eglFunc("eglQueryDeviceStringEXT",              "device"),
+    _eglFunc("eglQueryDevicesEXT",                   "none"),
+    _eglFunc("eglQueryDisplayAttribEXT",             "display"),
+
 )

--- a/src/egl/main/egldispatchstubs.c
+++ b/src/egl/main/egldispatchstubs.c
@@ -59,6 +59,11 @@ static __eglMustCastToProperFunctionPointerType FetchVendorFunc(__EGLvendorInfo
    }
    if (func == NULL) {
        if (errorCode != EGL_SUCCESS) {
+            // Since we have no vendor, the follow-up eglGetError() call will
+            // end up using the GLVND error code. Set it here.
+            if (vendor == NULL) {
+                exports->setEGLError(errorCode);
+            }
            _eglError(errorCode, __EGL_DISPATCH_FUNC_NAMES[index]);
        }
        return NULL;
--- a/src/gallium/auxiliary/Android.mk
+++ b/src/gallium/auxiliary/Android.mk
@@ -36,7 +36,8 @@ LOCAL_SRC_FILES := \
 	util/u_debug_stack_android.cpp

 LOCAL_C_INCLUDES := \
-	$(GALLIUM_TOP)/auxiliary/util
+	$(GALLIUM_TOP)/auxiliary/util \
+	$(MESA_TOP)/src/util

 ifeq ($(MESA_ENABLE_LLVM),true)
 LOCAL_SRC_FILES += \
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -105,6 +105,12 @@ static void r600_destroy_context(struct pipe_context *context)
 	}
 	util_unreference_framebuffer_state(&rctx->framebuffer.state);

+	if (rctx->gs_rings.gsvs_ring.buffer)
+		pipe_resource_reference(&rctx->gs_rings.gsvs_ring.buffer, NULL);
+
+	if (rctx->gs_rings.esgs_ring.buffer)
+		pipe_resource_reference(&rctx->gs_rings.esgs_ring.buffer, NULL);
+
 	for (sh = 0; sh < PIPE_SHADER_TYPES; ++sh)
 		for (i = 0; i < PIPE_MAX_CONSTANT_BUFFERS; ++i)
 			rctx->b.b.set_constant_buffer(context, sh, i, NULL);
--- a/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
+++ b/src/gallium/drivers/r600/sb/sb_bc_builder.cpp
@@ -567,7 +567,7 @@ int bc_builder::build_fetch_gds(fetch_node *n) {
 	const fetch_op_info *fop = bc.op_ptr;
 	unsigned gds_op = (ctx.fetch_opcode(bc.op) >> 8) & 0x3f;
 	unsigned mem_op = 4;
-	assert(fop->flags && FF_GDS);
+	assert(fop->flags & FF_GDS);

 	if (bc.op == FETCH_OP_TF_WRITE) {
 		mem_op = 5;
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -580,10 +580,12 @@ static int si_get_video_param(struct pipe_screen *screen,
 		case PIPE_VIDEO_CAP_SUPPORTED:
 			return (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC &&
 				(si_vce_is_fw_version_supported(sscreen) ||
-				sscreen->info.family == CHIP_RAVEN)) ||
+				 sscreen->info.family == CHIP_RAVEN ||
+				 sscreen->info.family == CHIP_RAVEN2)) ||
 				(profile == PIPE_VIDEO_PROFILE_HEVC_MAIN &&
 				(sscreen->info.family == CHIP_RAVEN ||
-				si_radeon_uvd_enc_supported(sscreen)));
+				 sscreen->info.family == CHIP_RAVEN2 ||
+				 si_radeon_uvd_enc_supported(sscreen)));
 		case PIPE_VIDEO_CAP_NPOT_TEXTURES:
 			return 1;
 		case PIPE_VIDEO_CAP_MAX_WIDTH:
@@ -631,7 +633,8 @@ static int si_get_video_param(struct pipe_screen *screen,
 				return profile == PIPE_VIDEO_PROFILE_HEVC_MAIN;
 			return false;
 		case PIPE_VIDEO_FORMAT_JPEG:
-			if (sscreen->info.family == CHIP_RAVEN)
+			if (sscreen->info.family == CHIP_RAVEN ||
+			    sscreen->info.family == CHIP_RAVEN2)
 				return true;
 			if (sscreen->info.family < CHIP_CARRIZO || sscreen->info.family >= CHIP_VEGA10)
 				return false;
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -793,17 +793,10 @@ static void si_query_hw_do_emit_start(struct si_context *sctx,
 			emit_sample_streamout(cs, va + 32 * stream, stream);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
-		/* Write the timestamp from the CP not waiting for
-		 * outstanding draws (top-of-pipe).
-		 */
-		radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
-		radeon_emit(cs, COPY_DATA_COUNT_SEL |
-				COPY_DATA_SRC_SEL(COPY_DATA_TIMESTAMP) |
-				COPY_DATA_DST_SEL(COPY_DATA_DST_MEM));
-		radeon_emit(cs, 0);
-		radeon_emit(cs, 0);
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
+		si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
+				  EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
+				  EOP_DATA_SEL_TIMESTAMP, NULL, va,
+				  0, query->b.type);
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
--- a/src/gallium/drivers/radeonsi/si_state_msaa.c
+++ b/src/gallium/drivers/radeonsi/si_state_msaa.c
@@ -101,6 +101,10 @@ static const uint64_t centroid_priority_4x = 0x3210321032103210ull;
 static const uint32_t sample_locs_8x[] = {
 	FILL_SREG(-3,-5,   5, 1,  -1, 3,   7,-7),
 	FILL_SREG(-7,-1,   3, 7,  -5, 5,   1,-3),
+	/* The following are unused by hardware, but we emit them to IBs
+	 * instead of multiple SET_CONTEXT_REG packets. */
+	0,
+	0,
 };
 static const uint64_t centroid_priority_8x = 0x3546012735460127ull;

--- a/src/gallium/drivers/radeonsi/si_uvd.c
+++ b/src/gallium/drivers/radeonsi/si_uvd.c
@@ -146,7 +146,8 @@ struct pipe_video_codec *si_uvd_create_decoder(struct pipe_context *context,
 					       const struct pipe_video_codec *templ)
 {
 	struct si_context *ctx = (struct si_context *)context;
-	bool vcn = (ctx->family == CHIP_RAVEN) ? true : false;
+	bool vcn = ctx->family == CHIP_RAVEN ||
+		   ctx->family == CHIP_RAVEN2;

 	if (templ->entrypoint == PIPE_VIDEO_ENTRYPOINT_ENCODE) {
 		if (vcn) {
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -572,7 +572,15 @@ vc4_resource_create_with_modifiers(struct pipe_screen *pscreen,
                        goto fail;
        }

-        if (screen->ro && tmpl->bind & PIPE_BIND_SCANOUT) {
+        /* Set up the "scanout resource" (the dmabuf export of our buffer to
+         * the KMS handle) if the buffer might ever have
+         * resource_get_handle(WINSYS_HANDLE_TYPE_KMS) called on it.
+         * create_with_modifiers() doesn't give us usage flags, so we have to
+         * assume that all calls with modifiers are scanout-possible.
+         */
+        if (screen->ro &&
+            ((tmpl->bind & PIPE_BIND_SCANOUT) ||
+             !(count == 1 && modifiers[0] == DRM_FORMAT_MOD_INVALID))) {
                rsc->scanout =
                        renderonly_scanout_for_resource(prsc, screen->ro, NULL);
                if (!rsc->scanout)
--- a/src/gallium/state_trackers/nine/threadpool.c
+++ b/src/gallium/state_trackers/nine/threadpool.c
@@ -37,6 +37,7 @@
 #include "os/os_thread.h"
 #include "threadpool.h"

+/* POSIX thread function */
 static void *
 threadpool_worker(void *data)
 {
@@ -76,6 +77,15 @@ threadpool_worker(void *data)
    return NULL;
 }

+/* Thread entry point using the Windows ABI; it wraps the POSIX worker. */
+static DWORD NINE_WINAPI
+wthreadpool_worker(void *data)
+{
+    /* The worker's return value is not used here; always report 0. */
+    (void)threadpool_worker(data);
+    return 0;
+}
+
 struct threadpool *
 _mesa_threadpool_create(struct NineSwapChain9 *swapchain)
 {
@@ -87,7 +97,9 @@ _mesa_threadpool_create(struct NineSwapChain9 *swapchain)
    pthread_mutex_init(&pool->m, NULL);
    pthread_cond_init(&pool->new_work, NULL);

-    pool->wthread = NineSwapChain9_CreateThread(swapchain, threadpool_worker, pool);
+    /* This uses WINE's CreateThread, so the thread function needs to use
+     * the Windows ABI */
+    pool->wthread = NineSwapChain9_CreateThread(swapchain, wthreadpool_worker, pool);
    if (!pool->wthread) {
        /* using pthread as fallback */
        pthread_create(&pool->pthread, NULL, threadpool_worker, pool);
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -598,10 +598,8 @@ surface_from_external_memory(VADriverContextP ctx, vlVaSurface *surface,
   return VA_STATUS_SUCCESS;

 fail:
-   for (i = 0; i < VL_NUM_COMPONENTS; i++) {
-      if (resources[i])
-         pscreen->resource_destroy(pscreen, resources[i]);
-   }
+   for (i = 0; i < VL_NUM_COMPONENTS; i++)
+      pipe_resource_reference(&resources[i], NULL);
   return result;
 }

--- a/src/gallium/targets/d3dadapter9/meson.build
+++ b/src/gallium/targets/d3dadapter9/meson.build
@@ -53,7 +53,7 @@ libgallium_nine = shared_library(
    libswkmsdri,
  ],
  dependencies : [
-    dep_selinux, dep_expat, dep_libdrm, dep_llvm,
+    dep_selinux, dep_expat, dep_libdrm, dep_llvm, dep_thread,
    driver_swrast, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
    driver_i915, driver_svga,
  ],
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -1310,6 +1310,12 @@ static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
   if (bo) {
      p_atomic_inc(&bo->base.reference.count);
      simple_mtx_unlock(&ws->bo_export_table_lock);
+
+      /* Release the buffer handle, because we don't need it anymore.
+       * This function is returning an existing buffer, which has its own
+       * handle.
+       */
+      amdgpu_bo_free(result.buf_handle);
      return &bo->base;
   }

--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -280,6 +280,12 @@ amdgpu_winsys_create(int fd, const struct pipe_screen_config *config,
   if (ws) {
      pipe_reference(NULL, &ws->reference);
      simple_mtx_unlock(&dev_tab_mutex);
+
+      /* Release the device handle, because we don't need it anymore.
+       * This function is returning an existing winsys instance, which
+       * has its own device handle.
+       */
+      amdgpu_device_deinitialize(dev);
      return &ws->base;
   }

--- a/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
+++ b/src/gallium/winsys/virgl/vtest/virgl_vtest_winsys.c
@@ -559,7 +559,7 @@ virgl_cs_create_fence(struct virgl_winsys *vws)
   res = virgl_vtest_winsys_resource_cache_create(vws,
                                                PIPE_BUFFER,
                                                PIPE_FORMAT_R8_UNORM,
-                                                PIPE_BIND_CUSTOM,
+                                                VIRGL_BIND_CUSTOM,
                                                8, 1, 1, 0, 0, 0, 8);

   return (struct pipe_fence_handle *)res;
--- a/src/gbm/meson.build
+++ b/src/gbm/meson.build
@@ -32,7 +32,6 @@ args_gbm = []
 deps_gbm = []
 incs_gbm = [
  include_directories('main'), inc_include, inc_src, inc_loader,
-  inc_wayland_drm,
 ]

 if with_dri2
--- a/src/glx/Makefile.am
+++ b/src/glx/Makefile.am
@@ -24,10 +24,6 @@ SUBDIRS =

 EXTRA_DIST = SConscript meson.build

-if HAVE_XF86VIDMODE
-EXTRA_DEFINES_XF86VIDMODE = -DXF86VIDMODE
-endif
-
 AM_CFLAGS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/include/GL/internal \
@@ -38,7 +34,6 @@ AM_CFLAGS = \
 	-I$(top_builddir)/src/mapi/glapi \
 	-I$(top_srcdir)/src/mapi/glapi \
 	$(VISIBILITY_CFLAGS) \
-	$(EXTRA_DEFINES_XF86VIDMODE) \
 	-D_REENTRANT \
 	-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
 	$(DEFINES) \
--- a/src/glx/SConscript
+++ b/src/glx/SConscript
@@ -36,10 +36,7 @@ env.Prepend(LIBS = [
 env.PkgUseModules('X11')
 env.PkgUseModules('XCB')
 env.PkgUseModules('DRM')
-
-if env['HAVE_XF86VIDMODE']:
-    env.Append(CPPDEFINES = ['XF86VIDMODE'])
-    env.PkgUseModules('XF86VIDMODE')
+env.PkgUseModules('XF86VIDMODE')

 sources = [
    'clientattrib.c',
--- a/src/glx/glxcmds.c
+++ b/src/glx/glxcmds.c
@@ -46,11 +46,9 @@
 #include "util/debug.h"
 #else
 #include <sys/time.h>
-#ifdef XF86VIDMODE
 #include <X11/extensions/xf86vmode.h>
 #endif
 #endif
-#endif

 #include <X11/Xlib-xcb.h>
 #include <xcb/xcb.h>
@@ -2071,7 +2069,6 @@ _X_HIDDEN GLboolean
 __glxGetMscRate(struct glx_screen *psc,
 		int32_t * numerator, int32_t * denominator)
 {
-#ifdef XF86VIDMODE
   XF86VidModeModeLine mode_line;
   int dot_clock;
   int i;
@@ -2118,8 +2115,6 @@ __glxGetMscRate(struct glx_screen *psc,

      return True;
   }
-   else
-#endif

   return False;
 }
@@ -2145,7 +2140,7 @@ _X_HIDDEN GLboolean
 __glXGetMscRateOML(Display * dpy, GLXDrawable drawable,
                   int32_t * numerator, int32_t * denominator)
 {
-#if defined( GLX_DIRECT_RENDERING ) && defined( XF86VIDMODE )
+#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)
   __GLXDRIdrawable *draw = GetGLXDRIDrawable(dpy, drawable);

   if (draw == NULL)
--- a/src/glx/meson.build
+++ b/src/glx/meson.build
@@ -137,10 +137,6 @@ gl_lib_cargs = [
  '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
 ]

-if dep_xxf86vm.found()
-  gl_lib_cargs += '-DHAVE_XF86VIDMODE'
-endif
-
 libglx = static_library(
  'glx',
  [files_libglx, glx_generated],
@@ -167,7 +163,7 @@ if with_glx == 'dri'
    link_args : [ld_args_bsymbolic, ld_args_gc_sections, extra_ld_args_libgl],
    dependencies : [
      dep_libdrm, dep_dl, dep_m, dep_thread, dep_x11, dep_xcb_glx, dep_xcb,
-      dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage,
+      dep_x11_xcb, dep_xcb_dri2, dep_xext, dep_xfixes, dep_xdamage, dep_xxf86vm,
      extra_deps_libgl,
    ],
    version : gl_lib_version,
--- a/src/intel/common/gen_batch_decoder.c
+++ b/src/intel/common/gen_batch_decoder.c
@@ -214,7 +214,7 @@ handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
         surface_modify = iter.raw_value;
      } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
         dynamic_modify = iter.raw_value;
-      } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) {
+      } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
         instruction_modify = iter.raw_value;
      }
   }
--- a/src/intel/tools/aubinator_viewer_decoder.cpp
+++ b/src/intel/tools/aubinator_viewer_decoder.cpp
@@ -172,7 +172,7 @@ handle_state_base_address(struct aub_viewer_decode_ctx *ctx,
         surface_modify = iter.raw_value;
      } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
         dynamic_modify = iter.raw_value;
-      } else if (strcmp(iter.name, "Insntruction Base Address Modify Enable") == 0) {
+      } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
         instruction_modify = iter.raw_value;
      }
   }
--- a/src/intel/vulkan/anv_android.c
+++ b/src/intel/vulkan/anv_android.c
@@ -128,7 +128,7 @@ anv_image_from_gralloc(VkDevice device_h,
    */
   int dma_buf = gralloc_info->handle->data[0];

-   uint64_t bo_flags = 0;
+   uint64_t bo_flags = ANV_BO_EXTERNAL;
   if (device->instance->physicalDevice.supports_48bit_addresses)
      bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
   if (device->instance->physicalDevice.use_softpin)
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -636,7 +636,7 @@ VkResult anv_CreateInstance(
   }

   if (instance->app_info.api_version == 0)
-      anv_EnumerateInstanceVersion(&instance->app_info.api_version);
+      instance->app_info.api_version = VK_API_VERSION_1_0;

   instance->enabled_extensions = enabled_extensions;

--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -446,6 +446,9 @@ anv_pipeline_hash_graphics(struct anv_pipeline *pipeline,
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

+   const bool rba = pipeline->device->robust_buffer_access;
+   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
+
   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (stages[s].entrypoint)
         anv_pipeline_hash_shader(&ctx, &stages[s]);
@@ -466,6 +469,9 @@ anv_pipeline_hash_compute(struct anv_pipeline *pipeline,
   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

+   const bool rba = pipeline->device->robust_buffer_access;
+   _mesa_sha1_update(&ctx, &rba, sizeof(rba));
+
   anv_pipeline_hash_shader(&ctx, stage);

   _mesa_sha1_final(&ctx, sha1_out);
--- a/src/mapi/shared-glapi/meson.build
+++ b/src/mapi/shared-glapi/meson.build
@@ -40,7 +40,7 @@ libglapi = shared_library(
  'glapi',
  [files_mapi_glapi, files_mapi_util, shared_glapi_mapi_tmp_h],
  c_args : [
-    c_msvc_compat_args, '-DMAPI_MODE_GLAPI',
+    c_msvc_compat_args, c_vis_args, '-DMAPI_MODE_GLAPI',
    '-DMAPI_ABI_HEADER="@0@"'.format(shared_glapi_mapi_tmp_h.full_path()),
  ],
  link_args : [ld_args_gc_sections],
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1499,18 +1499,6 @@ update_buffer_image_param(struct brw_context *brw,
   param->stride[0] = _mesa_get_format_bytes(u->_ActualFormat);
 }

-static unsigned
-get_image_num_layers(const struct intel_mipmap_tree *mt, GLenum target,
-                     unsigned level)
-{
-   if (target == GL_TEXTURE_CUBE_MAP)
-      return 6;
-
-   return target == GL_TEXTURE_3D ?
-      minify(mt->surf.logical_level0_px.depth, level) :
-      mt->surf.logical_level0_px.array_len;
-}
-
 static void
 update_image_surface(struct brw_context *brw,
                     struct gl_image_unit *u,
@@ -1541,14 +1529,29 @@ update_image_surface(struct brw_context *brw,
      } else {
         struct intel_texture_object *intel_obj = intel_texture_object(obj);
         struct intel_mipmap_tree *mt = intel_obj->mt;
-         const unsigned num_layers = u->Layered ?
-            get_image_num_layers(mt, obj->Target, u->Level) : 1;
+
+         unsigned base_layer, num_layers;
+         if (u->Layered) {
+            if (obj->Target == GL_TEXTURE_3D) {
+               base_layer = 0;
+               num_layers = minify(mt->surf.logical_level0_px.depth, u->Level);
+            } else {
+               assert(obj->Immutable || obj->MinLayer == 0);
+               base_layer = obj->MinLayer;
+               num_layers = obj->Immutable ?
+                                obj->NumLayers :
+                                mt->surf.logical_level0_px.array_len;
+            }
+         } else {
+            base_layer = obj->MinLayer + u->_Layer;
+            num_layers = 1;
+         }

         struct isl_view view = {
            .format = format,
            .base_level = obj->MinLevel + u->Level,
            .levels = 1,
-            .base_array_layer = obj->MinLayer + u->_Layer,
+            .base_array_layer = base_layer,
            .array_len = num_layers,
            .swizzle = ISL_SWIZZLE_IDENTITY,
            .usage = ISL_SURF_USAGE_STORAGE_BIT,
--- a/src/mesa/drivers/dri/i965/genX_blorp_exec.c
+++ b/src/mesa/drivers/dri/i965/genX_blorp_exec.c
@@ -268,7 +268,7 @@ genX(blorp_exec)(struct blorp_batch *batch,
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;
   struct gl_context *ctx = &brw->ctx;
-   bool check_aperture_failed_once;
+   bool check_aperture_failed_once = false;

 #if GEN_GEN >= 11
   /* The PIPE_CONTROL command description says:
@@ -309,7 +309,7 @@ retry:
   intel_batchbuffer_require_space(brw, 1400);
   brw_require_statebuffer_space(brw, 600);
   intel_batchbuffer_save_state(brw);
-   check_aperture_failed_once = intel_batchbuffer_saved_state_is_empty(brw);
+   check_aperture_failed_once |= intel_batchbuffer_saved_state_is_empty(brw);
   brw->batch.no_wrap = true;

 #if GEN_GEN == 6
--- a/src/mesa/main/texgetimage.c
+++ b/src/mesa/main/texgetimage.c
@@ -900,8 +900,7 @@ select_tex_image(const struct gl_texture_object *texObj, GLenum target,

 /**
 * Error-check the offset and size arguments to
- * glGet[Compressed]TextureSubImage().  Also checks if the specified
- * texture image is missing.
+ * glGet[Compressed]TextureSubImage().
 * \return true if error, false if no error.
 */
 static bool
@@ -913,6 +912,7 @@ dimensions_error_check(struct gl_context *ctx,
                       const char *caller)
 {
   const struct gl_texture_image *texImage;
+   GLuint imageWidth = 0, imageHeight = 0, imageDepth = 0;

   if (xoffset < 0) {
      _mesa_error(ctx, GL_INVALID_VALUE, "%s(xoffset = %d)", caller, xoffset);
@@ -981,82 +981,44 @@ dimensions_error_check(struct gl_context *ctx,
                     "%s(zoffset + depth = %d)", caller, zoffset + depth);
         return true;
      }
-      /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"):
-       *
-       *   "An INVALID_OPERATION error is generated by GetTextureImage if the
-       *   effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY ,
-       *   and the texture object is not cube complete or cube array complete,
-       *   respectively."
-       *
-       * This applies also to GetTextureSubImage, GetCompressedTexImage,
-       * GetCompressedTextureImage, and GetnCompressedTexImage.
-       */
-      if (!_mesa_cube_complete(texObj)) {
-         _mesa_error(ctx, GL_INVALID_OPERATION,
-                     "%s(cube incomplete)", caller);
-         return true;
-      }
      break;
   default:
      ; /* nothing */
   }

   texImage = select_tex_image(texObj, target, level, zoffset);
-   if (!texImage) {
-      /* Trying to return a non-defined level is a valid operation per se, as
-       * OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries") does not
-       * handle this case as an error.
-       *
-       * Rather, we need to look at section 8.22 ("Texture State and Proxy
-       * State"):
-       *
-       *   "Each initial texture image is null. It has zero width, height, and
-       *    depth, internal format RGBA, or R8 for buffer textures, component
-       *    sizes set to zero and component types set to NONE, the compressed
-       *    flag set to FALSE, a zero compressed size, and the bound buffer
-       *    object name is zero."
-       *
-       * This means we need to assume the image for the non-defined level is
-       * an empty image. With this assumption, we can go back to section
-       * 8.11.4 and checking again the errors:
-       *
-       *   "An INVALID_VALUE error is generated if xoffset + width is greater
-       *    than the texture’s width, yoffset + height is greater than the
-       *    texture’s height, or zoffset + depth is greater than the texture’s
-       *    depth."
-       *
-       * Thus why we return INVALID_VALUE.
-       */
-      _mesa_error(ctx, GL_INVALID_VALUE, "%s(missing image)", caller);
-      return true;
+   if (texImage) {
+      imageWidth = texImage->Width;
+      imageHeight = texImage->Height;
+      imageDepth = texImage->Depth;
   }

-   if (xoffset + width > texImage->Width) {
+   if (xoffset + width > imageWidth) {
      _mesa_error(ctx, GL_INVALID_VALUE,
                  "%s(xoffset %d + width %d > %u)",
-                  caller, xoffset, width, texImage->Width);
+                  caller, xoffset, width, imageWidth);
      return true;
   }

-   if (yoffset + height > texImage->Height) {
+   if (yoffset + height > imageHeight) {
      _mesa_error(ctx, GL_INVALID_VALUE,
                  "%s(yoffset %d + height %d > %u)",
-                  caller, yoffset, height, texImage->Height);
+                  caller, yoffset, height, imageHeight);
      return true;
   }

   if (target != GL_TEXTURE_CUBE_MAP) {
      /* Cube map error checking was done above */
-      if (zoffset + depth > texImage->Depth) {
+      if (zoffset + depth > imageDepth) {
         _mesa_error(ctx, GL_INVALID_VALUE,
                     "%s(zoffset %d + depth %d > %u)",
-                     caller, zoffset, depth, texImage->Depth);
+                     caller, zoffset, depth, imageDepth);
         return true;
      }
   }

   /* Extra checks for compressed textures */
-   {
+   if (texImage) {
      GLuint bw, bh, bd;
      _mesa_get_format_block_size_3d(texImage->TexFormat, &bw, &bh, &bd);
      if (bw > 1 || bh > 1 || bd > 1) {
@@ -1162,53 +1124,15 @@ pbo_error_check(struct gl_context *ctx, GLenum target,


 /**
- * Do error checking for all (non-compressed) get-texture-image functions.
- * \return true if any error, false if no errors.
+ * Do teximage-related error checking for getting uncompressed images.
+ * \return true if there was an error
 */
 static bool
-getteximage_error_check(struct gl_context *ctx,
-                        struct gl_texture_object *texObj,
-                        GLenum target, GLint level,
-                        GLint xoffset, GLint yoffset, GLint zoffset,
-                        GLsizei width, GLsizei height, GLsizei depth,
-                        GLenum format, GLenum type, GLsizei bufSize,
-                        GLvoid *pixels, const char *caller)
+teximage_error_check(struct gl_context *ctx,
+                     struct gl_texture_image *texImage,
+                     GLenum format, const char *caller)
 {
-   struct gl_texture_image *texImage;
-   GLenum baseFormat, err;
-   GLint maxLevels;
-
-   assert(texObj);
-
-   if (texObj->Target == 0) {
-      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
-      return true;
-   }
-
-   maxLevels = _mesa_max_texture_levels(ctx, target);
-   if (level < 0 || level >= maxLevels) {
-      _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
-      return true;
-   }
-
-   err = _mesa_error_check_format_and_type(ctx, format, type);
-   if (err != GL_NO_ERROR) {
-      _mesa_error(ctx, err, "%s(format/type)", caller);
-      return true;
-   }
-
-   if (dimensions_error_check(ctx, texObj, target, level,
-                              xoffset, yoffset, zoffset,
-                              width, height, depth, caller)) {
-      return true;
-   }
-
-   if (pbo_error_check(ctx, target, width, height, depth,
-                       format, type, bufSize, pixels, caller)) {
-      return true;
-   }
-
-   texImage = select_tex_image(texObj, target, level, zoffset);
+   GLenum baseFormat;
   assert(texImage);

   /*
@@ -1241,8 +1165,8 @@ getteximage_error_check(struct gl_context *ctx,
      return true;
   }
   else if (_mesa_is_stencil_format(format)
-	    && !_mesa_is_depthstencil_format(baseFormat)
-	    && !_mesa_is_stencil_format(baseFormat)) {
+            && !_mesa_is_depthstencil_format(baseFormat)
+            && !_mesa_is_stencil_format(baseFormat)) {
      _mesa_error(ctx, GL_INVALID_OPERATION,
                  "%s(format mismatch)", caller);
      return true;
@@ -1271,6 +1195,142 @@ getteximage_error_check(struct gl_context *ctx,
 }


+/**
+ * Do the texture-object, level and format/type checks shared by all uncompressed image queries.
+ * \return true if there was an error
+ */
+static bool
+common_error_check(struct gl_context *ctx,
+                   struct gl_texture_object *texObj,
+                   GLenum target, GLint level,
+                   GLsizei width, GLsizei height, GLsizei depth,
+                   GLenum format, GLenum type, GLsizei bufSize,
+                   GLvoid *pixels, const char *caller)
+{
+   GLenum err;
+   GLint maxLevels;
+
+   if (texObj->Target == 0) {
+      _mesa_error(ctx, GL_INVALID_OPERATION, "%s(invalid texture)", caller);
+      return true;
+   }
+
+   maxLevels = _mesa_max_texture_levels(ctx, target);
+   if (level < 0 || level >= maxLevels) {
+      _mesa_error(ctx, GL_INVALID_VALUE, "%s(level = %d)", caller, level);
+      return true;
+   }
+
+   err = _mesa_error_check_format_and_type(ctx, format, type);
+   if (err != GL_NO_ERROR) {
+      _mesa_error(ctx, err, "%s(format/type)", caller);
+      return true;
+   }
+
+   /* According to OpenGL 4.6 spec, section 8.11.4 ("Texture Image Queries"):
+    *
+    *   "An INVALID_OPERATION error is generated by GetTextureImage if the
+    *   effective target is TEXTURE_CUBE_MAP or TEXTURE_CUBE_MAP_ARRAY ,
+    *   and the texture object is not cube complete or cube array complete,
+    *   respectively."
+    *
+    * This applies also to GetTextureSubImage, GetCompressedTexImage,
+    * GetCompressedTextureImage, and GetnCompressedTexImage.
+    */
+   if (target == GL_TEXTURE_CUBE_MAP && !_mesa_cube_complete(texObj)) {
+      _mesa_error(ctx, GL_INVALID_OPERATION,
+                  "%s(cube incomplete)", caller);
+      return true;
+   }
+
+   return false;
+}
+
+
+/**
+ * Do error checking for all (non-compressed) get-texture-image functions.
+ * \return true if any error, false if no errors.
+ */
+static bool
+getteximage_error_check(struct gl_context *ctx,
+                        struct gl_texture_object *texObj,
+                        GLenum target, GLint level,
+                        GLsizei width, GLsizei height, GLsizei depth,
+                        GLenum format, GLenum type, GLsizei bufSize,
+                        GLvoid *pixels, const char *caller)
+{
+   struct gl_texture_image *texImage;
+
+   assert(texObj);
+
+   if (common_error_check(ctx, texObj, target, level, width, height, depth,
+                          format, type, bufSize, pixels, caller)) {
+      return true;
+   }
+
+   if (width == 0 || height == 0 || depth == 0) {
+      /* Not an error, but nothing to do.  Return 'true' so that the
+       * caller simply returns.
+       */
+      return true;
+   }
+
+   if (pbo_error_check(ctx, target, width, height, depth,
+                       format, type, bufSize, pixels, caller)) {
+      return true;
+   }
+
+   texImage = select_tex_image(texObj, target, level, 0);
+   if (teximage_error_check(ctx, texImage, format, caller)) {
+      return true;
+   }
+
+   return false;
+}
+
+
+/**
+ * Do error checking for the (non-compressed) glGetTextureSubImage() function.
+ * \return true if any error, false if no errors.
+ */
+static bool
+gettexsubimage_error_check(struct gl_context *ctx,
+                           struct gl_texture_object *texObj,
+                           GLenum target, GLint level,
+                           GLint xoffset, GLint yoffset, GLint zoffset,
+                           GLsizei width, GLsizei height, GLsizei depth,
+                           GLenum format, GLenum type, GLsizei bufSize,
+                           GLvoid *pixels, const char *caller)
+{
+   struct gl_texture_image *texImage;
+
+   assert(texObj);
+
+   if (common_error_check(ctx, texObj, target, level, width, height, depth,
+                          format, type, bufSize, pixels, caller)) {
+      return true;
+   }
+
+   if (dimensions_error_check(ctx, texObj, target, level,
+                              xoffset, yoffset, zoffset,
+                              width, height, depth, caller)) {
+      return true;
+   }
+
+   if (pbo_error_check(ctx, target, width, height, depth,
+                       format, type, bufSize, pixels, caller)) {
+      return true;
+   }
+
+   texImage = select_tex_image(texObj, target, level, zoffset);
+   if (teximage_error_check(ctx, texImage, format, caller)) {
+      return true;
+   }
+
+   return false;
+}
+
+
 /**
 * Return the width, height and depth of a texture image.
 * This function must be resilient to bad parameter values since
@@ -1399,7 +1459,7 @@ _mesa_GetnTexImageARB(GLenum target, GLint level, GLenum format, GLenum type,
   get_texture_image_dims(texObj, target, level, &width, &height, &depth);

   if (getteximage_error_check(ctx, texObj, target, level,
-                               0, 0, 0, width, height, depth,
+                               width, height, depth,
                               format, type, bufSize, pixels, caller)) {
      return;
   }
@@ -1430,7 +1490,7 @@ _mesa_GetTexImage(GLenum target, GLint level, GLenum format, GLenum type,
   get_texture_image_dims(texObj, target, level, &width, &height, &depth);

   if (getteximage_error_check(ctx, texObj, target, level,
-                               0, 0, 0, width, height, depth,
+                               width, height, depth,
                               format, type, INT_MAX, pixels, caller)) {
      return;
   }
@@ -1464,7 +1524,7 @@ _mesa_GetTextureImage(GLuint texture, GLint level, GLenum format, GLenum type,
                          &width, &height, &depth);

   if (getteximage_error_check(ctx, texObj, texObj->Target, level,
-                               0, 0, 0, width, height, depth,
+                               width, height, depth,
                               format, type, bufSize, pixels, caller)) {
      return;
   }
@@ -1497,9 +1557,10 @@ _mesa_GetTextureSubImage(GLuint texture, GLint level,
      return;
   }

-   if (getteximage_error_check(ctx, texObj, texObj->Target, level,
-                               xoffset, yoffset, zoffset, width, height, depth,
-                               format, type, bufSize, pixels, caller)) {
+   if (gettexsubimage_error_check(ctx, texObj, texObj->Target, level,
+                                  xoffset, yoffset, zoffset,
+                                  width, height, depth,
+                                  format, type, bufSize, pixels, caller)) {
      return;
   }

--- a/src/mesa/main/transformfeedback.c
+++ b/src/mesa/main/transformfeedback.c
@@ -40,6 +40,7 @@
 #include "shaderapi.h"
 #include "shaderobj.h"

+#include "program/program.h"
 #include "program/prog_parameter.h"

 struct using_program_tuple
@@ -470,6 +471,7 @@ begin_transform_feedback(struct gl_context *ctx, GLenum mode, bool no_error)

   if (obj->program != source) {
      ctx->NewDriverState |= ctx->DriverFlags.NewTransformFeedbackProg;
+      _mesa_reference_program_(ctx, &obj->program, source);
      obj->program = source;
   }

@@ -504,6 +506,7 @@ end_transform_feedback(struct gl_context *ctx,
   assert(ctx->Driver.EndTransformFeedback);
   ctx->Driver.EndTransformFeedback(ctx, obj);

+   _mesa_reference_program_(ctx, &obj->program, NULL);
   ctx->TransformFeedback.CurrentObject->Active = GL_FALSE;
   ctx->TransformFeedback.CurrentObject->Paused = GL_FALSE;
   ctx->TransformFeedback.CurrentObject->EndedAnytime = GL_TRUE;
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -1069,15 +1069,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
       * of the referenced drawables no longer exist.
       */
      st_framebuffers_purge(st);
-
-      /* Notify the driver that the context thread may have been changed.
-       * This should pin all driver threads to a specific L3 cache for optimal
-       * performance on AMD Zen CPUs.
-       */
-      struct glthread_state *glthread = st->ctx->GLThread;
-      thrd_t *upper_thread = glthread ? &glthread->queue.threads[0] : NULL;
-
-      util_context_thread_changed(st->pipe, upper_thread);
   }
   else {
      ret = _mesa_make_current(NULL, NULL, NULL);
--- a/src/meson.build
+++ b/src/meson.build
@@ -51,8 +51,12 @@ subdir('util')
 subdir('mapi')
 # TODO: opengl
 subdir('compiler')
-subdir('egl/wayland/wayland-drm')
-subdir('vulkan')
+if with_platform_wayland
+  subdir('egl/wayland/wayland-drm')
+endif
+if with_any_vk
+  subdir('vulkan')
+endif
 if with_gallium_radeonsi or with_amd_vk
  subdir('amd')
 endif
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -60,7 +60,8 @@ libmesautil_la_LIBADD = \
 	$(PTHREAD_LIBS) \
 	$(CLOCK_LIB) \
 	$(ZLIB_LIBS) \
-	$(LIBATOMIC_LIBS)
+	$(LIBATOMIC_LIBS) \
+	-lm

 libxmlconfig_la_SOURCES = $(XMLCONFIG_FILES)
 libxmlconfig_la_CFLAGS = \
--- a/src/util/meson.build
+++ b/src/util/meson.build
@@ -119,7 +119,7 @@ libmesa_util = static_library(
  'mesa_util',
  [files_mesa_util, format_srgb],
  include_directories : inc_common,
-  dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic],
+  dependencies : [dep_zlib, dep_clock, dep_thread, dep_atomic, dep_m],
  c_args : [c_msvc_compat_args, c_vis_args],
  build_by_default : false
 )
--- a/src/util/ralloc.c
+++ b/src/util/ralloc.c
@@ -554,10 +554,18 @@ ralloc_vasprintf_rewrite_tail(char **str, size_t *start, const char *fmt,
 */

 #define MIN_LINEAR_BUFSIZE 2048
-#define SUBALLOC_ALIGNMENT sizeof(uintptr_t)
+#define SUBALLOC_ALIGNMENT 8
 #define LMAGIC 0x87b9c7d3

-struct linear_header {
+struct
+#ifdef _MSC_VER
+ __declspec(align(8))
+#elif defined(__LP64__)
+ __attribute__((aligned(16)))
+#else
+ __attribute__((aligned(8)))
+#endif
+   linear_header {
 #ifdef DEBUG
   unsigned magic;   /* for debugging */
 #endif
@@ -651,6 +659,8 @@ linear_alloc_child(void *parent, unsigned size)
   ptr = (linear_size_chunk *)((char*)&latest[1] + latest->offset);
   ptr->size = size;
   latest->offset += full_size;
+
+   assert((uintptr_t)&ptr[1] % SUBALLOC_ALIGNMENT == 0);
   return &ptr[1];
 }

--- a/src/vulkan/wsi/wsi_common.c
+++ b/src/vulkan/wsi/wsi_common.c
@@ -954,8 +954,8 @@ wsi_common_queue_present(const struct wsi_device *wsi,
         /* We only need/want to wait on semaphores once.  After that, we're
          * guaranteed ordering since it all happens on the same queue.
          */
-         submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount,
-         submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores,
+         submit_info.waitSemaphoreCount = pPresentInfo->waitSemaphoreCount;
+         submit_info.pWaitSemaphores = pPresentInfo->pWaitSemaphores;

         /* Set up the pWaitDstStageMasks */
         stage_flags = vk_alloc(&swapchain->alloc,
--- a/src/vulkan/wsi/wsi_common_display.c
+++ b/src/vulkan/wsi/wsi_common_display.c
@@ -1062,6 +1062,8 @@ wsi_display_swapchain_destroy(struct wsi_swapchain *drv_chain,

   for (uint32_t i = 0; i < chain->base.image_count; i++)
      wsi_display_image_finish(drv_chain, allocator, &chain->images[i]);
+
+   wsi_swapchain_finish(&chain->base);
   vk_free(allocator, chain);
   return VK_SUCCESS;
 }
--- a/src/vulkan/wsi/wsi_common_wayland.c
+++ b/src/vulkan/wsi/wsi_common_wayland.c
@@ -455,10 +455,11 @@ wsi_wl_get_presentation_support(struct wsi_device *wsi_device,
      (struct wsi_wayland *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_WAYLAND];

   struct wsi_wl_display display;
-   int ret = wsi_wl_display_init(wsi, &display, wl_display, false);
-   wsi_wl_display_finish(&display);
+   VkResult ret = wsi_wl_display_init(wsi, &display, wl_display, false);
+   if (ret == VK_SUCCESS)
+      wsi_wl_display_finish(&display);

-   return ret == 0;
+   return ret == VK_SUCCESS;
 }

 static VkResult