Update version to 17.2.0-rc4

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
radv: force cs/ps/l2 flush at end of command stream. (v2)
2017-08-12 17:04:27 +01:00 · 2017-08-11 21:00:49 +01:00 · 2017-08-11 21:00:46 +01:00 · 2017-08-11 21:00:43 +01:00 · 2017-08-11 21:00:40 +01:00 · 2017-08-11 21:00:37 +01:00
50 changed files with 443 additions and 373 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-17.2.0-rc2
+17.2.0-rc4
--- a/configure.ac
+++ b/configure.ac
@@ -2551,7 +2551,7 @@ if test -n "$with_gallium_drivers"; then
            if test "x$HAVE_SWR_AVX" != xyes -a \
                    "x$HAVE_SWR_AVX2" != xyes -a \
                    "x$HAVE_SWR_KNL" != xyes -a \
-                    "x$HAVE_SWR_SKX" != xyes -a; then
+                    "x$HAVE_SWR_SKX" != xyes; then
               AC_MSG_ERROR([swr enabled but no swr architectures selected])
            fi

--- a/include/GLES/gl.h
+++ b/include/GLES/gl.h
@@ -50,9 +50,22 @@ extern "C" {

 #ifndef GL_VERSION_ES_CM_1_0
 #define GL_VERSION_ES_CM_1_0 1
+
+/*
+ * XXX: Temporary fix; needs to be reverted as part of the next
+ * header update.
+ * For more details:
+ * https://github.com/KhronosGroup/OpenGL-Registry/pull/76
+ * https://lists.freedesktop.org/archives/mesa-dev/2017-June/161647.html
+ */
+#include <KHR/khrplatform.h>
+typedef khronos_int8_t GLbyte;
+typedef khronos_float_t GLclampf;
+typedef short GLshort;
+typedef unsigned short GLushort;
+
 typedef void GLvoid;
 typedef unsigned int GLenum;
-#include <KHR/khrplatform.h>
 typedef khronos_float_t GLfloat;
 typedef khronos_int32_t GLfixed;
 typedef unsigned int GLuint;
--- a/include/GLES/glext.h
+++ b/include/GLES/glext.h
@@ -104,7 +104,6 @@ GL_API void GL_APIENTRY glBlendEquationOES (GLenum mode);

 #ifndef GL_OES_byte_coordinates
 #define GL_OES_byte_coordinates 1
-typedef khronos_int8_t GLbyte;
 #endif /* GL_OES_byte_coordinates */

 #ifndef GL_OES_compressed_ETC1_RGB8_sub_texture
@@ -128,7 +127,6 @@ typedef khronos_int8_t GLbyte;

 #ifndef GL_OES_draw_texture
 #define GL_OES_draw_texture 1
-typedef short GLshort;
 #define GL_TEXTURE_CROP_RECT_OES          0x8B9D
 typedef void (GL_APIENTRYP PFNGLDRAWTEXSOESPROC) (GLshort x, GLshort y, GLshort z, GLshort width, GLshort height);
 typedef void (GL_APIENTRYP PFNGLDRAWTEXIOESPROC) (GLint x, GLint y, GLint z, GLint width, GLint height);
@@ -409,7 +407,6 @@ GL_API GLbitfield GL_APIENTRY glQueryMatrixxOES (GLfixed *mantissa, GLint *expon

 #ifndef GL_OES_single_precision
 #define GL_OES_single_precision 1
-typedef khronos_float_t GLclampf;
 typedef void (GL_APIENTRYP PFNGLCLEARDEPTHFOESPROC) (GLclampf depth);
 typedef void (GL_APIENTRYP PFNGLCLIPPLANEFOESPROC) (GLenum plane, const GLfloat *equation);
 typedef void (GL_APIENTRYP PFNGLDEPTHRANGEFOESPROC) (GLclampf n, GLclampf f);
--- a/src/amd/common/ac_binary.c
+++ b/src/amd/common/ac_binary.c
@@ -109,7 +109,7 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
 	}
 }

-void ac_elf_read(const char *elf_data, unsigned elf_size,
+bool ac_elf_read(const char *elf_data, unsigned elf_size,
 		 struct ac_shader_binary *binary)
 {
 	char *elf_buffer;
@@ -118,6 +118,7 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 	Elf_Data *symbols = NULL, *relocs = NULL;
 	size_t section_str_index;
 	unsigned symbol_sh_link = 0;
+	bool success = true;

 	/* One of the libelf implementations
 	 * (http://www.mr511.de/software/english.htm) requires calling
@@ -137,7 +138,8 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 		GElf_Shdr section_header;
 		if (gelf_getshdr(section, &section_header) != &section_header) {
 			fprintf(stderr, "Failed to read ELF section header\n");
-			return;
+			success = false;
+			break;
 		}
 		name = elf_strptr(elf, section_str_index, section_header.sh_name);
 		if (!strcmp(name, ".text")) {
@@ -148,6 +150,11 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 		} else if (!strcmp(name, ".AMDGPU.config")) {
 			section_data = elf_getdata(section, section_data);
 			binary->config_size = section_data->d_size;
+			if (!binary->config_size) {
+				fprintf(stderr, ".AMDGPU.config is empty!\n");
+				success = false;
+				break;
+			}
 			binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
 			memcpy(binary->config, section_data->d_buf, binary->config_size);
 		} else if (!strcmp(name, ".AMDGPU.disasm")) {
@@ -186,6 +193,7 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
 		binary->global_symbol_count = 1;
 		binary->config_size_per_symbol = binary->config_size;
 	}
+	return success;
 }

 const unsigned char *ac_shader_binary_config_start(
--- a/src/amd/common/ac_binary.h
+++ b/src/amd/common/ac_binary.h
@@ -83,7 +83,7 @@ struct ac_shader_config {
 * Parse the elf binary stored in \p elf_data and create a
 * ac_shader_binary object.
 */
-void ac_elf_read(const char *elf_data, unsigned elf_size,
+bool ac_elf_read(const char *elf_data, unsigned elf_size,
 		 struct ac_shader_binary *binary);

 /**
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -795,21 +795,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	      bool has_ds_bpermute,
 	      uint32_t mask,
 	      int idx,
-	      LLVMValueRef lds,
 	      LLVMValueRef val)
 {
-	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
+	LLVMValueRef tl, trbl, args[2];
 	LLVMValueRef result;

-	thread_id = ac_get_thread_id(ctx);
-
-	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
-			      LLVMConstInt(ctx->i32, mask, false), "");
-
-	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
-				LLVMConstInt(ctx->i32, idx, false), "");
-
 	if (has_ds_bpermute) {
+		LLVMValueRef thread_id, tl_tid, trbl_tid;
+		thread_id = ac_get_thread_id(ctx);
+
+		tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
+				      LLVMConstInt(ctx->i32, mask, false), "");
+
+		trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
+					LLVMConstInt(ctx->i32, idx, false), "");
+
 		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
 				       LLVMConstInt(ctx->i32, 4, false), "");
 		args[1] = val;
@@ -827,15 +827,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 					  AC_FUNC_ATTR_READNONE |
 					  AC_FUNC_ATTR_CONVERGENT);
 	} else {
-		LLVMValueRef store_ptr, load_ptr0, load_ptr1;
+		uint32_t masks[2];

-		store_ptr = ac_build_gep0(ctx, lds, thread_id);
-		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
-		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
+		switch (mask) {
+		case AC_TID_MASK_TOP_LEFT:
+			masks[0] = 0x8000;
+			if (idx == 1)
+				masks[1] = 0x8055;
+			else
+				masks[1] = 0x80aa;

-		LLVMBuildStore(ctx->builder, val, store_ptr);
-		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
-		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
+			break;
+		case AC_TID_MASK_TOP:
+			masks[0] = 0x8044;
+			masks[1] = 0x80ee;
+			break;
+		case AC_TID_MASK_LEFT:
+			masks[0] = 0x80a0;
+			masks[1] = 0x80f5;
+			break;
+		}
+
+		args[0] = val;
+		args[1] = LLVMConstInt(ctx->i32, masks[0], false);
+
+		tl = ac_build_intrinsic(ctx,
+					"llvm.amdgcn.ds.swizzle", ctx->i32,
+					args, 2,
+					AC_FUNC_ATTR_READNONE |
+					AC_FUNC_ATTR_CONVERGENT);
+
+		args[1] = LLVMConstInt(ctx->i32, masks[1], false);
+		trbl = ac_build_intrinsic(ctx,
+					"llvm.amdgcn.ds.swizzle", ctx->i32,
+					args, 2,
+					AC_FUNC_ATTR_READNONE |
+					AC_FUNC_ATTR_CONVERGENT);
 	}

 	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
--- a/src/amd/common/ac_llvm_build.h
+++ b/src/amd/common/ac_llvm_build.h
@@ -173,7 +173,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 	      bool has_ds_bpermute,
 	      uint32_t mask,
 	      int idx,
-	      LLVMValueRef lds,
 	      LLVMValueRef val);

 #define AC_SENDMSG_GS 2
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -1178,7 +1178,17 @@ static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx,
 		 */
 		LLVMConstInt(ctx->i1, 1, false),
 	};
-	return ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
+
+	LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
+					      params, 2,
+					      AC_FUNC_ATTR_READNONE);
+
+	/* TODO: We need an intrinsic to skip this conditional. */
+	/* Check for zero: */
+	return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
+							   LLVMIntEQ, src0,
+							   ctx->i32_0, ""),
+			       LLVMConstInt(ctx->i32, -1, 0), lsb, "");
 }

 static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx,
@@ -1305,7 +1315,6 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
 	src0 = to_float(&ctx->ac, src0);
 	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

-	/* TODO SI/CIK options here */
 	if (ctx->options->chip_class >= VI) {
 		LLVMValueRef args[2];
 		/* Check if the result is a denormal - and flush to 0 if so. */
@@ -1319,7 +1328,22 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,

 	if (ctx->options->chip_class >= VI)
 		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
-
+	else {
+		/* for SI/CIK */
+		/* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
+		 * so compare the result and flush to 0 if it's smaller.
+		 */
+		LLVMValueRef temp, cond2;
+		temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
+					    ctx->f32, result);
+		cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
+				     LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
+				     temp, "");
+		cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
+				      temp, ctx->f32zero, "");
+		cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
+		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
+	}
 	return result;
 }

@@ -1443,11 +1467,6 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
 	int idx;
 	LLVMValueRef result;

-	if (!ctx->lds && !ctx->has_ds_bpermute)
-		ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
-						       LLVMArrayType(ctx->i32, 64),
-						       "ddxy_lds", LOCAL_ADDR_SPACE);
-
 	if (op == nir_op_fddx_fine || op == nir_op_fddx)
 		mask = AC_TID_MASK_LEFT;
 	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
@@ -1464,7 +1483,7 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
 		idx = 2;

 	result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
-			      mask, idx, ctx->lds,
+			      mask, idx,
 			      src0);
 	return result;
 }
@@ -5180,6 +5199,7 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 		unsigned index = target - V_008DFC_SQ_EXP_MRT;
 		unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
 		bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
+		bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1;

 		switch(col_format) {
 		case V_028714_SPI_SHADER_ZERO:
@@ -5257,11 +5277,13 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 			break;

 		case V_028714_SPI_SHADER_UINT16_ABGR: {
-			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
+			LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+							    is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
+			LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);

 			for (unsigned chan = 0; chan < 4; chan++) {
 				val[chan] = to_integer(&ctx->ac, values[chan]);
-				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], max);
+				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
 			}

 			args->compr = 1;
@@ -5271,14 +5293,18 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
 		}

 		case V_028714_SPI_SHADER_SINT16_ABGR: {
-			LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
-			LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
+			LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
+							    is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
+			LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
+							    is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
+			LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->i32one;
+			LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);

 			/* Clamp. */
 			for (unsigned chan = 0; chan < 4; chan++) {
 				val[chan] = to_integer(&ctx->ac, values[chan]);
-				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], max);
-				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], min);
+				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
+				val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
 			}

 			args->compr = 1;
--- a/src/amd/common/ac_nir_to_llvm.h
+++ b/src/amd/common/ac_nir_to_llvm.h
@@ -57,6 +57,7 @@ struct ac_tcs_variant_key {
 struct ac_fs_variant_key {
 	uint32_t col_format;
 	uint32_t is_int8;
+	uint32_t is_int10;
 };

 union ac_shader_variant_key {
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2233,8 +2233,11 @@ VkResult radv_EndCommandBuffer(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

-	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
+	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
+		if (cmd_buffer->device->physical_device->rad_info.chip_class == SI)
+			cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
 		si_emit_cache_flush(cmd_buffer);
+	}

 	if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
 	    cmd_buffer->record_fail)
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -928,15 +928,17 @@ void radv_GetPhysicalDeviceMemoryProperties(
 	};

 	STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
+	uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
+	                                  physical_device->rad_info.vram_vis_size);

 	pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
 	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
 		.size = physical_device->rad_info.vram_size -
-				physical_device->rad_info.vram_vis_size,
+				visible_vram_size,
 		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 	};
 	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
-		.size = physical_device->rad_info.vram_vis_size,
+		.size = visible_vram_size,
 		.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
 	};
 	pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
@@ -3077,9 +3079,13 @@ radv_initialise_color_surface(struct radv_device *device,
 				    format != V_028C70_COLOR_24_8) |
 		S_028C70_NUMBER_TYPE(ntype) |
 		S_028C70_ENDIAN(endian);
-	if (iview->image->info.samples > 1)
-		if (iview->image->fmask.size)
-			cb->cb_color_info |= S_028C70_COMPRESSION(1);
+	if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
+		cb->cb_color_info |= S_028C70_COMPRESSION(1);
+		if (device->physical_device->rad_info.chip_class == SI) {
+			unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
+			cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
+		}
+	}

 	if (iview->image->cmask.size &&
 	    !(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
--- a/src/amd/vulkan/radv_meta.c
+++ b/src/amd/vulkan/radv_meta.c
@@ -477,48 +477,8 @@ radv_meta_build_nir_fs_noop(void)
 	return b.shader;
 }

-static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
-							    nir_ssa_def *input)
-{
-	nir_const_value v;
-	unsigned i;
-	v.u32[0] = 0x3b4d2e1c; // 0.00313080009
-
-	nir_ssa_def *cmp[3];
-	for (i = 0; i < 3; i++)
-		cmp[i] = nir_flt(b, nir_channel(b, input, i),
-				 nir_build_imm(b, 1, 32, v));
-
-	nir_ssa_def *ltvals[3];
-	v.f32[0] = 12.92;
-	for (i = 0; i < 3; i++)
-		ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
-				     nir_build_imm(b, 1, 32, v));
-
-	nir_ssa_def *gtvals[3];
-
-	for (i = 0; i < 3; i++) {
-		v.f32[0] = 1.0/2.4;
-		gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
-				     nir_build_imm(b, 1, 32, v));
-		v.f32[0] = 1.055;
-		gtvals[i] = nir_fmul(b, gtvals[i],
-				     nir_build_imm(b, 1, 32, v));
-		v.f32[0] = 0.055;
-		gtvals[i] = nir_fsub(b, gtvals[i],
-				     nir_build_imm(b, 1, 32, v));
-	}
-
-	nir_ssa_def *comp[4];
-	for (i = 0; i < 3; i++)
-		comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
-	comp[3] = nir_channels(b, input, 3);
-	return nir_vec(b, comp, 4);
-}
-
 void radv_meta_build_resolve_shader_core(nir_builder *b,
 					 bool is_integer,
-					 bool is_srgb,
 					 int samples,
 					 nir_variable *input_img,
 					 nir_variable *color,
@@ -596,10 +556,4 @@ void radv_meta_build_resolve_shader_core(nir_builder *b,

 	if (outer_if)
 		b->cursor = nir_after_cf_node(&outer_if->cf_node);
-
-	if (is_srgb) {
-		nir_ssa_def *newv = nir_load_var(b, color);
-		newv = radv_meta_build_resolve_srgb_conversion(b, newv);
-		nir_store_var(b, color, newv, 0xf);
-	}
 }
--- a/src/amd/vulkan/radv_meta.h
+++ b/src/amd/vulkan/radv_meta.h
@@ -234,7 +234,6 @@ nir_shader *radv_meta_build_nir_fs_noop(void);

 void radv_meta_build_resolve_shader_core(nir_builder *b,
 					 bool is_integer,
-					 bool is_srgb,
 					 int samples,
 					 nir_variable *input_img,
 					 nir_variable *color,
--- a/src/amd/vulkan/radv_meta_blit.c
+++ b/src/amd/vulkan/radv_meta_blit.c
@@ -695,6 +695,8 @@ static VkFormat pipeline_formats[] = {
   VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,
+   VK_FORMAT_A2R10G10B10_UINT_PACK32,
+   VK_FORMAT_A2R10G10B10_SINT_PACK32,
   VK_FORMAT_R16G16B16A16_UNORM,
   VK_FORMAT_R16G16B16A16_SNORM,
   VK_FORMAT_R16G16B16A16_UINT,
--- a/src/amd/vulkan/radv_meta_blit2d.c
+++ b/src/amd/vulkan/radv_meta_blit2d.c
@@ -1134,6 +1134,8 @@ static VkFormat pipeline_formats[] = {
   VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,
+   VK_FORMAT_A2R10G10B10_UINT_PACK32,
+   VK_FORMAT_A2R10G10B10_SINT_PACK32,
   VK_FORMAT_R16G16B16A16_UNORM,
   VK_FORMAT_R16G16B16A16_SNORM,
   VK_FORMAT_R16G16B16A16_UINT,
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -754,6 +754,8 @@ static VkFormat pipeline_formats[] = {
 	VK_FORMAT_R8G8B8A8_UNORM,
 	VK_FORMAT_R8G8B8A8_UINT,
 	VK_FORMAT_R8G8B8A8_SINT,
+	VK_FORMAT_A2R10G10B10_UINT_PACK32,
+	VK_FORMAT_A2R10G10B10_SINT_PACK32,
 	VK_FORMAT_R16G16B16A16_UNORM,
 	VK_FORMAT_R16G16B16A16_SNORM,
 	VK_FORMAT_R16G16B16A16_UINT,
--- a/src/amd/vulkan/radv_meta_decompress.c
+++ b/src/amd/vulkan/radv_meta_decompress.c
@@ -29,7 +29,9 @@
 #include "sid.h"

 static VkResult
-create_pass(struct radv_device *device)
+create_pass(struct radv_device *device,
+	    uint32_t samples,
+	    VkRenderPass *pass)
 {
 	VkResult result;
 	VkDevice device_h = radv_device_to_handle(device);
@@ -37,7 +39,7 @@ create_pass(struct radv_device *device)
 	VkAttachmentDescription attachment;

 	attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
-	attachment.samples = 1;
+	attachment.samples = samples;
 	attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
 	attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
 	attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
@@ -65,14 +67,18 @@ create_pass(struct radv_device *device)
 								.dependencyCount = 0,
 								   },
 				       alloc,
-				       &device->meta_state.depth_decomp.pass);
+				       pass);

 	return result;
 }

 static VkResult
 create_pipeline(struct radv_device *device,
-                VkShaderModule vs_module_h)
+                VkShaderModule vs_module_h,
+		uint32_t samples,
+		VkRenderPass pass,
+		VkPipeline *decompress_pipeline,
+		VkPipeline *resummarize_pipeline)
 {
 	VkResult result;
 	VkDevice device_h = radv_device_to_handle(device);
@@ -129,7 +135,7 @@ create_pipeline(struct radv_device *device,
 		},
 		.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
 			.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
-			.rasterizationSamples = 1,
+			.rasterizationSamples = samples,
 			.sampleShadingEnable = false,
 			.pSampleMask = NULL,
 			.alphaToCoverageEnable = false,
@@ -156,7 +162,7 @@ create_pipeline(struct radv_device *device,
 				VK_DYNAMIC_STATE_SCISSOR,
 			},
 		},
-		.renderPass = device->meta_state.depth_decomp.pass,
+		.renderPass = pass,
 		.subpass = 0,
 	};

@@ -169,7 +175,7 @@ create_pipeline(struct radv_device *device,
 							.db_flush_stencil_inplace = true,
 					       },
 					       &device->meta_state.alloc,
-					       &device->meta_state.depth_decomp.decompress_pipeline);
+					       decompress_pipeline);
 	if (result != VK_SUCCESS)
 		goto cleanup;

@@ -183,7 +189,7 @@ create_pipeline(struct radv_device *device,
 							.db_resummarize = true,
 					       },
 					       &device->meta_state.alloc,
-					       &device->meta_state.depth_decomp.resummarize_pipeline);
+					       resummarize_pipeline);
 	if (result != VK_SUCCESS)
 		goto cleanup;

@@ -199,29 +205,31 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
 {
 	struct radv_meta_state *state = &device->meta_state;
 	VkDevice device_h = radv_device_to_handle(device);
-	VkRenderPass pass_h = device->meta_state.depth_decomp.pass;
 	const VkAllocationCallbacks *alloc = &device->meta_state.alloc;

-	if (pass_h)
-		radv_DestroyRenderPass(device_h, pass_h,
-					     &device->meta_state.alloc);
-
-	VkPipeline pipeline_h = state->depth_decomp.decompress_pipeline;
-	if (pipeline_h) {
-		radv_DestroyPipeline(device_h, pipeline_h, alloc);
-	}
-	pipeline_h = state->depth_decomp.resummarize_pipeline;
-	if (pipeline_h) {
-		radv_DestroyPipeline(device_h, pipeline_h, alloc);
+	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+		VkRenderPass pass_h = state->depth_decomp[i].pass;
+		if (pass_h) {
+			radv_DestroyRenderPass(device_h, pass_h, alloc);
+		}
+		VkPipeline pipeline_h = state->depth_decomp[i].decompress_pipeline;
+		if (pipeline_h) {
+			radv_DestroyPipeline(device_h, pipeline_h, alloc);
+		}
+		pipeline_h = state->depth_decomp[i].resummarize_pipeline;
+		if (pipeline_h) {
+			radv_DestroyPipeline(device_h, pipeline_h, alloc);
+		}
 	}
 }

 VkResult
 radv_device_init_meta_depth_decomp_state(struct radv_device *device)
 {
+	struct radv_meta_state *state = &device->meta_state;
 	VkResult res = VK_SUCCESS;

-	zero(device->meta_state.depth_decomp);
+	zero(state->depth_decomp);

 	struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
 	if (!vs_module.nir) {
@@ -230,14 +238,22 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)
 		goto fail;
 	}

-	res = create_pass(device);
-	if (res != VK_SUCCESS)
-		goto fail;
-
 	VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
-	res = create_pipeline(device, vs_module_h);
-	if (res != VK_SUCCESS)
-		goto fail;
+
+	for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
+		uint32_t samples = 1 << i;
+
+		res = create_pass(device, samples, &state->depth_decomp[i].pass);
+		if (res != VK_SUCCESS)
+			goto fail;
+
+		res = create_pipeline(device, vs_module_h, samples,
+				      state->depth_decomp[i].pass,
+				      &state->depth_decomp[i].decompress_pipeline,
+				      &state->depth_decomp[i].resummarize_pipeline);
+		if (res != VK_SUCCESS)
+			goto fail;
+	}

 	goto cleanup;

@@ -283,10 +299,15 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
 }


+enum radv_depth_op {
+	DEPTH_DECOMPRESS,
+	DEPTH_RESUMMARIZE,
+};
+
 static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					     struct radv_image *image,
 					     VkImageSubresourceRange *subresourceRange,
-					     VkPipeline pipeline_h)
+					     enum radv_depth_op op)
 {
 	struct radv_meta_saved_state saved_state;
 	struct radv_meta_saved_pass_state saved_pass_state;
@@ -296,6 +317,9 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 				     subresourceRange->baseMipLevel);
 	uint32_t height = radv_minify(image->info.height,
 				     subresourceRange->baseMipLevel);
+	uint32_t samples = image->info.samples;
+	uint32_t samples_log2 = ffs(samples) - 1;
+	struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;

 	if (!image->surface.htile_size)
 		return;
@@ -339,7 +363,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 		radv_CmdBeginRenderPass(cmd_buffer_h,
 					      &(VkRenderPassBeginInfo) {
 						      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
-							      .renderPass = cmd_buffer->device->meta_state.depth_decomp.pass,
+							      .renderPass = meta_state->depth_decomp[samples_log2].pass,
 							      .framebuffer = fb_h,
 							      .renderArea = {
 							      .offset = {
@@ -356,6 +380,18 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					   },
 					   VK_SUBPASS_CONTENTS_INLINE);

+		VkPipeline pipeline_h;
+		switch (op) {
+		case DEPTH_DECOMPRESS:
+			pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline;
+			break;
+		case DEPTH_RESUMMARIZE:
+			pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline;
+			break;
+		default:
+			unreachable("unknown operation");
+		}
+
 		emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0 }, &(VkExtent2D){width, height}, pipeline_h);
 		radv_CmdEndRenderPass(cmd_buffer_h);

@@ -371,8 +407,7 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 VkImageSubresourceRange *subresourceRange)
 {
 	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
-					 cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline);
+	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_DECOMPRESS);
 }

 void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
@@ -380,6 +415,5 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
 					 VkImageSubresourceRange *subresourceRange)
 {
 	assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
-	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
-					 cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline);
+	radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_RESUMMARIZE);
 }
--- a/src/amd/vulkan/radv_meta_resolve.c
+++ b/src/amd/vulkan/radv_meta_resolve.c
@@ -382,6 +382,11 @@ void radv_CmdResolveImage(
 	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	assert(src_image->info.samples > 1);
+	if (src_image->info.samples <= 1) {
+		/* this causes GPU hangs if we get past here */
+		fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.");
+		return;
+	}
 	assert(dest_image->info.samples == 1);

 	if (src_image->info.samples >= 16) {
--- a/src/amd/vulkan/radv_meta_resolve_cs.c
+++ b/src/amd/vulkan/radv_meta_resolve_cs.c
@@ -31,6 +31,45 @@
 #include "sid.h"
 #include "vk_format.h"

+static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
+							    nir_ssa_def *input)
+{
+	nir_const_value v;
+	unsigned i;
+	v.u32[0] = 0x3b4d2e1c; // 0.00313080009
+
+	nir_ssa_def *cmp[3];
+	for (i = 0; i < 3; i++)
+		cmp[i] = nir_flt(b, nir_channel(b, input, i),
+				 nir_build_imm(b, 1, 32, v));
+
+	nir_ssa_def *ltvals[3];
+	v.f32[0] = 12.92;
+	for (i = 0; i < 3; i++)
+		ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+
+	nir_ssa_def *gtvals[3];
+
+	for (i = 0; i < 3; i++) {
+		v.f32[0] = 1.0/2.4;
+		gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
+				     nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 1.055;
+		gtvals[i] = nir_fmul(b, gtvals[i],
+				     nir_build_imm(b, 1, 32, v));
+		v.f32[0] = 0.055;
+		gtvals[i] = nir_fsub(b, gtvals[i],
+				     nir_build_imm(b, 1, 32, v));
+	}
+
+	nir_ssa_def *comp[4];
+	for (i = 0; i < 3; i++)
+		comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
+	comp[3] = nir_channels(b, input, 1 << 3);
+	return nir_vec(b, comp, 4);
+}
+
 static nir_shader *
 build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
 {
@@ -88,10 +127,13 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
 	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
 	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

-	radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples,
-					    input_img, color, img_coord);
+	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
+	                                    color, img_coord);

 	nir_ssa_def *outval = nir_load_var(&b, color);
+	if (is_srgb)
+		outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
+
 	nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
 	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
 	store->src[0] = nir_src_for_ssa(coord);
@@ -402,7 +444,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
 						     .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
 							     .image = radv_image_to_handle(dest_image),
 							     .viewType = radv_meta_get_view_type(dest_image),
-							     .format = dest_image->vk_format,
+							     .format = vk_to_non_srgb_format(dest_image->vk_format),
 							     .subresourceRange = {
 							     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
 							     .baseMipLevel = region->dstSubresource.mipLevel,
--- a/src/amd/vulkan/radv_meta_resolve_fs.c
+++ b/src/amd/vulkan/radv_meta_resolve_fs.c
@@ -51,7 +51,7 @@ build_nir_vertex_shader(void)
 }

 static nir_shader *
-build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
+build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
 {
 	nir_builder b;
 	char name[64];
@@ -62,7 +62,7 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_
 								 false,
 								 GLSL_TYPE_FLOAT);

-	snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
+	snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
 	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
 	b.shader->info.name = ralloc_strdup(b.shader, name);

@@ -92,8 +92,8 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_
 	nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);
 	nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

-	radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb,samples,
-					    input_img, color, img_coord);
+	radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
+	                                    color, img_coord);

 	nir_ssa_def *outval = nir_load_var(&b, color);
 	nir_store_var(&b, color_out, outval, 0xf);
@@ -160,6 +160,8 @@ static VkFormat pipeline_formats[] = {
   VK_FORMAT_R8G8B8A8_UNORM,
   VK_FORMAT_R8G8B8A8_UINT,
   VK_FORMAT_R8G8B8A8_SINT,
+   VK_FORMAT_A2R10G10B10_UINT_PACK32,
+   VK_FORMAT_A2R10G10B10_SINT_PACK32,
   VK_FORMAT_R16G16B16A16_UNORM,
   VK_FORMAT_R16G16B16A16_SNORM,
   VK_FORMAT_R16G16B16A16_UINT,
@@ -175,31 +177,25 @@ create_resolve_pipeline(struct radv_device *device,
 			VkFormat format)
 {
 	VkResult result;
-	bool is_integer = false, is_srgb = false;
+	bool is_integer = false;
 	uint32_t samples = 1 << samples_log2;
 	unsigned fs_key = radv_format_meta_fs_key(format);
 	const VkPipelineVertexInputStateCreateInfo *vi_create_info;
 	vi_create_info = &normal_vi_create_info;
 	if (vk_format_is_int(format))
 		is_integer = true;
-	else if (vk_format_is_srgb(format))
-		is_srgb = true;

 	struct radv_shader_module fs = { .nir = NULL };
-	fs.nir = build_resolve_fragment_shader(device, is_integer, is_srgb, samples);
+	fs.nir = build_resolve_fragment_shader(device, is_integer, samples);
 	struct radv_shader_module vs = {
 		.nir = build_nir_vertex_shader(),
 	};

-	VkRenderPass *rp = is_srgb ?
-		&device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
-		&device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+	VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];

 	assert(!*rp);

-	VkPipeline *pipeline = is_srgb ?
-		&device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
-		&device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+	VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
 	assert(!*pipeline);

 	VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -348,8 +344,6 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
 		for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
 			res = create_resolve_pipeline(device, i, pipeline_formats[j]);
 		}
-
-		res = create_resolve_pipeline(device, i, VK_FORMAT_R8G8B8A8_SRGB);
 	}

 	return res;
@@ -368,12 +362,6 @@ radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
 					     state->resolve_fragment.rc[i].pipeline[j],
 					     &state->alloc);
 		}
-		radv_DestroyRenderPass(radv_device_to_handle(device),
-				       state->resolve_fragment.rc[i].srgb_render_pass,
-					       &state->alloc);
-		radv_DestroyPipeline(radv_device_to_handle(device),
-				     state->resolve_fragment.rc[i].srgb_pipeline,
-				     &state->alloc);
 	}

 	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
@@ -430,9 +418,7 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
 			      push_constants);

 	unsigned fs_key = radv_format_meta_fs_key(dest_iview->vk_format);
-	VkPipeline pipeline_h = vk_format_is_srgb(dest_iview->vk_format) ?
-		device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
-		device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
+	VkPipeline pipeline_h = device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];

 	radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
 			     pipeline_h);
@@ -483,9 +469,7 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
 		radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
 	}

-	rp = vk_format_is_srgb(dest_image->vk_format) ?
-		device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
-		device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
+	rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
 	radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);

 	for (uint32_t r = 0; r < region_count; ++r) {
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -1067,20 +1067,37 @@ format_is_int8(VkFormat format)
 	       desc->channel[channel].size == 8;
 }

+static bool
+format_is_int10(VkFormat format)
+{
+	const struct vk_format_description *desc = vk_format_description(format);
+
+	if (desc->nr_channels != 4)
+		return false;
+	for (unsigned i = 0; i < 4; i++) {
+		if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
+			return true;
+	}
+	return false;
+}
+
 unsigned radv_format_meta_fs_key(VkFormat format)
 {
 	unsigned col_format = si_choose_spi_color_format(format, false, false) - 1;
 	bool is_int8 = format_is_int8(format);
+	bool is_int10 = format_is_int10(format);

-	return col_format + (is_int8 ? 3 : 0);
+	return col_format + (is_int8 ? 3 : is_int10 ? 5 : 0);
 }

-static unsigned
-radv_pipeline_compute_is_int8(const VkGraphicsPipelineCreateInfo *pCreateInfo)
+static void
+radv_pipeline_compute_get_int_clamp(const VkGraphicsPipelineCreateInfo *pCreateInfo,
+				    unsigned *is_int8, unsigned *is_int10)
 {
 	RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
 	struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
-	unsigned is_int8 = 0;
+	*is_int8 = 0;
+	*is_int10 = 0;

 	for (unsigned i = 0; i < subpass->color_count; ++i) {
 		struct radv_render_pass_attachment *attachment;
@@ -1091,10 +1108,10 @@ radv_pipeline_compute_is_int8(const VkGraphicsPipelineCreateInfo *pCreateInfo)
 		attachment = pass->attachments + subpass->color_attachments[i].attachment;

 		if (format_is_int8(attachment->format))
-			is_int8 |= 1 << i;
+			*is_int8 |= 1 << i;
+		if (format_is_int10(attachment->format))
+			*is_int10 |= 1 << i;
 	}
-
-	return is_int8;
 }

 static void
@@ -2053,9 +2070,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
 	}

 	if (modules[MESA_SHADER_FRAGMENT]) {
-		union ac_shader_variant_key key;
+		union ac_shader_variant_key key = {0};
 		key.fs.col_format = pipeline->graphics.blend.spi_shader_col_format;
-		key.fs.is_int8 = radv_pipeline_compute_is_int8(pCreateInfo);
+
+		if (pipeline->device->physical_device->rad_info.chip_class < VI)
+			radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.fs.is_int8, &key.fs.is_int10);

 		const VkPipelineShaderStageCreateInfo *stage = pStages[MESA_SHADER_FRAGMENT];

--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -84,7 +84,7 @@ typedef uint32_t xcb_window_t;
 #define MAX_PUSH_DESCRIPTORS 32
 #define MAX_DYNAMIC_BUFFERS 16
 #define MAX_SAMPLES_LOG2 4
-#define NUM_META_FS_KEYS 11
+#define NUM_META_FS_KEYS 13
 #define RADV_MAX_DRM_DEVICES 8

 #define NUM_DEPTH_CLEAR_PIPELINES 3
@@ -433,8 +433,6 @@ struct radv_meta_state {
 		VkPipelineLayout                          p_layout;

 		struct {
-			VkRenderPass srgb_render_pass;
-			VkPipeline   srgb_pipeline;
 			VkRenderPass render_pass[NUM_META_FS_KEYS];
 			VkPipeline   pipeline[NUM_META_FS_KEYS];
 		} rc[MAX_SAMPLES_LOG2];
@@ -444,7 +442,7 @@ struct radv_meta_state {
 		VkPipeline                                decompress_pipeline;
 		VkPipeline                                resummarize_pipeline;
 		VkRenderPass                              pass;
-	} depth_decomp;
+	} depth_decomp[1 + MAX_SAMPLES_LOG2];

 	struct {
 		VkPipeline                                cmask_eliminate_pipeline;
--- a/src/amd/vulkan/vk_format.h
+++ b/src/amd/vulkan/vk_format.h
@@ -465,4 +465,27 @@ vk_format_get_component_bits(VkFormat format,
 	}
 }

+static inline VkFormat
+vk_to_non_srgb_format(VkFormat format)
+{
+	switch(format) {
+	case VK_FORMAT_R8_SRGB :
+		return VK_FORMAT_R8_UNORM;
+	case VK_FORMAT_R8G8_SRGB:
+		return VK_FORMAT_R8G8_UNORM;
+	case VK_FORMAT_R8G8B8_SRGB:
+		return VK_FORMAT_R8G8B8_UNORM;
+	case VK_FORMAT_B8G8R8_SRGB:
+		return VK_FORMAT_B8G8R8_UNORM;
+	case VK_FORMAT_R8G8B8A8_SRGB :
+		return VK_FORMAT_R8G8B8A8_UNORM;
+	case VK_FORMAT_B8G8R8A8_SRGB:
+		return VK_FORMAT_B8G8R8A8_UNORM;
+	case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
+		return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
+	default:
+		return format;
+	}
+}
+
 #endif /* VK_FORMAT_H */
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -250,8 +250,8 @@ optimizations = [
   (('ishr', a, 0), a),
   (('ushr', 0, a), 0),
   (('ushr', a, 0), a),
-   (('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)),
-   (('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)),
+   (('iand', 0xff, ('ushr@32', a, 24)), ('ushr', a, 24)),
+   (('iand', 0xffff, ('ushr@32', a, 16)), ('ushr', a, 16)),
   # Exponential/logarithmic identities
   (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
   (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
--- a/src/compiler/spirv/spirv_to_nir.c
+++ b/src/compiler/spirv/spirv_to_nir.c
@@ -721,7 +721,7 @@ translate_image_format(SpvImageFormat format)
   case SpvImageFormatRg32ui:       return 0x823C; /* GL_RG32UI */
   case SpvImageFormatRg16ui:       return 0x823A; /* GL_RG16UI */
   case SpvImageFormatRg8ui:        return 0x8238; /* GL_RG8UI */
-   case SpvImageFormatR16ui:        return 0x823A; /* GL_RG16UI */
+   case SpvImageFormatR16ui:        return 0x8234; /* GL_R16UI */
   case SpvImageFormatR8ui:         return 0x8232; /* GL_R8UI */
   default:
      assert(!"Invalid image format");
--- a/src/egl/drivers/dri2/platform_x11.c
+++ b/src/egl/drivers/dri2/platform_x11.c
@@ -646,6 +646,7 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
       error != NULL || xfixes_query->major_version < 2) {
      _eglLog(_EGL_WARNING, "DRI2: failed to query xfixes version");
      free(error);
+      free(xfixes_query);
      return EGL_FALSE;
   }
   free(xfixes_query);
--- a/src/egl/main/eglapi.c
+++ b/src/egl/main/eglapi.c
@@ -923,7 +923,7 @@ static void *
 _fixupNativeWindow(_EGLDisplay *disp, void *native_window)
 {
 #ifdef HAVE_X11_PLATFORM
-   if (disp->Platform == _EGL_PLATFORM_X11 && native_window != NULL) {
+   if (disp && disp->Platform == _EGL_PLATFORM_X11 && native_window != NULL) {
      /* The `native_window` parameter for the X11 platform differs between
       * eglCreateWindowSurface() and eglCreatePlatformPixmapSurfaceEXT(). In
       * eglCreateWindowSurface(), the type of `native_window` is an Xlib
@@ -985,7 +985,7 @@ _fixupNativePixmap(_EGLDisplay *disp, void *native_pixmap)
       * `Pixmap*`.  Convert `Pixmap*` to `Pixmap` because that's what
       * dri2_x11_create_pixmap_surface() expects.
       */
-   if (disp->Platform == _EGL_PLATFORM_X11 && native_pixmap != NULL)
+   if (disp && disp->Platform == _EGL_PLATFORM_X11 && native_pixmap != NULL)
      return (void *)(* (Pixmap*) native_pixmap);
 #endif
   return native_pixmap;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -2006,6 +2006,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
 void
 CodeEmitterNVC0::emitMOV(const Instruction *i)
 {
+   assert(!i->saturate);
   if (i->def(0).getFile() == FILE_PREDICATE) {
      if (i->src(0).getFile() == FILE_GPR) {
         code[0] = 0xfc01c003;
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1509,6 +1509,14 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
   default:
      return;
   }
+
+   // This can get left behind some of the optimizations which simplify
+   // saturatable values.
+   if (newi->op == OP_MOV && newi->saturate) {
+      newi->saturate = 0;
+      newi->op = OP_SAT;
+   }
+
   if (newi->op != op)
      foldCount++;
 }
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -1373,6 +1373,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
 		"IMM[1] UINT32 {1, 2, 4, 8}\n"
 		"IMM[2] UINT32 {16, 32, 64, 128}\n"
 		"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
+		"IMM[4] UINT32 {0, 0, 0, 0}\n"

 		"AND TEMP[5], CONST[0].wwww, IMM[2].xxxx\n"
 		"UIF TEMP[5]\n"
@@ -1472,7 +1473,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
 					/* Convert to boolean */
 					"AND TEMP[4], CONST[0].wwww, IMM[1].wwww\n"
 					"UIF TEMP[4]\n"
-						"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[0].xxxx\n"
+						"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
 						"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
 						"MOV TEMP[0].y, IMM[0].xxxx\n"
 					"ENDIF\n"
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -3471,7 +3471,7 @@ static void si_llvm_emit_ddxy(

 	val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
 	val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
-			    mask, idx, ctx->lds, val);
+			    mask, idx, val);
 	emit_data->output[emit_data->chan] = val;
 }

@@ -4510,20 +4510,6 @@ static void create_function(struct si_shader_context *ctx)
 	assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
 	shader->info.num_input_vgprs -= num_prolog_vgprs;

-	if (!ctx->screen->has_ds_bpermute &&
-	    bld_base->info &&
-	    (bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
-	     bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
-		ctx->lds =
-			LLVMAddGlobalInAddressSpace(gallivm->module,
-						    LLVMArrayType(ctx->i32, 64),
-						    "ddxy_lds",
-						    LOCAL_ADDR_SPACE);
-
 	if (shader->key.as_ls ||
 	    ctx->type == PIPE_SHADER_TESS_CTRL ||
 	    /* GFX9 has the ESGS ring buffer in LDS. */
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1400,7 +1400,7 @@ static void tex_fetch_args(
 		 * It's unnecessary if the original texture format was
 		 * Z32_FLOAT, but we don't know that here.
 		 */
-		if (ctx->screen->b.chip_class == VI)
+		if (ctx->screen->b.chip_class >= VI)
 			z = ac_build_clamp(&ctx->ac, z);

 		address[count++] = z;
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -148,7 +148,10 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 	buffer_size = LLVMGetBufferSize(out_buffer);
 	buffer_data = LLVMGetBufferStart(out_buffer);

-	ac_elf_read(buffer_data, buffer_size, binary);
+	if (!ac_elf_read(buffer_data, buffer_size, binary)) {
+		fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
+		diag.retval = 1;
+	}

 	/* Clean up */
 	LLVMDisposeMemoryBuffer(out_buffer);
@@ -756,7 +759,7 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 			 */
 			if (array_size > 16 ||
 			    !ctx->screen->llvm_has_working_vgpr_indexing) {
-				array_alloca = LLVMBuildAlloca(builder,
+				array_alloca = lp_build_alloca_undef(&ctx->gallivm,
 					LLVMArrayType(ctx->f32,
 						      array_size), "array");
 				ctx->temp_array_allocas[id] = array_alloca;
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -3162,14 +3162,13 @@ si_make_texture_descriptor(struct si_screen *screen,
 			   uint32_t *fmask_state)
 {
 	struct pipe_resource *res = &tex->resource.b.b;
-	const struct util_format_description *base_desc, *desc;
+	const struct util_format_description *desc;
 	unsigned char swizzle[4];
 	int first_non_void;
 	unsigned num_format, data_format, type;
 	uint64_t va;

 	desc = util_format_description(pipe_format);
-	base_desc = util_format_description(res->format);

 	if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
 		const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
@@ -3270,15 +3269,6 @@ si_make_texture_descriptor(struct si_screen *screen,
 		data_format = 0;
 	}

-	/* Enable clamping for UNORM depth formats promoted to Z32F. */
-	if (screen->b.chip_class >= GFX9 &&
-	    util_format_has_depth(desc) &&
-	    num_format == V_008F14_IMG_NUM_FORMAT_FLOAT &&
-	    util_get_depth_format_type(base_desc) != UTIL_FORMAT_TYPE_FLOAT) {
-		/* NUM_FORMAT=FLOAT and DATA_FORMAT=24_8 means "clamp to [0,1]". */
-		data_format = V_008F14_IMG_DATA_FORMAT_24_8;
-	}
-
 	/* S8 with Z32 HTILE needs a special format. */
 	if (screen->b.chip_class >= GFX9 &&
 	    pipe_format == PIPE_FORMAT_S8_UINT &&
--- a/src/gallium/drivers/swr/SConscript
+++ b/src/gallium/drivers/swr/SConscript
@@ -53,7 +53,7 @@ env.CodeGenerate(
    source = '',
    command = python_cmd + ' $SCRIPT --output $TARGET --gen_h'
 )
-Depends('rasterizer/codegen/gen_knobs.cpp',
+Depends('rasterizer/codegen/gen_knobs.h',
        swrroot + 'rasterizer/codegen/templates/gen_knobs.cpp')

 env.CodeGenerate(
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -133,6 +133,11 @@ dri_fill_in_modes(struct dri_screen *screen)
      MESA_FORMAT_B8G8R8A8_SRGB,
      MESA_FORMAT_B8G8R8X8_SRGB,
      MESA_FORMAT_B5G6R5_UNORM,
+#ifdef ANDROID
+      /*
+       * To reduce the risk of breaking non-Android users in stable release
+       * let's keep these for Android alone until this is handled properly.
+       */

      /* The 32-bit RGBA format must not precede the 32-bit BGRA format.
       * Likewise for RGBX and BGRX.  Otherwise, the GLX client and the GLX
@@ -154,6 +159,7 @@ dri_fill_in_modes(struct dri_screen *screen)

      /* Required by Android, for HAL_PIXEL_FORMAT_RGBX_8888. */
      MESA_FORMAT_R8G8B8X8_UNORM,
+#endif
   };
   static const enum pipe_format pipe_formats[] = {
      PIPE_FORMAT_BGRA8888_UNORM,
@@ -161,8 +167,14 @@ dri_fill_in_modes(struct dri_screen *screen)
      PIPE_FORMAT_BGRA8888_SRGB,
      PIPE_FORMAT_BGRX8888_SRGB,
      PIPE_FORMAT_B5G6R5_UNORM,
+#ifdef ANDROID
+      /*
+       * To reduce the risk of breaking non-Android users in stable release
+       * let's keep these for Android alone until this is handled properly.
+       */
      PIPE_FORMAT_RGBA8888_UNORM,
      PIPE_FORMAT_RGBX8888_UNORM,
+#endif
   };
   mesa_format format;
   __DRIconfig **configs = NULL;
--- a/src/gallium/state_trackers/osmesa/osmesa.c
+++ b/src/gallium/state_trackers/osmesa/osmesa.c
@@ -439,6 +439,7 @@ osmesa_st_framebuffer_validate(struct st_context_iface *stctx,
   return TRUE;
 }

+static uint32_t osmesa_fb_ID = 0;

 static struct st_framebuffer_iface *
 osmesa_create_st_framebuffer(void)
@@ -448,6 +449,8 @@ osmesa_create_st_framebuffer(void)
      stfbi->flush_front = osmesa_st_framebuffer_flush_front;
      stfbi->validate = osmesa_st_framebuffer_validate;
      p_atomic_set(&stfbi->stamp, 1);
+      stfbi->ID = p_atomic_inc_return(&osmesa_fb_ID);
+      stfbi->state_manager = get_st_manager();
   }
   return stfbi;
 }
@@ -508,6 +511,14 @@ osmesa_find_buffer(enum pipe_format color_format,
 static void
 osmesa_destroy_buffer(struct osmesa_buffer *osbuffer)
 {
+   struct st_api *stapi = get_st_api();
+
+   /*
+    * Notify the state manager that the associated framebuffer interface
+    * is no longer valid.
+    */
+   stapi->destroy_drawable(stapi, osbuffer->stfb);
+
   FREE(osbuffer->stfb);
   FREE(osbuffer);
 }
--- a/src/intel/compiler/brw_vec4_gs_visitor.cpp
+++ b/src/intel/compiler/brw_vec4_gs_visitor.cpp
@@ -912,6 +912,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
            memcpy(prog_data->base.base.param, param,
                   sizeof(gl_constant_value*) * param_count);
            prog_data->base.base.nr_params = param_count;
+            prog_data->base.base.nr_pull_params = 0;
            ralloc_free(param);
         }
      }
--- a/src/intel/isl/isl.c
+++ b/src/intel/isl/isl.c
@@ -982,7 +982,8 @@ isl_calc_phys_total_extent_el_gen4_2d(
                                           &phys_slice0_sa);
   *total_extent_el = (struct isl_extent2d) {
      .w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
-      .h = *array_pitch_el_rows * phys_level0_sa->array_len,
+      .h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
+           isl_assert_div(phys_slice0_sa.h, fmtl->bh),
   };
 }

@@ -1366,124 +1367,19 @@ isl_calc_row_pitch(const struct isl_device *dev,
       !pitch_in_range(row_pitch, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
      return false;

-   if (surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT)
-      isl_finishme("validate row pitch of stencil surfaces");
+   const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
+      _3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
+      _3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
+
+   if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
+       !pitch_in_range(row_pitch, stencil_pitch_bits))
+      return false;

 done:
   *out_row_pitch = row_pitch;
   return true;
 }

-/**
- * Calculate and apply any padding required for the surface.
- *
- * @param[inout] total_h_el is updated with the new height
- * @param[out] pad_bytes is overwritten with additional padding requirements.
- */
-static void
-isl_apply_surface_padding(const struct isl_device *dev,
-                          const struct isl_surf_init_info *restrict info,
-                          const struct isl_tile_info *tile_info,
-                          uint32_t *total_h_el,
-                          uint32_t *pad_bytes)
-{
-   const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
-
-   *pad_bytes = 0;
-
-   /* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
-    * Formats >> Surface Padding Requirements >> Render Target and Media
-    * Surfaces:
-    *
-    *   The data port accesses data (pixels) outside of the surface if they
-    *   are contained in the same cache request as pixels that are within the
-    *   surface. These pixels will not be returned by the requesting message,
-    *   however if these pixels lie outside of defined pages in the GTT,
-    *   a GTT error will result when the cache request is processed. In
-    *   order to avoid these GTT errors, “padding” at the bottom of the
-    *   surface is sometimes necessary.
-    *
-    * From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
-    * Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
-    *
-    *    ... Lots of padding requirements, all listed separately below.
-    */
-
-   /* We can safely ignore the first padding requirement, quoted below,
-    * because isl doesn't do buffers.
-    *
-    *    - [pre-BDW] For buffers, which have no inherent “height,” padding
-    *      requirements are different. A buffer must be padded to the next
-    *      multiple of 256 array elements, with an additional 16 bytes added
-    *      beyond that to account for the L1 cache line.
-    */
-
-   /*
-    *    - For compressed textures [...], padding at the bottom of the surface
-    *      is to an even compressed row.
-    */
-   if (isl_format_is_compressed(info->format))
-      *total_h_el = isl_align(*total_h_el, 2);
-
-   /*
-    *    - For cube surfaces, an additional two rows of padding are required
-    *      at the bottom of the surface.
-    */
-   if (info->usage & ISL_SURF_USAGE_CUBE_BIT)
-      *total_h_el += 2;
-
-   /*
-    *    - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
-    *      additional padding is required. These surfaces require an extra row
-    *      plus 16 bytes of padding at the bottom in addition to the general
-    *      padding requirements.
-    */
-   if (isl_format_is_yuv(info->format) &&
-       (fmtl->bpb == 96 || fmtl->bpb == 48|| fmtl->bpb == 24)) {
-      *total_h_el += 1;
-      *pad_bytes += 16;
-   }
-
-   /*
-    *    - For linear surfaces, additional padding of 64 bytes is required at
-    *      the bottom of the surface. This is in addition to the padding
-    *      required above.
-    */
-   if (tile_info->tiling == ISL_TILING_LINEAR)
-      *pad_bytes += 64;
-
-   /* The below text weakens, not strengthens, the padding requirements for
-    * linear surfaces. Therefore we can safely ignore it.
-    *
-    *    - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array,
-    *      non-MSAA, non-mip-mapped surfaces in linear memory, the only
-    *      padding requirement is to the next aligned 64-byte boundary beyond
-    *      the end of the surface. The rest of the padding requirements
-    *      documented above do not apply to these surfaces.
-    */
-
-   /*
-    *    - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and
-    *      height % 4 != 0, the surface must be padded with
-    *      4-(height % 4)*Surface Pitch # of bytes.
-    */
-   if (ISL_DEV_GEN(dev) >= 9 &&
-       tile_info->tiling == ISL_TILING_LINEAR &&
-       (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) {
-      *total_h_el = isl_align(*total_h_el, 4);
-   }
-
-   /*
-    *    - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded
-    *      to 4 times the Surface Pitch # of bytes
-    */
-   if (ISL_DEV_GEN(dev) >= 9 &&
-       tile_info->tiling == ISL_TILING_LINEAR &&
-       info->dim == ISL_SURF_DIM_1D) {
-      *total_h_el += 4;
-   }
-}
-
 bool
 isl_surf_init_s(const struct isl_device *dev,
                struct isl_surf *surf,
@@ -1536,10 +1432,6 @@ isl_surf_init_s(const struct isl_device *dev,
                                 array_pitch_span, &array_pitch_el_rows,
                                 &phys_total_el);

-   uint32_t padded_h_el = phys_total_el.h;
-   uint32_t pad_bytes;
-   isl_apply_surface_padding(dev, info, &tile_info, &padded_h_el, &pad_bytes);
-
   uint32_t row_pitch;
   if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
                           &phys_total_el, &row_pitch))
@@ -1548,7 +1440,7 @@ isl_surf_init_s(const struct isl_device *dev,
   uint32_t base_alignment;
   uint64_t size;
   if (tiling == ISL_TILING_LINEAR) {
-      size = (uint64_t) row_pitch * padded_h_el + pad_bytes;
+      size = (uint64_t) row_pitch * phys_total_el.h;

      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
       *
@@ -1569,9 +1461,8 @@ isl_surf_init_s(const struct isl_device *dev,
      }
      base_alignment = isl_round_up_to_power_of_two(base_alignment);
   } else {
-      padded_h_el += isl_align_div_npot(pad_bytes, row_pitch);
      const uint32_t total_h_tl =
-         isl_align_div(padded_h_el, tile_info.logical_extent_el.height);
+         isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);

      size = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch;

--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -496,10 +496,6 @@ static const VkExtensionProperties device_extensions[] = {
      .extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
      .specVersion = 1,
   },
-   {
-      .extensionName = VK_KHX_MULTIVIEW_EXTENSION_NAME,
-      .specVersion = 1,
-   },
 };

 static void *
--- a/src/intel/vulkan/anv_formats.c
+++ b/src/intel/vulkan/anv_formats.c
@@ -395,7 +395,8 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d
      /* Nothing to do here */
   } else if (vk_format_is_depth_or_stencil(format)) {
      tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
-      if (physical_device->info.gen >= 8)
+      if (vk_format_aspects(format) == VK_IMAGE_ASPECT_DEPTH_BIT ||
+          physical_device->info.gen >= 8)
         tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;

      tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -504,6 +504,7 @@ loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw,
                                     x, y, width, height, __BLIT_FLAG_FLUSH);
   }

+   loader_dri3_swapbuffer_barrier(draw);
   dri3_fence_reset(draw->conn, back);
   dri3_copy_area(draw->conn,
                  dri3_back_buffer(draw)->pixmap,
@@ -595,6 +596,7 @@ loader_dri3_wait_gl(struct loader_dri3_drawable *draw)
                                  front->height,
                                  0, 0, front->width,
                                  front->height, __BLIT_FLAG_FLUSH);
+   loader_dri3_swapbuffer_barrier(draw);
   loader_dri3_copy_drawable(draw, draw->drawable, front->pixmap);
 }

@@ -1258,6 +1260,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
         }
         break;
      case loader_dri3_buffer_front:
+         loader_dri3_swapbuffer_barrier(draw);
         dri3_fence_reset(draw->conn, new_buffer);
         dri3_copy_area(draw->conn,
                        draw->drawable,
@@ -1431,3 +1434,18 @@ loader_dri3_update_drawable_geometry(struct loader_dri3_drawable *draw)
      free(geom_reply);
   }
 }
+
+
+/**
+ * Make sure the server has flushed all pending swap buffers to hardware
+ * for this drawable. Ideally we'd want to send an X protocol request to
+ * have the server block our connection until the swaps are complete. That
+ * would avoid the potential round-trip here.
+ */
+void
+loader_dri3_swapbuffer_barrier(struct loader_dri3_drawable *draw)
+{
+   int64_t ust, msc, sbc;
+
+   (void) loader_dri3_wait_for_sbc(draw, 0, &ust, &msc, &sbc);
+}
--- a/src/loader/loader_dri3_helper.h
+++ b/src/loader/loader_dri3_helper.h
@@ -241,4 +241,7 @@ loader_dri3_get_buffers(__DRIdrawable *driDrawable,

 void
 loader_dri3_update_drawable_geometry(struct loader_dri3_drawable *draw);
+
+void
+loader_dri3_swapbuffer_barrier(struct loader_dri3_drawable *draw);
 #endif
--- a/src/mesa/drivers/dri/i965/intel_blit.c
+++ b/src/mesa/drivers/dri/i965/intel_blit.c
@@ -187,7 +187,6 @@ get_blit_intratile_offset_el(const struct brw_context *brw,
       * The offsets we get from ISL in the tiled case are already aligned.
       * In the linear case, we need to do some of our own aligning.
       */
-      assert(mt->surf.row_pitch % 64 == 0);
      uint32_t delta = *base_address_offset & 63;
      assert(delta % mt->cpp == 0);
      *base_address_offset -= delta;
@@ -831,11 +830,11 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
         if (brw->gen >= 8) {
            OUT_RELOC64(mt->bo,
                        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                        offset);
+                        mt->offset + offset);
         } else {
            OUT_RELOC(mt->bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-                      offset);
+                      mt->offset + offset);
         }
         OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
         ADVANCE_BATCH_TILED(dst_y_tiled, false);
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -847,9 +847,15 @@ intel_miptree_create_for_bo(struct brw_context *brw,
   mt->bo = bo;
   mt->offset = offset;

-   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
+   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX)) {
      intel_miptree_choose_aux_usage(brw, mt);

+      if (!intel_miptree_alloc_aux(brw, mt)) {
+         intel_miptree_release(&mt);
+         return NULL;
+      }
+   }
+
   return mt;
 }

@@ -979,11 +985,6 @@ intel_miptree_create_for_dri_image(struct brw_context *brw,
      }
   }

-   if (!intel_miptree_alloc_aux(brw, mt)) {
-      intel_miptree_release(&mt);
-      return NULL;
-   }
-
   return mt;
 }

--- a/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
+++ b/src/mesa/drivers/dri/i965/intel_pixel_bitmap.c
@@ -294,7 +294,7 @@ do_blit_bitmap( struct gl_context *ctx,
 						color,
 						irb->mt->surf.row_pitch,
 						irb->mt->bo,
-						0,
+						irb->mt->offset,
 						irb->mt->surf.tiling,
 						dstx + px,
 						dsty + py,
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -358,7 +358,8 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
       */
      if (returnType == uni->type->base_type ||
          ((returnType == GLSL_TYPE_INT || returnType == GLSL_TYPE_UINT) &&
-           (uni->type->is_sampler() || uni->type->is_image()))) {
+           (uni->type->is_sampler() || uni->type->is_image())) ||
+          (returnType == GLSL_TYPE_UINT64 && uni->is_bindless)) {
         memcpy(paramsOut, src, bytes);
      } else {
         union gl_constant_value *const dst =
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -399,7 +399,7 @@ find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
 }

 struct rename_reg_pair {
-   bool valid;
+   int old_reg;
   int new_reg;
 };

@@ -568,7 +568,7 @@ public:

   void simplify_cmp(void);

-   void rename_temp_registers(struct rename_reg_pair *renames);
+   void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
   void get_first_temp_read(int *first_reads);
   void get_first_temp_write(int *first_writes);
   void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
@@ -4646,6 +4646,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
   mem_ctx = ralloc_context(NULL);
   ctx = NULL;
   prog = NULL;
+   precise = 0;
   shader_program = NULL;
   shader = NULL;
   options = NULL;
@@ -4835,37 +4836,36 @@ glsl_to_tgsi_visitor::simplify_cmp(void)

 /* Replaces all references to a temporary register index with another index. */
 void
-glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames)
+glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
 {
   foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
      unsigned j;
+      int k;
      for (j = 0; j < num_inst_src_regs(inst); j++) {
-         if (inst->src[j].file == PROGRAM_TEMPORARY) {
-            int old_idx = inst->src[j].index;
-            if (renames[old_idx].valid)
-               inst->src[j].index = renames[old_idx].new_reg;
-         }
+         if (inst->src[j].file == PROGRAM_TEMPORARY)
+            for (k = 0; k < num_renames; k++)
+               if (inst->src[j].index == renames[k].old_reg)
+                  inst->src[j].index = renames[k].new_reg;
      }

      for (j = 0; j < inst->tex_offset_num_offset; j++) {
-         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
-            int old_idx = inst->tex_offsets[j].index;
-            if (renames[old_idx].valid)
-               inst->tex_offsets[j].index = renames[old_idx].new_reg;
-         }
+         if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
+            for (k = 0; k < num_renames; k++)
+               if (inst->tex_offsets[j].index == renames[k].old_reg)
+                  inst->tex_offsets[j].index = renames[k].new_reg;
      }

      if (inst->resource.file == PROGRAM_TEMPORARY) {
-         int old_idx = inst->resource.index;
-         if (renames[old_idx].valid)
-            inst->resource.index = renames[old_idx].new_reg;
+         for (k = 0; k < num_renames; k++)
+            if (inst->resource.index == renames[k].old_reg)
+               inst->resource.index = renames[k].new_reg;
      }

      for (j = 0; j < num_inst_dst_regs(inst); j++) {
-         if (inst->dst[j].file == PROGRAM_TEMPORARY) {
-            int old_idx = inst->dst[j].index;
-            if (renames[old_idx].valid)
-               inst->dst[j].index = renames[old_idx].new_reg;}
+         if (inst->dst[j].file == PROGRAM_TEMPORARY)
+             for (k = 0; k < num_renames; k++)
+                if (inst->dst[j].index == renames[k].old_reg)
+                   inst->dst[j].index = renames[k].new_reg;
      }
   }
 }
@@ -5446,6 +5446,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
   int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
   struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
   int i, j;
+   int num_renames = 0;

   /* Read the indices of the last read and first write to each temp register
    * into an array so that we don't have to traverse the instruction list as
@@ -5472,8 +5473,9 @@ glsl_to_tgsi_visitor::merge_registers(void)
          * as the register at index j. */
         if (first_writes[i] <= first_writes[j] &&
             last_reads[i] <= first_writes[j]) {
-            renames[j].new_reg = i;
-            renames[j].valid = true;
+            renames[num_renames].old_reg = j;
+            renames[num_renames].new_reg = i;
+            num_renames++;

            /* Update the first_writes and last_reads arrays with the new
             * values for the merged register index, and mark the newly unused
@@ -5486,7 +5488,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
      }
   }

-   rename_temp_registers(renames);
+   rename_temp_registers(num_renames, renames);
   ralloc_free(renames);
   ralloc_free(last_reads);
   ralloc_free(first_writes);
@@ -5501,6 +5503,7 @@ glsl_to_tgsi_visitor::renumber_registers(void)
   int new_index = 0;
   int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
   struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
+   int num_renames = 0;

   for (i = 0; i < this->next_temp; i++) {
      first_writes[i] = -1;
@@ -5510,13 +5513,14 @@ glsl_to_tgsi_visitor::renumber_registers(void)
   for (i = 0; i < this->next_temp; i++) {
      if (first_writes[i] < 0) continue;
      if (i != new_index) {
-         renames[i].new_reg = new_index;
-         renames[i].valid = true;
+         renames[num_renames].old_reg = i;
+         renames[num_renames].new_reg = new_index;
+         num_renames++;
      }
      new_index++;
   }

-   rename_temp_registers(renames);
+   rename_temp_registers(num_renames, renames);
   this->next_temp = new_index;
   ralloc_free(renames);
   ralloc_free(first_writes);
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -48,7 +48,6 @@
 #include "st_cb_fbo.h"
 #include "st_cb_flush.h"
 #include "st_manager.h"
-#include "st_sampler_view.h"

 #include "state_tracker/st_gl_api.h"

@@ -736,7 +735,6 @@ st_context_teximage(struct st_context_iface *stctxi,
   pipe_resource_reference(&stImage->pt, tex);
   stObj->surface_format = pipe_format;

-   st_texture_release_all_sampler_views(st, stObj);
   stObj->needs_validation = true;

   _mesa_dirty_texobj(ctx, texObj);