Update version to 17.2.0-rc2

Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
st/glsl_to_tgsi: fix getting the image type for array of structs
2017-07-31 10:52:13 +01:00 · 2017-07-31 10:26:27 +01:00 · 2017-07-31 10:25:55 +01:00 · 2017-07-31 10:24:42 +01:00 · 2017-07-31 10:23:09 +01:00 · 2017-07-31 10:21:17 +01:00
42 changed files with 511 additions and 197 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-17.2.0-devel
+17.2.0-rc2
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -3400,7 +3400,10 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 	char intrinsic_name[64];
 	const nir_variable *var = instr->variables[0]->var;
 	const struct glsl_type *type = glsl_without_array(var->type);
-
+	LLVMValueRef glc = ctx->i1false;
+	bool force_glc = ctx->options->chip_class == SI;
+	if (force_glc)
+		glc = ctx->i1true;
 	if (ctx->stage == MESA_SHADER_FRAGMENT)
 		ctx->shader_info->fs.writes_memory = true;

@@ -3410,7 +3413,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 		params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
 						    LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
 		params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
-		params[4] = ctx->i1false;  /* glc */
+		params[4] = glc;  /* glc */
 		params[5] = ctx->i1false;  /* slc */
 		ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
 				   params, 6, 0);
@@ -3418,7 +3421,6 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
 		bool is_da = glsl_sampler_type_is_array(type) ||
 			     glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
 		LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
-		LLVMValueRef glc = ctx->i1false;
 		LLVMValueRef slc = ctx->i1false;

 		params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2]));
@@ -5815,10 +5817,11 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
 		args.enabled_channels |= 0x4;
 	}

-	/* SI (except OLAND) has a bug that it only looks
+	/* SI (except OLAND and HAINAN) has a bug that it only looks
 	 * at the X writemask component. */
 	if (ctx->options->chip_class == SI &&
-	    ctx->options->family != CHIP_OLAND)
+	    ctx->options->family != CHIP_OLAND &&
+	    ctx->options->family != CHIP_HAINAN)
 		args.enabled_channels |= 0x1;

 	ac_build_export(&ctx->ac, &args);
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -257,6 +257,18 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
 	AddrSurfInfoIn->width = u_minify(config->info.width, level);
 	AddrSurfInfoIn->height = u_minify(config->info.height, level);

+	/* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
+	 * because GFX9 needs linear alignment of 256 bytes.
+	 */
+	if (config->info.levels == 1 &&
+	    AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
+	    AddrSurfInfoIn->bpp) {
+		unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
+
+		assert(util_is_power_of_two(AddrSurfInfoIn->bpp));
+		AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
+	}
+
 	if (config->is_3d)
 		AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
 	else if (config->is_cube)
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -3246,6 +3246,8 @@ radv_initialise_ds_surface(struct radv_device *device,
 			ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
 			tile_mode_index = si_tile_mode_index(iview->image, level, true);
 			ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
+			if (stencil_only)
+				ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
 		}

 		ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
@@ -3624,9 +3626,14 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
 	const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
 	VkExternalSemaphorePropertiesKHR*           pExternalSemaphoreProperties)
 {
-	pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
-	pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
-	pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
-		VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
-
+	if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
+		pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+		pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
+		pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
+			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
+	} else {
+		pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
+		pExternalSemaphoreProperties->compatibleHandleTypes = 0;
+		pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
+	}
 }
--- a/src/amd/vulkan/radv_image.c
+++ b/src/amd/vulkan/radv_image.c
@@ -181,6 +181,11 @@ radv_make_buffer_descriptor(struct radv_device *device,
 	state[0] = va;
 	state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
 		S_008F04_STRIDE(stride);
+
+	if (device->physical_device->rad_info.chip_class < VI && stride) {
+		range /= stride;
+	}
+
 	state[2] = range;
 	state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
 		   S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
--- a/src/egl/drivers/dri2/platform_wayland.c
+++ b/src/egl/drivers/dri2/platform_wayland.c
@@ -1083,6 +1083,10 @@ dmabuf_handle_modifier(void *data, struct zwp_linux_dmabuf_v1 *dmabuf,
   struct dri2_egl_display *dri2_dpy = data;
   uint64_t *mod = NULL;

+   if (modifier_hi == (DRM_FORMAT_MOD_INVALID >> 32) &&
+       modifier_lo == (DRM_FORMAT_MOD_INVALID & 0xffffffff))
+      return;
+
   switch (format) {
   case WL_DRM_FORMAT_ARGB8888:
      mod = u_vector_add(&dri2_dpy->wl_modifiers.argb8888);
--- a/src/egl/main/eglcontext.c
+++ b/src/egl/main/eglcontext.c
@@ -328,17 +328,6 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
            break;
         }

-         /* The EGL_KHR_create_context_no_error spec says:
-          *
-          *    "BAD_MATCH is generated if the EGL_CONTEXT_OPENGL_NO_ERROR_KHR is TRUE at
-          *    the same time as a debug or robustness context is specified."
-          */
-         if (ctx->Flags & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR ||
-             ctx->Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) {
-            err = EGL_BAD_MATCH;
-            break;
-         }
-
         /* Canonicalize value to EGL_TRUE/EGL_FALSE definitions */
         ctx->NoError = !!val;
         break;
@@ -489,6 +478,16 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
      break;
   }

+   /* The EGL_KHR_create_context_no_error spec says:
+    *
+    *    "BAD_MATCH is generated if the EGL_CONTEXT_OPENGL_NO_ERROR_KHR is TRUE at
+    *    the same time as a debug or robustness context is specified."
+    */
+   if (ctx->NoError && (ctx->Flags & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR ||
+                        ctx->Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR)) {
+      err = EGL_BAD_MATCH;
+   }
+
   if ((ctx->Flags & ~(EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR
                      | EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR
                      | EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR)) != 0) {
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -925,10 +925,6 @@ static inline void radeon_set_ctl_const(struct radeon_winsys_cs *cs, unsigned re
 /*
 * common helpers
 */
-static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
-{
-	return value * (1 << frac_bits);
-}

 /* 12.4 fixed-point */
 static inline unsigned r600_pack_float_12p4(float x)
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -1024,6 +1024,25 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
 	}
 }

+static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
+					  enum pipe_shader_ir ir_type)
+{
+	if (ir_type != PIPE_SHADER_IR_TGSI)
+		return 256;
+
+	/* Only 16 waves per thread-group on gfx9. */
+	if (screen->chip_class >= GFX9)
+		return 1024;
+
+	/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
+	 * round number.
+	 */
+	if (screen->chip_class >= SI)
+		return 2048;
+
+	return 256;
+}
+
 static int r600_get_compute_param(struct pipe_screen *screen,
        enum pipe_shader_ir ir_type,
        enum pipe_compute_cap param,
@@ -1078,27 +1097,17 @@ static int r600_get_compute_param(struct pipe_screen *screen,
 	case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
 		if (ret) {
 			uint64_t *block_size = ret;
-			if (rscreen->chip_class >= SI &&
-			    ir_type == PIPE_SHADER_IR_TGSI) {
-				block_size[0] = 2048;
-				block_size[1] = 2048;
-				block_size[2] = 2048;
-			} else {
-				block_size[0] = 256;
-				block_size[1] = 256;
-				block_size[2] = 256;
-			}
+			unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
+			block_size[0] = threads_per_block;
+			block_size[1] = threads_per_block;
+			block_size[2] = threads_per_block;
 		}
 		return 3 * sizeof(uint64_t);

 	case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
 		if (ret) {
 			uint64_t *max_threads_per_block = ret;
-			if (rscreen->chip_class >= SI &&
-			    ir_type == PIPE_SHADER_IR_TGSI)
-				*max_threads_per_block = 2048;
-			else
-				*max_threads_per_block = 256;
+			*max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
 		}
 		return sizeof(uint64_t);
 	case PIPE_COMPUTE_CAP_ADDRESS_BITS:
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -1006,4 +1006,9 @@ vi_dcc_enabled(struct r600_texture *tex, unsigned level)
 	(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) |	   \
 	 (((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))

+static inline int S_FIXED(float value, unsigned frac_bits)
+{
+	return value * (1 << frac_bits);
+}
+
 #endif
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -121,9 +121,7 @@ si_blit_dbcb_copy(struct si_context *sctx,

 	assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);

-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;

 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -169,7 +167,6 @@ si_blit_dbcb_copy(struct si_context *sctx,
 	}

 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->dbcb_depth_copy_enabled = false;
 	sctx->dbcb_stencil_copy_enabled = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -225,9 +222,7 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,

 	surf_tmpl.format = texture->resource.b.b.format;

-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;

 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -267,7 +262,6 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
 		texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;

 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
 	sctx->db_flush_depth_inplace = false;
 	sctx->db_flush_stencil_inplace = false;
 	si_mark_atom_dirty(sctx, &sctx->db_render_state);
@@ -474,9 +468,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 		custom_blend = sctx->custom_blend_eliminate_fastclear;
 	}

-	bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
 	sctx->decompression_enabled = true;
-	sctx->framebuffer.do_update_surf_dirtiness = false;

 	while (level_mask) {
 		unsigned level = u_bit_scan(&level_mask);
@@ -519,7 +511,6 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
 	}

 	sctx->decompression_enabled = false;
-	sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;

 	sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
 			 SI_CONTEXT_INV_GLOBAL_L2 |
@@ -971,10 +962,32 @@ static void si_decompress_subresource(struct pipe_context *ctx,
 		if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
 			planes &= ~PIPE_MASK_S;

+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		if (sctx->framebuffer.state.zsbuf &&
+		    sctx->framebuffer.state.zsbuf->u.tex.level == level &&
+		    sctx->framebuffer.state.zsbuf->texture == tex)
+			si_update_fb_dirtiness_after_rendering(sctx);
+
 		si_decompress_depth(sctx, rtex, planes,
 				    level, level,
 				    first_layer, last_layer);
 	} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
+		/* If we've rendered into the framebuffer and it's a blitting
+		 * source, make sure the decompression pass is invoked
+		 * by dirtying the framebuffer.
+		 */
+		for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
+			if (sctx->framebuffer.state.cbufs[i] &&
+			    sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
+			    sctx->framebuffer.state.cbufs[i]->texture == tex) {
+				si_update_fb_dirtiness_after_rendering(sctx);
+				break;
+			}
+		}
+
 		si_blit_decompress_color(ctx, rtex, level, level,
 					 first_layer, last_layer, false);
 	}
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -970,7 +970,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,

 	if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
 			     "si_shader_low",
-			     32, num_compiler_threads,
+			     32, num_compiler_threads_lowprio,
 			     UTIL_QUEUE_INIT_RESIZE_IF_FULL |
 			     UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
 	       si_destroy_shader_cache(sscreen);
@@ -1002,8 +1002,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 		 sscreen->b.info.pfp_fw_version >= 211 &&
 		 sscreen->b.info.me_fw_version >= 173) ||
 		(sscreen->b.chip_class == SI &&
-		 sscreen->b.info.pfp_fw_version >= 121 &&
-		 sscreen->b.info.me_fw_version >= 87);
+		 sscreen->b.info.pfp_fw_version >= 79 &&
+		 sscreen->b.info.me_fw_version >= 142);

 	sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
 	sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -113,10 +113,15 @@ struct si_screen {

 	/* Shader compiler queue for multithreaded compilation. */
 	struct util_queue		shader_compiler_queue;
-	LLVMTargetMachineRef		tm[4]; /* used by the queue only */
+	/* Use at most 3 normal compiler threads on quadcore and better.
+	 * Hyperthreaded CPUs report the number of threads, but we want
+	 * the number of cores. */
+	LLVMTargetMachineRef		tm[3]; /* used by the queue only */

 	struct util_queue		shader_compiler_queue_low_priority;
-	LLVMTargetMachineRef		tm_low_priority[4];
+	/* Use at most 2 low priority threads on quadcore and better.
+	 * We want to minimize the impact on multithreaded Mesa. */
+	LLVMTargetMachineRef		tm_low_priority[2]; /* at most 2 threads */
 };

 struct si_blend_color {
@@ -182,7 +187,6 @@ struct si_framebuffer {
 	ubyte				dirty_cbufs;
 	bool				dirty_zsbuf;
 	bool				any_dst_linear;
-	bool				do_update_surf_dirtiness;
 };

 struct si_clip_state {
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -174,6 +174,20 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
 	}
 }

+/**
+ * Helper function that builds an LLVM IR PHI node and immediately adds
+ * incoming edges.
+ */
+static LLVMValueRef
+build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
+	  unsigned count_incoming, LLVMValueRef *values,
+	  LLVMBasicBlockRef *blocks)
+{
+	LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
+	LLVMAddIncoming(phi, values, blocks, count_incoming);
+	return phi;
+}
+
 /**
 * Get the value of a shader input parameter and extract a bitfield.
 */
@@ -2698,6 +2712,7 @@ si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
 static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 {
 	struct si_shader_context *ctx = si_shader_context(bld_base);
+	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;

 	si_copy_tcs_inputs(bld_base);
@@ -2706,8 +2721,29 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
 	invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
 	tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);

+	if (ctx->screen->b.chip_class >= GFX9) {
+		LLVMBasicBlockRef blocks[2] = {
+			LLVMGetInsertBlock(builder),
+			ctx->merged_wrap_if_state.entry_block
+		};
+		LLVMValueRef values[2];
+
+		lp_build_endif(&ctx->merged_wrap_if_state);
+
+		values[0] = rel_patch_id;
+		values[1] = LLVMGetUndef(ctx->i32);
+		rel_patch_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+
+		values[0] = tf_lds_offset;
+		values[1] = LLVMGetUndef(ctx->i32);
+		tf_lds_offset = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+
+		values[0] = invocation_id;
+		values[1] = ctx->i32_1; /* cause the epilog to skip threads */
+		invocation_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
+	}
+
 	/* Return epilog parameters from this function. */
-	LLVMBuilderRef builder = ctx->gallivm.builder;
 	LLVMValueRef ret = ctx->return_value;
 	unsigned vgpr;

@@ -2879,7 +2915,12 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)

 	if (ctx->screen->b.chip_class >= GFX9 && info->num_outputs) {
 		unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
-		lds_base = LLVMBuildMul(gallivm->builder, ac_get_thread_id(&ctx->ac),
+		LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
+		LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
+		vertex_idx = LLVMBuildOr(gallivm->builder, vertex_idx,
+					 LLVMBuildMul(gallivm->builder, wave_idx,
+						      LLVMConstInt(ctx->i32, 64, false), ""), "");
+		lds_base = LLVMBuildMul(gallivm->builder, vertex_idx,
 					LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
 	}

@@ -2930,6 +2971,9 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)

 	ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
 			 si_get_gs_wave_id(ctx));
+
+	if (ctx->screen->b.chip_class >= GFX9)
+		lp_build_endif(&ctx->merged_wrap_if_state);
 }

 static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
@@ -5497,14 +5541,20 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 	preload_ring_buffers(ctx);

 	/* For GFX9 merged shaders:
-	 * - Set EXEC. If the prolog is present, set EXEC there instead.
+	 * - Set EXEC for the first shader. If the prolog is present, set
+	 *   EXEC there instead.
 	 * - Add a barrier before the second shader.
+	 * - In the second shader, reset EXEC to ~0 and wrap the main part in
+	 *   an if-statement. This is required for correctness in geometry
+	 *   shaders, to ensure that empty GS waves do not send GS_EMIT and
+	 *   GS_CUT messages.
 	 *
-	 * The same thing for monolithic shaders is done in
-	 * si_build_wrapper_function.
+	 * For monolithic merged shaders, the first shader is wrapped in an
+	 * if-block together with its prolog in si_build_wrapper_function.
 	 */
-	if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
-		if (sel->info.num_instructions > 1 && /* not empty shader */
+	if (ctx->screen->b.chip_class >= GFX9) {
+		if (!is_monolithic &&
+		    sel->info.num_instructions > 1 && /* not empty shader */
 		    (shader->key.as_es || shader->key.as_ls) &&
 		    (ctx->type == PIPE_SHADER_TESS_EVAL ||
 		     (ctx->type == PIPE_SHADER_VERTEX &&
@@ -5513,9 +5563,19 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
 						ctx->param_merged_wave_info, 0);
 		} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
 			   ctx->type == PIPE_SHADER_GEOMETRY) {
-			si_init_exec_from_input(ctx,
-						ctx->param_merged_wave_info, 8);
+			if (!is_monolithic)
+				si_init_exec_full_mask(ctx);
+
+			/* The barrier must execute for all shaders in a
+			 * threadgroup.
+			 */
 			si_llvm_emit_barrier(NULL, bld_base, NULL);
+
+			LLVMValueRef num_threads = unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
+			LLVMValueRef ena =
+				LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
+					    ac_get_thread_id(&ctx->ac), num_threads, "");
+			lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
 		}
 	}

@@ -5986,15 +6046,9 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,

 		/* Merged shaders are executed conditionally depending
 		 * on the number of enabled threads passed in the input SGPRs. */
-		if (is_merged_shader(ctx->shader) &&
-		    (part == 0 || part == next_shader_first_part)) {
+		if (is_merged_shader(ctx->shader) && part == 0) {
 			LLVMValueRef ena, count = initial[3];

-			/* The thread count for the 2nd shader is at bit-offset 8. */
-			if (part == next_shader_first_part) {
-				count = LLVMBuildLShr(builder, count,
-						      LLVMConstInt(ctx->i32, 8, 0), "");
-			}
 			count = LLVMBuildAnd(builder, count,
 					     LLVMConstInt(ctx->i32, 0x7f, 0), "");
 			ena = LLVMBuildICmp(builder, LLVMIntULT,
@@ -6051,26 +6105,20 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
 		ret = LLVMBuildCall(builder, parts[part], in, num_params, "");

 		if (is_merged_shader(ctx->shader) &&
-		    (part + 1 == next_shader_first_part ||
-		     part + 1 == num_parts)) {
+		    part + 1 == next_shader_first_part) {
 			lp_build_endif(&if_state);

-			if (part + 1 == next_shader_first_part) {
-				/* A barrier is required between 2 merged shaders. */
-				si_llvm_emit_barrier(NULL, &ctx->bld_base, NULL);
-
-				/* The second half of the merged shader should use
-				 * the inputs from the toplevel (wrapper) function,
-				 * not the return value from the last call.
-				 *
-				 * That's because the last call was executed condi-
-				 * tionally, so we can't consume it in the main
-				 * block.
-				 */
-				memcpy(out, initial, sizeof(initial));
-				num_out = initial_num_out;
-				num_out_sgpr = initial_num_out_sgpr;
-			}
+			/* The second half of the merged shader should use
+			 * the inputs from the toplevel (wrapper) function,
+			 * not the return value from the last call.
+			 *
+			 * That's because the last call was executed condi-
+			 * tionally, so we can't consume it in the main
+			 * block.
+			 */
+			memcpy(out, initial, sizeof(initial));
+			num_out = initial_num_out;
+			num_out_sgpr = initial_num_out_sgpr;
 			continue;
 		}

--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -25,6 +25,7 @@
 #define SI_SHADER_PRIVATE_H

 #include "si_shader.h"
+#include "gallivm/lp_bld_flow.h"
 #include "gallivm/lp_bld_init.h"
 #include "gallivm/lp_bld_tgsi.h"
 #include "tgsi/tgsi_parse.h"
@@ -105,6 +106,8 @@ struct si_shader_context {
 	unsigned flow_depth;
 	unsigned flow_depth_max;

+	struct lp_build_if_state merged_wrap_if_state;
+
 	struct tgsi_array_info *temp_arrays;
 	LLVMValueRef *temp_array_allocas;

--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -74,11 +74,6 @@ static unsigned si_map_swizzle(unsigned swizzle)
 	}
 }

-static uint32_t S_FIXED(float value, uint32_t frac_bits)
-{
-	return value * (1 << frac_bits);
-}
-
 /* 12.4 fixed-point */
 static unsigned si_pack_float_12p4(float x)
 {
@@ -2457,6 +2452,38 @@ static void si_init_depth_surface(struct si_context *sctx,
 	surf->depth_initialized = true;
 }

+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
+{
+	if (sctx->decompression_enabled)
+		return;
+
+	if (sctx->framebuffer.state.zsbuf) {
+		struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
+		struct r600_texture *rtex = (struct r600_texture *)surf->texture;
+
+		rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+
+		if (rtex->surface.flags & RADEON_SURF_SBUFFER)
+			rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
+	}
+	if (sctx->framebuffer.compressed_cb_mask) {
+		struct pipe_surface *surf;
+		struct r600_texture *rtex;
+		unsigned mask = sctx->framebuffer.compressed_cb_mask;
+
+		do {
+			unsigned i = u_bit_scan(&mask);
+			surf = sctx->framebuffer.state.cbufs[i];
+			rtex = (struct r600_texture*)surf->texture;
+
+			if (rtex->fmask.size)
+				rtex->dirty_level_mask |= 1 << surf->u.tex.level;
+			if (rtex->dcc_gather_statistics)
+				rtex->separate_dcc_dirty = true;
+		} while (mask);
+	}
+}
+
 static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
 {
 	for (int i = 0; i < state->nr_cbufs; ++i) {
@@ -2484,6 +2511,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 	bool unbound = false;
 	int i;

+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
 		if (!sctx->framebuffer.state.cbufs[i])
 			continue;
@@ -2681,7 +2710,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		 * changes come from the decompression passes themselves.
 		 */
 		sctx->need_check_render_feedback = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 	}
 }

@@ -3989,6 +4017,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 {
 	struct si_context *sctx = (struct si_context *)ctx;

+	si_update_fb_dirtiness_after_rendering(sctx);
+
 	/* Multisample surfaces are flushed in si_decompress_textures. */
 	if (sctx->framebuffer.nr_samples <= 1 &&
 	    sctx->framebuffer.state.nr_cbufs) {
@@ -3996,7 +4026,6 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
 				 SI_CONTEXT_INV_GLOBAL_L2 |
 				 SI_CONTEXT_FLUSH_AND_INV_CB;
 	}
-	sctx->framebuffer.do_update_surf_dirtiness = true;
 }

 /* This only ensures coherency for shader image/buffer stores. */
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -384,6 +384,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
 			      const struct pipe_sampler_view *state,
 			      unsigned width0, unsigned height0,
 			      unsigned force_level);
+void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);

 /* si_state_shader.c */
 bool si_update_shaders(struct si_context *sctx);
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1207,7 +1207,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->framebuffer.dirty_cbufs |=
 			((1 << sctx->framebuffer.state.nr_cbufs) - 1);
 		sctx->framebuffer.dirty_zsbuf = true;
-		sctx->framebuffer.do_update_surf_dirtiness = true;
 		si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 		si_update_all_texture_descriptors(sctx);
 	}
@@ -1392,36 +1391,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 		sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
 	}

-	if (sctx->framebuffer.do_update_surf_dirtiness) {
-		/* Set the depth buffer as dirty. */
-		if (sctx->framebuffer.state.zsbuf) {
-			struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
-			struct r600_texture *rtex = (struct r600_texture *)surf->texture;
-
-			rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-
-			if (rtex->surface.flags & RADEON_SURF_SBUFFER)
-				rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
-		}
-		if (sctx->framebuffer.compressed_cb_mask) {
-			struct pipe_surface *surf;
-			struct r600_texture *rtex;
-			unsigned mask = sctx->framebuffer.compressed_cb_mask;
-
-			do {
-				unsigned i = u_bit_scan(&mask);
-				surf = sctx->framebuffer.state.cbufs[i];
-				rtex = (struct r600_texture*)surf->texture;
-
-				if (rtex->fmask.size)
-					rtex->dirty_level_mask |= 1 << surf->u.tex.level;
-				if (rtex->dcc_gather_statistics)
-					rtex->separate_dcc_dirty = true;
-			} while (mask);
-		}
-		sctx->framebuffer.do_update_surf_dirtiness = false;
-	}
-
 	sctx->b.num_draw_calls++;
 	if (info->primitive_restart)
 		sctx->b.num_prim_restart_calls++;
--- a/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
+++ b/src/gallium/drivers/swr/rasterizer/codegen/templates/gen_knobs.cpp
@@ -140,6 +140,26 @@ extern GlobalKnobs g_GlobalKnobs;
 //========================================================
 void KnobBase::autoExpandEnvironmentVariables(std::string &text)
 {
+#if (__GNUC__) && (GCC_VERSION < 409000)
+    // <regex> isn't implemented prior to gcc-4.9.0
+    // unix style variable replacement; the closing brace is searched from the opener onwards
+    size_t start;
+    while ((start = text.find("${'${'}")) != std::string::npos) {
+        size_t end = text.find("}", start); // a stray '}' before the opener must not match
+        if (end == std::string::npos)
+            break;
+        const std::string var = GetEnv(text.substr(start + 2, end - start - 2));
+        text.replace(start, end - start + 1, var);
+    }
+    // win32 style variable replacement
+    while ((start = text.find("%")) != std::string::npos) {
+        size_t end = text.find("%", start + 1);
+        if (end == std::string::npos)
+            break;
+        const std::string var = GetEnv(text.substr(start + 1, end - start - 1));
+        text.replace(start, end - start + 1, var);
+    }
+#else
    {
        // unix style variable replacement
        static std::regex env("\\$\\{([^}]+)\\}");
@@ -164,6 +184,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string &text)
            text.replace(match.prefix().length(), match[0].length(), var);
        }
    }
+#endif
 }


--- a/src/gallium/drivers/swr/swr_draw.cpp
+++ b/src/gallium/drivers/swr/swr_draw.cpp
@@ -81,8 +81,11 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
               offsets[output_buffer] = so->output[i].dst_offset;
            }

+            unsigned attrib_slot = so->output[i].register_index;
+            attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
+
            state.stream.decl[num].bufferIndex = output_buffer;
-            state.stream.decl[num].attribSlot = so->output[i].register_index - 1;
+            state.stream.decl[num].attribSlot = attrib_slot;
            state.stream.decl[num].componentMask =
               ((1 << so->output[i].num_components) - 1)
               << so->output[i].start_component;
@@ -129,10 +132,36 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
    * XXX setup provokingVertex & topologyProvokingVertex */
   SWR_FRONTEND_STATE feState = {0};

-   feState.vsVertexSize =
-      VERTEX_ATTRIB_START_SLOT +
-      + ctx->vs->info.base.num_outputs
-      - (ctx->vs->info.base.writes_position ? 1 : 0);
+   // feState.vsVertexSize seeds the PA size that is used as an interface
+   // between all the shader stages, so it has to be large enough to
+   // incorporate all interfaces between stages
+
+   // max of gs and vs num_outputs
+   feState.vsVertexSize = ctx->vs->info.base.num_outputs;
+   if (ctx->gs &&
+       ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
+      feState.vsVertexSize = ctx->gs->info.base.num_outputs;
+   }
+
+   if (ctx->vs->info.base.num_outputs) {
+      // gs does not adjust for position in SGV slot at input from vs
+      if (!ctx->gs)
+         feState.vsVertexSize--;
+   }
+
+   // other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
+   feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
+
+   // The PA in the clipper does not handle BE vertex sizes
+   // different from FE. Increase vertexsize only for the cases that needed it
+
+   // primid needs a slot
+   if (ctx->fs->info.base.uses_primid)
+      feState.vsVertexSize++;
+   // sprite coord enable
+   if (ctx->rasterizer->sprite_coord_enable)
+      feState.vsVertexSize++;
+

   if (ctx->rasterizer->flatshade_first) {
      feState.provokingVertex = {1, 0, 0};
--- a/src/gallium/drivers/swr/swr_shader.cpp
+++ b/src/gallium/drivers/swr/swr_shader.cpp
@@ -414,7 +414,10 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
       } else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
          attribSlot = VERTEX_POSITION_SLOT;
       } else {
-          attribSlot = VERTEX_ATTRIB_START_SLOT + attrib - 1;
+          attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
+          if (iface->info->writes_position) {
+             attribSlot--;
+          }
       }

 #if USE_SIMD16_FRONTEND
@@ -923,6 +926,33 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
   return func;
 }

+// Translate a stream-output register index into an SWR vertex slot.
+unsigned
+swr_so_adjust_attrib(unsigned in_attrib,
+                     swr_vertex_shader *swr_vs)
+{
+   const unsigned generic_slot = in_attrib + VERTEX_ATTRIB_START_SLOT;
+   // Without a vertex shader there is no semantic info to consult.
+   if (!swr_vs)
+      return generic_slot;
+
+   switch (swr_vs->info.base.output_semantic_name[in_attrib]) {
+   case TGSI_SEMANTIC_POSITION:
+      return VERTEX_POSITION_SLOT;
+   case TGSI_SEMANTIC_PSIZE:
+   case TGSI_SEMANTIC_LAYER:
+      // Point size and layer both map to the SGV slot.
+      return VERTEX_SGV_SLOT;
+   default:
+      break;
+   }
+
+   // Any other output: shift down by one when the shader writes position.
+   if (swr_vs->info.base.writes_position)
+      return generic_slot - 1;
+   return generic_slot;
+}
+
 static unsigned
 locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
 {
--- a/src/gallium/drivers/swr/swr_shader.h
+++ b/src/gallium/drivers/swr/swr_shader.h
@@ -30,6 +30,9 @@ struct swr_jit_fs_key;
 struct swr_jit_vs_key;
 struct swr_jit_gs_key;

+unsigned swr_so_adjust_attrib(unsigned in_attrib,
+                              swr_vertex_shader *swr_vs);
+
 PFN_VERTEX_FUNC
 swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key);

--- a/src/gallium/drivers/swr/swr_state.cpp
+++ b/src/gallium/drivers/swr/swr_state.cpp
@@ -345,13 +345,14 @@ swr_create_vs_state(struct pipe_context *pipe,
      // soState.streamToRasterizer not used

      for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
+         unsigned attrib_slot = stream_output->output[i].register_index;
+         attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs);
         swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
-            1 << (stream_output->output[i].register_index - 1);
+            (1 << attrib_slot);
      }
      for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
        swr_vs->soState.streamNumEntries[i] =
             _mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
-        swr_vs->soState.vertexAttribOffset[i] = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
       }
   }

--- a/src/gallium/drivers/vc4/vc4_blit.c
+++ b/src/gallium/drivers/vc4/vc4_blit.c
@@ -212,14 +212,16 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
        if (vc4_tile_blit(pctx, blit_info))
                return;

-        if (util_try_blit_via_copy_region(pctx, &info)) {
-                return; /* done */
-        }
-
        if (info.mask & PIPE_MASK_S) {
-                fprintf(stderr, "cannot blit stencil, skipping\n");
+                if (util_try_blit_via_copy_region(pctx, &info))
+                        return;
+
                info.mask &= ~PIPE_MASK_S;
+                fprintf(stderr, "cannot blit stencil, skipping\n");
        }

-        vc4_render_blit(pctx, &info);
+        if (vc4_render_blit(pctx, &info))
+                return;
+
+        fprintf(stderr, "Unsupported blit\n");
 }
--- a/src/gallium/drivers/virgl/virgl_encode.c
+++ b/src/gallium/drivers/virgl/virgl_encode.c
@@ -409,7 +409,7 @@ int virgl_encoder_set_index_buffer(struct virgl_context *ctx,
   virgl_encoder_write_res(ctx, res);
   if (ib) {
      virgl_encoder_write_dword(ctx->cbuf, ib->index_size);
-      virgl_encoder_write_dword(ctx->cbuf, 0);
+      virgl_encoder_write_dword(ctx->cbuf, ib->offset);
   }
   return 0;
 }
--- a/src/gallium/drivers/virgl/virgl_tgsi.c
+++ b/src/gallium/drivers/virgl/virgl_tgsi.c
@@ -48,6 +48,15 @@ virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
   }
 }

+/* Clear the Precise flag on every instruction, then emit it unchanged. */
+static void
+virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
+				 struct tgsi_full_instruction *inst)
+{
+   inst->Instruction.Precise = 0;
+   ctx->emit_instruction(ctx, inst);
+}
+
 struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
 {

@@ -61,6 +70,7 @@ struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)

   memset(&transform, 0, sizeof(transform));
   transform.base.transform_property = virgl_tgsi_transform_property;
+   transform.base.transform_instruction = virgl_tgsi_transform_instruction;
   tgsi_transform_shader(tokens_in, new_tokens, newLen, &transform.base);

   return new_tokens;
--- a/src/gallium/include/state_tracker/st_api.h
+++ b/src/gallium/include/state_tracker/st_api.h
@@ -284,6 +284,7 @@ struct st_context_attribs
 };

 struct st_context_iface;
+struct st_manager;

 /**
 * Represent a windowing system drawable.
@@ -316,6 +317,11 @@ struct st_framebuffer_iface
    */
   uint32_t ID;

+   /**
+    * The state tracker manager that manages this object.
+    */
+   struct st_manager *state_manager;
+
   /**
    * Available for the state tracker manager to use.
    */
@@ -375,6 +381,11 @@ struct st_context_iface
   void *st_context_private;
   void *st_manager_private;

+   /**
+    * The state tracker manager that manages this object.
+    */
+   struct st_manager *state_manager;
+
   /**
    * The CSO context associated with this context in case we need to draw
    * something before swap buffers.
@@ -483,6 +494,16 @@ struct st_manager
    */
   void (*set_background_context)(struct st_context_iface *stctxi,
                                  struct util_queue_monitoring *queue_info);
+
+   /**
+    * Destroy any private data used by the state tracker manager.
+    */
+   void (*destroy)(struct st_manager *smapi);
+
+   /**
+    * Available for the state tracker manager to use.
+    */
+   void *st_manager_private;
 };

 /**
--- a/src/gallium/state_trackers/dri/dri2.c
+++ b/src/gallium/state_trackers/dri/dri2.c
@@ -1183,26 +1183,30 @@ dri2_query_image(__DRIimage *image, int attrib, int *value)
   switch (attrib) {
   case __DRI_IMAGE_ATTRIB_STRIDE:
      whandle.type = DRM_API_HANDLE_TYPE_KMS;
-      image->texture->screen->resource_get_handle(image->texture->screen,
-            NULL, image->texture, &whandle, usage);
+      if (!image->texture->screen->resource_get_handle(image->texture->screen,
+            NULL, image->texture, &whandle, usage))
+         return GL_FALSE;
      *value = whandle.stride;
      return GL_TRUE;
   case __DRI_IMAGE_ATTRIB_OFFSET:
      whandle.type = DRM_API_HANDLE_TYPE_KMS;
-      image->texture->screen->resource_get_handle(image->texture->screen,
-            NULL, image->texture, &whandle, usage);
+      if (!image->texture->screen->resource_get_handle(image->texture->screen,
+            NULL, image->texture, &whandle, usage))
+         return GL_FALSE;
      *value = whandle.offset;
      return GL_TRUE;
   case __DRI_IMAGE_ATTRIB_HANDLE:
      whandle.type = DRM_API_HANDLE_TYPE_KMS;
-      image->texture->screen->resource_get_handle(image->texture->screen,
-         NULL, image->texture, &whandle, usage);
+      if (!image->texture->screen->resource_get_handle(image->texture->screen,
+         NULL, image->texture, &whandle, usage))
+         return GL_FALSE;
      *value = whandle.handle;
      return GL_TRUE;
   case __DRI_IMAGE_ATTRIB_NAME:
      whandle.type = DRM_API_HANDLE_TYPE_SHARED;
-      image->texture->screen->resource_get_handle(image->texture->screen,
-         NULL, image->texture, &whandle, usage);
+      if (!image->texture->screen->resource_get_handle(image->texture->screen,
+         NULL, image->texture, &whandle, usage))
+         return GL_FALSE;
      *value = whandle.handle;
      return GL_TRUE;
   case __DRI_IMAGE_ATTRIB_FD:
@@ -1235,14 +1239,22 @@ dri2_query_image(__DRIimage *image, int attrib, int *value)
      return GL_TRUE;
   case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER:
      whandle.type = DRM_API_HANDLE_TYPE_KMS;
-      image->texture->screen->resource_get_handle(image->texture->screen,
-            NULL, image->texture, &whandle, usage);
+      whandle.modifier = DRM_FORMAT_MOD_INVALID;
+      if (!image->texture->screen->resource_get_handle(image->texture->screen,
+            NULL, image->texture, &whandle, usage))
+         return GL_FALSE;
+      if (whandle.modifier == DRM_FORMAT_MOD_INVALID)
+         return GL_FALSE;
      *value = (whandle.modifier >> 32) & 0xffffffff;
      return GL_TRUE;
   case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER:
      whandle.type = DRM_API_HANDLE_TYPE_KMS;
-      image->texture->screen->resource_get_handle(image->texture->screen,
-            NULL, image->texture, &whandle, usage);
+      whandle.modifier = DRM_FORMAT_MOD_INVALID;
+      if (!image->texture->screen->resource_get_handle(image->texture->screen,
+            NULL, image->texture, &whandle, usage))
+         return GL_FALSE;
+      if (whandle.modifier == DRM_FORMAT_MOD_INVALID)
+         return GL_FALSE;
      *value = whandle.modifier & 0xffffffff;
      return GL_TRUE;
   default:
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -158,6 +158,7 @@ dri_create_buffer(__DRIscreen * sPriv,
   dPriv->driverPrivate = (void *)drawable;
   p_atomic_set(&drawable->base.stamp, 1);
   drawable->base.ID = p_atomic_inc_return(&drifb_ID);
+   drawable->base.state_manager = &screen->base;

   return GL_TRUE;
 fail:
--- a/src/gallium/state_trackers/dri/dri_screen.c
+++ b/src/gallium/state_trackers/dri/dri_screen.c
@@ -457,6 +457,9 @@ dri_destroy_option_cache(struct dri_screen * screen)
 void
 dri_destroy_screen_helper(struct dri_screen * screen)
 {
+   if (screen->base.destroy)
+      screen->base.destroy(&screen->base);
+
   if (screen->st_api && screen->st_api->destroy)
      screen->st_api->destroy(screen->st_api);

--- a/src/gallium/state_trackers/glx/xlib/xm_api.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_api.c
@@ -181,6 +181,9 @@ xmesa_close_display(Display *display)
    *    xmdpy->screen->destroy(xmdpy->screen);
    * }
    */
+
+   if (xmdpy->smapi->destroy)
+      xmdpy->smapi->destroy(xmdpy->smapi);
   free(xmdpy->smapi);

   XFree((char *) info);
--- a/src/gallium/state_trackers/glx/xlib/xm_st.c
+++ b/src/gallium/state_trackers/glx/xlib/xm_st.c
@@ -304,6 +304,7 @@ xmesa_create_st_framebuffer(XMesaDisplay xmdpy, XMesaBuffer b)
   stfbi->flush_front = xmesa_st_framebuffer_flush_front;
   stfbi->validate = xmesa_st_framebuffer_validate;
   stfbi->ID = p_atomic_inc_return(&xmesa_stfbi_ID);
+   stfbi->state_manager = xmdpy->smapi;
   p_atomic_set(&stfbi->stamp, 1);
   stfbi->st_manager_private = (void *) xstfb;

--- a/src/gallium/state_trackers/wgl/stw_device.c
+++ b/src/gallium/state_trackers/wgl/stw_device.c
@@ -199,6 +199,9 @@ stw_cleanup(void)
   DeleteCriticalSection(&stw_dev->fb_mutex);
   DeleteCriticalSection(&stw_dev->ctx_mutex);

+   if (stw_dev->smapi->destroy)
+      stw_dev->smapi->destroy(stw_dev->smapi);
+
   FREE(stw_dev->smapi);
   stw_dev->stapi->destroy(stw_dev->stapi);

--- a/src/gallium/state_trackers/wgl/stw_st.c
+++ b/src/gallium/state_trackers/wgl/stw_st.c
@@ -235,6 +235,7 @@ stw_st_create_framebuffer(struct stw_framebuffer *fb)
   stwfb->fb = fb;
   stwfb->stvis = fb->pfi->stvis;
   stwfb->base.ID = p_atomic_inc_return(&stwfb_ID);
+   stwfb->base.state_manager = stw_dev->smapi;

   stwfb->base.visual = &stwfb->stvis;
   p_atomic_set(&stwfb->base.stamp, 1);
--- a/src/intel/Makefile.sources
+++ b/src/intel/Makefile.sources
@@ -3,6 +3,7 @@ BLORP_FILES = \
 	blorp/blorp.h \
 	blorp/blorp_blit.c \
 	blorp/blorp_clear.c \
+	blorp/blorp_nir_builder.h \
 	blorp/blorp_genX_exec.h \
 	blorp/blorp_priv.h

--- a/src/intel/vulkan/anv_cmd_buffer.c
+++ b/src/intel/vulkan/anv_cmd_buffer.c
@@ -566,7 +566,7 @@ void anv_CmdBindVertexBuffers(
   /* We have to defer setting up vertex buffer since we need the buffer
    * stride from the pipeline. */

-   assert(firstBinding + bindingCount < MAX_VBS);
+   assert(firstBinding + bindingCount <= MAX_VBS);
   for (uint32_t i = 0; i < bindingCount; i++) {
      vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
      vb[firstBinding + i].offset = pOffsets[i];
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -89,7 +89,7 @@ struct gen_l3_config;
 */
 #define ANV_HZ_FC_VAL 1.0f

-#define MAX_VBS         31
+#define MAX_VBS         28
 #define MAX_SETS         8
 #define MAX_RTS          8
 #define MAX_VIEWPORTS   16
--- a/src/mesa/drivers/dri/i965/brw_blorp.c
+++ b/src/mesa/drivers/dri/i965/brw_blorp.c
@@ -1103,7 +1103,7 @@ brw_blorp_mcs_partial_resolve(struct brw_context *brw,
   DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
       start_layer, start_layer + num_layers - 1);

-   assert(mt->aux_usage = ISL_AUX_USAGE_MCS);
+   assert(mt->aux_usage == ISL_AUX_USAGE_MCS);

   const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
   enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true);
--- a/src/mesa/drivers/dri/i965/brw_performance_query.c
+++ b/src/mesa/drivers/dri/i965/brw_performance_query.c
@@ -1212,6 +1212,14 @@ brw_begin_perf_query(struct gl_context *ctx,
      obj->oa.begin_report_id = brw->perfquery.next_query_start_report_id;
      brw->perfquery.next_query_start_report_id += 2;

+      /* We flush the batchbuffer here to minimize the chances that MI_RPC
+       * delimiting commands end up in different batchbuffers. If that's the
+       * case, the measurement will include the time it takes for the kernel
+       * scheduler to load a new request into the hardware. This is manifested in
+       * tools like frameretrace by spikes in the "GPU Core Clocks" counter.
+       */
+      intel_batchbuffer_flush(brw);
+
      /* Take a starting OA counter snapshot. */
      brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
                                          obj->oa.begin_report_id);
@@ -1298,14 +1306,6 @@ brw_end_perf_query(struct gl_context *ctx,
                                             obj->oa.begin_report_id + 1);
      }

-      /* We flush the batchbuffer here to minimize the chances that MI_RPC
-       * delimiting commands end up in different batchbuffers. If that's the
-       * case, the measurement will include the time it takes for the kernel
-       * scheduler to load a new request into the hardware. This is manifested
-       * in tools like frameretrace by spikes in the "GPU Core Clocks"
-       * counter.
-       */
-      intel_batchbuffer_flush(brw);
      --brw->perfquery.n_active_oa_queries;

      /* NB: even though the query has now ended, it can't be accumulated
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -101,9 +101,9 @@ get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
   assert(view->levels == 1 && view->array_len == 1);
   assert(*tile_x == 0 && *tile_y == 0);

-   offset += intel_miptree_get_tile_offsets(mt, view->base_level,
-                                            view->base_array_layer,
-                                            tile_x, tile_y);
+   *offset += intel_miptree_get_tile_offsets(mt, view->base_level,
+                                             view->base_array_layer,
+                                             tile_x, tile_y);

   /* Minify the logical dimensions of the texture. */
   const unsigned l = view->base_level - mt->first_level;
--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -3790,7 +3790,7 @@ get_image_qualifiers(ir_dereference *ir, const glsl_type **type,
      for (unsigned i = 0; i < struct_type->length; i++) {
         if (!strcmp(struct_type->fields.structure[i].name,
                     deref_record->field)) {
-            *type = struct_type->fields.structure[i].type;
+            *type = struct_type->fields.structure[i].type->without_array();
            *memory_coherent =
               struct_type->fields.structure[i].memory_coherent;
            *memory_volatile =
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -48,6 +48,7 @@
 #include "st_cb_fbo.h"
 #include "st_cb_flush.h"
 #include "st_manager.h"
+#include "st_sampler_view.h"

 #include "state_tracker/st_gl_api.h"

@@ -61,9 +62,13 @@
 #include "util/list.h"

 struct hash_table;
-static struct hash_table *st_fbi_ht; /* framebuffer iface objects hash table */
+struct st_manager_private
+{
+   struct hash_table *stfbi_ht; /* framebuffer iface objects hash table */
+   mtx_t st_mutex; /* held around every stfbi_ht search/insert/remove */
+};

-static mtx_t st_mutex = _MTX_INITIALIZER_NP;
+static void st_manager_destroy(struct st_manager *);

 /**
 * Map an attachment to a buffer index.
@@ -511,45 +516,63 @@ st_framebuffer_iface_equal(const void *a, const void *b)


 static boolean
-st_framebuffer_iface_lookup(const struct st_framebuffer_iface *stfbi)
+st_framebuffer_iface_lookup(struct st_manager *smapi,
+                            const struct st_framebuffer_iface *stfbi)
 {
+   struct st_manager_private *smPriv =
+      (struct st_manager_private *)smapi->st_manager_private;
   struct hash_entry *entry;

-   mtx_lock(&st_mutex);
-   entry = _mesa_hash_table_search(st_fbi_ht, stfbi);
-   mtx_unlock(&st_mutex);
+   assert(smPriv);
+   assert(smPriv->stfbi_ht);
+
+   mtx_lock(&smPriv->st_mutex);
+   entry = _mesa_hash_table_search(smPriv->stfbi_ht, stfbi);
+   mtx_unlock(&smPriv->st_mutex);

   return entry != NULL;
 }


 static boolean
-st_framebuffer_iface_insert(struct st_framebuffer_iface *stfbi)
+st_framebuffer_iface_insert(struct st_manager *smapi,
+                            struct st_framebuffer_iface *stfbi)
 {
+   struct st_manager_private *smPriv =
+      (struct st_manager_private *)smapi->st_manager_private;
   struct hash_entry *entry;

-   mtx_lock(&st_mutex);
-   entry = _mesa_hash_table_insert(st_fbi_ht, stfbi, stfbi);
-   mtx_unlock(&st_mutex);
+   assert(smPriv);
+   assert(smPriv->stfbi_ht);
+
+   mtx_lock(&smPriv->st_mutex);
+   entry = _mesa_hash_table_insert(smPriv->stfbi_ht, stfbi, stfbi);
+   mtx_unlock(&smPriv->st_mutex);

   return entry != NULL;
 }


 static void
-st_framebuffer_iface_remove(struct st_framebuffer_iface *stfbi)
+st_framebuffer_iface_remove(struct st_manager *smapi,
+                            struct st_framebuffer_iface *stfbi)
 {
+   struct st_manager_private *smPriv =
+      (struct st_manager_private *)smapi->st_manager_private;
   struct hash_entry *entry;

-   mtx_lock(&st_mutex);
-   entry = _mesa_hash_table_search(st_fbi_ht, stfbi);
+   if (!smPriv || !smPriv->stfbi_ht)
+      return;
+
+   mtx_lock(&smPriv->st_mutex);
+   entry = _mesa_hash_table_search(smPriv->stfbi_ht, stfbi);
   if (!entry)
      goto unlock;

-   _mesa_hash_table_remove(st_fbi_ht, entry);
+   _mesa_hash_table_remove(smPriv->stfbi_ht, entry);

 unlock:
-   mtx_unlock(&st_mutex);
+   mtx_unlock(&smPriv->st_mutex);
 }


@@ -561,7 +584,10 @@ static void
 st_api_destroy_drawable(struct st_api *stapi,
                        struct st_framebuffer_iface *stfbi)
 {
-   st_framebuffer_iface_remove(stfbi);
+   if (!stfbi)
+      return;
+
+   st_framebuffer_iface_remove(stfbi->state_manager, stfbi);
 }


@@ -572,16 +598,24 @@ st_api_destroy_drawable(struct st_api *stapi,
 static void
 st_framebuffers_purge(struct st_context *st)
 {
+   struct st_context_iface *st_iface = &st->iface;
+   struct st_manager *smapi = st_iface->state_manager;
   struct st_framebuffer *stfb, *next;

+   assert(smapi);
+
   LIST_FOR_EACH_ENTRY_SAFE_REV(stfb, next, &st->winsys_buffers, head) {
+      struct st_framebuffer_iface *stfbi = stfb->iface;
+
+      assert(stfbi);
+
      /**
       * If the corresponding framebuffer interface object no longer exists,
       * remove the framebuffer object from the context's winsys buffers list,
       * and unreference the framebuffer object, so its resources can be
       * deleted.
       */
-      if (!st_framebuffer_iface_lookup(stfb->iface)) {
+      if (!st_framebuffer_iface_lookup(smapi, stfbi)) {
         LIST_DEL(&stfb->head);
         st_framebuffer_reference(&stfb, NULL);
      }
@@ -609,6 +643,16 @@ st_context_flush(struct st_context_iface *stctxi, unsigned flags,

   if (flags & ST_FLUSH_FRONT)
      st_manager_flush_frontbuffer(st);
+
+   /* DRI3 changes the framebuffer after SwapBuffers, but we need to invoke
+    * st_manager_validate_framebuffers to notice that.
+    *
+    * Set gfx_shaders_may_be_dirty to invoke st_validate_state in the next
+    * draw call, which will invoke st_manager_validate_framebuffers, but it
+    * won't dirty states if there is no change.
+    */
+   if (flags & ST_FLUSH_END_OF_FRAME)
+      st->gfx_shaders_may_be_dirty = true;
 }

 static boolean
@@ -692,6 +736,7 @@ st_context_teximage(struct st_context_iface *stctxi,
   pipe_resource_reference(&stImage->pt, tex);
   stObj->surface_format = pipe_format;

+   st_texture_release_all_sampler_views(st, stObj);
   stObj->needs_validation = true;

   _mesa_dirty_texobj(ctx, texObj);
@@ -778,6 +823,21 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
      return NULL;
   }

+   /* Create a hash table for the framebuffer interface objects
+    * if it has not been created for this st manager.
+    */
+   if (smapi->st_manager_private == NULL) {
+      struct st_manager_private *smPriv;
+
+      smPriv = CALLOC_STRUCT(st_manager_private);
+      mtx_init(&smPriv->st_mutex, mtx_plain);
+      smPriv->stfbi_ht = _mesa_hash_table_create(NULL,
+                                                 st_framebuffer_iface_hash,
+                                                 st_framebuffer_iface_equal);
+      smapi->st_manager_private = smPriv;
+      smapi->destroy = st_manager_destroy;
+   }
+
   if (attribs->flags & ST_CONTEXT_FLAG_ROBUST_ACCESS)
      ctx_flags |= PIPE_CONTEXT_ROBUST_BUFFER_ACCESS;

@@ -846,6 +906,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
   st->iface.st_context_private = (void *) smapi;
   st->iface.cso_context = st->cso_context;
   st->iface.pipe = st->pipe;
+   st->iface.state_manager = smapi;

   *error = ST_CONTEXT_SUCCESS;
   return &st->iface;
@@ -888,7 +949,7 @@ st_framebuffer_reuse_or_create(struct st_context *st,
         /* add the referenced framebuffer interface object to
          * the framebuffer interface object hash table.
          */
-         if (!st_framebuffer_iface_insert(stfbi)) {
+         if (!st_framebuffer_iface_insert(stfbi->state_manager, stfbi)) {
            st_framebuffer_reference(&cur, NULL);
            return NULL;
         }
@@ -964,8 +1025,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
 static void
 st_api_destroy(struct st_api *stapi)
 {
-   _mesa_hash_table_destroy(st_fbi_ht, NULL);
-   mtx_destroy(&st_mutex);
 }

 /**
@@ -1051,6 +1110,19 @@ st_manager_add_color_renderbuffer(struct st_context *st,
   return TRUE;
 }

+/* Free the manager's private data: iface hash table, mutex and the struct. */
+static void
+st_manager_destroy(struct st_manager *smapi)
+{
+   struct st_manager_private *smPriv = smapi->st_manager_private;
+   if (smPriv) {
+      _mesa_hash_table_destroy(smPriv->stfbi_ht, NULL); /* NULL is ok */
+      mtx_destroy(&smPriv->st_mutex);
+      free(smPriv);
+      smapi->st_manager_private = NULL;
+   }
+}
+
 static unsigned
 get_version(struct pipe_screen *screen,
            struct st_config_options *options, gl_api api)
@@ -1106,12 +1178,5 @@ static const struct st_api st_gl_api = {
 struct st_api *
 st_gl_api_create(void)
 {
-   /* Create a hash table for all the framebuffer interface objects */
-
-   mtx_init(&st_mutex, mtx_plain);
-   st_fbi_ht = _mesa_hash_table_create(NULL,
-                                       st_framebuffer_iface_hash,
-                                       st_framebuffer_iface_equal);
-
   return (struct st_api *) &st_gl_api;
 }
Author	SHA1	Message	Date
Emil Velikov	58fe86a6d6	Update version to 17.2.0-rc2 Signed-off-by: Emil Velikov <emil.velikov@collabora.com>	2017-07-31 10:52:13 +01:00
Samuel Pitoiset	d466a70532	st/glsl_to_tgsi: fix getting the image type for array of structs Since array splitting for AoA is disabled, we have to retrieve the type of the first non-array type when an array of images is declared inside a structure. Otherwise, it will hit an assert in glsl_type::sampler_index() because it expects either a sampler or an image type. This fixes a regression in the following piglit test: arb_bindless_texture/compiler/images/arrays-of-struct.frag Fixes: `57165f2ef8` ("glsl: disable array splitting for AoA") Cc: 17.2 <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `f99e9335e2`)	2017-07-31 10:26:27 +01:00
Marek Olšák	e62eddcdbe	st/mesa: release sampler views when redefining a texture in st_context_teximage Noticed randomly. Cc: 17.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (cherry picked from commit `5c1241268b`)	2017-07-31 10:25:55 +01:00
Dave Airlie	6d07e58afb	radv: for stencil only set Z tile mode index to same value On SI this was causing a hang in dEQP-VK.pipeline.render_to_image.core.2d_array.mipmap.r16g16_sint_s8_uint This was due to not handling the tile mode index for depth like I fixed previously for new GPUs. Fixes: `01d0c5a9` (radv: fix stencil regression since new addrlib import) Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `800d162209`)	2017-07-31 10:24:42 +01:00
Dave Airlie	f9e563597d	virgl: drop precise modifier. The host doesn't understand this yet, so drop it for now. Fixes: virgl regressions. Fixes: `af22adee4f` (tgsi: add precise flag to tgsi_instruction) Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `554aa09440`)	2017-07-31 10:23:09 +01:00
Marek Olšák	5b61ba4432	radeonsi: update dirty_level_mask only when flushing or unbinding framebuffer This fixes corruption with bindless textures in Dawn Of War 3. The do_update_surf_dirtiness mechanism was complicated and dirty_level_mask was only updated after the first draw call. The problem is bindless textures are checked for decompression every draw call and we would only decompress after the first draw call. The solution is to set dirtiness after the last draw call to the framebuffer, so the (unconditional) decompression of bindless textures happens at the right time. Cc: 17.2 <mesa-stable@lists.freedesktop.org> Tested-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `f4d095cc65`)	2017-07-31 10:21:17 +01:00
Marek Olšák	6625382b1c	st/mesa: always unconditionally revalidate main framebuffer after SwapBuffers This fixes the black Feral launcher window. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101867 Cc: 17.2 <mesa-stable@lists.freedesktop.org> Tested-by: Edmondo Tommasina <edmondo.tommasina@gmail.com> (cherry picked from commit `7257c171e9`)	2017-07-31 10:20:44 +01:00
Nicolai Hähnle	2bca74253d	radeonsi/gfx9: always wrap GS and TCS in an if-block (v2) With merged ESGS shaders, the GS part of a wave may be empty, and the hardware gets confused if any GS messages are sent from that wave. Since S_SENDMSG is executed even when EXEC = 0, we have to wrap even non-monolithic GS shaders in an if-block, so that the entire shader and hence the S_SENDMSG instructions are skipped in empty waves. This change is not required for TCS/HS, but applying it there as well simplifies the logic a bit. Fixes GL45-CTS.geometry_shader.rendering.rendering.* v2: ensure that the TCS epilog doesn't run for non-existing patches Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `081ac6e5c6`)	2017-07-31 10:20:20 +01:00
Nicolai Hähnle	b36ff2d1f2	radeonsi/gfx9: fix vertex idx in ES with multiple waves per threadgroup Cc: mesa-stable@lists.freedesktop.org Reviewed: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `873789002f`)	2017-07-31 10:20:12 +01:00
George Kyriazis	99b2613ce1	swr: fix transform feedback logic The shader that is used to copy vertex data out of the vs/gs shaders to the user-specified buffer (streamout or SO shader) was not using the correct offsets. Adjust the offsets that are used just for the SO shader: - Make sure that position is handled in the same special way as in the vs/gs shaders - Use the correct offset to be passed in the core - consolidate register slot mapping logic into one function, since it's been calculated in 2 different places (one for calculating the slot mask, and one for the register offsets themselves) Also make room for all attributes in the backend vertex area. Fixes: - all vtk GL2PS tests - 18 piglit tests (16 ext_transform_feedback tests, arb-quads-follow-provoking-vertex and primitive-type gl_points v2: - take care of more SGV slots in slot mapping logic - trim feState.vsVertexSize - fix GS interface and incorporate GS while calculating vsVertexSize Note that vsVertexSize is used in the core as the one parameter that controls vertex size between all stages, so it has to be adjusted appropriately for the whole vs/gs/fs pipeline. Also note that GS and SO is not fully implemented. This will be addressed later. fixes: - fixes total of 20 piglit tests CC: 17.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com> (cherry picked from commit `194ff5eed1`)	2017-07-31 10:19:55 +01:00
Dave Airlie	f9c7549605	radv/ac: port SI TC L1 write corruption fix. This ports `72e46c988` to radv. radeonsi: apply a TC L1 write corruption workaround for SI Fixes: `f4e499ec7` (radv: add initial non-conformant radv vulkan driver) Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `e77ff11ffe`)	2017-07-27 19:54:53 +01:00
Dave Airlie	2ce4f0afd3	radv/ac: realign SI workaround with radeonsi. This ports: `da7453666a` radeonsi: don't apply the Z export bug workaround to Hainan to radv. Just noticed in passing. Fixes: `f4e499ec7` (radv: add initial non-conformant radv vulkan driver) Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `a81e99f50a`)	2017-07-27 19:54:52 +01:00
Dave Airlie	546282e8bc	radv: only report external semaphore info for opaque fd. Until we support sync fd, don't report the info. Fixes CTS dEQP-VK.api.external.semaphore.sync_fd.* from crashing. Fixes: `eaa56eab6` (radv: initial support for shared semaphores (v2)) Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `6cbc8cf178`)	2017-07-27 19:54:52 +01:00
Dave Airlie	de55bc8f49	radv: fix buffer views on SI/CIK. Fixes CTS dEQP-VK.memory.pipeline_barrier.host_write_uniform_texel_buffer.1024 on SI/CIK with radv. Fixes: `f4e499ec` (radv: add initial non-conformant radv vulkan driver) Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `ca82ef5ac7`)	2017-07-27 19:54:52 +01:00
Daniel Stone	f0b7563a36	st/dri2: Return invalid modifier when no driver support Always initialise whandle.modifier for DRIImage modifier queries, so if the driver doesn't support it then we return false for the query. Signed-off-by: Daniel Stone <daniels@collabora.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Fixes: `d33fe8b84e` ("st/dri: enable DRIimage modifier queries") (cherry picked from commit `45383d32d4`)	2017-07-27 19:54:52 +01:00
Daniel Stone	5bee196840	st/dri: Check get-handle return value in queryImage In the DRIImage queryImage hook, check if resource_get_handle() failed and return FALSE if so. Signed-off-by: Daniel Stone <daniels@collabora.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `b4a18f13ce`)	2017-07-27 19:54:52 +01:00
Daniel Stone	2a1792981c	egl/wayland: Ignore invalid modifiers If the underlying driver does not support modifiers, dmabuf will still advertise formats through the 'modifier' event, but send them with an invalid modifier. Ignore them if this is the case, rather than passing them through to the driver. Signed-off-by: Daniel Stone <daniels@collabora.com> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> Fixes: `02cc359372` ("egl/wayland: Use linux-dmabuf interface for buffers") (cherry picked from commit `dd072cf4b1`)	2017-07-27 19:54:52 +01:00
Tim Rowley	a2b7477603	swr/rast: non-regex knob fallback code for gcc < 4.9 gcc prior to 4.9 didn't implement <regex>, causing a startup crash in the swr knob parameter reading code. CC: <mesa-stable@lists.freedesktop.org> Reviewed-by: Bruce Cherniak <bruce.cherniak@intel.com> (cherry picked from commit `e21fc2c625`)	2017-07-27 19:54:52 +01:00
Dave Airlie	3e777d5cab	virgl: encode index buffer offset. Fixes arb_vertex_buffer_object-combined-vertex-index Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Dave Airlie <airlied@redhat.com> (cherry picked from commit `c4652a0a5b`)	2017-07-27 19:54:52 +01:00
Marek Olšák	9bb6aa5794	ac/surface: fix hybrid graphics where APU=GFX9, dGPU=older v2: don't do it for compressed textures (bpp = 0) Cc: 17.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1) Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (v1) (cherry picked from commit `5e81df0f10`)	2017-07-27 19:54:52 +01:00
Marek Olšák	90bbcb93b1	radeonsi: decrease the number of compiler threads Cc: 17.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (cherry picked from commit `ed2b3f5c81`)	2017-07-27 19:54:52 +01:00
Marek Olšák	529c440dd3	gallium/radeon: make S_FIXED function signed and move it to shared code This fixes a bug uncovered by: `2412c4c81e` util: Make CLAMP turn NaN into MIN. Cc: 17.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Roland Scheidegger <sroland@vmware.com> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Reviewed-by: Nicolai Hähnle <nicolai.haehnle@amd.com> (cherry picked from commit `433f6f7ac9`)	2017-07-27 19:54:52 +01:00
Charmaine Lee	94e0de90ee	st/mesa: create framebuffer iface hash table per st manager With commit `5124bf9823`, a framebuffer interface hash table is created in st_gl_api_create(), which is called in dri_init_screen_helper() for each screen. When the hash table is overwritten with multiple calls to st_gl_api_create(), it can cause race condition. This patch fixes the problem by creating a framebuffer interface hash table per state tracker manager. Fixes crash with steam. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=101876 Fixes: `5124bf9823` ("st/mesa: add destroy_drawable interface") Tested-by: Christoph Haag <haagch@frickel.club> Reviewed-by: Brian Paul <brianp@vmware.com> (cherry picked from commit `bbc29393d3`) Squashed with: st/mesa: fix unconditional return in st_framebuffer_iface_remove Noticed by James Legg @ Feral. Cc: 17.2 <mesa-stable@lists.freedesktop.org> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `914f11e75b`) Squashed with: st/mesa: Fix inversed test in st_api_destroy_drawable Fixes a drawable leak. Fixes: `bbc29393d3` ("st/mesa: create framebuffer iface hash table per st manager") Bugzilla: https://bugs.freedesktop.org/101930 Tested-by: Nick Sarnie <commendsarnex@gmail.com> Reviewed-by: Brian Paul <brianp@vmware.com> (cherry picked from commit `57132d126f`)	2017-07-27 19:54:24 +01:00
Grigori Goronzy	3180f0fa0d	egl: move KHR_no_error vs debug/robustness check further down We'll fail to flag an error if the context flags appear after the no-error attribute in the context attribute list. Delay the check to after attribute parsing to fix this. Fixes: `4909519a66` ("egl: Add EGL_KHR_create_context_no_error support") Cc: mesa-stable@lists.freedesktop.org [Emil Velikov: add fixes/stable tags, commit message polish] Reviewed-by: Emil Velikov <emil.velikov@collabora.com> (cherry picked from commit `39bf7756b9`)	2017-07-27 18:56:45 +01:00
Nicolai Hähnle	e7f14a8b52	radeonsi/gfx9: reduce max threads per block to 1024 on gfx9+ The number of supported waves per thread group has been reduced to 16 with gfx9. Trying to use 32 waves causes hangs, and barriers might not work correctly with > 16 waves. Cc: mesa-stable@lists.freedesktop.org Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `a0e6b9a2db`)	2017-07-27 18:56:45 +01:00
Nicolai Hähnle	7f5d9d7a6d	radeonsi: fix detection of DRAW_INDIRECT_MULTI on SI The firmware version numbers for SI were wrong. The new numbers are probably too conservative (we don't have a definitive answer by the firmware team), but DRAW_INDIRECT_MULTI has been confirmed to work with these versions on Tahiti (by Gustaw) and on Verde (by myself). While this is technically adding a feature, it's a feature we thought we had for a long time. The change is small enough and we're early enough in the 17.2 release cycle that it should still go in. Reported-by: Gustaw Smolarczyk <wielkiegie@gmail.com> Cc: 17.2 <mesa-stable@lists.freedesktop.org> Acked-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `65fbaab0b7`)	2017-07-27 18:56:45 +01:00
Iago Toral Quiroga	3d0960e761	anv: only expose up to 28 vertex attributes The EU limit of 128 GRFs should allow 32 vertex elements of 4 GRFs. However, the maximum allowed value of "Vertex URB Entry Read Length" in SIMD8 is 15. And 15 * 8 = 120 gives us a limit of 30 vertex elements. Because we also need to reserve a vertex buffer to upload VertexIndex/InstanceIndex and another to upload DrawID when needed, we can only expose 28. Cc: "17.2" <mesa-stable@lists.freedesktop.org> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (cherry picked from commit `31f1863ace`)	2017-07-27 18:56:45 +01:00
Iago Toral Quiroga	bdbd8ab517	anv/cmd_buffer: fix off by one error in assertion Cc: "17.2" <mesa-stable@lists.freedesktop.org> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (cherry picked from commit `a848e693ef`)	2017-07-27 18:56:44 +01:00
Kenneth Graunke	47bca2cfa7	i965: Fix = vs == in MCS aux usage assert. Caught by Coverity (CID 1415680). Cc: "17.2" <mesa-stable@lists.freedesktop.org> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (cherry picked from commit `698636cc97`) Fixes: `0f9b609cf4` ("i965/blorp: Do prepare/finish manually")	2017-07-27 18:54:41 +01:00
Kenneth Graunke	04bb687f04	i965: Fix offset addition in get_isl_surf. Increase the value, not the pointer to the stack variable. Caught by Coverity (CID 1415574). Not shipped in a real release. Cc: "17.2" <mesa-stable@lists.freedesktop.org> Reviewed-by: Topi Pohjolainen <topi.pohjolainen@intel.com> (cherry picked from commit `f6e674fa51`) Fixes: `63a43f4161` ("i965: Refactor miptree to isl converter and adjustment")	2017-07-27 18:54:13 +01:00
Eric Anholt	4e0f29ed0b	broadcom/vc4: Prefer blit via rendering to the software fallback. I don't know how I managed to leave this here for so long. Found when working on a 1:1 overlapping blit extension for X11. Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `93fec49a75`)	2017-07-27 18:21:27 +01:00
Lionel Landwerlin	0ccb853cc0	i965: perf: flush batchbuffers at the beginning of queries As Chris commented, it makes more sense to have batch buffer flushes before the query. Usually applications like frame_retrace do a series of queries and in that case, with flushes at the end of the queries, we might still have the first query contained in 2 different batches. More generally it would be quite usual to have the query contained in 2 batch buffers because we never know what's the fill rate of the current batch buffer. If we move the flushing at the beginning of the queries, it's pretty much guaranteed that queries will be contained in a single batch buffer (unless the amount of commands is huge, but then it's only fair to include reloading request times in the measurements). Fixes: `adafe4b733` ("i965: perf: minimize the chances to spread queries across batchbuffers") Reported-by: Chris Wilson <chris@chris-wilson.co.uk> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Cc: "17.2 17.1" <mesa-stable@lists.freedesktop.org> Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (cherry picked from commit `9f439ae120`)	2017-07-27 18:21:22 +01:00
Emil Velikov	a455f594bb	Update version to 17.2.0-rc1 Signed-off-by: Emil Velikov <emil.velikov@collabora.com>	2017-07-24 16:59:32 +01:00
Emil Velikov	a955622c1a	intel/blorp: ship blorp_genX_exec.h within the tarball Fixes: `c9cb37b2a6` ("intel/blorp: Add a partial resolve pass for MCS") Signed-off-by: Emil Velikov <emil.velikov@collabora.com> (cherry picked from commit `5d47dd9c2a`)	2017-07-24 16:59:32 +01:00