Update version to 19.1.0-rc5

Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>
freedreno: Fix GCC build error.
2019-06-05 16:23:45 +00:00 · 2019-06-05 09:00:53 +00:00 · 2019-06-05 08:29:08 +00:00 · 2019-06-05 08:27:14 +00:00 · 2019-06-04 15:06:46 +00:00 · 2019-06-04 15:06:46 +00:00
52 changed files with 283 additions and 256 deletions
--- a/2
+++ b/2
@@ -1 +1 @@
-19.1.0-rc3
+19.1.0-rc5
--- a/bin/.cherry-ignore
+++ b/bin/.cherry-ignore
@@ -1,2 +0,0 @@
-stable: this commit causes issues in several systems
-78e35df52aa2f7d770f929a0866a0faa89c261a9 radeonsi: update buffer descriptors in all contexts after buffer invalidation
--- a/src/amd/common/ac_llvm_build.c
+++ b/src/amd/common/ac_llvm_build.c
@@ -4016,7 +4016,7 @@ ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws)

 	/* ws->result_reduce is already the correct value */
 	if (ws->enable_inclusive)
-		ws->result_inclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->src, ws->op);
+		ws->result_inclusive = ac_build_alu_op(ctx, ws->result_inclusive, ws->src, ws->op);
 	if (ws->enable_exclusive)
 		ws->result_exclusive = ac_build_alu_op(ctx, ws->result_exclusive, ws->extra, ws->op);
 }
--- a/src/amd/common/ac_nir_to_llvm.c
+++ b/src/amd/common/ac_nir_to_llvm.c
@@ -38,6 +38,7 @@ struct ac_nir_context {
 	struct ac_shader_abi *abi;

 	gl_shader_stage stage;
+	shader_info *info;

 	LLVMValueRef *ssa_defs;

@@ -1395,6 +1396,22 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
 	}

 	args->attributes = AC_FUNC_ATTR_READNONE;
+	bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE &&
+			 ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
+	if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
+		/* Prevent texture instructions with implicit derivatives from being
+		 * sunk into branches. */
+		switch (instr->op) {
+		case nir_texop_tex:
+		case nir_texop_txb:
+		case nir_texop_lod:
+			args->attributes |= AC_FUNC_ATTR_CONVERGENT;
+			break;
+		default:
+			break;
+		}
+	}
+
 	return ac_build_image_opcode(&ctx->ac, args);
 }

@@ -4351,6 +4368,7 @@ void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
 	ctx.abi = abi;

 	ctx.stage = nir->info.stage;
+	ctx.info = &nir->info;

 	ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));

--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -133,17 +133,16 @@ libvulkan_radeon = shared_library(
  'vulkan_radeon',
  [libradv_files, radv_entrypoints, radv_extensions_c, amd_vk_format_table_c, sha1_h, xmlpool_options_h],
  include_directories : [
-    inc_common, inc_amd, inc_amd_common, inc_compiler, inc_util, inc_vulkan_util,
-    inc_vulkan_wsi,
+    inc_common, inc_amd, inc_amd_common, inc_compiler, inc_util, inc_vulkan_wsi,
  ],
  link_with : [
-    libamd_common, libamdgpu_addrlib, libvulkan_util, libvulkan_wsi,
+    libamd_common, libamdgpu_addrlib, libvulkan_wsi,
    libmesa_util, libxmlconfig
  ],
  dependencies : [
    dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
    dep_valgrind, radv_deps,
-    idep_nir,
+    idep_nir, idep_vulkan_util,
  ],
  c_args : [c_vis_args, no_override_init_args, radv_flags],
  cpp_args : [cpp_vis_args, radv_flags],
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -4825,7 +4825,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,

 	radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);

-	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 18);
+	MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 21);

 	/* Flags that only require a top-of-pipe event. */
 	VkPipelineStageFlags top_of_pipe_flags =
--- a/src/amd/vulkan/radv_meta_clear.c
+++ b/src/amd/vulkan/radv_meta_clear.c
@@ -650,6 +650,7 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
 	if (radv_image_has_htile(iview->image) &&
 	    iview->base_mip == 0 &&
 	    iview->base_layer == 0 &&
+	    iview->layer_count == iview->image->info.array_size &&
 	    radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
 	    radv_image_extent_compare(iview->image, &iview->extent))
 		return true;
--- a/src/compiler/nir/nir_lower_non_uniform_access.c
+++ b/src/compiler/nir/nir_lower_non_uniform_access.c
@@ -129,7 +129,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
   nir_builder b;
   nir_builder_init(&b, impl);

-   nir_foreach_block(block, impl) {
+   nir_foreach_block_safe(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_tex: {
--- a/src/compiler/nir/nir_opt_dead_cf.c
+++ b/src/compiler/nir/nir_opt_dead_cf.c
@@ -216,7 +216,7 @@ node_is_dead(nir_cf_node *node)

      nir_foreach_instr(instr, block) {
         if (instr->type == nir_instr_type_call)
-            return true;
+            return false;

         /* Return instructions can cause us to skip over other side-effecting
          * instructions after the loop, so consider them to have side effects
--- a/src/compiler/nir/nir_opt_move_load_ubo.c
+++ b/src/compiler/nir/nir_opt_move_load_ubo.c
@@ -91,7 +91,7 @@ move_load_ubo(nir_block *block)
      }
   }

-   return false;
+   return progress;
 }

 bool
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -1044,6 +1044,7 @@ emit_intrinsic_barrier(struct ir3_context *ctx, nir_intrinsic_instr *intr)
 		barrier->cat7.g = true;
 		barrier->cat7.r = true;
 		barrier->cat7.w = true;
+		barrier->cat7.l = true;
 		barrier->barrier_class = IR3_BARRIER_IMAGE_W |
 				IR3_BARRIER_BUFFER_W;
 		barrier->barrier_conflict =
@@ -2376,6 +2377,7 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
 	so->inputs[n].compmask = (1 << (ncomp + frac)) - 1;
 	so->inputs_count = MAX2(so->inputs_count, n + 1);
 	so->inputs[n].interpolate = in->data.interpolation;
+	so->inputs[n].ncomp = ncomp;

 	if (ctx->so->type == MESA_SHADER_FRAGMENT) {

--- a/src/freedreno/vulkan/meson.build
+++ b/src/freedreno/vulkan/meson.build
@@ -90,12 +90,10 @@ libvulkan_freedreno = shared_library(
  include_directories : [
    inc_common,
    inc_compiler,
-    inc_vulkan_util,
    inc_vulkan_wsi,
    inc_freedreno,
  ],
  link_with : [
-    libvulkan_util,
    libvulkan_wsi,
    libmesa_util,
    libfreedreno_drm, # required by ir3_shader_get_variant, which we don't use
@@ -111,6 +109,7 @@ libvulkan_freedreno = shared_library(
    dep_valgrind,
    idep_nir,
    tu_deps,
+    idep_vulkan_util,
  ],
  c_args : [c_vis_args, no_override_init_args, tu_flags],
  link_args : [ld_args_bsymbolic, ld_args_gc_sections],
--- a/src/freedreno/vulkan/tu_device.c
+++ b/src/freedreno/vulkan/tu_device.c
@@ -897,7 +897,7 @@ static const VkQueueFamilyProperties tu_queue_family_properties = {
      VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
   .queueCount = 1,
   .timestampValidBits = 64,
-   .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
+   .minImageTransferGranularity = { 1, 1, 1 },
 };

 void
--- a/src/gallium/auxiliary/util/u_blitter.c
+++ b/src/gallium/auxiliary/util/u_blitter.c
@@ -2059,7 +2059,8 @@ void util_blitter_generate_mipmap(struct blitter_context *blitter,
      target = PIPE_TEXTURE_2D_ARRAY;

   assert(tex->nr_samples <= 1);
-   assert(!util_format_has_stencil(desc));
+   /* Disallow stencil formats without depth. */
+   assert(!util_format_has_stencil(desc) || util_format_has_depth(desc));

   is_depth = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS;

--- a/src/gallium/auxiliary/util/u_screen.c
+++ b/src/gallium/auxiliary/util/u_screen.c
@@ -359,7 +359,7 @@ u_pipe_screen_get_param_defaults(struct pipe_screen *pscreen,
      return 1;

   case PIPE_CAP_DMABUF:
-#ifdef PIPE_OS_LINUX
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD)
      return 1;
 #else
      return 0;
--- a/src/gallium/drivers/etnaviv/etnaviv_emit.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_emit.c
@@ -577,12 +577,12 @@ etna_emit_state(struct etna_context *ctx)
   static const uint32_t uniform_dirty_bits =
      ETNA_DIRTY_SHADER | ETNA_DIRTY_CONSTBUF;

-   if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
+   if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
      etna_uniforms_write(
         ctx, ctx->shader.vs, &ctx->constant_buffer[PIPE_SHADER_VERTEX],
         ctx->shader_state.VS_UNIFORMS, &ctx->shader_state.vs_uniforms_size);

-   if (dirty & (uniform_dirty_bits | ctx->shader.vs->uniforms_dirty_bits))
+   if (dirty & (uniform_dirty_bits | ctx->shader.fs->uniforms_dirty_bits))
      etna_uniforms_write(
         ctx, ctx->shader.fs, &ctx->constant_buffer[PIPE_SHADER_FRAGMENT],
         ctx->shader_state.PS_UNIFORMS, &ctx->shader_state.ps_uniforms_size);
--- a/src/gallium/drivers/etnaviv/etnaviv_resource.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_resource.c
@@ -622,6 +622,7 @@ etna_resource_get_handle(struct pipe_screen *pscreen,
      rsc = etna_resource(rsc->external);

   handle->stride = rsc->levels[0].stride;
+   handle->offset = rsc->levels[0].offset;
   handle->modifier = layout_to_modifier(rsc->layout);

   if (handle->type == WINSYS_HANDLE_TYPE_SHARED) {
--- a/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_gmem.c
@@ -640,6 +640,13 @@ set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
 	blit_scissor.maxx = MIN2(pfb->width, batch->max_scissor.maxx);
 	blit_scissor.maxy = MIN2(pfb->height, batch->max_scissor.maxy);

+	/* NOTE: blob switches to CP_BLIT instead of CP_EVENT_WRITE:BLIT for
+	 * small render targets.  But since we align pitch to binw I think
+	 * we can get away avoiding GPU hangs a simpler way, by just rounding
+	 * up the blit scissor:
+	 */
+	blit_scissor.maxx = MAX2(blit_scissor.maxx, batch->ctx->screen->gmem_alignw);
+
 	OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
 	OUT_RING(ring,
 			 A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -468,7 +468,8 @@ iris_setup_uniforms(const struct brw_compiler *compiler,
            if (load->src[0].ssa == temp_ubo_name) {
               nir_instr_rewrite_src(instr, &load->src[0],
                                     nir_src_for_ssa(nir_imm_int(&b, 0)));
-            } else if (nir_src_as_uint(load->src[0]) == 0) {
+            } else if (nir_src_is_const(load->src[0]) &&
+                       nir_src_as_uint(load->src[0]) == 0) {
               nir_ssa_def *offset =
                  nir_iadd(&b, load->src[1].ssa,
                           nir_imm_int(&b, 4 * num_system_values));
--- a/src/gallium/drivers/lima/lima_draw.c
+++ b/src/gallium/drivers/lima/lima_draw.c
@@ -1438,6 +1438,7 @@ lima_pack_wb_zsbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx)
 {
   struct lima_context_framebuffer *fb = &ctx->framebuffer;
   struct lima_resource *res = lima_resource(fb->base.zsbuf->texture);
+   int level = fb->base.zsbuf->u.tex.level;

   uint32_t format;

@@ -1455,14 +1456,14 @@ lima_pack_wb_zsbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx)

   struct lima_pp_wb_reg *wb = (void *)wb_reg;
   wb[wb_idx].type = 0x01; /* 1 for depth, stencil */
-   wb[wb_idx].address = res->bo->va;
+   wb[wb_idx].address = res->bo->va + res->levels[level].offset;
   wb[wb_idx].pixel_format = format;
   if (res->tiled) {
      wb[wb_idx].pixel_layout = 0x2;
      wb[wb_idx].pitch = fb->tiled_w;
   } else {
      wb[wb_idx].pixel_layout = 0x0;
-      wb[wb_idx].pitch = res->levels[0].stride / 8;
+      wb[wb_idx].pitch = res->levels[level].stride / 8;
   }
   wb[wb_idx].mrt_bits = 0;
 }
@@ -1472,6 +1473,7 @@ lima_pack_wb_cbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx)
 {
   struct lima_context_framebuffer *fb = &ctx->framebuffer;
   struct lima_resource *res = lima_resource(fb->base.cbufs[0]->texture);
+   int level = fb->base.cbufs[0]->u.tex.level;

   bool swap_channels = false;
   switch (fb->base.cbufs[0]->format) {
@@ -1485,14 +1487,14 @@ lima_pack_wb_cbuf_reg(struct lima_context *ctx, uint32_t *wb_reg, int wb_idx)

   struct lima_pp_wb_reg *wb = (void *)wb_reg;
   wb[wb_idx].type = 0x02; /* 2 for color buffer */
-   wb[wb_idx].address = res->bo->va;
+   wb[wb_idx].address = res->bo->va + res->levels[level].offset;
   wb[wb_idx].pixel_format = LIMA_PIXEL_FORMAT_B8G8R8A8;
   if (res->tiled) {
      wb[wb_idx].pixel_layout = 0x2;
      wb[wb_idx].pitch = fb->tiled_w;
   } else {
      wb[wb_idx].pixel_layout = 0x0;
-      wb[wb_idx].pitch = res->levels[0].stride / 8;
+      wb[wb_idx].pitch = res->levels[level].stride / 8;
   }
   wb[wb_idx].mrt_bits = swap_channels ? 0x4 : 0x0;
 }
--- a/src/gallium/drivers/lima/lima_texture.c
+++ b/src/gallium/drivers/lima/lima_texture.c
@@ -119,19 +119,17 @@ lima_texture_desc_set_res(struct lima_context *ctx, uint32_t *desc,

   uint32_t base_va = lima_res->bo->va;

-   /* attach level 0 */
-   desc[6] |= (base_va << 24) | (layout << 13);
-   desc[7] |= base_va >> 8;
+   /* attach first level */
+   uint32_t first_va = base_va + lima_res->levels[first_level].offset;
+   desc[6] |= (first_va << 24) | (layout << 13);
+   desc[7] |= first_va >> 8;

   /* Attach remaining levels.
    * Each subsequent mipmap address is specified using the 26 msbs.
    * These addresses are then packed continuously in memory */
   unsigned current_desc_index = 7;
   unsigned current_desc_bit_index = 24;
-   for (i = 1; i < LIMA_MAX_MIP_LEVELS; i++) {
-      if (first_level + i > last_level)
-         break;
-
+   for (i = first_level + 1; i <= last_level; i++) {
      uint32_t address = base_va + lima_res->levels[i].offset;
      address = (address >> 6);
      desc[current_desc_index] |= (address << current_desc_bit_index);
@@ -163,32 +161,21 @@ lima_update_tex_desc(struct lima_context *ctx, struct lima_sampler_state *sample
   /* 2D texture */
   desc[1] |= 0x400;

-   desc[1] &= ~0xff000000;
+   first_level = texture->base.u.tex.first_level;
+   last_level = texture->base.u.tex.last_level;
+   if (last_level - first_level >= LIMA_MAX_MIP_LEVELS)
+      last_level = first_level + LIMA_MAX_MIP_LEVELS - 1;
+
   switch (sampler->base.min_mip_filter) {
-      case PIPE_TEX_MIPFILTER_NEAREST:
-         first_level = texture->base.u.tex.first_level;
-         last_level = texture->base.u.tex.last_level;
-         if (last_level - first_level >= LIMA_MAX_MIP_LEVELS)
-            last_level = first_level + LIMA_MAX_MIP_LEVELS - 1;
-         mipmapping = true;
-         desc[1] |= ((last_level - first_level) << 24);
-         desc[2] &= ~0x0600;
-         break;
      case PIPE_TEX_MIPFILTER_LINEAR:
-         first_level = texture->base.u.tex.first_level;
-         last_level = texture->base.u.tex.last_level;
-         if (last_level - first_level >= LIMA_MAX_MIP_LEVELS)
-            last_level = first_level + LIMA_MAX_MIP_LEVELS - 1;
+         desc[2] |= 0x0600;
+      case PIPE_TEX_MIPFILTER_NEAREST:
         mipmapping = true;
         desc[1] |= ((last_level - first_level) << 24);
-         desc[2] |= 0x0600;
         break;
      case PIPE_TEX_MIPFILTER_NONE:
      default:
-         first_level = 0;
-         last_level = 0;
         mipmapping = false;
-         desc[2] &= ~0x0600;
         break;
   }

--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1587,7 +1587,9 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx)

 /* BUFFER DISCARD/INVALIDATION */

-/** Reset descriptors of buffer resources after \p buf has been invalidated. */
+/* Reset descriptors of buffer resources after \p buf has been invalidated.
+ * If buf == NULL, reset all descriptors.
+ */
 static void si_reset_buffer_resources(struct si_context *sctx,
 				      struct si_buffer_resources *buffers,
 				      unsigned descriptors_idx,
@@ -1600,13 +1602,15 @@ static void si_reset_buffer_resources(struct si_context *sctx,

 	while (mask) {
 		unsigned i = u_bit_scan(&mask);
-		if (buffers->buffers[i] == buf) {
-			si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
+		struct pipe_resource *buffer = buffers->buffers[i];
+
+		if (buffer && (!buf || buffer == buf)) {
+			si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
 						descs->list + i*4);
 			sctx->descriptors_dirty |= 1u << descriptors_idx;

 			radeon_add_to_gfx_buffer_list_check_mem(sctx,
-								si_resource(buf),
+								si_resource(buffer),
 								buffers->writable_mask & (1u << i) ?
 									RADEON_USAGE_READWRITE :
 									RADEON_USAGE_READ,
@@ -1615,9 +1619,12 @@ static void si_reset_buffer_resources(struct si_context *sctx,
 	}
 }

-/* Update all resource bindings where the buffer is bound, including
+/* Update all buffer bindings where the buffer is bound, including
 * all resource descriptors. This is invalidate_buffer without
- * the invalidation. */
+ * the invalidation.
+ *
+ * If buf == NULL, update all buffer bindings.
+ */
 void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 {
 	struct si_resource *buffer = si_resource(buf);
@@ -1632,7 +1639,10 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 	 */

 	/* Vertex buffers. */
-	if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
+	if (!buffer) {
+		if (num_elems)
+			sctx->vertex_buffers_dirty = true;
+	} else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
 		for (i = 0; i < num_elems; i++) {
 			int vb = sctx->vertex_elements->vertex_buffer_index[i];

@@ -1649,21 +1659,23 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 	}

 	/* Streamout buffers. (other internal buffers can't be invalidated) */
-	if (buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
+	if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
 		for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
 			struct si_buffer_resources *buffers = &sctx->rw_buffers;
 			struct si_descriptors *descs =
 				&sctx->descriptors[SI_DESCS_RW_BUFFERS];
+			struct pipe_resource *buffer = buffers->buffers[i];

-			if (buffers->buffers[i] != buf)
+			if (!buffer || (buf && buffer != buf))
 				continue;

-			si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
+			si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i],
 						descs->list + i*4);
 			sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;

 			radeon_add_to_gfx_buffer_list_check_mem(sctx,
-								buffer, RADEON_USAGE_WRITE,
+								si_resource(buffer),
+								RADEON_USAGE_WRITE,
 								RADEON_PRIO_SHADER_RW_BUFFER,
 								true);

@@ -1677,7 +1689,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 	}

 	/* Constant and shader buffers. */
-	if (buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
+	if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
 			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
 						  si_const_and_shader_buffer_descriptors_idx(shader),
@@ -1686,7 +1698,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 						  sctx->const_and_shader_buffers[shader].priority_constbuf);
 	}

-	if (buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
+	if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++)
 			si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
 						  si_const_and_shader_buffer_descriptors_idx(shader),
@@ -1695,7 +1707,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 						  sctx->const_and_shader_buffers[shader].priority);
 	}

-	if (buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
+	if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
 		/* Texture buffers - update bindings. */
 		for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
 			struct si_samplers *samplers = &sctx->samplers[shader];
@@ -1705,26 +1717,29 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)

 			while (mask) {
 				unsigned i = u_bit_scan(&mask);
-				if (samplers->views[i]->texture == buf) {
+				struct pipe_resource *buffer = samplers->views[i]->texture;
+
+				if (buffer && buffer->target == PIPE_BUFFER &&
+				    (!buf || buffer == buf)) {
 					unsigned desc_slot = si_get_sampler_slot(i);

-					si_set_buf_desc_address(si_resource(buf),
+					si_set_buf_desc_address(si_resource(buffer),
 								samplers->views[i]->u.buf.offset,
 								descs->list + desc_slot * 16 + 4);
 					sctx->descriptors_dirty |=
 						1u << si_sampler_and_image_descriptors_idx(shader);

-					radeon_add_to_gfx_buffer_list_check_mem(sctx,
-									    buffer, RADEON_USAGE_READ,
-									    RADEON_PRIO_SAMPLER_BUFFER,
-									    true);
+					radeon_add_to_gfx_buffer_list_check_mem(
+						sctx, si_resource(buffer),
+						RADEON_USAGE_READ,
+						RADEON_PRIO_SAMPLER_BUFFER, true);
 				}
 			}
 		}
 	}

 	/* Shader images */
-	if (buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
+	if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
 		for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
 			struct si_images *images = &sctx->images[shader];
 			struct si_descriptors *descs =
@@ -1733,21 +1748,23 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)

 			while (mask) {
 				unsigned i = u_bit_scan(&mask);
+				struct pipe_resource *buffer = images->views[i].resource;

-				if (images->views[i].resource == buf) {
+				if (buffer && buffer->target == PIPE_BUFFER &&
+				    (!buf || buffer == buf)) {
 					unsigned desc_slot = si_get_image_slot(i);

 					if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
 						si_mark_image_range_valid(&images->views[i]);

-					si_set_buf_desc_address(si_resource(buf),
+					si_set_buf_desc_address(si_resource(buffer),
 								images->views[i].u.buf.offset,
 								descs->list + desc_slot * 8 + 4);
 					sctx->descriptors_dirty |=
 						1u << si_sampler_and_image_descriptors_idx(shader);

 					radeon_add_to_gfx_buffer_list_check_mem(
-						sctx, buffer,
+						sctx, si_resource(buffer),
 						RADEON_USAGE_READWRITE,
 						RADEON_PRIO_SAMPLER_BUFFER, true);
 				}
@@ -1756,16 +1773,18 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 	}

 	/* Bindless texture handles */
-	if (buffer->texture_handle_allocated) {
+	if (!buffer || buffer->texture_handle_allocated) {
 		struct si_descriptors *descs = &sctx->bindless_descriptors;

 		util_dynarray_foreach(&sctx->resident_tex_handles,
 				      struct si_texture_handle *, tex_handle) {
 			struct pipe_sampler_view *view = (*tex_handle)->view;
 			unsigned desc_slot = (*tex_handle)->desc_slot;
+			struct pipe_resource *buffer = view->texture;

-			if (view->texture == buf) {
-				si_set_buf_desc_address(buffer,
+			if (buffer && buffer->target == PIPE_BUFFER &&
+			    (!buf || buffer == buf)) {
+				si_set_buf_desc_address(si_resource(buffer),
 							view->u.buf.offset,
 							descs->list +
 							desc_slot * 16 + 4);
@@ -1774,7 +1793,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 				sctx->bindless_descriptors_dirty = true;

 				radeon_add_to_gfx_buffer_list_check_mem(
-					sctx, buffer,
+					sctx, si_resource(buffer),
 					RADEON_USAGE_READ,
 					RADEON_PRIO_SAMPLER_BUFFER, true);
 			}
@@ -1782,19 +1801,21 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 	}

 	/* Bindless image handles */
-	if (buffer->image_handle_allocated) {
+	if (!buffer || buffer->image_handle_allocated) {
 		struct si_descriptors *descs = &sctx->bindless_descriptors;

 		util_dynarray_foreach(&sctx->resident_img_handles,
 				      struct si_image_handle *, img_handle) {
 			struct pipe_image_view *view = &(*img_handle)->view;
 			unsigned desc_slot = (*img_handle)->desc_slot;
+			struct pipe_resource *buffer = view->resource;

-			if (view->resource == buf) {
+			if (buffer && buffer->target == PIPE_BUFFER &&
+			    (!buf || buffer == buf)) {
 				if (view->access & PIPE_IMAGE_ACCESS_WRITE)
 					si_mark_image_range_valid(view);

-				si_set_buf_desc_address(buffer,
+				si_set_buf_desc_address(si_resource(buffer),
 							view->u.buf.offset,
 							descs->list +
 							desc_slot * 16 + 4);
@@ -1803,12 +1824,25 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
 				sctx->bindless_descriptors_dirty = true;

 				radeon_add_to_gfx_buffer_list_check_mem(
-					sctx, buffer,
+					sctx, si_resource(buffer),
 					RADEON_USAGE_READWRITE,
 					RADEON_PRIO_SAMPLER_BUFFER, true);
 			}
 		}
 	}
+
+	if (buffer) {
+		/* Do the same for other contexts. They will invoke this function
+		 * with buffer == NULL.
+		 */
+		unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter);
+
+		/* Skip the update for the current context, because we have already updated
+		 * the buffer bindings.
+		 */
+		if (new_counter == sctx->last_dirty_buf_counter + 1)
+			sctx->last_dirty_buf_counter = new_counter;
+	}
 }

 static void si_upload_bindless_descriptor(struct si_context *sctx,
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -514,9 +514,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	si_init_fence_functions(sctx);
 	si_init_state_compute_functions(sctx);

-	if (sscreen->debug_flags & DBG(FORCE_DMA))
-		sctx->b.resource_copy_region = sctx->dma_copy;
-
 	/* Initialize graphics-only context functions. */
 	if (sctx->has_graphics) {
 		si_init_context_texture_functions(sctx);
@@ -541,6 +538,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
 	else
 		si_init_dma_functions(sctx);

+	if (sscreen->debug_flags & DBG(FORCE_DMA))
+		sctx->b.resource_copy_region = sctx->dma_copy;
+
 	sctx->sample_mask = 0xffff;

 	/* Initialize multimedia functions. */
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -526,6 +526,7 @@ struct si_screen {
 	 * the counter before drawing and re-emit the states accordingly.
 	 */
 	unsigned			dirty_tex_counter;
+	unsigned			dirty_buf_counter;

 	/* Atomically increment this counter when an existing texture's
 	 * metadata is enabled or disabled in a way that requires changing
@@ -852,6 +853,7 @@ struct si_context {
 	unsigned			initial_gfx_cs_size;
 	unsigned			gpu_reset_counter;
 	unsigned			last_dirty_tex_counter;
+	unsigned			last_dirty_buf_counter;
 	unsigned			last_compressed_colortex_counter;
 	unsigned			last_num_draw_calls;
 	unsigned			flags; /* flush flags */
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1254,7 +1254,7 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
 	struct pipe_resource *indexbuf = info->index.resource;
-	unsigned dirty_tex_counter;
+	unsigned dirty_tex_counter, dirty_buf_counter;
 	enum pipe_prim_type rast_prim;
 	unsigned index_size = info->index_size;
 	unsigned index_offset = info->indirect ? info->start * index_size : 0;
@@ -1292,6 +1292,13 @@ static void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *i
 		si_update_all_texture_descriptors(sctx);
 	}

+	dirty_buf_counter = p_atomic_read(&sctx->screen->dirty_buf_counter);
+	if (unlikely(dirty_buf_counter != sctx->last_dirty_buf_counter)) {
+		sctx->last_dirty_buf_counter = dirty_buf_counter;
+		/* Rebind all buffers unconditionally. */
+		si_rebind_buffer(sctx, NULL);
+	}
+
 	si_decompress_textures(sctx, u_bit_consecutive(0, SI_NUM_GRAPHICS_SHADERS));

 	/* Set the rasterization primitive type.
--- a/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
+++ b/src/gallium/drivers/swr/rasterizer/common/simdlib.hpp
@@ -565,68 +565,3 @@ using Vec4 = typename SIMD_T::Vec4;
 template <typename SIMD_T>
 using Mask = typename SIMD_T::Mask;

-template <typename SIMD_T>
-struct SIMDVecEqual
-{
-    INLINE bool operator()(Integer<SIMD_T> a, Integer<SIMD_T> b) const
-    {
-        Integer<SIMD_T> c = SIMD_T::xor_si(a, b);
-        return SIMD_T::testz_si(c, c);
-    }
-
-    INLINE bool operator()(Float<SIMD_T> a, Float<SIMD_T> b) const
-    {
-        return this->operator()(SIMD_T::castps_si(a), SIMD_T::castps_si(b));
-    }
-
-    INLINE bool operator()(Double<SIMD_T> a, Double<SIMD_T> b) const
-    {
-        return this->operator()(SIMD_T::castpd_si(a), SIMD_T::castpd_si(b));
-    }
-};
-
-template <typename SIMD_T>
-struct SIMDVecHash
-{
-    INLINE uint32_t operator()(Integer<SIMD_T> val) const
-    {
-#if defined(_WIN64) || !defined(_WIN32) // assume non-Windows is always 64-bit
-        static_assert(sizeof(void*) == 8, "This path only meant for 64-bit code");
-
-        uint64_t              crc32          = 0;
-        const uint64_t*       pData          = reinterpret_cast<const uint64_t*>(&val);
-        static const uint32_t loopIterations = sizeof(val) / sizeof(void*);
-        static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size");
-
-        for (uint32_t i = 0; i < loopIterations; ++i)
-        {
-            crc32 = _mm_crc32_u64(crc32, pData[i]);
-        }
-
-        return static_cast<uint32_t>(crc32);
-#else
-        static_assert(sizeof(void*) == 4, "This path only meant for 32-bit code");
-
-        uint32_t crc32 = 0;
-        const uint32_t* pData = reinterpret_cast<const uint32_t*>(&val);
-        static const uint32_t loopIterations = sizeof(val) / sizeof(void*);
-        static_assert(loopIterations * sizeof(void*) == sizeof(val), "bad vector size");
-
-        for (uint32_t i = 0; i < loopIterations; ++i)
-        {
-            crc32 = _mm_crc32_u32(crc32, pData[i]);
-        }
-
-        return crc32;
-#endif
-    };
-
-    INLINE uint32_t operator()(Float<SIMD_T> val) const
-    {
-        return operator()(SIMD_T::castps_si(val));
-    };
-    INLINE uint32_t operator()(Double<SIMD_T> val) const
-    {
-        return operator()(SIMD_T::castpd_si(val));
-    }
-};
--- a/src/gallium/targets/d3dadapter9/drm.c
+++ b/src/gallium/targets/d3dadapter9/drm.c
@@ -243,8 +243,10 @@ drm_create_adapter( int fd,
        return D3DERR_DRIVERINTERNALERROR;
    }

-    ctx->base.throttling_value =
-       ctx->base.hal->get_param(ctx->base.hal, PIPE_CAP_MAX_FRAMES_IN_FLIGHT);
+    /* Previously was set to PIPE_CAP_MAX_FRAMES_IN_FLIGHT,
+     * but the change of value of this cap to 1 seems to cause
+     * regressions. */
+    ctx->base.throttling_value = 2;
    ctx->base.throttling = ctx->base.throttling_value > 0;

    driParseOptionInfo(&defaultInitOptions, __driConfigOptionsNine);
--- a/src/intel/Android.vulkan.mk
+++ b/src/intel/Android.vulkan.mk
@@ -71,6 +71,7 @@ LOCAL_C_INCLUDES := \

 LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_entrypoints.h
 LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/dummy.c
+LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h

 $(intermediates)/vulkan/dummy.c:
 	@mkdir -p $(dir $@)
@@ -85,6 +86,14 @@ $(intermediates)/vulkan/anv_entrypoints.h: $(intermediates)/vulkan/dummy.c \
 		--outdir $(dir $@) \
 		--xml $(VULKAN_API_XML)

+$(intermediates)/vulkan/anv_extensions.h: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \
+					  $(ANV_EXTENSIONS_SCRIPT) \
+					  $(VULKAN_API_XML)
+	@mkdir -p $(dir $@)
+	$(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \
+		--xml $(VULKAN_API_XML) \
+		--out-h $@
+
 LOCAL_EXPORT_C_INCLUDE_DIRS := \
        $(intermediates)

@@ -261,7 +270,6 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \

 LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_entrypoints.c
 LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.c
-LOCAL_GENERATED_SOURCES += $(intermediates)/vulkan/anv_extensions.h

 $(intermediates)/vulkan/anv_entrypoints.c: $(ANV_ENTRYPOINTS_GEN_SCRIPT) \
 					   $(ANV_EXTENSIONS_SCRIPT) \
@@ -279,14 +287,6 @@ $(intermediates)/vulkan/anv_extensions.c: $(ANV_EXTENSIONS_GEN_SCRIPT) \
 		--xml $(VULKAN_API_XML) \
 		--out-c $@

-$(intermediates)/vulkan/anv_extensions.h: $(ANV_EXTENSIONS_GEN_SCRIPT) \
-					   $(ANV_EXTENSIONS_SCRIPT) \
-					   $(VULKAN_API_XML)
-	@mkdir -p $(dir $@)
-	$(MESA_PYTHON2) $(ANV_EXTENSIONS_GEN_SCRIPT) \
-		--xml $(VULKAN_API_XML) \
-		--out-h $@
-
 LOCAL_SHARED_LIBRARIES := $(ANV_SHARED_LIBRARIES)
 LOCAL_HEADER_LIBRARIES += $(VULKAN_COMMON_HEADER_LIBRARIES)

--- a/src/intel/compiler/brw_eu.h
+++ b/src/intel/compiler/brw_eu.h
@@ -1113,7 +1113,9 @@ brw_untyped_surface_write(struct brw_codegen *p,
 void
 brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
-                 enum opcode send_op);
+                 struct brw_reg src,
+                 enum opcode send_op,
+                 bool stall);

 void
 brw_pixel_interpolator_query(struct brw_codegen *p,
--- a/src/intel/compiler/brw_eu_emit.c
+++ b/src/intel/compiler/brw_eu_emit.c
@@ -707,9 +707,9 @@ brw_alu3(struct brw_codegen *p, unsigned opcode, struct brw_reg dest,
   gen7_convert_mrf_to_grf(p, &dest);

   assert(dest.nr < 128);
-   assert(src0.file != BRW_IMMEDIATE_VALUE || src0.nr < 128);
-   assert(src1.file != BRW_IMMEDIATE_VALUE || src1.nr < 128);
-   assert(src2.file != BRW_IMMEDIATE_VALUE || src2.nr < 128);
+   assert(src0.file == BRW_IMMEDIATE_VALUE || src0.nr < 128);
+   assert(src1.file != BRW_IMMEDIATE_VALUE && src1.nr < 128);
+   assert(src2.file == BRW_IMMEDIATE_VALUE || src2.nr < 128);
   assert(dest.address_mode == BRW_ADDRESS_DIRECT);
   assert(src0.address_mode == BRW_ADDRESS_DIRECT);
   assert(src1.address_mode == BRW_ADDRESS_DIRECT);
@@ -3037,10 +3037,12 @@ brw_set_memory_fence_message(struct brw_codegen *p,
 void
 brw_memory_fence(struct brw_codegen *p,
                 struct brw_reg dst,
-                 enum opcode send_op)
+                 struct brw_reg src,
+                 enum opcode send_op,
+                 bool stall)
 {
   const struct gen_device_info *devinfo = p->devinfo;
-   const bool commit_enable =
+   const bool commit_enable = stall ||
      devinfo->gen >= 10 || /* HSD ES # 1404612949 */
      (devinfo->gen == 7 && !devinfo->is_haswell);
   struct brw_inst *insn;
@@ -3048,15 +3050,15 @@ brw_memory_fence(struct brw_codegen *p,
   brw_push_insn_state(p);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
   brw_set_default_exec_size(p, BRW_EXECUTE_1);
-   dst = vec1(dst);
+   dst = retype(vec1(dst), BRW_REGISTER_TYPE_UW);
+   src = retype(vec1(src), BRW_REGISTER_TYPE_UD);

   /* Set dst as destination for dependency tracking, the MEMORY_FENCE
    * message doesn't write anything back.
    */
   insn = next_insn(p, send_op);
-   dst = retype(dst, BRW_REGISTER_TYPE_UW);
   brw_set_dest(p, insn, dst);
-   brw_set_src0(p, insn, dst);
+   brw_set_src0(p, insn, src);
   brw_set_memory_fence_message(p, insn, GEN7_SFID_DATAPORT_DATA_CACHE,
                                commit_enable);

@@ -3067,7 +3069,7 @@ brw_memory_fence(struct brw_codegen *p,
       */
      insn = next_insn(p, send_op);
      brw_set_dest(p, insn, offset(dst, 1));
-      brw_set_src0(p, insn, offset(dst, 1));
+      brw_set_src0(p, insn, src);
      brw_set_memory_fence_message(p, insn, GEN6_SFID_DATAPORT_RENDER_CACHE,
                                   commit_enable);

@@ -3079,6 +3081,9 @@ brw_memory_fence(struct brw_codegen *p,
      brw_MOV(p, dst, offset(dst, 1));
   }

+   if (stall)
+      brw_MOV(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW), dst);
+
   brw_pop_insn_state(p);
 }

--- a/src/intel/compiler/brw_fs_generator.cpp
+++ b/src/intel/compiler/brw_fs_generator.cpp
@@ -2070,13 +2070,14 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
         break;

      case SHADER_OPCODE_MEMORY_FENCE:
-         brw_memory_fence(p, dst, BRW_OPCODE_SEND);
+         assert(src[1].file == BRW_IMMEDIATE_VALUE);
+         brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, src[1].ud);
         break;

      case SHADER_OPCODE_INTERLOCK:
         assert(devinfo->gen >= 9);
         /* The interlock is basically a memory fence issued via sendc */
-         brw_memory_fence(p, dst, BRW_OPCODE_SENDC);
+         brw_memory_fence(p, dst, src[0], BRW_OPCODE_SENDC, false);
         break;

      case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4169,7 +4169,8 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
   case nir_intrinsic_memory_barrier: {
      const fs_builder ubld = bld.group(8, 0);
      const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
-      ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+      ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
+                brw_vec8_grf(0, 0), brw_imm_ud(0))
         ->size_written = 2 * REG_SIZE;
      break;
   }
@@ -4970,14 +4971,26 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
      const fs_builder ubld = bld.group(8, 0);
      const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);

-      ubld.emit(SHADER_OPCODE_INTERLOCK, tmp)->size_written = 2 *
-         REG_SIZE;
-
+      ubld.emit(SHADER_OPCODE_INTERLOCK, tmp, brw_vec8_grf(0, 0))
+         ->size_written = 2 * REG_SIZE;
      break;
   }

   case nir_intrinsic_end_invocation_interlock: {
-      /* We don't need to do anything here */
+      /* For endInvocationInterlock(), we need to insert a memory fence which
+       * stalls in the shader until the memory transactions prior to that
+       * fence are complete.  This ensures that the shader does not end before
+       * any writes from its critical section have landed.  Otherwise, you can
+       * end up with a case where the next invocation on that pixel properly
+       * stalls for the previous FS invocation on its pixel to complete but
+       * doesn't actually wait for the dataport memory transactions from that
+       * thread to land before submitting its own.
+       */
+      const fs_builder ubld = bld.group(8, 0);
+      const fs_reg tmp = ubld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+      ubld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp,
+                brw_vec8_grf(0, 0), brw_imm_ud(1))
+         ->size_written = 2 * REG_SIZE;
      break;
   }

--- a/src/intel/compiler/brw_vec4_generator.cpp
+++ b/src/intel/compiler/brw_vec4_generator.cpp
@@ -1883,7 +1883,7 @@ generate_code(struct brw_codegen *p,
         break;

      case SHADER_OPCODE_MEMORY_FENCE:
-         brw_memory_fence(p, dst, BRW_OPCODE_SEND);
+         brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false);
         break;

      case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
--- a/src/intel/compiler/brw_vec4_nir.cpp
+++ b/src/intel/compiler/brw_vec4_nir.cpp
@@ -760,7 +760,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
      const vec4_builder bld =
         vec4_builder(this).at_end().annotate(current_annotation, base_ir);
      const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
-      bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+      bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp, brw_vec8_grf(0, 0))
         ->size_written = 2 * REG_SIZE;
      break;
   }
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -103,7 +103,11 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
        type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
      data |= ANV_DESCRIPTOR_ADDRESS_RANGE;

-   /* On Ivy Bridge and Bay Trail, we need swizzles textures in the shader */
+   /* On Ivy Bridge and Bay Trail, we need to swizzle textures in the shader.
+    * Do not handle VK_DESCRIPTOR_TYPE_STORAGE_IMAGE and
+    * VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT because they already must
+    * have identity swizzle.
+    */
   if (device->info.gen == 7 && !device->info.is_haswell &&
       (type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
        type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
--- a/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
+++ b/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
@@ -857,8 +857,21 @@ lower_tex_deref(nir_tex_instr *tex, nir_tex_src_type deref_src_type,
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
-            unsigned arr_index = nir_src_as_uint(deref->arr.index);
-            *base_index += MIN2(arr_index, array_size - 1);
+            unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
+            struct anv_sampler **immutable_samplers =
+               state->layout->set[set].layout->binding[binding].immutable_samplers;
+            if (immutable_samplers) {
+               /* Arrays of YCbCr samplers are tightly packed in the binding
+                * tables; compute the offset of an element in the array by
+                * adding the number of planes of all preceding elements.
+                */
+               unsigned desc_arr_index = 0;
+               for (int i = 0; i < arr_index; i++)
+                  desc_arr_index += immutable_samplers[i]->n_planes;
+               *base_index += desc_arr_index;
+            } else {
+               *base_index += arr_index;
+            }
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
@@ -929,13 +942,15 @@ lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane,
   assert(deref_src_idx >= 0);

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
-   UNUSED nir_variable *var = nir_deref_instr_get_variable(deref);
+   nir_variable *var = nir_deref_instr_get_variable(deref);

-   UNUSED unsigned set = var->data.descriptor_set;
-   UNUSED unsigned binding = var->data.binding;
-   UNUSED const struct anv_descriptor_set_binding_layout *bind_layout =
+   unsigned set = var->data.descriptor_set;
+   unsigned binding = var->data.binding;
+   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];
-   assert(bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE);
+
+   if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
+      return;

   nir_builder *b = &state->builder;
   b->cursor = nir_before_instr(&tex->instr);
--- a/src/intel/vulkan/meson.build
+++ b/src/intel/vulkan/meson.build
@@ -98,14 +98,15 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', ['gen7_cmd_buffer.c']],
    'anv_gen@0@'.format(_gen),
    [anv_gen_files, g[1], anv_entrypoints[0], anv_extensions_h],
    include_directories : [
-      inc_common, inc_compiler, inc_include, inc_intel, inc_vulkan_util,
-      inc_vulkan_wsi,
+      inc_common, inc_compiler, inc_include, inc_intel, inc_vulkan_wsi,
    ],
    c_args : [
      c_vis_args, no_override_init_args, c_sse2_args,
      '-DGEN_VERSIONx10=@0@'.format(_gen),
    ],
-    dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml],
+    dependencies : [
+      dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml, idep_vulkan_util_headers,
+    ],
  )
 endforeach

@@ -144,6 +145,7 @@ anv_deps = [
  dep_libdrm,
  dep_valgrind,
  idep_nir_headers,
+  idep_vulkan_util_headers,
 ]
 anv_flags = [
  c_vis_args,
@@ -183,7 +185,7 @@ libanv_common = static_library(
    gen_xml_pack,
  ],
  include_directories : [
-    inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util,
+    inc_common, inc_intel, inc_compiler, inc_include,
    inc_vulkan_wsi,
  ],
  c_args : anv_flags,
@@ -194,16 +196,15 @@ libvulkan_intel = shared_library(
  'vulkan_intel',
  [files('anv_gem.c'), anv_entrypoints[0], anv_extensions_h],
  include_directories : [
-    inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util,
-    inc_vulkan_wsi,
+    inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi,
  ],
  link_whole : [libanv_common, libanv_gen_libs],
  link_with : [
    libintel_compiler, libintel_common, libintel_dev, libisl, libblorp,
-    libvulkan_util, libvulkan_wsi, libmesa_util,
+    libvulkan_wsi, libmesa_util,
  ],
  dependencies : [
-    dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_genxml,
+    dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_genxml, idep_vulkan_util
  ],
  c_args : anv_flags,
  link_args : ['-Wl,--build-id=sha1', ld_args_bsymbolic, ld_args_gc_sections],
@@ -215,16 +216,15 @@ if with_tests
    'vulkan_intel_test',
    [files('anv_gem_stubs.c'), anv_entrypoints[0], anv_extensions_h],
    include_directories : [
-      inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_util,
-      inc_vulkan_wsi,
+      inc_common, inc_intel, inc_compiler, inc_include, inc_vulkan_wsi,
    ],
    link_whole : libanv_common,
    link_with : [
      libanv_gen_libs, libintel_compiler, libintel_common, libintel_dev,
-      libisl, libblorp, libvulkan_util, libvulkan_wsi, libmesa_util,
+      libisl, libblorp, libvulkan_wsi, libmesa_util,
    ],
    dependencies : [
-      dep_thread, dep_dl, dep_m, anv_deps, idep_nir,
+      dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_vulkan_util
    ],
    c_args : anv_flags,
  )
@@ -239,9 +239,9 @@ if with_tests
        ['tests/@0@.c'.format(t), anv_entrypoints[0], anv_extensions_h],
        c_args : [ c_sse2_args ],
        link_with : libvulkan_intel_test,
-        dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind],
+        dependencies : [dep_libdrm, dep_thread, dep_m, dep_valgrind, idep_vulkan_util, ],
        include_directories : [
-          inc_common, inc_intel, inc_compiler, inc_vulkan_util, inc_vulkan_wsi,
+          inc_common, inc_intel, inc_compiler, inc_vulkan_wsi,
        ],
      ),
      suite : ['intel'],
--- a/src/mesa/main/context.c
+++ b/src/mesa/main/context.c
@@ -1767,10 +1767,6 @@ _mesa_make_current( struct gl_context *newCtx,

         check_init_viewport(newCtx, drawBuffer->Width, drawBuffer->Height);
      }
-      else {
-         _mesa_reference_framebuffer(&newCtx->WinSysDrawBuffer, NULL);
-         _mesa_reference_framebuffer(&newCtx->WinSysReadBuffer, NULL);
-      }

      if (newCtx->FirstTimeCurrent) {
         handle_first_current(newCtx);
--- a/src/mesa/program/ir_to_mesa.cpp
+++ b/src/mesa/program/ir_to_mesa.cpp
@@ -2506,8 +2506,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx,
 void
 _mesa_associate_uniform_storage(struct gl_context *ctx,
                                struct gl_shader_program *shader_program,
-                                struct gl_program *prog,
-                                bool propagate_to_storage)
+                                struct gl_program *prog)
 {
   struct gl_program_parameter_list *params = prog->Parameters;
   gl_shader_stage shader_type = prog->info.stage;
@@ -2633,26 +2632,24 @@ _mesa_associate_uniform_storage(struct gl_context *ctx,
          * data from the linker's backing store.  This will cause values from
          * initializers in the source code to be copied over.
          */
-         if (propagate_to_storage) {
-            unsigned array_elements = MAX2(1, storage->array_elements);
-            if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm &&
-                (storage->is_bindless || !storage->type->contains_opaque())) {
-               const int dmul = storage->type->is_64bit() ? 2 : 1;
-               const unsigned components =
-                  storage->type->vector_elements *
-                  storage->type->matrix_columns;
+         unsigned array_elements = MAX2(1, storage->array_elements);
+         if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm &&
+             (storage->is_bindless || !storage->type->contains_opaque())) {
+            const int dmul = storage->type->is_64bit() ? 2 : 1;
+            const unsigned components =
+               storage->type->vector_elements *
+               storage->type->matrix_columns;

-               for (unsigned s = 0; s < storage->num_driver_storage; s++) {
-                  gl_constant_value *uni_storage = (gl_constant_value *)
-                     storage->driver_storage[s].data;
-                  memcpy(uni_storage, storage->storage,
-                         sizeof(storage->storage[0]) * components *
-                         array_elements * dmul);
-               }
-            } else {
-               _mesa_propagate_uniforms_to_driver_storage(storage, 0,
-                                                          array_elements);
+            for (unsigned s = 0; s < storage->num_driver_storage; s++) {
+               gl_constant_value *uni_storage = (gl_constant_value *)
+                  storage->driver_storage[s].data;
+               memcpy(uni_storage, storage->storage,
+                      sizeof(storage->storage[0]) * components *
+                      array_elements * dmul);
            }
+         } else {
+            _mesa_propagate_uniforms_to_driver_storage(storage, 0,
+                                                       array_elements);
         }

 	      last_location = location;
@@ -3011,7 +3008,7 @@ get_mesa_program(struct gl_context *ctx,
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
-   _mesa_associate_uniform_storage(ctx, shader_program, prog, true);
+   _mesa_associate_uniform_storage(ctx, shader_program, prog);
   if (!shader_program->data->LinkStatus) {
      goto fail_exit;
   }
--- a/src/mesa/program/ir_to_mesa.h
+++ b/src/mesa/program/ir_to_mesa.h
@@ -50,8 +50,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx,
 void
 _mesa_associate_uniform_storage(struct gl_context *ctx,
                                struct gl_shader_program *shader_program,
-                                struct gl_program *prog,
-                                bool propagate_to_storage);
+                                struct gl_program *prog);

 #ifdef __cplusplus
 }
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -645,7 +645,7 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
                                        PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET));

   /* GL limits and extensions */
-   st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions, ctx->API);
+   st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions);
   st_init_extensions(pipe->screen, &ctx->Const,
                      &ctx->Extensions, &st->options, ctx->API);

--- a/src/mesa/state_tracker/st_extensions.c
+++ b/src/mesa/state_tracker/st_extensions.c
@@ -76,8 +76,7 @@ static int _clamp(int a, int min, int max)
 * Note that we have to limit/clamp against Mesa's internal limits too.
 */
 void st_init_limits(struct pipe_screen *screen,
-                    struct gl_constants *c, struct gl_extensions *extensions,
-                    gl_api api)
+                    struct gl_constants *c, struct gl_extensions *extensions)
 {
   int supported_irs;
   unsigned sh;
@@ -449,14 +448,8 @@ void st_init_limits(struct pipe_screen *screen,
   c->GLSLFrontFacingIsSysVal =
      screen->get_param(screen, PIPE_CAP_TGSI_FS_FACE_IS_INTEGER_SYSVAL);

-   /* GL_ARB_get_program_binary
-    *
-    * The QT framework has a bug in their shader program cache, which is built
-    * on GL_ARB_get_program_binary. In an effort to allow them to fix the bug
-    * we don't enable more than 1 binary format for compatibility profiles.
-    */
-   if (api != API_OPENGL_COMPAT &&
-       screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen))
+   /* GL_ARB_get_program_binary */
+   if (screen->get_disk_shader_cache && screen->get_disk_shader_cache(screen))
      c->NumProgramBinaryFormats = 1;

   c->MaxAtomicBufferBindings =
--- a/src/mesa/state_tracker/st_extensions.h
+++ b/src/mesa/state_tracker/st_extensions.h
@@ -35,8 +35,7 @@ struct pipe_screen;

 extern void st_init_limits(struct pipe_screen *screen,
                           struct gl_constants *c,
-                           struct gl_extensions *extensions,
-                           gl_api api);
+                           struct gl_extensions *extensions);

 extern void st_init_extensions(struct pipe_screen *screen,
                               struct gl_constants *consts,
--- a/src/mesa/state_tracker/st_glsl_to_nir.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_nir.cpp
@@ -500,7 +500,7 @@ st_glsl_to_nir_post_opts(struct st_context *st, struct gl_program *prog,
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
-   _mesa_associate_uniform_storage(st->ctx, shader_program, prog, true);
+   _mesa_associate_uniform_storage(st->ctx, shader_program, prog);

   st_set_prog_affected_state_flags(prog);

--- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
+++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
@@ -7247,7 +7247,7 @@ get_mesa_program_tgsi(struct gl_context *ctx,
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
-   _mesa_associate_uniform_storage(ctx, shader_program, prog, true);
+   _mesa_associate_uniform_storage(ctx, shader_program, prog);
   if (!shader_program->data->LinkStatus) {
      free_glsl_to_tgsi_visitor(v);
      _mesa_reference_program(ctx, &shader->Program, NULL);
--- a/src/mesa/state_tracker/st_manager.c
+++ b/src/mesa/state_tracker/st_manager.c
@@ -1269,7 +1269,7 @@ get_version(struct pipe_screen *screen,
   _mesa_init_constants(&consts, api);
   _mesa_init_extensions(&extensions);

-   st_init_limits(screen, &consts, &extensions, api);
+   st_init_limits(screen, &consts, &extensions);
   st_init_extensions(screen, &consts, &extensions, options, api);

   return _mesa_get_version(&extensions, &consts, api);
--- a/src/mesa/state_tracker/st_shader_cache.c
+++ b/src/mesa/state_tracker/st_shader_cache.c
@@ -366,7 +366,7 @@ st_deserialise_ir_program(struct gl_context *ctx,
   }

   st_set_prog_affected_state_flags(prog);
-   _mesa_associate_uniform_storage(ctx, shProg, prog, false);
+   _mesa_associate_uniform_storage(ctx, shProg, prog);

   /* Create Gallium shaders now instead of on demand. */
   if (ST_DEBUG & DEBUG_PRECOMPILE ||
--- a/src/vulkan/meson.build
+++ b/src/vulkan/meson.build
@@ -20,7 +20,6 @@

 vk_api_xml = files('registry/vk.xml')

-inc_vulkan_util = include_directories('util')
 inc_vulkan_wsi = include_directories('wsi')

 vulkan_wsi_args = []
--- a/src/vulkan/overlay-layer/meson.build
+++ b/src/vulkan/overlay-layer/meson.build
@@ -41,10 +41,10 @@ vklayer_mesa_overlay = shared_library(
  vklayer_files, overlay_spv,
  c_args : [c_vis_args, no_override_init_args, vulkan_wsi_args],
  cpp_args : [cpp_vis_args, vulkan_wsi_args],
-  dependencies : [vulkan_wsi_deps, libimgui_core_dep, dep_dl],
-  include_directories : [inc_common, inc_vulkan_util],
+  dependencies : [idep_vulkan_util, vulkan_wsi_deps, libimgui_core_dep, dep_dl],
+  include_directories : inc_common,
  link_args : cc.get_supported_link_arguments(['-Wl,-Bsymbolic-functions', '-Wl,-z,relro']),
-  link_with : [libmesa_util, libvulkan_util],
+  link_with : libmesa_util,
  install : true
 )

--- a/src/vulkan/overlay-layer/overlay.cpp
+++ b/src/vulkan/overlay-layer/overlay.cpp
@@ -1893,10 +1893,6 @@ static VkResult overlay_BeginCommandBuffer(
      return result;
   }

-   /* Primary command buffers with no queries. */
-   if (!cmd_buffer_data->pipeline_query_pool && cmd_buffer_data->timestamp_query_pool)
-      return device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);
-
   /* Otherwise record a begin query as first command. */
   VkResult result = device_data->vtable.BeginCommandBuffer(commandBuffer, pBeginInfo);

--- a/src/vulkan/util/meson.build
+++ b/src/vulkan/util/meson.build
@@ -44,3 +44,15 @@ libvulkan_util = static_library(
  c_args : [c_vis_args, vulkan_wsi_args],
  build_by_default : false,
 )
+
+idep_vulkan_util_headers = declare_dependency(
+  sources : vk_enum_to_str[1],
+  include_directories : include_directories('.')
+)
+
+idep_vulkan_util = declare_dependency(
+  sources : vk_enum_to_str[1],
+  link_with : libvulkan_util,
+  include_directories : include_directories('.'),
+  dependencies : idep_vulkan_util_headers
+)
--- a/src/vulkan/wsi/meson.build
+++ b/src/vulkan/wsi/meson.build
@@ -41,9 +41,9 @@ endif
 libvulkan_wsi = static_library(
  'vulkan_wsi',
  files_vulkan_wsi,
-  include_directories : [inc_common, inc_vulkan_util, inc_include],
+  include_directories : [inc_common, inc_include],
  link_with: [libxmlconfig],
-  dependencies : [vulkan_wsi_deps, dep_libdrm],
+  dependencies : [vulkan_wsi_deps, dep_libdrm, idep_vulkan_util],
  c_args : [c_vis_args, vulkan_wsi_args],
  build_by_default : false,
 )
Author	SHA1	Message	Date
Juan A. Suarez Romero	9d8f104f39	Update version to 19.1.0-rc5 Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>	2019-06-05 16:23:45 +00:00
Vinson Lee	2a45ddd42d	freedreno: Fix GCC build error. ../src/freedreno/vulkan/tu_device.c:900:4: error: initializer element is not constant .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, ^ Suggested-by: Kristian Høgsberg <krh@bitplanet.net> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110698 Signed-off-by: Vinson Lee <vlee@freedesktop.org> Reviewed-by: Rob Clark <robdclark@gmail.com> (cherry picked from commit `d4e70be739`)	2019-06-05 09:00:53 +00:00
Marek Olšák	96fbd54398	ac: fix a typo in ac_build_wg_scan_bottom Cc: 19.1 <mesa-stable@lists.freedesktop.org> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> (cherry picked from commit `c9b64b58de`)	2019-06-05 08:29:08 +00:00
Rhys Perry	60688cc393	ac/nir: mark some texture intrinsics as convergent Otherwise LLVM can sink them and their texture coordinate calculations into divergent branches. v2: simplify the conditions on which the intrinsic is marked as convergent v3: only mark as convergent in FS and CS with derivative groups Cc: <mesa-stable@lists.freedesktop.org> Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `73dda85512`)	2019-06-05 08:27:14 +00:00
Samuel Pitoiset	38927a35a6	radv: do not use gfx fast depth clears for layered depth/stencil images The driver should only do fast depth clears with the graphics path when the view covers all image layers, otherwise this might corrupt layers when HTILE is enabled. Cc: 19.0 19.1 mesa-stable@lists.freedesktop.org Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `8a35eb0602`)	2019-06-04 15:06:46 +00:00
Sagar Ghuge	cf6472e780	intel/compiler: Fix assertions in brw_alu3 v2: Fix assertion for src1 (Ian Romanick) Fixes: `3b967e17` (intel/compiler: Avoid false positive assertions) Signed-off-by: Sagar Ghuge <sagar.ghuge@intel.com> Suggested-by: Matt Turner <mattst88@gmail.com> Reviewed-by: Matt Turner <mattst88@gmail.com> (cherry picked from commit `3016756398`)	2019-06-04 15:06:46 +00:00
Pierre-Eric Pelloux-Prayer	5394f1578c	radeonsi: init sctx->dma_copy before using it Commit `a1378639ab` reordered context functions initializations but broke sctx->b.resource_copy_region init when using AMD_DEBUG=forcedma. In this case sctx->dma_copy was assigned a value after being used in: sctx->b.resource_copy_region = sctx->dma_copy; This commit moves the FORCE_DMA special case after sctx->dma_copy initialization. See https://bugs.freedesktop.org/show_bug.cgi?id=110422 Signed-off-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `4583f09caa`)	2019-06-04 15:06:46 +00:00
Timothy Arceri	51998d720b	st/glsl: make sure to propagate initialisers to driver storage This essentially reverts `20234cfe3a`. Fixes piglit test: tests/spec/arb_get_program_binary/execution/uniform-after-restore.shader_test Fixes: `20234cfe3a` "st/mesa: don't propagate uniforms when restoring from cache" Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110784 (cherry picked from commit `fea36a8f43`)	2019-06-04 15:06:46 +00:00
Axel Davy	8773e20238	d3dadapter9: Revert to old throttling limit value Recently PIPE_CAP_MAX_FRAMES_IN_FLIGHT was changed from 2 to 1: `20909284f2` No driver seems to overwrite the default value. One user reports severe regressions for some games. For now, revert to the value 2 for nine. Cc: "19.1" mesa-stable@lists.freedesktop.org Signed-off-by: Axel Davy <davyaxel0@gmail.com> (cherry picked from commit `5820ac6756`)	2019-06-04 15:06:46 +00:00
Marek Olšák	4524f09cc0	u_blitter: don't fail mipmap generation for depth formats containing stencil Bugzilla: https://bugzilla.freedesktop.org/show_bug.cgi?id=109754 Cc: 19.0 19.1 <mesa-stable@lists.freedesktop.org> Tested-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> (cherry picked from commit `4b11ed443b`)	2019-06-04 15:06:46 +00:00
Rob Clark	3fce389c8b	freedreno/a6xx: fix GPU crash on small render targets Fixes dEQP-GLES2.functional.multisampled_render_to_texture.readpixels Signed-off-by: Rob Clark <robdclark@chromium.org> Acked-by: Eric Anholt <eric@anholt.net> (cherry picked from commit `8eaa2d5021`)	2019-06-04 15:06:46 +00:00
Rob Clark	a37f10af7b	freedreno/ir3: set more barrier bits Blob is also setting the .l bit, and it seems to solve some intermittent failures with a couple of deqp's: dEQP-GLES31.functional.image_load_store.2d.qualifiers.coherent_r32i dEQP-GLES31.functional.image_load_store.2d.qualifiers.volatile_r32f Signed-off-by: Rob Clark <robdclark@chromium.org> Acked-by: Eric Anholt <eric@anholt.net> (cherry picked from commit `f9fa456e1d`)	2019-06-04 15:06:46 +00:00
Jonathan Marek	90d045f993	freedreno/ir3: fix input ncomp for vertex shaders ncomp is never set for vertex shaders, but a3xx and a4xx still use it. Fixes: `831f1a05c0` freedreno/ir3: rework varying packing Signed-off-by: Jonathan Marek <jonathan@marek.ca> Reviewed-by: Rob Clark <robdclark@chromium.org> (cherry picked from commit `1db86d8b62`)	2019-06-03 08:20:25 +00:00
Bas Nieuwenhuizen	b2c5c16668	nir: Actually propagate progress in nir_opt_move_load_ubo. Found with Jasons new metadata rework (https://gitlab.freedesktop.org/mesa/mesa/merge_requests/950). Fixes: `af355aaa07` "nir: add nir_opt_move_load_ubo() optimization pass" Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com> (cherry picked from commit `e24a7840f6`)	2019-06-03 08:15:53 +00:00
Jan Zielinski	fecdcce09c	swr/rast: fix 32-bit compilation on Linux Removing unused but problematic code from simdlib header to fix compilation problem on 32-bit Linux. Reviewed-by: Alok Hota <alok.hota@intel.com> (cherry picked from commit `cf673747ce`)	2019-05-31 17:03:55 +02:00
Jason Ekstrand	a13bda4957	nir/dead_cf: Call instructions aren't dead When we inlined cf_node_has_side_effects into node_is_dead, all the conditions flipped and we forgot to flip one. Fortunately, it doesn't matter right now because no one uses this pass on shaders with more than one function. Fixes: `b50465d197` "nir/dead_cf: Inline cf_node_has_side_effects" Reviewed-by: Dave Airlie <airlied@redhat.com> Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> (cherry picked from commit `8948048c6f`)	2019-05-31 08:15:31 +00:00
Jason Ekstrand	c2a945771c	intel/fs: Do a stalling MFENCE in endInvocationInterlock() Fixes: `939312702e` "i965: Add ARB_fragment_shader_interlock support" Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (cherry picked from commit `9e403dc56e`)	2019-05-31 08:13:44 +00:00
Jason Ekstrand	92f4a16af8	intel/fs,vec4: Use g0 as the header for MFENCE We set header_present but then pass it some random garbage. Give it g0 instead. I'm not actually sure this does anything but g0 is the usual header data and this is what the windows driver does so it seems like a good idea. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> (cherry picked from commit `859de4a748`)	2019-05-31 08:11:35 +00:00
Jason Ekstrand	a19270007c	iris: Don't assume UBO indices are constant It will be true for the constant/system value buffer because they use a constant zero but it's not true in general. If we ever got here when the source wasn't constant, nir_src_as_uint would assert. Reviewed-by: Kenneth Graunke <kenneth@whitecape.org> Cc: mesa-stable@lists.freedesktop.org (cherry picked from commit `9dc57eebd5`)	2019-05-30 09:06:28 +00:00
Lionel Landwerlin	4c7dfaba9c	nir/lower_non_uniform: safely iterate over blocks This fixes a problem where the same instruction gets replaced twice. This was happening when the replaced instruction would be at the end of a block. Replacement of : if ssa_8 { .... intrinsic bindless_image_store (ssa_44, ssa_16, ssa_0, ssa_15) (5, 0, 34836, 32) /* image_dim=Buf */ /* image_array=false */ /* format=34836 */ /* access=32 */ } Would be : if ssa_8 { loop { vec1 32 ssa_47 = intrinsic read_first_invocation (ssa_44) () vec1 1 ssa_48 = ieq ssa_47, ssa_44 if ssa_48 { loop { vec1 32 ssa_49 = intrinsic read_first_invocation (ssa_44) () vec1 1 ssa_50 = ieq ssa_49, ssa_44 if ssa_50 { intrinsic bindless_image_store (ssa_44, ssa_16, ssa_0, ssa_15) (5, 0, 34836, 32) /* image_dim=Buf */ /* image_array=false */ /* format=34836 */ /* access=32 */ break } else { .... } Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: `3bd5457641` ("nir: Add a lowering pass for non-uniform resource access") Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> (cherry picked from commit `366811bedb`)	2019-05-30 09:01:40 +00:00
Samuel Pitoiset	411114c45c	radv: allocate more space in the CS when emitting events If the driver waits for CP DMA to be idle and emit an EOP event we need more space. This fixes a crash with Quake Champions. Cc: <mesa-stable@lists.freedesktop.org> Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl> (cherry picked from commit `47a10edefb`)	2019-05-30 09:00:31 +00:00
Juan A. Suarez Romero	dd9635c1d2	Update version to 19.1.0-rc4 Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com>	2019-05-29 16:44:45 +02:00
Timothy Arceri	0dcba748f9	Revert "st/mesa: expose 0 shader binary formats for compat profiles for Qt" This reverts commit `55376cb31e`. It's been over a year and both QT 5.9.5 and 5.11.0 contained a fix for the original issue. It seems i965 only ever applied this workaround to the 18.0 branch. Reviewed-by: Marek Olšák <marek.olsak@amd.com> (cherry picked from commit `11e16ca7ce`)	2019-05-28 07:13:40 +00:00
Lionel Landwerlin	fe7c45b97e	anv: fix apply_pipeline_layout pass for arrays of YCbCr descriptors When using the binding tables to access arrays of YCbCr descriptors we did not consider the offset of the accessed element. We can't do a simple multiple because the binding table entries are tightly packed. For example element 0 of the array could use 2 entries/planes and element 1 could use 2 entries/planes. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: `3bb8768b9d` ("anv: toggle on support for VK_EXT_ycbcr_image_arrays") Reviewed-by: Tapani Pälli <tapani.palli@intel.com> (cherry picked from commit `2042f22e28`)	2019-05-28 07:12:43 +00:00
Chenglei Ren	16eac8f754	anv/android: fix missing dependencies issue during parallel build The libmesa_anv_gen* modules require anv_extensions.h, patch makes sure it gets generated as a dependency before building them. Signed-off-by: Chenglei Ren <chenglei.ren@intel.com> Reviewed-by: Tapani Pälli <tapani.palli@intel.com> Cc: <mesa-stable@lists.freedesktop.org> (cherry picked from commit `13b38ca1e4`)	2019-05-28 07:11:10 +00:00
Qiang Yu	4b3c805b88	lima: fix render to non-zero level texture Current implementation won't respect level of surface to render. Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> (cherry picked from commit `54490b0b36`)	2019-05-28 07:10:04 +00:00
Qiang Yu	87ac0bd86a	lima: fix lima_blit with non-zero level source resource lima_blit will do blit between resources with different levels. When blit from a level!=0 source, it will sample from that level of resource as texture. Current texture setup won't respect level when not mipmap filter. Reviewed-by: Vasily Khoruzhick <anarsoul@gmail.com> Signed-off-by: Qiang Yu <yuq825@gmail.com> (cherry picked from commit `1dc593e9b9`)	2019-05-28 07:09:05 +00:00
Dave Airlie	74c5367612	Revert "mesa: unreference current winsys buffers when unbinding winsys buffers" This reverts commit `12bf7cfecf`. This commit caused lots of problems: https://bugs.freedesktop.org/show_bug.cgi?id=110721 https://bugs.freedesktop.org/show_bug.cgi?id=110761 Fixes: `12bf7cfecf` ("mesa: unreference current winsys buffers when unbinding winsys buffers") Pushing without review as we need to get it into next stable. (cherry picked from commit `7fe5a8e874`)	2019-05-27 08:31:05 +00:00
Christian Gmeiner	95ffe6323e	etnaviv: use the correct uniform dirty bits Found during code inspection. Cc: mesa-stable@lists.freedesktop.org Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> (cherry picked from commit `78fb5594be`)	2019-05-27 08:28:37 +00:00
Danylo Piliaiev	03fd344776	anv: Do not emulate texture swizzle for INPUT_ATTACHMENT, STORAGE_IMAGE If descriptorType is VK_DESCRIPTOR_TYPE_STORAGE_IMAGE or VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT, the imageView member of each element of pImageInfo must have been created with the identity swizzle. Fixes: `d2aa65eb` Signed-off-by: Danylo Piliaiev <danylo.piliaiev@globallogic.com> Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> (cherry picked from commit `c82dcf89ae`)	2019-05-27 08:26:47 +00:00
Lionel Landwerlin	9037cf26bb	vulkan: fix build dependency issue with generated files On machines with many cores, you can run into that issue : ../mesa-9999/src/vulkan/overlay-layer/overlay.cpp:42:10: fatal error: vk_enum_to_str.h: No such file or directory v2: Move declare_dependency around (Eric) Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Reported-by: Jan Ziak Cc: <mesa-stable@lists.freedesktop.org> Reviewed-by: Eric Engestrom <eric.engestrom@intel.com> (cherry picked from commit `cb7c9b2a93`)	2019-05-23 08:57:26 +00:00
Greg V	b02c6e8ee7	gallium: enable dmabuf on BSD as well The DRM_CONF_SHARE_FD code did not check for Linux, so the commit that introduced PIPE_CAP_DMABUF broke Wayland-EGL clients on FreeBSD. Fixes: `8ae50e60` (gallium: replace DRM_CONF_SHARE_FD with PIPE_CAP_DMABUF) Reviewed-by: Dylan Baker <dylan@pnwbakers.com> Reviewed-by: Eric Engestrom <eric.engestrom@intel.com> (cherry picked from commit `506ebf55c0`)	2019-05-23 08:56:14 +00:00
Philipp Zabel	e13c13f54c	etnaviv: fill missing offset in etna_resource_get_handle Without this gbm_bo_get_offset() can return 0 where it shouldn't. Reviewed-by: Lucas Stach <l.stach@pengutronix.de> Reviewed-by: Christian Gmeiner <christian.gmeiner@gmail.com> Reviewed-by: Emil Velikov <emil.velikov@collabora.com> Cc: <mesa-stable@lists.freedesktop.org> (cherry picked from commit `1ccb8a071b`)	2019-05-23 08:53:19 +00:00
Marek Olšák	60d524fd39	radeonsi: fix a regression in si_rebind_buffer Don't update non-buffer images. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=110701 Fixes: `78e35df52a` "radeonsi: update buffer descriptors in all contexts after buffer invalidation" Cc: 19.1 <mesa-stable@lists.freedesktop.org> Tested-By: Gert Wollny <gert.wollny@collabora.com> (cherry picked from commit `d6053bf2a1`)	2019-05-23 08:51:16 +00:00
Lionel Landwerlin	ce2d68aace	vulkan/overlay: fix timestamp query emission with no pipeline stats The if (!pipe && timestamp) logic was broken. It should have been : if (!pipe && !timestamp) Let's just drop this condition as the following code does the right thing for all cases. An error was appearing with the following variables : VK_INSTANCE_LAYERS=VK_LAYER_MESA_overlay VK_LAYER_MESA_OVERLAY_CONFIG=gpu_timing Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: `ea7a6fa980` ("vulkan/overlay: add pipeline statistic & timestamps support") Reviewed-by: Tapani Pälli <tapani.palli@intel.com> (cherry picked from commit `213d6527d4`)	2019-05-23 08:50:11 +00:00
Marek Olšák	c1d83ae9fb	radeonsi: update buffer descriptors in all contexts after buffer invalidation Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=108824 Cc: 19.1 <mesa-stable@lists.freedesktop.org> (cherry picked from commit `78e35df52a`) [Juan: resolve trivial conflicts] [Juan: remove the commit from the ignored cherry-pick] Signed-off-by: Juan A. Suarez Romero <jasuarez@igalia.com> Conflicts: src/gallium/drivers/radeonsi/si_state_draw.c	2019-05-23 08:48:21 +00:00