bump version for 20.3.0 final

docs: add release notes for 20.3.0
freedreno: Break out of "should we free the entry" loop once we've freed.
2020-12-03 09:56:23 -08:00 · 2020-12-03 09:49:56 -08:00 · 2020-12-02 15:10:44 -08:00 · 2020-12-02 15:08:50 -08:00 · 2020-12-02 15:08:47 -08:00 · 2020-12-02 15:08:46 -08:00
32 changed files with 8645 additions and 109 deletions
--- a/.pick_status.json
+++ b/.pick_status.json
--- a/2
+++ b/2
@@ -1 +1 @@
-20.3.0-rc3
+20.3.0
--- a/docs/drivers/zink.rst
+++ b/docs/drivers/zink.rst
@@ -103,6 +103,8 @@ variable:
   current directory, and print a message with the filename to stderr.
 ``tgsi``
   Print the TGSI form of TGSI shaders to stderr.
+``validation``
+   Dump Validation layer output.

 Vulkan Validation Layers
 ^^^^^^^^^^^^^^^^^^^^^^^^
--- a/docs/relnotes/20.3.0.rst
+++ b/docs/relnotes/20.3.0.rst
--- a/docs/relnotes/new_features.txt
+++ b/docs/relnotes/new_features.txt
@@ -1,19 +0,0 @@
-GL 4.5 on llvmpipe
-GL_INTEL_blackhole_render on radeonsi
-GL_NV_copy_depth_to_color for NIR
-GL_NV_half_float
-GL_NV_shader_atomic_int64 on radeonsi
-EGL_KHR_swap_buffers_with_damage on X11 (DRI3)
-VK_PRESENT_MODE_FIFO_RELAXED on X11
-GLX_EXT_swap_control for DRI2 and DRI3
-GLX_EXT_swap_control_tear for DRI3
-VK_KHR_copy_commands2 on RADV
-VK_KHR_shader_terminate_invocation on RADV
-NGG GS support in ACO
-VK_KHR_shader_terminate_invocation on ANV
-driconf: add glx_extension_override
-driconf: add indirect_gl_extension_override
-VK_AMD_mixed_attachment_samples on RADV (GFX6-GFX7).
-GL_MESA_pack_invert on r100 and vieux
-GL_ANGLE_pack_reverse_row_order
-VK_EXT_shader_image_atomic_int64 on RADV
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1745,7 +1745,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)

         Temp tmp = dst.regClass() == s1 ? bld.tmp(v1) : dst;
         if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) {
-            emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_u32_u24, tmp);
+            emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_hi_u32_u24, tmp, true);
         } else {
            emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_u32, tmp);
         }
@@ -11358,11 +11358,11 @@ std::pair<Temp, Temp> ngg_gs_workgroup_reduce_and_scan(isel_context *ctx, Temp s

   /* Determine if the current lane is the first. */
   Temp is_first_lane = bld.copy(bld.def(bld.lm), Operand(1u, ctx->program->wave_size == 64));
+   Temp wave_id_in_tg = wave_id_in_threadgroup(ctx);
   begin_divergent_if_then(ctx, &ic, is_first_lane);
   bld.reset(ctx->block);

   /* The first lane of each wave stores the result of its subgroup reduction to LDS (NGG scratch). */
-   Temp wave_id_in_tg = wave_id_in_threadgroup(ctx);
   Temp wave_id_in_tg_lds_addr = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), wave_id_in_tg);
   store_lds(ctx, 4u, as_vgpr(ctx, sg_reduction), 0x1u, wave_id_in_tg_lds_addr, ctx->ngg_gs_scratch_addr, 4u);

--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -3090,7 +3090,9 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
   /* Mark SCC needed, so the uniform boolean transformation won't swap the definitions when it isn't beneficial */
   if (instr->format == Format::PSEUDO_BRANCH &&
       instr->operands.size() &&
-       instr->operands[0].isTemp()) {
+       instr->operands[0].isTemp() &&
+       instr->operands[0].isFixed() &&
+       instr->operands[0].physReg() == scc) {
      ctx.info[instr->operands[0].tempId()].set_scc_needed();
      return;
   } else if ((instr->opcode == aco_opcode::s_cselect_b64 ||
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2235,6 +2235,74 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
 	}
 }

+/* GFX9+ metadata cache flushing workaround. Metadata cache coherency is
+ * broken if the CB caches data of multiple mips of the same image at the
+ * same time.
+ *
+ * Insert some flushes to avoid this.
+ */
+static void
+radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
+{
+	struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
+	const struct radv_subpass *subpass = cmd_buffer->state.subpass;
+	bool color_mip_changed = false;
+
+	/* Entire workaround is not applicable before GFX9 */
+	if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
+		return;
+
+	if (!framebuffer)
+		return;
+
+	for (int i = 0; i < subpass->color_count; ++i) {
+		int idx = subpass->color_attachments[i].attachment;
+		if (idx == VK_ATTACHMENT_UNUSED)
+			continue;
+
+		struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
+
+		if ((radv_image_has_CB_metadata(iview->image) ||
+		     radv_image_has_dcc(iview->image)) &&
+		    cmd_buffer->state.cb_mip[i] != iview->base_mip)
+			color_mip_changed = true;
+
+		cmd_buffer->state.cb_mip[i] = iview->base_mip;
+	}
+
+	if (color_mip_changed) {
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+		                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+	}
+}
+
+/* This function does the flushes for mip changes if the tracked level of any
+ * render target is not zero. This way we can assume at the start of the next
+ * cmd_buffer that rendering to mip 0 doesn't need any flushes. Since that is
+ * the most common case, this saves some flushes. */
+static void
+radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer)
+{
+	/* Entire workaround is not applicable before GFX9 */
+	if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
+		return;
+
+	bool need_color_mip_flush = false;
+	for (unsigned i = 0; i < 8; ++i) {
+		if (cmd_buffer->state.cb_mip[i]) {
+			need_color_mip_flush = true;
+			break;
+		}
+	}
+
+	if (need_color_mip_flush) {
+		cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
+		                                RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
+	}
+
+	memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
+}
+
 static void
 radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -4074,6 +4142,8 @@ VkResult radv_EndCommandBuffer(
 {
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

+	radv_emit_mip_change_flush_default(cmd_buffer);
+
 	if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
 		if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
 			cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
@@ -4653,6 +4723,8 @@ void radv_CmdExecuteCommands(

 	assert(commandBufferCount > 0);

+	radv_emit_mip_change_flush_default(primary);
+
 	/* Emit pending flushes on primary prior to executing secondary */
 	si_emit_cache_flush(primary);

@@ -4685,6 +4757,7 @@ void radv_CmdExecuteCommands(
 			 * has been recorded without a framebuffer, otherwise
 			 * fast color/depth clears can't work.
 			 */
+			radv_emit_fb_mip_change_flush(primary);
 			radv_emit_framebuffer_state(primary);
 		}

@@ -5292,6 +5365,10 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
 			return;
 	}

+	/* Need to apply this workaround early as it can set flush flags. */
+	if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
+		radv_emit_fb_mip_change_flush(cmd_buffer);
+
 	/* Use optimal packet order based on whether we need to sync the
 	 * pipeline.
 	 */
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -1389,6 +1389,8 @@ struct radv_cmd_state {
 	uint32_t num_layout_transitions;
 	bool pending_sqtt_barrier_end;
 	enum rgp_flush_bits sqtt_flush_bits;
+
+	uint8_t cb_mip[MAX_RTS];
 };

 struct radv_cmd_pool {
--- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
+++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_winsys.c
@@ -166,6 +166,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
 		amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
 	free(ws->syncobj);

+	pthread_mutex_destroy(&ws->syncobj_lock);
 	u_rwlock_destroy(&ws->global_bo_list_lock);
 	ac_addrlib_destroy(ws->addrlib);
 	amdgpu_device_deinitialize(ws->dev);
--- a/src/compiler/glsl/gl_nir_link_uniforms.c
+++ b/src/compiler/glsl/gl_nir_link_uniforms.c
@@ -1776,19 +1776,21 @@ gl_nir_link_uniforms(struct gl_context *ctx,
                     break;
               }
               assert(found);
-            } else
+               var->data.location = location;
+            } else {
               /* this is the base block offset */
-               location = buffer_block_index;
+               var->data.location = buffer_block_index;
+               location = 0;
+            }
            assert(buffer_block_index >= 0);
            const struct gl_uniform_block *const block =
               &blocks[buffer_block_index];
-            assert(location != -1);
+            assert(location >= 0 && location < block->NumUniforms);

            const struct gl_uniform_buffer_variable *const ubo_var =
               &block->Uniforms[location];

            state.offset = ubo_var->Offset;
-            var->data.location = location;
         }

         /* Check if the uniform has been processed already for
--- a/src/compiler/nir/nir_intrinsics.py
+++ b/src/compiler/nir/nir_intrinsics.py
@@ -964,7 +964,7 @@ load("raw_output_pan", [1], [BASE], [CAN_ELIMINATE, CAN_REORDER])

 # Loads the sampler paramaters <min_lod, max_lod, lod_bias>
 # src[] = { sampler_index }
-load("sampler_lod_parameters_pan", [1], [CAN_ELIMINATE, CAN_REORDER])
+load("sampler_lod_parameters_pan", [1], flags=[CAN_ELIMINATE, CAN_REORDER])

 # R600 specific instrincs
 #
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -984,13 +984,22 @@ optimizations.extend([
   (('ieq(is_not_used_by_if)', a, False), ('inot', 'a')),
   (('bcsel', a, True, False), a),
   (('bcsel', a, False, True), ('inot', a)),
-   (('bcsel', a, 1.0, 0.0), ('b2f', a)),
-   (('bcsel', a, 0.0, 1.0), ('b2f', ('inot', a))),
-   (('bcsel', a, -1.0, -0.0), ('fneg', ('b2f', a))),
-   (('bcsel', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
   (('bcsel', True, b, c), b),
   (('bcsel', False, b, c), c),

+   (('bcsel@16', a, 1.0, 0.0), ('b2f', a)),
+   (('bcsel@16', a, 0.0, 1.0), ('b2f', ('inot', a))),
+   (('bcsel@16', a, -1.0, -0.0), ('fneg', ('b2f', a))),
+   (('bcsel@16', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
+   (('bcsel@32', a, 1.0, 0.0), ('b2f', a)),
+   (('bcsel@32', a, 0.0, 1.0), ('b2f', ('inot', a))),
+   (('bcsel@32', a, -1.0, -0.0), ('fneg', ('b2f', a))),
+   (('bcsel@32', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
+   (('bcsel@64', a, 1.0, 0.0), ('b2f', a), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
+   (('bcsel@64', a, 0.0, 1.0), ('b2f', ('inot', a)), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
+   (('bcsel@64', a, -1.0, -0.0), ('fneg', ('b2f', a)), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
+   (('bcsel@64', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a))), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
+
   (('bcsel', a, b, b), b),
   (('~fcsel', a, b, b), b),

--- a/src/compiler/nir/nir_range_analysis.c
+++ b/src/compiler/nir/nir_range_analysis.c
@@ -1102,6 +1102,7 @@ static uint64_t mul_clamp(uint32_t a, uint32_t b)
      return a * b;
 }

+/* recursively gather at most "buf_size" phi/bcsel sources */
 static unsigned
 search_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size, struct set *visited)
 {
@@ -1112,15 +1113,17 @@ search_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size,
   if (scalar.def->parent_instr->type == nir_instr_type_phi) {
      nir_phi_instr *phi = nir_instr_as_phi(scalar.def->parent_instr);
      unsigned num_sources_left = exec_list_length(&phi->srcs);
-      unsigned total_added = 0;
-      nir_foreach_phi_src(src, phi) {
-         unsigned added = search_phi_bcsel(
-            (nir_ssa_scalar){src->src.ssa, 0}, buf + total_added, buf_size - num_sources_left, visited);
-         buf_size -= added;
-         total_added += added;
-         num_sources_left--;
+      if (buf_size >= num_sources_left) {
+         unsigned total_added = 0;
+         nir_foreach_phi_src(src, phi) {
+            unsigned added = search_phi_bcsel(
+               (nir_ssa_scalar){src->src.ssa, 0}, buf + total_added, buf_size - num_sources_left, visited);
+            buf_size -= added;
+            total_added += added;
+            num_sources_left--;
+         }
+         return total_added;
      }
-      return total_added;
   }

   if (nir_ssa_scalar_is_alu(scalar)) {
--- a/src/etnaviv/drm/etnaviv_bo_cache.c
+++ b/src/etnaviv/drm/etnaviv_bo_cache.c
@@ -28,7 +28,6 @@
 #include "etnaviv_drmif.h"

 void _etna_bo_del(struct etna_bo *bo);
-extern pthread_mutex_t etna_drm_table_lock;

 static void add_bucket(struct etna_bo_cache *cache, int size)
 {
--- a/src/etnaviv/drm/etnaviv_device.c
+++ b/src/etnaviv/drm/etnaviv_device.c
@@ -30,8 +30,6 @@
 #include "etnaviv_priv.h"
 #include "etnaviv_drmif.h"

-static pthread_mutex_t etna_drm_table_lock = PTHREAD_MUTEX_INITIALIZER;
-
 struct etna_device *etna_device_new(int fd)
 {
 	struct etna_device *dev = calloc(sizeof(*dev), 1);
--- a/src/etnaviv/drm/etnaviv_priv.h
+++ b/src/etnaviv/drm/etnaviv_priv.h
@@ -50,6 +50,8 @@
 #include "etnaviv_drmif.h"
 #include "drm-uapi/etnaviv_drm.h"

+extern pthread_mutex_t etna_drm_table_lock;
+
 struct etna_bo_bucket {
 	uint32_t size;
 	struct list_head list;
--- a/src/freedreno/computerator/meson.build
+++ b/src/freedreno/computerator/meson.build
@@ -23,6 +23,7 @@ computerator_files = [
  'ir3_asm.c',
  'main.c',
  freedreno_xml_header_files,
+  ir3_parser[1],
 ]

 computerator = executable(
--- a/src/freedreno/fdl/fd6_layout.c
+++ b/src/freedreno/fdl/fd6_layout.c
@@ -211,7 +211,7 @@ fdl6_layout(struct fdl_layout *layout,
 		 * may not be. note this only matters if last level is linear
 		 */
 		if (level == mip_levels - 1)
-			height = align(nblocksy, 4);
+			nblocksy = align(nblocksy, 4);

 		slice->offset = offset + layout->size;

--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -371,6 +371,7 @@ void cso_destroy_context( struct cso_context *ctx )

      {
         static struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { NULL };
+         static struct pipe_shader_buffer ssbos[PIPE_MAX_SHADER_BUFFERS] = { 0 };
         static void *zeros[PIPE_MAX_SAMPLERS] = { NULL };
         struct pipe_screen *scr = ctx->pipe->screen;
         enum pipe_shader_type sh;
@@ -379,14 +380,25 @@ void cso_destroy_context( struct cso_context *ctx )
                                               PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
            int maxview = scr->get_shader_param(scr, sh,
                                                PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS);
+            int maxssbo = scr->get_shader_param(scr, sh,
+                                                PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
+            int maxcb = scr->get_shader_param(scr, sh,
+                                              PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
            assert(maxsam <= PIPE_MAX_SAMPLERS);
            assert(maxview <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+            assert(maxssbo <= PIPE_MAX_SHADER_BUFFERS);
            if (maxsam > 0) {
               ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros);
            }
            if (maxview > 0) {
               ctx->pipe->set_sampler_views(ctx->pipe, sh, 0, maxview, views);
            }
+            if (maxssbo > 0) {
+               ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0);
+            }
+            for (int i = 0; i < maxcb; i++) {
+               ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, NULL);
+            }
         }
      }

@@ -397,17 +409,13 @@ void cso_destroy_context( struct cso_context *ctx )
      ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, NULL);
      if (ctx->has_geometry_shader) {
         ctx->pipe->bind_gs_state(ctx->pipe, NULL);
-         ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_GEOMETRY, 0, NULL);
      }
      if (ctx->has_tessellation) {
         ctx->pipe->bind_tcs_state(ctx->pipe, NULL);
-         ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_CTRL, 0, NULL);
         ctx->pipe->bind_tes_state(ctx->pipe, NULL);
-         ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL);
      }
      if (ctx->has_compute_shader) {
         ctx->pipe->bind_compute_state(ctx->pipe, NULL);
-         ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
      }
      ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );

--- a/src/gallium/auxiliary/gallivm/lp_bld_limits.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_limits.h
@@ -110,7 +110,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
   case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
      return LP_MAX_TGSI_CONST_BUFFER_SIZE;
   case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
-      return PIPE_MAX_CONSTANT_BUFFERS;
+      return LP_MAX_TGSI_CONST_BUFFERS;
   case PIPE_SHADER_CAP_MAX_TEMPS:
      return LP_MAX_TGSI_TEMPS;
   case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.c
@@ -120,6 +120,7 @@ pipe_loader_load_options(struct pipe_loader_device *dev)
   driParseOptionInfo(&dev->option_info, merged_driconf, merged_count);
   driParseConfigFiles(&dev->option_cache, &dev->option_info, 0,
                       dev->driver_name, NULL, NULL, 0, NULL, 0);
+   free((void *)merged_driconf);
 }

 char *
--- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
@@ -484,6 +484,7 @@ fd6_rebind_resource(struct fd_context *ctx, struct fd_resource *rsc)
 			if (rsc->seqno == state->key.view[i].rsc_seqno) {
 				fd6_texture_state_destroy(entry->data);
 				_mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
+				break;
 			}
 		}
 	}
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -162,6 +162,8 @@ fd_screen_destroy(struct pipe_screen *pscreen)

 	simple_mtx_destroy(&screen->lock);

+	u_transfer_helper_destroy(pscreen->transfer_helper);
+
 	if (screen->compiler)
 		ir3_compiler_destroy(screen->compiler);

--- a/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
+++ b/src/gallium/drivers/r600/sfn/sfn_shader_base.cpp
@@ -867,68 +867,75 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
   auto bufid = nir_src_as_const_value(instr->src[0]);
   auto buf_offset = nir_src_as_const_value(instr->src[1]);

-   if (bufid) {
-      if (buf_offset) {
-         int buf_cmp = nir_intrinsic_component(instr);
-         AluInstruction *ir = nullptr;
-         for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-            int cmp = buf_cmp + i;
-            assert(cmp < 4);
-            auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, bufid->u32 + 1));
-            if (instr->dest.is_ssa)
-               load_preloaded_value(instr->dest, i, u);
-            else {
-               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
-               emit_instruction(ir);
-            }
-         }
-         if (ir)
-            ir->set_flag(alu_last_instr);
-         return true;
-
-      } else {
-         return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32 + 1);
-      }
-   } else {
-      if (buf_offset) {
-         int buf_cmp = nir_intrinsic_component(instr);
-         AluInstruction *ir = nullptr;
-         auto kc_id = from_nir(instr->src[0], 0);
-         for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-            int cmp = buf_cmp + i;
-            auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, kc_id));
-            if (instr->dest.is_ssa)
-               load_preloaded_value(instr->dest, i, u);
-            else {
-               ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
-               emit_instruction(ir);
-            }
-         }
-         if (ir)
-            ir->set_flag(alu_last_instr);
-         return true;
-      }
+   if (!buf_offset) {
      /* TODO: if buf_offset is constant then this can also be solved by using the CF indes
       * on the ALU block, and this would probably make sense when there are more then one
       * loads with the same buffer ID. */
-      PValue bufid = from_nir(instr->src[0], 0, 0);
+
      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      std::array<int, 4> swz = {7,7,7,7};
-      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
-         trgt.set_reg_i(i, from_nir(instr->dest, i));
-         swz[i] = i + nir_intrinsic_component(instr);
+      for (unsigned i = 0; i < 4; ++i) {
+         if (i < nir_dest_num_components(instr->dest)) {
+            trgt.set_reg_i(i, from_nir(instr->dest, i));
+            swz[i] = i + nir_intrinsic_component(instr);
+         } else {
+            trgt.set_reg_i(i, from_nir(instr->dest, 7));
+         }
      }

-      auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
-                                     1, bufid, bim_zero);
+      FetchInstruction *ir;
+      if (bufid) {
+         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
+                                              1, nullptr, bim_none);
+      } else {
+         PValue bufid = from_nir(instr->src[0], 0, 0);
+         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
+                                              1, bufid, bim_zero);
+      }
      ir->set_dest_swizzle(swz);
-
      emit_instruction(ir);
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

+
+   if (bufid) {
+      int buf_cmp = nir_intrinsic_component(instr);
+      AluInstruction *ir = nullptr;
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         int cmp = buf_cmp + i;
+         assert(cmp < 4);
+         auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, bufid->u32 + 1));
+         if (instr->dest.is_ssa)
+            load_preloaded_value(instr->dest, i, u);
+         else {
+            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
+            emit_instruction(ir);
+         }
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+      return true;
+
+   } else {
+      int buf_cmp = nir_intrinsic_component(instr);
+      AluInstruction *ir = nullptr;
+      auto kc_id = from_nir(instr->src[0], 0);
+      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
+         int cmp = buf_cmp + i;
+         auto u = PValue(new UniformValue(512 +  buf_offset->u32, cmp, kc_id));
+         if (instr->dest.is_ssa)
+            load_preloaded_value(instr->dest, i, u);
+         else {
+            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
+            emit_instruction(ir);
+         }
+      }
+      if (ir)
+         ir->set_flag(alu_last_instr);
+      return true;
+   }
 }

 bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
--- a/src/gallium/drivers/radeonsi/si_pm4.c
+++ b/src/gallium/drivers/radeonsi/si_pm4.c
@@ -38,6 +38,7 @@ void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
 {
   assert(state->ndw < SI_PM4_MAX_DW);
   state->pm4[state->ndw++] = dw;
+   state->last_opcode = -1;
 }

 static void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
@@ -76,13 +77,15 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)

   reg >>= 2;

+   assert(state->ndw + 2 <= SI_PM4_MAX_DW);
+
   if (opcode != state->last_opcode || reg != (state->last_reg + 1)) {
      si_pm4_cmd_begin(state, opcode);
-      si_pm4_cmd_add(state, reg);
+      state->pm4[state->ndw++] = reg;
   }

   state->last_reg = reg;
-   si_pm4_cmd_add(state, val);
+   state->pm4[state->ndw++] = val;
   si_pm4_cmd_end(state, false);
 }

--- a/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
+++ b/src/gallium/drivers/radeonsi/si_shaderlib_tgsi.c
@@ -278,6 +278,7 @@ void *si_create_dcc_retile_cs(struct pipe_context *ctx)

   void *cs = ctx->create_compute_state(ctx, &state);
   ureg_destroy(ureg);
+   ureg_free_tokens(state.prog);
   return cs;
 }

--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -1147,7 +1147,10 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
      pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
      S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
         S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(1) |
-         S_00B228_MEM_ORDERED(1) | S_00B228_WGP_MODE(1) |
+         S_00B228_MEM_ORDERED(1) |
+         /* Disable the WGP mode on gfx10.3 because it can hang (it happened on VanGogh).
+          * Let's disable it on all chips that disable exactly 1 CU per SA for GS. */
+         S_00B228_WGP_MODE(sscreen->info.chip_class == GFX10) |
         S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
   si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
                  S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) |
@@ -3026,8 +3029,13 @@ bool si_update_ngg(struct si_context *sctx)
       * VGT_FLUSH is also emitted at the beginning of IBs when legacy GS ring
       * pointers are set.
       */
-      if ((sctx->chip_class == GFX10 || sctx->family == CHIP_SIENNA_CICHLID) && !new_ngg)
+      if ((sctx->chip_class == GFX10 || sctx->family == CHIP_SIENNA_CICHLID) && !new_ngg) {
         sctx->flags |= SI_CONTEXT_VGT_FLUSH;
+         if (sctx->chip_class == GFX10) {
+            /* Workaround for https://gitlab.freedesktop.org/mesa/mesa/-/issues/2941 */
+            si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
+         }
+      }

      sctx->ngg = new_ngg;
      sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
--- a/src/gallium/drivers/zink/zink_device_info.py
+++ b/src/gallium/drivers/zink/zink_device_info.py
@@ -58,7 +58,7 @@ import sys
 def EXTENSIONS():
    return [
        Extension("VK_KHR_maintenance1",             required=True),
-        Extension("VK_KHR_external_memory",          required=True),
+        Extension("VK_KHR_external_memory"),
        Extension("VK_KHR_external_memory_fd"),
        Extension("VK_KHR_vulkan_memory_model"),
        Extension("VK_EXT_conditional_rendering",    alias="cond_render", have_feature="conditionalRendering"),
--- a/src/intel/vulkan/anv_descriptor_set.c
+++ b/src/intel/vulkan/anv_descriptor_set.c
@@ -909,8 +909,10 @@ void anv_DestroyDescriptorPool(
      anv_descriptor_set_layout_unref(device, set->layout);
   }

-   if (pool->bo)
+   if (pool->bo) {
+      util_vma_heap_finish(&pool->bo_heap);
      anv_device_release_bo(device, pool->bo);
+   }
   anv_state_stream_finish(&pool->surface_state_stream);

   vk_object_base_finish(&pool->base);
--- a/src/mesa/main/uniform_query.cpp
+++ b/src/mesa/main/uniform_query.cpp
@@ -1181,6 +1181,10 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
               /* Mark this bindless sampler as bound to a texture unit.
                */
               if (sampler->unit != value || !sampler->bound) {
+                  if (!flushed) {
+                     FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM);
+                     flushed = true;
+                  }
                  sampler->unit = value;
                  changed = true;
               }
@@ -1188,6 +1192,10 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
               sh->Program->sh.HasBoundBindlessSampler = true;
            } else {
               if (sh->Program->SamplerUnits[unit] != value) {
+                  if (!flushed) {
+                     FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM);
+                     flushed = true;
+                  }
                  sh->Program->SamplerUnits[unit] = value;
                  changed = true;
               }
@@ -1195,11 +1203,6 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
         }

         if (changed) {
-            if (!flushed) {
-               FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM);
-               flushed = true;
-            }
-
            struct gl_program *const prog = sh->Program;
            _mesa_update_shader_textures_used(shProg, prog);
            if (ctx->Driver.SamplerUniformChange)
--- a/src/mesa/state_tracker/st_program.c
+++ b/src/mesa/state_tracker/st_program.c
@@ -666,7 +666,7 @@ lower_ucp(struct st_context *st,
                                           PIPE_CAP_NIR_COMPACT_ARRAYS);
      bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;

-      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
+      gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
      for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
         if (use_eye) {
            clipplane_state[i][0] = STATE_CLIPPLANE;
Author	SHA1	Message	Date
Dylan Baker	08169ff176	bump version for 20.3.0 final	2020-12-03 09:56:23 -08:00
Dylan Baker	b9d2f63f2f	docs: add release notes for 20.3.0	2020-12-03 09:49:56 -08:00
Eric Anholt	404c440015	freedreno: Break out of "should we free the entry" loop once we've freed. Fixes a use-after-free of the state on the next iteration when it was probably just destroyed. Fixes: `6de01faac5` ("freedreno/a6xx: invalidate tex state cache entries on rebind") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7695> (cherry picked from commit `d3c67d7e7e`) Conflicts: src/gallium/drivers/freedreno/a6xx/fd6_texture.c	2020-12-02 15:10:44 -08:00
Eric Anholt	01f9a5e822	gallium: Fix leak of currently bound UBOs at CSO context destruction. Cc: mesa-stable Reviewed-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7695> (cherry picked from commit `daaf5f1d18`)	2020-12-02 15:08:50 -08:00
Eric Anholt	faeaa8171e	gallivm: Fix max const buffer count. llvmpipe was reporting 32 max const buffers, while sizing its arrays to 16 according to gallivm's #define. Fixes: `1d35f77228` ("gallivm,llvmpipe,draw: Support multiple constant buffers.") Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7695> (cherry picked from commit `d90107a200`)	2020-12-02 15:08:47 -08:00
Eric Anholt	8258c5867a	gallium: Fix leak of bound SSBOs at CSO context destruction. Cc: mesa-stable Reviewed-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7695> (cherry picked from commit `634384e4a0`)	2020-12-02 15:08:46 -08:00
Eric Anholt	d0d3a589a1	freedreno: Fix leak of u_transfer_helper. Fixes: `d1465b3aee` ("freedreno: use u_transfer_helper") Reviewed-by: Rob Clark <robdclark@chromium.org> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7695> (cherry picked from commit `9cc8fc7bbc`)	2020-12-02 15:08:45 -08:00
Eric Anholt	0ee24d08fa	gallium: Fix leak of the merged driconf options. Fixes: `8a05d6ffc6` ("driconf: Make the driver's declarations be structs instead of XML.") Reviewed-by: Rob Clark <robdclark@chromium.org> Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7695> (cherry picked from commit `0626e3a950`)	2020-12-02 15:08:44 -08:00
Erik Faye-Lund	531b15c22a	zink: do not require VK_KHR_external_memory This is only required for the DRI-path. For the swrast code-path, we don't need this. We also don't need to explicitly test for it in the DRI-path, because we test for KHR_external_memory_fd, which depends on KHR_external_memory. So no implementation will expose the former without the latter. Fixes: `f1432fd3e2` ("zink: generate extension infrastructure using a python script") Reviewed-by: Hoe Hao Cheng <haochengho12907@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7882> (cherry picked from commit `59a6705cce`)	2020-12-02 15:08:44 -08:00
Daniel Stone	cddf1bf5f9	freedreno: Add missing dependency to build. Computerator depends on ir3_parser.h, which is a generated file, but this dependency is not expressed in the build. Fixes: `1e8808a4a0` ("freedreno/ir3: refactor out helper to compile shader from asm") Signed-off-by: Daniel Stone <daniels@collabora.com> Reviewed-by: Erik Faye-Lund <erik.faye-lund@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7870> (cherry picked from commit `9eee405484`)	2020-12-02 15:08:42 -08:00
James Park	12c40b0477	radv: Fix leak in radv_amdgpu_winsys_destroy() Fixes: `fa97061a82` ("radv/winsys: Add binary syncobj ABI changes for timeline semaphores.") Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7866> (cherry picked from commit `6ec0953e22`)	2020-12-02 15:08:41 -08:00
Marek Olšák	1578dde278	radeonsi: disable WGP mode on gfx10.3 to prevent hangs I think that reducing the CU mask to 1 disabled CU per SA broke the WGP mode on VanGogh, causing a hang. To be sure, disable it on all chips. Fixes: `9538b9a68e` - radeonsi: add support for Sienna Cichlid Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7721> (cherry picked from commit `69c927debe`)	2020-12-02 15:08:41 -08:00
Marek Olšák	2b9da404c1	radeonsi: fix a nasty bug in si_pm4.c If you did: si_pm4_set_reg(pm4, reg, val0); si_pm4_cmd_add(pm4, val1); si_pm4_set_reg(pm4, reg + 4, val2); it wrote val0 to reg, val1 to reg + 4, and val2 to reg + 8. This fixes it by clearing last_opcode in si_pm4_cmd_add, so that si_pm4_set_reg doesn't try to combine set_reg calls across si_pm4_cmd_add. Fixes: `da78d50bc8` - radeonsi: make si_pm4_cmd_begin/end static and simplify all usages Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7721> (cherry picked from commit `0d4f1dcd15`)	2020-12-02 15:08:40 -08:00
Marek Olšák	b7659c5ed7	radeonsi: fix a memory leak in si_create_dcc_retile_cs Fixes: `1f21396431` - radeonsi: add support for displayable DCC for multi-RB chips Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7721> (cherry picked from commit `e64d5cc4d6`)	2020-12-02 15:08:39 -08:00
Dylan Baker	4e39cdaa50	.pick_status.json: Update to `d3c67d7e7e`	2020-12-02 15:08:38 -08:00
Kenneth Graunke	bcbc20bf6e	nir/algebraic: Avoid creating new fp64 ops when using softfp64 In commit `00b28a50b2`, Marek extended a number of optimizations that had been 32-bit specific to work on other bit-sizes. Most optimizations preserve the data type across the transformation. In other words, an optimization which generates e.g. fp64 operations only does so when the source expression also contains fp64 operations. These transformations are fine with respect to lowering, because we will lower away all expressions that would trigger the search portion of the expression, and so we'd never apply those rules. However, a few of the rules create new operations that run afoul of lowering passes. For example, ('bcsel', a, 1.0, 0.0) => ('b2f', a) where the result is a double would simply be a selection between two different 64-bit constants. The replacement expression, on the other hand, involves a nir_op_b2f64 ALU operation. If we're run after nir_lower_doubles, then it may not be legal to generate such an expression anymore (at least without running lowering again, which we don't do today). Regressions due to this are blocking the 20.3 release, so for now, we take the easy route and simply disallow those few rules when doing full softfp64 lowering, which fixes the immediate problem. But it doesn't solve the long-term problem in an extensible manner. In the future, we may want to add a `lowered_alu_ops` bitfield to the NIR shader, and as lowering passes are run, mark them as taboo. Then, we could have each algebraic transformation track which operations it creates in the replacement expression. With both of those in place, nir_replace_instr could compare the transformation's list of ALU ops against `lowered_alu_ops` and implicitly skip rules that generate forbidden ALU operations. 
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3504 Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7841> (cherry picked from commit `531843cf2e`)	2020-12-01 10:06:17 -08:00
Timur Kristóf	7a4f33b1f5	aco: Fix NGG GS assert failure from the WG scan. There was a temp which was defined in a branch but used outside, without a phi. Fixes: `62b5012ec3` Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Rhys Perry <pendingchaos02@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7817> (cherry picked from commit `94f8cb29ee`)	2020-12-01 09:12:41 -08:00
Christian Gmeiner	f4a059eb98	etnaviv/drm: fix evil-twin etna_drm_table_lock Cc: <mesa-stable@lists.freedesktop.org> Signed-off-by: Christian Gmeiner <christian.gmeiner@gmail.com> Reviewed-by: Lucas Stach <l.stach@pengutronix.de> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7832> (cherry picked from commit `aad0c7c6b8`)	2020-12-01 09:12:41 -08:00
Marek Olšák	b52bb0dc5c	st/mesa: fix uninitialized/random clip plane state vars in lower_ucp Fixes: `584f27326c` - st/mesa: factor ucp-lowering logic into helper Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6946> (cherry picked from commit `fdd3a448ae`)	2020-12-01 09:12:40 -08:00
Marek Olšák	770f46c781	mesa: call FLUSH_VERTICES before changing sampler uniforms Fixes: `9545139ce5` "mesa: skip FLUSH_VERTICES() if no samplers were changed" Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6946> (cherry picked from commit `0a2117bc9e`)	2020-12-01 09:12:40 -08:00
Bas Nieuwenhuizen	ded8b21e2a	radv: Deal with unused attachments in mip flush Fixes: `4cce4d22a7` ("radv: Fix a hang on CB change by adding flushes.") Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7813> (cherry picked from commit `aed8d30b50`)	2020-12-01 09:12:39 -08:00
Gert Wollny	db68b97f25	r600/sfn: fix component loading from fixed buffer ID Fixes: `18e9781714` r600/sfn: Use load_ubo_vec4 lowering pass Signed-off-by: Gert Wollny <gert.wollny@collabora.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7715> (cherry picked from commit `c41d0d0c3d`)	2020-12-01 09:12:38 -08:00
Bas Nieuwenhuizen	5355ff3744	radv: Fix a hang on CB change by adding flushes. This workaround fixes a hang while loading a renderdoc trace for me. Since the workload does 1 mip per cmdbuffer it is quite hard to confirm what exactly the conditions for the hang are but this is the most restrictive set I found and it corresponds to a workaround in AMDVLK as well. CC: mesa-stable Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7210> (cherry picked from commit `4cce4d22a7`)	2020-12-01 09:12:38 -08:00
Erik Faye-Lund	25f01a7d4b	docs: document new zink-flag We forgot to document this previously, so let's add it now. Fixes: `feb9462bb1` ("zink: Added inbuilt debug logging from the VK_LAYER_LUNARG_standard_validation layer.") Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7802> (cherry picked from commit `8564715253`)	2020-12-01 09:12:37 -08:00
Rhys Perry	3e2a3b402b	nir: fix sampler_lod_parameters_pan indices Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Fixes: `deaebc82a7` "nir: Add load_sampler_lod_paramaters_pan intrinsic" Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6587> (cherry picked from commit `f6407b9b7d`)	2020-12-01 09:12:36 -08:00
Danylo Piliaiev	1891d30031	freedreno/a6xx: Fix typo in height alignment calculation in a6xx layout Fixes KHR-GL31.texture_size_promotion.functional Fixes: `e49748521e` Signed-off-by: Danylo Piliaiev <dpiliaiev@igalia.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7792> (cherry picked from commit `a569ffeb83`)	2020-12-01 09:12:36 -08:00
Lionel Landwerlin	bf76f2b21c	anv: fix descriptor pool leak in VMA object Quoting the spec : "When a pool is destroyed, all descriptor sets allocated from the pool are implicitly freed and become invalid. Descriptor sets allocated from a given pool do not need to be freed before destroying that descriptor pool." This implies we might leak nodes allocated in the vma object. Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com> Fixes: `0a6d2593b8` ("anv: Allocate descriptor buffers from the BO cache") Reviewed-by: Jason Ekstrand <jason@jlekstrand.net> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7796> (cherry picked from commit `5d55ca9c30`)	2020-12-01 09:12:35 -08:00
Timur Kristóf	099804865a	aco/optimizer: Only set scc_needed when it is actually needed. Not every p_cbranch uses the SCC, but our optimizer thought so. Fixes: `8a32f57fff` Signed-off-by: Timur Kristóf <timur.kristof@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7677> (cherry picked from commit `8bd3fefb74`)	2020-12-01 09:12:34 -08:00
Andrii Simiklit	6e72700f84	glsl: avoid an out-of-bound access while setting up a location for variable It fixes the following valgrind issue: ==141996== Invalid read of size 4 ==141996== at 0x61F8806: gl_nir_link_uniforms (gl_nir_link_uniforms.c:1788) ==141996== by 0x60F17AA: gl_nir_link_glsl (gl_nir_linker.c:672) ==141996== by 0x5C1AEDF: st_link_nir (st_glsl_to_nir.cpp:739) ==141996== by 0x5C15574: st_link_shader (st_glsl_to_ir.cpp:172) ==141996== by 0x5C673B0: _mesa_glsl_link_shader (ir_to_mesa.cpp:3117) ==141996== by 0x5E7B61C: link_program (shaderapi.c:1311) ==141996== by 0x5E7B61C: link_program_error (shaderapi.c:1419) ==141996== by 0x5E7CF8A: _mesa_LinkProgram (shaderapi.c:1911) ==141996== by 0x4923D13: stub_glLinkProgram (piglit-dispatch-gen.c:33956) ==141996== by 0x1142C0: link_and_use_shaders (shader_runner.c:1636) ==141996== by 0x1205A6: init_test (shader_runner.c:5347) ==141996== by 0x121555: piglit_init (shader_runner.c:5725) ==141996== by 0x4991C84: run_test (piglit_fbo_framework.c:50) It can be reproduced on `iris` using the following piglit test: instance-matching-shader-storage-blocks-align-qualifier-mismatch.shader_test Closes: #3818 Reviewed-by: Ian Romanick <ian.d.romanick@intel.com> Fixes: `47c35823` ("glsl: fix up location setting for variables pointing to a UBO's base") Signed-off-by: Mike Blumenkrantz <michael.blumenkrantz@gmail.com> Signed-off-by: Andrii Simiklit <andrii.simiklit@globallogic.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7376> (cherry picked from commit `a941618a1f`)	2020-12-01 09:12:34 -08:00
Pierre-Eric Pelloux-Prayer	0eabed30a4	radeonsi/gfx10: flush gfx cs on ngg -> legacy transition with a sequence like this: glClear(STENCIL) glBeginTransformFeedback() ... glEndTransformFeedback() glClear(STENCIL) The second clear sometimes may produce an unexpected result. Calling si_flush_gfx_cs() when doing ngg -> legacy transition seems to be a valid workaround (both for the synthetic reproducer and the real Blender bug). Using flush flags or events (BOTTOM_OF_PIPE_TS, RESET_TO_LOWEST_VGT) didn't help. Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/2941 Cc: mesa-stable Reviewed-by: Marek Olšák <marek.olsak@amd.com> Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7750> (cherry picked from commit `0b3bd7c516`)	2020-12-01 09:12:33 -08:00
Rhys Perry	bfb711b209	nir/unsigned_upper_bound: fix buffer overflow in search_phi_bcsel It should only recurse if there's enough space to add the phi sources. Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Daniel Schürmann <daniel@schuermann.dev> Fixes: `72ac3f6026` ("nir: add nir_unsigned_upper_bound and nir_addition_might_overflow") Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7748> (cherry picked from commit `65fbae16e3`)	2020-12-01 09:12:32 -08:00
Rhys Perry	9c6e0fb476	aco: fix v_mul_hi_u32_u24 format Signed-off-by: Rhys Perry <pendingchaos02@gmail.com> Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com> Fixes: `57c152af9c` ("aco: select v_mul_{hi}_u32_u24 for 24-bit multiplications") Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/3874 Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7759> (cherry picked from commit `cf0b54cdc1`)	2020-12-01 09:12:32 -08:00
Dylan Baker	7770f9a27d	.pick_status.json: Update to `89f6b72f19`	2020-12-01 09:12:31 -08:00