Compare commits
33 Commits
mesa-20.3.
...
mesa-20.3.
Author | SHA1 | Date | |
---|---|---|---|
|
08169ff176 | ||
|
b9d2f63f2f | ||
|
404c440015 | ||
|
01f9a5e822 | ||
|
faeaa8171e | ||
|
8258c5867a | ||
|
d0d3a589a1 | ||
|
0ee24d08fa | ||
|
531b15c22a | ||
|
cddf1bf5f9 | ||
|
12c40b0477 | ||
|
1578dde278 | ||
|
2b9da404c1 | ||
|
b7659c5ed7 | ||
|
4e39cdaa50 | ||
|
bcbc20bf6e | ||
|
7a4f33b1f5 | ||
|
f4a059eb98 | ||
|
b52bb0dc5c | ||
|
770f46c781 | ||
|
ded8b21e2a | ||
|
db68b97f25 | ||
|
5355ff3744 | ||
|
25f01a7d4b | ||
|
3e2a3b402b | ||
|
1891d30031 | ||
|
bf76f2b21c | ||
|
099804865a | ||
|
6e72700f84 | ||
|
0eabed30a4 | ||
|
bfb711b209 | ||
|
9c6e0fb476 | ||
|
7770f9a27d |
3726
.pick_status.json
3726
.pick_status.json
File diff suppressed because it is too large
Load Diff
@@ -103,6 +103,8 @@ variable:
|
||||
current directory, and print a message with the filename to stderr.
|
||||
``tgsi``
|
||||
Print the TGSI form of TGSI shaders to stderr.
|
||||
``validation``
|
||||
Dump Validation layer output.
|
||||
|
||||
Vulkan Validation Layers
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
4695
docs/relnotes/20.3.0.rst
Normal file
4695
docs/relnotes/20.3.0.rst
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,19 +0,0 @@
|
||||
GL 4.5 on llvmpipe
|
||||
GL_INTEL_blackhole_render on radeonsi
|
||||
GL_NV_copy_depth_to_color for NIR
|
||||
GL_NV_half_float
|
||||
GL_NV_shader_atomic_int64 on radeonsi
|
||||
EGL_KHR_swap_buffers_with_damage on X11 (DRI3)
|
||||
VK_PRESENT_MODE_FIFO_RELAXED on X11
|
||||
GLX_EXT_swap_control for DRI2 and DRI3
|
||||
GLX_EXT_swap_control_tear for DRI3
|
||||
VK_KHR_copy_commands2 on RADV
|
||||
VK_KHR_shader_terminate_invocation on RADV
|
||||
NGG GS support in ACO
|
||||
VK_KHR_shader_terminate_invocation on ANV
|
||||
driconf: add glx_extension_override
|
||||
driconf: add indirect_gl_extension_override
|
||||
VK_AMD_mixed_attachment_samples on RADV (GFX6-GFX7).
|
||||
GL_MESA_pack_invert on r100 and vieux
|
||||
GL_ANGLE_pack_reverse_row_order
|
||||
VK_EXT_shader_image_atomic_int64 on RADV
|
@@ -1745,7 +1745,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
|
||||
|
||||
Temp tmp = dst.regClass() == s1 ? bld.tmp(v1) : dst;
|
||||
if (src0_ub <= 0xffffff && src1_ub <= 0xffffff) {
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_u32_u24, tmp);
|
||||
emit_vop2_instruction(ctx, instr, aco_opcode::v_mul_hi_u32_u24, tmp, true);
|
||||
} else {
|
||||
emit_vop3a_instruction(ctx, instr, aco_opcode::v_mul_hi_u32, tmp);
|
||||
}
|
||||
@@ -11358,11 +11358,11 @@ std::pair<Temp, Temp> ngg_gs_workgroup_reduce_and_scan(isel_context *ctx, Temp s
|
||||
|
||||
/* Determine if the current lane is the first. */
|
||||
Temp is_first_lane = bld.copy(bld.def(bld.lm), Operand(1u, ctx->program->wave_size == 64));
|
||||
Temp wave_id_in_tg = wave_id_in_threadgroup(ctx);
|
||||
begin_divergent_if_then(ctx, &ic, is_first_lane);
|
||||
bld.reset(ctx->block);
|
||||
|
||||
/* The first lane of each wave stores the result of its subgroup reduction to LDS (NGG scratch). */
|
||||
Temp wave_id_in_tg = wave_id_in_threadgroup(ctx);
|
||||
Temp wave_id_in_tg_lds_addr = bld.vop2_e64(aco_opcode::v_lshlrev_b32, bld.def(v1), Operand(2u), wave_id_in_tg);
|
||||
store_lds(ctx, 4u, as_vgpr(ctx, sg_reduction), 0x1u, wave_id_in_tg_lds_addr, ctx->ngg_gs_scratch_addr, 4u);
|
||||
|
||||
|
@@ -3090,7 +3090,9 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||
/* Mark SCC needed, so the uniform boolean transformation won't swap the definitions when it isn't beneficial */
|
||||
if (instr->format == Format::PSEUDO_BRANCH &&
|
||||
instr->operands.size() &&
|
||||
instr->operands[0].isTemp()) {
|
||||
instr->operands[0].isTemp() &&
|
||||
instr->operands[0].isFixed() &&
|
||||
instr->operands[0].physReg() == scc) {
|
||||
ctx.info[instr->operands[0].tempId()].set_scc_needed();
|
||||
return;
|
||||
} else if ((instr->opcode == aco_opcode::s_cselect_b64 ||
|
||||
|
@@ -2235,6 +2235,74 @@ radv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
|
||||
}
|
||||
}
|
||||
|
||||
/* GFX9+ metadata cache flushing workaround. metadata cache coherency is
|
||||
* broken if the CB caches data of multiple mips of the same image at the
|
||||
* same time.
|
||||
*
|
||||
* Insert some flushes to avoid this.
|
||||
*/
|
||||
static void
|
||||
radv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
|
||||
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
|
||||
bool color_mip_changed = false;
|
||||
|
||||
/* Entire workaround is not applicable before GFX9 */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
|
||||
return;
|
||||
|
||||
if (!framebuffer)
|
||||
return;
|
||||
|
||||
for (int i = 0; i < subpass->color_count; ++i) {
|
||||
int idx = subpass->color_attachments[i].attachment;
|
||||
if (idx == VK_ATTACHMENT_UNUSED)
|
||||
continue;
|
||||
|
||||
struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
|
||||
|
||||
if ((radv_image_has_CB_metadata(iview->image) ||
|
||||
radv_image_has_dcc(iview->image)) &&
|
||||
cmd_buffer->state.cb_mip[i] != iview->base_mip)
|
||||
color_mip_changed = true;
|
||||
|
||||
cmd_buffer->state.cb_mip[i] = iview->base_mip;
|
||||
}
|
||||
|
||||
if (color_mip_changed) {
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
|
||||
}
|
||||
}
|
||||
|
||||
/* This function does the flushes for mip changes if the levels are not zero for
|
||||
* all render targets. This way we can assume at the start of the next cmd_buffer
|
||||
* that rendering to mip 0 doesn't need any flushes. As that is the most common
|
||||
* case that saves some flushes. */
|
||||
static void
|
||||
radv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
/* Entire workaround is not applicable before GFX9 */
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class < GFX9)
|
||||
return;
|
||||
|
||||
bool need_color_mip_flush = false;
|
||||
for (unsigned i = 0; i < 8; ++i) {
|
||||
if (cmd_buffer->state.cb_mip[i]) {
|
||||
need_color_mip_flush = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (need_color_mip_flush) {
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
|
||||
}
|
||||
|
||||
memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
@@ -4074,6 +4142,8 @@ VkResult radv_EndCommandBuffer(
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
radv_emit_mip_change_flush_default(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX6)
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
|
||||
@@ -4653,6 +4723,8 @@ void radv_CmdExecuteCommands(
|
||||
|
||||
assert(commandBufferCount > 0);
|
||||
|
||||
radv_emit_mip_change_flush_default(primary);
|
||||
|
||||
/* Emit pending flushes on primary prior to executing secondary */
|
||||
si_emit_cache_flush(primary);
|
||||
|
||||
@@ -4685,6 +4757,7 @@ void radv_CmdExecuteCommands(
|
||||
* has been recorded without a framebuffer, otherwise
|
||||
* fast color/depth clears can't work.
|
||||
*/
|
||||
radv_emit_fb_mip_change_flush(primary);
|
||||
radv_emit_framebuffer_state(primary);
|
||||
}
|
||||
|
||||
@@ -5292,6 +5365,10 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
|
||||
return;
|
||||
}
|
||||
|
||||
/* Need to apply this workaround early as it can set flush flags. */
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
|
||||
radv_emit_fb_mip_change_flush(cmd_buffer);
|
||||
|
||||
/* Use optimal packet order based on whether we need to sync the
|
||||
* pipeline.
|
||||
*/
|
||||
|
@@ -1389,6 +1389,8 @@ struct radv_cmd_state {
|
||||
uint32_t num_layout_transitions;
|
||||
bool pending_sqtt_barrier_end;
|
||||
enum rgp_flush_bits sqtt_flush_bits;
|
||||
|
||||
uint8_t cb_mip[MAX_RTS];
|
||||
};
|
||||
|
||||
struct radv_cmd_pool {
|
||||
|
@@ -166,6 +166,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
|
||||
amdgpu_cs_destroy_syncobj(ws->dev, ws->syncobj[i]);
|
||||
free(ws->syncobj);
|
||||
|
||||
pthread_mutex_destroy(&ws->syncobj_lock);
|
||||
u_rwlock_destroy(&ws->global_bo_list_lock);
|
||||
ac_addrlib_destroy(ws->addrlib);
|
||||
amdgpu_device_deinitialize(ws->dev);
|
||||
|
@@ -1776,19 +1776,21 @@ gl_nir_link_uniforms(struct gl_context *ctx,
|
||||
break;
|
||||
}
|
||||
assert(found);
|
||||
} else
|
||||
var->data.location = location;
|
||||
} else {
|
||||
/* this is the base block offset */
|
||||
location = buffer_block_index;
|
||||
var->data.location = buffer_block_index;
|
||||
location = 0;
|
||||
}
|
||||
assert(buffer_block_index >= 0);
|
||||
const struct gl_uniform_block *const block =
|
||||
&blocks[buffer_block_index];
|
||||
assert(location != -1);
|
||||
assert(location >= 0 && location < block->NumUniforms);
|
||||
|
||||
const struct gl_uniform_buffer_variable *const ubo_var =
|
||||
&block->Uniforms[location];
|
||||
|
||||
state.offset = ubo_var->Offset;
|
||||
var->data.location = location;
|
||||
}
|
||||
|
||||
/* Check if the uniform has been processed already for
|
||||
|
@@ -964,7 +964,7 @@ load("raw_output_pan", [1], [BASE], [CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# Loads the sampler parameters <min_lod, max_lod, lod_bias>
|
||||
# src[] = { sampler_index }
|
||||
load("sampler_lod_parameters_pan", [1], [CAN_ELIMINATE, CAN_REORDER])
|
||||
load("sampler_lod_parameters_pan", [1], flags=[CAN_ELIMINATE, CAN_REORDER])
|
||||
|
||||
# R600 specific intrinsics
|
||||
#
|
||||
|
@@ -984,13 +984,22 @@ optimizations.extend([
|
||||
(('ieq(is_not_used_by_if)', a, False), ('inot', 'a')),
|
||||
(('bcsel', a, True, False), a),
|
||||
(('bcsel', a, False, True), ('inot', a)),
|
||||
(('bcsel', a, 1.0, 0.0), ('b2f', a)),
|
||||
(('bcsel', a, 0.0, 1.0), ('b2f', ('inot', a))),
|
||||
(('bcsel', a, -1.0, -0.0), ('fneg', ('b2f', a))),
|
||||
(('bcsel', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
|
||||
(('bcsel', True, b, c), b),
|
||||
(('bcsel', False, b, c), c),
|
||||
|
||||
(('bcsel@16', a, 1.0, 0.0), ('b2f', a)),
|
||||
(('bcsel@16', a, 0.0, 1.0), ('b2f', ('inot', a))),
|
||||
(('bcsel@16', a, -1.0, -0.0), ('fneg', ('b2f', a))),
|
||||
(('bcsel@16', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
|
||||
(('bcsel@32', a, 1.0, 0.0), ('b2f', a)),
|
||||
(('bcsel@32', a, 0.0, 1.0), ('b2f', ('inot', a))),
|
||||
(('bcsel@32', a, -1.0, -0.0), ('fneg', ('b2f', a))),
|
||||
(('bcsel@32', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a)))),
|
||||
(('bcsel@64', a, 1.0, 0.0), ('b2f', a), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
|
||||
(('bcsel@64', a, 0.0, 1.0), ('b2f', ('inot', a)), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
|
||||
(('bcsel@64', a, -1.0, -0.0), ('fneg', ('b2f', a)), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
|
||||
(('bcsel@64', a, -0.0, -1.0), ('fneg', ('b2f', ('inot', a))), '!(options->lower_doubles_options & nir_lower_fp64_full_software)'),
|
||||
|
||||
(('bcsel', a, b, b), b),
|
||||
(('~fcsel', a, b, b), b),
|
||||
|
||||
|
@@ -1102,6 +1102,7 @@ static uint64_t mul_clamp(uint32_t a, uint32_t b)
|
||||
return a * b;
|
||||
}
|
||||
|
||||
/* recursively gather at most "buf_size" phi/bcsel sources */
|
||||
static unsigned
|
||||
search_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size, struct set *visited)
|
||||
{
|
||||
@@ -1112,15 +1113,17 @@ search_phi_bcsel(nir_ssa_scalar scalar, nir_ssa_scalar *buf, unsigned buf_size,
|
||||
if (scalar.def->parent_instr->type == nir_instr_type_phi) {
|
||||
nir_phi_instr *phi = nir_instr_as_phi(scalar.def->parent_instr);
|
||||
unsigned num_sources_left = exec_list_length(&phi->srcs);
|
||||
unsigned total_added = 0;
|
||||
nir_foreach_phi_src(src, phi) {
|
||||
unsigned added = search_phi_bcsel(
|
||||
(nir_ssa_scalar){src->src.ssa, 0}, buf + total_added, buf_size - num_sources_left, visited);
|
||||
buf_size -= added;
|
||||
total_added += added;
|
||||
num_sources_left--;
|
||||
if (buf_size >= num_sources_left) {
|
||||
unsigned total_added = 0;
|
||||
nir_foreach_phi_src(src, phi) {
|
||||
unsigned added = search_phi_bcsel(
|
||||
(nir_ssa_scalar){src->src.ssa, 0}, buf + total_added, buf_size - num_sources_left, visited);
|
||||
buf_size -= added;
|
||||
total_added += added;
|
||||
num_sources_left--;
|
||||
}
|
||||
return total_added;
|
||||
}
|
||||
return total_added;
|
||||
}
|
||||
|
||||
if (nir_ssa_scalar_is_alu(scalar)) {
|
||||
|
@@ -28,7 +28,6 @@
|
||||
#include "etnaviv_drmif.h"
|
||||
|
||||
void _etna_bo_del(struct etna_bo *bo);
|
||||
extern pthread_mutex_t etna_drm_table_lock;
|
||||
|
||||
static void add_bucket(struct etna_bo_cache *cache, int size)
|
||||
{
|
||||
|
@@ -30,8 +30,6 @@
|
||||
#include "etnaviv_priv.h"
|
||||
#include "etnaviv_drmif.h"
|
||||
|
||||
static pthread_mutex_t etna_drm_table_lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
|
||||
struct etna_device *etna_device_new(int fd)
|
||||
{
|
||||
struct etna_device *dev = calloc(sizeof(*dev), 1);
|
||||
|
@@ -50,6 +50,8 @@
|
||||
#include "etnaviv_drmif.h"
|
||||
#include "drm-uapi/etnaviv_drm.h"
|
||||
|
||||
extern pthread_mutex_t etna_drm_table_lock;
|
||||
|
||||
struct etna_bo_bucket {
|
||||
uint32_t size;
|
||||
struct list_head list;
|
||||
|
@@ -23,6 +23,7 @@ computerator_files = [
|
||||
'ir3_asm.c',
|
||||
'main.c',
|
||||
freedreno_xml_header_files,
|
||||
ir3_parser[1],
|
||||
]
|
||||
|
||||
computerator = executable(
|
||||
|
@@ -211,7 +211,7 @@ fdl6_layout(struct fdl_layout *layout,
|
||||
* may not be. note this only matters if last level is linear
|
||||
*/
|
||||
if (level == mip_levels - 1)
|
||||
height = align(nblocksy, 4);
|
||||
nblocksy = align(nblocksy, 4);
|
||||
|
||||
slice->offset = offset + layout->size;
|
||||
|
||||
|
@@ -371,6 +371,7 @@ void cso_destroy_context( struct cso_context *ctx )
|
||||
|
||||
{
|
||||
static struct pipe_sampler_view *views[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { NULL };
|
||||
static struct pipe_shader_buffer ssbos[PIPE_MAX_SHADER_BUFFERS] = { 0 };
|
||||
static void *zeros[PIPE_MAX_SAMPLERS] = { NULL };
|
||||
struct pipe_screen *scr = ctx->pipe->screen;
|
||||
enum pipe_shader_type sh;
|
||||
@@ -379,14 +380,25 @@ void cso_destroy_context( struct cso_context *ctx )
|
||||
PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS);
|
||||
int maxview = scr->get_shader_param(scr, sh,
|
||||
PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS);
|
||||
int maxssbo = scr->get_shader_param(scr, sh,
|
||||
PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
|
||||
int maxcb = scr->get_shader_param(scr, sh,
|
||||
PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
|
||||
assert(maxsam <= PIPE_MAX_SAMPLERS);
|
||||
assert(maxview <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
|
||||
assert(maxssbo <= PIPE_MAX_SHADER_BUFFERS);
|
||||
if (maxsam > 0) {
|
||||
ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros);
|
||||
}
|
||||
if (maxview > 0) {
|
||||
ctx->pipe->set_sampler_views(ctx->pipe, sh, 0, maxview, views);
|
||||
}
|
||||
if (maxssbo > 0) {
|
||||
ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0);
|
||||
}
|
||||
for (int i = 0; i < maxcb; i++) {
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -397,17 +409,13 @@ void cso_destroy_context( struct cso_context *ctx )
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_VERTEX, 0, NULL);
|
||||
if (ctx->has_geometry_shader) {
|
||||
ctx->pipe->bind_gs_state(ctx->pipe, NULL);
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_GEOMETRY, 0, NULL);
|
||||
}
|
||||
if (ctx->has_tessellation) {
|
||||
ctx->pipe->bind_tcs_state(ctx->pipe, NULL);
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_CTRL, 0, NULL);
|
||||
ctx->pipe->bind_tes_state(ctx->pipe, NULL);
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_TESS_EVAL, 0, NULL);
|
||||
}
|
||||
if (ctx->has_compute_shader) {
|
||||
ctx->pipe->bind_compute_state(ctx->pipe, NULL);
|
||||
ctx->pipe->set_constant_buffer(ctx->pipe, PIPE_SHADER_COMPUTE, 0, NULL);
|
||||
}
|
||||
ctx->pipe->bind_vertex_elements_state( ctx->pipe, NULL );
|
||||
|
||||
|
@@ -110,7 +110,7 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
return LP_MAX_TGSI_CONST_BUFFER_SIZE;
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
return PIPE_MAX_CONSTANT_BUFFERS;
|
||||
return LP_MAX_TGSI_CONST_BUFFERS;
|
||||
case PIPE_SHADER_CAP_MAX_TEMPS:
|
||||
return LP_MAX_TGSI_TEMPS;
|
||||
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
|
||||
|
@@ -120,6 +120,7 @@ pipe_loader_load_options(struct pipe_loader_device *dev)
|
||||
driParseOptionInfo(&dev->option_info, merged_driconf, merged_count);
|
||||
driParseConfigFiles(&dev->option_cache, &dev->option_info, 0,
|
||||
dev->driver_name, NULL, NULL, 0, NULL, 0);
|
||||
free((void *)merged_driconf);
|
||||
}
|
||||
|
||||
char *
|
||||
|
@@ -484,6 +484,7 @@ fd6_rebind_resource(struct fd_context *ctx, struct fd_resource *rsc)
|
||||
if (rsc->seqno == state->key.view[i].rsc_seqno) {
|
||||
fd6_texture_state_destroy(entry->data);
|
||||
_mesa_hash_table_remove(fd6_ctx->tex_cache, entry);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -162,6 +162,8 @@ fd_screen_destroy(struct pipe_screen *pscreen)
|
||||
|
||||
simple_mtx_destroy(&screen->lock);
|
||||
|
||||
u_transfer_helper_destroy(pscreen->transfer_helper);
|
||||
|
||||
if (screen->compiler)
|
||||
ir3_compiler_destroy(screen->compiler);
|
||||
|
||||
|
@@ -867,68 +867,75 @@ bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
|
||||
auto bufid = nir_src_as_const_value(instr->src[0]);
|
||||
auto buf_offset = nir_src_as_const_value(instr->src[1]);
|
||||
|
||||
if (bufid) {
|
||||
if (buf_offset) {
|
||||
int buf_cmp = nir_intrinsic_component(instr);
|
||||
AluInstruction *ir = nullptr;
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
assert(cmp < 4);
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
|
||||
emit_instruction(ir);
|
||||
}
|
||||
}
|
||||
if (ir)
|
||||
ir->set_flag(alu_last_instr);
|
||||
return true;
|
||||
|
||||
} else {
|
||||
return load_uniform_indirect(instr, from_nir(instr->src[1], 0, 0), 0, bufid->u32 + 1);
|
||||
}
|
||||
} else {
|
||||
if (buf_offset) {
|
||||
int buf_cmp = nir_intrinsic_component(instr);
|
||||
AluInstruction *ir = nullptr;
|
||||
auto kc_id = from_nir(instr->src[0], 0);
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
|
||||
emit_instruction(ir);
|
||||
}
|
||||
}
|
||||
if (ir)
|
||||
ir->set_flag(alu_last_instr);
|
||||
return true;
|
||||
}
|
||||
if (!buf_offset) {
|
||||
/* TODO: if buf_offset is constant then this can also be solved by using the CF index
|
||||
* on the ALU block, and this would probably make sense when there are more than one
|
||||
* loads with the same buffer ID. */
|
||||
PValue bufid = from_nir(instr->src[0], 0, 0);
|
||||
|
||||
PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
|
||||
GPRVector trgt;
|
||||
std::array<int, 4> swz = {7,7,7,7};
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
trgt.set_reg_i(i, from_nir(instr->dest, i));
|
||||
swz[i] = i + nir_intrinsic_component(instr);
|
||||
for (unsigned i = 0; i < 4; ++i) {
|
||||
if (i < nir_dest_num_components(instr->dest)) {
|
||||
trgt.set_reg_i(i, from_nir(instr->dest, i));
|
||||
swz[i] = i + nir_intrinsic_component(instr);
|
||||
} else {
|
||||
trgt.set_reg_i(i, from_nir(instr->dest, 7));
|
||||
}
|
||||
}
|
||||
|
||||
auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
|
||||
1, bufid, bim_zero);
|
||||
FetchInstruction *ir;
|
||||
if (bufid) {
|
||||
ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
|
||||
1, nullptr, bim_none);
|
||||
} else {
|
||||
PValue bufid = from_nir(instr->src[0], 0, 0);
|
||||
ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
|
||||
1, bufid, bim_zero);
|
||||
}
|
||||
ir->set_dest_swizzle(swz);
|
||||
|
||||
emit_instruction(ir);
|
||||
m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
if (bufid) {
|
||||
int buf_cmp = nir_intrinsic_component(instr);
|
||||
AluInstruction *ir = nullptr;
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
assert(cmp < 4);
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
|
||||
emit_instruction(ir);
|
||||
}
|
||||
}
|
||||
if (ir)
|
||||
ir->set_flag(alu_last_instr);
|
||||
return true;
|
||||
|
||||
} else {
|
||||
int buf_cmp = nir_intrinsic_component(instr);
|
||||
AluInstruction *ir = nullptr;
|
||||
auto kc_id = from_nir(instr->src[0], 0);
|
||||
for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
|
||||
int cmp = buf_cmp + i;
|
||||
auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
|
||||
if (instr->dest.is_ssa)
|
||||
load_preloaded_value(instr->dest, i, u);
|
||||
else {
|
||||
ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
|
||||
emit_instruction(ir);
|
||||
}
|
||||
}
|
||||
if (ir)
|
||||
ir->set_flag(alu_last_instr);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
|
||||
|
@@ -38,6 +38,7 @@ void si_pm4_cmd_add(struct si_pm4_state *state, uint32_t dw)
|
||||
{
|
||||
assert(state->ndw < SI_PM4_MAX_DW);
|
||||
state->pm4[state->ndw++] = dw;
|
||||
state->last_opcode = -1;
|
||||
}
|
||||
|
||||
static void si_pm4_cmd_end(struct si_pm4_state *state, bool predicate)
|
||||
@@ -76,13 +77,15 @@ void si_pm4_set_reg(struct si_pm4_state *state, unsigned reg, uint32_t val)
|
||||
|
||||
reg >>= 2;
|
||||
|
||||
assert(state->ndw + 2 <= SI_PM4_MAX_DW);
|
||||
|
||||
if (opcode != state->last_opcode || reg != (state->last_reg + 1)) {
|
||||
si_pm4_cmd_begin(state, opcode);
|
||||
si_pm4_cmd_add(state, reg);
|
||||
state->pm4[state->ndw++] = reg;
|
||||
}
|
||||
|
||||
state->last_reg = reg;
|
||||
si_pm4_cmd_add(state, val);
|
||||
state->pm4[state->ndw++] = val;
|
||||
si_pm4_cmd_end(state, false);
|
||||
}
|
||||
|
||||
|
@@ -278,6 +278,7 @@ void *si_create_dcc_retile_cs(struct pipe_context *ctx)
|
||||
|
||||
void *cs = ctx->create_compute_state(ctx, &state);
|
||||
ureg_destroy(ureg);
|
||||
ureg_free_tokens(state.prog);
|
||||
return cs;
|
||||
}
|
||||
|
||||
|
@@ -1147,7 +1147,10 @@ static void gfx10_shader_ngg(struct si_screen *sscreen, struct si_shader *shader
|
||||
pm4, R_00B228_SPI_SHADER_PGM_RSRC1_GS,
|
||||
S_00B228_VGPRS((shader->config.num_vgprs - 1) / (wave_size == 32 ? 8 : 4)) |
|
||||
S_00B228_FLOAT_MODE(shader->config.float_mode) | S_00B228_DX10_CLAMP(1) |
|
||||
S_00B228_MEM_ORDERED(1) | S_00B228_WGP_MODE(1) |
|
||||
S_00B228_MEM_ORDERED(1) |
|
||||
/* Disable the WGP mode on gfx10.3 because it can hang. (it happened on VanGogh)
|
||||
* Let's disable it on all chips that disable exactly 1 CU per SA for GS. */
|
||||
S_00B228_WGP_MODE(sscreen->info.chip_class == GFX10) |
|
||||
S_00B228_GS_VGPR_COMP_CNT(gs_vgpr_comp_cnt));
|
||||
si_pm4_set_reg(pm4, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
|
||||
S_00B22C_SCRATCH_EN(shader->config.scratch_bytes_per_wave > 0) |
|
||||
@@ -3026,8 +3029,13 @@ bool si_update_ngg(struct si_context *sctx)
|
||||
* VGT_FLUSH is also emitted at the beginning of IBs when legacy GS ring
|
||||
* pointers are set.
|
||||
*/
|
||||
if ((sctx->chip_class == GFX10 || sctx->family == CHIP_SIENNA_CICHLID) && !new_ngg)
|
||||
if ((sctx->chip_class == GFX10 || sctx->family == CHIP_SIENNA_CICHLID) && !new_ngg) {
|
||||
sctx->flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
if (sctx->chip_class == GFX10) {
|
||||
/* Workaround for https://gitlab.freedesktop.org/mesa/mesa/-/issues/2941 */
|
||||
si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
sctx->ngg = new_ngg;
|
||||
sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
|
||||
|
@@ -58,7 +58,7 @@ import sys
|
||||
def EXTENSIONS():
|
||||
return [
|
||||
Extension("VK_KHR_maintenance1", required=True),
|
||||
Extension("VK_KHR_external_memory", required=True),
|
||||
Extension("VK_KHR_external_memory"),
|
||||
Extension("VK_KHR_external_memory_fd"),
|
||||
Extension("VK_KHR_vulkan_memory_model"),
|
||||
Extension("VK_EXT_conditional_rendering", alias="cond_render", have_feature="conditionalRendering"),
|
||||
|
@@ -909,8 +909,10 @@ void anv_DestroyDescriptorPool(
|
||||
anv_descriptor_set_layout_unref(device, set->layout);
|
||||
}
|
||||
|
||||
if (pool->bo)
|
||||
if (pool->bo) {
|
||||
util_vma_heap_finish(&pool->bo_heap);
|
||||
anv_device_release_bo(device, pool->bo);
|
||||
}
|
||||
anv_state_stream_finish(&pool->surface_state_stream);
|
||||
|
||||
vk_object_base_finish(&pool->base);
|
||||
|
@@ -1181,6 +1181,10 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
|
||||
/* Mark this bindless sampler as bound to a texture unit.
|
||||
*/
|
||||
if (sampler->unit != value || !sampler->bound) {
|
||||
if (!flushed) {
|
||||
FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM);
|
||||
flushed = true;
|
||||
}
|
||||
sampler->unit = value;
|
||||
changed = true;
|
||||
}
|
||||
@@ -1188,6 +1192,10 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
|
||||
sh->Program->sh.HasBoundBindlessSampler = true;
|
||||
} else {
|
||||
if (sh->Program->SamplerUnits[unit] != value) {
|
||||
if (!flushed) {
|
||||
FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM);
|
||||
flushed = true;
|
||||
}
|
||||
sh->Program->SamplerUnits[unit] = value;
|
||||
changed = true;
|
||||
}
|
||||
@@ -1195,11 +1203,6 @@ _mesa_uniform(GLint location, GLsizei count, const GLvoid *values,
|
||||
}
|
||||
|
||||
if (changed) {
|
||||
if (!flushed) {
|
||||
FLUSH_VERTICES(ctx, _NEW_TEXTURE_OBJECT | _NEW_PROGRAM);
|
||||
flushed = true;
|
||||
}
|
||||
|
||||
struct gl_program *const prog = sh->Program;
|
||||
_mesa_update_shader_textures_used(shProg, prog);
|
||||
if (ctx->Driver.SamplerUniformChange)
|
||||
|
@@ -666,7 +666,7 @@ lower_ucp(struct st_context *st,
|
||||
PIPE_CAP_NIR_COMPACT_ARRAYS);
|
||||
bool use_eye = st->ctx->_Shader->CurrentProgram[MESA_SHADER_VERTEX] != NULL;
|
||||
|
||||
gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH];
|
||||
gl_state_index16 clipplane_state[MAX_CLIP_PLANES][STATE_LENGTH] = {{0}};
|
||||
for (int i = 0; i < MAX_CLIP_PLANES; ++i) {
|
||||
if (use_eye) {
|
||||
clipplane_state[i][0] = STATE_CLIPPLANE;
|
||||
|
Reference in New Issue
Block a user