Compare commits
39 Commits
mesa-17.3.
...
mesa-17.3.
Author | SHA1 | Date | |
---|---|---|---|
|
80f5f279b3 | ||
|
2adb90f40a | ||
|
2416223f1d | ||
|
9a7ffd93c2 | ||
|
9be5e0cf7c | ||
|
d774fe8ef9 | ||
|
cc2b5d6163 | ||
|
ecf2e33760 | ||
|
3561eabf87 | ||
|
5fe6c5fdfb | ||
|
467414c5cd | ||
|
d16639bdc3 | ||
|
b8296fc451 | ||
|
f82c02f1f1 | ||
|
419551fb27 | ||
|
8b628a174e | ||
|
ad6bcb6978 | ||
|
89dbb6e0a5 | ||
|
27819a4f23 | ||
|
6b16c99fb6 | ||
|
bce2836561 | ||
|
8a39fdd7e6 | ||
|
f8dca92cec | ||
|
925aa7723b | ||
|
f378cd34d9 | ||
|
b0e50e1e9c | ||
|
b9d5aab984 | ||
|
0a6e595f58 | ||
|
23cb876377 | ||
|
d37962a5be | ||
|
6028fa7999 | ||
|
3a31b5c00a | ||
|
590b9b794c | ||
|
b8ecf45c0d | ||
|
6aea554308 | ||
|
23539c0fa1 | ||
|
b5bdc36880 | ||
|
7295b97d61 | ||
|
3a67ca681b |
@@ -3,3 +3,22 @@ ab0809e5529725bd0af6f7b6ce06415020b9d32e meson: fix strtof locale support check
|
||||
|
||||
# fixes: The commit addresses Meson which is explicitly disabled for 17.3
|
||||
44fbbd6fd07e5784b05e08e762e54b6c71f95ab1 util: add mesa-sha1 test to meson
|
||||
|
||||
# stable: The commit is causing a regression
|
||||
# (https://bugs.freedesktop.org/show_bug.cgi?id=103626)
|
||||
18fde36ced4279f2577097a1a7d31b55f2f5f141 intel/fs: Use the original destination region for int MUL lowering
|
||||
|
||||
# stable: The commit addresses earlier commit 6132992cdb which did not land in
|
||||
# branch
|
||||
3d2b157e23c9d66df97d59be6efd1098878cc110 i965/fs: Use UW types when using V immediates
|
||||
|
||||
# extra: The commit just references a fix for an additional change in its v2.
|
||||
c1ff99fd70cd2ceb2cac4723e4fd5efc93834746 main: Clear shader program data whenever ProgramBinary is called
|
||||
|
||||
# fixes: The commit addresses earlier commits 40a01c9a0ef and 8d745abc009 which
|
||||
# did not land in branch
|
||||
9b0223046668593deb9c0be0b557994bb5218788 egl: pass the dri2_dpy to the $plat_teardown functions
|
||||
|
||||
# fixes: The commit addresses earlier commit d50937f137 which did not land in
|
||||
# branch
|
||||
78a8b73e7d45f55ced98a148b26247d91f4e0171 vulkan/wsi: free cmd pools
|
||||
|
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
f997e80f14c385f9a2ba827c2b74aebf1b7426712ca4a81c631ef9f78e437bf4 mesa-17.3.2.tar.gz
|
||||
e2844a13f2d6f8f24bee65804a51c42d8dc6ae9c36cff7ee61d0940e796d64c6 mesa-17.3.2.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
150
docs/relnotes/17.3.3.html
Normal file
150
docs/relnotes/17.3.3.html
Normal file
@@ -0,0 +1,150 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 17.3.3 Release Notes / January 18, 2018</h1>
|
||||
|
||||
<p>
|
||||
Mesa 17.3.3 is a bug fix release which fixes bugs found since the 17.3.2 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 17.3.3 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104214">Bug 104214</a> - Dota crashes when switching from game to desktop</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104492">Bug 104492</a> - Compute Shader: Wrong alignment when assigning struct value to structured SSBO</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=104551">Bug 104551</a> - Check if Mako templates for Python are installed</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Alex Smith (3):</p>
|
||||
<ul>
|
||||
<li>anv: Add missing unlock in anv_scratch_pool_alloc</li>
|
||||
<li>anv: Take write mask into account in has_color_buffer_write_enabled</li>
|
||||
<li>anv: Make sure state on primary is correct after CmdExecuteCommands</li>
|
||||
</ul>
|
||||
|
||||
<p>Andres Gomez (1):</p>
|
||||
<ul>
|
||||
<li>anv: Import mako templates only during execution of anv_extensions</li>
|
||||
</ul>
|
||||
|
||||
<p>Bas Nieuwenhuizen (11):</p>
|
||||
<ul>
|
||||
<li>radv: Invert condition for all samples identical during resolve.</li>
|
||||
<li>radv: Flush caches before subpass resolve.</li>
|
||||
<li>radv: Fix fragment resolve destination offset.</li>
|
||||
<li>radv: Use correct framebuffer size for partial FS resolves.</li>
|
||||
<li>radv: Always use fragment resolve if dest uses DCC.</li>
|
||||
<li>Revert "radv/gfx9: fix block compression texture views."</li>
|
||||
<li>radv: Use correct HTILE expanded words.</li>
|
||||
<li>radv: Allow writing 0 scissors.</li>
|
||||
<li>ac/nir: Handle loading data from compact arrays.</li>
|
||||
<li>radv: Invalidate L1 for VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT.</li>
|
||||
<li>ac/nir: Sanitize location_frac for local variables.</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (8):</p>
|
||||
<ul>
|
||||
<li>radv: fix events on compute queues.</li>
|
||||
<li>radv: fix pipeline statistics end query on compute queue</li>
|
||||
<li>radv/gfx9: fix 3d image to image transfers on compute queues.</li>
|
||||
<li>radv/gfx9: fix 3d image clears on compute queues</li>
|
||||
<li>radv/gfx9: fix buffer to image for 3d images on compute queues</li>
|
||||
<li>radv/gfx9: fix block compression texture views.</li>
|
||||
<li>radv/gfx9: use a bigger hammer to flush cb/db caches.</li>
|
||||
<li>radv/gfx9: use correct swizzle parameter to work out border swizzle.</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (1):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 17.3.2</li>
|
||||
</ul>
|
||||
|
||||
<p>Florian Will (1):</p>
|
||||
<ul>
|
||||
<li>glsl: Respect std430 layout in lower_buffer_access</li>
|
||||
</ul>
|
||||
|
||||
<p>Juan A. Suarez Romero (6):</p>
|
||||
<ul>
|
||||
<li>cherry-ignore: intel/fs: Use the original destination region for int MUL lowering</li>
|
||||
<li>cherry-ignore: i965/fs: Use UW types when using V immediates</li>
|
||||
<li>cherry-ignore: main: Clear shader program data whenever ProgramBinary is called</li>
|
||||
<li>cherry-ignore: egl: pass the dri2_dpy to the $plat_teardown functions</li>
|
||||
<li>cherry-ignore: vulkan/wsi: free cmd pools</li>
|
||||
<li>Update version to 17.3.3</li>
|
||||
</ul>
|
||||
|
||||
<p>Józef Kucia (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: fix alpha-to-coverage if color writes are disabled</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>i965: Require space for MI_BATCHBUFFER_END.</li>
|
||||
<li>i965: Torch public intel_batchbuffer_emit_dword/float helpers.</li>
|
||||
</ul>
|
||||
|
||||
<p>Lucas Stach (1):</p>
|
||||
<ul>
|
||||
<li>etnaviv: disable in-place resolve for non-supertiled surfaces</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Iglesias Gonsálvez (1):</p>
|
||||
<ul>
|
||||
<li>anv: VkDescriptorSetLayoutBinding can have descriptorCount == 0</li>
|
||||
</ul>
|
||||
|
||||
<p>Thomas Hellstrom (1):</p>
|
||||
<ul>
|
||||
<li>loader/dri3: Avoid freeing renderbuffers in use</li>
|
||||
</ul>
|
||||
|
||||
<p>Tim Rowley (1):</p>
|
||||
<ul>
|
||||
<li>swr/rast: fix invalid sign masks in avx512 simdlib code</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -3073,6 +3073,7 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
|
||||
LLVMValueRef indir_index;
|
||||
LLVMValueRef ret;
|
||||
unsigned const_index;
|
||||
unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
|
||||
bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
|
||||
instr->variables[0]->var->data.mode == nir_var_shader_in;
|
||||
get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
|
||||
@@ -3098,13 +3099,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
|
||||
count -= chan / 4;
|
||||
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
|
||||
&ctx->ac, ctx->abi->inputs + idx + chan, count,
|
||||
4, false, true);
|
||||
stride, false, true);
|
||||
|
||||
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
|
||||
tmp_vec,
|
||||
indir_index, "");
|
||||
} else
|
||||
values[chan] = ctx->abi->inputs[idx + chan + const_index * 4];
|
||||
values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
|
||||
}
|
||||
break;
|
||||
case nir_var_local:
|
||||
@@ -3115,13 +3116,13 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
|
||||
count -= chan / 4;
|
||||
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
|
||||
&ctx->ac, ctx->locals + idx + chan, count,
|
||||
4, true, true);
|
||||
stride, true, true);
|
||||
|
||||
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
|
||||
tmp_vec,
|
||||
indir_index, "");
|
||||
} else {
|
||||
values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * 4], "");
|
||||
values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
|
||||
}
|
||||
}
|
||||
break;
|
||||
@@ -3143,14 +3144,14 @@ static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
|
||||
count -= chan / 4;
|
||||
LLVMValueRef tmp_vec = ac_build_gather_values_extended(
|
||||
&ctx->ac, ctx->outputs + idx + chan, count,
|
||||
4, true, true);
|
||||
stride, true, true);
|
||||
|
||||
values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
|
||||
tmp_vec,
|
||||
indir_index, "");
|
||||
} else {
|
||||
values[chan] = LLVMBuildLoad(ctx->ac.builder,
|
||||
ctx->outputs[idx + chan + const_index * 4],
|
||||
ctx->outputs[idx + chan + const_index * stride],
|
||||
"");
|
||||
}
|
||||
}
|
||||
@@ -5446,6 +5447,7 @@ setup_locals(struct ac_nir_context *ctx,
|
||||
nir_foreach_variable(variable, &func->impl->locals) {
|
||||
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
|
||||
variable->data.driver_location = ctx->num_locals * 4;
|
||||
variable->data.location_frac = 0;
|
||||
ctx->num_locals += attrib_count;
|
||||
}
|
||||
ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
|
||||
|
@@ -1919,11 +1919,11 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
|
||||
switch ((VkAccessFlagBits)(1 << b)) {
|
||||
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
|
||||
case VK_ACCESS_INDEX_READ_BIT:
|
||||
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
|
||||
break;
|
||||
case VK_ACCESS_UNIFORM_READ_BIT:
|
||||
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
|
||||
break;
|
||||
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
|
||||
case VK_ACCESS_SHADER_READ_BIT:
|
||||
case VK_ACCESS_TRANSFER_READ_BIT:
|
||||
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
|
||||
@@ -3583,7 +3583,8 @@ void radv_CmdEndRenderPass(
|
||||
|
||||
/*
|
||||
* For HTILE we have the following interesting clear words:
|
||||
* 0x0000030f: Uncompressed.
|
||||
* 0x0000030f: Uncompressed for depth+stencil HTILE.
|
||||
* 0x0000000f: Uncompressed for depth only HTILE.
|
||||
* 0xfffffff0: Clear depth to 1.0
|
||||
* 0x00000000: Clear depth to 0.0
|
||||
*/
|
||||
@@ -3632,7 +3633,8 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
|
||||
radv_initialize_htile(cmd_buffer, image, range, 0);
|
||||
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
|
||||
radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
|
||||
radv_initialize_htile(cmd_buffer, image, range, 0xffffffff);
|
||||
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0x30f : 0xf;
|
||||
radv_initialize_htile(cmd_buffer, image, range, clear_value);
|
||||
} else if (radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
|
||||
!radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
|
||||
VkImageSubresourceRange local_range = *range;
|
||||
@@ -3834,7 +3836,7 @@ static void write_event(struct radv_cmd_buffer *cmd_buffer,
|
||||
si_cs_emit_write_event_eop(cs,
|
||||
cmd_buffer->state.predicating,
|
||||
cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
false,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer),
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||
1, va, 2, value);
|
||||
|
||||
|
@@ -344,7 +344,7 @@ static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
|
||||
static unsigned gfx9_border_color_swizzle(const enum vk_swizzle swizzle[4])
|
||||
{
|
||||
unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
|
||||
|
||||
@@ -449,7 +449,7 @@ si_make_texture_descriptor(struct radv_device *device,
|
||||
state[7] = 0;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
|
||||
unsigned bc_swizzle = gfx9_border_color_swizzle(swizzle);
|
||||
|
||||
/* Depth is the the last accessible layer on Gfx9.
|
||||
* The hw doesn't need to know the total number of layers.
|
||||
|
@@ -533,7 +533,7 @@ void radv_meta_build_resolve_shader_core(nir_builder *b,
|
||||
nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
|
||||
nir_builder_instr_insert(b, &tex_all_same->instr);
|
||||
|
||||
nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
|
||||
nir_ssa_def *all_same = nir_ieq(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
|
||||
nir_if *if_stmt = nir_if_create(b->shader);
|
||||
if_stmt->condition = nir_src_for_ssa(all_same);
|
||||
nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
|
||||
|
@@ -259,19 +259,20 @@ radv_device_finish_meta_itob_state(struct radv_device *device)
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
build_nir_btoi_compute_shader(struct radv_device *dev)
|
||||
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
|
||||
{
|
||||
nir_builder b;
|
||||
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
|
||||
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
|
||||
false,
|
||||
false,
|
||||
GLSL_TYPE_FLOAT);
|
||||
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
|
||||
const struct glsl_type *img_type = glsl_sampler_type(dim,
|
||||
false,
|
||||
false,
|
||||
GLSL_TYPE_FLOAT);
|
||||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
|
||||
b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_cs");
|
||||
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
|
||||
b.shader->info.cs.local_size[0] = 16;
|
||||
b.shader->info.cs.local_size[1] = 16;
|
||||
b.shader->info.cs.local_size[2] = 1;
|
||||
@@ -296,16 +297,16 @@ build_nir_btoi_compute_shader(struct radv_device *dev)
|
||||
|
||||
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
|
||||
nir_intrinsic_set_base(offset, 0);
|
||||
nir_intrinsic_set_range(offset, 12);
|
||||
nir_intrinsic_set_range(offset, 16);
|
||||
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
offset->num_components = 2;
|
||||
nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
|
||||
offset->num_components = is_3d ? 3 : 2;
|
||||
nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
|
||||
nir_builder_instr_insert(&b, &offset->instr);
|
||||
|
||||
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
|
||||
nir_intrinsic_set_base(stride, 0);
|
||||
nir_intrinsic_set_range(stride, 12);
|
||||
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
|
||||
nir_intrinsic_set_range(stride, 16);
|
||||
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
|
||||
stride->num_components = 1;
|
||||
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
|
||||
nir_builder_instr_insert(&b, &stride->instr);
|
||||
@@ -353,9 +354,10 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
|
||||
{
|
||||
VkResult result;
|
||||
struct radv_shader_module cs = { .nir = NULL };
|
||||
|
||||
cs.nir = build_nir_btoi_compute_shader(device);
|
||||
|
||||
struct radv_shader_module cs_3d = { .nir = NULL };
|
||||
cs.nir = build_nir_btoi_compute_shader(device, false);
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9)
|
||||
cs_3d.nir = build_nir_btoi_compute_shader(device, true);
|
||||
/*
|
||||
* two descriptors one for the image being sampled
|
||||
* one for the buffer being written.
|
||||
@@ -395,7 +397,7 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 12},
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
|
||||
};
|
||||
|
||||
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
|
||||
@@ -429,9 +431,33 @@ radv_device_init_meta_btoi_state(struct radv_device *device)
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = radv_shader_module_to_handle(&cs_3d),
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = NULL,
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.stage = pipeline_shader_stage_3d,
|
||||
.flags = 0,
|
||||
.layout = device->meta_state.btoi.img_p_layout,
|
||||
};
|
||||
|
||||
result = radv_CreateComputePipelines(radv_device_to_handle(device),
|
||||
radv_pipeline_cache_to_handle(&device->meta_state.cache),
|
||||
1, &vk_pipeline_info_3d, NULL,
|
||||
&device->meta_state.btoi.pipeline_3d);
|
||||
ralloc_free(cs_3d.nir);
|
||||
}
|
||||
ralloc_free(cs.nir);
|
||||
|
||||
return VK_SUCCESS;
|
||||
fail:
|
||||
ralloc_free(cs_3d.nir);
|
||||
ralloc_free(cs.nir);
|
||||
return result;
|
||||
}
|
||||
@@ -448,22 +474,25 @@ radv_device_finish_meta_btoi_state(struct radv_device *device)
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->btoi.pipeline, &state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->btoi.pipeline_3d, &state->alloc);
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
build_nir_itoi_compute_shader(struct radv_device *dev)
|
||||
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
|
||||
{
|
||||
nir_builder b;
|
||||
const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
|
||||
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
|
||||
const struct glsl_type *buf_type = glsl_sampler_type(dim,
|
||||
false,
|
||||
false,
|
||||
GLSL_TYPE_FLOAT);
|
||||
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
|
||||
const struct glsl_type *img_type = glsl_sampler_type(dim,
|
||||
false,
|
||||
false,
|
||||
GLSL_TYPE_FLOAT);
|
||||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
|
||||
b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs");
|
||||
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
|
||||
b.shader->info.cs.local_size[0] = 16;
|
||||
b.shader->info.cs.local_size[1] = 16;
|
||||
b.shader->info.cs.local_size[2] = 1;
|
||||
@@ -488,18 +517,18 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
|
||||
|
||||
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
|
||||
nir_intrinsic_set_base(src_offset, 0);
|
||||
nir_intrinsic_set_range(src_offset, 16);
|
||||
nir_intrinsic_set_range(src_offset, 24);
|
||||
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
src_offset->num_components = 2;
|
||||
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
|
||||
src_offset->num_components = is_3d ? 3 : 2;
|
||||
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
|
||||
nir_builder_instr_insert(&b, &src_offset->instr);
|
||||
|
||||
nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
|
||||
nir_intrinsic_set_base(dst_offset, 0);
|
||||
nir_intrinsic_set_range(dst_offset, 16);
|
||||
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
|
||||
dst_offset->num_components = 2;
|
||||
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
|
||||
nir_intrinsic_set_range(dst_offset, 24);
|
||||
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
|
||||
dst_offset->num_components = is_3d ? 3 : 2;
|
||||
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
|
||||
nir_builder_instr_insert(&b, &dst_offset->instr);
|
||||
|
||||
nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
|
||||
@@ -507,15 +536,15 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
|
||||
nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
|
||||
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
|
||||
tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
|
||||
tex->sampler_dim = dim;
|
||||
tex->op = nir_texop_txf;
|
||||
tex->src[0].src_type = nir_tex_src_coord;
|
||||
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 3));
|
||||
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
|
||||
tex->src[1].src_type = nir_tex_src_lod;
|
||||
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
tex->dest_type = nir_type_float;
|
||||
tex->is_array = false;
|
||||
tex->coord_components = 2;
|
||||
tex->coord_components = is_3d ? 3 : 2;
|
||||
tex->texture = nir_deref_var_create(tex, input_img);
|
||||
tex->sampler = NULL;
|
||||
|
||||
@@ -539,9 +568,10 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
|
||||
{
|
||||
VkResult result;
|
||||
struct radv_shader_module cs = { .nir = NULL };
|
||||
|
||||
cs.nir = build_nir_itoi_compute_shader(device);
|
||||
|
||||
struct radv_shader_module cs_3d = { .nir = NULL };
|
||||
cs.nir = build_nir_itoi_compute_shader(device, false);
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9)
|
||||
cs_3d.nir = build_nir_itoi_compute_shader(device, true);
|
||||
/*
|
||||
* two descriptors one for the image being sampled
|
||||
* one for the buffer being written.
|
||||
@@ -581,7 +611,7 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
|
||||
};
|
||||
|
||||
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
|
||||
@@ -615,10 +645,35 @@ radv_device_init_meta_itoi_state(struct radv_device *device)
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = radv_shader_module_to_handle(&cs_3d),
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = NULL,
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.stage = pipeline_shader_stage_3d,
|
||||
.flags = 0,
|
||||
.layout = device->meta_state.itoi.img_p_layout,
|
||||
};
|
||||
|
||||
result = radv_CreateComputePipelines(radv_device_to_handle(device),
|
||||
radv_pipeline_cache_to_handle(&device->meta_state.cache),
|
||||
1, &vk_pipeline_info_3d, NULL,
|
||||
&device->meta_state.itoi.pipeline_3d);
|
||||
|
||||
ralloc_free(cs_3d.nir);
|
||||
}
|
||||
ralloc_free(cs.nir);
|
||||
|
||||
return VK_SUCCESS;
|
||||
fail:
|
||||
ralloc_free(cs.nir);
|
||||
ralloc_free(cs_3d.nir);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -634,18 +689,22 @@ radv_device_finish_meta_itoi_state(struct radv_device *device)
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->itoi.pipeline, &state->alloc);
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9)
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->itoi.pipeline_3d, &state->alloc);
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
build_nir_cleari_compute_shader(struct radv_device *dev)
|
||||
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
|
||||
{
|
||||
nir_builder b;
|
||||
const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
|
||||
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
|
||||
const struct glsl_type *img_type = glsl_sampler_type(dim,
|
||||
false,
|
||||
false,
|
||||
GLSL_TYPE_FLOAT);
|
||||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
|
||||
b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_cs");
|
||||
b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
|
||||
b.shader->info.cs.local_size[0] = 16;
|
||||
b.shader->info.cs.local_size[1] = 16;
|
||||
b.shader->info.cs.local_size[2] = 1;
|
||||
@@ -666,12 +725,29 @@ build_nir_cleari_compute_shader(struct radv_device *dev)
|
||||
|
||||
nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
|
||||
nir_intrinsic_set_base(clear_val, 0);
|
||||
nir_intrinsic_set_range(clear_val, 16);
|
||||
nir_intrinsic_set_range(clear_val, 20);
|
||||
clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
|
||||
clear_val->num_components = 4;
|
||||
nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
|
||||
nir_builder_instr_insert(&b, &clear_val->instr);
|
||||
|
||||
nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
|
||||
nir_intrinsic_set_base(layer, 0);
|
||||
nir_intrinsic_set_range(layer, 20);
|
||||
layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
|
||||
layer->num_components = 1;
|
||||
nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
|
||||
nir_builder_instr_insert(&b, &layer->instr);
|
||||
|
||||
nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
|
||||
|
||||
nir_ssa_def *comps[4];
|
||||
comps[0] = nir_channel(&b, global_id, 0);
|
||||
comps[1] = nir_channel(&b, global_id, 1);
|
||||
comps[2] = global_z;
|
||||
comps[3] = nir_imm_int(&b, 0);
|
||||
global_id = nir_vec(&b, comps, 4);
|
||||
|
||||
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
|
||||
store->src[0] = nir_src_for_ssa(global_id);
|
||||
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
|
||||
@@ -687,8 +763,10 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
|
||||
{
|
||||
VkResult result;
|
||||
struct radv_shader_module cs = { .nir = NULL };
|
||||
|
||||
cs.nir = build_nir_cleari_compute_shader(device);
|
||||
struct radv_shader_module cs_3d = { .nir = NULL };
|
||||
cs.nir = build_nir_cleari_compute_shader(device, false);
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9)
|
||||
cs_3d.nir = build_nir_cleari_compute_shader(device, true);
|
||||
|
||||
/*
|
||||
* two descriptors one for the image being sampled
|
||||
@@ -722,7 +800,7 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
|
||||
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
|
||||
};
|
||||
|
||||
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
|
||||
@@ -756,10 +834,38 @@ radv_device_init_meta_cleari_state(struct radv_device *device)
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
/* compute shader */
|
||||
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
|
||||
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.module = radv_shader_module_to_handle(&cs_3d),
|
||||
.pName = "main",
|
||||
.pSpecializationInfo = NULL,
|
||||
};
|
||||
|
||||
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
|
||||
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
|
||||
.stage = pipeline_shader_stage_3d,
|
||||
.flags = 0,
|
||||
.layout = device->meta_state.cleari.img_p_layout,
|
||||
};
|
||||
|
||||
result = radv_CreateComputePipelines(radv_device_to_handle(device),
|
||||
radv_pipeline_cache_to_handle(&device->meta_state.cache),
|
||||
1, &vk_pipeline_info_3d, NULL,
|
||||
&device->meta_state.cleari.pipeline_3d);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
ralloc_free(cs_3d.nir);
|
||||
}
|
||||
ralloc_free(cs.nir);
|
||||
return VK_SUCCESS;
|
||||
fail:
|
||||
ralloc_free(cs.nir);
|
||||
ralloc_free(cs_3d.nir);
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -775,6 +881,8 @@ radv_device_finish_meta_cleari_state(struct radv_device *device)
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->cleari.pipeline, &state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->cleari.pipeline_3d, &state->alloc);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -990,18 +1098,22 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
|
||||
create_iview(cmd_buffer, dst, &dst_view);
|
||||
btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
dst->image->type == VK_IMAGE_TYPE_3D)
|
||||
pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
|
||||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
|
||||
for (unsigned r = 0; r < num_rects; ++r) {
|
||||
unsigned push_constants[3] = {
|
||||
unsigned push_constants[4] = {
|
||||
rects[r].dst_x,
|
||||
rects[r].dst_y,
|
||||
src->pitch
|
||||
dst->layer,
|
||||
src->pitch,
|
||||
};
|
||||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
device->meta_state.btoi.img_p_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 12,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
|
||||
push_constants);
|
||||
|
||||
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
|
||||
@@ -1068,19 +1180,24 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
|
||||
|
||||
itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
src->image->type == VK_IMAGE_TYPE_3D)
|
||||
pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
|
||||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
|
||||
for (unsigned r = 0; r < num_rects; ++r) {
|
||||
unsigned push_constants[4] = {
|
||||
unsigned push_constants[6] = {
|
||||
rects[r].src_x,
|
||||
rects[r].src_y,
|
||||
src->layer,
|
||||
rects[r].dst_x,
|
||||
rects[r].dst_y,
|
||||
dst->layer,
|
||||
};
|
||||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
device->meta_state.itoi.img_p_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
|
||||
push_constants);
|
||||
|
||||
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
|
||||
@@ -1128,19 +1245,24 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
|
||||
create_iview(cmd_buffer, dst, &dst_iview);
|
||||
cleari_bind_descriptors(cmd_buffer, &dst_iview);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9 &&
|
||||
dst->image->type == VK_IMAGE_TYPE_3D)
|
||||
pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
|
||||
|
||||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
|
||||
|
||||
unsigned push_constants[4] = {
|
||||
unsigned push_constants[5] = {
|
||||
clear_color->uint32[0],
|
||||
clear_color->uint32[1],
|
||||
clear_color->uint32[2],
|
||||
clear_color->uint32[3],
|
||||
dst->layer,
|
||||
};
|
||||
|
||||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
device->meta_state.cleari.img_p_layout,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
|
||||
push_constants);
|
||||
|
||||
radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
|
||||
|
@@ -300,11 +300,10 @@ static void radv_pick_resolve_method_images(struct radv_image *src_image,
|
||||
enum radv_resolve_method *method)
|
||||
|
||||
{
|
||||
if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
|
||||
if (dest_image->surface.num_dcc_levels > 0)
|
||||
*method = RESOLVE_FRAGMENT;
|
||||
else
|
||||
*method = RESOLVE_COMPUTE;
|
||||
if (dest_image->surface.num_dcc_levels > 0) {
|
||||
*method = RESOLVE_FRAGMENT;
|
||||
} else if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
|
||||
*method = RESOLVE_COMPUTE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -487,6 +487,14 @@ radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
|
||||
if (!subpass->has_resolve)
|
||||
return;
|
||||
|
||||
/* Resolves happen before the end-of-subpass barriers get executed,
|
||||
* so we have to make the attachment shader-readable */
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
|
||||
RADV_CMD_FLAG_INV_GLOBAL_L2 |
|
||||
RADV_CMD_FLAG_INV_VMEM_L1;
|
||||
|
||||
for (uint32_t i = 0; i < subpass->color_count; ++i) {
|
||||
VkAttachmentReference src_att = subpass->color_attachments[i];
|
||||
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
|
||||
|
@@ -407,8 +407,8 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
|
||||
|
||||
unsigned push_constants[2] = {
|
||||
src_offset->x,
|
||||
src_offset->y,
|
||||
src_offset->x - dest_offset->x,
|
||||
src_offset->y - dest_offset->y,
|
||||
};
|
||||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
device->meta_state.resolve_fragment.p_layout,
|
||||
@@ -540,8 +540,8 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
|
||||
.pAttachments = (VkImageView[]) {
|
||||
radv_image_view_to_handle(&dest_iview),
|
||||
},
|
||||
.width = extent.width,
|
||||
.height = extent.height,
|
||||
.width = extent.width + dstOffset.x,
|
||||
.height = extent.height + dstOffset.y,
|
||||
.layers = 1
|
||||
}, &cmd_buffer->pool->alloc, &fb);
|
||||
|
||||
@@ -604,6 +604,16 @@ radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
|
||||
RADV_META_SAVE_CONSTANTS |
|
||||
RADV_META_SAVE_DESCRIPTORS);
|
||||
|
||||
/* Resolves happen before the end-of-subpass barriers get executed,
|
||||
* so we have to make the attachment shader-readable */
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_CB_META |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_DB |
|
||||
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META |
|
||||
RADV_CMD_FLAG_INV_GLOBAL_L2 |
|
||||
RADV_CMD_FLAG_INV_VMEM_L1;
|
||||
|
||||
for (uint32_t i = 0; i < subpass->color_count; ++i) {
|
||||
VkAttachmentReference src_att = subpass->color_attachments[i];
|
||||
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
|
||||
|
@@ -433,16 +433,19 @@ struct radv_meta_state {
|
||||
VkPipelineLayout img_p_layout;
|
||||
VkDescriptorSetLayout img_ds_layout;
|
||||
VkPipeline pipeline;
|
||||
VkPipeline pipeline_3d;
|
||||
} btoi;
|
||||
struct {
|
||||
VkPipelineLayout img_p_layout;
|
||||
VkDescriptorSetLayout img_ds_layout;
|
||||
VkPipeline pipeline;
|
||||
VkPipeline pipeline_3d;
|
||||
} itoi;
|
||||
struct {
|
||||
VkPipelineLayout img_p_layout;
|
||||
VkDescriptorSetLayout img_ds_layout;
|
||||
VkPipeline pipeline;
|
||||
VkPipeline pipeline_3d;
|
||||
} cleari;
|
||||
|
||||
struct {
|
||||
|
@@ -1152,7 +1152,7 @@ void radv_CmdEndQuery(
|
||||
si_cs_emit_write_event_eop(cs,
|
||||
false,
|
||||
cmd_buffer->device->physical_device->rad_info.chip_class,
|
||||
false,
|
||||
radv_cmd_buffer_uses_mec(cmd_buffer),
|
||||
V_028A90_BOTTOM_OF_PIPE_TS, 0,
|
||||
1, avail_va, 0, 1);
|
||||
break;
|
||||
|
@@ -676,7 +676,8 @@ si_write_scissors(struct radeon_winsys_cs *cs, int first,
|
||||
int i;
|
||||
float scale[3], translate[3], guardband_x = INFINITY, guardband_y = INFINITY;
|
||||
const float max_range = 32767.0f;
|
||||
assert(count);
|
||||
if (!count)
|
||||
return;
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_028250_PA_SC_VPORT_SCISSOR_0_TL + first * 4 * 2, count * 2);
|
||||
for (i = 0; i < count; i++) {
|
||||
@@ -988,6 +989,11 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||
if (chip_class >= GFX9 && flush_cb_db) {
|
||||
unsigned cb_db_event, tc_flags;
|
||||
|
||||
#if 0
|
||||
/* This breaks a bunch of:
|
||||
dEQP-VK.renderpass.dedicated_allocation.formats.d32_sfloat_s8_uint.input*.
|
||||
use the big hammer always.
|
||||
*/
|
||||
/* Set the CB/DB flush event. */
|
||||
switch (flush_cb_db) {
|
||||
case RADV_CMD_FLAG_FLUSH_AND_INV_CB:
|
||||
@@ -1000,7 +1006,9 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
|
||||
/* both CB & DB */
|
||||
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
}
|
||||
|
||||
#else
|
||||
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
|
||||
#endif
|
||||
/* TC | TC_WB = invalidate L2 data
|
||||
* TC_MD | TC_WB = invalidate L2 metadata
|
||||
* TC | TC_WB | TC_MD = invalidate L2 data & metadata
|
||||
|
@@ -72,16 +72,22 @@ lower_buffer_access::emit_access(void *mem_ctx,
|
||||
new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL),
|
||||
field->name);
|
||||
|
||||
field_offset =
|
||||
glsl_align(field_offset,
|
||||
field->type->std140_base_alignment(row_major));
|
||||
unsigned field_align;
|
||||
if (packing == GLSL_INTERFACE_PACKING_STD430)
|
||||
field_align = field->type->std430_base_alignment(row_major);
|
||||
else
|
||||
field_align = field->type->std140_base_alignment(row_major);
|
||||
field_offset = glsl_align(field_offset, field_align);
|
||||
|
||||
emit_access(mem_ctx, is_write, field_deref, base_offset,
|
||||
deref_offset + field_offset,
|
||||
row_major, 1, packing,
|
||||
writemask_for_size(field_deref->type->vector_elements));
|
||||
|
||||
field_offset += field->type->std140_size(row_major);
|
||||
if (packing == GLSL_INTERFACE_PACKING_STD430)
|
||||
field_offset += field->type->std430_size(row_major);
|
||||
else
|
||||
field_offset += field->type->std140_size(row_major);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
@@ -126,6 +126,7 @@ etna_compile_rs_state(struct etna_context *ctx, struct compiled_rs_state *cs,
|
||||
rs->source_offset == rs->dest_offset &&
|
||||
rs->source_format == rs->dest_format &&
|
||||
rs->source_tiling == rs->dest_tiling &&
|
||||
(rs->source_tiling & ETNA_LAYOUT_BIT_SUPER) &&
|
||||
rs->source_stride == rs->dest_stride &&
|
||||
!rs->downsample_x && !rs->downsample_y &&
|
||||
!rs->swap_rb && !rs->flip &&
|
||||
|
@@ -1228,10 +1228,13 @@ static void si_shader_selector_key_hw_vs(struct si_context *sctx,
|
||||
/* Find out if PS is disabled. */
|
||||
bool ps_disabled = true;
|
||||
if (ps) {
|
||||
const struct si_state_blend *blend = sctx->queued.named.blend;
|
||||
bool alpha_to_coverage = blend && blend->alpha_to_coverage;
|
||||
bool ps_modifies_zs = ps->info.uses_kill ||
|
||||
ps->info.writes_z ||
|
||||
ps->info.writes_stencil ||
|
||||
ps->info.writes_samplemask ||
|
||||
alpha_to_coverage ||
|
||||
si_get_alpha_test_func(sctx) != PIPE_FUNC_ALWAYS;
|
||||
|
||||
unsigned ps_colormask = sctx->framebuffer.colorbuf_enabled_4bit &
|
||||
|
@@ -270,7 +270,7 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In
|
||||
{
|
||||
__mmask16 m = 0xf;
|
||||
m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)),
|
||||
_mm512_set1_epi32(0x8000000));
|
||||
_mm512_set1_epi32(0x80000000));
|
||||
return __conv(_mm512_mask_i32gather_ps(
|
||||
__conv(old),
|
||||
m,
|
||||
|
@@ -271,7 +271,7 @@ static SIMDINLINE Float SIMDCALL mask_i32gather_ps(Float old, float const* p, In
|
||||
{
|
||||
__mmask16 m = 0xff;
|
||||
m = _mm512_mask_test_epi32_mask(m, _mm512_castps_si512(__conv(mask)),
|
||||
_mm512_set1_epi32(0x8000000));
|
||||
_mm512_set1_epi32(0x80000000));
|
||||
return __conv(_mm512_mask_i32gather_ps(
|
||||
__conv(old),
|
||||
m,
|
||||
|
@@ -540,7 +540,7 @@ static SIMDINLINE uint32_t SIMDCALL movemask_pd(Double a)
|
||||
}
|
||||
static SIMDINLINE uint32_t SIMDCALL movemask_ps(Float a)
|
||||
{
|
||||
__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(0x8000000));
|
||||
__mmask16 m = _mm512_test_epi32_mask(castps_si(a), set1_epi32(0x80000000));
|
||||
return static_cast<uint32_t>(m);
|
||||
}
|
||||
|
||||
|
@@ -1108,8 +1108,10 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
|
||||
pthread_mutex_lock(&device->mutex);
|
||||
|
||||
__sync_synchronize();
|
||||
if (bo->exists)
|
||||
if (bo->exists) {
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
return &bo->bo;
|
||||
}
|
||||
|
||||
const struct anv_physical_device *physical_device =
|
||||
&device->instance->physicalDevice;
|
||||
|
@@ -103,7 +103,9 @@ VkResult anv_CreateDescriptorSetLayout(
|
||||
if (binding == NULL)
|
||||
continue;
|
||||
|
||||
assert(binding->descriptorCount > 0);
|
||||
if (binding->descriptorCount == 0)
|
||||
continue;
|
||||
|
||||
#ifndef NDEBUG
|
||||
set_layout->binding[b].type = binding->descriptorType;
|
||||
#endif
|
||||
|
@@ -29,8 +29,6 @@ import copy
|
||||
import re
|
||||
import xml.etree.cElementTree as et
|
||||
|
||||
from mako.template import Template
|
||||
|
||||
MAX_API_VERSION = '1.0.57'
|
||||
|
||||
class Extension:
|
||||
@@ -158,7 +156,7 @@ def _init_exts_from_xml(xml):
|
||||
ext = ext_name_map[ext_name]
|
||||
ext.type = ext_elem.attrib['type']
|
||||
|
||||
_TEMPLATE = Template(COPYRIGHT + """
|
||||
_TEMPLATE = COPYRIGHT + """
|
||||
#include "anv_private.h"
|
||||
|
||||
#include "vk_util.h"
|
||||
@@ -256,7 +254,7 @@ VkResult anv_EnumerateDeviceExtensionProperties(
|
||||
|
||||
return vk_outarray_status(&out);
|
||||
}
|
||||
""")
|
||||
"""
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser()
|
||||
@@ -280,5 +278,7 @@ if __name__ == '__main__':
|
||||
'device_extensions': [e for e in EXTENSIONS if e.type == 'device'],
|
||||
}
|
||||
|
||||
from mako.template import Template
|
||||
|
||||
with open(args.out, 'w') as f:
|
||||
f.write(_TEMPLATE.render(**template_env))
|
||||
f.write(Template(_TEMPLATE).render(**template_env))
|
||||
|
@@ -1151,6 +1151,15 @@ genX(CmdExecuteCommands)(
|
||||
anv_cmd_buffer_add_secondary(primary, secondary);
|
||||
}
|
||||
|
||||
/* The secondary may have selected a different pipeline (3D or compute) and
|
||||
* may have changed the current L3$ configuration. Reset our tracking
|
||||
* variables to invalid values to ensure that we re-emit these in the case
|
||||
* where we do any draws or compute dispatches from the primary after the
|
||||
* secondary has returned.
|
||||
*/
|
||||
primary->state.current_pipeline = UINT32_MAX;
|
||||
primary->state.current_l3_config = NULL;
|
||||
|
||||
/* Each of the secondary command buffers will use its own state base
|
||||
* address. We need to re-emit state base address for the primary after
|
||||
* all of the secondaries are done.
|
||||
|
@@ -1356,7 +1356,8 @@ emit_3dstate_gs(struct anv_pipeline *pipeline)
|
||||
}
|
||||
|
||||
static bool
|
||||
has_color_buffer_write_enabled(const struct anv_pipeline *pipeline)
|
||||
has_color_buffer_write_enabled(const struct anv_pipeline *pipeline,
|
||||
const VkPipelineColorBlendStateCreateInfo *blend)
|
||||
{
|
||||
const struct anv_shader_bin *shader_bin =
|
||||
pipeline->shaders[MESA_SHADER_FRAGMENT];
|
||||
@@ -1365,10 +1366,15 @@ has_color_buffer_write_enabled(const struct anv_pipeline *pipeline)
|
||||
|
||||
const struct anv_pipeline_bind_map *bind_map = &shader_bin->bind_map;
|
||||
for (int i = 0; i < bind_map->surface_count; i++) {
|
||||
if (bind_map->surface_to_descriptor[i].set !=
|
||||
ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
|
||||
struct anv_pipeline_binding *binding = &bind_map->surface_to_descriptor[i];
|
||||
|
||||
if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
|
||||
continue;
|
||||
if (bind_map->surface_to_descriptor[i].index != UINT32_MAX)
|
||||
|
||||
const VkPipelineColorBlendAttachmentState *a =
|
||||
&blend->pAttachments[binding->index];
|
||||
|
||||
if (binding->index != UINT32_MAX && a->colorWriteMask != 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1377,6 +1383,7 @@ has_color_buffer_write_enabled(const struct anv_pipeline *pipeline)
|
||||
|
||||
static void
|
||||
emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
|
||||
const VkPipelineColorBlendStateCreateInfo *blend,
|
||||
const VkPipelineMultisampleStateCreateInfo *multisample)
|
||||
{
|
||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||
@@ -1421,7 +1428,7 @@ emit_3dstate_wm(struct anv_pipeline *pipeline, struct anv_subpass *subpass,
|
||||
if (wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
|
||||
wm_prog_data->has_side_effects ||
|
||||
wm.PixelShaderKillsPixel ||
|
||||
has_color_buffer_write_enabled(pipeline))
|
||||
has_color_buffer_write_enabled(pipeline, blend))
|
||||
wm.ThreadDispatchEnable = true;
|
||||
|
||||
if (samples > 1) {
|
||||
@@ -1545,7 +1552,8 @@ emit_3dstate_ps(struct anv_pipeline *pipeline,
|
||||
#if GEN_GEN >= 8
|
||||
static void
|
||||
emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
|
||||
struct anv_subpass *subpass)
|
||||
struct anv_subpass *subpass,
|
||||
const VkPipelineColorBlendStateCreateInfo *blend)
|
||||
{
|
||||
const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
|
||||
|
||||
@@ -1600,7 +1608,7 @@ emit_3dstate_ps_extra(struct anv_pipeline *pipeline,
|
||||
* attachments, we need to force-enable here.
|
||||
*/
|
||||
if ((wm_prog_data->has_side_effects || wm_prog_data->uses_kill) &&
|
||||
!has_color_buffer_write_enabled(pipeline))
|
||||
!has_color_buffer_write_enabled(pipeline, blend))
|
||||
ps.PixelShaderHasUAV = true;
|
||||
|
||||
#if GEN_GEN >= 9
|
||||
@@ -1730,10 +1738,11 @@ genX(graphics_pipeline_create)(
|
||||
emit_3dstate_hs_te_ds(pipeline, pCreateInfo->pTessellationState);
|
||||
emit_3dstate_gs(pipeline);
|
||||
emit_3dstate_sbe(pipeline);
|
||||
emit_3dstate_wm(pipeline, subpass, pCreateInfo->pMultisampleState);
|
||||
emit_3dstate_wm(pipeline, subpass, pCreateInfo->pColorBlendState,
|
||||
pCreateInfo->pMultisampleState);
|
||||
emit_3dstate_ps(pipeline, pCreateInfo->pColorBlendState);
|
||||
#if GEN_GEN >= 8
|
||||
emit_3dstate_ps_extra(pipeline, subpass);
|
||||
emit_3dstate_ps_extra(pipeline, subpass, pCreateInfo->pColorBlendState);
|
||||
emit_3dstate_vf_topology(pipeline);
|
||||
#endif
|
||||
emit_3dstate_vf_statistics(pipeline);
|
||||
|
@@ -205,7 +205,6 @@ void
|
||||
loader_dri3_set_swap_interval(struct loader_dri3_drawable *draw, int interval)
|
||||
{
|
||||
draw->swap_interval = interval;
|
||||
dri3_update_num_back(draw);
|
||||
}
|
||||
|
||||
/** dri3_free_render_buffer
|
||||
@@ -377,7 +376,6 @@ dri3_handle_present_event(struct loader_dri3_drawable *draw,
|
||||
draw->flipping = false;
|
||||
break;
|
||||
}
|
||||
dri3_update_num_back(draw);
|
||||
|
||||
if (draw->vtable->show_fps)
|
||||
draw->vtable->show_fps(draw, ce->ust);
|
||||
@@ -402,7 +400,8 @@ dri3_handle_present_event(struct loader_dri3_drawable *draw,
|
||||
buf->busy = 0;
|
||||
|
||||
if (buf && draw->num_back <= b && b < LOADER_DRI3_MAX_BACK &&
|
||||
draw->cur_blit_source != b) {
|
||||
draw->cur_blit_source != b &&
|
||||
!buf->busy) {
|
||||
dri3_free_render_buffer(draw, buf);
|
||||
draw->buffers[b] = NULL;
|
||||
}
|
||||
@@ -537,6 +536,7 @@ dri3_find_back(struct loader_dri3_drawable *draw)
|
||||
/* Check whether we need to reuse the current back buffer as new back.
|
||||
* In that case, wait until it's not busy anymore.
|
||||
*/
|
||||
dri3_update_num_back(draw);
|
||||
num_to_consider = draw->num_back;
|
||||
if (!loader_dri3_have_image_blit(draw) && draw->cur_blit_source != -1) {
|
||||
num_to_consider = 1;
|
||||
|
@@ -665,11 +665,14 @@ brw_finish_batch(struct brw_context *brw)
|
||||
}
|
||||
}
|
||||
|
||||
/* Mark the end of the buffer. */
|
||||
intel_batchbuffer_emit_dword(&brw->batch, MI_BATCH_BUFFER_END);
|
||||
/* Emit MI_BATCH_BUFFER_END to finish our batch. Note that execbuf2
|
||||
* requires our batch size to be QWord aligned, so we pad it out if
|
||||
* necessary by emitting an extra MI_NOOP after the end.
|
||||
*/
|
||||
intel_batchbuffer_require_space(brw, 8, brw->batch.ring);
|
||||
*brw->batch.map_next++ = MI_BATCH_BUFFER_END;
|
||||
if (USED_BATCH(brw->batch) & 1) {
|
||||
/* Round batchbuffer usage to 2 DWORDs. */
|
||||
intel_batchbuffer_emit_dword(&brw->batch, MI_NOOP);
|
||||
*brw->batch.map_next++ = MI_NOOP;
|
||||
}
|
||||
|
||||
brw->batch.no_wrap = false;
|
||||
|
@@ -78,19 +78,6 @@ static inline uint32_t float_as_int(float f)
|
||||
return fi.d;
|
||||
}
|
||||
|
||||
static inline void
|
||||
intel_batchbuffer_emit_dword(struct intel_batchbuffer *batch, GLuint dword)
|
||||
{
|
||||
*batch->map_next++ = dword;
|
||||
assert(batch->ring != UNKNOWN_RING);
|
||||
}
|
||||
|
||||
static inline void
|
||||
intel_batchbuffer_emit_float(struct intel_batchbuffer *batch, float f)
|
||||
{
|
||||
intel_batchbuffer_emit_dword(batch, float_as_int(f));
|
||||
}
|
||||
|
||||
static inline void
|
||||
intel_batchbuffer_begin(struct brw_context *brw, int n, enum brw_gpu_ring ring)
|
||||
{
|
||||
|
Reference in New Issue
Block a user