Compare commits
47 Commits
mesa-17.2.
...
mesa-17.2.
Author | SHA1 | Date | |
---|---|---|---|
|
15f23fb855 | ||
|
dd4fafcfda | ||
|
17a3e4891b | ||
|
85cc3533fa | ||
|
93bd5fbfe1 | ||
|
833f12abdf | ||
|
1fc44a3ba0 | ||
|
6ccb75e869 | ||
|
b4fec4271b | ||
|
efb9b5740e | ||
|
e44393cb23 | ||
|
81b5a6a85b | ||
|
5795e42116 | ||
|
42e8f3ccdf | ||
|
e452ed26ff | ||
|
c7c6ba44ca | ||
|
bdab7c69f5 | ||
|
47507ec1fd | ||
|
3fb5cc2637 | ||
|
42d6e7eba2 | ||
|
f60ff71b6f | ||
|
f70f216564 | ||
|
827bf79052 | ||
|
2b10b3a417 | ||
|
27e7a3a5ef | ||
|
da005f5566 | ||
|
cc8ae8842b | ||
|
3165f9877e | ||
|
1e11687029 | ||
|
ea595756f8 | ||
|
ffd8120284 | ||
|
9d65214f3d | ||
|
a57390cee0 | ||
|
8b706102eb | ||
|
7f5d86ebaa | ||
|
1c1653d7b0 | ||
|
d4100b0d09 | ||
|
bb6e5e5476 | ||
|
08d49e074d | ||
|
75f5abb82f | ||
|
f0b6298c05 | ||
|
ac04187e33 | ||
|
b1514579c2 | ||
|
dc63c715cb | ||
|
9d9ea2c5a4 | ||
|
07b5c78836 | ||
|
59f7fdb85e |
@@ -88,6 +88,10 @@ LOCAL_CFLAGS += \
|
||||
|
||||
endif
|
||||
endif
|
||||
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
||||
LOCAL_CFLAGS_arm += -DUSE_ARM_ASM
|
||||
endif
|
||||
LOCAL_CFLAGS_arm64 += -DUSE_AARCH64_ASM
|
||||
|
||||
ifneq ($(LOCAL_IS_HOST_MODULE),true)
|
||||
LOCAL_CFLAGS += -DHAVE_LIBDRM
|
||||
|
4
bin/.cherry-ignore
Normal file
4
bin/.cherry-ignore
Normal file
@@ -0,0 +1,4 @@
|
||||
# fixes: The commits are too invasive for stable. Instead the offending patches
|
||||
# causing regressions have been reverted.
|
||||
365d34540f331df57780dddf8da87235be0a6bcb mesa: correctly calculate the storage offset for i915
|
||||
de0e62e1065e2d9172acf3ab7c70bba0160125c8 st/mesa: correctly calculate the storage offset
|
26
configure.ac
26
configure.ac
@@ -773,6 +773,20 @@ if test "x$enable_asm" = xyes; then
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
aarch64)
|
||||
case "$host_os" in
|
||||
linux*)
|
||||
asm_arch=aarch64
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
arm)
|
||||
case "$host_os" in
|
||||
linux*)
|
||||
asm_arch=arm
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
case "$asm_arch" in
|
||||
@@ -792,6 +806,14 @@ if test "x$enable_asm" = xyes; then
|
||||
DEFINES="$DEFINES -DUSE_PPC64LE_ASM"
|
||||
AC_MSG_RESULT([yes, ppc64le])
|
||||
;;
|
||||
aarch64)
|
||||
DEFINES="$DEFINES -DUSE_AARCH64_ASM"
|
||||
AC_MSG_RESULT([yes, aarch64])
|
||||
;;
|
||||
arm)
|
||||
DEFINES="$DEFINES -DUSE_ARM_ASM"
|
||||
AC_MSG_RESULT([yes, arm])
|
||||
;;
|
||||
*)
|
||||
AC_MSG_RESULT([no, platform not supported])
|
||||
;;
|
||||
@@ -2551,7 +2573,7 @@ if test -n "$with_gallium_drivers"; then
|
||||
if test "x$HAVE_SWR_AVX" != xyes -a \
|
||||
"x$HAVE_SWR_AVX2" != xyes -a \
|
||||
"x$HAVE_SWR_KNL" != xyes -a \
|
||||
"x$HAVE_SWR_SKX" != xyes -a; then
|
||||
"x$HAVE_SWR_SKX" != xyes; then
|
||||
AC_MSG_ERROR([swr enabled but no swr architectures selected])
|
||||
fi
|
||||
|
||||
@@ -2735,6 +2757,8 @@ AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64)
|
||||
AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64)
|
||||
AM_CONDITIONAL(HAVE_SPARC_ASM, test "x$asm_arch" = xsparc)
|
||||
AM_CONDITIONAL(HAVE_PPC64LE_ASM, test "x$asm_arch" = xppc64le)
|
||||
AM_CONDITIONAL(HAVE_AARCH64_ASM, test "x$asm_arch" = xaarch64)
|
||||
AM_CONDITIONAL(HAVE_ARM_ASM, test "x$asm_arch" = xarm)
|
||||
|
||||
AC_SUBST([NINE_MAJOR], 1)
|
||||
AC_SUBST([NINE_MINOR], 0)
|
||||
|
@@ -109,7 +109,7 @@ static void parse_relocs(Elf *elf, Elf_Data *relocs, Elf_Data *symbols,
|
||||
}
|
||||
}
|
||||
|
||||
void ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
bool ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
struct ac_shader_binary *binary)
|
||||
{
|
||||
char *elf_buffer;
|
||||
@@ -118,6 +118,7 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
Elf_Data *symbols = NULL, *relocs = NULL;
|
||||
size_t section_str_index;
|
||||
unsigned symbol_sh_link = 0;
|
||||
bool success = true;
|
||||
|
||||
/* One of the libelf implementations
|
||||
* (http://www.mr511.de/software/english.htm) requires calling
|
||||
@@ -137,7 +138,8 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
GElf_Shdr section_header;
|
||||
if (gelf_getshdr(section, §ion_header) != §ion_header) {
|
||||
fprintf(stderr, "Failed to read ELF section header\n");
|
||||
return;
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
name = elf_strptr(elf, section_str_index, section_header.sh_name);
|
||||
if (!strcmp(name, ".text")) {
|
||||
@@ -148,6 +150,11 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
} else if (!strcmp(name, ".AMDGPU.config")) {
|
||||
section_data = elf_getdata(section, section_data);
|
||||
binary->config_size = section_data->d_size;
|
||||
if (!binary->config_size) {
|
||||
fprintf(stderr, ".AMDGPU.config is empty!\n");
|
||||
success = false;
|
||||
break;
|
||||
}
|
||||
binary->config = MALLOC(binary->config_size * sizeof(unsigned char));
|
||||
memcpy(binary->config, section_data->d_buf, binary->config_size);
|
||||
} else if (!strcmp(name, ".AMDGPU.disasm")) {
|
||||
@@ -186,6 +193,7 @@ void ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
binary->global_symbol_count = 1;
|
||||
binary->config_size_per_symbol = binary->config_size;
|
||||
}
|
||||
return success;
|
||||
}
|
||||
|
||||
const unsigned char *ac_shader_binary_config_start(
|
||||
|
@@ -83,7 +83,7 @@ struct ac_shader_config {
|
||||
* Parse the elf binary stored in \p elf_data and create a
|
||||
* ac_shader_binary object.
|
||||
*/
|
||||
void ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
bool ac_elf_read(const char *elf_data, unsigned elf_size,
|
||||
struct ac_shader_binary *binary);
|
||||
|
||||
/**
|
||||
|
@@ -1315,7 +1315,6 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
|
||||
src0 = to_float(&ctx->ac, src0);
|
||||
result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
|
||||
|
||||
/* TODO SI/CIK options here */
|
||||
if (ctx->options->chip_class >= VI) {
|
||||
LLVMValueRef args[2];
|
||||
/* Check if the result is a denormal - and flush to 0 if so. */
|
||||
@@ -1329,7 +1328,22 @@ static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
|
||||
|
||||
if (ctx->options->chip_class >= VI)
|
||||
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
|
||||
|
||||
else {
|
||||
/* for SI/CIK */
|
||||
/* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
|
||||
* so compare the result and flush to 0 if it's smaller.
|
||||
*/
|
||||
LLVMValueRef temp, cond2;
|
||||
temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
|
||||
ctx->f32, result);
|
||||
cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
|
||||
LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
|
||||
temp, "");
|
||||
cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
|
||||
temp, ctx->f32zero, "");
|
||||
cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
|
||||
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@@ -1007,6 +1007,8 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
}
|
||||
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
|
||||
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
|
||||
@@ -1043,7 +1045,6 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */
|
||||
radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */
|
||||
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, ds->db_htile_surface);
|
||||
}
|
||||
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
|
||||
@@ -2233,8 +2234,11 @@ VkResult radv_EndCommandBuffer(
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
|
||||
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER)
|
||||
if (cmd_buffer->queue_family_index != RADV_QUEUE_TRANSFER) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == SI)
|
||||
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
|
||||
si_emit_cache_flush(cmd_buffer);
|
||||
}
|
||||
|
||||
if (!cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs) ||
|
||||
cmd_buffer->record_fail)
|
||||
|
@@ -3079,9 +3079,13 @@ radv_initialise_color_surface(struct radv_device *device,
|
||||
format != V_028C70_COLOR_24_8) |
|
||||
S_028C70_NUMBER_TYPE(ntype) |
|
||||
S_028C70_ENDIAN(endian);
|
||||
if (iview->image->info.samples > 1)
|
||||
if (iview->image->fmask.size)
|
||||
cb->cb_color_info |= S_028C70_COMPRESSION(1);
|
||||
if ((iview->image->info.samples > 1) && iview->image->fmask.size) {
|
||||
cb->cb_color_info |= S_028C70_COMPRESSION(1);
|
||||
if (device->physical_device->rad_info.chip_class == SI) {
|
||||
unsigned fmask_bankh = util_logbase2(iview->image->fmask.bank_height);
|
||||
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
|
||||
}
|
||||
}
|
||||
|
||||
if (iview->image->cmask.size &&
|
||||
!(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
|
||||
|
@@ -205,7 +205,6 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
|
||||
{
|
||||
uint64_t gpu_address = image->bo ? device->ws->buffer_get_va(image->bo) + image->offset : 0;
|
||||
uint64_t va = gpu_address;
|
||||
unsigned pitch = base_level_info->nblk_x * block_width;
|
||||
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
|
||||
uint64_t meta_va = 0;
|
||||
if (chip_class >= GFX9) {
|
||||
@@ -221,9 +220,6 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
|
||||
state[0] |= image->surface.u.legacy.tile_swizzle;
|
||||
state[1] &= C_008F14_BASE_ADDRESS_HI;
|
||||
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
|
||||
state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
|
||||
is_stencil));
|
||||
state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
|
||||
|
||||
if (chip_class >= VI) {
|
||||
state[6] &= C_008F28_COMPRESSION_EN;
|
||||
@@ -559,10 +555,11 @@ radv_query_opaque_metadata(struct radv_device *device,
|
||||
memcpy(&md->metadata[2], desc, sizeof(desc));
|
||||
|
||||
/* Dwords [10:..] contain the mipmap level offsets. */
|
||||
for (i = 0; i <= image->info.levels - 1; i++)
|
||||
md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
|
||||
|
||||
md->size_metadata = (11 + image->info.levels - 1) * 4;
|
||||
if (device->physical_device->rad_info.chip_class <= VI) {
|
||||
for (i = 0; i <= image->info.levels - 1; i++)
|
||||
md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
|
||||
md->size_metadata = (11 + image->info.levels - 1) * 4;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -477,48 +477,8 @@ radv_meta_build_nir_fs_noop(void)
|
||||
return b.shader;
|
||||
}
|
||||
|
||||
static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
|
||||
nir_ssa_def *input)
|
||||
{
|
||||
nir_const_value v;
|
||||
unsigned i;
|
||||
v.u32[0] = 0x3b4d2e1c; // 0.00313080009
|
||||
|
||||
nir_ssa_def *cmp[3];
|
||||
for (i = 0; i < 3; i++)
|
||||
cmp[i] = nir_flt(b, nir_channel(b, input, i),
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
|
||||
nir_ssa_def *ltvals[3];
|
||||
v.f32[0] = 12.92;
|
||||
for (i = 0; i < 3; i++)
|
||||
ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
|
||||
nir_ssa_def *gtvals[3];
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
v.f32[0] = 1.0/2.4;
|
||||
gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
v.f32[0] = 1.055;
|
||||
gtvals[i] = nir_fmul(b, gtvals[i],
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
v.f32[0] = 0.055;
|
||||
gtvals[i] = nir_fsub(b, gtvals[i],
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
}
|
||||
|
||||
nir_ssa_def *comp[4];
|
||||
for (i = 0; i < 3; i++)
|
||||
comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
|
||||
comp[3] = nir_channels(b, input, 3);
|
||||
return nir_vec(b, comp, 4);
|
||||
}
|
||||
|
||||
void radv_meta_build_resolve_shader_core(nir_builder *b,
|
||||
bool is_integer,
|
||||
bool is_srgb,
|
||||
int samples,
|
||||
nir_variable *input_img,
|
||||
nir_variable *color,
|
||||
@@ -596,10 +556,4 @@ void radv_meta_build_resolve_shader_core(nir_builder *b,
|
||||
|
||||
if (outer_if)
|
||||
b->cursor = nir_after_cf_node(&outer_if->cf_node);
|
||||
|
||||
if (is_srgb) {
|
||||
nir_ssa_def *newv = nir_load_var(b, color);
|
||||
newv = radv_meta_build_resolve_srgb_conversion(b, newv);
|
||||
nir_store_var(b, color, newv, 0xf);
|
||||
}
|
||||
}
|
||||
|
@@ -234,7 +234,6 @@ nir_shader *radv_meta_build_nir_fs_noop(void);
|
||||
|
||||
void radv_meta_build_resolve_shader_core(nir_builder *b,
|
||||
bool is_integer,
|
||||
bool is_srgb,
|
||||
int samples,
|
||||
nir_variable *input_img,
|
||||
nir_variable *color,
|
||||
|
@@ -979,7 +979,7 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
|
||||
if (iview->image->info.levels > 1)
|
||||
goto fail;
|
||||
|
||||
if (iview->image->surface.u.legacy.level[0].mode < RADEON_SURF_MODE_1D)
|
||||
if (iview->image->surface.is_linear)
|
||||
goto fail;
|
||||
if (!radv_image_extent_compare(iview->image, &iview->extent))
|
||||
goto fail;
|
||||
|
@@ -29,7 +29,9 @@
|
||||
#include "sid.h"
|
||||
|
||||
static VkResult
|
||||
create_pass(struct radv_device *device)
|
||||
create_pass(struct radv_device *device,
|
||||
uint32_t samples,
|
||||
VkRenderPass *pass)
|
||||
{
|
||||
VkResult result;
|
||||
VkDevice device_h = radv_device_to_handle(device);
|
||||
@@ -37,7 +39,7 @@ create_pass(struct radv_device *device)
|
||||
VkAttachmentDescription attachment;
|
||||
|
||||
attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;
|
||||
attachment.samples = 1;
|
||||
attachment.samples = samples;
|
||||
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
|
||||
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
|
||||
attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
|
||||
@@ -65,14 +67,18 @@ create_pass(struct radv_device *device)
|
||||
.dependencyCount = 0,
|
||||
},
|
||||
alloc,
|
||||
&device->meta_state.depth_decomp.pass);
|
||||
pass);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
create_pipeline(struct radv_device *device,
|
||||
VkShaderModule vs_module_h)
|
||||
VkShaderModule vs_module_h,
|
||||
uint32_t samples,
|
||||
VkRenderPass pass,
|
||||
VkPipeline *decompress_pipeline,
|
||||
VkPipeline *resummarize_pipeline)
|
||||
{
|
||||
VkResult result;
|
||||
VkDevice device_h = radv_device_to_handle(device);
|
||||
@@ -129,7 +135,7 @@ create_pipeline(struct radv_device *device,
|
||||
},
|
||||
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
|
||||
.rasterizationSamples = 1,
|
||||
.rasterizationSamples = samples,
|
||||
.sampleShadingEnable = false,
|
||||
.pSampleMask = NULL,
|
||||
.alphaToCoverageEnable = false,
|
||||
@@ -156,7 +162,7 @@ create_pipeline(struct radv_device *device,
|
||||
VK_DYNAMIC_STATE_SCISSOR,
|
||||
},
|
||||
},
|
||||
.renderPass = device->meta_state.depth_decomp.pass,
|
||||
.renderPass = pass,
|
||||
.subpass = 0,
|
||||
};
|
||||
|
||||
@@ -169,7 +175,7 @@ create_pipeline(struct radv_device *device,
|
||||
.db_flush_stencil_inplace = true,
|
||||
},
|
||||
&device->meta_state.alloc,
|
||||
&device->meta_state.depth_decomp.decompress_pipeline);
|
||||
decompress_pipeline);
|
||||
if (result != VK_SUCCESS)
|
||||
goto cleanup;
|
||||
|
||||
@@ -183,7 +189,7 @@ create_pipeline(struct radv_device *device,
|
||||
.db_resummarize = true,
|
||||
},
|
||||
&device->meta_state.alloc,
|
||||
&device->meta_state.depth_decomp.resummarize_pipeline);
|
||||
resummarize_pipeline);
|
||||
if (result != VK_SUCCESS)
|
||||
goto cleanup;
|
||||
|
||||
@@ -199,29 +205,31 @@ radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
|
||||
{
|
||||
struct radv_meta_state *state = &device->meta_state;
|
||||
VkDevice device_h = radv_device_to_handle(device);
|
||||
VkRenderPass pass_h = device->meta_state.depth_decomp.pass;
|
||||
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
|
||||
|
||||
if (pass_h)
|
||||
radv_DestroyRenderPass(device_h, pass_h,
|
||||
&device->meta_state.alloc);
|
||||
|
||||
VkPipeline pipeline_h = state->depth_decomp.decompress_pipeline;
|
||||
if (pipeline_h) {
|
||||
radv_DestroyPipeline(device_h, pipeline_h, alloc);
|
||||
}
|
||||
pipeline_h = state->depth_decomp.resummarize_pipeline;
|
||||
if (pipeline_h) {
|
||||
radv_DestroyPipeline(device_h, pipeline_h, alloc);
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
|
||||
VkRenderPass pass_h = state->depth_decomp[i].pass;
|
||||
if (pass_h) {
|
||||
radv_DestroyRenderPass(device_h, pass_h, alloc);
|
||||
}
|
||||
VkPipeline pipeline_h = state->depth_decomp[i].decompress_pipeline;
|
||||
if (pipeline_h) {
|
||||
radv_DestroyPipeline(device_h, pipeline_h, alloc);
|
||||
}
|
||||
pipeline_h = state->depth_decomp[i].resummarize_pipeline;
|
||||
if (pipeline_h) {
|
||||
radv_DestroyPipeline(device_h, pipeline_h, alloc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_device_init_meta_depth_decomp_state(struct radv_device *device)
|
||||
{
|
||||
struct radv_meta_state *state = &device->meta_state;
|
||||
VkResult res = VK_SUCCESS;
|
||||
|
||||
zero(device->meta_state.depth_decomp);
|
||||
zero(state->depth_decomp);
|
||||
|
||||
struct radv_shader_module vs_module = { .nir = radv_meta_build_nir_vs_generate_vertices() };
|
||||
if (!vs_module.nir) {
|
||||
@@ -230,14 +238,22 @@ radv_device_init_meta_depth_decomp_state(struct radv_device *device)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
res = create_pass(device);
|
||||
if (res != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
VkShaderModule vs_module_h = radv_shader_module_to_handle(&vs_module);
|
||||
res = create_pipeline(device, vs_module_h);
|
||||
if (res != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
|
||||
uint32_t samples = 1 << i;
|
||||
|
||||
res = create_pass(device, samples, &state->depth_decomp[i].pass);
|
||||
if (res != VK_SUCCESS)
|
||||
goto fail;
|
||||
|
||||
res = create_pipeline(device, vs_module_h, samples,
|
||||
state->depth_decomp[i].pass,
|
||||
&state->depth_decomp[i].decompress_pipeline,
|
||||
&state->depth_decomp[i].resummarize_pipeline);
|
||||
if (res != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
goto cleanup;
|
||||
|
||||
@@ -283,10 +299,15 @@ emit_depth_decomp(struct radv_cmd_buffer *cmd_buffer,
|
||||
}
|
||||
|
||||
|
||||
enum radv_depth_op {
|
||||
DEPTH_DECOMPRESS,
|
||||
DEPTH_RESUMMARIZE,
|
||||
};
|
||||
|
||||
static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_image *image,
|
||||
VkImageSubresourceRange *subresourceRange,
|
||||
VkPipeline pipeline_h)
|
||||
enum radv_depth_op op)
|
||||
{
|
||||
struct radv_meta_saved_state saved_state;
|
||||
struct radv_meta_saved_pass_state saved_pass_state;
|
||||
@@ -296,6 +317,9 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
|
||||
subresourceRange->baseMipLevel);
|
||||
uint32_t height = radv_minify(image->info.height,
|
||||
subresourceRange->baseMipLevel);
|
||||
uint32_t samples = image->info.samples;
|
||||
uint32_t samples_log2 = ffs(samples) - 1;
|
||||
struct radv_meta_state *meta_state = &cmd_buffer->device->meta_state;
|
||||
|
||||
if (!image->surface.htile_size)
|
||||
return;
|
||||
@@ -339,7 +363,7 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
|
||||
radv_CmdBeginRenderPass(cmd_buffer_h,
|
||||
&(VkRenderPassBeginInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = cmd_buffer->device->meta_state.depth_decomp.pass,
|
||||
.renderPass = meta_state->depth_decomp[samples_log2].pass,
|
||||
.framebuffer = fb_h,
|
||||
.renderArea = {
|
||||
.offset = {
|
||||
@@ -356,6 +380,18 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
|
||||
},
|
||||
VK_SUBPASS_CONTENTS_INLINE);
|
||||
|
||||
VkPipeline pipeline_h;
|
||||
switch (op) {
|
||||
case DEPTH_DECOMPRESS:
|
||||
pipeline_h = meta_state->depth_decomp[samples_log2].decompress_pipeline;
|
||||
break;
|
||||
case DEPTH_RESUMMARIZE:
|
||||
pipeline_h = meta_state->depth_decomp[samples_log2].resummarize_pipeline;
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown operation");
|
||||
}
|
||||
|
||||
emit_depth_decomp(cmd_buffer, &(VkOffset2D){0, 0 }, &(VkExtent2D){width, height}, pipeline_h);
|
||||
radv_CmdEndRenderPass(cmd_buffer_h);
|
||||
|
||||
@@ -371,8 +407,7 @@ void radv_decompress_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
|
||||
VkImageSubresourceRange *subresourceRange)
|
||||
{
|
||||
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
|
||||
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
|
||||
cmd_buffer->device->meta_state.depth_decomp.decompress_pipeline);
|
||||
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_DECOMPRESS);
|
||||
}
|
||||
|
||||
void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
|
||||
@@ -380,6 +415,5 @@ void radv_resummarize_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
|
||||
VkImageSubresourceRange *subresourceRange)
|
||||
{
|
||||
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
|
||||
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange,
|
||||
cmd_buffer->device->meta_state.depth_decomp.resummarize_pipeline);
|
||||
radv_process_depth_image_inplace(cmd_buffer, image, subresourceRange, DEPTH_RESUMMARIZE);
|
||||
}
|
||||
|
@@ -382,6 +382,11 @@ void radv_CmdResolveImage(
|
||||
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
|
||||
|
||||
assert(src_image->info.samples > 1);
|
||||
if (src_image->info.samples <= 1) {
|
||||
/* this causes GPU hangs if we get past here */
|
||||
fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.");
|
||||
return;
|
||||
}
|
||||
assert(dest_image->info.samples == 1);
|
||||
|
||||
if (src_image->info.samples >= 16) {
|
||||
|
@@ -31,6 +31,45 @@
|
||||
#include "sid.h"
|
||||
#include "vk_format.h"
|
||||
|
||||
static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
|
||||
nir_ssa_def *input)
|
||||
{
|
||||
nir_const_value v;
|
||||
unsigned i;
|
||||
v.u32[0] = 0x3b4d2e1c; // 0.00313080009
|
||||
|
||||
nir_ssa_def *cmp[3];
|
||||
for (i = 0; i < 3; i++)
|
||||
cmp[i] = nir_flt(b, nir_channel(b, input, i),
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
|
||||
nir_ssa_def *ltvals[3];
|
||||
v.f32[0] = 12.92;
|
||||
for (i = 0; i < 3; i++)
|
||||
ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
|
||||
nir_ssa_def *gtvals[3];
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
v.f32[0] = 1.0/2.4;
|
||||
gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
v.f32[0] = 1.055;
|
||||
gtvals[i] = nir_fmul(b, gtvals[i],
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
v.f32[0] = 0.055;
|
||||
gtvals[i] = nir_fsub(b, gtvals[i],
|
||||
nir_build_imm(b, 1, 32, v));
|
||||
}
|
||||
|
||||
nir_ssa_def *comp[4];
|
||||
for (i = 0; i < 3; i++)
|
||||
comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
|
||||
comp[3] = nir_channels(b, input, 1 << 3);
|
||||
return nir_vec(b, comp, 4);
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
|
||||
{
|
||||
@@ -88,10 +127,13 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
|
||||
nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
|
||||
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
|
||||
|
||||
radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples,
|
||||
input_img, color, img_coord);
|
||||
radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
|
||||
color, img_coord);
|
||||
|
||||
nir_ssa_def *outval = nir_load_var(&b, color);
|
||||
if (is_srgb)
|
||||
outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
|
||||
|
||||
nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
|
||||
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
|
||||
store->src[0] = nir_src_for_ssa(coord);
|
||||
@@ -402,7 +444,7 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.image = radv_image_to_handle(dest_image),
|
||||
.viewType = radv_meta_get_view_type(dest_image),
|
||||
.format = dest_image->vk_format,
|
||||
.format = vk_to_non_srgb_format(dest_image->vk_format),
|
||||
.subresourceRange = {
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = region->dstSubresource.mipLevel,
|
||||
|
@@ -51,7 +51,7 @@ build_nir_vertex_shader(void)
|
||||
}
|
||||
|
||||
static nir_shader *
|
||||
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
|
||||
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, int samples)
|
||||
{
|
||||
nir_builder b;
|
||||
char name[64];
|
||||
@@ -62,7 +62,7 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_
|
||||
false,
|
||||
GLSL_TYPE_FLOAT);
|
||||
|
||||
snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
|
||||
snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : "float");
|
||||
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
b.shader->info.name = ralloc_strdup(b.shader, name);
|
||||
|
||||
@@ -92,8 +92,8 @@ build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_
|
||||
nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);
|
||||
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
|
||||
|
||||
radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb,samples,
|
||||
input_img, color, img_coord);
|
||||
radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
|
||||
color, img_coord);
|
||||
|
||||
nir_ssa_def *outval = nir_load_var(&b, color);
|
||||
nir_store_var(&b, color_out, outval, 0xf);
|
||||
@@ -177,31 +177,25 @@ create_resolve_pipeline(struct radv_device *device,
|
||||
VkFormat format)
|
||||
{
|
||||
VkResult result;
|
||||
bool is_integer = false, is_srgb = false;
|
||||
bool is_integer = false;
|
||||
uint32_t samples = 1 << samples_log2;
|
||||
unsigned fs_key = radv_format_meta_fs_key(format);
|
||||
const VkPipelineVertexInputStateCreateInfo *vi_create_info;
|
||||
vi_create_info = &normal_vi_create_info;
|
||||
if (vk_format_is_int(format))
|
||||
is_integer = true;
|
||||
else if (vk_format_is_srgb(format))
|
||||
is_srgb = true;
|
||||
|
||||
struct radv_shader_module fs = { .nir = NULL };
|
||||
fs.nir = build_resolve_fragment_shader(device, is_integer, is_srgb, samples);
|
||||
fs.nir = build_resolve_fragment_shader(device, is_integer, samples);
|
||||
struct radv_shader_module vs = {
|
||||
.nir = build_nir_vertex_shader(),
|
||||
};
|
||||
|
||||
VkRenderPass *rp = is_srgb ?
|
||||
&device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
|
||||
&device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
|
||||
VkRenderPass *rp = &device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
|
||||
|
||||
assert(!*rp);
|
||||
|
||||
VkPipeline *pipeline = is_srgb ?
|
||||
&device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
|
||||
&device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
|
||||
VkPipeline *pipeline = &device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
|
||||
assert(!*pipeline);
|
||||
|
||||
VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
|
||||
@@ -350,8 +344,6 @@ radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
|
||||
for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
|
||||
res = create_resolve_pipeline(device, i, pipeline_formats[j]);
|
||||
}
|
||||
|
||||
res = create_resolve_pipeline(device, i, VK_FORMAT_R8G8B8A8_SRGB);
|
||||
}
|
||||
|
||||
return res;
|
||||
@@ -370,12 +362,6 @@ radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
|
||||
state->resolve_fragment.rc[i].pipeline[j],
|
||||
&state->alloc);
|
||||
}
|
||||
radv_DestroyRenderPass(radv_device_to_handle(device),
|
||||
state->resolve_fragment.rc[i].srgb_render_pass,
|
||||
&state->alloc);
|
||||
radv_DestroyPipeline(radv_device_to_handle(device),
|
||||
state->resolve_fragment.rc[i].srgb_pipeline,
|
||||
&state->alloc);
|
||||
}
|
||||
|
||||
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
|
||||
@@ -432,9 +418,7 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
|
||||
push_constants);
|
||||
|
||||
unsigned fs_key = radv_format_meta_fs_key(dest_iview->vk_format);
|
||||
VkPipeline pipeline_h = vk_format_is_srgb(dest_iview->vk_format) ?
|
||||
device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
|
||||
device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
|
||||
VkPipeline pipeline_h = device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
|
||||
|
||||
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
pipeline_h);
|
||||
@@ -485,9 +469,7 @@ void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
|
||||
radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
|
||||
}
|
||||
|
||||
rp = vk_format_is_srgb(dest_image->vk_format) ?
|
||||
device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
|
||||
device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
|
||||
rp = device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
|
||||
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
|
||||
|
||||
for (uint32_t r = 0; r < region_count; ++r) {
|
||||
|
@@ -433,8 +433,6 @@ struct radv_meta_state {
|
||||
VkPipelineLayout p_layout;
|
||||
|
||||
struct {
|
||||
VkRenderPass srgb_render_pass;
|
||||
VkPipeline srgb_pipeline;
|
||||
VkRenderPass render_pass[NUM_META_FS_KEYS];
|
||||
VkPipeline pipeline[NUM_META_FS_KEYS];
|
||||
} rc[MAX_SAMPLES_LOG2];
|
||||
@@ -444,7 +442,7 @@ struct radv_meta_state {
|
||||
VkPipeline decompress_pipeline;
|
||||
VkPipeline resummarize_pipeline;
|
||||
VkRenderPass pass;
|
||||
} depth_decomp;
|
||||
} depth_decomp[1 + MAX_SAMPLES_LOG2];
|
||||
|
||||
struct {
|
||||
VkPipeline cmask_eliminate_pipeline;
|
||||
|
@@ -1133,15 +1133,18 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
|
||||
void
|
||||
si_emit_set_predication_state(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
|
||||
{
|
||||
uint32_t val = 0;
|
||||
uint32_t op = PRED_OP(PREDICATION_OP_BOOL64) | PREDICATION_DRAW_VISIBLE;
|
||||
|
||||
if (va)
|
||||
val = (((va >> 32) & 0xff) |
|
||||
PRED_OP(PREDICATION_OP_BOOL64)|
|
||||
PREDICATION_DRAW_VISIBLE);
|
||||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
|
||||
radeon_emit(cmd_buffer->cs, va);
|
||||
radeon_emit(cmd_buffer->cs, val);
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
|
||||
radeon_emit(cmd_buffer->cs, op);
|
||||
radeon_emit(cmd_buffer->cs, va);
|
||||
radeon_emit(cmd_buffer->cs, va >> 32);
|
||||
} else {
|
||||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
|
||||
radeon_emit(cmd_buffer->cs, va);
|
||||
radeon_emit(cmd_buffer->cs, op | ((va >> 32) & 0xFF));
|
||||
}
|
||||
}
|
||||
|
||||
/* Set this if you want the 3D engine to wait until CP DMA is done.
|
||||
|
@@ -465,4 +465,27 @@ vk_format_get_component_bits(VkFormat format,
|
||||
}
|
||||
}
|
||||
|
||||
static inline VkFormat
|
||||
vk_to_non_srgb_format(VkFormat format)
|
||||
{
|
||||
switch(format) {
|
||||
case VK_FORMAT_R8_SRGB :
|
||||
return VK_FORMAT_R8_UNORM;
|
||||
case VK_FORMAT_R8G8_SRGB:
|
||||
return VK_FORMAT_R8G8_UNORM;
|
||||
case VK_FORMAT_R8G8B8_SRGB:
|
||||
return VK_FORMAT_R8G8B8_UNORM;
|
||||
case VK_FORMAT_B8G8R8_SRGB:
|
||||
return VK_FORMAT_B8G8R8_UNORM;
|
||||
case VK_FORMAT_R8G8B8A8_SRGB :
|
||||
return VK_FORMAT_R8G8B8A8_UNORM;
|
||||
case VK_FORMAT_B8G8R8A8_SRGB:
|
||||
return VK_FORMAT_B8G8R8A8_UNORM;
|
||||
case VK_FORMAT_A8B8G8R8_SRGB_PACK32:
|
||||
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* VK_FORMAT_H */
|
||||
|
@@ -46,6 +46,11 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
|
||||
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
|
||||
return false;
|
||||
|
||||
if (ws->info.chip_class >= GFX9) {
|
||||
fprintf(stderr, "radv: VEGA support not completed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* LLVM 5.0 is required for GFX9. */
|
||||
if (ws->info.chip_class >= GFX9 && HAVE_LLVM < 0x0500) {
|
||||
fprintf(stderr, "amdgpu: LLVM 5.0 is required, got LLVM %i.%i\n",
|
||||
|
@@ -4495,7 +4495,7 @@ process_initializer(ir_variable *var, ast_declaration *decl,
|
||||
} else {
|
||||
if (var->type->is_numeric()) {
|
||||
/* Reduce cascading errors. */
|
||||
var->constant_value = type->qualifier.flags.q.constant
|
||||
rhs = var->constant_value = type->qualifier.flags.q.constant
|
||||
? ir_constant::zero(state, var->type) : NULL;
|
||||
}
|
||||
}
|
||||
|
@@ -725,6 +725,8 @@ ir_swizzle::constant_expression_value(struct hash_table *variable_context)
|
||||
case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_BOOL: data.b[i] = v->value.b[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_UINT64:data.u64[i] = v->value.u64[swiz_idx[i]]; break;
|
||||
case GLSL_TYPE_INT64: data.i64[i] = v->value.i64[swiz_idx[i]]; break;
|
||||
default: assert(!"Should not get here."); break;
|
||||
}
|
||||
}
|
||||
|
@@ -237,6 +237,12 @@ ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) {
|
||||
case GLSL_TYPE_BOOL:
|
||||
data.b[i] = found->constant->value.b[rhs_channel];
|
||||
break;
|
||||
case GLSL_TYPE_UINT64:
|
||||
data.u64[i] = found->constant->value.u64[rhs_channel];
|
||||
break;
|
||||
case GLSL_TYPE_INT64:
|
||||
data.i64[i] = found->constant->value.i64[rhs_channel];
|
||||
break;
|
||||
default:
|
||||
assert(!"not reached");
|
||||
break;
|
||||
|
@@ -646,6 +646,7 @@ dri2_x11_connect(struct dri2_egl_display *dri2_dpy)
|
||||
error != NULL || xfixes_query->major_version < 2) {
|
||||
_eglLog(_EGL_WARNING, "DRI2: failed to query xfixes version");
|
||||
free(error);
|
||||
free(xfixes_query);
|
||||
return EGL_FALSE;
|
||||
}
|
||||
free(xfixes_query);
|
||||
|
@@ -923,7 +923,7 @@ static void *
|
||||
_fixupNativeWindow(_EGLDisplay *disp, void *native_window)
|
||||
{
|
||||
#ifdef HAVE_X11_PLATFORM
|
||||
if (disp->Platform == _EGL_PLATFORM_X11 && native_window != NULL) {
|
||||
if (disp && disp->Platform == _EGL_PLATFORM_X11 && native_window != NULL) {
|
||||
/* The `native_window` parameter for the X11 platform differs between
|
||||
* eglCreateWindowSurface() and eglCreatePlatformPixmapSurfaceEXT(). In
|
||||
* eglCreateWindowSurface(), the type of `native_window` is an Xlib
|
||||
@@ -985,7 +985,7 @@ _fixupNativePixmap(_EGLDisplay *disp, void *native_pixmap)
|
||||
* `Pixmap*`. Convert `Pixmap*` to `Pixmap` because that's what
|
||||
* dri2_x11_create_pixmap_surface() expects.
|
||||
*/
|
||||
if (disp->Platform == _EGL_PLATFORM_X11 && native_pixmap != NULL)
|
||||
if (disp && disp->Platform == _EGL_PLATFORM_X11 && native_pixmap != NULL)
|
||||
return (void *)(* (Pixmap*) native_pixmap);
|
||||
#endif
|
||||
return native_pixmap;
|
||||
|
@@ -69,10 +69,17 @@ os_time_get_nano(void)
|
||||
|
||||
static LARGE_INTEGER frequency;
|
||||
LARGE_INTEGER counter;
|
||||
int64_t secs, nanosecs;
|
||||
if(!frequency.QuadPart)
|
||||
QueryPerformanceFrequency(&frequency);
|
||||
QueryPerformanceCounter(&counter);
|
||||
return counter.QuadPart*INT64_C(1000000000)/frequency.QuadPart;
|
||||
/* Compute seconds and nanoseconds parts separately to
|
||||
* reduce severity of precision loss.
|
||||
*/
|
||||
secs = counter.QuadPart / frequency.QuadPart;
|
||||
nanosecs = (counter.QuadPart % frequency.QuadPart) * INT64_C(1000000000)
|
||||
/ frequency.QuadPart;
|
||||
return secs*INT64_C(1000000000) + nanosecs;
|
||||
|
||||
#else
|
||||
|
||||
|
@@ -2006,6 +2006,7 @@ CodeEmitterNVC0::getSRegEncoding(const ValueRef& ref)
|
||||
void
|
||||
CodeEmitterNVC0::emitMOV(const Instruction *i)
|
||||
{
|
||||
assert(!i->saturate);
|
||||
if (i->def(0).getFile() == FILE_PREDICATE) {
|
||||
if (i->src(0).getFile() == FILE_GPR) {
|
||||
code[0] = 0xfc01c003;
|
||||
|
@@ -305,6 +305,8 @@ unsigned int Instruction::srcMask(unsigned int s) const
|
||||
case TGSI_OPCODE_TXD:
|
||||
case TGSI_OPCODE_TXL:
|
||||
case TGSI_OPCODE_TXP:
|
||||
case TGSI_OPCODE_TXF:
|
||||
case TGSI_OPCODE_TG4:
|
||||
case TGSI_OPCODE_TEX_LZ:
|
||||
case TGSI_OPCODE_TXF_LZ:
|
||||
case TGSI_OPCODE_LODQ:
|
||||
@@ -343,6 +345,8 @@ unsigned int Instruction::srcMask(unsigned int s) const
|
||||
}
|
||||
}
|
||||
return mask;
|
||||
case TGSI_OPCODE_TXQ:
|
||||
return 1;
|
||||
case TGSI_OPCODE_XPD:
|
||||
{
|
||||
unsigned int x = 0;
|
||||
|
@@ -727,7 +727,9 @@ ConstantFolding::expr(Instruction *i,
|
||||
// Leave PFETCH alone... we just folded its 2 args into 1.
|
||||
break;
|
||||
default:
|
||||
i->op = i->saturate ? OP_SAT : OP_MOV; /* SAT handled by unary() */
|
||||
i->op = i->saturate ? OP_SAT : OP_MOV;
|
||||
if (i->saturate)
|
||||
unary(i, *i->getSrc(0)->asImm());
|
||||
break;
|
||||
}
|
||||
i->subOp = 0;
|
||||
@@ -1509,6 +1511,17 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
||||
// This can get left behind some of the optimizations which simplify
|
||||
// saturatable values.
|
||||
if (newi->op == OP_MOV && newi->saturate) {
|
||||
ImmediateValue tmp;
|
||||
newi->saturate = 0;
|
||||
newi->op = OP_SAT;
|
||||
if (newi->src(0).getImmediate(tmp))
|
||||
unary(newi, tmp);
|
||||
}
|
||||
|
||||
if (newi->op != op)
|
||||
foldCount++;
|
||||
}
|
||||
|
@@ -771,6 +771,7 @@ static const struct debug_named_value common_debug_options[] = {
|
||||
{ "norbplus", DBG_NO_RB_PLUS, "Disable RB+." },
|
||||
{ "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." },
|
||||
{ "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" },
|
||||
{ "ce", DBG_CE, "Force enable the constant engine" },
|
||||
{ "noce", DBG_NO_CE, "Disable the constant engine"},
|
||||
{ "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" },
|
||||
{ "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" },
|
||||
|
@@ -65,12 +65,12 @@
|
||||
#define R600_PRIM_RECTANGLE_LIST PIPE_PRIM_MAX
|
||||
|
||||
/* Debug flags. */
|
||||
/* logging */
|
||||
/* logging and features */
|
||||
#define DBG_TEX (1 << 0)
|
||||
/* gap - reuse */
|
||||
#define DBG_COMPUTE (1 << 2)
|
||||
#define DBG_VM (1 << 3)
|
||||
/* gap - reuse */
|
||||
#define DBG_CE (1 << 4)
|
||||
/* shader logging */
|
||||
#define DBG_FS (1 << 5)
|
||||
#define DBG_VS (1 << 6)
|
||||
|
@@ -198,12 +198,24 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||
sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
|
||||
si_context_gfx_flush, sctx);
|
||||
|
||||
/* SI + AMDGPU + CE = GPU hang */
|
||||
if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib &&
|
||||
sscreen->b.chip_class != SI &&
|
||||
/* These can't use CE due to a power gating bug in the kernel. */
|
||||
sscreen->b.family != CHIP_CARRIZO &&
|
||||
sscreen->b.family != CHIP_STONEY) {
|
||||
bool enable_ce = sscreen->b.chip_class != SI && /* SI hangs */
|
||||
/* These can't use CE due to a power gating bug in the kernel. */
|
||||
sscreen->b.family != CHIP_CARRIZO &&
|
||||
sscreen->b.family != CHIP_STONEY;
|
||||
|
||||
/* CE is currently disabled by default, because it makes s_load latency
|
||||
* worse, because CE IB doesn't run in lockstep with DE.
|
||||
* Remove this line after that performance issue has been resolved.
|
||||
*/
|
||||
enable_ce = false;
|
||||
|
||||
/* Apply CE overrides. */
|
||||
if (sscreen->b.debug_flags & DBG_NO_CE)
|
||||
enable_ce = false;
|
||||
else if (sscreen->b.debug_flags & DBG_CE)
|
||||
enable_ce = true;
|
||||
|
||||
if (ws->cs_add_const_ib && enable_ce) {
|
||||
sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
|
||||
if (!sctx->ce_ib)
|
||||
goto fail;
|
||||
|
@@ -1400,7 +1400,7 @@ static void tex_fetch_args(
|
||||
* It's unnecessary if the original texture format was
|
||||
* Z32_FLOAT, but we don't know that here.
|
||||
*/
|
||||
if (ctx->screen->b.chip_class == VI)
|
||||
if (ctx->screen->b.chip_class >= VI)
|
||||
z = ac_build_clamp(&ctx->ac, z);
|
||||
|
||||
address[count++] = z;
|
||||
|
@@ -148,7 +148,10 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
|
||||
buffer_size = LLVMGetBufferSize(out_buffer);
|
||||
buffer_data = LLVMGetBufferStart(out_buffer);
|
||||
|
||||
ac_elf_read(buffer_data, buffer_size, binary);
|
||||
if (!ac_elf_read(buffer_data, buffer_size, binary)) {
|
||||
fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
|
||||
diag.retval = 1;
|
||||
}
|
||||
|
||||
/* Clean up */
|
||||
LLVMDisposeMemoryBuffer(out_buffer);
|
||||
|
@@ -3162,14 +3162,13 @@ si_make_texture_descriptor(struct si_screen *screen,
|
||||
uint32_t *fmask_state)
|
||||
{
|
||||
struct pipe_resource *res = &tex->resource.b.b;
|
||||
const struct util_format_description *base_desc, *desc;
|
||||
const struct util_format_description *desc;
|
||||
unsigned char swizzle[4];
|
||||
int first_non_void;
|
||||
unsigned num_format, data_format, type;
|
||||
uint64_t va;
|
||||
|
||||
desc = util_format_description(pipe_format);
|
||||
base_desc = util_format_description(res->format);
|
||||
|
||||
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
|
||||
const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
|
||||
@@ -3270,15 +3269,6 @@ si_make_texture_descriptor(struct si_screen *screen,
|
||||
data_format = 0;
|
||||
}
|
||||
|
||||
/* Enable clamping for UNORM depth formats promoted to Z32F. */
|
||||
if (screen->b.chip_class >= GFX9 &&
|
||||
util_format_has_depth(desc) &&
|
||||
num_format == V_008F14_IMG_NUM_FORMAT_FLOAT &&
|
||||
util_get_depth_format_type(base_desc) != UTIL_FORMAT_TYPE_FLOAT) {
|
||||
/* NUM_FORMAT=FLOAT and DATA_FORMAT=24_8 means "clamp to [0,1]". */
|
||||
data_format = V_008F14_IMG_DATA_FORMAT_24_8;
|
||||
}
|
||||
|
||||
/* S8 with Z32 HTILE needs a special format. */
|
||||
if (screen->b.chip_class >= GFX9 &&
|
||||
pipe_format == PIPE_FORMAT_S8_UINT &&
|
||||
|
@@ -189,7 +189,7 @@ void QueueWork(SWR_CONTEXT *pContext)
|
||||
|
||||
if (IsDraw)
|
||||
{
|
||||
InterlockedIncrement((volatile LONG*)&pContext->drawsOutstandingFE);
|
||||
InterlockedIncrement(&pContext->drawsOutstandingFE);
|
||||
}
|
||||
|
||||
_ReadWriteBarrier();
|
||||
|
@@ -409,12 +409,12 @@ struct DRAW_CONTEXT
|
||||
bool dependent; // Backend work is dependent on all previous BE
|
||||
bool isCompute; // Is this DC a compute context?
|
||||
bool cleanupState; // True if this is the last draw using an entry in the state ring.
|
||||
volatile bool doneFE; // Is FE work done for this draw?
|
||||
|
||||
FE_WORK FeWork;
|
||||
|
||||
volatile OSALIGNLINE(bool) doneFE; // Is FE work done for this draw?
|
||||
volatile OSALIGNLINE(uint32_t) FeLock;
|
||||
volatile int32_t threadsDone;
|
||||
volatile OSALIGNLINE(uint32_t) threadsDone;
|
||||
|
||||
SYNC_DESC retireCallback; // Call this func when this DC is retired.
|
||||
};
|
||||
@@ -503,9 +503,9 @@ struct SWR_CONTEXT
|
||||
// Scratch space for workers.
|
||||
uint8_t** ppScratch;
|
||||
|
||||
volatile int32_t drawsOutstandingFE;
|
||||
volatile OSALIGNLINE(uint32_t) drawsOutstandingFE;
|
||||
|
||||
CachingAllocator cachingArenaAllocator;
|
||||
OSALIGNLINE(CachingAllocator) cachingArenaAllocator;
|
||||
uint32_t frameCount;
|
||||
|
||||
uint32_t lastFrameChecked;
|
||||
|
@@ -393,7 +393,7 @@ INLINE void ExecuteCallbacks(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONT
|
||||
// inlined-only version
|
||||
INLINE int32_t CompleteDrawContextInl(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEXT* pDC)
|
||||
{
|
||||
int32_t result = InterlockedDecrement((volatile LONG*)&pDC->threadsDone);
|
||||
int32_t result = static_cast<int32_t>(InterlockedDecrement(&pDC->threadsDone));
|
||||
SWR_ASSERT(result >= 0);
|
||||
|
||||
AR_FLUSH(pDC->drawId);
|
||||
@@ -639,7 +639,7 @@ INLINE void CompleteDrawFE(SWR_CONTEXT* pContext, uint32_t workerId, DRAW_CONTEX
|
||||
_mm_mfence();
|
||||
pDC->doneFE = true;
|
||||
|
||||
InterlockedDecrement((volatile LONG*)&pContext->drawsOutstandingFE);
|
||||
InterlockedDecrement(&pContext->drawsOutstandingFE);
|
||||
}
|
||||
|
||||
void WorkOnFifoFE(SWR_CONTEXT *pContext, uint32_t workerId, uint32_t &curDrawFE)
|
||||
|
@@ -28,6 +28,10 @@ include $(CLEAR_VARS)
|
||||
LOCAL_SRC_FILES := \
|
||||
$(C_SOURCES)
|
||||
|
||||
ifeq ($(ARCH_ARM_HAVE_NEON),true)
|
||||
LOCAL_SRC_FILES += $(NEON_C_SOURCES)
|
||||
endif
|
||||
|
||||
LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H)
|
||||
LOCAL_C_INCLUDES := \
|
||||
$(MESA_TOP)/include/drm-uapi
|
||||
|
@@ -39,6 +39,14 @@ noinst_LTLIBRARIES = libvc4.la
|
||||
|
||||
libvc4_la_SOURCES = $(C_SOURCES)
|
||||
libvc4_la_LIBADD = $(SIM_LIB)
|
||||
|
||||
if HAVE_ARM_ASM
|
||||
noinst_LTLIBRARIES += libvc4_neon.la
|
||||
libvc4_la_LIBADD += libvc4_neon.la
|
||||
libvc4_neon_la_SOURCES = $(NEON_C_SOURCES)
|
||||
libvc4_neon_la_CFLAGS = $(AM_CFLAGS) -mfpu=neon
|
||||
endif
|
||||
|
||||
libvc4_la_LDFLAGS = $(SIM_LDFLAGS)
|
||||
|
||||
EXTRA_DIST = kernel/README
|
||||
|
@@ -57,7 +57,8 @@ C_SOURCES := \
|
||||
vc4_state.c \
|
||||
vc4_tiling.c \
|
||||
vc4_tiling_lt.c \
|
||||
vc4_tiling_lt_neon.c \
|
||||
vc4_tiling.h \
|
||||
vc4_uniforms.c \
|
||||
$()
|
||||
|
||||
NEON_C_SOURCES := vc4_tiling_lt_neon.c
|
||||
|
@@ -89,13 +89,15 @@ vc4_load_lt_image(void *dst, uint32_t dst_stride,
|
||||
void *src, uint32_t src_stride,
|
||||
int cpp, const struct pipe_box *box)
|
||||
{
|
||||
#ifdef USE_ARM_ASM
|
||||
if (util_cpu_caps.has_neon) {
|
||||
vc4_load_lt_image_neon(dst, dst_stride, src, src_stride,
|
||||
cpp, box);
|
||||
} else {
|
||||
vc4_load_lt_image_base(dst, dst_stride, src, src_stride,
|
||||
cpp, box);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
vc4_load_lt_image_base(dst, dst_stride, src, src_stride,
|
||||
cpp, box);
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -103,13 +105,16 @@ vc4_store_lt_image(void *dst, uint32_t dst_stride,
|
||||
void *src, uint32_t src_stride,
|
||||
int cpp, const struct pipe_box *box)
|
||||
{
|
||||
#ifdef USE_ARM_ASM
|
||||
if (util_cpu_caps.has_neon) {
|
||||
vc4_store_lt_image_neon(dst, dst_stride, src, src_stride,
|
||||
cpp, box);
|
||||
} else {
|
||||
vc4_store_lt_image_base(dst, dst_stride, src, src_stride,
|
||||
cpp, box);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
vc4_store_lt_image_base(dst, dst_stride, src, src_stride,
|
||||
cpp, box);
|
||||
}
|
||||
|
||||
#endif /* VC4_TILING_H */
|
||||
|
@@ -601,8 +601,11 @@ wait_swap_interval(struct stw_framebuffer *fb)
|
||||
int64_t min_swap_period =
|
||||
1.0e6 / stw_dev->refresh_rate * stw_dev->swap_interval;
|
||||
|
||||
/* if time since last swap is less than wait period, wait */
|
||||
if (delta < min_swap_period) {
|
||||
/* If time since last swap is less than wait period, wait.
|
||||
* Note that it's possible for the delta to be negative because of
|
||||
* rollover. See https://bugs.freedesktop.org/show_bug.cgi?id=102241
|
||||
*/
|
||||
if ((delta >= 0) && (delta < min_swap_period)) {
|
||||
float fudge = 1.75f; /* emperical fudge factor */
|
||||
int64_t wait = (min_swap_period - delta) * fudge;
|
||||
os_time_sleep(wait);
|
||||
|
@@ -820,7 +820,7 @@ glXSwapBuffers(Display * dpy, GLXDrawable drawable)
|
||||
{
|
||||
#ifdef GLX_USE_APPLEGL
|
||||
struct glx_context * gc = __glXGetCurrentContext();
|
||||
if(gc != &DummyContext && apple_glx_is_current_drawable(dpy, gc->driContext, drawable)) {
|
||||
if(gc != &dummyContext && apple_glx_is_current_drawable(dpy, gc->driContext, drawable)) {
|
||||
apple_glx_swap_buffers(gc->driContext);
|
||||
} else {
|
||||
__glXSendError(dpy, GLXBadCurrentWindow, 0, X_GLXSwapBuffers, false);
|
||||
|
@@ -554,7 +554,7 @@
|
||||
<field name="Write Disable Blue" start="0" end="0" type="bool"/>
|
||||
</struct>
|
||||
|
||||
<struct name="BLEND_STATE" length="17">
|
||||
<struct name="BLEND_STATE" length="1">
|
||||
<field name="Alpha To Coverage Enable" start="31" end="31" type="bool"/>
|
||||
<field name="Independent Alpha Blend Enable" start="30" end="30" type="bool"/>
|
||||
<field name="Alpha To One Enable" start="29" end="29" type="bool"/>
|
||||
@@ -564,7 +564,7 @@
|
||||
<field name="Color Dither Enable" start="23" end="23" type="bool"/>
|
||||
<field name="X Dither Offset" start="21" end="22" type="uint"/>
|
||||
<field name="Y Dither Offset" start="19" end="20" type="uint"/>
|
||||
<group count="8" start="32" size="64">
|
||||
<group count="0" start="32" size="64">
|
||||
<field name="Entry" start="0" end="63" type="BLEND_STATE_ENTRY"/>
|
||||
</group>
|
||||
</struct>
|
||||
|
@@ -982,7 +982,8 @@ isl_calc_phys_total_extent_el_gen4_2d(
|
||||
&phys_slice0_sa);
|
||||
*total_extent_el = (struct isl_extent2d) {
|
||||
.w = isl_assert_div(phys_slice0_sa.w, fmtl->bw),
|
||||
.h = *array_pitch_el_rows * phys_level0_sa->array_len,
|
||||
.h = *array_pitch_el_rows * (phys_level0_sa->array_len - 1) +
|
||||
isl_assert_div(phys_slice0_sa.h, fmtl->bh),
|
||||
};
|
||||
}
|
||||
|
||||
@@ -1366,124 +1367,19 @@ isl_calc_row_pitch(const struct isl_device *dev,
|
||||
!pitch_in_range(row_pitch, _3DSTATE_HIER_DEPTH_BUFFER_SurfacePitch_bits(dev->info)))
|
||||
return false;
|
||||
|
||||
if (surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT)
|
||||
isl_finishme("validate row pitch of stencil surfaces");
|
||||
const uint32_t stencil_pitch_bits = dev->use_separate_stencil ?
|
||||
_3DSTATE_STENCIL_BUFFER_SurfacePitch_bits(dev->info) :
|
||||
_3DSTATE_DEPTH_BUFFER_SurfacePitch_bits(dev->info);
|
||||
|
||||
if ((surf_info->usage & ISL_SURF_USAGE_STENCIL_BIT) &&
|
||||
!pitch_in_range(row_pitch, stencil_pitch_bits))
|
||||
return false;
|
||||
|
||||
done:
|
||||
*out_row_pitch = row_pitch;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate and apply any padding required for the surface.
|
||||
*
|
||||
* @param[inout] total_h_el is updated with the new height
|
||||
* @param[out] pad_bytes is overwritten with additional padding requirements.
|
||||
*/
|
||||
static void
|
||||
isl_apply_surface_padding(const struct isl_device *dev,
|
||||
const struct isl_surf_init_info *restrict info,
|
||||
const struct isl_tile_info *tile_info,
|
||||
uint32_t *total_h_el,
|
||||
uint32_t *pad_bytes)
|
||||
{
|
||||
const struct isl_format_layout *fmtl = isl_format_get_layout(info->format);
|
||||
|
||||
*pad_bytes = 0;
|
||||
|
||||
/* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
|
||||
* Formats >> Surface Padding Requirements >> Render Target and Media
|
||||
* Surfaces:
|
||||
*
|
||||
* The data port accesses data (pixels) outside of the surface if they
|
||||
* are contained in the same cache request as pixels that are within the
|
||||
* surface. These pixels will not be returned by the requesting message,
|
||||
* however if these pixels lie outside of defined pages in the GTT,
|
||||
* a GTT error will result when the cache request is processed. In
|
||||
* order to avoid these GTT errors, “padding” at the bottom of the
|
||||
* surface is sometimes necessary.
|
||||
*
|
||||
* From the Broadwell PRM >> Volume 5: Memory Views >> Common Surface
|
||||
* Formats >> Surface Padding Requirements >> Sampling Engine Surfaces:
|
||||
*
|
||||
* ... Lots of padding requirements, all listed separately below.
|
||||
*/
|
||||
|
||||
/* We can safely ignore the first padding requirement, quoted below,
|
||||
* because isl doesn't do buffers.
|
||||
*
|
||||
* - [pre-BDW] For buffers, which have no inherent “height,” padding
|
||||
* requirements are different. A buffer must be padded to the next
|
||||
* multiple of 256 array elements, with an additional 16 bytes added
|
||||
    * beyond that to account for the L1 cache line.
    */

   /*
    * - For compressed textures [...], padding at the bottom of the surface
    *   is to an even compressed row.
    */
   if (isl_format_is_compressed(info->format))
      *total_h_el = isl_align(*total_h_el, 2);

   /*
    * - For cube surfaces, an additional two rows of padding are required
    *   at the bottom of the surface.
    */
   if (info->usage & ISL_SURF_USAGE_CUBE_BIT)
      *total_h_el += 2;

   /*
    * - For packed YUV, 96 bpt, 48 bpt, and 24 bpt surface formats,
    *   additional padding is required. These surfaces require an extra row
    *   plus 16 bytes of padding at the bottom in addition to the general
    *   padding requirements.
    */
   if (isl_format_is_yuv(info->format) &&
       (fmtl->bpb == 96 || fmtl->bpb == 48 || fmtl->bpb == 24)) {
      *total_h_el += 1;
      *pad_bytes += 16;
   }

   /*
    * - For linear surfaces, additional padding of 64 bytes is required at
    *   the bottom of the surface. This is in addition to the padding
    *   required above.
    */
   if (tile_info->tiling == ISL_TILING_LINEAR)
      *pad_bytes += 64;

   /* The below text weakens, not strengthens, the padding requirements for
    * linear surfaces. Therefore we can safely ignore it.
    *
    * - [BDW+] For SURFTYPE_BUFFER, SURFTYPE_1D, and SURFTYPE_2D non-array,
    *   non-MSAA, non-mip-mapped surfaces in linear memory, the only
    *   padding requirement is to the next aligned 64-byte boundary beyond
    *   the end of the surface. The rest of the padding requirements
    *   documented above do not apply to these surfaces.
    */

   /*
    * - [SKL+] For SURFTYPE_2D and SURFTYPE_3D with linear mode and
    *   height % 4 != 0, the surface must be padded with
    *   4-(height % 4)*Surface Pitch # of bytes.
    */
   if (ISL_DEV_GEN(dev) >= 9 &&
       tile_info->tiling == ISL_TILING_LINEAR &&
       (info->dim == ISL_SURF_DIM_2D || info->dim == ISL_SURF_DIM_3D)) {
      *total_h_el = isl_align(*total_h_el, 4);
   }

   /*
    * - [SKL+] For SURFTYPE_1D with linear mode, the surface must be padded
    *   to 4 times the Surface Pitch # of bytes.
    */
   if (ISL_DEV_GEN(dev) >= 9 &&
       tile_info->tiling == ISL_TILING_LINEAR &&
       info->dim == ISL_SURF_DIM_1D) {
      *total_h_el += 4;
   }
}

bool
isl_surf_init_s(const struct isl_device *dev,
                struct isl_surf *surf,
@@ -1536,10 +1432,6 @@ isl_surf_init_s(const struct isl_device *dev,
                                    array_pitch_span, &array_pitch_el_rows,
                                    &phys_total_el);

   uint32_t padded_h_el = phys_total_el.h;
   uint32_t pad_bytes;
   isl_apply_surface_padding(dev, info, &tile_info, &padded_h_el, &pad_bytes);

   uint32_t row_pitch;
   if (!isl_calc_row_pitch(dev, info, &tile_info, dim_layout,
                           &phys_total_el, &row_pitch))
@@ -1548,7 +1440,7 @@ isl_surf_init_s(const struct isl_device *dev,
   uint32_t base_alignment;
   uint64_t size;
   if (tiling == ISL_TILING_LINEAR) {
      size = (uint64_t) row_pitch * padded_h_el + pad_bytes;
      size = (uint64_t) row_pitch * phys_total_el.h;

      /* From the Broadwell PRM Vol 2d, RENDER_SURFACE_STATE::SurfaceBaseAddress:
       *
@@ -1569,9 +1461,8 @@ isl_surf_init_s(const struct isl_device *dev,
      }
      base_alignment = isl_round_up_to_power_of_two(base_alignment);
   } else {
      padded_h_el += isl_align_div_npot(pad_bytes, row_pitch);
      const uint32_t total_h_tl =
         isl_align_div(padded_h_el, tile_info.logical_extent_el.height);
         isl_align_div(phys_total_el.h, tile_info.logical_extent_el.height);

      size = (uint64_t) total_h_tl * tile_info.phys_extent_B.height * row_pitch;
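A minimal sketch of the size arithmetic this hunk changes, using hypothetical helper names rather than Mesa's API: the old path sized a linear surface from a padded height plus trailing pad bytes, while the new path derives the size purely from the row pitch and the physical element height.

   #include <stdint.h>

   /* old behaviour: extra padding rows plus trailing pad bytes folded in */
   static uint64_t old_linear_size(uint32_t row_pitch, uint32_t h_el,
                                   uint32_t pad_rows, uint32_t pad_bytes)
   {
      return (uint64_t)row_pitch * (h_el + pad_rows) + pad_bytes;
   }

   /* new behaviour: size follows directly from pitch and element height */
   static uint64_t new_linear_size(uint32_t row_pitch, uint32_t h_el)
   {
      return (uint64_t)row_pitch * h_el;
   }

For example, with a 256-byte pitch, 100 element rows, 4 padding rows and 64 pad bytes, the old formula gives 26688 bytes while the new one gives 25600.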
@@ -395,7 +395,8 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d
      /* Nothing to do here */
   } else if (vk_format_is_depth_or_stencil(format)) {
      tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
      if (physical_device->info.gen >= 8)
      if (vk_format_aspects(format) == VK_IMAGE_ASPECT_DEPTH_BIT ||
          physical_device->info.gen >= 8)
         tiled |= VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT;

      tiled |= VK_FORMAT_FEATURE_BLIT_SRC_BIT |
@@ -304,8 +304,9 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
      src_format = dst_format = MESA_FORMAT_R_FLOAT32;
   }

   enum isl_format src_isl_format = brw_isl_format_for_mesa_format(src_format);
   enum isl_aux_usage src_aux_usage =
      intel_miptree_texture_aux_usage(brw, src_mt, src_format);
      intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
   /* We do format workarounds for some depth formats so we can't reliably
    * sample with HiZ. One of these days, we should fix that.
    */
@@ -750,7 +750,7 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
      bo_wait_with_stall_warning(brw, bo, "CPU mapping");
   }

   if (!bo->cache_coherent) {
   if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
      /* If we're reusing an existing CPU mapping, the CPU caches may
       * contain stale data from the last time we read from that mapping.
       * (With the BO cache, it might even be data from a previous buffer!)
@@ -760,6 +760,12 @@ brw_bo_map_cpu(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
       * We need to invalidate those cachelines so that we see the latest
       * contents, and so long as we only read from the CPU mmap we do not
       * need to write those cachelines back afterwards.
       *
       * On LLC, the empirical evidence suggests that writes from the GPU
       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
       * cachelines. (Other reads, such as the display engine, bypass the
       * LLC entirely, requiring us to keep dirty pixels for the scanout
       * out of any cache.)
       */
      gen_invalidate_range(bo->map_cpu, bo->size);
   }
@@ -897,6 +903,14 @@ can_map_cpu(struct brw_bo *bo, unsigned flags)
   if (bo->cache_coherent)
      return true;

   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
    * an LLC platform reads are always coherent, as they are performed via
    * the central system agent. It is only writes that need special care to
    * ensure they land in main memory rather than sticking in the CPU cache.
    */
   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
      return true;

   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
    * across batch flushes where the kernel will change cache domains of the
    * bo, invalidating continued access to the CPU mmap on non-LLC devices.
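A minimal sketch of the mapping policy these two hunks establish, with hypothetical struct and field names standing in for the i965 buffer-manager types: cacheline invalidation is only needed for non-coherent buffers on non-LLC parts, and a read-only CPU map of a non-coherent buffer is still allowed on an LLC platform.

   #include <stdbool.h>

   struct buffer {
      bool cache_coherent;   /* CPU caches always observe GPU writes */
      bool has_llc;          /* device shares a last-level cache with the CPU */
   };

   /* only non-coherent buffers on non-LLC platforms can hold stale cachelines */
   static bool map_needs_invalidate(const struct buffer *bo)
   {
      return !bo->cache_coherent && !bo->has_llc;
   }

   /* reads snoop the LLC via the system agent, so read-only maps stay coherent
    * even for scanout buffers; only writes need special handling */
   static bool can_map_cpu_sketch(const struct buffer *bo, bool write)
   {
      if (bo->cache_coherent)
         return true;
      return !write && bo->has_llc;
   }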
@@ -847,9 +847,15 @@ intel_miptree_create_for_bo(struct brw_context *brw,
   mt->bo = bo;
   mt->offset = offset;

   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX))
   if (!(layout_flags & MIPTREE_LAYOUT_DISABLE_AUX)) {
      intel_miptree_choose_aux_usage(brw, mt);

      if (!intel_miptree_alloc_aux(brw, mt)) {
         intel_miptree_release(&mt);
         return NULL;
      }
   }

   return mt;
}

@@ -979,11 +985,6 @@ intel_miptree_create_for_dri_image(struct brw_context *brw,
      }
   }

   if (!intel_miptree_alloc_aux(brw, mt)) {
      intel_miptree_release(&mt);
      return NULL;
   }

   return mt;
}

@@ -2719,6 +2720,7 @@ intel_miptree_make_shareable(struct brw_context *brw,
   }

   mt->aux_usage = ISL_AUX_USAGE_NONE;
   mt->supports_fast_clear = false;
}
@@ -2409,8 +2409,10 @@ namespace {
class add_uniform_to_shader : public program_resource_visitor {
public:
   add_uniform_to_shader(struct gl_shader_program *shader_program,
                         struct gl_program_parameter_list *params)
      : shader_program(shader_program), params(params), idx(-1)
                         struct gl_program_parameter_list *params,
                         gl_shader_stage shader_type)
      : shader_program(shader_program), params(params), idx(-1),
        shader_type(shader_type)
   {
      /* empty */
   }
@@ -2433,6 +2435,7 @@ private:
   struct gl_program_parameter_list *params;
   int idx;
   ir_variable *var;
   gl_shader_stage shader_type;
};

} /* anonymous namespace */
@@ -2444,18 +2447,49 @@ add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
                                   const enum glsl_interface_packing,
                                   bool /* last_field */)
{
   /* opaque types don't use storage in the param list unless they are
    * bindless samplers or images.
    */
   if (type->contains_opaque() && !var->data.bindless)
   /* atomics don't get real storage */
   if (type->contains_atomic())
      return;

   assert(_mesa_lookup_parameter_index(params, name) < 0);
   gl_register_file file;
   if (type->without_array()->is_sampler() && !var->data.bindless) {
      file = PROGRAM_SAMPLER;
   } else {
      file = PROGRAM_UNIFORM;
   }

   unsigned size = type_size(type) * 4;
   int index = _mesa_lookup_parameter_index(params, name);
   if (index < 0) {
      unsigned size = type_size(type) * 4;

      int index = _mesa_add_parameter(params, PROGRAM_UNIFORM, name, size,
                                      type->gl_type, NULL, NULL);
      index = _mesa_add_parameter(params, file, name, size, type->gl_type,
                                  NULL, NULL);

      /* Sampler uniform values are stored in prog->SamplerUnits,
       * and the entry in that array is selected by this index we
       * store in ParameterValues[].
       */
      if (file == PROGRAM_SAMPLER) {
         unsigned location;
         const bool found =
            this->shader_program->UniformHash->get(location,
                                                   params->Parameters[index].Name);
         assert(found);

         if (!found)
            return;

         struct gl_uniform_storage *storage =
            &this->shader_program->data->UniformStorage[location];

         assert(storage->type->is_sampler() &&
                storage->opaque[shader_type].active);

         for (unsigned int j = 0; j < size / 4; j++)
            params->ParameterValues[index + j][0].f =
               storage->opaque[shader_type].index + j;
      }
   }

   /* The first part of the uniform that's processed determines the base
    * location of the whole uniform (for structures).
@@ -2479,7 +2513,7 @@ _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
                                            struct gl_program_parameter_list
                                            *params)
{
   add_uniform_to_shader add(shader_program, params);
   add_uniform_to_shader add(shader_program, params, sh->Stage);

   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *var = node->as_variable();
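A minimal sketch of the bookkeeping restored by this hunk, with hypothetical names standing in for ParameterValues, the slot returned by the parameter list, and storage->opaque[stage].index: each vec4 slot of a sampler uniform records, as a float, the per-stage sampler index it was assigned, so a parameter slot can later be translated back into a sampler unit.

   #include <stdio.h>

   #define MAX_SLOTS 8

   int main(void)
   {
      float param_values[MAX_SLOTS] = {0}; /* stand-in for ParameterValues[][0].f */
      unsigned base_index = 2;             /* slot the parameter list handed back */
      unsigned sampler_index = 5;          /* per-stage index from uniform storage */
      unsigned num_slots = 3;              /* e.g. a sampler2D tex[3] array */

      /* mirror the loop in the diff: consecutive slots map to consecutive units */
      for (unsigned j = 0; j < num_slots; j++)
         param_values[base_index + j] = (float)(sampler_index + j);

      for (unsigned j = 0; j < num_slots; j++)
         printf("param slot %u -> sampler unit %.0f\n",
                base_index + j, param_values[base_index + j]);
      return 0;
   }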
@@ -634,7 +634,7 @@ st_context_flush(struct st_context_iface *stctxi, unsigned flags,

   st_flush(st, fence, pipe_flags);

   if ((flags & ST_FLUSH_WAIT) && fence) {
   if ((flags & ST_FLUSH_WAIT) && fence && *fence) {
      st->pipe->screen->fence_finish(st->pipe->screen, NULL, *fence,
                                     PIPE_TIMEOUT_INFINITE);
      st->pipe->screen->fence_reference(st->pipe->screen, fence, NULL);
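A minimal sketch of why the extra dereference check matters, using hypothetical types rather than the gallium interfaces: the caller passes a location for the fence, but the driver may legitimately leave it NULL, so both the pointer and the pointed-to fence must be checked before waiting on it.

   #include <stddef.h>
   #include <stdbool.h>

   struct fence { int id; };

   static void finish_fence(struct fence *f) { (void)f; /* block until signaled */ }

   static void flush_and_maybe_wait(struct fence **fence, bool want_wait)
   {
      /* ... submit work; the driver may or may not have created *fence ... */
      if (want_wait && fence && *fence)   /* mirrors the corrected condition */
         finish_fence(*fence);
   }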
@@ -44,7 +44,9 @@ libmesautil_la_SOURCES = \
	$(MESA_UTIL_FILES) \
	$(MESA_UTIL_GENERATED_FILES)

libmesautil_la_LIBADD = $(ZLIB_LIBS)
libmesautil_la_LIBADD = \
	$(CLOCK_LIB) \
	$(ZLIB_LIBS)

roundeven_test_LDADD = -lm