Compare commits
56 Commits
explicit-s
...
mesa-17.2.
Author | SHA1 | Date | |
---|---|---|---|
|
29df4deef2 | ||
|
e4371d14f1 | ||
|
0b2c034f64 | ||
|
f4e163094d | ||
|
4a181e6244 | ||
|
8ef9fe7229 | ||
|
4f872e62c2 | ||
|
c5fac38ced | ||
|
579ecfd91e | ||
|
6b279b3271 | ||
|
6efb8d79a9 | ||
|
795b712bd7 | ||
|
43a2b178c2 | ||
|
d5def4f5a9 | ||
|
f2a60ff20a | ||
|
3c8673d420 | ||
|
381ccaa1cb | ||
|
2dd6030fbb | ||
|
a90d99f7a5 | ||
|
6806773905 | ||
|
3e872f4b53 | ||
|
21ea75b3e9 | ||
|
58fe86a6d6 | ||
|
d466a70532 | ||
|
e62eddcdbe | ||
|
6d07e58afb | ||
|
f9e563597d | ||
|
5b61ba4432 | ||
|
6625382b1c | ||
|
2bca74253d | ||
|
b36ff2d1f2 | ||
|
99b2613ce1 | ||
|
f9c7549605 | ||
|
2ce4f0afd3 | ||
|
546282e8bc | ||
|
de55bc8f49 | ||
|
f0b7563a36 | ||
|
5bee196840 | ||
|
2a1792981c | ||
|
a2b7477603 | ||
|
3e777d5cab | ||
|
9bb6aa5794 | ||
|
90bbcb93b1 | ||
|
529c440dd3 | ||
|
94e0de90ee | ||
|
3180f0fa0d | ||
|
e7f14a8b52 | ||
|
7f5d9d7a6d | ||
|
3d0960e761 | ||
|
bdbd8ab517 | ||
|
47bca2cfa7 | ||
|
04bb687f04 | ||
|
4e0f29ed0b | ||
|
0ccb853cc0 | ||
|
a455f594bb | ||
|
a955622c1a |
@@ -50,9 +50,22 @@ extern "C" {
|
||||
|
||||
#ifndef GL_VERSION_ES_CM_1_0
|
||||
#define GL_VERSION_ES_CM_1_0 1
|
||||
|
||||
/*
|
||||
* XXX: Temporary fix; needs to be reverted as part of the next
|
||||
* header update.
|
||||
* For more details:
|
||||
* https://github.com/KhronosGroup/OpenGL-Registry/pull/76
|
||||
* https://lists.freedesktop.org/archives/mesa-dev/2017-June/161647.html
|
||||
*/
|
||||
#include <KHR/khrplatform.h>
|
||||
typedef khronos_int8_t GLbyte;
|
||||
typedef khronos_float_t GLclampf;
|
||||
typedef short GLshort;
|
||||
typedef unsigned short GLushort;
|
||||
|
||||
typedef void GLvoid;
|
||||
typedef unsigned int GLenum;
|
||||
#include <KHR/khrplatform.h>
|
||||
typedef khronos_float_t GLfloat;
|
||||
typedef khronos_int32_t GLfixed;
|
||||
typedef unsigned int GLuint;
|
||||
|
@@ -104,7 +104,6 @@ GL_API void GL_APIENTRY glBlendEquationOES (GLenum mode);
|
||||
|
||||
#ifndef GL_OES_byte_coordinates
|
||||
#define GL_OES_byte_coordinates 1
|
||||
typedef khronos_int8_t GLbyte;
|
||||
#endif /* GL_OES_byte_coordinates */
|
||||
|
||||
#ifndef GL_OES_compressed_ETC1_RGB8_sub_texture
|
||||
@@ -128,7 +127,6 @@ typedef khronos_int8_t GLbyte;
|
||||
|
||||
#ifndef GL_OES_draw_texture
|
||||
#define GL_OES_draw_texture 1
|
||||
typedef short GLshort;
|
||||
#define GL_TEXTURE_CROP_RECT_OES 0x8B9D
|
||||
typedef void (GL_APIENTRYP PFNGLDRAWTEXSOESPROC) (GLshort x, GLshort y, GLshort z, GLshort width, GLshort height);
|
||||
typedef void (GL_APIENTRYP PFNGLDRAWTEXIOESPROC) (GLint x, GLint y, GLint z, GLint width, GLint height);
|
||||
@@ -409,7 +407,6 @@ GL_API GLbitfield GL_APIENTRY glQueryMatrixxOES (GLfixed *mantissa, GLint *expon
|
||||
|
||||
#ifndef GL_OES_single_precision
|
||||
#define GL_OES_single_precision 1
|
||||
typedef khronos_float_t GLclampf;
|
||||
typedef void (GL_APIENTRYP PFNGLCLEARDEPTHFOESPROC) (GLclampf depth);
|
||||
typedef void (GL_APIENTRYP PFNGLCLIPPLANEFOESPROC) (GLenum plane, const GLfloat *equation);
|
||||
typedef void (GL_APIENTRYP PFNGLDEPTHRANGEFOESPROC) (GLclampf n, GLclampf f);
|
||||
|
@@ -795,21 +795,21 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
|
||||
bool has_ds_bpermute,
|
||||
uint32_t mask,
|
||||
int idx,
|
||||
LLVMValueRef lds,
|
||||
LLVMValueRef val)
|
||||
{
|
||||
LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
|
||||
LLVMValueRef tl, trbl, args[2];
|
||||
LLVMValueRef result;
|
||||
|
||||
thread_id = ac_get_thread_id(ctx);
|
||||
|
||||
tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
|
||||
LLVMConstInt(ctx->i32, mask, false), "");
|
||||
|
||||
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
|
||||
LLVMConstInt(ctx->i32, idx, false), "");
|
||||
|
||||
if (has_ds_bpermute) {
|
||||
LLVMValueRef thread_id, tl_tid, trbl_tid;
|
||||
thread_id = ac_get_thread_id(ctx);
|
||||
|
||||
tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
|
||||
LLVMConstInt(ctx->i32, mask, false), "");
|
||||
|
||||
trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
|
||||
LLVMConstInt(ctx->i32, idx, false), "");
|
||||
|
||||
args[0] = LLVMBuildMul(ctx->builder, tl_tid,
|
||||
LLVMConstInt(ctx->i32, 4, false), "");
|
||||
args[1] = val;
|
||||
@@ -827,15 +827,42 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
|
||||
AC_FUNC_ATTR_READNONE |
|
||||
AC_FUNC_ATTR_CONVERGENT);
|
||||
} else {
|
||||
LLVMValueRef store_ptr, load_ptr0, load_ptr1;
|
||||
uint32_t masks[2];
|
||||
|
||||
store_ptr = ac_build_gep0(ctx, lds, thread_id);
|
||||
load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
|
||||
load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);
|
||||
switch (mask) {
|
||||
case AC_TID_MASK_TOP_LEFT:
|
||||
masks[0] = 0x8000;
|
||||
if (idx == 1)
|
||||
masks[1] = 0x8055;
|
||||
else
|
||||
masks[1] = 0x80aa;
|
||||
|
||||
LLVMBuildStore(ctx->builder, val, store_ptr);
|
||||
tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
|
||||
trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
|
||||
break;
|
||||
case AC_TID_MASK_TOP:
|
||||
masks[0] = 0x8044;
|
||||
masks[1] = 0x80ee;
|
||||
break;
|
||||
case AC_TID_MASK_LEFT:
|
||||
masks[0] = 0x80a0;
|
||||
masks[1] = 0x80f5;
|
||||
break;
|
||||
}
|
||||
|
||||
args[0] = val;
|
||||
args[1] = LLVMConstInt(ctx->i32, masks[0], false);
|
||||
|
||||
tl = ac_build_intrinsic(ctx,
|
||||
"llvm.amdgcn.ds.swizzle", ctx->i32,
|
||||
args, 2,
|
||||
AC_FUNC_ATTR_READNONE |
|
||||
AC_FUNC_ATTR_CONVERGENT);
|
||||
|
||||
args[1] = LLVMConstInt(ctx->i32, masks[1], false);
|
||||
trbl = ac_build_intrinsic(ctx,
|
||||
"llvm.amdgcn.ds.swizzle", ctx->i32,
|
||||
args, 2,
|
||||
AC_FUNC_ATTR_READNONE |
|
||||
AC_FUNC_ATTR_CONVERGENT);
|
||||
}
|
||||
|
||||
tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
|
||||
|
@@ -173,7 +173,6 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
|
||||
bool has_ds_bpermute,
|
||||
uint32_t mask,
|
||||
int idx,
|
||||
LLVMValueRef lds,
|
||||
LLVMValueRef val);
|
||||
|
||||
#define AC_SENDMSG_GS 2
|
||||
|
@@ -1178,7 +1178,17 @@ static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx,
|
||||
*/
|
||||
LLVMConstInt(ctx->i1, 1, false),
|
||||
};
|
||||
return ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
|
||||
|
||||
LLVMValueRef lsb = ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32,
|
||||
params, 2,
|
||||
AC_FUNC_ATTR_READNONE);
|
||||
|
||||
/* TODO: We need an intrinsic to skip this conditional. */
|
||||
/* Check for zero: */
|
||||
return LLVMBuildSelect(ctx->builder, LLVMBuildICmp(ctx->builder,
|
||||
LLVMIntEQ, src0,
|
||||
ctx->i32_0, ""),
|
||||
LLVMConstInt(ctx->i32, -1, 0), lsb, "");
|
||||
}
|
||||
|
||||
static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx,
|
||||
@@ -1443,11 +1453,6 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
|
||||
int idx;
|
||||
LLVMValueRef result;
|
||||
|
||||
if (!ctx->lds && !ctx->has_ds_bpermute)
|
||||
ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
|
||||
LLVMArrayType(ctx->i32, 64),
|
||||
"ddxy_lds", LOCAL_ADDR_SPACE);
|
||||
|
||||
if (op == nir_op_fddx_fine || op == nir_op_fddx)
|
||||
mask = AC_TID_MASK_LEFT;
|
||||
else if (op == nir_op_fddy_fine || op == nir_op_fddy)
|
||||
@@ -1464,7 +1469,7 @@ static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
|
||||
idx = 2;
|
||||
|
||||
result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
|
||||
mask, idx, ctx->lds,
|
||||
mask, idx,
|
||||
src0);
|
||||
return result;
|
||||
}
|
||||
@@ -3400,7 +3405,10 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
|
||||
char intrinsic_name[64];
|
||||
const nir_variable *var = instr->variables[0]->var;
|
||||
const struct glsl_type *type = glsl_without_array(var->type);
|
||||
|
||||
LLVMValueRef glc = ctx->i1false;
|
||||
bool force_glc = ctx->options->chip_class == SI;
|
||||
if (force_glc)
|
||||
glc = ctx->i1true;
|
||||
if (ctx->stage == MESA_SHADER_FRAGMENT)
|
||||
ctx->shader_info->fs.writes_memory = true;
|
||||
|
||||
@@ -3410,7 +3418,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
|
||||
params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
|
||||
LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
|
||||
params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
|
||||
params[4] = ctx->i1false; /* glc */
|
||||
params[4] = glc; /* glc */
|
||||
params[5] = ctx->i1false; /* slc */
|
||||
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
|
||||
params, 6, 0);
|
||||
@@ -3418,7 +3426,6 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
|
||||
bool is_da = glsl_sampler_type_is_array(type) ||
|
||||
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
|
||||
LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
|
||||
LLVMValueRef glc = ctx->i1false;
|
||||
LLVMValueRef slc = ctx->i1false;
|
||||
|
||||
params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2]));
|
||||
@@ -5178,6 +5185,7 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
|
||||
unsigned index = target - V_008DFC_SQ_EXP_MRT;
|
||||
unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
|
||||
bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
|
||||
bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1;
|
||||
|
||||
switch(col_format) {
|
||||
case V_028714_SPI_SHADER_ZERO:
|
||||
@@ -5255,11 +5263,13 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
|
||||
break;
|
||||
|
||||
case V_028714_SPI_SHADER_UINT16_ABGR: {
|
||||
LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
|
||||
LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
|
||||
is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
|
||||
LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->i32, 3, 0);
|
||||
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
val[chan] = to_integer(&ctx->ac, values[chan]);
|
||||
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], max);
|
||||
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
|
||||
}
|
||||
|
||||
args->compr = 1;
|
||||
@@ -5269,14 +5279,18 @@ si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
|
||||
}
|
||||
|
||||
case V_028714_SPI_SHADER_SINT16_ABGR: {
|
||||
LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
|
||||
LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
|
||||
LLVMValueRef max_rgb = LLVMConstInt(ctx->i32,
|
||||
is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
|
||||
LLVMValueRef min_rgb = LLVMConstInt(ctx->i32,
|
||||
is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
|
||||
LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->i32one;
|
||||
LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->i32, -2, 0);
|
||||
|
||||
/* Clamp. */
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
val[chan] = to_integer(&ctx->ac, values[chan]);
|
||||
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], max);
|
||||
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], min);
|
||||
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
|
||||
val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
|
||||
}
|
||||
|
||||
args->compr = 1;
|
||||
@@ -5815,10 +5829,11 @@ si_export_mrt_z(struct nir_to_llvm_context *ctx,
|
||||
args.enabled_channels |= 0x4;
|
||||
}
|
||||
|
||||
/* SI (except OLAND) has a bug that it only looks
|
||||
/* SI (except OLAND and HAINAN) has a bug that it only looks
|
||||
* at the X writemask component. */
|
||||
if (ctx->options->chip_class == SI &&
|
||||
ctx->options->family != CHIP_OLAND)
|
||||
ctx->options->family != CHIP_OLAND &&
|
||||
ctx->options->family != CHIP_HAINAN)
|
||||
args.enabled_channels |= 0x1;
|
||||
|
||||
ac_build_export(&ctx->ac, &args);
|
||||
|
@@ -57,6 +57,7 @@ struct ac_tcs_variant_key {
|
||||
struct ac_fs_variant_key {
|
||||
uint32_t col_format;
|
||||
uint32_t is_int8;
|
||||
uint32_t is_int10;
|
||||
};
|
||||
|
||||
union ac_shader_variant_key {
|
||||
|
@@ -257,6 +257,18 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
|
||||
AddrSurfInfoIn->width = u_minify(config->info.width, level);
|
||||
AddrSurfInfoIn->height = u_minify(config->info.height, level);
|
||||
|
||||
/* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
|
||||
* because GFX9 needs linear alignment of 256 bytes.
|
||||
*/
|
||||
if (config->info.levels == 1 &&
|
||||
AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
|
||||
AddrSurfInfoIn->bpp) {
|
||||
unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
|
||||
|
||||
assert(util_is_power_of_two(AddrSurfInfoIn->bpp));
|
||||
AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
|
||||
}
|
||||
|
||||
if (config->is_3d)
|
||||
AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
|
||||
else if (config->is_cube)
|
||||
|
@@ -928,15 +928,17 @@ void radv_GetPhysicalDeviceMemoryProperties(
|
||||
};
|
||||
|
||||
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
|
||||
uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
|
||||
physical_device->rad_info.vram_vis_size);
|
||||
|
||||
pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
|
||||
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
|
||||
.size = physical_device->rad_info.vram_size -
|
||||
physical_device->rad_info.vram_vis_size,
|
||||
visible_vram_size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
};
|
||||
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
|
||||
.size = physical_device->rad_info.vram_vis_size,
|
||||
.size = visible_vram_size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
};
|
||||
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
|
||||
@@ -3246,6 +3248,8 @@ radv_initialise_ds_surface(struct radv_device *device,
|
||||
ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
|
||||
tile_mode_index = si_tile_mode_index(iview->image, level, true);
|
||||
ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
|
||||
if (stencil_only)
|
||||
ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
|
||||
}
|
||||
|
||||
ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
|
||||
@@ -3624,9 +3628,14 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
|
||||
const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
|
||||
VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
|
||||
{
|
||||
pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
|
||||
VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
|
||||
|
||||
if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
|
||||
pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
|
||||
VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
|
||||
} else {
|
||||
pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
|
||||
pExternalSemaphoreProperties->compatibleHandleTypes = 0;
|
||||
pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
|
||||
}
|
||||
}
|
||||
|
@@ -181,6 +181,11 @@ radv_make_buffer_descriptor(struct radv_device *device,
|
||||
state[0] = va;
|
||||
state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
|
||||
S_008F04_STRIDE(stride);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class < VI && stride) {
|
||||
range /= stride;
|
||||
}
|
||||
|
||||
state[2] = range;
|
||||
state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(desc->swizzle[0])) |
|
||||
S_008F0C_DST_SEL_Y(radv_map_swizzle(desc->swizzle[1])) |
|
||||
|
@@ -695,6 +695,8 @@ static VkFormat pipeline_formats[] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_FORMAT_R8G8B8A8_UINT,
|
||||
VK_FORMAT_R8G8B8A8_SINT,
|
||||
VK_FORMAT_A2R10G10B10_UINT_PACK32,
|
||||
VK_FORMAT_A2R10G10B10_SINT_PACK32,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_UINT,
|
||||
|
@@ -1134,6 +1134,8 @@ static VkFormat pipeline_formats[] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_FORMAT_R8G8B8A8_UINT,
|
||||
VK_FORMAT_R8G8B8A8_SINT,
|
||||
VK_FORMAT_A2R10G10B10_UINT_PACK32,
|
||||
VK_FORMAT_A2R10G10B10_SINT_PACK32,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_UINT,
|
||||
|
@@ -754,6 +754,8 @@ static VkFormat pipeline_formats[] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_FORMAT_R8G8B8A8_UINT,
|
||||
VK_FORMAT_R8G8B8A8_SINT,
|
||||
VK_FORMAT_A2R10G10B10_UINT_PACK32,
|
||||
VK_FORMAT_A2R10G10B10_SINT_PACK32,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_UINT,
|
||||
|
@@ -160,6 +160,8 @@ static VkFormat pipeline_formats[] = {
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_FORMAT_R8G8B8A8_UINT,
|
||||
VK_FORMAT_R8G8B8A8_SINT,
|
||||
VK_FORMAT_A2R10G10B10_UINT_PACK32,
|
||||
VK_FORMAT_A2R10G10B10_SINT_PACK32,
|
||||
VK_FORMAT_R16G16B16A16_UNORM,
|
||||
VK_FORMAT_R16G16B16A16_SNORM,
|
||||
VK_FORMAT_R16G16B16A16_UINT,
|
||||
|
@@ -1067,20 +1067,37 @@ format_is_int8(VkFormat format)
|
||||
desc->channel[channel].size == 8;
|
||||
}
|
||||
|
||||
static bool
|
||||
format_is_int10(VkFormat format)
|
||||
{
|
||||
const struct vk_format_description *desc = vk_format_description(format);
|
||||
|
||||
if (desc->nr_channels != 4)
|
||||
return false;
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
if (desc->channel[i].pure_integer && desc->channel[i].size == 10)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned radv_format_meta_fs_key(VkFormat format)
|
||||
{
|
||||
unsigned col_format = si_choose_spi_color_format(format, false, false) - 1;
|
||||
bool is_int8 = format_is_int8(format);
|
||||
bool is_int10 = format_is_int10(format);
|
||||
|
||||
return col_format + (is_int8 ? 3 : 0);
|
||||
return col_format + (is_int8 ? 3 : is_int10 ? 5 : 0);
|
||||
}
|
||||
|
||||
static unsigned
|
||||
radv_pipeline_compute_is_int8(const VkGraphicsPipelineCreateInfo *pCreateInfo)
|
||||
static void
|
||||
radv_pipeline_compute_get_int_clamp(const VkGraphicsPipelineCreateInfo *pCreateInfo,
|
||||
unsigned *is_int8, unsigned *is_int10)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
|
||||
struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
|
||||
unsigned is_int8 = 0;
|
||||
*is_int8 = 0;
|
||||
*is_int10 = 0;
|
||||
|
||||
for (unsigned i = 0; i < subpass->color_count; ++i) {
|
||||
struct radv_render_pass_attachment *attachment;
|
||||
@@ -1091,10 +1108,10 @@ radv_pipeline_compute_is_int8(const VkGraphicsPipelineCreateInfo *pCreateInfo)
|
||||
attachment = pass->attachments + subpass->color_attachments[i].attachment;
|
||||
|
||||
if (format_is_int8(attachment->format))
|
||||
is_int8 |= 1 << i;
|
||||
*is_int8 |= 1 << i;
|
||||
if (format_is_int10(attachment->format))
|
||||
*is_int10 |= 1 << i;
|
||||
}
|
||||
|
||||
return is_int8;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2053,9 +2070,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
|
||||
}
|
||||
|
||||
if (modules[MESA_SHADER_FRAGMENT]) {
|
||||
union ac_shader_variant_key key;
|
||||
union ac_shader_variant_key key = {0};
|
||||
key.fs.col_format = pipeline->graphics.blend.spi_shader_col_format;
|
||||
key.fs.is_int8 = radv_pipeline_compute_is_int8(pCreateInfo);
|
||||
|
||||
if (pipeline->device->physical_device->rad_info.chip_class < VI)
|
||||
radv_pipeline_compute_get_int_clamp(pCreateInfo, &key.fs.is_int8, &key.fs.is_int10);
|
||||
|
||||
const VkPipelineShaderStageCreateInfo *stage = pStages[MESA_SHADER_FRAGMENT];
|
||||
|
||||
|
@@ -84,7 +84,7 @@ typedef uint32_t xcb_window_t;
|
||||
#define MAX_PUSH_DESCRIPTORS 32
|
||||
#define MAX_DYNAMIC_BUFFERS 16
|
||||
#define MAX_SAMPLES_LOG2 4
|
||||
#define NUM_META_FS_KEYS 11
|
||||
#define NUM_META_FS_KEYS 13
|
||||
#define RADV_MAX_DRM_DEVICES 8
|
||||
|
||||
#define NUM_DEPTH_CLEAR_PIPELINES 3
|
||||
|
@@ -250,8 +250,8 @@ optimizations = [
|
||||
(('ishr', a, 0), a),
|
||||
(('ushr', 0, a), 0),
|
||||
(('ushr', a, 0), a),
|
||||
(('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)),
|
||||
(('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)),
|
||||
(('iand', 0xff, ('ushr@32', a, 24)), ('ushr', a, 24)),
|
||||
(('iand', 0xffff, ('ushr@32', a, 16)), ('ushr', a, 16)),
|
||||
# Exponential/logarithmic identities
|
||||
(('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
|
||||
(('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
|
||||
|
@@ -721,7 +721,7 @@ translate_image_format(SpvImageFormat format)
|
||||
case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */
|
||||
case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */
|
||||
case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */
|
||||
case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */
|
||||
case SpvImageFormatR16ui: return 0x8234; /* GL_R16UI */
|
||||
case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */
|
||||
default:
|
||||
assert(!"Invalid image format");
|
||||
|
@@ -1083,6 +1083,10 @@ dmabuf_handle_modifier(void *data, struct zwp_linux_dmabuf_v1 *dmabuf,
|
||||
struct dri2_egl_display *dri2_dpy = data;
|
||||
uint64_t *mod = NULL;
|
||||
|
||||
if (modifier_hi == (DRM_FORMAT_MOD_INVALID >> 32) &&
|
||||
modifier_lo == (DRM_FORMAT_MOD_INVALID & 0xffffffff))
|
||||
return;
|
||||
|
||||
switch (format) {
|
||||
case WL_DRM_FORMAT_ARGB8888:
|
||||
mod = u_vector_add(&dri2_dpy->wl_modifiers.argb8888);
|
||||
|
@@ -328,17 +328,6 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
|
||||
break;
|
||||
}
|
||||
|
||||
/* The EGL_KHR_create_context_no_error spec says:
|
||||
*
|
||||
* "BAD_MATCH is generated if the EGL_CONTEXT_OPENGL_NO_ERROR_KHR is TRUE at
|
||||
* the same time as a debug or robustness context is specified."
|
||||
*/
|
||||
if (ctx->Flags & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR ||
|
||||
ctx->Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) {
|
||||
err = EGL_BAD_MATCH;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Canonicalize value to EGL_TRUE/EGL_FALSE definitions */
|
||||
ctx->NoError = !!val;
|
||||
break;
|
||||
@@ -489,6 +478,16 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
|
||||
break;
|
||||
}
|
||||
|
||||
/* The EGL_KHR_create_context_no_error spec says:
|
||||
*
|
||||
* "BAD_MATCH is generated if the EGL_CONTEXT_OPENGL_NO_ERROR_KHR is TRUE at
|
||||
* the same time as a debug or robustness context is specified."
|
||||
*/
|
||||
if (ctx->NoError && (ctx->Flags & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR ||
|
||||
ctx->Flags & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR)) {
|
||||
err = EGL_BAD_MATCH;
|
||||
}
|
||||
|
||||
if ((ctx->Flags & ~(EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR
|
||||
| EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR
|
||||
| EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR)) != 0) {
|
||||
|
@@ -925,10 +925,6 @@ static inline void radeon_set_ctl_const(struct radeon_winsys_cs *cs, unsigned re
|
||||
/*
|
||||
* common helpers
|
||||
*/
|
||||
static inline uint32_t S_FIXED(float value, uint32_t frac_bits)
|
||||
{
|
||||
return value * (1 << frac_bits);
|
||||
}
|
||||
|
||||
/* 12.4 fixed-point */
|
||||
static inline unsigned r600_pack_float_12p4(float x)
|
||||
|
@@ -1024,6 +1024,25 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned get_max_threads_per_block(struct r600_common_screen *screen,
|
||||
enum pipe_shader_ir ir_type)
|
||||
{
|
||||
if (ir_type != PIPE_SHADER_IR_TGSI)
|
||||
return 256;
|
||||
|
||||
/* Only 16 waves per thread-group on gfx9. */
|
||||
if (screen->chip_class >= GFX9)
|
||||
return 1024;
|
||||
|
||||
/* Up to 40 waves per thread-group on GCN < gfx9. Expose a nice
|
||||
* round number.
|
||||
*/
|
||||
if (screen->chip_class >= SI)
|
||||
return 2048;
|
||||
|
||||
return 256;
|
||||
}
|
||||
|
||||
static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
enum pipe_shader_ir ir_type,
|
||||
enum pipe_compute_cap param,
|
||||
@@ -1078,27 +1097,17 @@ static int r600_get_compute_param(struct pipe_screen *screen,
|
||||
case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
|
||||
if (ret) {
|
||||
uint64_t *block_size = ret;
|
||||
if (rscreen->chip_class >= SI &&
|
||||
ir_type == PIPE_SHADER_IR_TGSI) {
|
||||
block_size[0] = 2048;
|
||||
block_size[1] = 2048;
|
||||
block_size[2] = 2048;
|
||||
} else {
|
||||
block_size[0] = 256;
|
||||
block_size[1] = 256;
|
||||
block_size[2] = 256;
|
||||
}
|
||||
unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type);
|
||||
block_size[0] = threads_per_block;
|
||||
block_size[1] = threads_per_block;
|
||||
block_size[2] = threads_per_block;
|
||||
}
|
||||
return 3 * sizeof(uint64_t);
|
||||
|
||||
case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
|
||||
if (ret) {
|
||||
uint64_t *max_threads_per_block = ret;
|
||||
if (rscreen->chip_class >= SI &&
|
||||
ir_type == PIPE_SHADER_IR_TGSI)
|
||||
*max_threads_per_block = 2048;
|
||||
else
|
||||
*max_threads_per_block = 256;
|
||||
*max_threads_per_block = get_max_threads_per_block(rscreen, ir_type);
|
||||
}
|
||||
return sizeof(uint64_t);
|
||||
case PIPE_COMPUTE_CAP_ADDRESS_BITS:
|
||||
|
@@ -1006,4 +1006,9 @@ vi_dcc_enabled(struct r600_texture *tex, unsigned level)
|
||||
(((unsigned)(s2x) & 0xf) << 16) | (((unsigned)(s2y) & 0xf) << 20) | \
|
||||
(((unsigned)(s3x) & 0xf) << 24) | (((unsigned)(s3y) & 0xf) << 28))
|
||||
|
||||
static inline int S_FIXED(float value, unsigned frac_bits)
|
||||
{
|
||||
return value * (1 << frac_bits);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@@ -1373,6 +1373,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
|
||||
"IMM[1] UINT32 {1, 2, 4, 8}\n"
|
||||
"IMM[2] UINT32 {16, 32, 64, 128}\n"
|
||||
"IMM[3] UINT32 {1000000, 0, %u, 0}\n" /* for timestamp conversion */
|
||||
"IMM[4] UINT32 {0, 0, 0, 0}\n"
|
||||
|
||||
"AND TEMP[5], CONST[0].wwww, IMM[2].xxxx\n"
|
||||
"UIF TEMP[5]\n"
|
||||
@@ -1472,7 +1473,7 @@ static void r600_create_query_result_shader(struct r600_common_context *rctx)
|
||||
/* Convert to boolean */
|
||||
"AND TEMP[4], CONST[0].wwww, IMM[1].wwww\n"
|
||||
"UIF TEMP[4]\n"
|
||||
"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[0].xxxx\n"
|
||||
"U64SNE TEMP[0].x, TEMP[0].xyxy, IMM[4].zwzw\n"
|
||||
"AND TEMP[0].x, TEMP[0].xxxx, IMM[1].xxxx\n"
|
||||
"MOV TEMP[0].y, IMM[0].xxxx\n"
|
||||
"ENDIF\n"
|
||||
|
@@ -121,9 +121,7 @@ si_blit_dbcb_copy(struct si_context *sctx,
|
||||
|
||||
assert(sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled);
|
||||
|
||||
bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
|
||||
sctx->decompression_enabled = true;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = false;
|
||||
|
||||
while (level_mask) {
|
||||
unsigned level = u_bit_scan(&level_mask);
|
||||
@@ -169,7 +167,6 @@ si_blit_dbcb_copy(struct si_context *sctx,
|
||||
}
|
||||
|
||||
sctx->decompression_enabled = false;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
|
||||
sctx->dbcb_depth_copy_enabled = false;
|
||||
sctx->dbcb_stencil_copy_enabled = false;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
@@ -225,9 +222,7 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
|
||||
|
||||
surf_tmpl.format = texture->resource.b.b.format;
|
||||
|
||||
bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
|
||||
sctx->decompression_enabled = true;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = false;
|
||||
|
||||
while (level_mask) {
|
||||
unsigned level = u_bit_scan(&level_mask);
|
||||
@@ -267,7 +262,6 @@ si_blit_decompress_zs_planes_in_place(struct si_context *sctx,
|
||||
texture->stencil_dirty_level_mask &= ~fully_decompressed_mask;
|
||||
|
||||
sctx->decompression_enabled = false;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
|
||||
sctx->db_flush_depth_inplace = false;
|
||||
sctx->db_flush_stencil_inplace = false;
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
@@ -474,9 +468,7 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
|
||||
custom_blend = sctx->custom_blend_eliminate_fastclear;
|
||||
}
|
||||
|
||||
bool old_update_dirtiness = sctx->framebuffer.do_update_surf_dirtiness;
|
||||
sctx->decompression_enabled = true;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = false;
|
||||
|
||||
while (level_mask) {
|
||||
unsigned level = u_bit_scan(&level_mask);
|
||||
@@ -519,7 +511,6 @@ static void si_blit_decompress_color(struct pipe_context *ctx,
|
||||
}
|
||||
|
||||
sctx->decompression_enabled = false;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = old_update_dirtiness;
|
||||
|
||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_CB |
|
||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
||||
@@ -971,10 +962,32 @@ static void si_decompress_subresource(struct pipe_context *ctx,
|
||||
if (!(rtex->surface.flags & RADEON_SURF_SBUFFER))
|
||||
planes &= ~PIPE_MASK_S;
|
||||
|
||||
/* If we've rendered into the framebuffer and it's a blitting
|
||||
* source, make sure the decompression pass is invoked
|
||||
* by dirtying the framebuffer.
|
||||
*/
|
||||
if (sctx->framebuffer.state.zsbuf &&
|
||||
sctx->framebuffer.state.zsbuf->u.tex.level == level &&
|
||||
sctx->framebuffer.state.zsbuf->texture == tex)
|
||||
si_update_fb_dirtiness_after_rendering(sctx);
|
||||
|
||||
si_decompress_depth(sctx, rtex, planes,
|
||||
level, level,
|
||||
first_layer, last_layer);
|
||||
} else if (rtex->fmask.size || rtex->cmask.size || rtex->dcc_offset) {
|
||||
/* If we've rendered into the framebuffer and it's a blitting
|
||||
* source, make sure the decompression pass is invoked
|
||||
* by dirtying the framebuffer.
|
||||
*/
|
||||
for (unsigned i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
|
||||
if (sctx->framebuffer.state.cbufs[i] &&
|
||||
sctx->framebuffer.state.cbufs[i]->u.tex.level == level &&
|
||||
sctx->framebuffer.state.cbufs[i]->texture == tex) {
|
||||
si_update_fb_dirtiness_after_rendering(sctx);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
si_blit_decompress_color(ctx, rtex, level, level,
|
||||
first_layer, last_layer, false);
|
||||
}
|
||||
|
@@ -970,7 +970,7 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
|
||||
|
||||
if (!util_queue_init(&sscreen->shader_compiler_queue_low_priority,
|
||||
"si_shader_low",
|
||||
32, num_compiler_threads,
|
||||
32, num_compiler_threads_lowprio,
|
||||
UTIL_QUEUE_INIT_RESIZE_IF_FULL |
|
||||
UTIL_QUEUE_INIT_USE_MINIMUM_PRIORITY)) {
|
||||
si_destroy_shader_cache(sscreen);
|
||||
@@ -1002,8 +1002,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
|
||||
sscreen->b.info.pfp_fw_version >= 211 &&
|
||||
sscreen->b.info.me_fw_version >= 173) ||
|
||||
(sscreen->b.chip_class == SI &&
|
||||
sscreen->b.info.pfp_fw_version >= 121 &&
|
||||
sscreen->b.info.me_fw_version >= 87);
|
||||
sscreen->b.info.pfp_fw_version >= 79 &&
|
||||
sscreen->b.info.me_fw_version >= 142);
|
||||
|
||||
sscreen->has_ds_bpermute = sscreen->b.chip_class >= VI;
|
||||
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
|
||||
|
@@ -113,10 +113,15 @@ struct si_screen {
|
||||
|
||||
/* Shader compiler queue for multithreaded compilation. */
|
||||
struct util_queue shader_compiler_queue;
|
||||
LLVMTargetMachineRef tm[4]; /* used by the queue only */
|
||||
/* Use at most 3 normal compiler threads on quadcore and better.
|
||||
* Hyperthreaded CPUs report the number of threads, but we want
|
||||
* the number of cores. */
|
||||
LLVMTargetMachineRef tm[3]; /* used by the queue only */
|
||||
|
||||
struct util_queue shader_compiler_queue_low_priority;
|
||||
LLVMTargetMachineRef tm_low_priority[4];
|
||||
/* Use at most 2 low priority threads on quadcore and better.
|
||||
* We want to minimize the impact on multithreaded Mesa. */
|
||||
LLVMTargetMachineRef tm_low_priority[2]; /* at most 2 threads */
|
||||
};
|
||||
|
||||
struct si_blend_color {
|
||||
@@ -182,7 +187,6 @@ struct si_framebuffer {
|
||||
ubyte dirty_cbufs;
|
||||
bool dirty_zsbuf;
|
||||
bool any_dst_linear;
|
||||
bool do_update_surf_dirtiness;
|
||||
};
|
||||
|
||||
struct si_clip_state {
|
||||
|
@@ -174,6 +174,20 @@ unsigned si_shader_io_get_unique_index(unsigned semantic_name, unsigned index)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that builds an LLVM IR PHI node and immediately adds
|
||||
* incoming edges.
|
||||
*/
|
||||
static LLVMValueRef
|
||||
build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
|
||||
unsigned count_incoming, LLVMValueRef *values,
|
||||
LLVMBasicBlockRef *blocks)
|
||||
{
|
||||
LLVMValueRef phi = LLVMBuildPhi(ctx->builder, type, "");
|
||||
LLVMAddIncoming(phi, values, blocks, count_incoming);
|
||||
return phi;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the value of a shader input parameter and extract a bitfield.
|
||||
*/
|
||||
@@ -2698,6 +2712,7 @@ si_insert_input_ptr_as_2xi32(struct si_shader_context *ctx, LLVMValueRef ret,
|
||||
static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
LLVMBuilderRef builder = ctx->gallivm.builder;
|
||||
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
|
||||
|
||||
si_copy_tcs_inputs(bld_base);
|
||||
@@ -2706,8 +2721,29 @@ static void si_llvm_emit_tcs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||
invocation_id = unpack_param(ctx, ctx->param_tcs_rel_ids, 8, 5);
|
||||
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
|
||||
|
||||
if (ctx->screen->b.chip_class >= GFX9) {
|
||||
LLVMBasicBlockRef blocks[2] = {
|
||||
LLVMGetInsertBlock(builder),
|
||||
ctx->merged_wrap_if_state.entry_block
|
||||
};
|
||||
LLVMValueRef values[2];
|
||||
|
||||
lp_build_endif(&ctx->merged_wrap_if_state);
|
||||
|
||||
values[0] = rel_patch_id;
|
||||
values[1] = LLVMGetUndef(ctx->i32);
|
||||
rel_patch_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
|
||||
|
||||
values[0] = tf_lds_offset;
|
||||
values[1] = LLVMGetUndef(ctx->i32);
|
||||
tf_lds_offset = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
|
||||
|
||||
values[0] = invocation_id;
|
||||
values[1] = ctx->i32_1; /* cause the epilog to skip threads */
|
||||
invocation_id = build_phi(&ctx->ac, ctx->i32, 2, values, blocks);
|
||||
}
|
||||
|
||||
/* Return epilog parameters from this function. */
|
||||
LLVMBuilderRef builder = ctx->gallivm.builder;
|
||||
LLVMValueRef ret = ctx->return_value;
|
||||
unsigned vgpr;
|
||||
|
||||
@@ -2879,7 +2915,12 @@ static void si_llvm_emit_es_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||
|
||||
if (ctx->screen->b.chip_class >= GFX9 && info->num_outputs) {
|
||||
unsigned itemsize_dw = es->selector->esgs_itemsize / 4;
|
||||
lds_base = LLVMBuildMul(gallivm->builder, ac_get_thread_id(&ctx->ac),
|
||||
LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
|
||||
LLVMValueRef wave_idx = unpack_param(ctx, ctx->param_merged_wave_info, 24, 4);
|
||||
vertex_idx = LLVMBuildOr(gallivm->builder, vertex_idx,
|
||||
LLVMBuildMul(gallivm->builder, wave_idx,
|
||||
LLVMConstInt(ctx->i32, 64, false), ""), "");
|
||||
lds_base = LLVMBuildMul(gallivm->builder, vertex_idx,
|
||||
LLVMConstInt(ctx->i32, itemsize_dw, 0), "");
|
||||
}
|
||||
|
||||
@@ -2930,6 +2971,9 @@ static void si_llvm_emit_gs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||
|
||||
ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE,
|
||||
si_get_gs_wave_id(ctx));
|
||||
|
||||
if (ctx->screen->b.chip_class >= GFX9)
|
||||
lp_build_endif(&ctx->merged_wrap_if_state);
|
||||
}
|
||||
|
||||
static void si_llvm_emit_vs_epilogue(struct lp_build_tgsi_context *bld_base)
|
||||
@@ -3427,7 +3471,7 @@ static void si_llvm_emit_ddxy(
|
||||
|
||||
val = LLVMBuildBitCast(gallivm->builder, emit_data->args[0], ctx->i32, "");
|
||||
val = ac_build_ddxy(&ctx->ac, ctx->screen->has_ds_bpermute,
|
||||
mask, idx, ctx->lds, val);
|
||||
mask, idx, val);
|
||||
emit_data->output[emit_data->chan] = val;
|
||||
}
|
||||
|
||||
@@ -4466,20 +4510,6 @@ static void create_function(struct si_shader_context *ctx)
|
||||
assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
|
||||
shader->info.num_input_vgprs -= num_prolog_vgprs;
|
||||
|
||||
if (!ctx->screen->has_ds_bpermute &&
|
||||
bld_base->info &&
|
||||
(bld_base->info->opcode_count[TGSI_OPCODE_DDX] > 0 ||
|
||||
bld_base->info->opcode_count[TGSI_OPCODE_DDY] > 0 ||
|
||||
bld_base->info->opcode_count[TGSI_OPCODE_DDX_FINE] > 0 ||
|
||||
bld_base->info->opcode_count[TGSI_OPCODE_DDY_FINE] > 0 ||
|
||||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0 ||
|
||||
bld_base->info->opcode_count[TGSI_OPCODE_INTERP_SAMPLE] > 0))
|
||||
ctx->lds =
|
||||
LLVMAddGlobalInAddressSpace(gallivm->module,
|
||||
LLVMArrayType(ctx->i32, 64),
|
||||
"ddxy_lds",
|
||||
LOCAL_ADDR_SPACE);
|
||||
|
||||
if (shader->key.as_ls ||
|
||||
ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||
/* GFX9 has the ESGS ring buffer in LDS. */
|
||||
@@ -5497,14 +5527,20 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
|
||||
preload_ring_buffers(ctx);
|
||||
|
||||
/* For GFX9 merged shaders:
|
||||
* - Set EXEC. If the prolog is present, set EXEC there instead.
|
||||
* - Set EXEC for the first shader. If the prolog is present, set
|
||||
* EXEC there instead.
|
||||
* - Add a barrier before the second shader.
|
||||
* - In the second shader, reset EXEC to ~0 and wrap the main part in
|
||||
* an if-statement. This is required for correctness in geometry
|
||||
* shaders, to ensure that empty GS waves do not send GS_EMIT and
|
||||
* GS_CUT messages.
|
||||
*
|
||||
* The same thing for monolithic shaders is done in
|
||||
* si_build_wrapper_function.
|
||||
* For monolithic merged shaders, the first shader is wrapped in an
|
||||
* if-block together with its prolog in si_build_wrapper_function.
|
||||
*/
|
||||
if (ctx->screen->b.chip_class >= GFX9 && !is_monolithic) {
|
||||
if (sel->info.num_instructions > 1 && /* not empty shader */
|
||||
if (ctx->screen->b.chip_class >= GFX9) {
|
||||
if (!is_monolithic &&
|
||||
sel->info.num_instructions > 1 && /* not empty shader */
|
||||
(shader->key.as_es || shader->key.as_ls) &&
|
||||
(ctx->type == PIPE_SHADER_TESS_EVAL ||
|
||||
(ctx->type == PIPE_SHADER_VERTEX &&
|
||||
@@ -5513,9 +5549,19 @@ static bool si_compile_tgsi_main(struct si_shader_context *ctx,
|
||||
ctx->param_merged_wave_info, 0);
|
||||
} else if (ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||
ctx->type == PIPE_SHADER_GEOMETRY) {
|
||||
si_init_exec_from_input(ctx,
|
||||
ctx->param_merged_wave_info, 8);
|
||||
if (!is_monolithic)
|
||||
si_init_exec_full_mask(ctx);
|
||||
|
||||
/* The barrier must execute for all shaders in a
|
||||
* threadgroup.
|
||||
*/
|
||||
si_llvm_emit_barrier(NULL, bld_base, NULL);
|
||||
|
||||
LLVMValueRef num_threads = unpack_param(ctx, ctx->param_merged_wave_info, 8, 8);
|
||||
LLVMValueRef ena =
|
||||
LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
|
||||
ac_get_thread_id(&ctx->ac), num_threads, "");
|
||||
lp_build_if(&ctx->merged_wrap_if_state, &ctx->gallivm, ena);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5986,15 +6032,9 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
|
||||
|
||||
/* Merged shaders are executed conditionally depending
|
||||
* on the number of enabled threads passed in the input SGPRs. */
|
||||
if (is_merged_shader(ctx->shader) &&
|
||||
(part == 0 || part == next_shader_first_part)) {
|
||||
if (is_merged_shader(ctx->shader) && part == 0) {
|
||||
LLVMValueRef ena, count = initial[3];
|
||||
|
||||
/* The thread count for the 2nd shader is at bit-offset 8. */
|
||||
if (part == next_shader_first_part) {
|
||||
count = LLVMBuildLShr(builder, count,
|
||||
LLVMConstInt(ctx->i32, 8, 0), "");
|
||||
}
|
||||
count = LLVMBuildAnd(builder, count,
|
||||
LLVMConstInt(ctx->i32, 0x7f, 0), "");
|
||||
ena = LLVMBuildICmp(builder, LLVMIntULT,
|
||||
@@ -6051,26 +6091,20 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
|
||||
ret = LLVMBuildCall(builder, parts[part], in, num_params, "");
|
||||
|
||||
if (is_merged_shader(ctx->shader) &&
|
||||
(part + 1 == next_shader_first_part ||
|
||||
part + 1 == num_parts)) {
|
||||
part + 1 == next_shader_first_part) {
|
||||
lp_build_endif(&if_state);
|
||||
|
||||
if (part + 1 == next_shader_first_part) {
|
||||
/* A barrier is required between 2 merged shaders. */
|
||||
si_llvm_emit_barrier(NULL, &ctx->bld_base, NULL);
|
||||
|
||||
/* The second half of the merged shader should use
|
||||
* the inputs from the toplevel (wrapper) function,
|
||||
* not the return value from the last call.
|
||||
*
|
||||
* That's because the last call was executed condi-
|
||||
* tionally, so we can't consume it in the main
|
||||
* block.
|
||||
*/
|
||||
memcpy(out, initial, sizeof(initial));
|
||||
num_out = initial_num_out;
|
||||
num_out_sgpr = initial_num_out_sgpr;
|
||||
}
|
||||
/* The second half of the merged shader should use
|
||||
* the inputs from the toplevel (wrapper) function,
|
||||
* not the return value from the last call.
|
||||
*
|
||||
* That's because the last call was executed condi-
|
||||
* tionally, so we can't consume it in the main
|
||||
* block.
|
||||
*/
|
||||
memcpy(out, initial, sizeof(initial));
|
||||
num_out = initial_num_out;
|
||||
num_out_sgpr = initial_num_out_sgpr;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@@ -25,6 +25,7 @@
|
||||
#define SI_SHADER_PRIVATE_H
|
||||
|
||||
#include "si_shader.h"
|
||||
#include "gallivm/lp_bld_flow.h"
|
||||
#include "gallivm/lp_bld_init.h"
|
||||
#include "gallivm/lp_bld_tgsi.h"
|
||||
#include "tgsi/tgsi_parse.h"
|
||||
@@ -105,6 +106,8 @@ struct si_shader_context {
|
||||
unsigned flow_depth;
|
||||
unsigned flow_depth_max;
|
||||
|
||||
struct lp_build_if_state merged_wrap_if_state;
|
||||
|
||||
struct tgsi_array_info *temp_arrays;
|
||||
LLVMValueRef *temp_array_allocas;
|
||||
|
||||
|
@@ -756,7 +756,7 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
|
||||
*/
|
||||
if (array_size > 16 ||
|
||||
!ctx->screen->llvm_has_working_vgpr_indexing) {
|
||||
array_alloca = LLVMBuildAlloca(builder,
|
||||
array_alloca = lp_build_alloca_undef(&ctx->gallivm,
|
||||
LLVMArrayType(ctx->f32,
|
||||
array_size), "array");
|
||||
ctx->temp_array_allocas[id] = array_alloca;
|
||||
|
@@ -74,11 +74,6 @@ static unsigned si_map_swizzle(unsigned swizzle)
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t S_FIXED(float value, uint32_t frac_bits)
|
||||
{
|
||||
return value * (1 << frac_bits);
|
||||
}
|
||||
|
||||
/* 12.4 fixed-point */
|
||||
static unsigned si_pack_float_12p4(float x)
|
||||
{
|
||||
@@ -2457,6 +2452,38 @@ static void si_init_depth_surface(struct si_context *sctx,
|
||||
surf->depth_initialized = true;
|
||||
}
|
||||
|
||||
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx)
|
||||
{
|
||||
if (sctx->decompression_enabled)
|
||||
return;
|
||||
|
||||
if (sctx->framebuffer.state.zsbuf) {
|
||||
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
|
||||
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
|
||||
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
}
|
||||
if (sctx->framebuffer.compressed_cb_mask) {
|
||||
struct pipe_surface *surf;
|
||||
struct r600_texture *rtex;
|
||||
unsigned mask = sctx->framebuffer.compressed_cb_mask;
|
||||
|
||||
do {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
surf = sctx->framebuffer.state.cbufs[i];
|
||||
rtex = (struct r600_texture*)surf->texture;
|
||||
|
||||
if (rtex->fmask.size)
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
if (rtex->dcc_gather_statistics)
|
||||
rtex->separate_dcc_dirty = true;
|
||||
} while (mask);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state)
|
||||
{
|
||||
for (int i = 0; i < state->nr_cbufs; ++i) {
|
||||
@@ -2484,6 +2511,8 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||
bool unbound = false;
|
||||
int i;
|
||||
|
||||
si_update_fb_dirtiness_after_rendering(sctx);
|
||||
|
||||
for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
|
||||
if (!sctx->framebuffer.state.cbufs[i])
|
||||
continue;
|
||||
@@ -2681,7 +2710,6 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||
* changes come from the decompression passes themselves.
|
||||
*/
|
||||
sctx->need_check_render_feedback = true;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = true;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3989,6 +4017,8 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
|
||||
si_update_fb_dirtiness_after_rendering(sctx);
|
||||
|
||||
/* Multisample surfaces are flushed in si_decompress_textures. */
|
||||
if (sctx->framebuffer.nr_samples <= 1 &&
|
||||
sctx->framebuffer.state.nr_cbufs) {
|
||||
@@ -3996,7 +4026,6 @@ static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
|
||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
||||
SI_CONTEXT_FLUSH_AND_INV_CB;
|
||||
}
|
||||
sctx->framebuffer.do_update_surf_dirtiness = true;
|
||||
}
|
||||
|
||||
/* This only ensures coherency for shader image/buffer stores. */
|
||||
|
@@ -384,6 +384,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
||||
const struct pipe_sampler_view *state,
|
||||
unsigned width0, unsigned height0,
|
||||
unsigned force_level);
|
||||
void si_update_fb_dirtiness_after_rendering(struct si_context *sctx);
|
||||
|
||||
/* si_state_shader.c */
|
||||
bool si_update_shaders(struct si_context *sctx);
|
||||
|
@@ -1207,7 +1207,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
sctx->framebuffer.dirty_cbufs |=
|
||||
((1 << sctx->framebuffer.state.nr_cbufs) - 1);
|
||||
sctx->framebuffer.dirty_zsbuf = true;
|
||||
sctx->framebuffer.do_update_surf_dirtiness = true;
|
||||
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
|
||||
si_update_all_texture_descriptors(sctx);
|
||||
}
|
||||
@@ -1392,36 +1391,6 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
|
||||
}
|
||||
|
||||
if (sctx->framebuffer.do_update_surf_dirtiness) {
|
||||
/* Set the depth buffer as dirty. */
|
||||
if (sctx->framebuffer.state.zsbuf) {
|
||||
struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
|
||||
struct r600_texture *rtex = (struct r600_texture *)surf->texture;
|
||||
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
|
||||
if (rtex->surface.flags & RADEON_SURF_SBUFFER)
|
||||
rtex->stencil_dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
}
|
||||
if (sctx->framebuffer.compressed_cb_mask) {
|
||||
struct pipe_surface *surf;
|
||||
struct r600_texture *rtex;
|
||||
unsigned mask = sctx->framebuffer.compressed_cb_mask;
|
||||
|
||||
do {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
surf = sctx->framebuffer.state.cbufs[i];
|
||||
rtex = (struct r600_texture*)surf->texture;
|
||||
|
||||
if (rtex->fmask.size)
|
||||
rtex->dirty_level_mask |= 1 << surf->u.tex.level;
|
||||
if (rtex->dcc_gather_statistics)
|
||||
rtex->separate_dcc_dirty = true;
|
||||
} while (mask);
|
||||
}
|
||||
sctx->framebuffer.do_update_surf_dirtiness = false;
|
||||
}
|
||||
|
||||
sctx->b.num_draw_calls++;
|
||||
if (info->primitive_restart)
|
||||
sctx->b.num_prim_restart_calls++;
|
||||
|
@@ -53,7 +53,7 @@ env.CodeGenerate(
|
||||
source = '',
|
||||
command = python_cmd + ' $SCRIPT --output $TARGET --gen_h'
|
||||
)
|
||||
Depends('rasterizer/codegen/gen_knobs.cpp',
|
||||
Depends('rasterizer/codegen/gen_knobs.h',
|
||||
swrroot + 'rasterizer/codegen/templates/gen_knobs.cpp')
|
||||
|
||||
env.CodeGenerate(
|
||||
|
@@ -140,6 +140,26 @@ extern GlobalKnobs g_GlobalKnobs;
|
||||
//========================================================
|
||||
void KnobBase::autoExpandEnvironmentVariables(std::string &text)
|
||||
{
|
||||
#if (__GNUC__) && (GCC_VERSION < 409000)
|
||||
// <regex> isn't implemented prior to gcc-4.9.0
|
||||
// unix style variable replacement
|
||||
size_t start;
|
||||
while ((start = text.find("${'${'}")) != std::string::npos) {
|
||||
size_t end = text.find("}");
|
||||
if (end == std::string::npos)
|
||||
break;
|
||||
const std::string var = GetEnv(text.substr(start + 2, end - start - 2));
|
||||
text.replace(start, end - start + 1, var);
|
||||
}
|
||||
// win32 style variable replacement
|
||||
while ((start = text.find("%")) != std::string::npos) {
|
||||
size_t end = text.find("%", start + 1);
|
||||
if (end == std::string::npos)
|
||||
break;
|
||||
const std::string var = GetEnv(text.substr(start + 1, end - start - 1));
|
||||
text.replace(start, end - start + 1, var);
|
||||
}
|
||||
#else
|
||||
{
|
||||
// unix style variable replacement
|
||||
static std::regex env("\\$\\{([^}]+)\\}");
|
||||
@@ -164,6 +184,7 @@ void KnobBase::autoExpandEnvironmentVariables(std::string &text)
|
||||
text.replace(match.prefix().length(), match[0].length(), var);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@@ -81,8 +81,11 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
offsets[output_buffer] = so->output[i].dst_offset;
|
||||
}
|
||||
|
||||
unsigned attrib_slot = so->output[i].register_index;
|
||||
attrib_slot = swr_so_adjust_attrib(attrib_slot, ctx->vs);
|
||||
|
||||
state.stream.decl[num].bufferIndex = output_buffer;
|
||||
state.stream.decl[num].attribSlot = so->output[i].register_index - 1;
|
||||
state.stream.decl[num].attribSlot = attrib_slot;
|
||||
state.stream.decl[num].componentMask =
|
||||
((1 << so->output[i].num_components) - 1)
|
||||
<< so->output[i].start_component;
|
||||
@@ -129,10 +132,36 @@ swr_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
* XXX setup provokingVertex & topologyProvokingVertex */
|
||||
SWR_FRONTEND_STATE feState = {0};
|
||||
|
||||
feState.vsVertexSize =
|
||||
VERTEX_ATTRIB_START_SLOT +
|
||||
+ ctx->vs->info.base.num_outputs
|
||||
- (ctx->vs->info.base.writes_position ? 1 : 0);
|
||||
// feState.vsVertexSize seeds the PA size that is used as an interface
|
||||
// between all the shader stages, so it has to be large enough to
|
||||
// incorporate all interfaces between stages
|
||||
|
||||
// max of gs and vs num_outputs
|
||||
feState.vsVertexSize = ctx->vs->info.base.num_outputs;
|
||||
if (ctx->gs &&
|
||||
ctx->gs->info.base.num_outputs > feState.vsVertexSize) {
|
||||
feState.vsVertexSize = ctx->gs->info.base.num_outputs;
|
||||
}
|
||||
|
||||
if (ctx->vs->info.base.num_outputs) {
|
||||
// gs does not adjust for position in SGV slot at input from vs
|
||||
if (!ctx->gs)
|
||||
feState.vsVertexSize--;
|
||||
}
|
||||
|
||||
// other (non-SGV) slots start at VERTEX_ATTRIB_START_SLOT
|
||||
feState.vsVertexSize += VERTEX_ATTRIB_START_SLOT;
|
||||
|
||||
// The PA in the clipper does not handle BE vertex sizes
|
||||
// different from FE. Increase vertexsize only for the cases that needed it
|
||||
|
||||
// primid needs a slot
|
||||
if (ctx->fs->info.base.uses_primid)
|
||||
feState.vsVertexSize++;
|
||||
// sprite coord enable
|
||||
if (ctx->rasterizer->sprite_coord_enable)
|
||||
feState.vsVertexSize++;
|
||||
|
||||
|
||||
if (ctx->rasterizer->flatshade_first) {
|
||||
feState.provokingVertex = {1, 0, 0};
|
||||
|
@@ -414,7 +414,10 @@ BuilderSWR::swr_gs_llvm_emit_vertex(const struct lp_build_tgsi_gs_iface *gs_base
|
||||
} else if (iface->info->output_semantic_name[attrib] == TGSI_SEMANTIC_POSITION) {
|
||||
attribSlot = VERTEX_POSITION_SLOT;
|
||||
} else {
|
||||
attribSlot = VERTEX_ATTRIB_START_SLOT + attrib - 1;
|
||||
attribSlot = VERTEX_ATTRIB_START_SLOT + attrib;
|
||||
if (iface->info->writes_position) {
|
||||
attribSlot--;
|
||||
}
|
||||
}
|
||||
|
||||
#if USE_SIMD16_FRONTEND
|
||||
@@ -923,6 +926,33 @@ swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key)
|
||||
return func;
|
||||
}
|
||||
|
||||
unsigned
|
||||
swr_so_adjust_attrib(unsigned in_attrib,
|
||||
swr_vertex_shader *swr_vs)
|
||||
{
|
||||
ubyte semantic_name;
|
||||
unsigned attrib;
|
||||
|
||||
attrib = in_attrib + VERTEX_ATTRIB_START_SLOT;
|
||||
|
||||
if (swr_vs) {
|
||||
semantic_name = swr_vs->info.base.output_semantic_name[in_attrib];
|
||||
if (semantic_name == TGSI_SEMANTIC_POSITION) {
|
||||
attrib = VERTEX_POSITION_SLOT;
|
||||
} else if (semantic_name == TGSI_SEMANTIC_PSIZE) {
|
||||
attrib = VERTEX_SGV_SLOT;
|
||||
} else if (semantic_name == TGSI_SEMANTIC_LAYER) {
|
||||
attrib = VERTEX_SGV_SLOT;
|
||||
} else {
|
||||
if (swr_vs->info.base.writes_position) {
|
||||
attrib--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return attrib;
|
||||
}
|
||||
|
||||
static unsigned
|
||||
locate_linkage(ubyte name, ubyte index, struct tgsi_shader_info *info)
|
||||
{
|
||||
|
@@ -30,6 +30,9 @@ struct swr_jit_fs_key;
|
||||
struct swr_jit_vs_key;
|
||||
struct swr_jit_gs_key;
|
||||
|
||||
unsigned swr_so_adjust_attrib(unsigned in_attrib,
|
||||
swr_vertex_shader *swr_vs);
|
||||
|
||||
PFN_VERTEX_FUNC
|
||||
swr_compile_vs(struct swr_context *ctx, swr_jit_vs_key &key);
|
||||
|
||||
|
@@ -345,13 +345,14 @@ swr_create_vs_state(struct pipe_context *pipe,
|
||||
// soState.streamToRasterizer not used
|
||||
|
||||
for (uint32_t i = 0; i < stream_output->num_outputs; i++) {
|
||||
unsigned attrib_slot = stream_output->output[i].register_index;
|
||||
attrib_slot = swr_so_adjust_attrib(attrib_slot, swr_vs);
|
||||
swr_vs->soState.streamMasks[stream_output->output[i].stream] |=
|
||||
1 << (stream_output->output[i].register_index - 1);
|
||||
(1 << attrib_slot);
|
||||
}
|
||||
for (uint32_t i = 0; i < MAX_SO_STREAMS; i++) {
|
||||
swr_vs->soState.streamNumEntries[i] =
|
||||
_mm_popcnt_u32(swr_vs->soState.streamMasks[i]);
|
||||
swr_vs->soState.vertexAttribOffset[i] = VERTEX_ATTRIB_START_SLOT; // TODO: optimize
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -212,14 +212,16 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
|
||||
if (vc4_tile_blit(pctx, blit_info))
|
||||
return;
|
||||
|
||||
if (util_try_blit_via_copy_region(pctx, &info)) {
|
||||
return; /* done */
|
||||
}
|
||||
|
||||
if (info.mask & PIPE_MASK_S) {
|
||||
fprintf(stderr, "cannot blit stencil, skipping\n");
|
||||
if (util_try_blit_via_copy_region(pctx, &info))
|
||||
return;
|
||||
|
||||
info.mask &= ~PIPE_MASK_S;
|
||||
fprintf(stderr, "cannot blit stencil, skipping\n");
|
||||
}
|
||||
|
||||
vc4_render_blit(pctx, &info);
|
||||
if (vc4_render_blit(pctx, &info))
|
||||
return;
|
||||
|
||||
fprintf(stderr, "Unsupported blit\n");
|
||||
}
|
||||
|
@@ -409,7 +409,7 @@ int virgl_encoder_set_index_buffer(struct virgl_context *ctx,
|
||||
virgl_encoder_write_res(ctx, res);
|
||||
if (ib) {
|
||||
virgl_encoder_write_dword(ctx->cbuf, ib->index_size);
|
||||
virgl_encoder_write_dword(ctx->cbuf, 0);
|
||||
virgl_encoder_write_dword(ctx->cbuf, ib->offset);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@@ -48,6 +48,15 @@ virgl_tgsi_transform_property(struct tgsi_transform_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
if (inst->Instruction.Precise)
|
||||
inst->Instruction.Precise = 0;
|
||||
ctx->emit_instruction(ctx, inst);
|
||||
}
|
||||
|
||||
struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
|
||||
{
|
||||
|
||||
@@ -61,6 +70,7 @@ struct tgsi_token *virgl_tgsi_transform(const struct tgsi_token *tokens_in)
|
||||
|
||||
memset(&transform, 0, sizeof(transform));
|
||||
transform.base.transform_property = virgl_tgsi_transform_property;
|
||||
transform.base.transform_instruction = virgl_tgsi_transform_instruction;
|
||||
tgsi_transform_shader(tokens_in, new_tokens, newLen, &transform.base);
|
||||
|
||||
return new_tokens;
|
||||
|
@@ -284,6 +284,7 @@ struct st_context_attribs
|
||||
};
|
||||
|
||||
struct st_context_iface;
|
||||
struct st_manager;
|
||||
|
||||
/**
|
||||
* Represent a windowing system drawable.
|
||||
@@ -316,6 +317,11 @@ struct st_framebuffer_iface
|
||||
*/
|
||||
uint32_t ID;
|
||||
|
||||
/**
|
||||
* The state tracker manager that manages this object.
|
||||
*/
|
||||
struct st_manager *state_manager;
|
||||
|
||||
/**
|
||||
* Available for the state tracker manager to use.
|
||||
*/
|
||||
@@ -375,6 +381,11 @@ struct st_context_iface
|
||||
void *st_context_private;
|
||||
void *st_manager_private;
|
||||
|
||||
/**
|
||||
* The state tracker manager that manages this object.
|
||||
*/
|
||||
struct st_manager *state_manager;
|
||||
|
||||
/**
|
||||
* The CSO context associated with this context in case we need to draw
|
||||
* something before swap buffers.
|
||||
@@ -483,6 +494,16 @@ struct st_manager
|
||||
*/
|
||||
void (*set_background_context)(struct st_context_iface *stctxi,
|
||||
struct util_queue_monitoring *queue_info);
|
||||
|
||||
/**
|
||||
* Destroy any private data used by the state tracker manager.
|
||||
*/
|
||||
void (*destroy)(struct st_manager *smapi);
|
||||
|
||||
/**
|
||||
* Available for the state tracker manager to use.
|
||||
*/
|
||||
void *st_manager_private;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -1183,26 +1183,30 @@ dri2_query_image(__DRIimage *image, int attrib, int *value)
|
||||
switch (attrib) {
|
||||
case __DRI_IMAGE_ATTRIB_STRIDE:
|
||||
whandle.type = DRM_API_HANDLE_TYPE_KMS;
|
||||
image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage);
|
||||
if (!image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage))
|
||||
return GL_FALSE;
|
||||
*value = whandle.stride;
|
||||
return GL_TRUE;
|
||||
case __DRI_IMAGE_ATTRIB_OFFSET:
|
||||
whandle.type = DRM_API_HANDLE_TYPE_KMS;
|
||||
image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage);
|
||||
if (!image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage))
|
||||
return GL_FALSE;
|
||||
*value = whandle.offset;
|
||||
return GL_TRUE;
|
||||
case __DRI_IMAGE_ATTRIB_HANDLE:
|
||||
whandle.type = DRM_API_HANDLE_TYPE_KMS;
|
||||
image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage);
|
||||
if (!image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage))
|
||||
return GL_FALSE;
|
||||
*value = whandle.handle;
|
||||
return GL_TRUE;
|
||||
case __DRI_IMAGE_ATTRIB_NAME:
|
||||
whandle.type = DRM_API_HANDLE_TYPE_SHARED;
|
||||
image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage);
|
||||
if (!image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage))
|
||||
return GL_FALSE;
|
||||
*value = whandle.handle;
|
||||
return GL_TRUE;
|
||||
case __DRI_IMAGE_ATTRIB_FD:
|
||||
@@ -1235,14 +1239,22 @@ dri2_query_image(__DRIimage *image, int attrib, int *value)
|
||||
return GL_TRUE;
|
||||
case __DRI_IMAGE_ATTRIB_MODIFIER_UPPER:
|
||||
whandle.type = DRM_API_HANDLE_TYPE_KMS;
|
||||
image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage);
|
||||
whandle.modifier = DRM_FORMAT_MOD_INVALID;
|
||||
if (!image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage))
|
||||
return GL_FALSE;
|
||||
if (whandle.modifier == DRM_FORMAT_MOD_INVALID)
|
||||
return GL_FALSE;
|
||||
*value = (whandle.modifier >> 32) & 0xffffffff;
|
||||
return GL_TRUE;
|
||||
case __DRI_IMAGE_ATTRIB_MODIFIER_LOWER:
|
||||
whandle.type = DRM_API_HANDLE_TYPE_KMS;
|
||||
image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage);
|
||||
whandle.modifier = DRM_FORMAT_MOD_INVALID;
|
||||
if (!image->texture->screen->resource_get_handle(image->texture->screen,
|
||||
NULL, image->texture, &whandle, usage))
|
||||
return GL_FALSE;
|
||||
if (whandle.modifier == DRM_FORMAT_MOD_INVALID)
|
||||
return GL_FALSE;
|
||||
*value = whandle.modifier & 0xffffffff;
|
||||
return GL_TRUE;
|
||||
default:
|
||||
|
@@ -158,6 +158,7 @@ dri_create_buffer(__DRIscreen * sPriv,
|
||||
dPriv->driverPrivate = (void *)drawable;
|
||||
p_atomic_set(&drawable->base.stamp, 1);
|
||||
drawable->base.ID = p_atomic_inc_return(&drifb_ID);
|
||||
drawable->base.state_manager = &screen->base;
|
||||
|
||||
return GL_TRUE;
|
||||
fail:
|
||||
|
@@ -133,6 +133,11 @@ dri_fill_in_modes(struct dri_screen *screen)
|
||||
MESA_FORMAT_B8G8R8A8_SRGB,
|
||||
MESA_FORMAT_B8G8R8X8_SRGB,
|
||||
MESA_FORMAT_B5G6R5_UNORM,
|
||||
#ifdef ANDROID
|
||||
/*
|
||||
* To reduce the risk of breaking non-Android users in stable release
|
||||
* let's keep these for Android alone until this is handled properly.
|
||||
*/
|
||||
|
||||
/* The 32-bit RGBA format must not precede the 32-bit BGRA format.
|
||||
* Likewise for RGBX and BGRX. Otherwise, the GLX client and the GLX
|
||||
@@ -154,6 +159,7 @@ dri_fill_in_modes(struct dri_screen *screen)
|
||||
|
||||
/* Required by Android, for HAL_PIXEL_FORMAT_RGBX_8888. */
|
||||
MESA_FORMAT_R8G8B8X8_UNORM,
|
||||
#endif
|
||||
};
|
||||
static const enum pipe_format pipe_formats[] = {
|
||||
PIPE_FORMAT_BGRA8888_UNORM,
|
||||
@@ -161,8 +167,14 @@ dri_fill_in_modes(struct dri_screen *screen)
|
||||
PIPE_FORMAT_BGRA8888_SRGB,
|
||||
PIPE_FORMAT_BGRX8888_SRGB,
|
||||
PIPE_FORMAT_B5G6R5_UNORM,
|
||||
#ifdef ANDROID
|
||||
/*
|
||||
* To reduce the risk of breaking non-Android users in stable release
|
||||
* let's keep these for Android alone until this is handled properly.
|
||||
*/
|
||||
PIPE_FORMAT_RGBA8888_UNORM,
|
||||
PIPE_FORMAT_RGBX8888_UNORM,
|
||||
#endif
|
||||
};
|
||||
mesa_format format;
|
||||
__DRIconfig **configs = NULL;
|
||||
@@ -457,6 +469,9 @@ dri_destroy_option_cache(struct dri_screen * screen)
|
||||
void
|
||||
dri_destroy_screen_helper(struct dri_screen * screen)
|
||||
{
|
||||
if (screen->base.destroy)
|
||||
screen->base.destroy(&screen->base);
|
||||
|
||||
if (screen->st_api && screen->st_api->destroy)
|
||||
screen->st_api->destroy(screen->st_api);
|
||||
|
||||
|
@@ -181,6 +181,9 @@ xmesa_close_display(Display *display)
|
||||
* xmdpy->screen->destroy(xmdpy->screen);
|
||||
* }
|
||||
*/
|
||||
|
||||
if (xmdpy->smapi->destroy)
|
||||
xmdpy->smapi->destroy(xmdpy->smapi);
|
||||
free(xmdpy->smapi);
|
||||
|
||||
XFree((char *) info);
|
||||
|
@@ -304,6 +304,7 @@ xmesa_create_st_framebuffer(XMesaDisplay xmdpy, XMesaBuffer b)
|
||||
stfbi->flush_front = xmesa_st_framebuffer_flush_front;
|
||||
stfbi->validate = xmesa_st_framebuffer_validate;
|
||||
stfbi->ID = p_atomic_inc_return(&xmesa_stfbi_ID);
|
||||
stfbi->state_manager = xmdpy->smapi;
|
||||
p_atomic_set(&stfbi->stamp, 1);
|
||||
stfbi->st_manager_private = (void *) xstfb;
|
||||
|
||||
|
@@ -439,6 +439,7 @@ osmesa_st_framebuffer_validate(struct st_context_iface *stctx,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static uint32_t osmesa_fb_ID = 0;
|
||||
|
||||
static struct st_framebuffer_iface *
|
||||
osmesa_create_st_framebuffer(void)
|
||||
@@ -448,6 +449,8 @@ osmesa_create_st_framebuffer(void)
|
||||
stfbi->flush_front = osmesa_st_framebuffer_flush_front;
|
||||
stfbi->validate = osmesa_st_framebuffer_validate;
|
||||
p_atomic_set(&stfbi->stamp, 1);
|
||||
stfbi->ID = p_atomic_inc_return(&osmesa_fb_ID);
|
||||
stfbi->state_manager = get_st_manager();
|
||||
}
|
||||
return stfbi;
|
||||
}
|
||||
@@ -508,6 +511,14 @@ osmesa_find_buffer(enum pipe_format color_format,
|
||||
static void
|
||||
osmesa_destroy_buffer(struct osmesa_buffer *osbuffer)
|
||||
{
|
||||
struct st_api *stapi = get_st_api();
|
||||
|
||||
/*
|
||||
* Notify the state manager that the associated framebuffer interface
|
||||
* is no longer valid.
|
||||
*/
|
||||
stapi->destroy_drawable(stapi, osbuffer->stfb);
|
||||
|
||||
FREE(osbuffer->stfb);
|
||||
FREE(osbuffer);
|
||||
}
|
||||
|
@@ -199,6 +199,9 @@ stw_cleanup(void)
|
||||
DeleteCriticalSection(&stw_dev->fb_mutex);
|
||||
DeleteCriticalSection(&stw_dev->ctx_mutex);
|
||||
|
||||
if (stw_dev->smapi->destroy)
|
||||
stw_dev->smapi->destroy(stw_dev->smapi);
|
||||
|
||||
FREE(stw_dev->smapi);
|
||||
stw_dev->stapi->destroy(stw_dev->stapi);
|
||||
|
||||
|
@@ -235,6 +235,7 @@ stw_st_create_framebuffer(struct stw_framebuffer *fb)
|
||||
stwfb->fb = fb;
|
||||
stwfb->stvis = fb->pfi->stvis;
|
||||
stwfb->base.ID = p_atomic_inc_return(&stwfb_ID);
|
||||
stwfb->base.state_manager = stw_dev->smapi;
|
||||
|
||||
stwfb->base.visual = &stwfb->stvis;
|
||||
p_atomic_set(&stwfb->base.stamp, 1);
|
||||
|
@@ -3,6 +3,7 @@ BLORP_FILES = \
|
||||
blorp/blorp.h \
|
||||
blorp/blorp_blit.c \
|
||||
blorp/blorp_clear.c \
|
||||
blorp/blorp_nir_builder.h \
|
||||
blorp/blorp_genX_exec.h \
|
||||
blorp/blorp_priv.h
|
||||
|
||||
|
@@ -912,6 +912,7 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
||||
memcpy(prog_data->base.base.param, param,
|
||||
sizeof(gl_constant_value*) * param_count);
|
||||
prog_data->base.base.nr_params = param_count;
|
||||
prog_data->base.base.nr_pull_params = 0;
|
||||
ralloc_free(param);
|
||||
}
|
||||
}
|
||||
|
@@ -566,7 +566,7 @@ void anv_CmdBindVertexBuffers(
|
||||
/* We have to defer setting up vertex buffer since we need the buffer
|
||||
* stride from the pipeline. */
|
||||
|
||||
assert(firstBinding + bindingCount < MAX_VBS);
|
||||
assert(firstBinding + bindingCount <= MAX_VBS);
|
||||
for (uint32_t i = 0; i < bindingCount; i++) {
|
||||
vb[firstBinding + i].buffer = anv_buffer_from_handle(pBuffers[i]);
|
||||
vb[firstBinding + i].offset = pOffsets[i];
|
||||
|
@@ -496,10 +496,6 @@ static const VkExtensionProperties device_extensions[] = {
|
||||
.extensionName = VK_KHR_VARIABLE_POINTERS_EXTENSION_NAME,
|
||||
.specVersion = 1,
|
||||
},
|
||||
{
|
||||
.extensionName = VK_KHX_MULTIVIEW_EXTENSION_NAME,
|
||||
.specVersion = 1,
|
||||
},
|
||||
};
|
||||
|
||||
static void *
|
||||
|
@@ -89,7 +89,7 @@ struct gen_l3_config;
|
||||
*/
|
||||
#define ANV_HZ_FC_VAL 1.0f
|
||||
|
||||
#define MAX_VBS 31
|
||||
#define MAX_VBS 28
|
||||
#define MAX_SETS 8
|
||||
#define MAX_RTS 8
|
||||
#define MAX_VIEWPORTS 16
|
||||
|
@@ -504,6 +504,7 @@ loader_dri3_copy_sub_buffer(struct loader_dri3_drawable *draw,
|
||||
x, y, width, height, __BLIT_FLAG_FLUSH);
|
||||
}
|
||||
|
||||
loader_dri3_swapbuffer_barrier(draw);
|
||||
dri3_fence_reset(draw->conn, back);
|
||||
dri3_copy_area(draw->conn,
|
||||
dri3_back_buffer(draw)->pixmap,
|
||||
@@ -595,6 +596,7 @@ loader_dri3_wait_gl(struct loader_dri3_drawable *draw)
|
||||
front->height,
|
||||
0, 0, front->width,
|
||||
front->height, __BLIT_FLAG_FLUSH);
|
||||
loader_dri3_swapbuffer_barrier(draw);
|
||||
loader_dri3_copy_drawable(draw, draw->drawable, front->pixmap);
|
||||
}
|
||||
|
||||
@@ -1258,6 +1260,7 @@ dri3_get_buffer(__DRIdrawable *driDrawable,
|
||||
}
|
||||
break;
|
||||
case loader_dri3_buffer_front:
|
||||
loader_dri3_swapbuffer_barrier(draw);
|
||||
dri3_fence_reset(draw->conn, new_buffer);
|
||||
dri3_copy_area(draw->conn,
|
||||
draw->drawable,
|
||||
@@ -1431,3 +1434,18 @@ loader_dri3_update_drawable_geometry(struct loader_dri3_drawable *draw)
|
||||
free(geom_reply);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Make sure the server has flushed all pending swap buffers to hardware
|
||||
* for this drawable. Ideally we'd want to send an X protocol request to
|
||||
* have the server block our connection until the swaps are complete. That
|
||||
* would avoid the potential round-trip here.
|
||||
*/
|
||||
void
|
||||
loader_dri3_swapbuffer_barrier(struct loader_dri3_drawable *draw)
|
||||
{
|
||||
int64_t ust, msc, sbc;
|
||||
|
||||
(void) loader_dri3_wait_for_sbc(draw, 0, &ust, &msc, &sbc);
|
||||
}
|
||||
|
@@ -241,4 +241,7 @@ loader_dri3_get_buffers(__DRIdrawable *driDrawable,
|
||||
|
||||
void
|
||||
loader_dri3_update_drawable_geometry(struct loader_dri3_drawable *draw);
|
||||
|
||||
void
|
||||
loader_dri3_swapbuffer_barrier(struct loader_dri3_drawable *draw);
|
||||
#endif
|
||||
|
@@ -1103,7 +1103,7 @@ brw_blorp_mcs_partial_resolve(struct brw_context *brw,
|
||||
DBG("%s to mt %p layers %u-%u\n", __FUNCTION__, mt,
|
||||
start_layer, start_layer + num_layers - 1);
|
||||
|
||||
assert(mt->aux_usage = ISL_AUX_USAGE_MCS);
|
||||
assert(mt->aux_usage == ISL_AUX_USAGE_MCS);
|
||||
|
||||
const mesa_format format = _mesa_get_srgb_format_linear(mt->format);
|
||||
enum isl_format isl_format = brw_blorp_to_isl_format(brw, format, true);
|
||||
|
@@ -1212,6 +1212,14 @@ brw_begin_perf_query(struct gl_context *ctx,
|
||||
obj->oa.begin_report_id = brw->perfquery.next_query_start_report_id;
|
||||
brw->perfquery.next_query_start_report_id += 2;
|
||||
|
||||
/* We flush the batchbuffer here to minimize the chances that MI_RPC
|
||||
* delimiting commands end up in different batchbuffers. If that's the
|
||||
* case, the measurement will include the time it takes for the kernel
|
||||
* scheduler to load a new request into the hardware. This is manifested in
|
||||
* tools like frameretrace by spikes in the "GPU Core Clocks" counter.
|
||||
*/
|
||||
intel_batchbuffer_flush(brw);
|
||||
|
||||
/* Take a starting OA counter snapshot. */
|
||||
brw->vtbl.emit_mi_report_perf_count(brw, obj->oa.bo, 0,
|
||||
obj->oa.begin_report_id);
|
||||
@@ -1298,14 +1306,6 @@ brw_end_perf_query(struct gl_context *ctx,
|
||||
obj->oa.begin_report_id + 1);
|
||||
}
|
||||
|
||||
/* We flush the batchbuffer here to minimize the chances that MI_RPC
|
||||
* delimiting commands end up in different batchbuffers. If that's the
|
||||
* case, the measurement will include the time it takes for the kernel
|
||||
* scheduler to load a new request into the hardware. This is manifested
|
||||
* in tools like frameretrace by spikes in the "GPU Core Clocks"
|
||||
* counter.
|
||||
*/
|
||||
intel_batchbuffer_flush(brw);
|
||||
--brw->perfquery.n_active_oa_queries;
|
||||
|
||||
/* NB: even though the query has now ended, it can't be accumulated
|
||||
|
@@ -101,9 +101,9 @@ get_isl_surf(struct brw_context *brw, struct intel_mipmap_tree *mt,
|
||||
assert(view->levels == 1 && view->array_len == 1);
|
||||
assert(*tile_x == 0 && *tile_y == 0);
|
||||
|
||||
offset += intel_miptree_get_tile_offsets(mt, view->base_level,
|
||||
view->base_array_layer,
|
||||
tile_x, tile_y);
|
||||
*offset += intel_miptree_get_tile_offsets(mt, view->base_level,
|
||||
view->base_array_layer,
|
||||
tile_x, tile_y);
|
||||
|
||||
/* Minify the logical dimensions of the texture. */
|
||||
const unsigned l = view->base_level - mt->first_level;
|
||||
|
@@ -187,7 +187,6 @@ get_blit_intratile_offset_el(const struct brw_context *brw,
|
||||
* The offsets we get from ISL in the tiled case are already aligned.
|
||||
* In the linear case, we need to do some of our own aligning.
|
||||
*/
|
||||
assert(mt->surf.row_pitch % 64 == 0);
|
||||
uint32_t delta = *base_address_offset & 63;
|
||||
assert(delta % mt->cpp == 0);
|
||||
*base_address_offset -= delta;
|
||||
@@ -831,11 +830,11 @@ intel_miptree_set_alpha_to_one(struct brw_context *brw,
|
||||
if (brw->gen >= 8) {
|
||||
OUT_RELOC64(mt->bo,
|
||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
||||
offset);
|
||||
mt->offset + offset);
|
||||
} else {
|
||||
OUT_RELOC(mt->bo,
|
||||
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
|
||||
offset);
|
||||
mt->offset + offset);
|
||||
}
|
||||
OUT_BATCH(0xffffffff); /* white, but only alpha gets written */
|
||||
ADVANCE_BATCH_TILED(dst_y_tiled, false);
|
||||
|
@@ -294,7 +294,7 @@ do_blit_bitmap( struct gl_context *ctx,
|
||||
color,
|
||||
irb->mt->surf.row_pitch,
|
||||
irb->mt->bo,
|
||||
0,
|
||||
irb->mt->offset,
|
||||
irb->mt->surf.tiling,
|
||||
dstx + px,
|
||||
dsty + py,
|
||||
|
@@ -358,7 +358,8 @@ _mesa_get_uniform(struct gl_context *ctx, GLuint program, GLint location,
|
||||
*/
|
||||
if (returnType == uni->type->base_type ||
|
||||
((returnType == GLSL_TYPE_INT || returnType == GLSL_TYPE_UINT) &&
|
||||
(uni->type->is_sampler() || uni->type->is_image()))) {
|
||||
(uni->type->is_sampler() || uni->type->is_image())) ||
|
||||
(returnType == GLSL_TYPE_UINT64 && uni->is_bindless)) {
|
||||
memcpy(paramsOut, src, bytes);
|
||||
} else {
|
||||
union gl_constant_value *const dst =
|
||||
|
@@ -399,7 +399,7 @@ find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id)
|
||||
}
|
||||
|
||||
struct rename_reg_pair {
|
||||
bool valid;
|
||||
int old_reg;
|
||||
int new_reg;
|
||||
};
|
||||
|
||||
@@ -568,7 +568,7 @@ public:
|
||||
|
||||
void simplify_cmp(void);
|
||||
|
||||
void rename_temp_registers(struct rename_reg_pair *renames);
|
||||
void rename_temp_registers(int num_renames, struct rename_reg_pair *renames);
|
||||
void get_first_temp_read(int *first_reads);
|
||||
void get_first_temp_write(int *first_writes);
|
||||
void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes);
|
||||
@@ -3790,7 +3790,7 @@ get_image_qualifiers(ir_dereference *ir, const glsl_type **type,
|
||||
for (unsigned i = 0; i < struct_type->length; i++) {
|
||||
if (!strcmp(struct_type->fields.structure[i].name,
|
||||
deref_record->field)) {
|
||||
*type = struct_type->fields.structure[i].type;
|
||||
*type = struct_type->fields.structure[i].type->without_array();
|
||||
*memory_coherent =
|
||||
struct_type->fields.structure[i].memory_coherent;
|
||||
*memory_volatile =
|
||||
@@ -4646,6 +4646,7 @@ glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
|
||||
mem_ctx = ralloc_context(NULL);
|
||||
ctx = NULL;
|
||||
prog = NULL;
|
||||
precise = 0;
|
||||
shader_program = NULL;
|
||||
shader = NULL;
|
||||
options = NULL;
|
||||
@@ -4835,37 +4836,36 @@ glsl_to_tgsi_visitor::simplify_cmp(void)
|
||||
|
||||
/* Replaces all references to a temporary register index with another index. */
|
||||
void
|
||||
glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames)
|
||||
glsl_to_tgsi_visitor::rename_temp_registers(int num_renames, struct rename_reg_pair *renames)
|
||||
{
|
||||
foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
|
||||
unsigned j;
|
||||
int k;
|
||||
for (j = 0; j < num_inst_src_regs(inst); j++) {
|
||||
if (inst->src[j].file == PROGRAM_TEMPORARY) {
|
||||
int old_idx = inst->src[j].index;
|
||||
if (renames[old_idx].valid)
|
||||
inst->src[j].index = renames[old_idx].new_reg;
|
||||
}
|
||||
if (inst->src[j].file == PROGRAM_TEMPORARY)
|
||||
for (k = 0; k < num_renames; k++)
|
||||
if (inst->src[j].index == renames[k].old_reg)
|
||||
inst->src[j].index = renames[k].new_reg;
|
||||
}
|
||||
|
||||
for (j = 0; j < inst->tex_offset_num_offset; j++) {
|
||||
if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) {
|
||||
int old_idx = inst->tex_offsets[j].index;
|
||||
if (renames[old_idx].valid)
|
||||
inst->tex_offsets[j].index = renames[old_idx].new_reg;
|
||||
}
|
||||
if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY)
|
||||
for (k = 0; k < num_renames; k++)
|
||||
if (inst->tex_offsets[j].index == renames[k].old_reg)
|
||||
inst->tex_offsets[j].index = renames[k].new_reg;
|
||||
}
|
||||
|
||||
if (inst->resource.file == PROGRAM_TEMPORARY) {
|
||||
int old_idx = inst->resource.index;
|
||||
if (renames[old_idx].valid)
|
||||
inst->resource.index = renames[old_idx].new_reg;
|
||||
for (k = 0; k < num_renames; k++)
|
||||
if (inst->resource.index == renames[k].old_reg)
|
||||
inst->resource.index = renames[k].new_reg;
|
||||
}
|
||||
|
||||
for (j = 0; j < num_inst_dst_regs(inst); j++) {
|
||||
if (inst->dst[j].file == PROGRAM_TEMPORARY) {
|
||||
int old_idx = inst->dst[j].index;
|
||||
if (renames[old_idx].valid)
|
||||
inst->dst[j].index = renames[old_idx].new_reg;}
|
||||
if (inst->dst[j].file == PROGRAM_TEMPORARY)
|
||||
for (k = 0; k < num_renames; k++)
|
||||
if (inst->dst[j].index == renames[k].old_reg)
|
||||
inst->dst[j].index = renames[k].new_reg;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5446,6 +5446,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
|
||||
int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
|
||||
struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
|
||||
int i, j;
|
||||
int num_renames = 0;
|
||||
|
||||
/* Read the indices of the last read and first write to each temp register
|
||||
* into an array so that we don't have to traverse the instruction list as
|
||||
@@ -5472,8 +5473,9 @@ glsl_to_tgsi_visitor::merge_registers(void)
|
||||
* as the register at index j. */
|
||||
if (first_writes[i] <= first_writes[j] &&
|
||||
last_reads[i] <= first_writes[j]) {
|
||||
renames[j].new_reg = i;
|
||||
renames[j].valid = true;
|
||||
renames[num_renames].old_reg = j;
|
||||
renames[num_renames].new_reg = i;
|
||||
num_renames++;
|
||||
|
||||
/* Update the first_writes and last_reads arrays with the new
|
||||
* values for the merged register index, and mark the newly unused
|
||||
@@ -5486,7 +5488,7 @@ glsl_to_tgsi_visitor::merge_registers(void)
|
||||
}
|
||||
}
|
||||
|
||||
rename_temp_registers(renames);
|
||||
rename_temp_registers(num_renames, renames);
|
||||
ralloc_free(renames);
|
||||
ralloc_free(last_reads);
|
||||
ralloc_free(first_writes);
|
||||
@@ -5501,6 +5503,7 @@ glsl_to_tgsi_visitor::renumber_registers(void)
|
||||
int new_index = 0;
|
||||
int *first_writes = ralloc_array(mem_ctx, int, this->next_temp);
|
||||
struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp);
|
||||
int num_renames = 0;
|
||||
|
||||
for (i = 0; i < this->next_temp; i++) {
|
||||
first_writes[i] = -1;
|
||||
@@ -5510,13 +5513,14 @@ glsl_to_tgsi_visitor::renumber_registers(void)
|
||||
for (i = 0; i < this->next_temp; i++) {
|
||||
if (first_writes[i] < 0) continue;
|
||||
if (i != new_index) {
|
||||
renames[i].new_reg = new_index;
|
||||
renames[i].valid = true;
|
||||
renames[num_renames].old_reg = i;
|
||||
renames[num_renames].new_reg = new_index;
|
||||
num_renames++;
|
||||
}
|
||||
new_index++;
|
||||
}
|
||||
|
||||
rename_temp_registers(renames);
|
||||
rename_temp_registers(num_renames, renames);
|
||||
this->next_temp = new_index;
|
||||
ralloc_free(renames);
|
||||
ralloc_free(first_writes);
|
||||
|
@@ -61,9 +61,13 @@
|
||||
#include "util/list.h"
|
||||
|
||||
struct hash_table;
|
||||
static struct hash_table *st_fbi_ht; /* framebuffer iface objects hash table */
|
||||
struct st_manager_private
|
||||
{
|
||||
struct hash_table *stfbi_ht; /* framebuffer iface objects hash table */
|
||||
mtx_t st_mutex;
|
||||
};
|
||||
|
||||
static mtx_t st_mutex = _MTX_INITIALIZER_NP;
|
||||
static void st_manager_destroy(struct st_manager *);
|
||||
|
||||
/**
|
||||
* Map an attachment to a buffer index.
|
||||
@@ -511,45 +515,63 @@ st_framebuffer_iface_equal(const void *a, const void *b)
|
||||
|
||||
|
||||
static boolean
|
||||
st_framebuffer_iface_lookup(const struct st_framebuffer_iface *stfbi)
|
||||
st_framebuffer_iface_lookup(struct st_manager *smapi,
|
||||
const struct st_framebuffer_iface *stfbi)
|
||||
{
|
||||
struct st_manager_private *smPriv =
|
||||
(struct st_manager_private *)smapi->st_manager_private;
|
||||
struct hash_entry *entry;
|
||||
|
||||
mtx_lock(&st_mutex);
|
||||
entry = _mesa_hash_table_search(st_fbi_ht, stfbi);
|
||||
mtx_unlock(&st_mutex);
|
||||
assert(smPriv);
|
||||
assert(smPriv->stfbi_ht);
|
||||
|
||||
mtx_lock(&smPriv->st_mutex);
|
||||
entry = _mesa_hash_table_search(smPriv->stfbi_ht, stfbi);
|
||||
mtx_unlock(&smPriv->st_mutex);
|
||||
|
||||
return entry != NULL;
|
||||
}
|
||||
|
||||
|
||||
static boolean
|
||||
st_framebuffer_iface_insert(struct st_framebuffer_iface *stfbi)
|
||||
st_framebuffer_iface_insert(struct st_manager *smapi,
|
||||
struct st_framebuffer_iface *stfbi)
|
||||
{
|
||||
struct st_manager_private *smPriv =
|
||||
(struct st_manager_private *)smapi->st_manager_private;
|
||||
struct hash_entry *entry;
|
||||
|
||||
mtx_lock(&st_mutex);
|
||||
entry = _mesa_hash_table_insert(st_fbi_ht, stfbi, stfbi);
|
||||
mtx_unlock(&st_mutex);
|
||||
assert(smPriv);
|
||||
assert(smPriv->stfbi_ht);
|
||||
|
||||
mtx_lock(&smPriv->st_mutex);
|
||||
entry = _mesa_hash_table_insert(smPriv->stfbi_ht, stfbi, stfbi);
|
||||
mtx_unlock(&smPriv->st_mutex);
|
||||
|
||||
return entry != NULL;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
st_framebuffer_iface_remove(struct st_framebuffer_iface *stfbi)
|
||||
st_framebuffer_iface_remove(struct st_manager *smapi,
|
||||
struct st_framebuffer_iface *stfbi)
|
||||
{
|
||||
struct st_manager_private *smPriv =
|
||||
(struct st_manager_private *)smapi->st_manager_private;
|
||||
struct hash_entry *entry;
|
||||
|
||||
mtx_lock(&st_mutex);
|
||||
entry = _mesa_hash_table_search(st_fbi_ht, stfbi);
|
||||
if (!smPriv || !smPriv->stfbi_ht)
|
||||
return;
|
||||
|
||||
mtx_lock(&smPriv->st_mutex);
|
||||
entry = _mesa_hash_table_search(smPriv->stfbi_ht, stfbi);
|
||||
if (!entry)
|
||||
goto unlock;
|
||||
|
||||
_mesa_hash_table_remove(st_fbi_ht, entry);
|
||||
_mesa_hash_table_remove(smPriv->stfbi_ht, entry);
|
||||
|
||||
unlock:
|
||||
mtx_unlock(&st_mutex);
|
||||
mtx_unlock(&smPriv->st_mutex);
|
||||
}
|
||||
|
||||
|
||||
@@ -561,7 +583,10 @@ static void
|
||||
st_api_destroy_drawable(struct st_api *stapi,
|
||||
struct st_framebuffer_iface *stfbi)
|
||||
{
|
||||
st_framebuffer_iface_remove(stfbi);
|
||||
if (!stfbi)
|
||||
return;
|
||||
|
||||
st_framebuffer_iface_remove(stfbi->state_manager, stfbi);
|
||||
}
|
||||
|
||||
|
||||
@@ -572,16 +597,24 @@ st_api_destroy_drawable(struct st_api *stapi,
|
||||
static void
|
||||
st_framebuffers_purge(struct st_context *st)
|
||||
{
|
||||
struct st_context_iface *st_iface = &st->iface;
|
||||
struct st_manager *smapi = st_iface->state_manager;
|
||||
struct st_framebuffer *stfb, *next;
|
||||
|
||||
assert(smapi);
|
||||
|
||||
LIST_FOR_EACH_ENTRY_SAFE_REV(stfb, next, &st->winsys_buffers, head) {
|
||||
struct st_framebuffer_iface *stfbi = stfb->iface;
|
||||
|
||||
assert(stfbi);
|
||||
|
||||
/**
|
||||
* If the corresponding framebuffer interface object no longer exists,
|
||||
* remove the framebuffer object from the context's winsys buffers list,
|
||||
* and unreference the framebuffer object, so its resources can be
|
||||
* deleted.
|
||||
*/
|
||||
if (!st_framebuffer_iface_lookup(stfb->iface)) {
|
||||
if (!st_framebuffer_iface_lookup(smapi, stfbi)) {
|
||||
LIST_DEL(&stfb->head);
|
||||
st_framebuffer_reference(&stfb, NULL);
|
||||
}
|
||||
@@ -609,6 +642,16 @@ st_context_flush(struct st_context_iface *stctxi, unsigned flags,
|
||||
|
||||
if (flags & ST_FLUSH_FRONT)
|
||||
st_manager_flush_frontbuffer(st);
|
||||
|
||||
/* DRI3 changes the framebuffer after SwapBuffers, but we need to invoke
|
||||
* st_manager_validate_framebuffers to notice that.
|
||||
*
|
||||
* Set gfx_shaders_may_be_dirty to invoke st_validate_state in the next
|
||||
* draw call, which will invoke st_manager_validate_framebuffers, but it
|
||||
* won't dirty states if there is no change.
|
||||
*/
|
||||
if (flags & ST_FLUSH_END_OF_FRAME)
|
||||
st->gfx_shaders_may_be_dirty = true;
|
||||
}
|
||||
|
||||
static boolean
|
||||
@@ -778,6 +821,21 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Create a hash table for the framebuffer interface objects
|
||||
* if it has not been created for this st manager.
|
||||
*/
|
||||
if (smapi->st_manager_private == NULL) {
|
||||
struct st_manager_private *smPriv;
|
||||
|
||||
smPriv = CALLOC_STRUCT(st_manager_private);
|
||||
mtx_init(&smPriv->st_mutex, mtx_plain);
|
||||
smPriv->stfbi_ht = _mesa_hash_table_create(NULL,
|
||||
st_framebuffer_iface_hash,
|
||||
st_framebuffer_iface_equal);
|
||||
smapi->st_manager_private = smPriv;
|
||||
smapi->destroy = st_manager_destroy;
|
||||
}
|
||||
|
||||
if (attribs->flags & ST_CONTEXT_FLAG_ROBUST_ACCESS)
|
||||
ctx_flags |= PIPE_CONTEXT_ROBUST_BUFFER_ACCESS;
|
||||
|
||||
@@ -846,6 +904,7 @@ st_api_create_context(struct st_api *stapi, struct st_manager *smapi,
|
||||
st->iface.st_context_private = (void *) smapi;
|
||||
st->iface.cso_context = st->cso_context;
|
||||
st->iface.pipe = st->pipe;
|
||||
st->iface.state_manager = smapi;
|
||||
|
||||
*error = ST_CONTEXT_SUCCESS;
|
||||
return &st->iface;
|
||||
@@ -888,7 +947,7 @@ st_framebuffer_reuse_or_create(struct st_context *st,
|
||||
/* add the referenced framebuffer interface object to
|
||||
* the framebuffer interface object hash table.
|
||||
*/
|
||||
if (!st_framebuffer_iface_insert(stfbi)) {
|
||||
if (!st_framebuffer_iface_insert(stfbi->state_manager, stfbi)) {
|
||||
st_framebuffer_reference(&cur, NULL);
|
||||
return NULL;
|
||||
}
|
||||
@@ -964,8 +1023,6 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
|
||||
static void
|
||||
st_api_destroy(struct st_api *stapi)
|
||||
{
|
||||
_mesa_hash_table_destroy(st_fbi_ht, NULL);
|
||||
mtx_destroy(&st_mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1051,6 +1108,19 @@ st_manager_add_color_renderbuffer(struct st_context *st,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
st_manager_destroy(struct st_manager *smapi)
|
||||
{
|
||||
struct st_manager_private *smPriv = smapi->st_manager_private;
|
||||
|
||||
if (smPriv && smPriv->stfbi_ht) {
|
||||
_mesa_hash_table_destroy(smPriv->stfbi_ht, NULL);
|
||||
mtx_destroy(&smPriv->st_mutex);
|
||||
free(smPriv);
|
||||
smapi->st_manager_private = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned
|
||||
get_version(struct pipe_screen *screen,
|
||||
struct st_config_options *options, gl_api api)
|
||||
@@ -1106,12 +1176,5 @@ static const struct st_api st_gl_api = {
|
||||
struct st_api *
|
||||
st_gl_api_create(void)
|
||||
{
|
||||
/* Create a hash table for all the framebuffer interface objects */
|
||||
|
||||
mtx_init(&st_mutex, mtx_plain);
|
||||
st_fbi_ht = _mesa_hash_table_create(NULL,
|
||||
st_framebuffer_iface_hash,
|
||||
st_framebuffer_iface_equal);
|
||||
|
||||
return (struct st_api *) &st_gl_api;
|
||||
}
|
||||
|
Reference in New Issue
Block a user