Compare commits
37 Commits
mesa-19.1.
...
mesa-17.0.
Author | SHA1 | Date | |
---|---|---|---|
|
d283ec0a7b | ||
|
9577977266 | ||
|
8621961d43 | ||
|
7d5a98f106 | ||
|
4e6445caa9 | ||
|
e405d0d3c6 | ||
|
0c4b8c75e2 | ||
|
e35cfa15cf | ||
|
34f902e17e | ||
|
e4cf4690d1 | ||
|
7f6c6b9101 | ||
|
23ead4c7b2 | ||
|
7856dfdbab | ||
|
41b93b1fe0 | ||
|
8857256214 | ||
|
516b34908d | ||
|
0645c0e0d4 | ||
|
6aade42111 | ||
|
4dc6ed53c1 | ||
|
cfe14ab39c | ||
|
250b1cad3b | ||
|
9318d81574 | ||
|
00cdbfe6ef | ||
|
83deab2f6a | ||
|
de2dfa1dc3 | ||
|
5c2951c7f9 | ||
|
e3bfa959a8 | ||
|
a259b800df | ||
|
45f13c2be0 | ||
|
b72f8de873 | ||
|
1cc5774e5e | ||
|
50a607cf70 | ||
|
613154fc8f | ||
|
ff81869f0d | ||
|
9cb066601c | ||
|
45297f7e4a | ||
|
acc7837799 |
@@ -43,6 +43,7 @@ LOCAL_CFLAGS += \
|
||||
-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
|
||||
|
||||
LOCAL_CFLAGS += \
|
||||
-DENABLE_SHADER_CACHE \
|
||||
-D__STDC_LIMIT_MACROS \
|
||||
-DHAVE___BUILTIN_EXPECT \
|
||||
-DHAVE___BUILTIN_FFS \
|
||||
|
@@ -1766,6 +1766,7 @@ if test -n "$with_vulkan_drivers"; then
|
||||
fi
|
||||
|
||||
|
||||
DEFINES="$DEFINES -DENABLE_SHADER_CACHE"
|
||||
AM_CONDITIONAL(NEED_MEGADRIVER, test -n "$DRI_DIRS")
|
||||
AM_CONDITIONAL(NEED_LIBMESA, test "x$enable_glx" = xxlib -o \
|
||||
"x$enable_osmesa" = xyes -o \
|
||||
|
@@ -32,9 +32,6 @@ lib_LTLIBRARIES = libvulkan_radeon.la
|
||||
# The gallium includes are for the util/u_math.h include from main/macros.h
|
||||
|
||||
AM_CPPFLAGS = \
|
||||
$(AMDGPU_CFLAGS) \
|
||||
$(VALGRIND_CFLAGS) \
|
||||
$(DEFINES) \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_builddir)/src \
|
||||
-I$(top_srcdir)/src \
|
||||
@@ -48,7 +45,10 @@ AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/src/mesa \
|
||||
-I$(top_srcdir)/src/mesa/drivers/dri/common \
|
||||
-I$(top_srcdir)/src/gallium/auxiliary \
|
||||
-I$(top_srcdir)/src/gallium/include
|
||||
-I$(top_srcdir)/src/gallium/include \
|
||||
$(AMDGPU_CFLAGS) \
|
||||
$(VALGRIND_CFLAGS) \
|
||||
$(DEFINES)
|
||||
|
||||
AM_CFLAGS = \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
|
@@ -989,8 +989,7 @@ VkResult radv_QueueSubmit(
|
||||
if (queue->device->trace_bo)
|
||||
*queue->device->trace_id_ptr = 0;
|
||||
|
||||
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
|
||||
pSubmits[i].commandBufferCount,
|
||||
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
|
||||
(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
|
||||
b ? pSubmits[i].waitSemaphoreCount : 0,
|
||||
(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
|
||||
|
@@ -30,7 +30,7 @@
|
||||
|
||||
/* Operations for lower_instructions() */
|
||||
#define SUB_TO_ADD_NEG 0x01
|
||||
#define DIV_TO_MUL_RCP 0x02
|
||||
#define FDIV_TO_MUL_RCP 0x02
|
||||
#define EXP_TO_EXP2 0x04
|
||||
#define POW_TO_EXP2 0x08
|
||||
#define LOG_TO_LOG2 0x10
|
||||
@@ -49,6 +49,8 @@
|
||||
#define FIND_LSB_TO_FLOAT_CAST 0x20000
|
||||
#define FIND_MSB_TO_FLOAT_CAST 0x40000
|
||||
#define IMUL_HIGH_TO_MUL 0x80000
|
||||
#define DDIV_TO_MUL_RCP 0x100000
|
||||
#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)
|
||||
|
||||
/**
|
||||
* \see class lower_packing_builtins_visitor
|
||||
|
@@ -54,8 +54,8 @@
|
||||
* want to recognize add(op0, neg(op1)) or the other way around to
|
||||
* produce a subtract anyway.
|
||||
*
|
||||
* DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
|
||||
* --------------------------------------
|
||||
* FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
|
||||
* ---------------------------------------------------------
|
||||
* Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
|
||||
*
|
||||
* Many GPUs don't have a divide instruction (945 and 965 included),
|
||||
@@ -63,9 +63,11 @@
|
||||
* reciprocal. By breaking the operation down, constant reciprocals
|
||||
* can get constant folded.
|
||||
*
|
||||
* DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
|
||||
* handles the integer case, converting to and from floating point so that
|
||||
* RCP is possible.
|
||||
* FDIV_TO_MUL_RCP only lowers single-precision floating point division;
|
||||
* DDIV_TO_MUL_RCP only lowers double-precision floating point division.
|
||||
* DIV_TO_MUL_RCP is a convenience macro that sets both flags.
|
||||
* INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
|
||||
* point so that RCP is possible.
|
||||
*
|
||||
* EXP_TO_EXP2 and LOG_TO_LOG2:
|
||||
* ----------------------------
|
||||
@@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
|
||||
/* Don't generate new IR that would need to be lowered in an additional
|
||||
* pass.
|
||||
*/
|
||||
if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
|
||||
if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
|
||||
(lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
|
||||
div_to_mul_rcp(div_expr);
|
||||
|
||||
ir_expression *const floor_expr =
|
||||
@@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
|
||||
case ir_binop_div:
|
||||
if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
|
||||
int_div_to_mul_rcp(ir);
|
||||
else if ((ir->operands[1]->type->is_float() ||
|
||||
ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
|
||||
else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
|
||||
(ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
|
||||
div_to_mul_rcp(ir);
|
||||
break;
|
||||
|
||||
|
@@ -37,6 +37,8 @@
|
||||
|
||||
bool error = false;
|
||||
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
|
||||
static void
|
||||
expect_equal(uint64_t actual, uint64_t expected, const char *test)
|
||||
{
|
||||
@@ -378,10 +380,12 @@ test_put_key_and_get_key(void)
|
||||
|
||||
disk_cache_destroy(cache);
|
||||
}
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
int err;
|
||||
|
||||
test_disk_cache_create();
|
||||
@@ -392,6 +396,7 @@ main(void)
|
||||
|
||||
err = rmrf_local(CACHE_TEST_TMP);
|
||||
expect_equal(err, 0, "Removing " CACHE_TEST_TMP " again");
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
||||
return error ? 1 : 0;
|
||||
}
|
||||
|
@@ -210,43 +210,27 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
||||
return true;
|
||||
|
||||
case nir_type_int:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
int64_t val;
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
val = load->value.i32[new_swizzle[i]];
|
||||
break;
|
||||
case 64:
|
||||
val = load->value.i64[new_swizzle[i]];
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
|
||||
if (val != const_val->data.i)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
case nir_type_uint:
|
||||
case nir_type_bool32:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
uint64_t val;
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
val = load->value.u32[new_swizzle[i]];
|
||||
break;
|
||||
case 64:
|
||||
val = load->value.u64[new_swizzle[i]];
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
if (load->value.u32[new_swizzle[i]] !=
|
||||
(uint32_t)const_val->data.u)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
if (val != const_val->data.u)
|
||||
return false;
|
||||
case 64:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
if (load->value.u64[new_swizzle[i]] != const_val->data.u)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
return true;
|
||||
|
||||
default:
|
||||
unreachable("Invalid alu source type");
|
||||
|
@@ -1199,7 +1199,8 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
|
||||
is_vertex_input = false;
|
||||
location += vtn_var->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0;
|
||||
} else {
|
||||
unreachable("Location must be on input or output variable");
|
||||
vtn_warn("Location must be on input or output variable");
|
||||
return;
|
||||
}
|
||||
|
||||
if (vtn_var->var) {
|
||||
|
@@ -527,7 +527,7 @@ lp_build_gather(struct gallivm_state *gallivm,
|
||||
if (vec_zext) {
|
||||
res = LLVMBuildZExt(gallivm->builder, res, res_t, "");
|
||||
if (vector_justify) {
|
||||
#if PIPE_ARCH_BIG_ENDIAN
|
||||
#ifdef PIPE_ARCH_BIG_ENDIAN
|
||||
unsigned sv = dst_type.width - src_width;
|
||||
res = LLVMBuildShl(gallivm->builder, res,
|
||||
lp_build_const_int_vec(gallivm, res_type, sv), "");
|
||||
|
@@ -149,6 +149,7 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
|
||||
break;
|
||||
case CPUFREQ_MAXIMUM:
|
||||
snprintf(gr->name, sizeof(gr->name), "%s-Max", cfi->name);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
|
@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
|
@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
|
@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2017 by the following authors:
|
||||
@@ -2028,6 +2028,8 @@ static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val)
|
||||
}
|
||||
|
||||
#define REG_A5XX_GRAS_SU_CNTL 0x0000e090
|
||||
#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001
|
||||
#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002
|
||||
#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004
|
||||
#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8
|
||||
#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3
|
||||
@@ -2909,6 +2911,12 @@ static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val)
|
||||
{
|
||||
return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A5XX_VPC_PACK_NUMNONPOSVAR__MASK;
|
||||
}
|
||||
#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00
|
||||
#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8
|
||||
static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val)
|
||||
{
|
||||
return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK;
|
||||
}
|
||||
|
||||
#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0
|
||||
|
||||
@@ -3049,19 +3057,15 @@ static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val)
|
||||
{
|
||||
return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK;
|
||||
}
|
||||
#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000
|
||||
#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x3ff00000
|
||||
#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20
|
||||
static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val)
|
||||
{
|
||||
return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK;
|
||||
}
|
||||
#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0xc0000000
|
||||
#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 30
|
||||
static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val)
|
||||
{
|
||||
return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK;
|
||||
}
|
||||
#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000
|
||||
#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000
|
||||
#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000
|
||||
|
||||
static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; }
|
||||
|
||||
@@ -3167,6 +3171,12 @@ static inline uint32_t A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val)
|
||||
#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b
|
||||
|
||||
#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590
|
||||
#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008
|
||||
#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3
|
||||
static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
|
||||
{
|
||||
return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
|
||||
}
|
||||
#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
|
||||
#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
|
||||
static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
|
||||
@@ -3259,6 +3269,12 @@ static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
|
||||
#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad
|
||||
|
||||
#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0
|
||||
#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008
|
||||
#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3
|
||||
static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
|
||||
{
|
||||
return ((val) << A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK;
|
||||
}
|
||||
#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
|
||||
#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
|
||||
static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
|
||||
@@ -3328,6 +3344,7 @@ static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val)
|
||||
{
|
||||
return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK;
|
||||
}
|
||||
#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400
|
||||
|
||||
#define REG_A5XX_UNKNOWN_E5DB 0x0000e5db
|
||||
|
||||
@@ -3381,6 +3398,12 @@ static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_sample
|
||||
#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764
|
||||
|
||||
#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784
|
||||
#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001
|
||||
#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0
|
||||
static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
|
||||
{
|
||||
return ((val) << A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
|
||||
}
|
||||
|
||||
#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785
|
||||
#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f
|
||||
|
@@ -60,12 +60,6 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
|
||||
info->restart_index : 0xffffffff);
|
||||
|
||||
/* points + psize -> spritelist: */
|
||||
if (ctx->rasterizer->point_size_per_vertex &&
|
||||
fd5_emit_get_vp(emit)->writes_psize &&
|
||||
(info->mode == PIPE_PRIM_POINTS))
|
||||
primtype = DI_PT_POINTLIST_PSIZE;
|
||||
|
||||
fd5_emit_render_cntl(ctx, false);
|
||||
fd5_draw_emit(ctx->batch, ring, primtype,
|
||||
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
|
||||
@@ -214,35 +208,44 @@ fd5_clear(struct fd_context *ctx, unsigned buffers,
|
||||
if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
|
||||
continue;
|
||||
|
||||
enum pipe_format pfmt = pfb->cbufs[i]->format;
|
||||
|
||||
// XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
|
||||
float f[4];
|
||||
switch (fd5_pipe2swap(pfb->cbufs[i]->format)) {
|
||||
union pipe_color_union swapped;
|
||||
switch (fd5_pipe2swap(pfmt)) {
|
||||
case WZYX:
|
||||
f[0] = color->f[0];
|
||||
f[1] = color->f[1];
|
||||
f[2] = color->f[2];
|
||||
f[3] = color->f[3];
|
||||
swapped.ui[0] = color->ui[0];
|
||||
swapped.ui[1] = color->ui[1];
|
||||
swapped.ui[2] = color->ui[2];
|
||||
swapped.ui[3] = color->ui[3];
|
||||
break;
|
||||
case WXYZ:
|
||||
f[2] = color->f[0];
|
||||
f[1] = color->f[1];
|
||||
f[0] = color->f[2];
|
||||
f[3] = color->f[3];
|
||||
swapped.ui[2] = color->ui[0];
|
||||
swapped.ui[1] = color->ui[1];
|
||||
swapped.ui[0] = color->ui[2];
|
||||
swapped.ui[3] = color->ui[3];
|
||||
break;
|
||||
case ZYXW:
|
||||
f[3] = color->f[0];
|
||||
f[0] = color->f[1];
|
||||
f[1] = color->f[2];
|
||||
f[2] = color->f[3];
|
||||
swapped.ui[3] = color->ui[0];
|
||||
swapped.ui[0] = color->ui[1];
|
||||
swapped.ui[1] = color->ui[2];
|
||||
swapped.ui[2] = color->ui[3];
|
||||
break;
|
||||
case XYZW:
|
||||
f[3] = color->f[0];
|
||||
f[2] = color->f[1];
|
||||
f[1] = color->f[2];
|
||||
f[0] = color->f[3];
|
||||
swapped.ui[3] = color->ui[0];
|
||||
swapped.ui[2] = color->ui[1];
|
||||
swapped.ui[1] = color->ui[2];
|
||||
swapped.ui[0] = color->ui[3];
|
||||
break;
|
||||
}
|
||||
util_pack_color(f, pfb->cbufs[i]->format, &uc);
|
||||
|
||||
if (util_format_is_pure_uint(pfmt)) {
|
||||
util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1);
|
||||
} else if (util_format_is_pure_sint(pfmt)) {
|
||||
util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1);
|
||||
} else {
|
||||
util_pack_color(swapped.f, pfmt, &uc);
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
|
||||
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));
|
||||
|
@@ -366,6 +366,7 @@ fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
struct fd_resource *rsc = fd_resource(vb->buffer);
|
||||
enum pipe_format pfmt = elem->src_format;
|
||||
enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt);
|
||||
bool isint = util_format_is_pure_integer(pfmt);
|
||||
uint32_t off = vb->buffer_offset + elem->src_offset;
|
||||
uint32_t size = fd_bo_size(rsc->bo) - off;
|
||||
debug_assert(fmt != ~0);
|
||||
@@ -379,7 +380,8 @@ fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
OUT_RING(ring, A5XX_VFD_DECODE_INSTR_IDX(j) |
|
||||
A5XX_VFD_DECODE_INSTR_FORMAT(fmt) |
|
||||
COND(elem->instance_divisor, A5XX_VFD_DECODE_INSTR_INSTANCED) |
|
||||
0xc0000000); // XXX
|
||||
A5XX_VFD_DECODE_INSTR_UNK30 |
|
||||
COND(!isint, A5XX_VFD_DECODE_INSTR_FLOAT));
|
||||
OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1);
|
||||
|
@@ -109,7 +109,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
|
||||
OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format));
|
||||
OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
|
||||
COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));
|
||||
|
||||
/* when we support UBWC, these would be the system memory
|
||||
* addr/pitch/etc:
|
||||
|
@@ -336,10 +336,14 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
uint32_t pos_regid, psize_regid, color_regid[8];
|
||||
uint32_t face_regid, coord_regid, zwcoord_regid;
|
||||
uint32_t vcoord_regid, vertex_regid, instance_regid;
|
||||
enum a3xx_threadsize fssz;
|
||||
uint8_t psize_loc = ~0;
|
||||
int i, j;
|
||||
|
||||
setup_stages(emit, s);
|
||||
|
||||
fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS;
|
||||
|
||||
pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
|
||||
psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
|
||||
vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID);
|
||||
@@ -364,7 +368,7 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
|
||||
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
|
||||
zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
|
||||
vcoord_regid = (s[FS].v->total_in > 0) ? regid(0,0) : regid(63,0);
|
||||
vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0);
|
||||
|
||||
/* we could probably divide this up into things that need to be
|
||||
* emitted if frag-prog is dirty vs if vert-prog is dirty..
|
||||
@@ -472,8 +476,10 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
if (pos_regid != regid(63,0))
|
||||
ir3_link_add(&l, pos_regid, 0xf, l.max_loc);
|
||||
|
||||
if (psize_regid != regid(63,0))
|
||||
if (psize_regid != regid(63,0)) {
|
||||
psize_loc = l.max_loc;
|
||||
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
|
||||
}
|
||||
|
||||
if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
|
||||
!emit->key.binning_pass) {
|
||||
@@ -551,7 +557,8 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
}
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
|
||||
OUT_RING(ring, 0x00000881); /* XXX HLSQ_CONTROL_0 */
|
||||
OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
|
||||
0x00000880); /* XXX HLSQ_CONTROL_0 */
|
||||
OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
|
||||
OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
|
||||
0xfcfcfc00); /* XXX */
|
||||
@@ -564,7 +571,8 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
|
||||
OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
|
||||
COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) |
|
||||
0x4000e | /* XXX set pretty much everywhere */
|
||||
0x40006 | /* XXX set pretty much everywhere */
|
||||
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
|
||||
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
|
||||
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
|
||||
A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..
|
||||
@@ -692,7 +700,7 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
|
||||
OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
|
||||
(s[VS].v->writes_psize ? 0x0c00 : 0xff00)); // XXX
|
||||
A5XX_VPC_PACK_PSIZELOC(psize_loc));
|
||||
|
||||
OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8);
|
||||
for (i = 0; i < 8; i++)
|
||||
|
@@ -76,11 +76,11 @@ fd5_rasterizer_state_create(struct pipe_context *pctx,
|
||||
// if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
|
||||
// cso->fill_back != PIPE_POLYGON_MODE_FILL)
|
||||
// so->pc_prim_vtx_cntl2 |= A5XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
|
||||
//
|
||||
// if (cso->cull_face & PIPE_FACE_FRONT)
|
||||
// so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
|
||||
// if (cso->cull_face & PIPE_FACE_BACK)
|
||||
// so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
|
||||
|
||||
if (cso->cull_face & PIPE_FACE_FRONT)
|
||||
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
|
||||
if (cso->cull_face & PIPE_FACE_BACK)
|
||||
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
|
||||
if (!cso->front_ccw)
|
||||
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
|
||||
// if (!cso->flatshade_first)
|
||||
|
@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
|
@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
|
||||
|
||||
Copyright (C) 2013-2016 by the following authors:
|
||||
|
@@ -4185,41 +4185,63 @@ static int egcm_double_to_int(struct r600_shader_ctx *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cayman_emit_unary_double_raw(struct r600_bytecode *bc,
|
||||
unsigned op,
|
||||
int dst_reg,
|
||||
struct r600_shader_src *src,
|
||||
bool abs)
|
||||
{
|
||||
struct r600_bytecode_alu alu;
|
||||
const int last_slot = 3;
|
||||
int r;
|
||||
|
||||
/* these have to write the result to X/Y by the looks of it */
|
||||
for (int i = 0 ; i < last_slot; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = op;
|
||||
|
||||
r600_bytecode_src(&alu.src[0], src, 1);
|
||||
r600_bytecode_src(&alu.src[1], src, 0);
|
||||
|
||||
if (abs)
|
||||
r600_bytecode_src_set_abs(&alu.src[1]);
|
||||
|
||||
alu.dst.sel = dst_reg;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = (i == 0 || i == 1);
|
||||
|
||||
if (bc->chip_class != CAYMAN || i == last_slot - 1)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
int i, r;
|
||||
struct r600_bytecode_alu alu;
|
||||
int last_slot = 3;
|
||||
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
|
||||
int t1 = ctx->temp_reg;
|
||||
|
||||
/* these have to write the result to X/Y by the looks of it */
|
||||
for (i = 0 ; i < last_slot; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ctx->inst_info->op;
|
||||
/* should only be one src regs */
|
||||
assert(inst->Instruction.NumSrcRegs == 1);
|
||||
|
||||
/* should only be one src regs */
|
||||
assert (inst->Instruction.NumSrcRegs == 1);
|
||||
/* only support one double at a time */
|
||||
assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
|
||||
inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
|
||||
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
|
||||
r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);
|
||||
|
||||
/* RSQ should take the absolute value of src */
|
||||
if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
|
||||
ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
|
||||
r600_bytecode_src_set_abs(&alu.src[1]);
|
||||
}
|
||||
alu.dst.sel = t1;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = (i == 0 || i == 1);
|
||||
|
||||
if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
r = cayman_emit_unary_double_raw(
|
||||
ctx->bc, ctx->inst_info->op, t1,
|
||||
&ctx->src[0],
|
||||
ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
|
||||
ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
for (i = 0 ; i <= lasti; i++) {
|
||||
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
|
||||
@@ -4326,25 +4348,27 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
|
||||
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
|
||||
int t1 = ctx->temp_reg;
|
||||
|
||||
for (k = 0; k < 2; k++) {
|
||||
if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
|
||||
continue;
|
||||
/* t1 would get overwritten below if we actually tried to
|
||||
* multiply two pairs of doubles at a time. */
|
||||
assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
|
||||
inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ctx->inst_info->op;
|
||||
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
|
||||
r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
|
||||
}
|
||||
alu.dst.sel = t1;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = 1;
|
||||
if (i == 3)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ctx->inst_info->op;
|
||||
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
|
||||
r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
|
||||
}
|
||||
alu.dst.sel = t1;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = 1;
|
||||
if (i == 3)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (i = 0; i <= lasti; i++) {
|
||||
@@ -4366,6 +4390,63 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Emit RECIP_64 + MUL_64 to implement division.
|
||||
*/
|
||||
static int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
|
||||
{
|
||||
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
|
||||
int r;
|
||||
struct r600_bytecode_alu alu;
|
||||
int t1 = ctx->temp_reg;
|
||||
int k;
|
||||
|
||||
/* Only support one double at a time. This is the same constraint as
|
||||
* in DMUL lowering. */
|
||||
assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
|
||||
inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);
|
||||
|
||||
k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;
|
||||
|
||||
r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
for (int i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP2_MUL_64;
|
||||
|
||||
r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1));
|
||||
|
||||
alu.src[1].sel = t1;
|
||||
alu.src[1].chan = (i == 3) ? 0 : 1;
|
||||
|
||||
alu.dst.sel = t1;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = 1;
|
||||
if (i == 3)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
for (int i = 0; i < 2; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
alu.src[0].sel = t1;
|
||||
alu.src[0].chan = i;
|
||||
tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst);
|
||||
alu.dst.write = 1;
|
||||
if (i == 1)
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* r600 - trunc to -PI..PI range
|
||||
* r700 - normalize by dividing by 2PI
|
||||
@@ -9376,6 +9457,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
|
||||
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
|
||||
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
|
||||
[TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
|
||||
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
|
||||
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
|
||||
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
|
||||
@@ -9598,6 +9680,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
|
||||
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
|
||||
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
|
||||
[TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
|
||||
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
|
||||
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
|
||||
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
|
||||
|
@@ -320,14 +320,21 @@ static void si_sampler_view_add_buffer(struct si_context *sctx,
|
||||
if (resource->target == PIPE_BUFFER)
|
||||
return;
|
||||
|
||||
/* Now add separate DCC if it's present. */
|
||||
/* Now add separate DCC or HTILE. */
|
||||
rtex = (struct r600_texture*)resource;
|
||||
if (!rtex->dcc_separate_buffer)
|
||||
return;
|
||||
if (rtex->dcc_separate_buffer) {
|
||||
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
|
||||
rtex->dcc_separate_buffer, usage,
|
||||
RADEON_PRIO_DCC, check_mem);
|
||||
}
|
||||
|
||||
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
|
||||
rtex->dcc_separate_buffer, usage,
|
||||
RADEON_PRIO_DCC, check_mem);
|
||||
if (rtex->htile_buffer &&
|
||||
rtex->tc_compatible_htile &&
|
||||
!is_stencil_sampler) {
|
||||
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
|
||||
rtex->htile_buffer, usage,
|
||||
RADEON_PRIO_HTILE, check_mem);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
|
||||
|
@@ -717,8 +717,10 @@ static void si_update_poly_offset_state(struct si_context *sctx)
|
||||
{
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
|
||||
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
|
||||
si_pm4_bind_state(sctx, poly_offset, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Use the user format, not db_render_format, so that the polygon
|
||||
* offset behaves as expected by applications.
|
||||
@@ -1363,11 +1365,17 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
return V_008F14_IMG_DATA_FORMAT_16;
|
||||
case PIPE_FORMAT_X24S8_UINT:
|
||||
case PIPE_FORMAT_S8X24_UINT:
|
||||
/*
|
||||
* Implemented as an 8_8_8_8 data format to fix texture
|
||||
* gathers in stencil sampling. This affects at least
|
||||
* GL45-CTS.texture_cube_map_array.sampling on VI.
|
||||
*/
|
||||
return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
return V_008F14_IMG_DATA_FORMAT_8_24;
|
||||
case PIPE_FORMAT_X8Z24_UNORM:
|
||||
case PIPE_FORMAT_S8X24_UINT:
|
||||
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
|
||||
return V_008F14_IMG_DATA_FORMAT_24_8;
|
||||
case PIPE_FORMAT_S8_UINT:
|
||||
@@ -2794,14 +2802,22 @@ si_make_texture_descriptor(struct si_screen *screen,
|
||||
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
|
||||
const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
|
||||
const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
|
||||
const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};
|
||||
|
||||
switch (pipe_format) {
|
||||
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
|
||||
case PIPE_FORMAT_X24S8_UINT:
|
||||
case PIPE_FORMAT_X32_S8X24_UINT:
|
||||
case PIPE_FORMAT_X8Z24_UNORM:
|
||||
util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
|
||||
break;
|
||||
case PIPE_FORMAT_X24S8_UINT:
|
||||
/*
|
||||
* X24S8 is implemented as an 8_8_8_8 data format, to
|
||||
* fix texture gathers. This affects at least
|
||||
* GL45-CTS.texture_cube_map_array.sampling on VI.
|
||||
*/
|
||||
util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
|
||||
break;
|
||||
default:
|
||||
util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
|
||||
}
|
||||
|
@@ -850,11 +850,12 @@ void si_emit_cache_flush(struct si_context *sctx)
|
||||
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 ||
|
||||
(rctx->chip_class <= CIK &&
|
||||
(rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
|
||||
/* Invalidate L1 & L2. (L1 is always invalidated)
|
||||
/* Invalidate L1 & L2. (L1 is always invalidated on SI)
|
||||
* WB must be set on VI+ when TC_ACTION is set.
|
||||
*/
|
||||
si_emit_surface_sync(rctx, cp_coher_cntl |
|
||||
S_0085F0_TC_ACTION_ENA(1) |
|
||||
S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
|
||||
cp_coher_cntl = 0;
|
||||
sctx->b.num_L2_invalidates++;
|
||||
|
@@ -217,6 +217,15 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
|
||||
out_numThreadsPerProcGroup++;
|
||||
}
|
||||
|
||||
/* Prune empty numa nodes */
|
||||
for (auto it = out_nodes.begin(); it != out_nodes.end(); ) {
|
||||
if ((*it).cores.size() == 0)
|
||||
it = out_nodes.erase(it);
|
||||
else
|
||||
++it;
|
||||
}
|
||||
|
||||
/* Prune empty core nodes */
|
||||
for (uint32_t node = 0; node < out_nodes.size(); node++) {
|
||||
auto& numaNode = out_nodes[node];
|
||||
auto it = numaNode.cores.begin();
|
||||
|
@@ -29,7 +29,7 @@
|
||||
#include "swr_query.h"
|
||||
#include "swr_screen.h"
|
||||
#include "swr_state.h"
|
||||
|
||||
#include "common/os.h"
|
||||
|
||||
static struct swr_query *
|
||||
swr_query(struct pipe_query *p)
|
||||
@@ -45,7 +45,8 @@ swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
|
||||
assert(type < PIPE_QUERY_TYPES);
|
||||
assert(index < MAX_SO_STREAMS);
|
||||
|
||||
pq = CALLOC_STRUCT(swr_query);
|
||||
pq = (struct swr_query *) AlignedMalloc(sizeof(struct swr_query), 64);
|
||||
memset(pq, 0, sizeof(*pq));
|
||||
|
||||
if (pq) {
|
||||
pq->type = type;
|
||||
@@ -67,7 +68,7 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
|
||||
swr_fence_reference(pipe->screen, &pq->fence, NULL);
|
||||
}
|
||||
|
||||
FREE(pq);
|
||||
AlignedFree(pq);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -34,7 +34,7 @@ struct swr_query_result {
|
||||
uint64_t timestamp_end;
|
||||
};
|
||||
|
||||
struct swr_query {
|
||||
OSALIGNLINE(struct) swr_query {
|
||||
unsigned type; /* PIPE_QUERY_* */
|
||||
unsigned index;
|
||||
|
||||
|
@@ -26,6 +26,9 @@
|
||||
#include "blorp_priv.h"
|
||||
#include "brw_meta_util.h"
|
||||
|
||||
/* header-only include needed for _mesa_unorm_to_float and friends. */
|
||||
#include "mesa/main/format_utils.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_BLORP
|
||||
|
||||
static const bool split_blorp_blit_debug = false;
|
||||
@@ -2204,6 +2207,75 @@ get_ccs_compatible_uint_format(const struct isl_format_layout *fmtl)
|
||||
}
|
||||
}
|
||||
|
||||
/* Takes an isl_color_value and returns a color value that is the original
|
||||
* color value only bit-casted to a UINT format. This value, together with
|
||||
* the format from get_ccs_compatible_uint_format, will yield the same bit
|
||||
* value as the original color and format.
|
||||
*/
|
||||
static union isl_color_value
|
||||
bitcast_color_value_to_uint(union isl_color_value color,
|
||||
const struct isl_format_layout *fmtl)
|
||||
{
|
||||
/* All CCS formats have the same number of bits in each channel */
|
||||
const struct isl_channel_layout *chan = &fmtl->channels.r;
|
||||
|
||||
union isl_color_value bits;
|
||||
switch (chan->type) {
|
||||
case ISL_UINT:
|
||||
case ISL_SINT:
|
||||
/* Hardware will ignore the high bits so there's no need to cast */
|
||||
bits = color;
|
||||
break;
|
||||
|
||||
case ISL_UNORM:
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
bits.u32[i] = _mesa_float_to_unorm(color.f32[i], chan->bits);
|
||||
break;
|
||||
|
||||
case ISL_SNORM:
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
bits.i32[i] = _mesa_float_to_snorm(color.f32[i], chan->bits);
|
||||
break;
|
||||
|
||||
case ISL_SFLOAT:
|
||||
switch (chan->bits) {
|
||||
case 16:
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
bits.u32[i] = _mesa_float_to_half(color.f32[i]);
|
||||
break;
|
||||
|
||||
case 32:
|
||||
bits = color;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid float format size");
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid channel type");
|
||||
}
|
||||
|
||||
switch (fmtl->format) {
|
||||
case ISL_FORMAT_B8G8R8A8_UNORM:
|
||||
case ISL_FORMAT_B8G8R8A8_UNORM_SRGB:
|
||||
case ISL_FORMAT_B8G8R8X8_UNORM:
|
||||
case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: {
|
||||
/* If it's a BGRA format, we need to swap blue and red */
|
||||
uint32_t tmp = bits.u32[0];
|
||||
bits.u32[0] = bits.u32[2];
|
||||
bits.u32[2] = tmp;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break; /* Nothing to do */
|
||||
}
|
||||
|
||||
return bits;
|
||||
}
|
||||
|
||||
static void
|
||||
surf_convert_to_uncompressed(const struct isl_device *isl_dev,
|
||||
struct brw_blorp_surface_info *info,
|
||||
@@ -2320,6 +2392,16 @@ blorp_copy(struct blorp_batch *batch,
|
||||
params.src.view.format = get_copy_format_for_bpb(isl_dev, src_fmtl->bpb);
|
||||
}
|
||||
|
||||
if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E) {
|
||||
params.src.clear_color =
|
||||
bitcast_color_value_to_uint(params.src.clear_color, src_fmtl);
|
||||
}
|
||||
|
||||
if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E) {
|
||||
params.dst.clear_color =
|
||||
bitcast_color_value_to_uint(params.dst.clear_color, dst_fmtl);
|
||||
}
|
||||
|
||||
wm_prog_key.src_bpc =
|
||||
isl_format_get_layout(params.src.view.format)->channels.r.bits;
|
||||
wm_prog_key.dst_bpc =
|
||||
|
@@ -75,8 +75,11 @@ choose_isl_surf_usage(VkImageUsageFlags vk_usage,
|
||||
isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;
|
||||
}
|
||||
|
||||
if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
|
||||
/* blorp implements transfers by rendering into the destination image. */
|
||||
if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT &&
|
||||
aspect == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
/* blorp implements transfers by rendering into the destination image.
|
||||
* Only request this with color images, as we deal with depth/stencil
|
||||
* formats differently. */
|
||||
isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
|
||||
}
|
||||
|
||||
|
@@ -908,6 +908,17 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
|
||||
blorp_batch_finish(&batch);
|
||||
}
|
||||
|
||||
/*
|
||||
* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -975,6 +986,17 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
|
||||
brw_blorp_to_isl_format(brw, format, true),
|
||||
resolve_op);
|
||||
blorp_batch_finish(&batch);
|
||||
|
||||
/*
|
||||
* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
|
||||
*
|
||||
* Any transition from any value in {Clear, Render, Resolve} to a
|
||||
* different value in {Clear, Render, Resolve} requires end of pipe
|
||||
* synchronization.
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -36,6 +36,7 @@
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_blorp.h"
|
||||
#include "brw_defines.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_BLIT
|
||||
|
||||
@@ -174,14 +175,46 @@ brw_fast_clear_depth(struct gl_context *ctx)
|
||||
mt->depth_clear_value = depth_clear_value;
|
||||
}
|
||||
|
||||
/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
|
||||
*
|
||||
* "If other rendering operations have preceded this clear, a
|
||||
* PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
|
||||
* must be issued before the rectangle primitive used for the depth
|
||||
* buffer clear operation.
|
||||
*/
|
||||
brw_emit_mi_flush(brw);
|
||||
if (brw->gen == 6) {
|
||||
/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
|
||||
*
|
||||
* "If other rendering operations have preceded this clear, a
|
||||
* PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
|
||||
* must be issued before the rectangle primitive used for the depth
|
||||
* buffer clear operation.
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
} else if (brw->gen >= 7) {
|
||||
/*
|
||||
* From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
|
||||
*
|
||||
* If other rendering operations have preceded this clear, a
|
||||
* PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
|
||||
* enabled must be issued before the rectangle primitive used for the
|
||||
* depth buffer clear operation.
|
||||
*
|
||||
* Same applies for Gen8 and Gen9.
|
||||
*
|
||||
* In addition, from the Ivybridge PRM, volume 2, 1.10.4.1 PIPE_CONTROL,
|
||||
* Depth Cache Flush Enable:
|
||||
*
|
||||
* This bit must not be set when Depth Stall Enable bit is set in
|
||||
* this packet.
|
||||
*
|
||||
* This is confirmed to hold for real, HSW gets immediate gpu hangs.
|
||||
*
|
||||
* Therefore issue two pipe control flushes, one for cache flush and
|
||||
* another for depth stall.
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
|
||||
brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
|
||||
}
|
||||
|
||||
if (fb->MaxNumLayers > 0) {
|
||||
for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
|
||||
@@ -201,7 +234,12 @@ brw_fast_clear_depth(struct gl_context *ctx)
|
||||
* by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
|
||||
* followed by Depth FLUSH'
|
||||
*/
|
||||
brw_emit_mi_flush(brw);
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_DEPTH_STALL);
|
||||
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_CS_STALL);
|
||||
}
|
||||
|
||||
/* Now, the HiZ buffer contains data that needs to be resolved to the depth
|
||||
|
@@ -511,6 +511,22 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
/*
|
||||
* From the Broadwell PRM, volume 7, "Depth Buffer Clear":
|
||||
*
|
||||
* Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM
|
||||
* or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
|
||||
* DEPTH_STALL bit and Depth FLUSH bits "set" before starting to render.
|
||||
* DepthStall and DepthFlush are not needed between consecutive depth
|
||||
* clear passes nor is it required if the depth clear pass was done with
|
||||
* "full_surf_clear" bit set in the 3DSTATE_WM_HZ_OP.
|
||||
*
|
||||
* TODO: Such as the spec says, this could be conditional.
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw,
|
||||
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
|
||||
PIPE_CONTROL_DEPTH_STALL);
|
||||
|
||||
/* Mark this buffer as needing a TC flush, as we've rendered to it. */
|
||||
brw_render_cache_set_add_bo(brw, mt->bo);
|
||||
|
||||
|
@@ -25,6 +25,7 @@
|
||||
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_mipmap_tree.h"
|
||||
#include "intel_fbo.h"
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "brw_state.h"
|
||||
@@ -179,7 +180,9 @@ genX(blorp_exec)(struct blorp_batch *batch,
|
||||
* data with different formats, which blorp does for stencil and depth
|
||||
* data.
|
||||
*/
|
||||
brw_emit_mi_flush(brw);
|
||||
if (params->src.enabled)
|
||||
brw_render_cache_set_check_flush(brw, params->src.addr.buffer);
|
||||
brw_render_cache_set_check_flush(brw, params->dst.addr.buffer);
|
||||
|
||||
brw_select_pipeline(brw, BRW_RENDER_PIPELINE);
|
||||
|
||||
@@ -256,8 +259,6 @@ retry:
|
||||
brw->no_depth_or_stencil = false;
|
||||
brw->ib.type = -1;
|
||||
|
||||
/* Flush the sampler cache so any texturing from the destination is
|
||||
* coherent.
|
||||
*/
|
||||
brw_emit_mi_flush(brw);
|
||||
if (params->dst.enabled)
|
||||
brw_render_cache_set_add_bo(brw, params->dst.addr.buffer);
|
||||
}
|
||||
|
@@ -1071,7 +1071,8 @@ _mesa_PopAttrib(void)
|
||||
if (ctx->Extensions.ARB_color_buffer_float)
|
||||
_mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
|
||||
color->ClampFragmentColor);
|
||||
_mesa_ClampColor(GL_CLAMP_READ_COLOR_ARB, color->ClampReadColor);
|
||||
if (ctx->Extensions.ARB_color_buffer_float || ctx->Version >= 30)
|
||||
_mesa_ClampColor(GL_CLAMP_READ_COLOR_ARB, color->ClampReadColor);
|
||||
|
||||
/* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */
|
||||
if (ctx->Extensions.EXT_framebuffer_sRGB)
|
||||
|
@@ -1612,6 +1612,7 @@ _mesa_LinkProgram(GLuint programObj)
|
||||
"glLinkProgram"));
|
||||
}
|
||||
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
/**
|
||||
* Generate a SHA-1 hash value string for given source string.
|
||||
*/
|
||||
@@ -1723,6 +1724,8 @@ read_shader(const gl_shader_stage stage, const char *source)
|
||||
return buffer;
|
||||
}
|
||||
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
||||
/**
|
||||
* Called via glShaderSource() and glShaderSourceARB() API functions.
|
||||
* Basically, concatenate the source code strings into one long string
|
||||
@@ -1795,6 +1798,7 @@ _mesa_ShaderSource(GLuint shaderObj, GLsizei count,
|
||||
source[totalLength - 1] = '\0';
|
||||
source[totalLength - 2] = '\0';
|
||||
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
/* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
|
||||
* if corresponding entry found from MESA_SHADER_READ_PATH.
|
||||
*/
|
||||
@@ -1805,6 +1809,7 @@ _mesa_ShaderSource(GLuint shaderObj, GLsizei count,
|
||||
free(source);
|
||||
source = replacement;
|
||||
}
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
||||
shader_source(sh, source);
|
||||
|
||||
|
@@ -955,7 +955,7 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op,
|
||||
case3fid(MUL, UMUL, DMUL);
|
||||
case3fid(MAD, UMAD, DMAD);
|
||||
case3fid(FMA, UMAD, DFMA);
|
||||
case3(DIV, IDIV, UDIV);
|
||||
case4d(DIV, IDIV, UDIV, DDIV);
|
||||
case4d(MAX, IMAX, UMAX, DMAX);
|
||||
case4d(MIN, IMIN, UMIN, DMIN);
|
||||
case2iu(MOD, UMOD);
|
||||
@@ -1710,10 +1710,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
|
||||
emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
|
||||
break;
|
||||
case ir_binop_div:
|
||||
if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
|
||||
assert(!"not reached: should be handled by ir_div_to_mul_rcp");
|
||||
else
|
||||
emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
|
||||
emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
|
||||
break;
|
||||
case ir_binop_mod:
|
||||
if (result_dst.type == GLSL_TYPE_FLOAT)
|
||||
@@ -6918,7 +6915,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
|
||||
lower_instructions(ir,
|
||||
MOD_TO_FLOOR |
|
||||
DIV_TO_MUL_RCP |
|
||||
FDIV_TO_MUL_RCP |
|
||||
EXP_TO_EXP2 |
|
||||
LOG_TO_LOG2 |
|
||||
LDEXP_TO_ARITH |
|
||||
|
@@ -21,6 +21,8 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
@@ -705,3 +707,5 @@ disk_cache_has_key(struct disk_cache *cache, cache_key key)
|
||||
|
||||
return memcmp(entry, key, CACHE_KEY_SIZE) == 0;
|
||||
}
|
||||
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
@@ -40,6 +40,8 @@ struct disk_cache;
|
||||
|
||||
/* Provide inlined stub functions if the shader cache is disabled. */
|
||||
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
|
||||
/**
|
||||
* Create a new cache object.
|
||||
*
|
||||
@@ -129,6 +131,46 @@ disk_cache_put_key(struct disk_cache *cache, cache_key key);
|
||||
bool
|
||||
disk_cache_has_key(struct disk_cache *cache, cache_key key);
|
||||
|
||||
#else
|
||||
|
||||
static inline struct disk_cache *
|
||||
disk_cache_create(void)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
disk_cache_destroy(struct disk_cache *cache) {
|
||||
return;
|
||||
}
|
||||
|
||||
static inline void
|
||||
disk_cache_put(struct disk_cache *cache, cache_key key,
|
||||
const void *data, size_t size)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static inline uint8_t *
|
||||
disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size)
|
||||
{
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void
|
||||
disk_cache_put_key(struct disk_cache *cache, cache_key key)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
disk_cache_has_key(struct disk_cache *cache, cache_key key)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -31,7 +31,6 @@ void SHA1Pad(SHA1_CTX *);
|
||||
void SHA1Transform(uint32_t [5], const uint8_t [SHA1_BLOCK_LENGTH]);
|
||||
void SHA1Update(SHA1_CTX *, const uint8_t *, size_t);
|
||||
void SHA1Final(uint8_t [SHA1_DIGEST_LENGTH], SHA1_CTX *);
|
||||
__END_DECLS
|
||||
|
||||
#define HTONDIGEST(x) do { \
|
||||
x[0] = htonl(x[0]); \
|
||||
|
@@ -265,7 +265,8 @@ VkBool32 wsi_get_physical_device_xcb_presentation_support(
|
||||
return false;
|
||||
|
||||
if (!wsi_conn->has_dri3) {
|
||||
fprintf(stderr, "vulkan: No DRI3 support\n");
|
||||
fprintf(stderr, "vulkan: No DRI3 support detected - required for presentation\n");
|
||||
fprintf(stderr, "Note: Buggy applications may crash, if they do please report to vendor\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -313,7 +314,8 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
|
||||
if (!wsi_conn->has_dri3) {
|
||||
fprintf(stderr, "vulkan: No DRI3 support\n");
|
||||
fprintf(stderr, "vulkan: No DRI3 support detected - required for presentation\n");
|
||||
fprintf(stderr, "Note: Buggy applications may crash, if they do please report to vendor\n");
|
||||
*pSupported = false;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
Reference in New Issue
Block a user