Compare commits
50 Commits
explicit-s
...
mesa-19.3.
Author | SHA1 | Date | |
---|---|---|---|
|
3d9c678772 | ||
|
bb08c0f04d | ||
|
3c1b3656b9 | ||
|
05be725923 | ||
|
8608c460d1 | ||
|
2f66f619c8 | ||
|
2bd7416823 | ||
|
0aef18324a | ||
|
3211308e88 | ||
|
f7d76ad80e | ||
|
52005416a0 | ||
|
0b670a919c | ||
|
b90f5a9ea0 | ||
|
2ea5038045 | ||
|
5ca2bb392f | ||
|
01f6321c09 | ||
|
15342abc5b | ||
|
08501e77af | ||
|
49af89a0b9 | ||
|
dd4b73ad38 | ||
|
a8faeff399 | ||
|
0d846243a0 | ||
|
bc5357bf33 | ||
|
5cee7ad873 | ||
|
184d39301d | ||
|
9bca129bb4 | ||
|
6daaf66f66 | ||
|
4d21f802b5 | ||
|
090469173c | ||
|
59bc14186e | ||
|
5032575b94 | ||
|
b981ca4d7e | ||
|
3544a01121 | ||
|
bb9d1ed2bd | ||
|
5f8e0c715e | ||
|
f0104d8fef | ||
|
cb66ea7780 | ||
|
75886fafaa | ||
|
b3fd30921a | ||
|
ea886e49be | ||
|
307e5cc8fd | ||
|
0b8836cb23 | ||
|
39e9739a3b | ||
|
de705da8a6 | ||
|
640747a298 | ||
|
9df4763440 | ||
|
2b1b7afb5c | ||
|
084926926c | ||
|
1beee9dd9f | ||
|
20512e9ddb |
2
bin/.cherry-ignore
Normal file
2
bin/.cherry-ignore
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
# This is reverted shortly after landing
|
||||||
|
4432a2d14d80081d062f7939a950d65ea3a16eed
|
@@ -92,7 +92,7 @@ is_revert_nomination()
|
|||||||
}
|
}
|
||||||
|
|
||||||
# Use the last branchpoint as our limit for the search
|
# Use the last branchpoint as our limit for the search
|
||||||
latest_branchpoint=`git merge-base origin/master HEAD`
|
latest_branchpoint=`git merge-base upstream/master HEAD`
|
||||||
|
|
||||||
# List all the commits between day 1 and the branch point...
|
# List all the commits between day 1 and the branch point...
|
||||||
git log --reverse --pretty=%H $latest_branchpoint > already_landed
|
git log --reverse --pretty=%H $latest_branchpoint > already_landed
|
||||||
@@ -103,7 +103,7 @@ git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_bra
|
|||||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||||
|
|
||||||
# Grep for potential candidates
|
# Grep for potential candidates
|
||||||
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>\|This reverts commit' $latest_branchpoint..origin/master |\
|
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>\|This reverts commit' $latest_branchpoint..upstream/master |\
|
||||||
while read sha
|
while read sha
|
||||||
do
|
do
|
||||||
# Check to see whether the patch is on the ignore list.
|
# Check to see whether the patch is on the ignore list.
|
||||||
|
@@ -16,3 +16,5 @@ VK_INTEL_performance_query on Intel.
|
|||||||
Meson support for windows using MSVC and MinGW
|
Meson support for windows using MSVC and MinGW
|
||||||
scons has been deprecated for non windows
|
scons has been deprecated for non windows
|
||||||
Initial Intel gen12 (Tigerlake) support on anvil and iris
|
Initial Intel gen12 (Tigerlake) support on anvil and iris
|
||||||
|
New compiler backend "ACO" for RADV (RADV_PERFTEST=aco)
|
||||||
|
VK_EXT_shader_demote_to_helper_invocation on RADV/ACO.
|
||||||
|
@@ -85,6 +85,7 @@ ACO_FILES = \
|
|||||||
compiler/aco_register_allocation.cpp \
|
compiler/aco_register_allocation.cpp \
|
||||||
compiler/aco_live_var_analysis.cpp \
|
compiler/aco_live_var_analysis.cpp \
|
||||||
compiler/aco_lower_bool_phis.cpp \
|
compiler/aco_lower_bool_phis.cpp \
|
||||||
|
compiler/aco_lower_to_cssa.cpp \
|
||||||
compiler/aco_lower_to_hw_instr.cpp \
|
compiler/aco_lower_to_hw_instr.cpp \
|
||||||
compiler/aco_optimizer.cpp \
|
compiler/aco_optimizer.cpp \
|
||||||
compiler/aco_opt_value_numbering.cpp \
|
compiler/aco_opt_value_numbering.cpp \
|
||||||
|
@@ -114,6 +114,11 @@ unsigned
|
|||||||
ac_get_tbuffer_format(enum chip_class chip_class,
|
ac_get_tbuffer_format(enum chip_class chip_class,
|
||||||
unsigned dfmt, unsigned nfmt)
|
unsigned dfmt, unsigned nfmt)
|
||||||
{
|
{
|
||||||
|
// Some games try to access vertex buffers without a valid format.
|
||||||
|
// This is a game bug, but we should still handle it gracefully.
|
||||||
|
if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
|
||||||
|
return V_008F0C_IMG_FORMAT_INVALID;
|
||||||
|
|
||||||
if (chip_class >= GFX10) {
|
if (chip_class >= GFX10) {
|
||||||
unsigned format;
|
unsigned format;
|
||||||
switch (dfmt) {
|
switch (dfmt) {
|
||||||
|
@@ -317,6 +317,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
|||||||
|
|
||||||
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
|
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
|
||||||
uint32_t encoding = (0b111010 << 26);
|
uint32_t encoding = (0b111010 << 26);
|
||||||
|
assert(img_format <= 0x7F);
|
||||||
assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
|
assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
|
||||||
encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
|
encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
|
||||||
encoding |= (mtbuf->glc ? 1 : 0) << 14;
|
encoding |= (mtbuf->glc ? 1 : 0) << 14;
|
||||||
|
@@ -1263,14 +1263,14 @@ setup_isel_context(Program* program,
|
|||||||
} else if (program->chip_class >= GFX8) {
|
} else if (program->chip_class >= GFX8) {
|
||||||
program->physical_sgprs = 800;
|
program->physical_sgprs = 800;
|
||||||
program->sgpr_alloc_granule = 15;
|
program->sgpr_alloc_granule = 15;
|
||||||
program->sgpr_limit = 102;
|
|
||||||
} else {
|
|
||||||
program->physical_sgprs = 512;
|
|
||||||
program->sgpr_alloc_granule = 7;
|
|
||||||
if (options->family == CHIP_TONGA || options->family == CHIP_ICELAND)
|
if (options->family == CHIP_TONGA || options->family == CHIP_ICELAND)
|
||||||
program->sgpr_limit = 94; /* workaround hardware bug */
|
program->sgpr_limit = 94; /* workaround hardware bug */
|
||||||
else
|
else
|
||||||
program->sgpr_limit = 104;
|
program->sgpr_limit = 102;
|
||||||
|
} else {
|
||||||
|
program->physical_sgprs = 512;
|
||||||
|
program->sgpr_alloc_granule = 7;
|
||||||
|
program->sgpr_limit = 104;
|
||||||
}
|
}
|
||||||
/* TODO: we don't have to allocate VCC if we don't need it */
|
/* TODO: we don't have to allocate VCC if we don't need it */
|
||||||
program->needs_vcc = true;
|
program->needs_vcc = true;
|
||||||
|
@@ -172,11 +172,11 @@ bool can_move_instr(aco_ptr<Instruction>& instr, Instruction* current, int movin
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool can_reorder(Instruction* candidate, bool allow_smem)
|
bool can_reorder(Instruction* candidate)
|
||||||
{
|
{
|
||||||
switch (candidate->format) {
|
switch (candidate->format) {
|
||||||
case Format::SMEM:
|
case Format::SMEM:
|
||||||
return allow_smem || static_cast<SMEM_instruction*>(candidate)->can_reorder;
|
return static_cast<SMEM_instruction*>(candidate)->can_reorder;
|
||||||
case Format::MUBUF:
|
case Format::MUBUF:
|
||||||
return static_cast<MUBUF_instruction*>(candidate)->can_reorder;
|
return static_cast<MUBUF_instruction*>(candidate)->can_reorder;
|
||||||
case Format::MIMG:
|
case Format::MIMG:
|
||||||
@@ -200,7 +200,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
int window_size = SMEM_WINDOW_SIZE;
|
int window_size = SMEM_WINDOW_SIZE;
|
||||||
int max_moves = SMEM_MAX_MOVES;
|
int max_moves = SMEM_MAX_MOVES;
|
||||||
int16_t k = 0;
|
int16_t k = 0;
|
||||||
bool can_reorder_cur = can_reorder(current, false);
|
bool can_reorder_cur = can_reorder(current);
|
||||||
|
|
||||||
/* don't move s_memtime/s_memrealtime */
|
/* don't move s_memtime/s_memrealtime */
|
||||||
if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime)
|
if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime)
|
||||||
@@ -224,6 +224,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
|
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
|
||||||
assert(candidate_idx >= 0);
|
assert(candidate_idx >= 0);
|
||||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||||
|
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||||
|
|
||||||
/* break if we'd make the previous SMEM instruction stall */
|
/* break if we'd make the previous SMEM instruction stall */
|
||||||
bool can_stall_prev_smem = idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
|
bool can_stall_prev_smem = idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
|
||||||
@@ -231,7 +232,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
/* break when encountering another MEM instruction, logical_start or barriers */
|
/* break when encountering another MEM instruction, logical_start or barriers */
|
||||||
if (!can_reorder(candidate.get(), false) && !can_reorder_cur)
|
if (!can_reorder_candidate && !can_reorder_cur)
|
||||||
break;
|
break;
|
||||||
if (candidate->opcode == aco_opcode::p_logical_start)
|
if (candidate->opcode == aco_opcode::p_logical_start)
|
||||||
break;
|
break;
|
||||||
@@ -239,6 +240,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
break;
|
break;
|
||||||
if (!can_move_instr(candidate, current, moving_interaction))
|
if (!can_move_instr(candidate, current, moving_interaction))
|
||||||
break;
|
break;
|
||||||
|
if (candidate->isVMEM())
|
||||||
|
break;
|
||||||
register_pressure.update(register_demand[candidate_idx]);
|
register_pressure.update(register_demand[candidate_idx]);
|
||||||
|
|
||||||
/* if current depends on candidate, add additional dependencies and continue */
|
/* if current depends on candidate, add additional dependencies and continue */
|
||||||
@@ -264,6 +267,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
if (op.isTemp())
|
if (op.isTemp())
|
||||||
ctx.depends_on[op.tempId()] = true;
|
ctx.depends_on[op.tempId()] = true;
|
||||||
}
|
}
|
||||||
|
can_reorder_cur &= can_reorder_candidate;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -280,6 +284,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
if (op.isTemp())
|
if (op.isTemp())
|
||||||
ctx.depends_on[op.tempId()] = true;
|
ctx.depends_on[op.tempId()] = true;
|
||||||
}
|
}
|
||||||
|
can_reorder_cur &= can_reorder_candidate;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -323,12 +328,14 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
insert_idx = idx + 1;
|
insert_idx = idx + 1;
|
||||||
moving_interaction = barrier_none;
|
moving_interaction = barrier_none;
|
||||||
moving_spill = false;
|
moving_spill = false;
|
||||||
|
can_reorder_cur = true;
|
||||||
|
|
||||||
bool found_dependency = false;
|
bool found_dependency = false;
|
||||||
/* second, check if we have instructions after current to move up */
|
/* second, check if we have instructions after current to move up */
|
||||||
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
|
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
|
||||||
assert(candidate_idx < (int) block->instructions.size());
|
assert(candidate_idx < (int) block->instructions.size());
|
||||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||||
|
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||||
|
|
||||||
if (candidate->opcode == aco_opcode::p_logical_end)
|
if (candidate->opcode == aco_opcode::p_logical_end)
|
||||||
break;
|
break;
|
||||||
@@ -369,7 +376,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!can_reorder(candidate.get(), false) && !can_reorder_cur)
|
if (!can_reorder_candidate && !can_reorder_cur)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (!found_dependency) {
|
if (!found_dependency) {
|
||||||
@@ -380,8 +387,10 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
/* update register pressure */
|
/* update register pressure */
|
||||||
register_pressure.update(register_demand[candidate_idx - 1]);
|
register_pressure.update(register_demand[candidate_idx - 1]);
|
||||||
|
|
||||||
if (is_dependency)
|
if (is_dependency) {
|
||||||
|
can_reorder_cur &= can_reorder_candidate;
|
||||||
continue;
|
continue;
|
||||||
|
}
|
||||||
assert(insert_idx != idx);
|
assert(insert_idx != idx);
|
||||||
|
|
||||||
// TODO: correctly calculate register pressure for this case
|
// TODO: correctly calculate register pressure for this case
|
||||||
@@ -392,6 +401,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
register_pressure_unknown = true;
|
register_pressure_unknown = true;
|
||||||
}
|
}
|
||||||
if (register_pressure_unknown) {
|
if (register_pressure_unknown) {
|
||||||
|
if (candidate->isVMEM())
|
||||||
|
break;
|
||||||
for (const Definition& def : candidate->definitions) {
|
for (const Definition& def : candidate->definitions) {
|
||||||
if (def.isTemp())
|
if (def.isTemp())
|
||||||
ctx.RAR_dependencies[def.tempId()] = true;
|
ctx.RAR_dependencies[def.tempId()] = true;
|
||||||
@@ -400,6 +411,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
|||||||
if (op.isTemp())
|
if (op.isTemp())
|
||||||
ctx.RAR_dependencies[op.tempId()] = true;
|
ctx.RAR_dependencies[op.tempId()] = true;
|
||||||
}
|
}
|
||||||
|
can_reorder_cur &= can_reorder_candidate;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -440,7 +452,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
int max_moves = VMEM_MAX_MOVES;
|
int max_moves = VMEM_MAX_MOVES;
|
||||||
int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
|
int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
|
||||||
int16_t k = 0;
|
int16_t k = 0;
|
||||||
bool can_reorder_cur = can_reorder(current, false);
|
/* initially true as we don't pull other VMEM instructions
|
||||||
|
* through the current instruction */
|
||||||
|
bool can_reorder_vmem = true;
|
||||||
|
bool can_reorder_smem = true;
|
||||||
|
|
||||||
/* create the initial set of values which current depends on */
|
/* create the initial set of values which current depends on */
|
||||||
std::fill(ctx.depends_on.begin(), ctx.depends_on.end(), false);
|
std::fill(ctx.depends_on.begin(), ctx.depends_on.end(), false);
|
||||||
@@ -467,9 +482,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
|
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
|
||||||
assert(candidate_idx >= 0);
|
assert(candidate_idx >= 0);
|
||||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||||
|
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||||
|
|
||||||
/* break when encountering another VMEM instruction, logical_start or barriers */
|
/* break when encountering another VMEM instruction, logical_start or barriers */
|
||||||
if (!can_reorder(candidate.get(), true) && !can_reorder_cur)
|
if (!can_reorder_smem && candidate->format == Format::SMEM && !can_reorder_candidate)
|
||||||
break;
|
break;
|
||||||
if (candidate->opcode == aco_opcode::p_logical_start)
|
if (candidate->opcode == aco_opcode::p_logical_start)
|
||||||
break;
|
break;
|
||||||
@@ -487,10 +503,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
bool part_of_clause = false;
|
bool part_of_clause = false;
|
||||||
if (candidate->isVMEM()) {
|
if (candidate->isVMEM()) {
|
||||||
bool same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
|
bool same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
|
||||||
|
bool can_reorder = can_reorder_vmem || can_reorder_candidate;
|
||||||
int grab_dist = clause_insert_idx - candidate_idx;
|
int grab_dist = clause_insert_idx - candidate_idx;
|
||||||
/* We can't easily tell how much this will decrease the def-to-use
|
/* We can't easily tell how much this will decrease the def-to-use
|
||||||
* distances, so just use how far it will be moved as a heuristic. */
|
* distances, so just use how far it will be moved as a heuristic. */
|
||||||
part_of_clause = same_resource && grab_dist < clause_max_grab_dist;
|
part_of_clause = can_reorder && same_resource && grab_dist < clause_max_grab_dist;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* if current depends on candidate, add additional dependencies and continue */
|
/* if current depends on candidate, add additional dependencies and continue */
|
||||||
@@ -522,6 +539,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
register_pressure_clause.update(register_demand[candidate_idx]);
|
register_pressure_clause.update(register_demand[candidate_idx]);
|
||||||
|
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||||
|
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -555,6 +574,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
register_pressure_clause.update(register_demand[candidate_idx]);
|
register_pressure_clause.update(register_demand[candidate_idx]);
|
||||||
|
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||||
|
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -605,12 +626,16 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
int insert_idx = idx;
|
int insert_idx = idx;
|
||||||
moving_interaction = barrier_none;
|
moving_interaction = barrier_none;
|
||||||
moving_spill = false;
|
moving_spill = false;
|
||||||
|
// TODO: differentiate between loads and stores (load-load can always reorder)
|
||||||
|
can_reorder_vmem = true;
|
||||||
|
can_reorder_smem = true;
|
||||||
|
|
||||||
bool found_dependency = false;
|
bool found_dependency = false;
|
||||||
/* second, check if we have instructions after current to move up */
|
/* second, check if we have instructions after current to move up */
|
||||||
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
|
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
|
||||||
assert(candidate_idx < (int) block->instructions.size());
|
assert(candidate_idx < (int) block->instructions.size());
|
||||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||||
|
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||||
|
|
||||||
if (candidate->opcode == aco_opcode::p_logical_end)
|
if (candidate->opcode == aco_opcode::p_logical_end)
|
||||||
break;
|
break;
|
||||||
@@ -623,7 +648,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
/* check if candidate depends on current */
|
/* check if candidate depends on current */
|
||||||
bool is_dependency = !can_reorder(candidate.get(), true) && !can_reorder_cur;
|
bool is_dependency = false;
|
||||||
|
if (candidate->format == Format::SMEM)
|
||||||
|
is_dependency = !can_reorder_smem && !can_reorder_candidate;
|
||||||
|
if (candidate->isVMEM())
|
||||||
|
is_dependency = !can_reorder_vmem && !can_reorder_candidate;
|
||||||
for (const Operand& op : candidate->operands) {
|
for (const Operand& op : candidate->operands) {
|
||||||
if (op.isTemp() && ctx.depends_on[op.tempId()]) {
|
if (op.isTemp() && ctx.depends_on[op.tempId()]) {
|
||||||
is_dependency = true;
|
is_dependency = true;
|
||||||
@@ -645,6 +674,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
if (op.isTemp())
|
if (op.isTemp())
|
||||||
ctx.RAR_dependencies[op.tempId()] = true;
|
ctx.RAR_dependencies[op.tempId()] = true;
|
||||||
}
|
}
|
||||||
|
/* update flag whether we can reorder other memory instructions */
|
||||||
|
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||||
|
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||||
|
|
||||||
if (!found_dependency) {
|
if (!found_dependency) {
|
||||||
insert_idx = candidate_idx;
|
insert_idx = candidate_idx;
|
||||||
found_dependency = true;
|
found_dependency = true;
|
||||||
@@ -652,7 +685,9 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
register_pressure = register_demand[insert_idx - 1];
|
register_pressure = register_demand[insert_idx - 1];
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
} else if (candidate->isVMEM()) {
|
} else if (candidate->isVMEM()) {
|
||||||
|
/* don't move up dependencies of other VMEM instructions */
|
||||||
for (const Definition& def : candidate->definitions) {
|
for (const Definition& def : candidate->definitions) {
|
||||||
if (def.isTemp())
|
if (def.isTemp())
|
||||||
ctx.depends_on[def.tempId()] = true;
|
ctx.depends_on[def.tempId()] = true;
|
||||||
@@ -681,6 +716,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
|||||||
if (op.isTemp())
|
if (op.isTemp())
|
||||||
ctx.RAR_dependencies[op.tempId()] = true;
|
ctx.RAR_dependencies[op.tempId()] = true;
|
||||||
}
|
}
|
||||||
|
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||||
|
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1291,9 +1291,9 @@ Temp load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
|
|||||||
rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||||
}
|
}
|
||||||
/* older generations need element size = 16 bytes. element size removed in GFX9 */
|
/* older generations need element size = 4 bytes. element size removed in GFX9 */
|
||||||
if (ctx.program->chip_class <= GFX8)
|
if (ctx.program->chip_class <= GFX8)
|
||||||
rsrc_conf |= S_008F0C_ELEMENT_SIZE(3);
|
rsrc_conf |= S_008F0C_ELEMENT_SIZE(1);
|
||||||
|
|
||||||
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
|
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
|
||||||
private_segment_buffer, Operand(-1u),
|
private_segment_buffer, Operand(-1u),
|
||||||
@@ -1530,12 +1530,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
|||||||
/* spill vgpr */
|
/* spill vgpr */
|
||||||
ctx.program->config->spilled_vgprs += (*it)->operands[0].size();
|
ctx.program->config->spilled_vgprs += (*it)->operands[0].size();
|
||||||
uint32_t spill_slot = vgpr_slot[spill_id];
|
uint32_t spill_slot = vgpr_slot[spill_id];
|
||||||
bool add_offset = ctx.program->config->scratch_bytes_per_wave + vgpr_spill_slots * 4 > 4096;
|
bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096;
|
||||||
unsigned base_offset = add_offset ? 0 : ctx.program->config->scratch_bytes_per_wave;
|
unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||||
|
|
||||||
/* check if the scratch resource descriptor already exists */
|
/* check if the scratch resource descriptor already exists */
|
||||||
if (scratch_rsrc == Temp()) {
|
if (scratch_rsrc == Temp()) {
|
||||||
unsigned offset = ctx.program->config->scratch_bytes_per_wave - base_offset;
|
unsigned offset = add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0;
|
||||||
scratch_rsrc = load_scratch_resource(ctx, scratch_offset,
|
scratch_rsrc = load_scratch_resource(ctx, scratch_offset,
|
||||||
last_top_level_block_idx == block.index ?
|
last_top_level_block_idx == block.index ?
|
||||||
instructions : ctx.program->blocks[last_top_level_block_idx].instructions,
|
instructions : ctx.program->blocks[last_top_level_block_idx].instructions,
|
||||||
@@ -1544,37 +1544,21 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned offset = base_offset + spill_slot * 4;
|
unsigned offset = base_offset + spill_slot * 4;
|
||||||
aco_opcode opcode;
|
aco_opcode opcode = aco_opcode::buffer_store_dword;
|
||||||
assert((*it)->operands[0].isTemp());
|
assert((*it)->operands[0].isTemp());
|
||||||
Temp temp = (*it)->operands[0].getTemp();
|
Temp temp = (*it)->operands[0].getTemp();
|
||||||
assert(temp.type() == RegType::vgpr && !temp.is_linear());
|
assert(temp.type() == RegType::vgpr && !temp.is_linear());
|
||||||
switch (temp.size()) {
|
if (temp.size() > 1) {
|
||||||
case 1: opcode = aco_opcode::buffer_store_dword; break;
|
|
||||||
case 2: opcode = aco_opcode::buffer_store_dwordx2; break;
|
|
||||||
case 6: temp = bld.tmp(v3); /* fallthrough */
|
|
||||||
case 3: opcode = aco_opcode::buffer_store_dwordx3; break;
|
|
||||||
case 8: temp = bld.tmp(v4); /* fallthrough */
|
|
||||||
case 4: opcode = aco_opcode::buffer_store_dwordx4; break;
|
|
||||||
default: {
|
|
||||||
Instruction* split{create_instruction<Pseudo_instruction>(aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())};
|
Instruction* split{create_instruction<Pseudo_instruction>(aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())};
|
||||||
split->operands[0] = Operand(temp);
|
split->operands[0] = Operand(temp);
|
||||||
for (unsigned i = 0; i < temp.size(); i++)
|
for (unsigned i = 0; i < temp.size(); i++)
|
||||||
split->definitions[i] = bld.def(v1);
|
split->definitions[i] = bld.def(v1);
|
||||||
bld.insert(split);
|
bld.insert(split);
|
||||||
opcode = aco_opcode::buffer_store_dword;
|
|
||||||
for (unsigned i = 0; i < temp.size(); i++)
|
for (unsigned i = 0; i < temp.size(); i++)
|
||||||
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
|
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
|
||||||
continue;
|
} else {
|
||||||
|
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if ((*it)->operands[0].size() > 4) {
|
|
||||||
Temp temp2 = bld.pseudo(aco_opcode::p_split_vector, bld.def(temp.regClass()), Definition(temp), (*it)->operands[0]);
|
|
||||||
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp2, offset, false);
|
|
||||||
offset += temp.size() * 4;
|
|
||||||
}
|
|
||||||
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
|
|
||||||
|
|
||||||
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
||||||
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
|
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
|
||||||
|
|
||||||
@@ -1615,12 +1599,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
|||||||
if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
|
if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
|
||||||
/* reload vgpr */
|
/* reload vgpr */
|
||||||
uint32_t spill_slot = vgpr_slot[spill_id];
|
uint32_t spill_slot = vgpr_slot[spill_id];
|
||||||
bool add_offset = ctx.program->config->scratch_bytes_per_wave + vgpr_spill_slots * 4 > 4096;
|
bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096;
|
||||||
unsigned base_offset = add_offset ? 0 : ctx.program->config->scratch_bytes_per_wave;
|
unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||||
|
|
||||||
/* check if the scratch resource descriptor already exists */
|
/* check if the scratch resource descriptor already exists */
|
||||||
if (scratch_rsrc == Temp()) {
|
if (scratch_rsrc == Temp()) {
|
||||||
unsigned offset = ctx.program->config->scratch_bytes_per_wave - base_offset;
|
unsigned offset = add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0;
|
||||||
scratch_rsrc = load_scratch_resource(ctx, scratch_offset,
|
scratch_rsrc = load_scratch_resource(ctx, scratch_offset,
|
||||||
last_top_level_block_idx == block.index ?
|
last_top_level_block_idx == block.index ?
|
||||||
instructions : ctx.program->blocks[last_top_level_block_idx].instructions,
|
instructions : ctx.program->blocks[last_top_level_block_idx].instructions,
|
||||||
@@ -1629,35 +1613,20 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned offset = base_offset + spill_slot * 4;
|
unsigned offset = base_offset + spill_slot * 4;
|
||||||
aco_opcode opcode;
|
aco_opcode opcode = aco_opcode::buffer_load_dword;
|
||||||
Definition def = (*it)->definitions[0];
|
Definition def = (*it)->definitions[0];
|
||||||
switch (def.size()) {
|
if (def.size() > 1) {
|
||||||
case 1: opcode = aco_opcode::buffer_load_dword; break;
|
|
||||||
case 2: opcode = aco_opcode::buffer_load_dwordx2; break;
|
|
||||||
case 6: def = bld.def(v3); /* fallthrough */
|
|
||||||
case 3: opcode = aco_opcode::buffer_load_dwordx3; break;
|
|
||||||
case 8: def = bld.def(v4); /* fallthrough */
|
|
||||||
case 4: opcode = aco_opcode::buffer_load_dwordx4; break;
|
|
||||||
default: {
|
|
||||||
Instruction* vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
|
Instruction* vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
|
||||||
vec->definitions[0] = def;
|
vec->definitions[0] = def;
|
||||||
opcode = aco_opcode::buffer_load_dword;
|
|
||||||
for (unsigned i = 0; i < def.size(); i++) {
|
for (unsigned i = 0; i < def.size(); i++) {
|
||||||
Temp tmp = bld.tmp(v1);
|
Temp tmp = bld.tmp(v1);
|
||||||
vec->operands[i] = Operand(tmp);
|
vec->operands[i] = Operand(tmp);
|
||||||
bld.mubuf(opcode, Definition(tmp), Operand(), scratch_rsrc, scratch_offset, offset + i * 4, false);
|
bld.mubuf(opcode, Definition(tmp), Operand(), scratch_rsrc, scratch_offset, offset + i * 4, false);
|
||||||
}
|
}
|
||||||
bld.insert(vec);
|
bld.insert(vec);
|
||||||
continue;
|
} else {
|
||||||
|
bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
|
|
||||||
if ((*it)->definitions[0].size() > 4) {
|
|
||||||
Temp temp2 = bld.mubuf(opcode, bld.def(def.regClass()), Operand(), scratch_rsrc, scratch_offset, offset + def.size() * 4, false);
|
|
||||||
bld.pseudo(aco_opcode::p_create_vector, (*it)->definitions[0], def.getTemp(), temp2);
|
|
||||||
}
|
|
||||||
|
|
||||||
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
||||||
uint32_t spill_slot = sgpr_slot[spill_id];
|
uint32_t spill_slot = sgpr_slot[spill_id];
|
||||||
reload_in_loop[spill_slot / 64] = block.loop_nest_depth > 0;
|
reload_in_loop[spill_slot / 64] = block.loop_nest_depth > 0;
|
||||||
|
@@ -25,6 +25,7 @@
|
|||||||
* IN THE SOFTWARE.
|
* IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "dirent.h"
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <linux/audit.h>
|
#include <linux/audit.h>
|
||||||
@@ -47,7 +48,6 @@
|
|||||||
#include "radv_shader.h"
|
#include "radv_shader.h"
|
||||||
#include "radv_cs.h"
|
#include "radv_cs.h"
|
||||||
#include "util/disk_cache.h"
|
#include "util/disk_cache.h"
|
||||||
#include "util/strtod.h"
|
|
||||||
#include "vk_util.h"
|
#include "vk_util.h"
|
||||||
#include <xf86drm.h>
|
#include <xf86drm.h>
|
||||||
#include <amdgpu.h>
|
#include <amdgpu.h>
|
||||||
@@ -682,7 +682,6 @@ VkResult radv_CreateInstance(
|
|||||||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
|
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
|
||||||
instance->engineVersion = engine_version;
|
instance->engineVersion = engine_version;
|
||||||
|
|
||||||
_mesa_locale_init();
|
|
||||||
glsl_type_singleton_init_or_ref();
|
glsl_type_singleton_init_or_ref();
|
||||||
|
|
||||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||||
@@ -713,7 +712,6 @@ void radv_DestroyInstance(
|
|||||||
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
||||||
|
|
||||||
glsl_type_singleton_decref();
|
glsl_type_singleton_decref();
|
||||||
_mesa_locale_fini();
|
|
||||||
|
|
||||||
driDestroyOptionCache(&instance->dri_options);
|
driDestroyOptionCache(&instance->dri_options);
|
||||||
driDestroyOptionInfo(&instance->available_dri_options);
|
driDestroyOptionInfo(&instance->available_dri_options);
|
||||||
@@ -2069,25 +2067,61 @@ bool radv_sc_read(int fd, void *buf, size_t size, bool timeout)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Close every file descriptor of the calling process except the ones listed
 * in keep_fds.
 *
 * The open descriptors are discovered by listing /proc/self/fd. The
 * descriptor that backs the directory stream itself is skipped while walking
 * and is released by closedir() at the end.
 *
 * \param keep_fds       descriptors that must be left open
 * \param keep_fd_count  number of entries in keep_fds
 * \return false if /proc/self/fd could not be opened (nothing is closed in
 *         that case), true otherwise
 */
static bool radv_close_all_fds(const int *keep_fds, int keep_fd_count)
{
	DIR *d = opendir("/proc/self/fd");
	if (!d)
		return false;

	/* The directory stream owns a descriptor too; it must survive the walk. */
	const int dir_fd = dirfd(d);

	struct dirent *dir;
	while ((dir = readdir(d)) != NULL) {
		/* Skips "." and ".." */
		if (dir->d_name[0] == '.')
			continue;

		/* Parse strictly. atoi() cannot report failure and yields 0 for a
		 * name that is not a number, which would close stdin by accident.
		 */
		char *end = NULL;
		errno = 0;
		const long parsed = strtol(dir->d_name, &end, 10);
		const int fd = (int)parsed;
		if (end == dir->d_name || *end != '\0' || errno != 0 ||
		    parsed < 0 || fd != parsed)
			continue;

		if (fd == dir_fd)
			continue;

		bool keep = false;
		for (int i = 0; !keep && i < keep_fd_count; ++i)
			keep = keep_fds[i] == fd;

		if (!keep)
			close(fd);
	}

	closedir(d);
	return true;
}
|
||||||
|
|
||||||
static void run_secure_compile_device(struct radv_device *device, unsigned process,
|
static void run_secure_compile_device(struct radv_device *device, unsigned process,
|
||||||
int *fd_secure_input, int *fd_secure_output)
|
int fd_secure_input, int fd_secure_output)
|
||||||
{
|
{
|
||||||
enum radv_secure_compile_type sc_type;
|
enum radv_secure_compile_type sc_type;
|
||||||
if (install_seccomp_filter() == -1) {
|
|
||||||
|
const int needed_fds[] = {
|
||||||
|
fd_secure_input,
|
||||||
|
fd_secure_output,
|
||||||
|
};
|
||||||
|
if (!radv_close_all_fds(needed_fds, ARRAY_SIZE(needed_fds)) || install_seccomp_filter() == -1) {
|
||||||
sc_type = RADV_SC_TYPE_INIT_FAILURE;
|
sc_type = RADV_SC_TYPE_INIT_FAILURE;
|
||||||
} else {
|
} else {
|
||||||
sc_type = RADV_SC_TYPE_INIT_SUCCESS;
|
sc_type = RADV_SC_TYPE_INIT_SUCCESS;
|
||||||
device->sc_state->secure_compile_processes[process].fd_secure_input = fd_secure_input[0];
|
device->sc_state->secure_compile_processes[process].fd_secure_input = fd_secure_input;
|
||||||
device->sc_state->secure_compile_processes[process].fd_secure_output = fd_secure_output[1];
|
device->sc_state->secure_compile_processes[process].fd_secure_output = fd_secure_output;
|
||||||
}
|
}
|
||||||
|
|
||||||
write(fd_secure_output[1], &sc_type, sizeof(sc_type));
|
write(fd_secure_output, &sc_type, sizeof(sc_type));
|
||||||
|
|
||||||
if (sc_type == RADV_SC_TYPE_INIT_FAILURE)
|
if (sc_type == RADV_SC_TYPE_INIT_FAILURE)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
radv_sc_read(fd_secure_input[0], &sc_type, sizeof(sc_type), false);
|
radv_sc_read(fd_secure_input, &sc_type, sizeof(sc_type), false);
|
||||||
|
|
||||||
if (sc_type == RADV_SC_TYPE_COMPILE_PIPELINE) {
|
if (sc_type == RADV_SC_TYPE_COMPILE_PIPELINE) {
|
||||||
struct radv_pipeline *pipeline;
|
struct radv_pipeline *pipeline;
|
||||||
@@ -2100,20 +2134,20 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
|||||||
|
|
||||||
/* Read pipeline layout */
|
/* Read pipeline layout */
|
||||||
struct radv_pipeline_layout layout;
|
struct radv_pipeline_layout layout;
|
||||||
sc_read = radv_sc_read(fd_secure_input[0], &layout, sizeof(struct radv_pipeline_layout), true);
|
sc_read = radv_sc_read(fd_secure_input, &layout, sizeof(struct radv_pipeline_layout), true);
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &layout.num_sets, sizeof(uint32_t), true);
|
sc_read &= radv_sc_read(fd_secure_input, &layout.num_sets, sizeof(uint32_t), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
for (uint32_t set = 0; set < layout.num_sets; set++) {
|
for (uint32_t set = 0; set < layout.num_sets; set++) {
|
||||||
uint32_t layout_size;
|
uint32_t layout_size;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &layout_size, sizeof(uint32_t), true);
|
sc_read &= radv_sc_read(fd_secure_input, &layout_size, sizeof(uint32_t), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
layout.set[set].layout = malloc(layout_size);
|
layout.set[set].layout = malloc(layout_size);
|
||||||
layout.set[set].layout->layout_size = layout_size;
|
layout.set[set].layout->layout_size = layout_size;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], layout.set[set].layout,
|
sc_read &= radv_sc_read(fd_secure_input, layout.set[set].layout,
|
||||||
layout.set[set].layout->layout_size, true);
|
layout.set[set].layout->layout_size, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2121,16 +2155,16 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
|||||||
|
|
||||||
/* Read pipeline key */
|
/* Read pipeline key */
|
||||||
struct radv_pipeline_key key;
|
struct radv_pipeline_key key;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &key, sizeof(struct radv_pipeline_key), true);
|
sc_read &= radv_sc_read(fd_secure_input, &key, sizeof(struct radv_pipeline_key), true);
|
||||||
|
|
||||||
/* Read pipeline create flags */
|
/* Read pipeline create flags */
|
||||||
VkPipelineCreateFlags flags;
|
VkPipelineCreateFlags flags;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &flags, sizeof(VkPipelineCreateFlags), true);
|
sc_read &= radv_sc_read(fd_secure_input, &flags, sizeof(VkPipelineCreateFlags), true);
|
||||||
|
|
||||||
/* Read stage and shader information */
|
/* Read stage and shader information */
|
||||||
uint32_t num_stages;
|
uint32_t num_stages;
|
||||||
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
|
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &num_stages, sizeof(uint32_t), true);
|
sc_read &= radv_sc_read(fd_secure_input, &num_stages, sizeof(uint32_t), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
@@ -2138,33 +2172,33 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
|||||||
|
|
||||||
/* Read stage */
|
/* Read stage */
|
||||||
gl_shader_stage stage;
|
gl_shader_stage stage;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &stage, sizeof(gl_shader_stage), true);
|
sc_read &= radv_sc_read(fd_secure_input, &stage, sizeof(gl_shader_stage), true);
|
||||||
|
|
||||||
VkPipelineShaderStageCreateInfo *pStage = calloc(1, sizeof(VkPipelineShaderStageCreateInfo));
|
VkPipelineShaderStageCreateInfo *pStage = calloc(1, sizeof(VkPipelineShaderStageCreateInfo));
|
||||||
|
|
||||||
/* Read entry point name */
|
/* Read entry point name */
|
||||||
size_t name_size;
|
size_t name_size;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &name_size, sizeof(size_t), true);
|
sc_read &= radv_sc_read(fd_secure_input, &name_size, sizeof(size_t), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
char *ep_name = malloc(name_size);
|
char *ep_name = malloc(name_size);
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], ep_name, name_size, true);
|
sc_read &= radv_sc_read(fd_secure_input, ep_name, name_size, true);
|
||||||
pStage->pName = ep_name;
|
pStage->pName = ep_name;
|
||||||
|
|
||||||
/* Read shader module */
|
/* Read shader module */
|
||||||
size_t module_size;
|
size_t module_size;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &module_size, sizeof(size_t), true);
|
sc_read &= radv_sc_read(fd_secure_input, &module_size, sizeof(size_t), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
struct radv_shader_module *module = malloc(module_size);
|
struct radv_shader_module *module = malloc(module_size);
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], module, module_size, true);
|
sc_read &= radv_sc_read(fd_secure_input, module, module_size, true);
|
||||||
pStage->module = radv_shader_module_to_handle(module);
|
pStage->module = radv_shader_module_to_handle(module);
|
||||||
|
|
||||||
/* Read specialization info */
|
/* Read specialization info */
|
||||||
bool has_spec_info;
|
bool has_spec_info;
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &has_spec_info, sizeof(bool), true);
|
sc_read &= radv_sc_read(fd_secure_input, &has_spec_info, sizeof(bool), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
@@ -2172,21 +2206,21 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
|||||||
VkSpecializationInfo *specInfo = malloc(sizeof(VkSpecializationInfo));
|
VkSpecializationInfo *specInfo = malloc(sizeof(VkSpecializationInfo));
|
||||||
pStage->pSpecializationInfo = specInfo;
|
pStage->pSpecializationInfo = specInfo;
|
||||||
|
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &specInfo->dataSize, sizeof(size_t), true);
|
sc_read &= radv_sc_read(fd_secure_input, &specInfo->dataSize, sizeof(size_t), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
void *si_data = malloc(specInfo->dataSize);
|
void *si_data = malloc(specInfo->dataSize);
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], si_data, specInfo->dataSize, true);
|
sc_read &= radv_sc_read(fd_secure_input, si_data, specInfo->dataSize, true);
|
||||||
specInfo->pData = si_data;
|
specInfo->pData = si_data;
|
||||||
|
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &specInfo->mapEntryCount, sizeof(uint32_t), true);
|
sc_read &= radv_sc_read(fd_secure_input, &specInfo->mapEntryCount, sizeof(uint32_t), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
|
|
||||||
VkSpecializationMapEntry *mapEntries = malloc(sizeof(VkSpecializationMapEntry) * specInfo->mapEntryCount);
|
VkSpecializationMapEntry *mapEntries = malloc(sizeof(VkSpecializationMapEntry) * specInfo->mapEntryCount);
|
||||||
for (uint32_t j = 0; j < specInfo->mapEntryCount; j++) {
|
for (uint32_t j = 0; j < specInfo->mapEntryCount; j++) {
|
||||||
sc_read &= radv_sc_read(fd_secure_input[0], &mapEntries[j], sizeof(VkSpecializationMapEntry), true);
|
sc_read &= radv_sc_read(fd_secure_input, &mapEntries[j], sizeof(VkSpecializationMapEntry), true);
|
||||||
if (!sc_read)
|
if (!sc_read)
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
}
|
}
|
||||||
@@ -2222,7 +2256,7 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
|||||||
vk_free(&device->alloc, pipeline);
|
vk_free(&device->alloc, pipeline);
|
||||||
|
|
||||||
sc_type = RADV_SC_TYPE_COMPILE_PIPELINE_FINISHED;
|
sc_type = RADV_SC_TYPE_COMPILE_PIPELINE_FINISHED;
|
||||||
write(fd_secure_output[1], &sc_type, sizeof(sc_type));
|
write(fd_secure_output, &sc_type, sizeof(sc_type));
|
||||||
|
|
||||||
} else if (sc_type == RADV_SC_TYPE_DESTROY_DEVICE) {
|
} else if (sc_type == RADV_SC_TYPE_DESTROY_DEVICE) {
|
||||||
goto secure_compile_exit;
|
goto secure_compile_exit;
|
||||||
@@ -2230,10 +2264,8 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
|||||||
}
|
}
|
||||||
|
|
||||||
secure_compile_exit:
|
secure_compile_exit:
|
||||||
close(fd_secure_input[1]);
|
close(fd_secure_input);
|
||||||
close(fd_secure_input[0]);
|
close(fd_secure_output);
|
||||||
close(fd_secure_output[1]);
|
|
||||||
close(fd_secure_output[0]);
|
|
||||||
_exit(0);
|
_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2278,7 +2310,7 @@ static VkResult fork_secure_compile_device(struct radv_device *device)
|
|||||||
for (unsigned process = 0; process < sc_threads; process++) {
|
for (unsigned process = 0; process < sc_threads; process++) {
|
||||||
if ((device->sc_state->secure_compile_processes[process].sc_pid = fork()) == 0) {
|
if ((device->sc_state->secure_compile_processes[process].sc_pid = fork()) == 0) {
|
||||||
device->sc_state->secure_compile_thread_counter = process;
|
device->sc_state->secure_compile_thread_counter = process;
|
||||||
run_secure_compile_device(device, process, fd_secure_input[process], fd_secure_output[process]);
|
run_secure_compile_device(device, process, fd_secure_input[process][0], fd_secure_output[process][1]);
|
||||||
} else {
|
} else {
|
||||||
if (device->sc_state->secure_compile_processes[process].sc_pid == -1)
|
if (device->sc_state->secure_compile_processes[process].sc_pid == -1)
|
||||||
return VK_ERROR_INITIALIZATION_FAILED;
|
return VK_ERROR_INITIALIZATION_FAILED;
|
||||||
|
@@ -4646,10 +4646,10 @@ radv_secure_compile(struct radv_pipeline *pipeline,
|
|||||||
|
|
||||||
/* Do an early exit if all cache entries are already there. */
|
/* Do an early exit if all cache entries are already there. */
|
||||||
bool may_need_copy_shader = pStages[MESA_SHADER_GEOMETRY];
|
bool may_need_copy_shader = pStages[MESA_SHADER_GEOMETRY];
|
||||||
void *main_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[0], 20);
|
void *main_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[0], NULL);
|
||||||
void *copy_entry = NULL;
|
void *copy_entry = NULL;
|
||||||
if (may_need_copy_shader)
|
if (may_need_copy_shader)
|
||||||
copy_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[1], 20);
|
copy_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[1], NULL);
|
||||||
|
|
||||||
bool has_all_cache_entries = main_entry && (!may_need_copy_shader || copy_entry);
|
bool has_all_cache_entries = main_entry && (!may_need_copy_shader || copy_entry);
|
||||||
free(main_entry);
|
free(main_entry);
|
||||||
@@ -5065,6 +5065,19 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
|
|||||||
assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
|
assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static struct radv_pipeline_key
|
||||||
|
radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
|
||||||
|
const VkComputePipelineCreateInfo *pCreateInfo)
|
||||||
|
{
|
||||||
|
struct radv_pipeline_key key;
|
||||||
|
memset(&key, 0, sizeof(key));
|
||||||
|
|
||||||
|
if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
|
||||||
|
key.optimisations_disabled = 1;
|
||||||
|
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
static VkResult radv_compute_pipeline_create(
|
static VkResult radv_compute_pipeline_create(
|
||||||
VkDevice _device,
|
VkDevice _device,
|
||||||
VkPipelineCache _cache,
|
VkPipelineCache _cache,
|
||||||
@@ -5098,13 +5111,16 @@ static VkResult radv_compute_pipeline_create(
|
|||||||
|
|
||||||
pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
|
pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
|
||||||
|
|
||||||
|
struct radv_pipeline_key key =
|
||||||
|
radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
|
||||||
|
|
||||||
if (radv_device_use_secure_compile(device->instance)) {
|
if (radv_device_use_secure_compile(device->instance)) {
|
||||||
result = radv_secure_compile(pipeline, device, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, 1);
|
result = radv_secure_compile(pipeline, device, &key, pStages, pCreateInfo->flags, 1);
|
||||||
*pPipeline = radv_pipeline_to_handle(pipeline);
|
*pPipeline = radv_pipeline_to_handle(pipeline);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
} else {
|
} else {
|
||||||
radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
|
radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
|
||||||
}
|
}
|
||||||
|
|
||||||
pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
|
pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
|
||||||
|
@@ -1582,7 +1582,7 @@ static bool radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws, const uint32_t *
|
|||||||
&tmp);
|
&tmp);
|
||||||
if (ret == 0) {
|
if (ret == 0) {
|
||||||
return true;
|
return true;
|
||||||
} else if (ret == -1 && errno == ETIME) {
|
} else if (ret == -ETIME) {
|
||||||
return false;
|
return false;
|
||||||
} else {
|
} else {
|
||||||
fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
|
fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
|
||||||
|
@@ -301,8 +301,8 @@ class Variable(Value):
|
|||||||
# constant. If we want to support names that have numeric or
|
# constant. If we want to support names that have numeric or
|
||||||
# punctuation characters, we can make the first assertion more flexible.
|
# punctuation characters, we can make the first assertion more flexible.
|
||||||
assert self.var_name.isalpha()
|
assert self.var_name.isalpha()
|
||||||
assert self.var_name is not 'True'
|
assert self.var_name != 'True'
|
||||||
assert self.var_name is not 'False'
|
assert self.var_name != 'False'
|
||||||
|
|
||||||
self.is_constant = m.group('const') is not None
|
self.is_constant = m.group('const') is not None
|
||||||
self.cond = m.group('cond')
|
self.cond = m.group('cond')
|
||||||
|
@@ -5152,7 +5152,8 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Set shader info defaults */
|
/* Set shader info defaults */
|
||||||
b->shader->info.gs.invocations = 1;
|
if (stage == MESA_SHADER_GEOMETRY)
|
||||||
|
b->shader->info.gs.invocations = 1;
|
||||||
|
|
||||||
/* Parse rounding mode execution modes. This has to happen earlier than
|
/* Parse rounding mode execution modes. This has to happen earlier than
|
||||||
* other changes in the execution modes since they can affect, for example,
|
* other changes in the execution modes since they can affect, for example,
|
||||||
|
@@ -138,15 +138,6 @@ _eglNativePlatformDetectNativeDisplay(void *nativeDisplay)
|
|||||||
if (first_pointer == gbm_create_device)
|
if (first_pointer == gbm_create_device)
|
||||||
return _EGL_PLATFORM_DRM;
|
return _EGL_PLATFORM_DRM;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_X11_PLATFORM
|
|
||||||
/* If not matched to any other platform, fallback to x11. */
|
|
||||||
return _EGL_PLATFORM_X11;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_HAIKU_PLATFORM
|
|
||||||
return _EGL_PLATFORM_HAIKU;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return _EGL_INVALID_PLATFORM;
|
return _EGL_INVALID_PLATFORM;
|
||||||
|
@@ -1,4 +1,4 @@
|
|||||||
# Copyright © 2017 Intel Corporation
|
# Copyright © 2017-2019 Intel Corporation
|
||||||
|
|
||||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
# of this software and associated documentation files (the "Software"), to deal
|
# of this software and associated documentation files (the "Software"), to deal
|
||||||
@@ -149,6 +149,7 @@ if not with_glvnd
|
|||||||
else
|
else
|
||||||
egl_lib_name = 'EGL_mesa'
|
egl_lib_name = 'EGL_mesa'
|
||||||
egl_lib_version = '0.0.0'
|
egl_lib_version = '0.0.0'
|
||||||
|
deps_for_egl += dep_glvnd
|
||||||
files_egl += [g_egldispatchstubs_h, g_egldispatchstubs_c]
|
files_egl += [g_egldispatchstubs_h, g_egldispatchstubs_c]
|
||||||
files_egl += files('main/eglglvnd.c', 'main/egldispatchstubs.c')
|
files_egl += files('main/eglglvnd.c', 'main/egldispatchstubs.c')
|
||||||
install_data(
|
install_data(
|
||||||
|
@@ -39,7 +39,6 @@
|
|||||||
#include "compiler/glsl_types.h"
|
#include "compiler/glsl_types.h"
|
||||||
#include "util/debug.h"
|
#include "util/debug.h"
|
||||||
#include "util/disk_cache.h"
|
#include "util/disk_cache.h"
|
||||||
#include "util/strtod.h"
|
|
||||||
#include "vk_format.h"
|
#include "vk_format.h"
|
||||||
#include "vk_util.h"
|
#include "vk_util.h"
|
||||||
|
|
||||||
@@ -431,7 +430,6 @@ tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
|
|||||||
return vk_error(instance, result);
|
return vk_error(instance, result);
|
||||||
}
|
}
|
||||||
|
|
||||||
_mesa_locale_init();
|
|
||||||
glsl_type_singleton_init_or_ref();
|
glsl_type_singleton_init_or_ref();
|
||||||
|
|
||||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||||
@@ -457,7 +455,6 @@ tu_DestroyInstance(VkInstance _instance,
|
|||||||
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
||||||
|
|
||||||
glsl_type_singleton_decref();
|
glsl_type_singleton_decref();
|
||||||
_mesa_locale_fini();
|
|
||||||
|
|
||||||
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
||||||
|
|
||||||
|
@@ -470,10 +470,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
|||||||
case PIPE_SHADER_FRAGMENT:
|
case PIPE_SHADER_FRAGMENT:
|
||||||
case PIPE_SHADER_VERTEX:
|
case PIPE_SHADER_VERTEX:
|
||||||
break;
|
break;
|
||||||
case PIPE_SHADER_GEOMETRY:
|
|
||||||
if (is_a6xx(screen))
|
|
||||||
break;
|
|
||||||
return 0;
|
|
||||||
case PIPE_SHADER_COMPUTE:
|
case PIPE_SHADER_COMPUTE:
|
||||||
if (has_compute(screen))
|
if (has_compute(screen))
|
||||||
break;
|
break;
|
||||||
|
@@ -136,6 +136,7 @@ enum {
|
|||||||
#define IRIS_DIRTY_VF_STATISTICS (1ull << 57)
|
#define IRIS_DIRTY_VF_STATISTICS (1ull << 57)
|
||||||
#define IRIS_DIRTY_PMA_FIX (1ull << 58)
|
#define IRIS_DIRTY_PMA_FIX (1ull << 58)
|
||||||
#define IRIS_DIRTY_DEPTH_BOUNDS (1ull << 59)
|
#define IRIS_DIRTY_DEPTH_BOUNDS (1ull << 59)
|
||||||
|
#define IRIS_DIRTY_RENDER_BUFFER (1ull << 60)
|
||||||
|
|
||||||
#define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_CS | \
|
#define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_CS | \
|
||||||
IRIS_DIRTY_SAMPLER_STATES_CS | \
|
IRIS_DIRTY_SAMPLER_STATES_CS | \
|
||||||
@@ -151,7 +152,8 @@ enum {
|
|||||||
IRIS_DIRTY_BINDINGS_TES | \
|
IRIS_DIRTY_BINDINGS_TES | \
|
||||||
IRIS_DIRTY_BINDINGS_GS | \
|
IRIS_DIRTY_BINDINGS_GS | \
|
||||||
IRIS_DIRTY_BINDINGS_FS | \
|
IRIS_DIRTY_BINDINGS_FS | \
|
||||||
IRIS_DIRTY_BINDINGS_CS)
|
IRIS_DIRTY_BINDINGS_CS | \
|
||||||
|
IRIS_DIRTY_RENDER_BUFFER)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Non-orthogonal state (NOS) dependency flags.
|
* Non-orthogonal state (NOS) dependency flags.
|
||||||
|
@@ -3023,31 +3023,14 @@ iris_set_framebuffer_state(struct pipe_context *ctx,
|
|||||||
/* Render target change */
|
/* Render target change */
|
||||||
ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS;
|
ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS;
|
||||||
|
|
||||||
|
ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
|
||||||
|
|
||||||
ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
|
ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
|
||||||
|
|
||||||
ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_FRAMEBUFFER];
|
ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_FRAMEBUFFER];
|
||||||
|
|
||||||
if (GEN_GEN == 8)
|
if (GEN_GEN == 8)
|
||||||
ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
|
ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
|
||||||
|
|
||||||
#if GEN_GEN == 11
|
|
||||||
// XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?)
|
|
||||||
// XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6
|
|
||||||
|
|
||||||
/* The PIPE_CONTROL command description says:
|
|
||||||
*
|
|
||||||
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
|
|
||||||
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
|
|
||||||
* Target Cache Flush by enabling this bit. When render target flush
|
|
||||||
* is set due to new association of BTI, PS Scoreboard Stall bit must
|
|
||||||
* be set in this packet."
|
|
||||||
*/
|
|
||||||
// XXX: does this need to happen at 3DSTATE_BTP_PS time?
|
|
||||||
iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
|
|
||||||
"workaround: RT BTI change [draw]",
|
|
||||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
|
||||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -5297,6 +5280,24 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (GEN_GEN >= 11 && (dirty & IRIS_DIRTY_RENDER_BUFFER)) {
|
||||||
|
// XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?)
|
||||||
|
// XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6
|
||||||
|
|
||||||
|
/* The PIPE_CONTROL command description says:
|
||||||
|
*
|
||||||
|
* "Whenever a Binding Table Index (BTI) used by a Render Target
|
||||||
|
* Message points to a different RENDER_SURFACE_STATE, SW must issue a
|
||||||
|
* Render Target Cache Flush by enabling this bit. When render target
|
||||||
|
* flush is set due to new association of BTI, PS Scoreboard Stall bit
|
||||||
|
* must be set in this packet."
|
||||||
|
*/
|
||||||
|
// XXX: does this need to happen at 3DSTATE_BTP_PS time?
|
||||||
|
iris_emit_pipe_control_flush(batch, "workaround: RT BTI change [draw]",
|
||||||
|
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||||
|
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||||
|
}
|
||||||
|
|
||||||
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
|
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
|
||||||
if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
|
if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
|
||||||
iris_populate_binding_table(ice, batch, stage, false);
|
iris_populate_binding_table(ice, batch, stage, false);
|
||||||
@@ -5508,7 +5509,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
|||||||
BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
|
BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
|
||||||
cl.NonPerspectiveBarycentricEnable = true;
|
cl.NonPerspectiveBarycentricEnable = true;
|
||||||
|
|
||||||
cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0;
|
cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1;
|
||||||
cl.MaximumVPIndex = ice->state.num_viewports - 1;
|
cl.MaximumVPIndex = ice->state.num_viewports - 1;
|
||||||
}
|
}
|
||||||
iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
|
iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
|
||||||
|
@@ -122,6 +122,8 @@ private:
|
|||||||
void emitSAM();
|
void emitSAM();
|
||||||
void emitRAM();
|
void emitRAM();
|
||||||
|
|
||||||
|
void emitPSETP();
|
||||||
|
|
||||||
void emitMOV();
|
void emitMOV();
|
||||||
void emitS2R();
|
void emitS2R();
|
||||||
void emitCS2R();
|
void emitCS2R();
|
||||||
@@ -690,6 +692,31 @@ CodeEmitterGM107::emitRAM()
|
|||||||
* predicate/cc
|
* predicate/cc
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
|
|
||||||
|
void
|
||||||
|
CodeEmitterGM107::emitPSETP()
|
||||||
|
{
|
||||||
|
|
||||||
|
emitInsn(0x50900000);
|
||||||
|
|
||||||
|
switch (insn->op) {
|
||||||
|
case OP_AND: emitField(0x18, 3, 0); break;
|
||||||
|
case OP_OR: emitField(0x18, 3, 1); break;
|
||||||
|
case OP_XOR: emitField(0x18, 3, 2); break;
|
||||||
|
default:
|
||||||
|
assert(!"unexpected operation");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
// emitINV (0x2a);
|
||||||
|
emitPRED(0x27); // TODO: support 3-arg
|
||||||
|
emitINV (0x20, insn->src(1));
|
||||||
|
emitPRED(0x1d, insn->src(1));
|
||||||
|
emitINV (0x0f, insn->src(0));
|
||||||
|
emitPRED(0x0c, insn->src(0));
|
||||||
|
emitPRED(0x03, insn->def(0));
|
||||||
|
emitPRED(0x00);
|
||||||
|
}
|
||||||
|
|
||||||
/*******************************************************************************
|
/*******************************************************************************
|
||||||
* movement / conversion
|
* movement / conversion
|
||||||
******************************************************************************/
|
******************************************************************************/
|
||||||
@@ -3557,7 +3584,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
|
|||||||
case OP_AND:
|
case OP_AND:
|
||||||
case OP_OR:
|
case OP_OR:
|
||||||
case OP_XOR:
|
case OP_XOR:
|
||||||
emitLOP();
|
switch (insn->def(0).getFile()) {
|
||||||
|
case FILE_GPR: emitLOP(); break;
|
||||||
|
case FILE_PREDICATE: emitPSETP(); break;
|
||||||
|
default:
|
||||||
|
assert(!"invalid bool op");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case OP_NOT:
|
case OP_NOT:
|
||||||
emitNOT();
|
emitNOT();
|
||||||
|
@@ -1591,6 +1591,12 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
|||||||
if (insn.getOpcode() == TGSI_OPCODE_STORE &&
|
if (insn.getOpcode() == TGSI_OPCODE_STORE &&
|
||||||
dst.getFile() != TGSI_FILE_MEMORY) {
|
dst.getFile() != TGSI_FILE_MEMORY) {
|
||||||
info->io.globalAccess |= 0x2;
|
info->io.globalAccess |= 0x2;
|
||||||
|
|
||||||
|
if (dst.getFile() == TGSI_FILE_INPUT) {
|
||||||
|
// TODO: Handle indirect somehow?
|
||||||
|
const int i = dst.getIndex(0);
|
||||||
|
info->in[i].mask |= 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dst.getFile() == TGSI_FILE_OUTPUT) {
|
if (dst.getFile() == TGSI_FILE_OUTPUT) {
|
||||||
|
@@ -1802,6 +1802,9 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
|
|||||||
{
|
{
|
||||||
uint32_t base = slot * NVC0_SU_INFO__STRIDE;
|
uint32_t base = slot * NVC0_SU_INFO__STRIDE;
|
||||||
|
|
||||||
|
// We don't upload surface info for bindless for GM107+
|
||||||
|
assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET);
|
||||||
|
|
||||||
if (ptr) {
|
if (ptr) {
|
||||||
ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
|
ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
|
||||||
if (bindless)
|
if (bindless)
|
||||||
@@ -2204,7 +2207,7 @@ getDestType(const ImgType type) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
|
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded)
|
||||||
{
|
{
|
||||||
const TexInstruction::ImgFormatDesc *format = su->tex.format;
|
const TexInstruction::ImgFormatDesc *format = su->tex.format;
|
||||||
int width = format->bits[0] + format->bits[1] +
|
int width = format->bits[0] + format->bits[1] +
|
||||||
@@ -2223,21 +2226,38 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
|
|||||||
if (width < 32)
|
if (width < 32)
|
||||||
untypedDst[0] = bld.getSSA();
|
untypedDst[0] = bld.getSSA();
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++) {
|
if (loaded && loaded[0]) {
|
||||||
typedDst[i] = su->getDef(i);
|
for (int i = 0; i < 4; i++) {
|
||||||
|
if (loaded[i])
|
||||||
|
typedDst[i] = loaded[i]->getDef(0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
typedDst[i] = su->getDef(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the untyped dsts as the su's destinations
|
// Set the untyped dsts as the su's destinations
|
||||||
for (int i = 0; i < 4; i++)
|
if (loaded && loaded[0]) {
|
||||||
su->setDef(i, untypedDst[i]);
|
for (int i = 0; i < 4; i++)
|
||||||
|
if (loaded[i])
|
||||||
|
loaded[i]->setDef(0, untypedDst[i]);
|
||||||
|
} else {
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
su->setDef(i, untypedDst[i]);
|
||||||
|
|
||||||
bld.setPosition(su, true);
|
bld.setPosition(su, true);
|
||||||
|
}
|
||||||
|
|
||||||
// Unpack each component into the typed dsts
|
// Unpack each component into the typed dsts
|
||||||
int bits = 0;
|
int bits = 0;
|
||||||
for (int i = 0; i < 4; bits += format->bits[i], i++) {
|
for (int i = 0; i < 4; bits += format->bits[i], i++) {
|
||||||
if (!typedDst[i])
|
if (!typedDst[i])
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if (loaded && loaded[0])
|
||||||
|
bld.setPosition(loaded[i], true);
|
||||||
|
|
||||||
if (i >= format->components) {
|
if (i >= format->components) {
|
||||||
if (format->type == FLOAT ||
|
if (format->type == FLOAT ||
|
||||||
format->type == UNORM ||
|
format->type == UNORM ||
|
||||||
@@ -2308,7 +2328,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
|
|||||||
processSurfaceCoordsNVE4(su);
|
processSurfaceCoordsNVE4(su);
|
||||||
|
|
||||||
if (su->op == OP_SULDP) {
|
if (su->op == OP_SULDP) {
|
||||||
convertSurfaceFormat(su);
|
convertSurfaceFormat(su, NULL);
|
||||||
insertOOBSurfaceOpResult(su);
|
insertOOBSurfaceOpResult(su);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2421,7 +2441,7 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
|
|||||||
processSurfaceCoordsNVC0(su);
|
processSurfaceCoordsNVC0(su);
|
||||||
|
|
||||||
if (su->op == OP_SULDP) {
|
if (su->op == OP_SULDP) {
|
||||||
convertSurfaceFormat(su);
|
convertSurfaceFormat(su, NULL);
|
||||||
insertOOBSurfaceOpResult(su);
|
insertOOBSurfaceOpResult(su);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2463,14 +2483,16 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
TexInstruction *
|
||||||
NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
|
NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su, Instruction *ret[4])
|
||||||
{
|
{
|
||||||
const int slot = su->tex.r;
|
const int slot = su->tex.r;
|
||||||
const int dim = su->tex.target.getDim();
|
const int dim = su->tex.target.getDim();
|
||||||
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
|
const bool array = su->tex.target.isArray() || su->tex.target.isCube();
|
||||||
|
const int arg = dim + array;
|
||||||
Value *ind = su->getIndirectR();
|
Value *ind = su->getIndirectR();
|
||||||
Value *handle;
|
Value *handle;
|
||||||
|
Instruction *pred = NULL, *pred2d = NULL;
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
|
|
||||||
bld.setPosition(su, false);
|
bld.setPosition(su, false);
|
||||||
@@ -2489,67 +2511,153 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
|
|||||||
assert(pos == 0);
|
assert(pos == 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dim == 2 && !array) {
|
||||||
|
// This might be a 2d slice of a 3d texture, try to load the z
|
||||||
|
// coordinate in.
|
||||||
|
Value *v;
|
||||||
|
if (!su->tex.bindless)
|
||||||
|
v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
|
||||||
|
else
|
||||||
|
v = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), ind, bld.mkImm(11));
|
||||||
|
Value *is_3d = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), v, bld.mkImm(1));
|
||||||
|
pred2d = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
|
||||||
|
TYPE_U32, bld.mkImm(0), is_3d);
|
||||||
|
|
||||||
|
bld.mkOp2(OP_SHR, TYPE_U32, v, v, bld.loadImm(NULL, 16));
|
||||||
|
su->moveSources(dim, 1);
|
||||||
|
su->setSrc(dim, v);
|
||||||
|
su->tex.target = nv50_ir::TEX_TARGET_3D;
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
|
||||||
if (su->tex.bindless)
|
if (su->tex.bindless)
|
||||||
handle = ind;
|
handle = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ind, bld.mkImm(2047));
|
||||||
else
|
else
|
||||||
handle = loadTexHandle(ind, slot + 32);
|
handle = loadTexHandle(ind, slot + 32);
|
||||||
|
|
||||||
su->setSrc(arg + pos, handle);
|
su->setSrc(arg + pos, handle);
|
||||||
|
|
||||||
// The address check doesn't make sense here. The format check could make
|
// The address check doesn't make sense here. The format check could make
|
||||||
// sense but it's a bit of a pain.
|
// sense but it's a bit of a pain.
|
||||||
if (su->tex.bindless)
|
if (!su->tex.bindless) {
|
||||||
return;
|
// prevent read fault when the image is not actually bound
|
||||||
|
pred =
|
||||||
|
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
|
||||||
|
TYPE_U32, bld.mkImm(0),
|
||||||
|
loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
|
||||||
|
if (su->op != OP_SUSTP && su->tex.format) {
|
||||||
|
const TexInstruction::ImgFormatDesc *format = su->tex.format;
|
||||||
|
int blockwidth = format->bits[0] + format->bits[1] +
|
||||||
|
format->bits[2] + format->bits[3];
|
||||||
|
|
||||||
// prevent read fault when the image is not actually bound
|
assert(format->components != 0);
|
||||||
CmpInstruction *pred =
|
// make sure that the format doesn't mismatch when it's not FMT_NONE
|
||||||
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
|
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
|
||||||
TYPE_U32, bld.mkImm(0),
|
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
|
||||||
loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
|
loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
|
||||||
if (su->op != OP_SUSTP && su->tex.format) {
|
pred->getDef(0));
|
||||||
const TexInstruction::ImgFormatDesc *format = su->tex.format;
|
}
|
||||||
int blockwidth = format->bits[0] + format->bits[1] +
|
|
||||||
format->bits[2] + format->bits[3];
|
|
||||||
|
|
||||||
assert(format->components != 0);
|
|
||||||
// make sure that the format doesn't mismatch when it's not FMT_NONE
|
|
||||||
bld.mkCmp(OP_SET_OR, CC_NE, TYPE_U32, pred->getDef(0),
|
|
||||||
TYPE_U32, bld.loadImm(NULL, blockwidth / 8),
|
|
||||||
loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
|
|
||||||
pred->getDef(0));
|
|
||||||
}
|
}
|
||||||
su->setPredicate(CC_NOT_P, pred->getDef(0));
|
|
||||||
|
// Now we have "pred" which (optionally) contains whether to do the surface
|
||||||
|
// op at all, and a "pred2d" which indicates that, in case of doing the
|
||||||
|
// surface op, we have to create a 2d and 3d version, conditioned on pred2d.
|
||||||
|
TexInstruction *su2d = NULL;
|
||||||
|
if (pred2d) {
|
||||||
|
su2d = cloneForward(func, su)->asTex();
|
||||||
|
for (unsigned i = 0; su->defExists(i); ++i)
|
||||||
|
su2d->setDef(i, bld.getSSA());
|
||||||
|
su2d->moveSources(dim + 1, -1);
|
||||||
|
su2d->tex.target = nv50_ir::TEX_TARGET_2D;
|
||||||
|
}
|
||||||
|
if (pred2d && pred) {
|
||||||
|
Instruction *pred3d = bld.mkOp2(OP_AND, TYPE_U8,
|
||||||
|
bld.getSSA(1, FILE_PREDICATE),
|
||||||
|
pred->getDef(0), pred2d->getDef(0));
|
||||||
|
pred3d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
|
||||||
|
pred3d->src(1).mod = Modifier(NV50_IR_MOD_NOT);
|
||||||
|
su->setPredicate(CC_P, pred3d->getDef(0));
|
||||||
|
pred2d = bld.mkOp2(OP_AND, TYPE_U8, bld.getSSA(1, FILE_PREDICATE),
|
||||||
|
pred->getDef(0), pred2d->getDef(0));
|
||||||
|
pred2d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
|
||||||
|
} else if (pred) {
|
||||||
|
su->setPredicate(CC_NOT_P, pred->getDef(0));
|
||||||
|
} else if (pred2d) {
|
||||||
|
su->setPredicate(CC_NOT_P, pred2d->getDef(0));
|
||||||
|
}
|
||||||
|
if (su2d) {
|
||||||
|
su2d->setPredicate(CC_P, pred2d->getDef(0));
|
||||||
|
bld.insert(su2d);
|
||||||
|
|
||||||
|
// Create a UNION so that RA assigns the same registers
|
||||||
|
bld.setPosition(su, true);
|
||||||
|
for (unsigned i = 0; su->defExists(i); ++i) {
|
||||||
|
assert(i < 4);
|
||||||
|
|
||||||
|
ValueDef &def = su->def(i);
|
||||||
|
ValueDef &def2 = su2d->def(i);
|
||||||
|
Instruction *mov = NULL;
|
||||||
|
|
||||||
|
if (pred) {
|
||||||
|
mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||||
|
mov->setPredicate(CC_P, pred->getDef(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
|
||||||
|
bld.getSSA(),
|
||||||
|
NULL, def2.get());
|
||||||
|
def.replace(uni->getDef(0), false);
|
||||||
|
uni->setSrc(0, def.get());
|
||||||
|
if (mov)
|
||||||
|
uni->setSrc(2, mov->getDef(0));
|
||||||
|
}
|
||||||
|
} else if (pred) {
|
||||||
|
// Create a UNION so that RA assigns the same registers
|
||||||
|
bld.setPosition(su, true);
|
||||||
|
for (unsigned i = 0; su->defExists(i); ++i) {
|
||||||
|
assert(i < 4);
|
||||||
|
|
||||||
|
ValueDef &def = su->def(i);
|
||||||
|
|
||||||
|
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||||
|
mov->setPredicate(CC_P, pred->getDef(0));
|
||||||
|
|
||||||
|
Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
|
||||||
|
bld.getSSA(),
|
||||||
|
NULL, mov->getDef(0));
|
||||||
|
def.replace(uni->getDef(0), false);
|
||||||
|
uni->setSrc(0, def.get());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return su2d;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
|
NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
|
||||||
{
|
{
|
||||||
processSurfaceCoordsGM107(su);
|
// processSurfaceCoords also takes care of fixing up the outputs and
|
||||||
|
// union'ing them with 0 as necessary. Additionally it may create a second
|
||||||
|
// surface which needs some of the similar fixups.
|
||||||
|
|
||||||
|
Instruction *loaded[4] = {};
|
||||||
|
TexInstruction *su2 = processSurfaceCoordsGM107(su, loaded);
|
||||||
|
|
||||||
if (su->op == OP_SULDP) {
|
if (su->op == OP_SULDP) {
|
||||||
convertSurfaceFormat(su);
|
convertSurfaceFormat(su, loaded);
|
||||||
insertOOBSurfaceOpResult(su);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (su->op == OP_SUREDP) {
|
if (su->op == OP_SUREDP) {
|
||||||
Value *def = su->getDef(0);
|
|
||||||
|
|
||||||
su->op = OP_SUREDB;
|
su->op = OP_SUREDB;
|
||||||
|
}
|
||||||
|
|
||||||
// There may not be a predicate in the bindless case.
|
// If we fixed up the type of the regular surface load instruction, we also
|
||||||
if (su->getPredicate()) {
|
// have to fix up the copy.
|
||||||
su->setDef(0, bld.getSSA());
|
if (su2) {
|
||||||
|
su2->op = su->op;
|
||||||
bld.setPosition(su, true);
|
su2->dType = su->dType;
|
||||||
|
su2->sType = su->sType;
|
||||||
// make sure to initialize dst value when the atomic operation is not
|
|
||||||
// performed
|
|
||||||
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
|
||||||
|
|
||||||
assert(su->cc == CC_NOT_P);
|
|
||||||
mov->setPredicate(CC_P, su->getPredicate());
|
|
||||||
|
|
||||||
bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -171,10 +171,10 @@ private:
|
|||||||
Value *loadMsInfo32(Value *ptr, uint32_t off);
|
Value *loadMsInfo32(Value *ptr, uint32_t off);
|
||||||
|
|
||||||
void adjustCoordinatesMS(TexInstruction *);
|
void adjustCoordinatesMS(TexInstruction *);
|
||||||
void processSurfaceCoordsGM107(TexInstruction *);
|
TexInstruction *processSurfaceCoordsGM107(TexInstruction *, Instruction *[4]);
|
||||||
void processSurfaceCoordsNVE4(TexInstruction *);
|
void processSurfaceCoordsNVE4(TexInstruction *);
|
||||||
void processSurfaceCoordsNVC0(TexInstruction *);
|
void processSurfaceCoordsNVC0(TexInstruction *);
|
||||||
void convertSurfaceFormat(TexInstruction *);
|
void convertSurfaceFormat(TexInstruction *, Instruction **);
|
||||||
void insertOOBSurfaceOpResult(TexInstruction *);
|
void insertOOBSurfaceOpResult(TexInstruction *);
|
||||||
Value *calculateSampleOffset(Value *sampleID);
|
Value *calculateSampleOffset(Value *sampleID);
|
||||||
|
|
||||||
|
@@ -1433,7 +1433,15 @@ gm107_create_image_handle(struct pipe_context *pipe,
|
|||||||
|
|
||||||
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
|
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
|
||||||
|
|
||||||
return 0x100000000ULL | tic->id;
|
// Compute handle. This will include the TIC as well as some additional
|
||||||
|
// info regarding the bound 3d surface layer, if applicable.
|
||||||
|
uint64_t handle = 0x100000000ULL | tic->id;
|
||||||
|
struct nv04_resource *res = nv04_resource(view->resource);
|
||||||
|
if (res->base.target == PIPE_TEXTURE_3D) {
|
||||||
|
handle |= 1 << 11;
|
||||||
|
handle |= view->u.tex.first_layer << (11 + 16);
|
||||||
|
}
|
||||||
|
return handle;
|
||||||
|
|
||||||
fail:
|
fail:
|
||||||
FREE(tic);
|
FREE(tic);
|
||||||
|
@@ -903,6 +903,10 @@ static void si_disk_cache_create(struct si_screen *sscreen)
|
|||||||
/* These flags affect shader compilation. */
|
/* These flags affect shader compilation. */
|
||||||
#define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
|
#define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
|
||||||
uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
|
uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
|
||||||
|
/* Reserve left-most bit for tgsi/nir selector */
|
||||||
|
assert(!(shader_debug_flags & (1u << 31)));
|
||||||
|
shader_debug_flags |= (uint32_t)
|
||||||
|
((sscreen->options.enable_nir & 0x1) << 31);
|
||||||
|
|
||||||
/* Add the high bits of 32-bit addresses, which affects
|
/* Add the high bits of 32-bit addresses, which affects
|
||||||
* how 32-bit addresses are expanded to 64 bits.
|
* how 32-bit addresses are expanded to 64 bits.
|
||||||
@@ -1026,6 +1030,13 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
#define OPT_BOOL(name, dflt, description) \
|
||||||
|
sscreen->options.name = \
|
||||||
|
driQueryOptionb(config->options, "radeonsi_"#name);
|
||||||
|
#include "si_debug_options.h"
|
||||||
|
}
|
||||||
|
|
||||||
si_disk_cache_create(sscreen);
|
si_disk_cache_create(sscreen);
|
||||||
|
|
||||||
/* Determine the number of shader compiler threads. */
|
/* Determine the number of shader compiler threads. */
|
||||||
@@ -1146,13 +1157,6 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
|||||||
sscreen->commutative_blend_add =
|
sscreen->commutative_blend_add =
|
||||||
driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
|
driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
|
||||||
|
|
||||||
{
|
|
||||||
#define OPT_BOOL(name, dflt, description) \
|
|
||||||
sscreen->options.name = \
|
|
||||||
driQueryOptionb(config->options, "radeonsi_"#name);
|
|
||||||
#include "si_debug_options.h"
|
|
||||||
}
|
|
||||||
|
|
||||||
sscreen->use_ngg = sscreen->info.chip_class >= GFX10 &&
|
sscreen->use_ngg = sscreen->info.chip_class >= GFX10 &&
|
||||||
sscreen->info.family != CHIP_NAVI14 &&
|
sscreen->info.family != CHIP_NAVI14 &&
|
||||||
!(sscreen->debug_flags & DBG(NO_NGG));
|
!(sscreen->debug_flags & DBG(NO_NGG));
|
||||||
|
@@ -1231,6 +1231,14 @@ swr_update_derived(struct pipe_context *pipe,
|
|||||||
util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
|
util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
|
||||||
&vp->minZ, &vp->maxZ);
|
&vp->minZ, &vp->maxZ);
|
||||||
|
|
||||||
|
if (rasterizer->depth_clip_near) {
|
||||||
|
vp->minZ = 0.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rasterizer->depth_clip_far) {
|
||||||
|
vp->maxZ = 1.0f;
|
||||||
|
}
|
||||||
|
|
||||||
vpm->m00[i] = state->scale[0];
|
vpm->m00[i] = state->scale[0];
|
||||||
vpm->m11[i] = state->scale[1];
|
vpm->m11[i] = state->scale[1];
|
||||||
vpm->m22[i] = state->scale[2];
|
vpm->m22[i] = state->scale[2];
|
||||||
|
@@ -488,9 +488,10 @@ get_render_pass(struct zink_context *ctx)
|
|||||||
struct zink_render_pass_state state;
|
struct zink_render_pass_state state;
|
||||||
|
|
||||||
for (int i = 0; i < fb->nr_cbufs; i++) {
|
for (int i = 0; i < fb->nr_cbufs; i++) {
|
||||||
struct zink_resource *cbuf = zink_resource(fb->cbufs[i]->texture);
|
struct pipe_resource *res = fb->cbufs[i]->texture;
|
||||||
state.rts[i].format = cbuf->format;
|
state.rts[i].format = zink_get_format(screen, fb->cbufs[i]->format);
|
||||||
state.rts[i].samples = cbuf->base.nr_samples > 0 ? cbuf->base.nr_samples : VK_SAMPLE_COUNT_1_BIT;
|
state.rts[i].samples = res->nr_samples > 0 ? res->nr_samples :
|
||||||
|
VK_SAMPLE_COUNT_1_BIT;
|
||||||
}
|
}
|
||||||
state.num_cbufs = fb->nr_cbufs;
|
state.num_cbufs = fb->nr_cbufs;
|
||||||
|
|
||||||
@@ -993,6 +994,25 @@ get_gfx_program(struct zink_context *ctx)
|
|||||||
return ctx->curr_program;
|
return ctx->curr_program;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
line_width_needed(enum pipe_prim_type reduced_prim,
|
||||||
|
VkPolygonMode polygon_mode)
|
||||||
|
{
|
||||||
|
switch (reduced_prim) {
|
||||||
|
case PIPE_PRIM_POINTS:
|
||||||
|
return false;
|
||||||
|
|
||||||
|
case PIPE_PRIM_LINES:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
case PIPE_PRIM_TRIANGLES:
|
||||||
|
return polygon_mode == VK_POLYGON_MODE_LINE;
|
||||||
|
|
||||||
|
default:
|
||||||
|
unreachable("unexpected reduced prim");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
zink_draw_vbo(struct pipe_context *pctx,
|
zink_draw_vbo(struct pipe_context *pctx,
|
||||||
const struct pipe_draw_info *dinfo)
|
const struct pipe_draw_info *dinfo)
|
||||||
@@ -1156,7 +1176,7 @@ zink_draw_vbo(struct pipe_context *pctx,
|
|||||||
vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
|
vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (reduced_prim == PIPE_PRIM_LINES) {
|
if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
|
||||||
if (screen->feats.wideLines || ctx->line_width == 1.0f)
|
if (screen->feats.wideLines || ctx->line_width == 1.0f)
|
||||||
vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width);
|
vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width);
|
||||||
else
|
else
|
||||||
@@ -1294,6 +1314,10 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
|
|||||||
zink_batch_reference_resoure(batch, src);
|
zink_batch_reference_resoure(batch, src);
|
||||||
zink_batch_reference_resoure(batch, dst);
|
zink_batch_reference_resoure(batch, dst);
|
||||||
|
|
||||||
|
if (src->layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
|
||||||
|
zink_resource_barrier(batch->cmdbuf, src, src->aspect,
|
||||||
|
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||||
|
|
||||||
if (dst->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
if (dst->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
||||||
zink_resource_barrier(batch->cmdbuf, dst, dst->aspect,
|
zink_resource_barrier(batch->cmdbuf, dst, dst->aspect,
|
||||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||||
|
@@ -137,6 +137,7 @@ resource_create(struct pipe_screen *pscreen,
|
|||||||
|
|
||||||
VkImageCreateInfo ici = {};
|
VkImageCreateInfo ici = {};
|
||||||
ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||||
|
ici.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
||||||
|
|
||||||
switch (templ->target) {
|
switch (templ->target) {
|
||||||
case PIPE_TEXTURE_1D:
|
case PIPE_TEXTURE_1D:
|
||||||
@@ -146,7 +147,7 @@ resource_create(struct pipe_screen *pscreen,
|
|||||||
|
|
||||||
case PIPE_TEXTURE_CUBE:
|
case PIPE_TEXTURE_CUBE:
|
||||||
case PIPE_TEXTURE_CUBE_ARRAY:
|
case PIPE_TEXTURE_CUBE_ARRAY:
|
||||||
ici.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
|
ici.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
|
||||||
/* fall-through */
|
/* fall-through */
|
||||||
case PIPE_TEXTURE_2D:
|
case PIPE_TEXTURE_2D:
|
||||||
case PIPE_TEXTURE_2D_ARRAY:
|
case PIPE_TEXTURE_2D_ARRAY:
|
||||||
@@ -157,7 +158,7 @@ resource_create(struct pipe_screen *pscreen,
|
|||||||
case PIPE_TEXTURE_3D:
|
case PIPE_TEXTURE_3D:
|
||||||
ici.imageType = VK_IMAGE_TYPE_3D;
|
ici.imageType = VK_IMAGE_TYPE_3D;
|
||||||
if (templ->bind & PIPE_BIND_RENDER_TARGET)
|
if (templ->bind & PIPE_BIND_RENDER_TARGET)
|
||||||
ici.flags = VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
|
ici.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case PIPE_BUFFER:
|
case PIPE_BUFFER:
|
||||||
|
@@ -125,6 +125,8 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
|
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
|
||||||
|
return 0; /* TODO: re-enable after implementing nir_texop_txd */
|
||||||
|
|
||||||
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
|
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
|
||||||
case PIPE_CAP_VERTEX_SHADER_SATURATE:
|
case PIPE_CAP_VERTEX_SHADER_SATURATE:
|
||||||
return 1;
|
return 1;
|
||||||
@@ -284,7 +286,7 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
|||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
|
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
|
||||||
return 1;
|
return 0;
|
||||||
|
|
||||||
case PIPE_CAP_NIR_COMPACT_ARRAYS:
|
case PIPE_CAP_NIR_COMPACT_ARRAYS:
|
||||||
return 1;
|
return 1;
|
||||||
@@ -549,7 +551,7 @@ static const VkFormat formats[PIPE_FORMAT_COUNT] = {
|
|||||||
[PIPE_FORMAT_Z32_FLOAT] = VK_FORMAT_D32_SFLOAT,
|
[PIPE_FORMAT_Z32_FLOAT] = VK_FORMAT_D32_SFLOAT,
|
||||||
[PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = VK_FORMAT_D32_SFLOAT_S8_UINT,
|
[PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||||
[PIPE_FORMAT_Z16_UNORM] = VK_FORMAT_D16_UNORM,
|
[PIPE_FORMAT_Z16_UNORM] = VK_FORMAT_D16_UNORM,
|
||||||
[PIPE_FORMAT_X8Z24_UNORM] = VK_FORMAT_X8_D24_UNORM_PACK32,
|
[PIPE_FORMAT_Z24X8_UNORM] = VK_FORMAT_X8_D24_UNORM_PACK32,
|
||||||
[PIPE_FORMAT_Z24_UNORM_S8_UINT] = VK_FORMAT_D24_UNORM_S8_UINT,
|
[PIPE_FORMAT_Z24_UNORM_S8_UINT] = VK_FORMAT_D24_UNORM_S8_UINT,
|
||||||
|
|
||||||
// compressed formats
|
// compressed formats
|
||||||
|
@@ -940,7 +940,7 @@ dri2_create_image_from_fd(__DRIscreen *_screen,
|
|||||||
whandles[i].stride = (unsigned)strides[index];
|
whandles[i].stride = (unsigned)strides[index];
|
||||||
whandles[i].offset = (unsigned)offsets[index];
|
whandles[i].offset = (unsigned)offsets[index];
|
||||||
whandles[i].modifier = modifier;
|
whandles[i].modifier = modifier;
|
||||||
whandles[i].plane = i;
|
whandles[i].plane = index;
|
||||||
}
|
}
|
||||||
|
|
||||||
img = dri2_create_image_from_winsys(_screen, width, height, use, map,
|
img = dri2_create_image_from_winsys(_screen, width, height, use, map,
|
||||||
|
@@ -32,6 +32,7 @@ libgraw_gdi = shared_library(
|
|||||||
dependencies : [
|
dependencies : [
|
||||||
dep_ws2_32, idep_mesautil, driver_swrast,
|
dep_ws2_32, idep_mesautil, driver_swrast,
|
||||||
],
|
],
|
||||||
|
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libgraw.dll
|
||||||
)
|
)
|
||||||
|
|
||||||
libgraw = libgraw_gdi
|
libgraw = libgraw_gdi
|
||||||
|
@@ -32,6 +32,7 @@ libgraw_null = shared_library(
|
|||||||
include_directories : inc_common,
|
include_directories : inc_common,
|
||||||
link_with : libgallium,
|
link_with : libgallium,
|
||||||
dependencies : idep_mesautil,
|
dependencies : idep_mesautil,
|
||||||
|
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libgraw_null.dll
|
||||||
)
|
)
|
||||||
|
|
||||||
libgraw = libgraw_null
|
libgraw = libgraw_null
|
||||||
|
@@ -58,6 +58,7 @@ libosmesa = shared_library(
|
|||||||
dep_ws2_32, dep_selinux, dep_thread, dep_clock, dep_unwind,
|
dep_ws2_32, dep_selinux, dep_thread, dep_clock, dep_unwind,
|
||||||
driver_swrast, driver_swr,
|
driver_swrast, driver_swr,
|
||||||
],
|
],
|
||||||
|
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libosmesa.dll
|
||||||
soversion : host_machine.system() == 'windows' ? '' : '8',
|
soversion : host_machine.system() == 'windows' ? '' : '8',
|
||||||
version : '8.0.0',
|
version : '8.0.0',
|
||||||
install : true,
|
install : true,
|
||||||
|
@@ -47,6 +47,15 @@ endif
|
|||||||
|
|
||||||
pipe_loader_install_dir = join_paths(get_option('libdir'), 'gallium-pipe')
|
pipe_loader_install_dir = join_paths(get_option('libdir'), 'gallium-pipe')
|
||||||
|
|
||||||
|
_kmsro_targets = [
|
||||||
|
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
|
||||||
|
driver_panfrost, driver_lima,
|
||||||
|
]
|
||||||
|
|
||||||
|
if with_gallium_v3d
|
||||||
|
_kmsro_targets += [idep_xmlconfig, dep_expat]
|
||||||
|
endif
|
||||||
|
|
||||||
pipe_loaders = [
|
pipe_loaders = [
|
||||||
[with_gallium_i915, 'i915', driver_i915, []],
|
[with_gallium_i915, 'i915', driver_i915, []],
|
||||||
[with_gallium_nouveau, 'nouveau', driver_nouveau, []],
|
[with_gallium_nouveau, 'nouveau', driver_nouveau, []],
|
||||||
@@ -54,7 +63,7 @@ pipe_loaders = [
|
|||||||
[with_gallium_r600, 'r600', driver_r600, []],
|
[with_gallium_r600, 'r600', driver_r600, []],
|
||||||
[with_gallium_radeonsi, 'radeonsi', [driver_radeonsi, idep_xmlconfig], []],
|
[with_gallium_radeonsi, 'radeonsi', [driver_radeonsi, idep_xmlconfig], []],
|
||||||
[with_gallium_freedreno, 'msm', driver_freedreno, []],
|
[with_gallium_freedreno, 'msm', driver_freedreno, []],
|
||||||
[with_gallium_panfrost, 'kmsro', [driver_kmsro, driver_panfrost], []],
|
[with_gallium_kmsro, 'kmsro', _kmsro_targets, []],
|
||||||
[with_gallium_svga, 'vmwgfx', driver_svga, []],
|
[with_gallium_svga, 'vmwgfx', driver_svga, []],
|
||||||
[with_gallium_softpipe, 'swrast', [driver_swrast, driver_swr], [libwsw, libws_null]],
|
[with_gallium_softpipe, 'swrast', [driver_swrast, driver_swr], [libwsw, libws_null]],
|
||||||
]
|
]
|
||||||
|
@@ -736,8 +736,7 @@ namespace brw {
|
|||||||
src_reg
|
src_reg
|
||||||
fix_byte_src(const src_reg &src) const
|
fix_byte_src(const src_reg &src) const
|
||||||
{
|
{
|
||||||
if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) ||
|
if (shader->devinfo->gen < 11 || type_sz(src.type) != 1)
|
||||||
type_sz(src.type) != 1)
|
|
||||||
return src;
|
return src;
|
||||||
|
|
||||||
dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ?
|
dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ?
|
||||||
|
@@ -1505,8 +1505,15 @@ generate_code(struct brw_codegen *p,
|
|||||||
bool debug_flag = INTEL_DEBUG &
|
bool debug_flag = INTEL_DEBUG &
|
||||||
intel_debug_flag_for_shader_stage(nir->info.stage);
|
intel_debug_flag_for_shader_stage(nir->info.stage);
|
||||||
struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
|
struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
|
||||||
|
|
||||||
|
/* `send_count` explicitly does not include spills or fills, as we'd
|
||||||
|
* like to use it as a metric for intentional memory access or other
|
||||||
|
* shared function use. Otherwise, subtle changes to scheduling or
|
||||||
|
* register allocation could cause it to fluctuate wildly - and that
|
||||||
|
* effect is already counted in spill/fill counts.
|
||||||
|
*/
|
||||||
int spill_count = 0, fill_count = 0;
|
int spill_count = 0, fill_count = 0;
|
||||||
int loop_count = 0;
|
int loop_count = 0, send_count = 0;
|
||||||
|
|
||||||
foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
|
foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
|
||||||
struct brw_reg src[3], dst;
|
struct brw_reg src[3], dst;
|
||||||
@@ -1746,6 +1753,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
|
generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
|
||||||
} else {
|
} else {
|
||||||
generate_math1_gen4(p, inst, dst, src[0]);
|
generate_math1_gen4(p, inst, dst, src[0]);
|
||||||
|
send_count++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -1759,6 +1767,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
generate_math_gen6(p, inst, dst, src[0], src[1]);
|
generate_math_gen6(p, inst, dst, src[0], src[1]);
|
||||||
} else {
|
} else {
|
||||||
generate_math2_gen4(p, inst, dst, src[0], src[1]);
|
generate_math2_gen4(p, inst, dst, src[0], src[1]);
|
||||||
|
send_count++;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@@ -1775,14 +1784,17 @@ generate_code(struct brw_codegen *p,
|
|||||||
case SHADER_OPCODE_SAMPLEINFO:
|
case SHADER_OPCODE_SAMPLEINFO:
|
||||||
generate_tex(p, prog_data, nir->info.stage,
|
generate_tex(p, prog_data, nir->info.stage,
|
||||||
inst, dst, src[0], src[1], src[2]);
|
inst, dst, src[0], src[1], src[2]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
||||||
generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
|
generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VS_OPCODE_URB_WRITE:
|
case VS_OPCODE_URB_WRITE:
|
||||||
generate_vs_urb_write(p, inst);
|
generate_vs_urb_write(p, inst);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||||
@@ -1797,10 +1809,12 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||||
generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
|
generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
||||||
generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
|
generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
|
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
|
||||||
@@ -1809,14 +1823,17 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
case GS_OPCODE_URB_WRITE:
|
case GS_OPCODE_URB_WRITE:
|
||||||
generate_gs_urb_write(p, inst);
|
generate_gs_urb_write(p, inst);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GS_OPCODE_URB_WRITE_ALLOCATE:
|
case GS_OPCODE_URB_WRITE_ALLOCATE:
|
||||||
generate_gs_urb_write_allocate(p, inst);
|
generate_gs_urb_write_allocate(p, inst);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GS_OPCODE_SVB_WRITE:
|
case GS_OPCODE_SVB_WRITE:
|
||||||
generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
|
generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GS_OPCODE_SVB_SET_DST_INDEX:
|
case GS_OPCODE_SVB_SET_DST_INDEX:
|
||||||
@@ -1825,6 +1842,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
case GS_OPCODE_THREAD_END:
|
case GS_OPCODE_THREAD_END:
|
||||||
generate_gs_thread_end(p, inst);
|
generate_gs_thread_end(p, inst);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GS_OPCODE_SET_WRITE_OFFSET:
|
case GS_OPCODE_SET_WRITE_OFFSET:
|
||||||
@@ -1837,6 +1855,7 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
case GS_OPCODE_FF_SYNC:
|
case GS_OPCODE_FF_SYNC:
|
||||||
generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
|
generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
|
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
|
||||||
@@ -1866,12 +1885,14 @@ generate_code(struct brw_codegen *p,
|
|||||||
case SHADER_OPCODE_SHADER_TIME_ADD:
|
case SHADER_OPCODE_SHADER_TIME_ADD:
|
||||||
brw_shader_time_add(p, src[0],
|
brw_shader_time_add(p, src[0],
|
||||||
prog_data->base.binding_table.shader_time_start);
|
prog_data->base.binding_table.shader_time_start);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VEC4_OPCODE_UNTYPED_ATOMIC:
|
case VEC4_OPCODE_UNTYPED_ATOMIC:
|
||||||
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||||
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
|
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
|
||||||
!inst->dst.is_null(), inst->header_size);
|
!inst->dst.is_null(), inst->header_size);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
|
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
|
||||||
@@ -1879,16 +1900,19 @@ generate_code(struct brw_codegen *p,
|
|||||||
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||||
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
|
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
|
||||||
src[2].ud);
|
src[2].ud);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
|
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||||
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||||
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
|
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
|
||||||
src[2].ud, inst->header_size);
|
src[2].ud, inst->header_size);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_MEMORY_FENCE:
|
case SHADER_OPCODE_MEMORY_FENCE:
|
||||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false, /* bti */ 0);
|
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false, /* bti */ 0);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
||||||
@@ -2068,10 +2092,12 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
case TCS_OPCODE_URB_WRITE:
|
case TCS_OPCODE_URB_WRITE:
|
||||||
generate_tcs_urb_write(p, inst, src[0]);
|
generate_tcs_urb_write(p, inst, src[0]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case VEC4_OPCODE_URB_READ:
|
case VEC4_OPCODE_URB_READ:
|
||||||
generate_vec4_urb_read(p, inst, dst, src[0]);
|
generate_vec4_urb_read(p, inst, dst, src[0]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
|
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
|
||||||
@@ -2113,15 +2139,18 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
case TCS_OPCODE_RELEASE_INPUT:
|
case TCS_OPCODE_RELEASE_INPUT:
|
||||||
generate_tcs_release_input(p, dst, src[0], src[1]);
|
generate_tcs_release_input(p, dst, src[0], src[1]);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case TCS_OPCODE_THREAD_END:
|
case TCS_OPCODE_THREAD_END:
|
||||||
generate_tcs_thread_end(p, inst);
|
generate_tcs_thread_end(p, inst);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_BARRIER:
|
case SHADER_OPCODE_BARRIER:
|
||||||
brw_barrier(p, src[0]);
|
brw_barrier(p, src[0]);
|
||||||
brw_WAIT(p);
|
brw_WAIT(p);
|
||||||
|
send_count++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case SHADER_OPCODE_MOV_INDIRECT:
|
case SHADER_OPCODE_MOV_INDIRECT:
|
||||||
@@ -2188,9 +2217,9 @@ generate_code(struct brw_codegen *p,
|
|||||||
sha1buf);
|
sha1buf);
|
||||||
|
|
||||||
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
|
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
|
||||||
"spills:fills. Compacted %d to %d bytes (%.0f%%)\n",
|
"spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n",
|
||||||
stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
|
stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
|
||||||
spill_count, fill_count, before_size, after_size,
|
spill_count, fill_count, send_count, before_size, after_size,
|
||||||
100.0f * (before_size - after_size) / before_size);
|
100.0f * (before_size - after_size) / before_size);
|
||||||
|
|
||||||
/* overriding the shader makes disasm_info invalid */
|
/* overriding the shader makes disasm_info invalid */
|
||||||
@@ -2205,10 +2234,11 @@ generate_code(struct brw_codegen *p,
|
|||||||
|
|
||||||
compiler->shader_debug_log(log_data,
|
compiler->shader_debug_log(log_data,
|
||||||
"%s vec4 shader: %d inst, %d loops, %u cycles, "
|
"%s vec4 shader: %d inst, %d loops, %u cycles, "
|
||||||
"%d:%d spills:fills, compacted %d to %d bytes.",
|
"%d:%d spills:fills, %u sends, "
|
||||||
|
"compacted %d to %d bytes.",
|
||||||
stage_abbrev, before_size / 16,
|
stage_abbrev, before_size / 16,
|
||||||
loop_count, cfg->cycle_count, spill_count,
|
loop_count, cfg->cycle_count, spill_count,
|
||||||
fill_count, before_size, after_size);
|
fill_count, send_count, before_size, after_size);
|
||||||
if (stats) {
|
if (stats) {
|
||||||
stats->dispatch_width = 0;
|
stats->dispatch_width = 0;
|
||||||
stats->instructions = before_size / 16;
|
stats->instructions = before_size / 16;
|
||||||
|
@@ -1043,7 +1043,8 @@ static const struct gen_device_info gen_device_info_ehl_2x4 = {
|
|||||||
.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
|
.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
|
||||||
.simulator_id = 22, \
|
.simulator_id = 22, \
|
||||||
.urb.size = (_gt) == 1 ? 512 : 1024, \
|
.urb.size = (_gt) == 1 ? 512 : 1024, \
|
||||||
.num_subslices = _dual_subslices
|
.num_subslices = _dual_subslices, \
|
||||||
|
.num_eu_per_subslice = 16
|
||||||
|
|
||||||
#define dual_subslices(args...) { args, }
|
#define dual_subslices(args...) { args, }
|
||||||
|
|
||||||
|
@@ -532,9 +532,11 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
|
|||||||
if (use_softpin) {
|
if (use_softpin) {
|
||||||
gem_handle = anv_gem_create(pool->device, newbo_size);
|
gem_handle = anv_gem_create(pool->device, newbo_size);
|
||||||
map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0);
|
map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0);
|
||||||
if (map == MAP_FAILED)
|
if (map == MAP_FAILED) {
|
||||||
|
anv_gem_close(pool->device, gem_handle);
|
||||||
return vk_errorf(pool->device->instance, pool->device,
|
return vk_errorf(pool->device->instance, pool->device,
|
||||||
VK_ERROR_MEMORY_MAP_FAILED, "gem mmap failed: %m");
|
VK_ERROR_MEMORY_MAP_FAILED, "gem mmap failed: %m");
|
||||||
|
}
|
||||||
assert(center_bo_offset == 0);
|
assert(center_bo_offset == 0);
|
||||||
} else {
|
} else {
|
||||||
/* Just leak the old map until we destroy the pool. We can't munmap it
|
/* Just leak the old map until we destroy the pool. We can't munmap it
|
||||||
|
@@ -32,7 +32,6 @@
|
|||||||
#include "drm-uapi/drm_fourcc.h"
|
#include "drm-uapi/drm_fourcc.h"
|
||||||
|
|
||||||
#include "anv_private.h"
|
#include "anv_private.h"
|
||||||
#include "util/strtod.h"
|
|
||||||
#include "util/debug.h"
|
#include "util/debug.h"
|
||||||
#include "util/build_id.h"
|
#include "util/build_id.h"
|
||||||
#include "util/disk_cache.h"
|
#include "util/disk_cache.h"
|
||||||
@@ -792,7 +791,6 @@ VkResult anv_CreateInstance(
|
|||||||
instance->pipeline_cache_enabled =
|
instance->pipeline_cache_enabled =
|
||||||
env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
|
env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
|
||||||
|
|
||||||
_mesa_locale_init();
|
|
||||||
glsl_type_singleton_init_or_ref();
|
glsl_type_singleton_init_or_ref();
|
||||||
|
|
||||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||||
@@ -831,7 +829,6 @@ void anv_DestroyInstance(
|
|||||||
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
||||||
|
|
||||||
glsl_type_singleton_decref();
|
glsl_type_singleton_decref();
|
||||||
_mesa_locale_fini();
|
|
||||||
|
|
||||||
driDestroyOptionCache(&instance->dri_options);
|
driDestroyOptionCache(&instance->dri_options);
|
||||||
driDestroyOptionInfo(&instance->available_dri_options);
|
driDestroyOptionInfo(&instance->available_dri_options);
|
||||||
|
@@ -2216,12 +2216,15 @@ compute_pipeline_create(
|
|||||||
|
|
||||||
pipeline->blend_state.map = NULL;
|
pipeline->blend_state.map = NULL;
|
||||||
|
|
||||||
result = anv_reloc_list_init(&pipeline->batch_relocs,
|
const VkAllocationCallbacks *alloc =
|
||||||
pAllocator ? pAllocator : &device->alloc);
|
pAllocator ? pAllocator : &device->alloc;
|
||||||
|
|
||||||
|
result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
|
||||||
if (result != VK_SUCCESS) {
|
if (result != VK_SUCCESS) {
|
||||||
vk_free2(&device->alloc, pAllocator, pipeline);
|
vk_free2(&device->alloc, pAllocator, pipeline);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
pipeline->batch.alloc = alloc;
|
||||||
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
|
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
|
||||||
pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
|
pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
|
||||||
pipeline->batch.relocs = &pipeline->batch_relocs;
|
pipeline->batch.relocs = &pipeline->batch_relocs;
|
||||||
|
@@ -94,12 +94,7 @@ VkResult genX(CreateQueryPool)(
|
|||||||
uint64s_per_slot += 4;
|
uint64s_per_slot += 4;
|
||||||
break;
|
break;
|
||||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
||||||
uint64s_per_slot = 2 * OA_REPORT_N_UINT64; /* begin & end OA reports */
|
uint64s_per_slot = 72; /* 576 bytes, see layout below */
|
||||||
uint64s_per_slot += 4; /* PerfCounter 1 & 2 */
|
|
||||||
uint64s_per_slot++; /* 2 * 32bit RPSTAT register */
|
|
||||||
uint64s_per_slot++; /* 64bit marker */
|
|
||||||
uint64s_per_slot++; /* availability */
|
|
||||||
uint64s_per_slot = align_u32(uint64s_per_slot, 8); /* OA reports must be aligned to 64 bytes */
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
@@ -179,54 +174,51 @@ anv_query_address(struct anv_query_pool *pool, uint32_t query)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* VK_INTEL_performance_query layout:
|
* VK_INTEL_performance_query layout (576 bytes) :
|
||||||
*
|
*
|
||||||
* ------------------------------
|
* ------------------------------
|
||||||
* | end MI_RPC (256b) |
|
* | availability (8b) |
|
||||||
* |----------------------------|
|
* |----------------------------|
|
||||||
* | begin MI_RPC (256b) |
|
* | marker (8b) |
|
||||||
* |----------------------------|
|
|
||||||
* | begin perfcntr 1 & 2 (16b) |
|
|
||||||
* |----------------------------|
|
|
||||||
* | end perfcntr 1 & 2 (16b) |
|
|
||||||
* |----------------------------|
|
* |----------------------------|
|
||||||
* | begin RPSTAT register (4b) |
|
* | begin RPSTAT register (4b) |
|
||||||
* |----------------------------|
|
* |----------------------------|
|
||||||
* | end RPSTAT register (4b) |
|
* | end RPSTAT register (4b) |
|
||||||
* |----------------------------|
|
* |----------------------------|
|
||||||
* | marker (8b) |
|
* | begin perfcntr 1 & 2 (16b) |
|
||||||
* |----------------------------|
|
* |----------------------------|
|
||||||
* | availability (8b) |
|
* | end perfcntr 1 & 2 (16b) |
|
||||||
|
* |----------------------------|
|
||||||
|
* | Unused (8b) |
|
||||||
|
* |----------------------------|
|
||||||
|
* | begin MI_RPC (256b) |
|
||||||
|
* |----------------------------|
|
||||||
|
* | end MI_RPC (256b) |
|
||||||
* ------------------------------
|
* ------------------------------
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
intel_perf_mi_rpc_offset(bool end)
|
intel_perf_marker_offset(void)
|
||||||
{
|
{
|
||||||
return end ? 0 : 256;
|
return 8;
|
||||||
}
|
|
||||||
|
|
||||||
static uint32_t
|
|
||||||
intel_perf_counter(bool end)
|
|
||||||
{
|
|
||||||
uint32_t offset = 512;
|
|
||||||
offset += end ? 2 * sizeof(uint64_t) : 0;
|
|
||||||
return offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
intel_perf_rpstart_offset(bool end)
|
intel_perf_rpstart_offset(bool end)
|
||||||
{
|
{
|
||||||
uint32_t offset = intel_perf_counter(false) +
|
return 16 + (end ? sizeof(uint32_t) : 0);
|
||||||
4 * sizeof(uint64_t);
|
|
||||||
offset += end ? sizeof(uint32_t) : 0;
|
|
||||||
return offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
intel_perf_marker_offset(void)
|
intel_perf_counter(bool end)
|
||||||
{
|
{
|
||||||
return intel_perf_rpstart_offset(false) + sizeof(uint64_t);
|
return 24 + (end ? (2 * sizeof(uint64_t)) : 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t
|
||||||
|
intel_perf_mi_rpc_offset(bool end)
|
||||||
|
{
|
||||||
|
return 64 + (end ? 256 : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@@ -251,11 +243,7 @@ query_slot(struct anv_query_pool *pool, uint32_t query)
|
|||||||
static bool
|
static bool
|
||||||
query_is_available(struct anv_query_pool *pool, uint32_t query)
|
query_is_available(struct anv_query_pool *pool, uint32_t query)
|
||||||
{
|
{
|
||||||
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
|
return *(volatile uint64_t *)query_slot(pool, query);
|
||||||
return *(volatile uint64_t *)((uint8_t *)query_slot(pool, query) +
|
|
||||||
pool->stride - 8);
|
|
||||||
} else
|
|
||||||
return *(volatile uint64_t *)query_slot(pool, query);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static VkResult
|
static VkResult
|
||||||
|
@@ -27,7 +27,11 @@
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct anv_instance instance;
|
struct anv_instance instance = {
|
||||||
|
.physicalDevice = {
|
||||||
|
.use_softpin = true,
|
||||||
|
},
|
||||||
|
};
|
||||||
struct anv_device device = {
|
struct anv_device device = {
|
||||||
.instance = &instance,
|
.instance = &instance,
|
||||||
};
|
};
|
||||||
|
@@ -111,7 +111,7 @@ static void validate_monotonic(int32_t **blocks)
|
|||||||
|
|
||||||
static void run_test()
|
static void run_test()
|
||||||
{
|
{
|
||||||
struct anv_instance instance;
|
struct anv_instance instance = { };
|
||||||
struct anv_device device = {
|
struct anv_device device = {
|
||||||
.instance = &instance,
|
.instance = &instance,
|
||||||
};
|
};
|
||||||
|
@@ -36,7 +36,7 @@
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct anv_instance instance;
|
struct anv_instance instance = { };
|
||||||
struct anv_device device = {
|
struct anv_device device = {
|
||||||
.instance = &instance,
|
.instance = &instance,
|
||||||
};
|
};
|
||||||
|
@@ -35,7 +35,7 @@
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct anv_instance instance;
|
struct anv_instance instance = { };
|
||||||
struct anv_device device = {
|
struct anv_device device = {
|
||||||
.instance = &instance,
|
.instance = &instance,
|
||||||
};
|
};
|
||||||
|
@@ -56,7 +56,7 @@ static void *alloc_states(void *_job)
|
|||||||
|
|
||||||
static void run_test()
|
static void run_test()
|
||||||
{
|
{
|
||||||
struct anv_instance instance;
|
struct anv_instance instance = { };
|
||||||
struct anv_device device = {
|
struct anv_device device = {
|
||||||
.instance = &instance,
|
.instance = &instance,
|
||||||
};
|
};
|
||||||
|
@@ -27,7 +27,11 @@
|
|||||||
|
|
||||||
int main(int argc, char **argv)
|
int main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
struct anv_instance instance;
|
struct anv_instance instance = {
|
||||||
|
.physicalDevice = {
|
||||||
|
.use_softpin = true,
|
||||||
|
},
|
||||||
|
};
|
||||||
struct anv_device device = {
|
struct anv_device device = {
|
||||||
.instance = &instance,
|
.instance = &instance,
|
||||||
};
|
};
|
||||||
|
@@ -36,6 +36,8 @@ libosmesa = shared_library(
|
|||||||
link_whole : libglapi_static,
|
link_whole : libglapi_static,
|
||||||
link_with : [libmesa_classic, osmesa_link_with],
|
link_with : [libmesa_classic, osmesa_link_with],
|
||||||
dependencies : [dep_thread, dep_selinux],
|
dependencies : [dep_thread, dep_selinux],
|
||||||
|
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libosmesa.dll
|
||||||
|
soversion : host_machine.system() == 'windows' ? '' : '8',
|
||||||
version : '8.0.0',
|
version : '8.0.0',
|
||||||
install : true,
|
install : true,
|
||||||
)
|
)
|
||||||
|
@@ -350,6 +350,12 @@ clear_bufferiv(struct gl_context *ctx, GLenum buffer, GLint drawbuffer,
|
|||||||
_mesa_update_state( ctx );
|
_mesa_update_state( ctx );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!no_error && ctx->DrawBuffer->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
|
||||||
|
_mesa_error(ctx, GL_INVALID_FRAMEBUFFER_OPERATION_EXT,
|
||||||
|
"glClearBufferiv(incomplete framebuffer)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
switch (buffer) {
|
switch (buffer) {
|
||||||
case GL_STENCIL:
|
case GL_STENCIL:
|
||||||
/* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
|
/* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
|
||||||
@@ -686,6 +692,12 @@ clear_bufferfi(struct gl_context *ctx, GLenum buffer, GLint drawbuffer,
|
|||||||
drawbuffer);
|
drawbuffer);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (ctx->DrawBuffer->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
|
||||||
|
_mesa_error(ctx, GL_INVALID_FRAMEBUFFER_OPERATION_EXT,
|
||||||
|
"glClearBufferfi(incomplete framebuffer)");
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ctx->RasterDiscard)
|
if (ctx->RasterDiscard)
|
||||||
|
@@ -325,6 +325,7 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
|
|||||||
cso_set_stream_outputs(cso, 0, NULL, NULL);
|
cso_set_stream_outputs(cso, 0, NULL, NULL);
|
||||||
cso_set_sample_mask(cso, ~0);
|
cso_set_sample_mask(cso, ~0);
|
||||||
cso_set_min_samples(cso, 1);
|
cso_set_min_samples(cso, 1);
|
||||||
|
st->clear.raster.multisample = st->state.fb_num_samples > 1;
|
||||||
cso_set_rasterizer(cso, &st->clear.raster);
|
cso_set_rasterizer(cso, &st->clear.raster);
|
||||||
|
|
||||||
/* viewport state: viewport matching window dims */
|
/* viewport state: viewport matching window dims */
|
||||||
|
Reference in New Issue
Block a user