Compare commits
50 Commits
explicit-s
...
mesa-19.3.
Author | SHA1 | Date | |
---|---|---|---|
|
3d9c678772 | ||
|
bb08c0f04d | ||
|
3c1b3656b9 | ||
|
05be725923 | ||
|
8608c460d1 | ||
|
2f66f619c8 | ||
|
2bd7416823 | ||
|
0aef18324a | ||
|
3211308e88 | ||
|
f7d76ad80e | ||
|
52005416a0 | ||
|
0b670a919c | ||
|
b90f5a9ea0 | ||
|
2ea5038045 | ||
|
5ca2bb392f | ||
|
01f6321c09 | ||
|
15342abc5b | ||
|
08501e77af | ||
|
49af89a0b9 | ||
|
dd4b73ad38 | ||
|
a8faeff399 | ||
|
0d846243a0 | ||
|
bc5357bf33 | ||
|
5cee7ad873 | ||
|
184d39301d | ||
|
9bca129bb4 | ||
|
6daaf66f66 | ||
|
4d21f802b5 | ||
|
090469173c | ||
|
59bc14186e | ||
|
5032575b94 | ||
|
b981ca4d7e | ||
|
3544a01121 | ||
|
bb9d1ed2bd | ||
|
5f8e0c715e | ||
|
f0104d8fef | ||
|
cb66ea7780 | ||
|
75886fafaa | ||
|
b3fd30921a | ||
|
ea886e49be | ||
|
307e5cc8fd | ||
|
0b8836cb23 | ||
|
39e9739a3b | ||
|
de705da8a6 | ||
|
640747a298 | ||
|
9df4763440 | ||
|
2b1b7afb5c | ||
|
084926926c | ||
|
1beee9dd9f | ||
|
20512e9ddb |
2
bin/.cherry-ignore
Normal file
2
bin/.cherry-ignore
Normal file
@@ -0,0 +1,2 @@
|
||||
# This is reverted shortly after landing
|
||||
4432a2d14d80081d062f7939a950d65ea3a16eed
|
@@ -92,7 +92,7 @@ is_revert_nomination()
|
||||
}
|
||||
|
||||
# Use the last branchpoint as our limit for the search
|
||||
latest_branchpoint=`git merge-base origin/master HEAD`
|
||||
latest_branchpoint=`git merge-base upstream/master HEAD`
|
||||
|
||||
# List all the commits between day 1 and the branch point...
|
||||
git log --reverse --pretty=%H $latest_branchpoint > already_landed
|
||||
@@ -103,7 +103,7 @@ git log --reverse --pretty=medium --grep="cherry picked from commit" $latest_bra
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||
|
||||
# Grep for potential candidates
|
||||
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>\|This reverts commit' $latest_branchpoint..origin/master |\
|
||||
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-stable\|^CC:.*mesa-dev\|\<fixes\>\|\<broken by\>\|This reverts commit' $latest_branchpoint..upstream/master |\
|
||||
while read sha
|
||||
do
|
||||
# Check to see whether the patch is on the ignore list.
|
||||
|
@@ -16,3 +16,5 @@ VK_INTEL_performance_query on Intel.
|
||||
Meson support for windows using MSVC and MinGW
|
||||
scons has been deprecated for non windows
|
||||
Initial Intel gen12 (Tigerlake) support on anvil and iris
|
||||
New compiler backend "ACO" for RADV (RADV_PERFTEST=aco)
|
||||
VK_EXT_shader_demote_to_helper_invocation on RADV/ACO.
|
||||
|
@@ -85,6 +85,7 @@ ACO_FILES = \
|
||||
compiler/aco_register_allocation.cpp \
|
||||
compiler/aco_live_var_analysis.cpp \
|
||||
compiler/aco_lower_bool_phis.cpp \
|
||||
compiler/aco_lower_to_cssa.cpp \
|
||||
compiler/aco_lower_to_hw_instr.cpp \
|
||||
compiler/aco_optimizer.cpp \
|
||||
compiler/aco_opt_value_numbering.cpp \
|
||||
|
@@ -114,6 +114,11 @@ unsigned
|
||||
ac_get_tbuffer_format(enum chip_class chip_class,
|
||||
unsigned dfmt, unsigned nfmt)
|
||||
{
|
||||
// Some games try to access vertex buffers without a valid format.
|
||||
// This is a game bug, but we should still handle it gracefully.
|
||||
if (dfmt == V_008F0C_IMG_FORMAT_INVALID)
|
||||
return V_008F0C_IMG_FORMAT_INVALID;
|
||||
|
||||
if (chip_class >= GFX10) {
|
||||
unsigned format;
|
||||
switch (dfmt) {
|
||||
|
@@ -317,6 +317,7 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
|
||||
|
||||
uint32_t img_format = ac_get_tbuffer_format(ctx.chip_class, mtbuf->dfmt, mtbuf->nfmt);
|
||||
uint32_t encoding = (0b111010 << 26);
|
||||
assert(img_format <= 0x7F);
|
||||
assert(!mtbuf->dlc || ctx.chip_class >= GFX10);
|
||||
encoding |= (mtbuf->dlc ? 1 : 0) << 15; /* DLC bit replaces one bit of the OPCODE on GFX10 */
|
||||
encoding |= (mtbuf->glc ? 1 : 0) << 14;
|
||||
|
@@ -1263,13 +1263,13 @@ setup_isel_context(Program* program,
|
||||
} else if (program->chip_class >= GFX8) {
|
||||
program->physical_sgprs = 800;
|
||||
program->sgpr_alloc_granule = 15;
|
||||
if (options->family == CHIP_TONGA || options->family == CHIP_ICELAND)
|
||||
program->sgpr_limit = 94; /* workaround hardware bug */
|
||||
else
|
||||
program->sgpr_limit = 102;
|
||||
} else {
|
||||
program->physical_sgprs = 512;
|
||||
program->sgpr_alloc_granule = 7;
|
||||
if (options->family == CHIP_TONGA || options->family == CHIP_ICELAND)
|
||||
program->sgpr_limit = 94; /* workaround hardware bug */
|
||||
else
|
||||
program->sgpr_limit = 104;
|
||||
}
|
||||
/* TODO: we don't have to allocate VCC if we don't need it */
|
||||
|
@@ -172,11 +172,11 @@ bool can_move_instr(aco_ptr<Instruction>& instr, Instruction* current, int movin
|
||||
}
|
||||
}
|
||||
|
||||
bool can_reorder(Instruction* candidate, bool allow_smem)
|
||||
bool can_reorder(Instruction* candidate)
|
||||
{
|
||||
switch (candidate->format) {
|
||||
case Format::SMEM:
|
||||
return allow_smem || static_cast<SMEM_instruction*>(candidate)->can_reorder;
|
||||
return static_cast<SMEM_instruction*>(candidate)->can_reorder;
|
||||
case Format::MUBUF:
|
||||
return static_cast<MUBUF_instruction*>(candidate)->can_reorder;
|
||||
case Format::MIMG:
|
||||
@@ -200,7 +200,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
int window_size = SMEM_WINDOW_SIZE;
|
||||
int max_moves = SMEM_MAX_MOVES;
|
||||
int16_t k = 0;
|
||||
bool can_reorder_cur = can_reorder(current, false);
|
||||
bool can_reorder_cur = can_reorder(current);
|
||||
|
||||
/* don't move s_memtime/s_memrealtime */
|
||||
if (current->opcode == aco_opcode::s_memtime || current->opcode == aco_opcode::s_memrealtime)
|
||||
@@ -224,6 +224,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
|
||||
assert(candidate_idx >= 0);
|
||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||
|
||||
/* break if we'd make the previous SMEM instruction stall */
|
||||
bool can_stall_prev_smem = idx <= ctx.last_SMEM_dep_idx && candidate_idx < ctx.last_SMEM_dep_idx;
|
||||
@@ -231,7 +232,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
break;
|
||||
|
||||
/* break when encountering another MEM instruction, logical_start or barriers */
|
||||
if (!can_reorder(candidate.get(), false) && !can_reorder_cur)
|
||||
if (!can_reorder_candidate && !can_reorder_cur)
|
||||
break;
|
||||
if (candidate->opcode == aco_opcode::p_logical_start)
|
||||
break;
|
||||
@@ -239,6 +240,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
break;
|
||||
if (!can_move_instr(candidate, current, moving_interaction))
|
||||
break;
|
||||
if (candidate->isVMEM())
|
||||
break;
|
||||
register_pressure.update(register_demand[candidate_idx]);
|
||||
|
||||
/* if current depends on candidate, add additional dependencies and continue */
|
||||
@@ -264,6 +267,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
if (op.isTemp())
|
||||
ctx.depends_on[op.tempId()] = true;
|
||||
}
|
||||
can_reorder_cur &= can_reorder_candidate;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -280,6 +284,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
if (op.isTemp())
|
||||
ctx.depends_on[op.tempId()] = true;
|
||||
}
|
||||
can_reorder_cur &= can_reorder_candidate;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -323,12 +328,14 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
insert_idx = idx + 1;
|
||||
moving_interaction = barrier_none;
|
||||
moving_spill = false;
|
||||
can_reorder_cur = true;
|
||||
|
||||
bool found_dependency = false;
|
||||
/* second, check if we have instructions after current to move up */
|
||||
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
|
||||
assert(candidate_idx < (int) block->instructions.size());
|
||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||
|
||||
if (candidate->opcode == aco_opcode::p_logical_end)
|
||||
break;
|
||||
@@ -369,7 +376,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
}
|
||||
}
|
||||
|
||||
if (!can_reorder(candidate.get(), false) && !can_reorder_cur)
|
||||
if (!can_reorder_candidate && !can_reorder_cur)
|
||||
break;
|
||||
|
||||
if (!found_dependency) {
|
||||
@@ -380,8 +387,10 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
/* update register pressure */
|
||||
register_pressure.update(register_demand[candidate_idx - 1]);
|
||||
|
||||
if (is_dependency)
|
||||
if (is_dependency) {
|
||||
can_reorder_cur &= can_reorder_candidate;
|
||||
continue;
|
||||
}
|
||||
assert(insert_idx != idx);
|
||||
|
||||
// TODO: correctly calculate register pressure for this case
|
||||
@@ -392,6 +401,8 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
register_pressure_unknown = true;
|
||||
}
|
||||
if (register_pressure_unknown) {
|
||||
if (candidate->isVMEM())
|
||||
break;
|
||||
for (const Definition& def : candidate->definitions) {
|
||||
if (def.isTemp())
|
||||
ctx.RAR_dependencies[def.tempId()] = true;
|
||||
@@ -400,6 +411,7 @@ void schedule_SMEM(sched_ctx& ctx, Block* block,
|
||||
if (op.isTemp())
|
||||
ctx.RAR_dependencies[op.tempId()] = true;
|
||||
}
|
||||
can_reorder_cur &= can_reorder_candidate;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -440,7 +452,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
int max_moves = VMEM_MAX_MOVES;
|
||||
int clause_max_grab_dist = VMEM_CLAUSE_MAX_GRAB_DIST;
|
||||
int16_t k = 0;
|
||||
bool can_reorder_cur = can_reorder(current, false);
|
||||
/* initially true as we don't pull other VMEM instructions
|
||||
* through the current instruction */
|
||||
bool can_reorder_vmem = true;
|
||||
bool can_reorder_smem = true;
|
||||
|
||||
/* create the initial set of values which current depends on */
|
||||
std::fill(ctx.depends_on.begin(), ctx.depends_on.end(), false);
|
||||
@@ -467,9 +482,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
for (int candidate_idx = idx - 1; k < max_moves && candidate_idx > (int) idx - window_size; candidate_idx--) {
|
||||
assert(candidate_idx >= 0);
|
||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||
|
||||
/* break when encountering another VMEM instruction, logical_start or barriers */
|
||||
if (!can_reorder(candidate.get(), true) && !can_reorder_cur)
|
||||
if (!can_reorder_smem && candidate->format == Format::SMEM && !can_reorder_candidate)
|
||||
break;
|
||||
if (candidate->opcode == aco_opcode::p_logical_start)
|
||||
break;
|
||||
@@ -487,10 +503,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
bool part_of_clause = false;
|
||||
if (candidate->isVMEM()) {
|
||||
bool same_resource = candidate->operands[1].tempId() == current->operands[1].tempId();
|
||||
bool can_reorder = can_reorder_vmem || can_reorder_candidate;
|
||||
int grab_dist = clause_insert_idx - candidate_idx;
|
||||
/* We can't easily tell how much this will decrease the def-to-use
|
||||
* distances, so just use how far it will be moved as a heuristic. */
|
||||
part_of_clause = same_resource && grab_dist < clause_max_grab_dist;
|
||||
part_of_clause = can_reorder && same_resource && grab_dist < clause_max_grab_dist;
|
||||
}
|
||||
|
||||
/* if current depends on candidate, add additional dependencies and continue */
|
||||
@@ -522,6 +539,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
}
|
||||
}
|
||||
register_pressure_clause.update(register_demand[candidate_idx]);
|
||||
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -555,6 +574,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
}
|
||||
}
|
||||
register_pressure_clause.update(register_demand[candidate_idx]);
|
||||
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -605,12 +626,16 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
int insert_idx = idx;
|
||||
moving_interaction = barrier_none;
|
||||
moving_spill = false;
|
||||
// TODO: differentiate between loads and stores (load-load can always reorder)
|
||||
can_reorder_vmem = true;
|
||||
can_reorder_smem = true;
|
||||
|
||||
bool found_dependency = false;
|
||||
/* second, check if we have instructions after current to move up */
|
||||
for (int candidate_idx = idx + 1; k < max_moves && candidate_idx < (int) idx + window_size; candidate_idx++) {
|
||||
assert(candidate_idx < (int) block->instructions.size());
|
||||
aco_ptr<Instruction>& candidate = block->instructions[candidate_idx];
|
||||
bool can_reorder_candidate = can_reorder(candidate.get());
|
||||
|
||||
if (candidate->opcode == aco_opcode::p_logical_end)
|
||||
break;
|
||||
@@ -623,7 +648,11 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
break;
|
||||
|
||||
/* check if candidate depends on current */
|
||||
bool is_dependency = !can_reorder(candidate.get(), true) && !can_reorder_cur;
|
||||
bool is_dependency = false;
|
||||
if (candidate->format == Format::SMEM)
|
||||
is_dependency = !can_reorder_smem && !can_reorder_candidate;
|
||||
if (candidate->isVMEM())
|
||||
is_dependency = !can_reorder_vmem && !can_reorder_candidate;
|
||||
for (const Operand& op : candidate->operands) {
|
||||
if (op.isTemp() && ctx.depends_on[op.tempId()]) {
|
||||
is_dependency = true;
|
||||
@@ -645,6 +674,10 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
if (op.isTemp())
|
||||
ctx.RAR_dependencies[op.tempId()] = true;
|
||||
}
|
||||
/* update flag whether we can reorder other memory instructions */
|
||||
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||
|
||||
if (!found_dependency) {
|
||||
insert_idx = candidate_idx;
|
||||
found_dependency = true;
|
||||
@@ -652,7 +685,9 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
register_pressure = register_demand[insert_idx - 1];
|
||||
continue;
|
||||
}
|
||||
|
||||
} else if (candidate->isVMEM()) {
|
||||
/* don't move up dependencies of other VMEM instructions */
|
||||
for (const Definition& def : candidate->definitions) {
|
||||
if (def.isTemp())
|
||||
ctx.depends_on[def.tempId()] = true;
|
||||
@@ -681,6 +716,8 @@ void schedule_VMEM(sched_ctx& ctx, Block* block,
|
||||
if (op.isTemp())
|
||||
ctx.RAR_dependencies[op.tempId()] = true;
|
||||
}
|
||||
can_reorder_smem &= candidate->format != Format::SMEM || can_reorder_candidate;
|
||||
can_reorder_vmem &= !candidate->isVMEM() || can_reorder_candidate;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@@ -1291,9 +1291,9 @@ Temp load_scratch_resource(spill_ctx& ctx, Temp& scratch_offset,
|
||||
rsrc_conf |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
}
|
||||
/* older generations need element size = 16 bytes. element size removed in GFX9 */
|
||||
/* older generations need element size = 4 bytes. element size removed in GFX9 */
|
||||
if (ctx.program->chip_class <= GFX8)
|
||||
rsrc_conf |= S_008F0C_ELEMENT_SIZE(3);
|
||||
rsrc_conf |= S_008F0C_ELEMENT_SIZE(1);
|
||||
|
||||
return bld.pseudo(aco_opcode::p_create_vector, bld.def(s4),
|
||||
private_segment_buffer, Operand(-1u),
|
||||
@@ -1530,12 +1530,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
/* spill vgpr */
|
||||
ctx.program->config->spilled_vgprs += (*it)->operands[0].size();
|
||||
uint32_t spill_slot = vgpr_slot[spill_id];
|
||||
bool add_offset = ctx.program->config->scratch_bytes_per_wave + vgpr_spill_slots * 4 > 4096;
|
||||
unsigned base_offset = add_offset ? 0 : ctx.program->config->scratch_bytes_per_wave;
|
||||
bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096;
|
||||
unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||
|
||||
/* check if the scratch resource descriptor already exists */
|
||||
if (scratch_rsrc == Temp()) {
|
||||
unsigned offset = ctx.program->config->scratch_bytes_per_wave - base_offset;
|
||||
unsigned offset = add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0;
|
||||
scratch_rsrc = load_scratch_resource(ctx, scratch_offset,
|
||||
last_top_level_block_idx == block.index ?
|
||||
instructions : ctx.program->blocks[last_top_level_block_idx].instructions,
|
||||
@@ -1544,37 +1544,21 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
}
|
||||
|
||||
unsigned offset = base_offset + spill_slot * 4;
|
||||
aco_opcode opcode;
|
||||
aco_opcode opcode = aco_opcode::buffer_store_dword;
|
||||
assert((*it)->operands[0].isTemp());
|
||||
Temp temp = (*it)->operands[0].getTemp();
|
||||
assert(temp.type() == RegType::vgpr && !temp.is_linear());
|
||||
switch (temp.size()) {
|
||||
case 1: opcode = aco_opcode::buffer_store_dword; break;
|
||||
case 2: opcode = aco_opcode::buffer_store_dwordx2; break;
|
||||
case 6: temp = bld.tmp(v3); /* fallthrough */
|
||||
case 3: opcode = aco_opcode::buffer_store_dwordx3; break;
|
||||
case 8: temp = bld.tmp(v4); /* fallthrough */
|
||||
case 4: opcode = aco_opcode::buffer_store_dwordx4; break;
|
||||
default: {
|
||||
if (temp.size() > 1) {
|
||||
Instruction* split{create_instruction<Pseudo_instruction>(aco_opcode::p_split_vector, Format::PSEUDO, 1, temp.size())};
|
||||
split->operands[0] = Operand(temp);
|
||||
for (unsigned i = 0; i < temp.size(); i++)
|
||||
split->definitions[i] = bld.def(v1);
|
||||
bld.insert(split);
|
||||
opcode = aco_opcode::buffer_store_dword;
|
||||
for (unsigned i = 0; i < temp.size(); i++)
|
||||
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, split->definitions[i].getTemp(), offset + i * 4, false);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if ((*it)->operands[0].size() > 4) {
|
||||
Temp temp2 = bld.pseudo(aco_opcode::p_split_vector, bld.def(temp.regClass()), Definition(temp), (*it)->operands[0]);
|
||||
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp2, offset, false);
|
||||
offset += temp.size() * 4;
|
||||
}
|
||||
} else {
|
||||
bld.mubuf(opcode, Operand(), scratch_rsrc, scratch_offset, temp, offset, false);
|
||||
|
||||
}
|
||||
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
||||
ctx.program->config->spilled_sgprs += (*it)->operands[0].size();
|
||||
|
||||
@@ -1615,12 +1599,12 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
if (vgpr_slot.find(spill_id) != vgpr_slot.end()) {
|
||||
/* reload vgpr */
|
||||
uint32_t spill_slot = vgpr_slot[spill_id];
|
||||
bool add_offset = ctx.program->config->scratch_bytes_per_wave + vgpr_spill_slots * 4 > 4096;
|
||||
unsigned base_offset = add_offset ? 0 : ctx.program->config->scratch_bytes_per_wave;
|
||||
bool add_offset_to_sgpr = ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size + vgpr_spill_slots * 4 > 4096;
|
||||
unsigned base_offset = add_offset_to_sgpr ? 0 : ctx.program->config->scratch_bytes_per_wave / ctx.program->wave_size;
|
||||
|
||||
/* check if the scratch resource descriptor already exists */
|
||||
if (scratch_rsrc == Temp()) {
|
||||
unsigned offset = ctx.program->config->scratch_bytes_per_wave - base_offset;
|
||||
unsigned offset = add_offset_to_sgpr ? ctx.program->config->scratch_bytes_per_wave : 0;
|
||||
scratch_rsrc = load_scratch_resource(ctx, scratch_offset,
|
||||
last_top_level_block_idx == block.index ?
|
||||
instructions : ctx.program->blocks[last_top_level_block_idx].instructions,
|
||||
@@ -1629,35 +1613,20 @@ void assign_spill_slots(spill_ctx& ctx, unsigned spills_to_vgpr) {
|
||||
}
|
||||
|
||||
unsigned offset = base_offset + spill_slot * 4;
|
||||
aco_opcode opcode;
|
||||
aco_opcode opcode = aco_opcode::buffer_load_dword;
|
||||
Definition def = (*it)->definitions[0];
|
||||
switch (def.size()) {
|
||||
case 1: opcode = aco_opcode::buffer_load_dword; break;
|
||||
case 2: opcode = aco_opcode::buffer_load_dwordx2; break;
|
||||
case 6: def = bld.def(v3); /* fallthrough */
|
||||
case 3: opcode = aco_opcode::buffer_load_dwordx3; break;
|
||||
case 8: def = bld.def(v4); /* fallthrough */
|
||||
case 4: opcode = aco_opcode::buffer_load_dwordx4; break;
|
||||
default: {
|
||||
if (def.size() > 1) {
|
||||
Instruction* vec{create_instruction<Pseudo_instruction>(aco_opcode::p_create_vector, Format::PSEUDO, def.size(), 1)};
|
||||
vec->definitions[0] = def;
|
||||
opcode = aco_opcode::buffer_load_dword;
|
||||
for (unsigned i = 0; i < def.size(); i++) {
|
||||
Temp tmp = bld.tmp(v1);
|
||||
vec->operands[i] = Operand(tmp);
|
||||
bld.mubuf(opcode, Definition(tmp), Operand(), scratch_rsrc, scratch_offset, offset + i * 4, false);
|
||||
}
|
||||
bld.insert(vec);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
bld.mubuf(opcode, def, Operand(), scratch_rsrc, scratch_offset, offset, false);
|
||||
if ((*it)->definitions[0].size() > 4) {
|
||||
Temp temp2 = bld.mubuf(opcode, bld.def(def.regClass()), Operand(), scratch_rsrc, scratch_offset, offset + def.size() * 4, false);
|
||||
bld.pseudo(aco_opcode::p_create_vector, (*it)->definitions[0], def.getTemp(), temp2);
|
||||
}
|
||||
|
||||
} else if (sgpr_slot.find(spill_id) != sgpr_slot.end()) {
|
||||
uint32_t spill_slot = sgpr_slot[spill_id];
|
||||
reload_in_loop[spill_slot / 64] = block.loop_nest_depth > 0;
|
||||
|
@@ -25,6 +25,7 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "dirent.h"
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <linux/audit.h>
|
||||
@@ -47,7 +48,6 @@
|
||||
#include "radv_shader.h"
|
||||
#include "radv_cs.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/strtod.h"
|
||||
#include "vk_util.h"
|
||||
#include <xf86drm.h>
|
||||
#include <amdgpu.h>
|
||||
@@ -682,7 +682,6 @@ VkResult radv_CreateInstance(
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
|
||||
instance->engineVersion = engine_version;
|
||||
|
||||
_mesa_locale_init();
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||
@@ -713,7 +712,6 @@ void radv_DestroyInstance(
|
||||
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
||||
|
||||
glsl_type_singleton_decref();
|
||||
_mesa_locale_fini();
|
||||
|
||||
driDestroyOptionCache(&instance->dri_options);
|
||||
driDestroyOptionInfo(&instance->available_dri_options);
|
||||
@@ -2069,25 +2067,61 @@ bool radv_sc_read(int fd, void *buf, size_t size, bool timeout)
|
||||
}
|
||||
}
|
||||
|
||||
static bool radv_close_all_fds(const int *keep_fds, int keep_fd_count)
|
||||
{
|
||||
DIR *d;
|
||||
struct dirent *dir;
|
||||
d = opendir("/proc/self/fd");
|
||||
if (!d)
|
||||
return false;
|
||||
int dir_fd = dirfd(d);
|
||||
|
||||
while ((dir = readdir(d)) != NULL) {
|
||||
if (dir->d_name[0] == '.')
|
||||
continue;
|
||||
|
||||
int fd = atoi(dir->d_name);
|
||||
if (fd == dir_fd)
|
||||
continue;
|
||||
|
||||
bool keep = false;
|
||||
for (int i = 0; !keep && i < keep_fd_count; ++i)
|
||||
if (keep_fds[i] == fd)
|
||||
keep = true;
|
||||
|
||||
if (keep)
|
||||
continue;
|
||||
|
||||
close(fd);
|
||||
}
|
||||
closedir(d);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void run_secure_compile_device(struct radv_device *device, unsigned process,
|
||||
int *fd_secure_input, int *fd_secure_output)
|
||||
int fd_secure_input, int fd_secure_output)
|
||||
{
|
||||
enum radv_secure_compile_type sc_type;
|
||||
if (install_seccomp_filter() == -1) {
|
||||
|
||||
const int needed_fds[] = {
|
||||
fd_secure_input,
|
||||
fd_secure_output,
|
||||
};
|
||||
if (!radv_close_all_fds(needed_fds, ARRAY_SIZE(needed_fds)) || install_seccomp_filter() == -1) {
|
||||
sc_type = RADV_SC_TYPE_INIT_FAILURE;
|
||||
} else {
|
||||
sc_type = RADV_SC_TYPE_INIT_SUCCESS;
|
||||
device->sc_state->secure_compile_processes[process].fd_secure_input = fd_secure_input[0];
|
||||
device->sc_state->secure_compile_processes[process].fd_secure_output = fd_secure_output[1];
|
||||
device->sc_state->secure_compile_processes[process].fd_secure_input = fd_secure_input;
|
||||
device->sc_state->secure_compile_processes[process].fd_secure_output = fd_secure_output;
|
||||
}
|
||||
|
||||
write(fd_secure_output[1], &sc_type, sizeof(sc_type));
|
||||
write(fd_secure_output, &sc_type, sizeof(sc_type));
|
||||
|
||||
if (sc_type == RADV_SC_TYPE_INIT_FAILURE)
|
||||
goto secure_compile_exit;
|
||||
|
||||
while (true) {
|
||||
radv_sc_read(fd_secure_input[0], &sc_type, sizeof(sc_type), false);
|
||||
radv_sc_read(fd_secure_input, &sc_type, sizeof(sc_type), false);
|
||||
|
||||
if (sc_type == RADV_SC_TYPE_COMPILE_PIPELINE) {
|
||||
struct radv_pipeline *pipeline;
|
||||
@@ -2100,20 +2134,20 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
||||
|
||||
/* Read pipeline layout */
|
||||
struct radv_pipeline_layout layout;
|
||||
sc_read = radv_sc_read(fd_secure_input[0], &layout, sizeof(struct radv_pipeline_layout), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &layout.num_sets, sizeof(uint32_t), true);
|
||||
sc_read = radv_sc_read(fd_secure_input, &layout, sizeof(struct radv_pipeline_layout), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &layout.num_sets, sizeof(uint32_t), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
for (uint32_t set = 0; set < layout.num_sets; set++) {
|
||||
uint32_t layout_size;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &layout_size, sizeof(uint32_t), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &layout_size, sizeof(uint32_t), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
layout.set[set].layout = malloc(layout_size);
|
||||
layout.set[set].layout->layout_size = layout_size;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], layout.set[set].layout,
|
||||
sc_read &= radv_sc_read(fd_secure_input, layout.set[set].layout,
|
||||
layout.set[set].layout->layout_size, true);
|
||||
}
|
||||
|
||||
@@ -2121,16 +2155,16 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
||||
|
||||
/* Read pipeline key */
|
||||
struct radv_pipeline_key key;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &key, sizeof(struct radv_pipeline_key), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &key, sizeof(struct radv_pipeline_key), true);
|
||||
|
||||
/* Read pipeline create flags */
|
||||
VkPipelineCreateFlags flags;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &flags, sizeof(VkPipelineCreateFlags), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &flags, sizeof(VkPipelineCreateFlags), true);
|
||||
|
||||
/* Read stage and shader information */
|
||||
uint32_t num_stages;
|
||||
const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, };
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &num_stages, sizeof(uint32_t), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &num_stages, sizeof(uint32_t), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
@@ -2138,33 +2172,33 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
||||
|
||||
/* Read stage */
|
||||
gl_shader_stage stage;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &stage, sizeof(gl_shader_stage), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &stage, sizeof(gl_shader_stage), true);
|
||||
|
||||
VkPipelineShaderStageCreateInfo *pStage = calloc(1, sizeof(VkPipelineShaderStageCreateInfo));
|
||||
|
||||
/* Read entry point name */
|
||||
size_t name_size;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &name_size, sizeof(size_t), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &name_size, sizeof(size_t), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
char *ep_name = malloc(name_size);
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], ep_name, name_size, true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, ep_name, name_size, true);
|
||||
pStage->pName = ep_name;
|
||||
|
||||
/* Read shader module */
|
||||
size_t module_size;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &module_size, sizeof(size_t), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &module_size, sizeof(size_t), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
struct radv_shader_module *module = malloc(module_size);
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], module, module_size, true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, module, module_size, true);
|
||||
pStage->module = radv_shader_module_to_handle(module);
|
||||
|
||||
/* Read specialization info */
|
||||
bool has_spec_info;
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &has_spec_info, sizeof(bool), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &has_spec_info, sizeof(bool), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
@@ -2172,21 +2206,21 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
||||
VkSpecializationInfo *specInfo = malloc(sizeof(VkSpecializationInfo));
|
||||
pStage->pSpecializationInfo = specInfo;
|
||||
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &specInfo->dataSize, sizeof(size_t), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &specInfo->dataSize, sizeof(size_t), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
void *si_data = malloc(specInfo->dataSize);
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], si_data, specInfo->dataSize, true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, si_data, specInfo->dataSize, true);
|
||||
specInfo->pData = si_data;
|
||||
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &specInfo->mapEntryCount, sizeof(uint32_t), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &specInfo->mapEntryCount, sizeof(uint32_t), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
|
||||
VkSpecializationMapEntry *mapEntries = malloc(sizeof(VkSpecializationMapEntry) * specInfo->mapEntryCount);
|
||||
for (uint32_t j = 0; j < specInfo->mapEntryCount; j++) {
|
||||
sc_read &= radv_sc_read(fd_secure_input[0], &mapEntries[j], sizeof(VkSpecializationMapEntry), true);
|
||||
sc_read &= radv_sc_read(fd_secure_input, &mapEntries[j], sizeof(VkSpecializationMapEntry), true);
|
||||
if (!sc_read)
|
||||
goto secure_compile_exit;
|
||||
}
|
||||
@@ -2222,7 +2256,7 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
||||
vk_free(&device->alloc, pipeline);
|
||||
|
||||
sc_type = RADV_SC_TYPE_COMPILE_PIPELINE_FINISHED;
|
||||
write(fd_secure_output[1], &sc_type, sizeof(sc_type));
|
||||
write(fd_secure_output, &sc_type, sizeof(sc_type));
|
||||
|
||||
} else if (sc_type == RADV_SC_TYPE_DESTROY_DEVICE) {
|
||||
goto secure_compile_exit;
|
||||
@@ -2230,10 +2264,8 @@ static void run_secure_compile_device(struct radv_device *device, unsigned proce
|
||||
}
|
||||
|
||||
secure_compile_exit:
|
||||
close(fd_secure_input[1]);
|
||||
close(fd_secure_input[0]);
|
||||
close(fd_secure_output[1]);
|
||||
close(fd_secure_output[0]);
|
||||
close(fd_secure_input);
|
||||
close(fd_secure_output);
|
||||
_exit(0);
|
||||
}
|
||||
|
||||
@@ -2278,7 +2310,7 @@ static VkResult fork_secure_compile_device(struct radv_device *device)
|
||||
for (unsigned process = 0; process < sc_threads; process++) {
|
||||
if ((device->sc_state->secure_compile_processes[process].sc_pid = fork()) == 0) {
|
||||
device->sc_state->secure_compile_thread_counter = process;
|
||||
run_secure_compile_device(device, process, fd_secure_input[process], fd_secure_output[process]);
|
||||
run_secure_compile_device(device, process, fd_secure_input[process][0], fd_secure_output[process][1]);
|
||||
} else {
|
||||
if (device->sc_state->secure_compile_processes[process].sc_pid == -1)
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
|
@@ -4646,10 +4646,10 @@ radv_secure_compile(struct radv_pipeline *pipeline,
|
||||
|
||||
/* Do an early exit if all cache entries are already there. */
|
||||
bool may_need_copy_shader = pStages[MESA_SHADER_GEOMETRY];
|
||||
void *main_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[0], 20);
|
||||
void *main_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[0], NULL);
|
||||
void *copy_entry = NULL;
|
||||
if (may_need_copy_shader)
|
||||
copy_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[1], 20);
|
||||
copy_entry = disk_cache_get(device->physical_device->disk_cache, allowed_hashes[1], NULL);
|
||||
|
||||
bool has_all_cache_entries = main_entry && (!may_need_copy_shader || copy_entry);
|
||||
free(main_entry);
|
||||
@@ -5065,6 +5065,19 @@ radv_compute_generate_pm4(struct radv_pipeline *pipeline)
|
||||
assert(pipeline->cs.cdw <= pipeline->cs.max_dw);
|
||||
}
|
||||
|
||||
static struct radv_pipeline_key
|
||||
radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
|
||||
const VkComputePipelineCreateInfo *pCreateInfo)
|
||||
{
|
||||
struct radv_pipeline_key key;
|
||||
memset(&key, 0, sizeof(key));
|
||||
|
||||
if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
|
||||
key.optimisations_disabled = 1;
|
||||
|
||||
return key;
|
||||
}
|
||||
|
||||
static VkResult radv_compute_pipeline_create(
|
||||
VkDevice _device,
|
||||
VkPipelineCache _cache,
|
||||
@@ -5098,13 +5111,16 @@ static VkResult radv_compute_pipeline_create(
|
||||
|
||||
pStages[MESA_SHADER_COMPUTE] = &pCreateInfo->stage;
|
||||
|
||||
struct radv_pipeline_key key =
|
||||
radv_generate_compute_pipeline_key(pipeline, pCreateInfo);
|
||||
|
||||
if (radv_device_use_secure_compile(device->instance)) {
|
||||
result = radv_secure_compile(pipeline, device, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, 1);
|
||||
result = radv_secure_compile(pipeline, device, &key, pStages, pCreateInfo->flags, 1);
|
||||
*pPipeline = radv_pipeline_to_handle(pipeline);
|
||||
|
||||
return result;
|
||||
} else {
|
||||
radv_create_shaders(pipeline, device, cache, &(struct radv_pipeline_key) {0}, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
|
||||
radv_create_shaders(pipeline, device, cache, &key, pStages, pCreateInfo->flags, pipeline_feedback, stage_feedbacks);
|
||||
}
|
||||
|
||||
pipeline->user_data_0[MESA_SHADER_COMPUTE] = radv_pipeline_stage_to_user_data_0(pipeline, MESA_SHADER_COMPUTE, device->physical_device->rad_info.chip_class);
|
||||
|
@@ -1582,7 +1582,7 @@ static bool radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws, const uint32_t *
|
||||
&tmp);
|
||||
if (ret == 0) {
|
||||
return true;
|
||||
} else if (ret == -1 && errno == ETIME) {
|
||||
} else if (ret == -ETIME) {
|
||||
return false;
|
||||
} else {
|
||||
fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
|
||||
|
@@ -301,8 +301,8 @@ class Variable(Value):
|
||||
# constant. If we want to support names that have numeric or
|
||||
# punctuation characters, we can me the first assertion more flexible.
|
||||
assert self.var_name.isalpha()
|
||||
assert self.var_name is not 'True'
|
||||
assert self.var_name is not 'False'
|
||||
assert self.var_name != 'True'
|
||||
assert self.var_name != 'False'
|
||||
|
||||
self.is_constant = m.group('const') is not None
|
||||
self.cond = m.group('cond')
|
||||
|
@@ -5152,6 +5152,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
|
||||
}
|
||||
|
||||
/* Set shader info defaults */
|
||||
if (stage == MESA_SHADER_GEOMETRY)
|
||||
b->shader->info.gs.invocations = 1;
|
||||
|
||||
/* Parse rounding mode execution modes. This has to happen earlier than
|
||||
|
@@ -138,15 +138,6 @@ _eglNativePlatformDetectNativeDisplay(void *nativeDisplay)
|
||||
if (first_pointer == gbm_create_device)
|
||||
return _EGL_PLATFORM_DRM;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_X11_PLATFORM
|
||||
/* If not matched to any other platform, fallback to x11. */
|
||||
return _EGL_PLATFORM_X11;
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_HAIKU_PLATFORM
|
||||
return _EGL_PLATFORM_HAIKU;
|
||||
#endif
|
||||
}
|
||||
|
||||
return _EGL_INVALID_PLATFORM;
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# Copyright © 2017 Intel Corporation
|
||||
# Copyright © 2017-2019 Intel Corporation
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -149,6 +149,7 @@ if not with_glvnd
|
||||
else
|
||||
egl_lib_name = 'EGL_mesa'
|
||||
egl_lib_version = '0.0.0'
|
||||
deps_for_egl += dep_glvnd
|
||||
files_egl += [g_egldispatchstubs_h, g_egldispatchstubs_c]
|
||||
files_egl += files('main/eglglvnd.c', 'main/egldispatchstubs.c')
|
||||
install_data(
|
||||
|
@@ -39,7 +39,6 @@
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/disk_cache.h"
|
||||
#include "util/strtod.h"
|
||||
#include "vk_format.h"
|
||||
#include "vk_util.h"
|
||||
|
||||
@@ -431,7 +430,6 @@ tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
|
||||
return vk_error(instance, result);
|
||||
}
|
||||
|
||||
_mesa_locale_init();
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||
@@ -457,7 +455,6 @@ tu_DestroyInstance(VkInstance _instance,
|
||||
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
||||
|
||||
glsl_type_singleton_decref();
|
||||
_mesa_locale_fini();
|
||||
|
||||
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
||||
|
||||
|
@@ -470,10 +470,6 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen,
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
case PIPE_SHADER_VERTEX:
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
if (is_a6xx(screen))
|
||||
break;
|
||||
return 0;
|
||||
case PIPE_SHADER_COMPUTE:
|
||||
if (has_compute(screen))
|
||||
break;
|
||||
|
@@ -136,6 +136,7 @@ enum {
|
||||
#define IRIS_DIRTY_VF_STATISTICS (1ull << 57)
|
||||
#define IRIS_DIRTY_PMA_FIX (1ull << 58)
|
||||
#define IRIS_DIRTY_DEPTH_BOUNDS (1ull << 59)
|
||||
#define IRIS_DIRTY_RENDER_BUFFER (1ull << 60)
|
||||
|
||||
#define IRIS_ALL_DIRTY_FOR_COMPUTE (IRIS_DIRTY_CS | \
|
||||
IRIS_DIRTY_SAMPLER_STATES_CS | \
|
||||
@@ -151,7 +152,8 @@ enum {
|
||||
IRIS_DIRTY_BINDINGS_TES | \
|
||||
IRIS_DIRTY_BINDINGS_GS | \
|
||||
IRIS_DIRTY_BINDINGS_FS | \
|
||||
IRIS_DIRTY_BINDINGS_CS)
|
||||
IRIS_DIRTY_BINDINGS_CS | \
|
||||
IRIS_DIRTY_RENDER_BUFFER)
|
||||
|
||||
/**
|
||||
* Non-orthogonal state (NOS) dependency flags.
|
||||
|
@@ -3023,31 +3023,14 @@ iris_set_framebuffer_state(struct pipe_context *ctx,
|
||||
/* Render target change */
|
||||
ice->state.dirty |= IRIS_DIRTY_BINDINGS_FS;
|
||||
|
||||
ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
|
||||
|
||||
ice->state.dirty |= IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES;
|
||||
|
||||
ice->state.dirty |= ice->state.dirty_for_nos[IRIS_NOS_FRAMEBUFFER];
|
||||
|
||||
if (GEN_GEN == 8)
|
||||
ice->state.dirty |= IRIS_DIRTY_PMA_FIX;
|
||||
|
||||
#if GEN_GEN == 11
|
||||
// XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?)
|
||||
// XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6
|
||||
|
||||
/* The PIPE_CONTROL command description says:
|
||||
*
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Target Message
|
||||
* points to a different RENDER_SURFACE_STATE, SW must issue a Render
|
||||
* Target Cache Flush by enabling this bit. When render target flush
|
||||
* is set due to new association of BTI, PS Scoreboard Stall bit must
|
||||
* be set in this packet."
|
||||
*/
|
||||
// XXX: does this need to happen at 3DSTATE_BTP_PS time?
|
||||
iris_emit_pipe_control_flush(&ice->batches[IRIS_BATCH_RENDER],
|
||||
"workaround: RT BTI change [draw]",
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||
#endif
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -5297,6 +5280,24 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
||||
}
|
||||
}
|
||||
|
||||
if (GEN_GEN >= 11 && (dirty & IRIS_DIRTY_RENDER_BUFFER)) {
|
||||
// XXX: we may want to flag IRIS_DIRTY_MULTISAMPLE (or SAMPLE_MASK?)
|
||||
// XXX: see commit 979fc1bc9bcc64027ff2cfafd285676f31b930a6
|
||||
|
||||
/* The PIPE_CONTROL command description says:
|
||||
*
|
||||
* "Whenever a Binding Table Index (BTI) used by a Render Target
|
||||
* Message points to a different RENDER_SURFACE_STATE, SW must issue a
|
||||
* Render Target Cache Flush by enabling this bit. When render target
|
||||
* flush is set due to new association of BTI, PS Scoreboard Stall bit
|
||||
* must be set in this packet."
|
||||
*/
|
||||
// XXX: does this need to happen at 3DSTATE_BTP_PS time?
|
||||
iris_emit_pipe_control_flush(batch, "workaround: RT BTI change [draw]",
|
||||
PIPE_CONTROL_RENDER_TARGET_FLUSH |
|
||||
PIPE_CONTROL_STALL_AT_SCOREBOARD);
|
||||
}
|
||||
|
||||
for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
|
||||
if (dirty & (IRIS_DIRTY_BINDINGS_VS << stage)) {
|
||||
iris_populate_binding_table(ice, batch, stage, false);
|
||||
@@ -5508,7 +5509,7 @@ iris_upload_dirty_render_state(struct iris_context *ice,
|
||||
BRW_BARYCENTRIC_NONPERSPECTIVE_BITS)
|
||||
cl.NonPerspectiveBarycentricEnable = true;
|
||||
|
||||
cl.ForceZeroRTAIndexEnable = cso_fb->layers == 0;
|
||||
cl.ForceZeroRTAIndexEnable = cso_fb->layers <= 1;
|
||||
cl.MaximumVPIndex = ice->state.num_viewports - 1;
|
||||
}
|
||||
iris_emit_merge(batch, cso_rast->clip, dynamic_clip,
|
||||
|
@@ -122,6 +122,8 @@ private:
|
||||
void emitSAM();
|
||||
void emitRAM();
|
||||
|
||||
void emitPSETP();
|
||||
|
||||
void emitMOV();
|
||||
void emitS2R();
|
||||
void emitCS2R();
|
||||
@@ -690,6 +692,31 @@ CodeEmitterGM107::emitRAM()
|
||||
* predicate/cc
|
||||
******************************************************************************/
|
||||
|
||||
void
|
||||
CodeEmitterGM107::emitPSETP()
|
||||
{
|
||||
|
||||
emitInsn(0x50900000);
|
||||
|
||||
switch (insn->op) {
|
||||
case OP_AND: emitField(0x18, 3, 0); break;
|
||||
case OP_OR: emitField(0x18, 3, 1); break;
|
||||
case OP_XOR: emitField(0x18, 3, 2); break;
|
||||
default:
|
||||
assert(!"unexpected operation");
|
||||
break;
|
||||
}
|
||||
|
||||
// emitINV (0x2a);
|
||||
emitPRED(0x27); // TODO: support 3-arg
|
||||
emitINV (0x20, insn->src(1));
|
||||
emitPRED(0x1d, insn->src(1));
|
||||
emitINV (0x0f, insn->src(0));
|
||||
emitPRED(0x0c, insn->src(0));
|
||||
emitPRED(0x03, insn->def(0));
|
||||
emitPRED(0x00);
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* movement / conversion
|
||||
******************************************************************************/
|
||||
@@ -3557,7 +3584,12 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
|
||||
case OP_AND:
|
||||
case OP_OR:
|
||||
case OP_XOR:
|
||||
emitLOP();
|
||||
switch (insn->def(0).getFile()) {
|
||||
case FILE_GPR: emitLOP(); break;
|
||||
case FILE_PREDICATE: emitPSETP(); break;
|
||||
default:
|
||||
assert(!"invalid bool op");
|
||||
}
|
||||
break;
|
||||
case OP_NOT:
|
||||
emitNOT();
|
||||
|
@@ -1591,6 +1591,12 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
|
||||
if (insn.getOpcode() == TGSI_OPCODE_STORE &&
|
||||
dst.getFile() != TGSI_FILE_MEMORY) {
|
||||
info->io.globalAccess |= 0x2;
|
||||
|
||||
if (dst.getFile() == TGSI_FILE_INPUT) {
|
||||
// TODO: Handle indirect somehow?
|
||||
const int i = dst.getIndex(0);
|
||||
info->in[i].mask |= 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (dst.getFile() == TGSI_FILE_OUTPUT) {
|
||||
|
@@ -1802,6 +1802,9 @@ NVC0LoweringPass::loadSuInfo32(Value *ptr, int slot, uint32_t off, bool bindless
|
||||
{
|
||||
uint32_t base = slot * NVC0_SU_INFO__STRIDE;
|
||||
|
||||
// We don't upload surface info for bindless for GM107+
|
||||
assert(!bindless || targ->getChipset() < NVISA_GM107_CHIPSET);
|
||||
|
||||
if (ptr) {
|
||||
ptr = bld.mkOp2v(OP_ADD, TYPE_U32, bld.getSSA(), ptr, bld.mkImm(slot));
|
||||
if (bindless)
|
||||
@@ -2204,7 +2207,7 @@ getDestType(const ImgType type) {
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
|
||||
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su, Instruction **loaded)
|
||||
{
|
||||
const TexInstruction::ImgFormatDesc *format = su->tex.format;
|
||||
int width = format->bits[0] + format->bits[1] +
|
||||
@@ -2223,21 +2226,38 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
|
||||
if (width < 32)
|
||||
untypedDst[0] = bld.getSSA();
|
||||
|
||||
if (loaded && loaded[0]) {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (loaded[i])
|
||||
typedDst[i] = loaded[i]->getDef(0);
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < 4; i++) {
|
||||
typedDst[i] = su->getDef(i);
|
||||
}
|
||||
}
|
||||
|
||||
// Set the untyped dsts as the su's destinations
|
||||
if (loaded && loaded[0]) {
|
||||
for (int i = 0; i < 4; i++)
|
||||
if (loaded[i])
|
||||
loaded[i]->setDef(0, untypedDst[i]);
|
||||
} else {
|
||||
for (int i = 0; i < 4; i++)
|
||||
su->setDef(i, untypedDst[i]);
|
||||
|
||||
bld.setPosition(su, true);
|
||||
}
|
||||
|
||||
// Unpack each component into the typed dsts
|
||||
int bits = 0;
|
||||
for (int i = 0; i < 4; bits += format->bits[i], i++) {
|
||||
if (!typedDst[i])
|
||||
continue;
|
||||
|
||||
if (loaded && loaded[0])
|
||||
bld.setPosition(loaded[i], true);
|
||||
|
||||
if (i >= format->components) {
|
||||
if (format->type == FLOAT ||
|
||||
format->type == UNORM ||
|
||||
@@ -2308,7 +2328,7 @@ NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
|
||||
processSurfaceCoordsNVE4(su);
|
||||
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
convertSurfaceFormat(su, NULL);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
@@ -2421,7 +2441,7 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
|
||||
processSurfaceCoordsNVC0(su);
|
||||
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
convertSurfaceFormat(su, NULL);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
@@ -2463,14 +2483,16 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
|
||||
TexInstruction *
|
||||
NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su, Instruction *ret[4])
|
||||
{
|
||||
const int slot = su->tex.r;
|
||||
const int dim = su->tex.target.getDim();
|
||||
const int arg = dim + (su->tex.target.isArray() || su->tex.target.isCube());
|
||||
const bool array = su->tex.target.isArray() || su->tex.target.isCube();
|
||||
const int arg = dim + array;
|
||||
Value *ind = su->getIndirectR();
|
||||
Value *handle;
|
||||
Instruction *pred = NULL, *pred2d = NULL;
|
||||
int pos = 0;
|
||||
|
||||
bld.setPosition(su, false);
|
||||
@@ -2489,19 +2511,38 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
|
||||
assert(pos == 0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (dim == 2 && !array) {
|
||||
// This might be a 2d slice of a 3d texture, try to load the z
|
||||
// coordinate in.
|
||||
Value *v;
|
||||
if (!su->tex.bindless)
|
||||
v = loadSuInfo32(ind, slot, NVC0_SU_INFO_UNK1C, su->tex.bindless);
|
||||
else
|
||||
v = bld.mkOp2v(OP_SHR, TYPE_U32, bld.getSSA(), ind, bld.mkImm(11));
|
||||
Value *is_3d = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), v, bld.mkImm(1));
|
||||
pred2d = bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
|
||||
TYPE_U32, bld.mkImm(0), is_3d);
|
||||
|
||||
bld.mkOp2(OP_SHR, TYPE_U32, v, v, bld.loadImm(NULL, 16));
|
||||
su->moveSources(dim, 1);
|
||||
su->setSrc(dim, v);
|
||||
su->tex.target = nv50_ir::TEX_TARGET_3D;
|
||||
pos++;
|
||||
}
|
||||
|
||||
if (su->tex.bindless)
|
||||
handle = ind;
|
||||
handle = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), ind, bld.mkImm(2047));
|
||||
else
|
||||
handle = loadTexHandle(ind, slot + 32);
|
||||
|
||||
su->setSrc(arg + pos, handle);
|
||||
|
||||
// The address check doesn't make sense here. The format check could make
|
||||
// sense but it's a bit of a pain.
|
||||
if (su->tex.bindless)
|
||||
return;
|
||||
|
||||
if (!su->tex.bindless) {
|
||||
// prevent read fault when the image is not actually bound
|
||||
CmpInstruction *pred =
|
||||
pred =
|
||||
bld.mkCmp(OP_SET, CC_EQ, TYPE_U32, bld.getSSA(1, FILE_PREDICATE),
|
||||
TYPE_U32, bld.mkImm(0),
|
||||
loadSuInfo32(ind, slot, NVC0_SU_INFO_ADDR, su->tex.bindless));
|
||||
@@ -2517,39 +2558,106 @@ NVC0LoweringPass::processSurfaceCoordsGM107(TexInstruction *su)
|
||||
loadSuInfo32(ind, slot, NVC0_SU_INFO_BSIZE, su->tex.bindless),
|
||||
pred->getDef(0));
|
||||
}
|
||||
}
|
||||
|
||||
// Now we have "pred" which (optionally) contains whether to do the surface
|
||||
// op at all, and a "pred2d" which indicates that, in case of doing the
|
||||
// surface op, we have to create a 2d and 3d version, conditioned on pred2d.
|
||||
TexInstruction *su2d = NULL;
|
||||
if (pred2d) {
|
||||
su2d = cloneForward(func, su)->asTex();
|
||||
for (unsigned i = 0; su->defExists(i); ++i)
|
||||
su2d->setDef(i, bld.getSSA());
|
||||
su2d->moveSources(dim + 1, -1);
|
||||
su2d->tex.target = nv50_ir::TEX_TARGET_2D;
|
||||
}
|
||||
if (pred2d && pred) {
|
||||
Instruction *pred3d = bld.mkOp2(OP_AND, TYPE_U8,
|
||||
bld.getSSA(1, FILE_PREDICATE),
|
||||
pred->getDef(0), pred2d->getDef(0));
|
||||
pred3d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
|
||||
pred3d->src(1).mod = Modifier(NV50_IR_MOD_NOT);
|
||||
su->setPredicate(CC_P, pred3d->getDef(0));
|
||||
pred2d = bld.mkOp2(OP_AND, TYPE_U8, bld.getSSA(1, FILE_PREDICATE),
|
||||
pred->getDef(0), pred2d->getDef(0));
|
||||
pred2d->src(0).mod = Modifier(NV50_IR_MOD_NOT);
|
||||
} else if (pred) {
|
||||
su->setPredicate(CC_NOT_P, pred->getDef(0));
|
||||
} else if (pred2d) {
|
||||
su->setPredicate(CC_NOT_P, pred2d->getDef(0));
|
||||
}
|
||||
if (su2d) {
|
||||
su2d->setPredicate(CC_P, pred2d->getDef(0));
|
||||
bld.insert(su2d);
|
||||
|
||||
// Create a UNION so that RA assigns the same registers
|
||||
bld.setPosition(su, true);
|
||||
for (unsigned i = 0; su->defExists(i); ++i) {
|
||||
assert(i < 4);
|
||||
|
||||
ValueDef &def = su->def(i);
|
||||
ValueDef &def2 = su2d->def(i);
|
||||
Instruction *mov = NULL;
|
||||
|
||||
if (pred) {
|
||||
mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||
mov->setPredicate(CC_P, pred->getDef(0));
|
||||
}
|
||||
|
||||
Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
|
||||
bld.getSSA(),
|
||||
NULL, def2.get());
|
||||
def.replace(uni->getDef(0), false);
|
||||
uni->setSrc(0, def.get());
|
||||
if (mov)
|
||||
uni->setSrc(2, mov->getDef(0));
|
||||
}
|
||||
} else if (pred) {
|
||||
// Create a UNION so that RA assigns the same registers
|
||||
bld.setPosition(su, true);
|
||||
for (unsigned i = 0; su->defExists(i); ++i) {
|
||||
assert(i < 4);
|
||||
|
||||
ValueDef &def = su->def(i);
|
||||
|
||||
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||
mov->setPredicate(CC_P, pred->getDef(0));
|
||||
|
||||
Instruction *uni = ret[i] = bld.mkOp2(OP_UNION, TYPE_U32,
|
||||
bld.getSSA(),
|
||||
NULL, mov->getDef(0));
|
||||
def.replace(uni->getDef(0), false);
|
||||
uni->setSrc(0, def.get());
|
||||
}
|
||||
}
|
||||
|
||||
return su2d;
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
|
||||
{
|
||||
processSurfaceCoordsGM107(su);
|
||||
// processSurfaceCoords also takes care of fixing up the outputs and
|
||||
// union'ing them with 0 as necessary. Additionally it may create a second
|
||||
// surface which needs some of the similar fixups.
|
||||
|
||||
Instruction *loaded[4] = {};
|
||||
TexInstruction *su2 = processSurfaceCoordsGM107(su, loaded);
|
||||
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
convertSurfaceFormat(su, loaded);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDP) {
|
||||
Value *def = su->getDef(0);
|
||||
|
||||
su->op = OP_SUREDB;
|
||||
|
||||
// There may not be a predicate in the bindless case.
|
||||
if (su->getPredicate()) {
|
||||
su->setDef(0, bld.getSSA());
|
||||
|
||||
bld.setPosition(su, true);
|
||||
|
||||
// make sure to initialize dst value when the atomic operation is not
|
||||
// performed
|
||||
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||
|
||||
assert(su->cc == CC_NOT_P);
|
||||
mov->setPredicate(CC_P, su->getPredicate());
|
||||
|
||||
bld.mkOp2(OP_UNION, TYPE_U32, def, su->getDef(0), mov->getDef(0));
|
||||
}
|
||||
|
||||
// If we fixed up the type of the regular surface load instruction, we also
|
||||
// have to fix up the copy.
|
||||
if (su2) {
|
||||
su2->op = su->op;
|
||||
su2->dType = su->dType;
|
||||
su2->sType = su->sType;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -171,10 +171,10 @@ private:
|
||||
Value *loadMsInfo32(Value *ptr, uint32_t off);
|
||||
|
||||
void adjustCoordinatesMS(TexInstruction *);
|
||||
void processSurfaceCoordsGM107(TexInstruction *);
|
||||
TexInstruction *processSurfaceCoordsGM107(TexInstruction *, Instruction *[4]);
|
||||
void processSurfaceCoordsNVE4(TexInstruction *);
|
||||
void processSurfaceCoordsNVC0(TexInstruction *);
|
||||
void convertSurfaceFormat(TexInstruction *);
|
||||
void convertSurfaceFormat(TexInstruction *, Instruction **);
|
||||
void insertOOBSurfaceOpResult(TexInstruction *);
|
||||
Value *calculateSampleOffset(Value *sampleID);
|
||||
|
||||
|
@@ -1433,7 +1433,15 @@ gm107_create_image_handle(struct pipe_context *pipe,
|
||||
|
||||
nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
|
||||
|
||||
return 0x100000000ULL | tic->id;
|
||||
// Compute handle. This will include the TIC as well as some additional
|
||||
// info regarding the bound 3d surface layer, if applicable.
|
||||
uint64_t handle = 0x100000000ULL | tic->id;
|
||||
struct nv04_resource *res = nv04_resource(view->resource);
|
||||
if (res->base.target == PIPE_TEXTURE_3D) {
|
||||
handle |= 1 << 11;
|
||||
handle |= view->u.tex.first_layer << (11 + 16);
|
||||
}
|
||||
return handle;
|
||||
|
||||
fail:
|
||||
FREE(tic);
|
||||
|
@@ -903,6 +903,10 @@ static void si_disk_cache_create(struct si_screen *sscreen)
|
||||
/* These flags affect shader compilation. */
|
||||
#define ALL_FLAGS (DBG(SI_SCHED) | DBG(GISEL))
|
||||
uint64_t shader_debug_flags = sscreen->debug_flags & ALL_FLAGS;
|
||||
/* Reserve left-most bit for tgsi/nir selector */
|
||||
assert(!(shader_debug_flags & (1u << 31)));
|
||||
shader_debug_flags |= (uint32_t)
|
||||
((sscreen->options.enable_nir & 0x1) << 31);
|
||||
|
||||
/* Add the high bits of 32-bit addresses, which affects
|
||||
* how 32-bit addresses are expanded to 64 bits.
|
||||
@@ -1026,6 +1030,13 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
{
|
||||
#define OPT_BOOL(name, dflt, description) \
|
||||
sscreen->options.name = \
|
||||
driQueryOptionb(config->options, "radeonsi_"#name);
|
||||
#include "si_debug_options.h"
|
||||
}
|
||||
|
||||
si_disk_cache_create(sscreen);
|
||||
|
||||
/* Determine the number of shader compiler threads. */
|
||||
@@ -1146,13 +1157,6 @@ radeonsi_screen_create_impl(struct radeon_winsys *ws,
|
||||
sscreen->commutative_blend_add =
|
||||
driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
|
||||
|
||||
{
|
||||
#define OPT_BOOL(name, dflt, description) \
|
||||
sscreen->options.name = \
|
||||
driQueryOptionb(config->options, "radeonsi_"#name);
|
||||
#include "si_debug_options.h"
|
||||
}
|
||||
|
||||
sscreen->use_ngg = sscreen->info.chip_class >= GFX10 &&
|
||||
sscreen->info.family != CHIP_NAVI14 &&
|
||||
!(sscreen->debug_flags & DBG(NO_NGG));
|
||||
|
@@ -1231,6 +1231,14 @@ swr_update_derived(struct pipe_context *pipe,
|
||||
util_viewport_zmin_zmax(state, rasterizer->clip_halfz,
|
||||
&vp->minZ, &vp->maxZ);
|
||||
|
||||
if (rasterizer->depth_clip_near) {
|
||||
vp->minZ = 0.0f;
|
||||
}
|
||||
|
||||
if (rasterizer->depth_clip_far) {
|
||||
vp->maxZ = 1.0f;
|
||||
}
|
||||
|
||||
vpm->m00[i] = state->scale[0];
|
||||
vpm->m11[i] = state->scale[1];
|
||||
vpm->m22[i] = state->scale[2];
|
||||
|
@@ -488,9 +488,10 @@ get_render_pass(struct zink_context *ctx)
|
||||
struct zink_render_pass_state state;
|
||||
|
||||
for (int i = 0; i < fb->nr_cbufs; i++) {
|
||||
struct zink_resource *cbuf = zink_resource(fb->cbufs[i]->texture);
|
||||
state.rts[i].format = cbuf->format;
|
||||
state.rts[i].samples = cbuf->base.nr_samples > 0 ? cbuf->base.nr_samples : VK_SAMPLE_COUNT_1_BIT;
|
||||
struct pipe_resource *res = fb->cbufs[i]->texture;
|
||||
state.rts[i].format = zink_get_format(screen, fb->cbufs[i]->format);
|
||||
state.rts[i].samples = res->nr_samples > 0 ? res->nr_samples :
|
||||
VK_SAMPLE_COUNT_1_BIT;
|
||||
}
|
||||
state.num_cbufs = fb->nr_cbufs;
|
||||
|
||||
@@ -993,6 +994,25 @@ get_gfx_program(struct zink_context *ctx)
|
||||
return ctx->curr_program;
|
||||
}
|
||||
|
||||
static bool
|
||||
line_width_needed(enum pipe_prim_type reduced_prim,
|
||||
VkPolygonMode polygon_mode)
|
||||
{
|
||||
switch (reduced_prim) {
|
||||
case PIPE_PRIM_POINTS:
|
||||
return false;
|
||||
|
||||
case PIPE_PRIM_LINES:
|
||||
return true;
|
||||
|
||||
case PIPE_PRIM_TRIANGLES:
|
||||
return polygon_mode == VK_POLYGON_MODE_LINE;
|
||||
|
||||
default:
|
||||
unreachable("unexpected reduced prim");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
zink_draw_vbo(struct pipe_context *pctx,
|
||||
const struct pipe_draw_info *dinfo)
|
||||
@@ -1156,7 +1176,7 @@ zink_draw_vbo(struct pipe_context *pctx,
|
||||
vkCmdSetScissor(batch->cmdbuf, 0, 1, &fb_scissor);
|
||||
}
|
||||
|
||||
if (reduced_prim == PIPE_PRIM_LINES) {
|
||||
if (line_width_needed(reduced_prim, rast_state->hw_state.polygon_mode)) {
|
||||
if (screen->feats.wideLines || ctx->line_width == 1.0f)
|
||||
vkCmdSetLineWidth(batch->cmdbuf, ctx->line_width);
|
||||
else
|
||||
@@ -1294,6 +1314,10 @@ blit_native(struct zink_context *ctx, const struct pipe_blit_info *info)
|
||||
zink_batch_reference_resoure(batch, src);
|
||||
zink_batch_reference_resoure(batch, dst);
|
||||
|
||||
if (src->layout != VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL)
|
||||
zink_resource_barrier(batch->cmdbuf, src, src->aspect,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
|
||||
|
||||
if (dst->layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL)
|
||||
zink_resource_barrier(batch->cmdbuf, dst, dst->aspect,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
|
||||
|
@@ -137,6 +137,7 @@ resource_create(struct pipe_screen *pscreen,
|
||||
|
||||
VkImageCreateInfo ici = {};
|
||||
ici.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
|
||||
ici.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
|
||||
|
||||
switch (templ->target) {
|
||||
case PIPE_TEXTURE_1D:
|
||||
@@ -146,7 +147,7 @@ resource_create(struct pipe_screen *pscreen,
|
||||
|
||||
case PIPE_TEXTURE_CUBE:
|
||||
case PIPE_TEXTURE_CUBE_ARRAY:
|
||||
ici.flags = VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
|
||||
ici.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
|
||||
/* fall-through */
|
||||
case PIPE_TEXTURE_2D:
|
||||
case PIPE_TEXTURE_2D_ARRAY:
|
||||
@@ -157,7 +158,7 @@ resource_create(struct pipe_screen *pscreen,
|
||||
case PIPE_TEXTURE_3D:
|
||||
ici.imageType = VK_IMAGE_TYPE_3D;
|
||||
if (templ->bind & PIPE_BIND_RENDER_TARGET)
|
||||
ici.flags = VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
|
||||
ici.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
|
||||
break;
|
||||
|
||||
case PIPE_BUFFER:
|
||||
|
@@ -125,6 +125,8 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
return 1;
|
||||
|
||||
case PIPE_CAP_FRAGMENT_SHADER_TEXTURE_LOD:
|
||||
return 0; /* TODO: re-enable after implementing nir_texop_txd */
|
||||
|
||||
case PIPE_CAP_FRAGMENT_SHADER_DERIVATIVES:
|
||||
case PIPE_CAP_VERTEX_SHADER_SATURATE:
|
||||
return 1;
|
||||
@@ -284,7 +286,7 @@ zink_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
|
||||
return 1;
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_NIR_COMPACT_ARRAYS:
|
||||
return 1;
|
||||
@@ -549,7 +551,7 @@ static const VkFormat formats[PIPE_FORMAT_COUNT] = {
|
||||
[PIPE_FORMAT_Z32_FLOAT] = VK_FORMAT_D32_SFLOAT,
|
||||
[PIPE_FORMAT_Z32_FLOAT_S8X24_UINT] = VK_FORMAT_D32_SFLOAT_S8_UINT,
|
||||
[PIPE_FORMAT_Z16_UNORM] = VK_FORMAT_D16_UNORM,
|
||||
[PIPE_FORMAT_X8Z24_UNORM] = VK_FORMAT_X8_D24_UNORM_PACK32,
|
||||
[PIPE_FORMAT_Z24X8_UNORM] = VK_FORMAT_X8_D24_UNORM_PACK32,
|
||||
[PIPE_FORMAT_Z24_UNORM_S8_UINT] = VK_FORMAT_D24_UNORM_S8_UINT,
|
||||
|
||||
// compressed formats
|
||||
|
@@ -940,7 +940,7 @@ dri2_create_image_from_fd(__DRIscreen *_screen,
|
||||
whandles[i].stride = (unsigned)strides[index];
|
||||
whandles[i].offset = (unsigned)offsets[index];
|
||||
whandles[i].modifier = modifier;
|
||||
whandles[i].plane = i;
|
||||
whandles[i].plane = index;
|
||||
}
|
||||
|
||||
img = dri2_create_image_from_winsys(_screen, width, height, use, map,
|
||||
|
@@ -32,6 +32,7 @@ libgraw_gdi = shared_library(
|
||||
dependencies : [
|
||||
dep_ws2_32, idep_mesautil, driver_swrast,
|
||||
],
|
||||
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libgraw.dll
|
||||
)
|
||||
|
||||
libgraw = libgraw_gdi
|
||||
|
@@ -32,6 +32,7 @@ libgraw_null = shared_library(
|
||||
include_directories : inc_common,
|
||||
link_with : libgallium,
|
||||
dependencies : idep_mesautil,
|
||||
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libgraw_null.dll
|
||||
)
|
||||
|
||||
libgraw = libgraw_null
|
||||
|
@@ -58,6 +58,7 @@ libosmesa = shared_library(
|
||||
dep_ws2_32, dep_selinux, dep_thread, dep_clock, dep_unwind,
|
||||
driver_swrast, driver_swr,
|
||||
],
|
||||
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libosmesa.dll
|
||||
soversion : host_machine.system() == 'windows' ? '' : '8',
|
||||
version : '8.0.0',
|
||||
install : true,
|
||||
|
@@ -47,6 +47,15 @@ endif
|
||||
|
||||
pipe_loader_install_dir = join_paths(get_option('libdir'), 'gallium-pipe')
|
||||
|
||||
_kmsro_targets = [
|
||||
driver_kmsro, driver_v3d, driver_vc4, driver_freedreno, driver_etnaviv,
|
||||
driver_panfrost, driver_lima,
|
||||
]
|
||||
|
||||
if with_gallium_v3d
|
||||
_kmsro_targets += [idep_xmlconfig, dep_expat]
|
||||
endif
|
||||
|
||||
pipe_loaders = [
|
||||
[with_gallium_i915, 'i915', driver_i915, []],
|
||||
[with_gallium_nouveau, 'nouveau', driver_nouveau, []],
|
||||
@@ -54,7 +63,7 @@ pipe_loaders = [
|
||||
[with_gallium_r600, 'r600', driver_r600, []],
|
||||
[with_gallium_radeonsi, 'radeonsi', [driver_radeonsi, idep_xmlconfig], []],
|
||||
[with_gallium_freedreno, 'msm', driver_freedreno, []],
|
||||
[with_gallium_panfrost, 'kmsro', [driver_kmsro, driver_panfrost], []],
|
||||
[with_gallium_kmsro, 'kmsro', _kmsro_targets, []],
|
||||
[with_gallium_svga, 'vmwgfx', driver_svga, []],
|
||||
[with_gallium_softpipe, 'swrast', [driver_swrast, driver_swr], [libwsw, libws_null]],
|
||||
]
|
||||
|
@@ -736,8 +736,7 @@ namespace brw {
|
||||
src_reg
|
||||
fix_byte_src(const src_reg &src) const
|
||||
{
|
||||
if ((shader->devinfo->gen < 11 && !shader->devinfo->is_geminilake) ||
|
||||
type_sz(src.type) != 1)
|
||||
if (shader->devinfo->gen < 11 || type_sz(src.type) != 1)
|
||||
return src;
|
||||
|
||||
dst_reg temp = vgrf(src.type == BRW_REGISTER_TYPE_UB ?
|
||||
|
@@ -1505,8 +1505,15 @@ generate_code(struct brw_codegen *p,
|
||||
bool debug_flag = INTEL_DEBUG &
|
||||
intel_debug_flag_for_shader_stage(nir->info.stage);
|
||||
struct disasm_info *disasm_info = disasm_initialize(devinfo, cfg);
|
||||
|
||||
/* `send_count` explicitly does not include spills or fills, as we'd
|
||||
* like to use it as a metric for intentional memory access or other
|
||||
* shared function use. Otherwise, subtle changes to scheduling or
|
||||
* register allocation could cause it to fluctuate wildly - and that
|
||||
* effect is already counted in spill/fill counts.
|
||||
*/
|
||||
int spill_count = 0, fill_count = 0;
|
||||
int loop_count = 0;
|
||||
int loop_count = 0, send_count = 0;
|
||||
|
||||
foreach_block_and_inst (block, vec4_instruction, inst, cfg) {
|
||||
struct brw_reg src[3], dst;
|
||||
@@ -1746,6 +1753,7 @@ generate_code(struct brw_codegen *p,
|
||||
generate_math_gen6(p, inst, dst, src[0], brw_null_reg());
|
||||
} else {
|
||||
generate_math1_gen4(p, inst, dst, src[0]);
|
||||
send_count++;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -1759,6 +1767,7 @@ generate_code(struct brw_codegen *p,
|
||||
generate_math_gen6(p, inst, dst, src[0], src[1]);
|
||||
} else {
|
||||
generate_math2_gen4(p, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -1775,14 +1784,17 @@ generate_code(struct brw_codegen *p,
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
generate_tex(p, prog_data, nir->info.stage,
|
||||
inst, dst, src[0], src[1], src[2]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_GET_BUFFER_SIZE:
|
||||
generate_get_buffer_size(p, prog_data, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case VS_OPCODE_URB_WRITE:
|
||||
generate_vs_urb_write(p, inst);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||
@@ -1797,10 +1809,12 @@ generate_code(struct brw_codegen *p,
|
||||
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
generate_pull_constant_load(p, prog_data, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
|
||||
generate_pull_constant_load_gen7(p, prog_data, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
|
||||
@@ -1809,14 +1823,17 @@ generate_code(struct brw_codegen *p,
|
||||
|
||||
case GS_OPCODE_URB_WRITE:
|
||||
generate_gs_urb_write(p, inst);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case GS_OPCODE_URB_WRITE_ALLOCATE:
|
||||
generate_gs_urb_write_allocate(p, inst);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SVB_WRITE:
|
||||
generate_gs_svb_write(p, prog_data, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SVB_SET_DST_INDEX:
|
||||
@@ -1825,6 +1842,7 @@ generate_code(struct brw_codegen *p,
|
||||
|
||||
case GS_OPCODE_THREAD_END:
|
||||
generate_gs_thread_end(p, inst);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case GS_OPCODE_SET_WRITE_OFFSET:
|
||||
@@ -1837,6 +1855,7 @@ generate_code(struct brw_codegen *p,
|
||||
|
||||
case GS_OPCODE_FF_SYNC:
|
||||
generate_gs_ff_sync(p, inst, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
|
||||
@@ -1866,12 +1885,14 @@ generate_code(struct brw_codegen *p,
|
||||
case SHADER_OPCODE_SHADER_TIME_ADD:
|
||||
brw_shader_time_add(p, src[0],
|
||||
prog_data->base.binding_table.shader_time_start);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case VEC4_OPCODE_UNTYPED_ATOMIC:
|
||||
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
|
||||
!inst->dst.is_null(), inst->header_size);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case VEC4_OPCODE_UNTYPED_SURFACE_READ:
|
||||
@@ -1879,16 +1900,19 @@ generate_code(struct brw_codegen *p,
|
||||
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
|
||||
src[2].ud);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
|
||||
assert(src[2].file == BRW_IMMEDIATE_VALUE);
|
||||
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
|
||||
src[2].ud, inst->header_size);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_MEMORY_FENCE:
|
||||
brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false, /* bti */ 0);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
|
||||
@@ -2068,10 +2092,12 @@ generate_code(struct brw_codegen *p,
|
||||
|
||||
case TCS_OPCODE_URB_WRITE:
|
||||
generate_tcs_urb_write(p, inst, src[0]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case VEC4_OPCODE_URB_READ:
|
||||
generate_vec4_urb_read(p, inst, dst, src[0]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case TCS_OPCODE_SET_INPUT_URB_OFFSETS:
|
||||
@@ -2113,15 +2139,18 @@ generate_code(struct brw_codegen *p,
|
||||
|
||||
case TCS_OPCODE_RELEASE_INPUT:
|
||||
generate_tcs_release_input(p, dst, src[0], src[1]);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case TCS_OPCODE_THREAD_END:
|
||||
generate_tcs_thread_end(p, inst);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_BARRIER:
|
||||
brw_barrier(p, src[0]);
|
||||
brw_WAIT(p);
|
||||
send_count++;
|
||||
break;
|
||||
|
||||
case SHADER_OPCODE_MOV_INDIRECT:
|
||||
@@ -2188,9 +2217,9 @@ generate_code(struct brw_codegen *p,
|
||||
sha1buf);
|
||||
|
||||
fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
|
||||
"spills:fills. Compacted %d to %d bytes (%.0f%%)\n",
|
||||
"spills:fills, %u sends. Compacted %d to %d bytes (%.0f%%)\n",
|
||||
stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
|
||||
spill_count, fill_count, before_size, after_size,
|
||||
spill_count, fill_count, send_count, before_size, after_size,
|
||||
100.0f * (before_size - after_size) / before_size);
|
||||
|
||||
/* overriding the shader makes disasm_info invalid */
|
||||
@@ -2205,10 +2234,11 @@ generate_code(struct brw_codegen *p,
|
||||
|
||||
compiler->shader_debug_log(log_data,
|
||||
"%s vec4 shader: %d inst, %d loops, %u cycles, "
|
||||
"%d:%d spills:fills, compacted %d to %d bytes.",
|
||||
"%d:%d spills:fills, %u sends, "
|
||||
"compacted %d to %d bytes.",
|
||||
stage_abbrev, before_size / 16,
|
||||
loop_count, cfg->cycle_count, spill_count,
|
||||
fill_count, before_size, after_size);
|
||||
fill_count, send_count, before_size, after_size);
|
||||
if (stats) {
|
||||
stats->dispatch_width = 0;
|
||||
stats->instructions = before_size / 16;
|
||||
|
@@ -1043,7 +1043,8 @@ static const struct gen_device_info gen_device_info_ehl_2x4 = {
|
||||
.gt = _gt, .num_slices = _slices, .l3_banks = _l3, \
|
||||
.simulator_id = 22, \
|
||||
.urb.size = (_gt) == 1 ? 512 : 1024, \
|
||||
.num_subslices = _dual_subslices
|
||||
.num_subslices = _dual_subslices, \
|
||||
.num_eu_per_subslice = 16
|
||||
|
||||
#define dual_subslices(args...) { args, }
|
||||
|
||||
|
@@ -532,9 +532,11 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
|
||||
if (use_softpin) {
|
||||
gem_handle = anv_gem_create(pool->device, newbo_size);
|
||||
map = anv_gem_mmap(pool->device, gem_handle, 0, newbo_size, 0);
|
||||
if (map == MAP_FAILED)
|
||||
if (map == MAP_FAILED) {
|
||||
anv_gem_close(pool->device, gem_handle);
|
||||
return vk_errorf(pool->device->instance, pool->device,
|
||||
VK_ERROR_MEMORY_MAP_FAILED, "gem mmap failed: %m");
|
||||
}
|
||||
assert(center_bo_offset == 0);
|
||||
} else {
|
||||
/* Just leak the old map until we destroy the pool. We can't munmap it
|
||||
|
@@ -32,7 +32,6 @@
|
||||
#include "drm-uapi/drm_fourcc.h"
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "util/strtod.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/build_id.h"
|
||||
#include "util/disk_cache.h"
|
||||
@@ -792,7 +791,6 @@ VkResult anv_CreateInstance(
|
||||
instance->pipeline_cache_enabled =
|
||||
env_var_as_boolean("ANV_ENABLE_PIPELINE_CACHE", true);
|
||||
|
||||
_mesa_locale_init();
|
||||
glsl_type_singleton_init_or_ref();
|
||||
|
||||
VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
|
||||
@@ -831,7 +829,6 @@ void anv_DestroyInstance(
|
||||
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
||||
|
||||
glsl_type_singleton_decref();
|
||||
_mesa_locale_fini();
|
||||
|
||||
driDestroyOptionCache(&instance->dri_options);
|
||||
driDestroyOptionInfo(&instance->available_dri_options);
|
||||
|
@@ -2216,12 +2216,15 @@ compute_pipeline_create(
|
||||
|
||||
pipeline->blend_state.map = NULL;
|
||||
|
||||
result = anv_reloc_list_init(&pipeline->batch_relocs,
|
||||
pAllocator ? pAllocator : &device->alloc);
|
||||
const VkAllocationCallbacks *alloc =
|
||||
pAllocator ? pAllocator : &device->alloc;
|
||||
|
||||
result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_free2(&device->alloc, pAllocator, pipeline);
|
||||
return result;
|
||||
}
|
||||
pipeline->batch.alloc = alloc;
|
||||
pipeline->batch.next = pipeline->batch.start = pipeline->batch_data;
|
||||
pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data);
|
||||
pipeline->batch.relocs = &pipeline->batch_relocs;
|
||||
|
@@ -94,12 +94,7 @@ VkResult genX(CreateQueryPool)(
|
||||
uint64s_per_slot += 4;
|
||||
break;
|
||||
case VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL: {
|
||||
uint64s_per_slot = 2 * OA_REPORT_N_UINT64; /* begin & end OA reports */
|
||||
uint64s_per_slot += 4; /* PerfCounter 1 & 2 */
|
||||
uint64s_per_slot++; /* 2 * 32bit RPSTAT register */
|
||||
uint64s_per_slot++; /* 64bit marker */
|
||||
uint64s_per_slot++; /* availability */
|
||||
uint64s_per_slot = align_u32(uint64s_per_slot, 8); /* OA reports must be aligned to 64 bytes */
|
||||
uint64s_per_slot = 72; /* 576 bytes, see layout below */
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@@ -179,54 +174,51 @@ anv_query_address(struct anv_query_pool *pool, uint32_t query)
|
||||
}
|
||||
|
||||
/**
|
||||
* VK_INTEL_performance_query layout:
|
||||
* VK_INTEL_performance_query layout (576 bytes) :
|
||||
*
|
||||
* ------------------------------
|
||||
* | end MI_RPC (256b) |
|
||||
* | availability (8b) |
|
||||
* |----------------------------|
|
||||
* | begin MI_RPC (256b) |
|
||||
* |----------------------------|
|
||||
* | begin perfcntr 1 & 2 (16b) |
|
||||
* |----------------------------|
|
||||
* | end perfcntr 1 & 2 (16b) |
|
||||
* | marker (8b) |
|
||||
* |----------------------------|
|
||||
* | begin RPSTAT register (4b) |
|
||||
* |----------------------------|
|
||||
* | end RPSTAT register (4b) |
|
||||
* |----------------------------|
|
||||
* | marker (8b) |
|
||||
* | begin perfcntr 1 & 2 (16b) |
|
||||
* |----------------------------|
|
||||
* | availability (8b) |
|
||||
* | end perfcntr 1 & 2 (16b) |
|
||||
* |----------------------------|
|
||||
* | Unused (8b) |
|
||||
* |----------------------------|
|
||||
* | begin MI_RPC (256b) |
|
||||
* |----------------------------|
|
||||
* | end MI_RPC (256b) |
|
||||
* ------------------------------
|
||||
*/
|
||||
|
||||
static uint32_t
|
||||
intel_perf_mi_rpc_offset(bool end)
|
||||
intel_perf_marker_offset(void)
|
||||
{
|
||||
return end ? 0 : 256;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
intel_perf_counter(bool end)
|
||||
{
|
||||
uint32_t offset = 512;
|
||||
offset += end ? 2 * sizeof(uint64_t) : 0;
|
||||
return offset;
|
||||
return 8;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
intel_perf_rpstart_offset(bool end)
|
||||
{
|
||||
uint32_t offset = intel_perf_counter(false) +
|
||||
4 * sizeof(uint64_t);
|
||||
offset += end ? sizeof(uint32_t) : 0;
|
||||
return offset;
|
||||
return 16 + (end ? sizeof(uint32_t) : 0);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
intel_perf_marker_offset(void)
|
||||
intel_perf_counter(bool end)
|
||||
{
|
||||
return intel_perf_rpstart_offset(false) + sizeof(uint64_t);
|
||||
return 24 + (end ? (2 * sizeof(uint64_t)) : 0);
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
intel_perf_mi_rpc_offset(bool end)
|
||||
{
|
||||
return 64 + (end ? 256 : 0);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -251,10 +243,6 @@ query_slot(struct anv_query_pool *pool, uint32_t query)
|
||||
static bool
|
||||
query_is_available(struct anv_query_pool *pool, uint32_t query)
|
||||
{
|
||||
if (pool->type == VK_QUERY_TYPE_PERFORMANCE_QUERY_INTEL) {
|
||||
return *(volatile uint64_t *)((uint8_t *)query_slot(pool, query) +
|
||||
pool->stride - 8);
|
||||
} else
|
||||
return *(volatile uint64_t *)query_slot(pool, query);
|
||||
}
|
||||
|
||||
|
@@ -27,7 +27,11 @@
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct anv_instance instance;
|
||||
struct anv_instance instance = {
|
||||
.physicalDevice = {
|
||||
.use_softpin = true,
|
||||
},
|
||||
};
|
||||
struct anv_device device = {
|
||||
.instance = &instance,
|
||||
};
|
||||
|
@@ -111,7 +111,7 @@ static void validate_monotonic(int32_t **blocks)
|
||||
|
||||
static void run_test()
|
||||
{
|
||||
struct anv_instance instance;
|
||||
struct anv_instance instance = { };
|
||||
struct anv_device device = {
|
||||
.instance = &instance,
|
||||
};
|
||||
|
@@ -36,7 +36,7 @@
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct anv_instance instance;
|
||||
struct anv_instance instance = { };
|
||||
struct anv_device device = {
|
||||
.instance = &instance,
|
||||
};
|
||||
|
@@ -35,7 +35,7 @@
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct anv_instance instance;
|
||||
struct anv_instance instance = { };
|
||||
struct anv_device device = {
|
||||
.instance = &instance,
|
||||
};
|
||||
|
@@ -56,7 +56,7 @@ static void *alloc_states(void *_job)
|
||||
|
||||
static void run_test()
|
||||
{
|
||||
struct anv_instance instance;
|
||||
struct anv_instance instance = { };
|
||||
struct anv_device device = {
|
||||
.instance = &instance,
|
||||
};
|
||||
|
@@ -27,7 +27,11 @@
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct anv_instance instance;
|
||||
struct anv_instance instance = {
|
||||
.physicalDevice = {
|
||||
.use_softpin = true,
|
||||
},
|
||||
};
|
||||
struct anv_device device = {
|
||||
.instance = &instance,
|
||||
};
|
||||
|
@@ -36,6 +36,8 @@ libosmesa = shared_library(
|
||||
link_whole : libglapi_static,
|
||||
link_with : [libmesa_classic, osmesa_link_with],
|
||||
dependencies : [dep_thread, dep_selinux],
|
||||
name_prefix : host_machine.system() == 'windows' ? '' : 'lib', # otherwise mingw will create libosmesa.dll
|
||||
soversion : host_machine.system() == 'windows' ? '' : '8',
|
||||
version : '8.0.0',
|
||||
install : true,
|
||||
)
|
||||
|
@@ -350,6 +350,12 @@ clear_bufferiv(struct gl_context *ctx, GLenum buffer, GLint drawbuffer,
|
||||
_mesa_update_state( ctx );
|
||||
}
|
||||
|
||||
if (!no_error && ctx->DrawBuffer->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
|
||||
_mesa_error(ctx, GL_INVALID_FRAMEBUFFER_OPERATION_EXT,
|
||||
"glClearBufferiv(incomplete framebuffer)");
|
||||
return;
|
||||
}
|
||||
|
||||
switch (buffer) {
|
||||
case GL_STENCIL:
|
||||
/* Page 264 (page 280 of the PDF) of the OpenGL 3.0 spec says:
|
||||
@@ -686,6 +692,12 @@ clear_bufferfi(struct gl_context *ctx, GLenum buffer, GLint drawbuffer,
|
||||
drawbuffer);
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx->DrawBuffer->_Status != GL_FRAMEBUFFER_COMPLETE_EXT) {
|
||||
_mesa_error(ctx, GL_INVALID_FRAMEBUFFER_OPERATION_EXT,
|
||||
"glClearBufferfi(incomplete framebuffer)");
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->RasterDiscard)
|
||||
|
@@ -325,6 +325,7 @@ clear_with_quad(struct gl_context *ctx, unsigned clear_buffers)
|
||||
cso_set_stream_outputs(cso, 0, NULL, NULL);
|
||||
cso_set_sample_mask(cso, ~0);
|
||||
cso_set_min_samples(cso, 1);
|
||||
st->clear.raster.multisample = st->state.fb_num_samples > 1;
|
||||
cso_set_rasterizer(cso, &st->clear.raster);
|
||||
|
||||
/* viewport state: viewport matching window dims */
|
||||
|
Reference in New Issue
Block a user