Compare commits
38 Commits
25.0-branc
...
mesa-20.2.
Author | SHA1 | Date | |
---|---|---|---|
|
e60a1d5bc0 | ||
|
b454e44579 | ||
|
ee63146f37 | ||
|
6b6a38a8be | ||
|
2d2ba264e9 | ||
|
d584839b9b | ||
|
6e5874416d | ||
|
0ba14f96fc | ||
|
8acf2493a9 | ||
|
e196f50d76 | ||
|
6b823dfc01 | ||
|
c612c06cf1 | ||
|
16da4a483c | ||
|
5b25e9de0c | ||
|
c09c88fbac | ||
|
f3c3a1ceff | ||
|
9f5c75a90f | ||
|
53f5f43627 | ||
|
7665280b28 | ||
|
a6ccd24636 | ||
|
3f0a10b7da | ||
|
d4d36010a8 | ||
|
1edc9549d2 | ||
|
ee4ccf0031 | ||
|
9c719ad7c9 | ||
|
3bf0368f9e | ||
|
b0b55fa939 | ||
|
3183610228 | ||
|
6eadb68e98 | ||
|
81df3a4a4c | ||
|
d0b0165808 | ||
|
9a5b5cdf9c | ||
|
1ed360d24b | ||
|
761c979ae0 | ||
|
20663891c1 | ||
|
c2f85ade7d | ||
|
cb341c7f86 | ||
|
0b8f4381b1 |
@@ -222,7 +222,7 @@ x86_build:
|
||||
extends:
|
||||
- .use-x86_build-base
|
||||
variables:
|
||||
FDO_DISTRIBUTION_TAG: &x86_build "2020-07-28-x86-2"
|
||||
FDO_DISTRIBUTION_TAG: &x86_build "2020-08-08-glvnd"
|
||||
|
||||
.use-x86_build:
|
||||
variables:
|
||||
|
@@ -92,7 +92,7 @@ rm -rf $WAYLAND_PROTOCOLS_VERSION
|
||||
# The version of libglvnd-dev in debian is too old
|
||||
# Check this page to see when this local compilation can be dropped in favour of the package:
|
||||
# https://packages.debian.org/libglvnd-dev
|
||||
GLVND_VERSION=1.2.0
|
||||
GLVND_VERSION=1.3.2
|
||||
wget https://gitlab.freedesktop.org/glvnd/libglvnd/-/archive/v$GLVND_VERSION/libglvnd-v$GLVND_VERSION.tar.gz
|
||||
tar -xvf libglvnd-v$GLVND_VERSION.tar.gz && rm libglvnd-v$GLVND_VERSION.tar.gz
|
||||
pushd libglvnd-v$GLVND_VERSION; ./autogen.sh; ./configure; make install; popd
|
||||
|
1028
.pick_status.json
Normal file
1028
.pick_status.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -240,8 +240,8 @@ class UI:
|
||||
|
||||
{err}
|
||||
|
||||
You can either cancel, or resolve the conflicts, commit the
|
||||
changes and select ok."""))
|
||||
You can either cancel, or resolve the conflicts (`git mergetool`), finish the
|
||||
cherry-pick (`git cherry-pick --continue`) and select ok."""))
|
||||
|
||||
can_btn = urwid.Button('Cancel')
|
||||
urwid.connect_signal(can_btn, 'click', reset_cb)
|
||||
|
@@ -1553,7 +1553,7 @@ endif
|
||||
|
||||
dep_glvnd = null_dep
|
||||
if with_glvnd
|
||||
dep_glvnd = dependency('libglvnd', version : '>= 1.2.0')
|
||||
dep_glvnd = dependency('libglvnd', version : '>= 1.3.2')
|
||||
pre_args += '-DUSE_LIBGLVND=1'
|
||||
endif
|
||||
|
||||
|
@@ -165,12 +165,6 @@ void mark_block_wqm(wqm_ctx &ctx, unsigned block_idx)
|
||||
|
||||
ctx.branch_wqm[block_idx] = true;
|
||||
Block& block = ctx.program->blocks[block_idx];
|
||||
aco_ptr<Instruction>& branch = block.instructions.back();
|
||||
|
||||
if (branch->opcode != aco_opcode::p_branch) {
|
||||
assert(!branch->operands.empty() && branch->operands[0].isTemp());
|
||||
set_needs_wqm(ctx, branch->operands[0].getTemp());
|
||||
}
|
||||
|
||||
/* TODO: this sets more branch conditions to WQM than it needs to
|
||||
* it should be enough to stop at the "exec mask top level" */
|
||||
@@ -233,6 +227,11 @@ void get_block_needs(wqm_ctx &ctx, exec_ctx &exec_ctx, Block* block)
|
||||
}
|
||||
}
|
||||
|
||||
if (instr->format == Format::PSEUDO_BRANCH && ctx.branch_wqm[block->index]) {
|
||||
needs = WQM;
|
||||
propagate_wqm = true;
|
||||
}
|
||||
|
||||
if (propagate_wqm) {
|
||||
for (const Operand& op : instr->operands) {
|
||||
if (op.isTemp()) {
|
||||
|
@@ -936,6 +936,14 @@ void init_context(isel_context *ctx, nir_shader *shader)
|
||||
|
||||
ctx->allocated.reset(allocated.release());
|
||||
ctx->cf_info.nir_to_aco.reset(nir_to_aco.release());
|
||||
|
||||
/* align and copy constant data */
|
||||
while (ctx->program->constant_data.size() % 4u)
|
||||
ctx->program->constant_data.push_back(0);
|
||||
ctx->constant_data_offset = ctx->program->constant_data.size();
|
||||
ctx->program->constant_data.insert(ctx->program->constant_data.end(),
|
||||
(uint8_t*)shader->constant_data,
|
||||
(uint8_t*)shader->constant_data + shader->constant_data_size);
|
||||
}
|
||||
|
||||
Pseudo_instruction *add_startpgm(struct isel_context *ctx)
|
||||
@@ -1304,16 +1312,6 @@ lower_bit_size_callback(const nir_alu_instr *alu, void *_)
|
||||
void
|
||||
setup_nir(isel_context *ctx, nir_shader *nir)
|
||||
{
|
||||
Program *program = ctx->program;
|
||||
|
||||
/* align and copy constant data */
|
||||
while (program->constant_data.size() % 4u)
|
||||
program->constant_data.push_back(0);
|
||||
ctx->constant_data_offset = program->constant_data.size();
|
||||
program->constant_data.insert(program->constant_data.end(),
|
||||
(uint8_t*)nir->constant_data,
|
||||
(uint8_t*)nir->constant_data + nir->constant_data_size);
|
||||
|
||||
/* the variable setup has to be done before lower_io / CSE */
|
||||
setup_variables(ctx, nir);
|
||||
|
||||
|
@@ -174,7 +174,7 @@ struct memory_sync_info {
|
||||
return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
|
||||
}
|
||||
};
|
||||
static_assert(sizeof(memory_sync_info) == 3);
|
||||
static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
|
||||
|
||||
enum fp_round {
|
||||
fp_round_ne = 0,
|
||||
|
@@ -495,6 +495,12 @@ HazardResult perform_hazard_query(hazard_query *query, Instruction *instr, bool
|
||||
if (first->bar_classes && second->bar_classes)
|
||||
return hazard_fail_barrier;
|
||||
|
||||
/* Don't move memory accesses to before control barriers. I don't think
|
||||
* this is necessary for the Vulkan memory model, but it might be for GLSL450. */
|
||||
unsigned control_classes = storage_buffer | storage_atomic_counter | storage_image | storage_shared;
|
||||
if (first->has_control_barrier && ((second->access_atomic | second->access_relaxed) & control_classes))
|
||||
return hazard_fail_barrier;
|
||||
|
||||
/* don't move memory loads/stores past potentially aliasing loads/stores */
|
||||
unsigned aliasing_storage = instr->format == Format::SMEM ?
|
||||
query->aliasing_storage_smem :
|
||||
|
@@ -1233,13 +1233,13 @@ static void
|
||||
radv_image_alloc_single_sample_cmask(const struct radv_image *image,
|
||||
struct radeon_surf *surf)
|
||||
{
|
||||
assert(image->info.storage_samples == 1 || surf->cmask_offset);
|
||||
|
||||
if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 ||
|
||||
image->info.levels > 1 || image->info.depth > 1 ||
|
||||
radv_image_has_dcc(image) || !radv_image_use_fast_clear_for_image(image))
|
||||
return;
|
||||
|
||||
assert(image->info.storage_samples == 1);
|
||||
|
||||
surf->cmask_offset = align64(surf->total_size, surf->cmask_alignment);
|
||||
surf->total_size = surf->cmask_offset + surf->cmask_size;
|
||||
surf->alignment = MAX2(surf->alignment, surf->cmask_alignment);
|
||||
@@ -1702,7 +1702,8 @@ bool radv_layout_can_fast_clear(const struct radv_image *image,
|
||||
bool in_render_loop,
|
||||
unsigned queue_mask)
|
||||
{
|
||||
return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
|
||||
queue_mask == (1u << RADV_QUEUE_GENERAL);
|
||||
}
|
||||
|
||||
bool radv_layout_dcc_compressed(const struct radv_device *device,
|
||||
|
@@ -102,8 +102,19 @@ si_emit_compute(struct radv_physical_device *physical_device,
|
||||
S_00B858_SH1_CU_EN(0xffff));
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX10)
|
||||
if (physical_device->rad_info.chip_class >= GFX9) {
|
||||
radeon_set_uconfig_reg(cs, R_0301EC_CP_COHER_START_DELAY,
|
||||
physical_device->rad_info.chip_class >= GFX10 ? 0x20 : 0);
|
||||
}
|
||||
|
||||
if (physical_device->rad_info.chip_class >= GFX10) {
|
||||
radeon_set_sh_reg(cs, R_00B890_COMPUTE_USER_ACCUM_0, 0);
|
||||
radeon_set_sh_reg(cs, R_00B894_COMPUTE_USER_ACCUM_1, 0);
|
||||
radeon_set_sh_reg(cs, R_00B898_COMPUTE_USER_ACCUM_2, 0);
|
||||
radeon_set_sh_reg(cs, R_00B89C_COMPUTE_USER_ACCUM_3, 0);
|
||||
radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, 0);
|
||||
radeon_set_sh_reg(cs, R_00B9F4_COMPUTE_DISPATCH_TUNNEL, 0);
|
||||
}
|
||||
|
||||
/* This register has been moved to R_00CD20_COMPUTE_MAX_WAVE_ID
|
||||
* and is now per pipe, so it should be handled in the
|
||||
@@ -325,6 +336,10 @@ si_emit_graphics(struct radv_device *device,
|
||||
late_alloc_wave64_gs = 0;
|
||||
cu_mask_gs = 0xffff;
|
||||
}
|
||||
|
||||
/* Limit LATE_ALLOC_GS to prevent a hang (hw bug). */
|
||||
if (physical_device->rad_info.chip_class == GFX10)
|
||||
late_alloc_wave64_gs = MIN2(late_alloc_wave64_gs, 64);
|
||||
} else {
|
||||
if (!physical_device->rad_info.use_late_alloc) {
|
||||
late_alloc_wave64 = 0;
|
||||
@@ -413,6 +428,23 @@ si_emit_graphics(struct radv_device *device,
|
||||
S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_RD));
|
||||
radeon_set_context_reg(cs, R_028428_CB_COVERAGE_OUT_CONTROL, 0);
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0);
|
||||
radeon_set_sh_reg(cs, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0);
|
||||
radeon_set_sh_reg(cs, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0);
|
||||
radeon_set_sh_reg(cs, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0);
|
||||
radeon_set_sh_reg(cs, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0);
|
||||
radeon_set_sh_reg(cs, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0);
|
||||
radeon_set_sh_reg(cs, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0);
|
||||
radeon_set_sh_reg(cs, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0);
|
||||
radeon_set_sh_reg(cs, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0);
|
||||
radeon_set_sh_reg(cs, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0);
|
||||
radeon_set_sh_reg(cs, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0);
|
||||
radeon_set_sh_reg(cs, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0);
|
||||
radeon_set_sh_reg(cs, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0);
|
||||
radeon_set_sh_reg(cs, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0);
|
||||
radeon_set_sh_reg(cs, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0);
|
||||
radeon_set_sh_reg(cs, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0);
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B0C0_SPI_SHADER_REQ_CTRL_PS,
|
||||
S_00B0C0_SOFT_GROUPING_EN(1) |
|
||||
S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1));
|
||||
|
@@ -1,10 +1,63 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
import argparse
|
||||
from generate.eglFunctionList import EGL_FUNCTIONS as GLVND_ENTRYPOINTS
|
||||
|
||||
|
||||
PREFIX = 'EGL_ENTRYPOINT('
|
||||
SUFFIX = ')'
|
||||
|
||||
|
||||
# These entrypoints should *not* be in the GLVND entrypoints
|
||||
GLVND_EXCLUDED_ENTRYPOINTS = [
|
||||
# EGL_KHR_debug
|
||||
'eglDebugMessageControlKHR',
|
||||
'eglQueryDebugKHR',
|
||||
'eglLabelObjectKHR',
|
||||
]
|
||||
|
||||
|
||||
def check_entrypoint_sorted(entrypoints):
    """Verify that `entrypoints` is in ascending order.

    Prints a progress line, then compares every entry with its predecessor.
    On the first out-of-order pair an error naming both entries is printed
    and the process terminates with exit status 1; otherwise 'All good :)'
    is printed.
    """
    print('Checking that EGL API entrypoints are sorted...')

    # Pair each entry with the one before it; an empty or single-element
    # list yields no pairs and is trivially sorted.
    for previous, current in zip(entrypoints, entrypoints[1:]):
        if previous > current:
            print('ERROR: ' + current + ' should come before ' + previous)
            # The exit() helper is installed by the site module for
            # interactive use; raise SystemExit directly instead.
            raise SystemExit(1)

    print('All good :)')
|
||||
|
||||
|
||||
def check_glvnd_entrypoints(egl_entrypoints, glvnd_entrypoints):
    """Cross-check the plain EGL entrypoints against the GLVND ones.

    Three checks are made, each printing one error per offending name:
      * every EGL entrypoint not listed in GLVND_EXCLUDED_ENTRYPOINTS must
        be present in `glvnd_entrypoints`;
      * every GLVND entrypoint must be present in `egl_entrypoints`;
      * no name from GLVND_EXCLUDED_ENTRYPOINTS may be present in
        `glvnd_entrypoints`.

    Prints 'All good :)' when nothing was reported; otherwise terminates
    the process with exit status 1 after all errors have been printed.
    """
    print('Checking the GLVND entrypoints against the plain EGL ones...')
    success = True

    for egl_entrypoint in egl_entrypoints:
        if egl_entrypoint in GLVND_EXCLUDED_ENTRYPOINTS:
            continue
        if egl_entrypoint not in glvnd_entrypoints:
            print('ERROR: ' + egl_entrypoint + ' is missing from the GLVND entrypoints (src/egl/generate/eglFunctionList.py)')
            success = False

    for glvnd_entrypoint in glvnd_entrypoints:
        if glvnd_entrypoint not in egl_entrypoints:
            print('ERROR: ' + glvnd_entrypoint + ' is missing from the plain EGL entrypoints (src/egl/main/eglentrypoint.h)')
            success = False

    for excluded_entrypoint in GLVND_EXCLUDED_ENTRYPOINTS:
        if excluded_entrypoint in glvnd_entrypoints:
            print('ERROR: ' + excluded_entrypoint + ' should *not* be in the GLVND entrypoints (src/egl/generate/eglFunctionList.py)')
            success = False

    if success:
        print('All good :)')
    else:
        # The exit() helper is installed by the site module for interactive
        # use; raise SystemExit directly instead.
        raise SystemExit(1)
|
||||
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('header')
|
||||
@@ -20,17 +73,11 @@ def main():
|
||||
assert line.endswith(SUFFIX)
|
||||
entrypoints.append(line[len(PREFIX):-len(SUFFIX)])
|
||||
|
||||
print('Checking EGL API entrypoints are sorted')
|
||||
check_entrypoint_sorted(entrypoints)
|
||||
|
||||
for i, _ in enumerate(entrypoints):
|
||||
# Can't compare the first one with the previous
|
||||
if i == 0:
|
||||
continue
|
||||
if entrypoints[i - 1] > entrypoints[i]:
|
||||
print('ERROR: ' + entrypoints[i] + ' should come before ' + entrypoints[i - 1])
|
||||
exit(1)
|
||||
glvnd_entrypoints = [x[0] for x in GLVND_ENTRYPOINTS]
|
||||
|
||||
print('All good :)')
|
||||
check_glvnd_entrypoints(entrypoints, glvnd_entrypoints)
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
@@ -2452,7 +2452,7 @@ registers-gmu:
|
||||
00000000 0x12b: 00000000
|
||||
00000000 0x140: 00000000
|
||||
indexed-registers:
|
||||
- regs-name: CP_SEQ_STAT
|
||||
- regs-name: CP_SQE_STAT
|
||||
dwords: 51
|
||||
PC: 00c5
|
||||
$01: deadd00d $11: 00000000
|
||||
|
@@ -101,6 +101,23 @@ regval(const char *name)
|
||||
* Line reading and string helpers:
|
||||
*/
|
||||
|
||||
/*
 * Replace the first occurrence of 'find' in 'line' with 'replace'.
 *
 * 'line' must be heap-allocated, because it is released with free() when a
 * replacement is made.  Returns 'line' itself, untouched, when 'find' does
 * not occur in it or when the replacement buffer cannot be allocated;
 * otherwise returns a newly malloc'ed string owned by the caller, and the
 * old 'line' has been freed.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
	char *s = strstr(line, find);
	if (!s)
		return line;

	const char *tail = s + strlen(find);
	size_t head_len = (size_t)(s - line);
	size_t repl_len = strlen(replace);
	size_t tail_len = strlen(tail);

	/* Assemble the result by hand with a checked allocation: asprintf()
	 * is a GNU extension and leaves its output pointer undefined on
	 * failure, which must never be handed back to the caller.
	 */
	char *newline = malloc(head_len + repl_len + tail_len + 1);
	if (!newline)
		return line;

	memcpy(newline, line, head_len);
	memcpy(newline + head_len, replace, repl_len);
	/* +1 copies the terminating NUL along with the tail */
	memcpy(newline + head_len + repl_len, tail, tail_len + 1);
	free(line);

	return newline;
}
|
||||
|
||||
static char *lastline;
|
||||
static char *pushedline;
|
||||
|
||||
@@ -120,6 +137,10 @@ popline(void)
|
||||
if (getline(&r, &n, in) < 0)
|
||||
exit(0);
|
||||
|
||||
/* Handle section name typos from earlier kernels: */
|
||||
r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
|
||||
r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
|
||||
|
||||
lastline = r;
|
||||
return r;
|
||||
}
|
||||
@@ -471,7 +492,7 @@ decode_clusters(void)
|
||||
*/
|
||||
|
||||
static void
|
||||
dump_cp_seq_stat(uint32_t *stat)
|
||||
dump_cp_sqe_stat(uint32_t *stat)
|
||||
{
|
||||
printf("\t PC: %04x\n", stat[0]);
|
||||
stat++;
|
||||
@@ -850,23 +871,23 @@ decode_indexed_registers(void)
|
||||
* so far) not useful, so skip them if not in verbose mode:
|
||||
*/
|
||||
bool dump = verbose ||
|
||||
!strcmp(name, "CP_SEQ_STAT") ||
|
||||
!strcmp(name, "CP_SQE_STAT") ||
|
||||
!strcmp(name, "CP_DRAW_STATE") ||
|
||||
!strcmp(name, "CP_ROQ") ||
|
||||
0;
|
||||
|
||||
if (!strcmp(name, "CP_SEQ_STAT"))
|
||||
dump_cp_seq_stat(buf);
|
||||
if (!strcmp(name, "CP_SQE_STAT"))
|
||||
dump_cp_sqe_stat(buf);
|
||||
|
||||
if (!strcmp(name, "CP_UCODE_DBG_DATA"))
|
||||
dump_cp_ucode_dbg(buf);
|
||||
|
||||
/* note that name was typo'd in earlier kernels: */
|
||||
if (!strcmp(name, "CP_MEMPOOL") || !strcmp(name, "CP_MEMPOOOL"))
|
||||
if (!strcmp(name, "CP_MEMPOOL"))
|
||||
dump_cp_mem_pool(buf);
|
||||
|
||||
if (dump)
|
||||
dump_hex_ascii(buf, 4 * sizedwords, 1);
|
||||
|
||||
free(buf);
|
||||
|
||||
continue;
|
||||
|
@@ -30,7 +30,7 @@ if not dep_lua.found()
|
||||
dep_lua = dependency('lua52', required: false)
|
||||
endif
|
||||
if not dep_lua.found()
|
||||
dep_lua = dependency('lua', required: false)
|
||||
dep_lua = dependency('lua', required: false, version : '>=5.2')
|
||||
endif
|
||||
|
||||
dep_libarchive = dependency('libarchive', required: false)
|
||||
|
@@ -24,6 +24,10 @@ xml_files = [
|
||||
'a4xx.xml',
|
||||
'a5xx.xml',
|
||||
'a6xx.xml',
|
||||
'a6xx_gmu.xml',
|
||||
'ocmem.xml',
|
||||
'adreno_control_regs.xml',
|
||||
'adreno_pipe_regs.xml',
|
||||
'adreno_common.xml',
|
||||
'adreno_pm4.xml',
|
||||
]
|
||||
|
@@ -1138,6 +1138,7 @@ etna_compile_shader_nir(struct etna_shader_variant *v)
|
||||
|
||||
NIR_PASS_V(s, nir_opt_dce);
|
||||
|
||||
NIR_PASS_V(s, nir_lower_bool_to_bitsize);
|
||||
NIR_PASS_V(s, etna_lower_alu, c->specs->has_new_transcendentals);
|
||||
|
||||
if (DBG_ENABLED(ETNA_DBG_DUMP_SHADERS))
|
||||
|
@@ -466,16 +466,9 @@ gpu_supports_render_format(struct etna_screen *screen, enum pipe_format format,
|
||||
if (fmt == ETNA_NO_MATCH)
|
||||
return false;
|
||||
|
||||
/* Validate MSAA; number of samples must be allowed, and render target
|
||||
* must have MSAA'able format. */
|
||||
if (sample_count > 1) {
|
||||
if (!VIV_FEATURE(screen, chipFeatures, MSAA))
|
||||
/* MSAA is broken */
|
||||
if (sample_count > 1)
|
||||
return false;
|
||||
if (!translate_samples_to_xyscale(sample_count, NULL, NULL))
|
||||
return false;
|
||||
if (translate_ts_format(format) == ETNA_NO_MATCH)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (format == PIPE_FORMAT_R8_UNORM)
|
||||
return VIV_FEATURE(screen, chipMinorFeatures5, HALTI5);
|
||||
|
@@ -283,7 +283,7 @@ iris_get_monitor_result(struct pipe_context *ctx,
|
||||
assert(gen_perf_is_query_ready(perf_ctx, monitor->query, batch));
|
||||
|
||||
unsigned bytes_written;
|
||||
gen_perf_get_query_data(perf_ctx, monitor->query,
|
||||
gen_perf_get_query_data(perf_ctx, monitor->query, batch,
|
||||
monitor->result_size,
|
||||
(unsigned*) monitor->result_buffer,
|
||||
&bytes_written);
|
||||
|
@@ -214,7 +214,8 @@ iris_get_perf_query_data(struct pipe_context *pipe,
|
||||
struct gen_perf_query_object *obj = perf_query->query;
|
||||
struct gen_perf_context *perf_ctx = ice->perf_ctx;
|
||||
|
||||
gen_perf_get_query_data(perf_ctx, obj, data_size, data, bytes_written);
|
||||
gen_perf_get_query_data(perf_ctx, obj, &ice->batches[IRIS_BATCH_RENDER],
|
||||
data_size, data, bytes_written);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -1887,6 +1887,16 @@ static void clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts
|
||||
*max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
|
||||
}
|
||||
|
||||
unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader)
|
||||
{
|
||||
const struct si_shader_selector *sel = shader->selector;
|
||||
|
||||
if (sel->type == PIPE_SHADER_GEOMETRY && sel->so.num_outputs)
|
||||
return 44;
|
||||
|
||||
return 8;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine subgroup information like maximum number of vertices and prims.
|
||||
*
|
||||
@@ -1907,19 +1917,15 @@ bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader)
|
||||
const unsigned min_verts_per_prim = gs_type == PIPE_SHADER_GEOMETRY ? max_verts_per_prim : 1;
|
||||
|
||||
/* All these are in dwords: */
|
||||
/* We can't allow using the whole LDS, because GS waves compete with
|
||||
* other shader stages for LDS space.
|
||||
*
|
||||
* TODO: We should really take the shader's internal LDS use into
|
||||
* account. The linker will fail if the size is greater than
|
||||
* 8K dwords.
|
||||
/* GE can only use 8K dwords (32KB) of LDS per workgroup.
|
||||
*/
|
||||
const unsigned max_lds_size = 8 * 1024 - 768;
|
||||
const unsigned max_lds_size = 8 * 1024 - gfx10_ngg_get_scratch_dw_size(shader);
|
||||
const unsigned target_lds_size = max_lds_size;
|
||||
unsigned esvert_lds_size = 0;
|
||||
unsigned gsprim_lds_size = 0;
|
||||
|
||||
/* All these are per subgroup: */
|
||||
const unsigned min_esverts = gs_sel->screen->info.chip_class >= GFX10_3 ? 29 : 24;
|
||||
bool max_vert_out_per_gs_instance = false;
|
||||
unsigned max_gsprims_base = 128; /* default prim group size clamp */
|
||||
unsigned max_esverts_base = 128;
|
||||
@@ -2008,7 +2014,7 @@ retry_select_mode:
|
||||
|
||||
/* Round up towards full wave sizes for better ALU utilization. */
|
||||
if (!max_vert_out_per_gs_instance) {
|
||||
const unsigned wavesize = gs_sel->screen->ge_wave_size;
|
||||
const unsigned wavesize = si_get_shader_wave_size(shader);
|
||||
unsigned orig_max_esverts;
|
||||
unsigned orig_max_gsprims;
|
||||
do {
|
||||
@@ -2021,19 +2027,30 @@ retry_select_mode:
|
||||
max_esverts =
|
||||
MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
|
||||
max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
|
||||
|
||||
max_gsprims = align(max_gsprims, wavesize);
|
||||
max_gsprims = MIN2(max_gsprims, max_gsprims_base);
|
||||
if (gsprim_lds_size)
|
||||
if (gsprim_lds_size) {
|
||||
/* Don't count unusable vertices to the LDS size. Those are vertices above
|
||||
* the maximum number of vertices that can occur in the workgroup,
|
||||
* which is e.g. max_gsprims * 3 for triangles.
|
||||
*/
|
||||
unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
|
||||
max_gsprims =
|
||||
MIN2(max_gsprims, (max_lds_size - max_esverts * esvert_lds_size) / gsprim_lds_size);
|
||||
MIN2(max_gsprims, (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
|
||||
}
|
||||
clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, use_adjacency);
|
||||
assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
|
||||
} while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);
|
||||
}
|
||||
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
max_esverts = MAX2(max_esverts, 23 + max_verts_per_prim);
|
||||
/* Verify the restriction. */
|
||||
assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
|
||||
} else {
|
||||
/* Hardware restriction: minimum value of max_esverts */
|
||||
max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
|
||||
}
|
||||
|
||||
unsigned max_out_vertices =
|
||||
max_vert_out_per_gs_instance
|
||||
@@ -2061,13 +2078,15 @@ retry_select_mode:
|
||||
shader->ngg.prim_amp_factor = prim_amp_factor;
|
||||
shader->ngg.max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
|
||||
|
||||
shader->gs_info.esgs_ring_size = 4 * max_esverts * esvert_lds_size;
|
||||
/* Don't count unusable vertices. */
|
||||
shader->gs_info.esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) *
|
||||
esvert_lds_size;
|
||||
shader->ngg.ngg_emit_size = max_gsprims * gsprim_lds_size;
|
||||
|
||||
assert(shader->ngg.hw_max_esverts >= 24); /* HW limitation */
|
||||
assert(shader->ngg.hw_max_esverts >= min_esverts); /* HW limitation */
|
||||
|
||||
/* If asserts are disabled, we use the same conditions to return false */
|
||||
return max_esverts >= max_verts_per_prim && max_gsprims >= 1 &&
|
||||
max_out_vertices <= 256 &&
|
||||
shader->ngg.hw_max_esverts >= 24;
|
||||
shader->ngg.hw_max_esverts >= min_esverts;
|
||||
}
|
||||
|
@@ -816,7 +816,7 @@ static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *sh
|
||||
*/
|
||||
struct ac_rtld_symbol *sym = &lds_symbols[num_lds_symbols++];
|
||||
sym->name = "esgs_ring";
|
||||
sym->size = shader->gs_info.esgs_ring_size;
|
||||
sym->size = shader->gs_info.esgs_ring_size * 4;
|
||||
sym->align = 64 * 1024;
|
||||
}
|
||||
|
||||
@@ -1392,12 +1392,8 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
|
||||
ctx->gs_generated_prims[i] = ac_build_alloca(&ctx->ac, ctx->ac.i32, "");
|
||||
}
|
||||
|
||||
unsigned scratch_size = 8;
|
||||
if (sel->so.num_outputs)
|
||||
scratch_size = 44;
|
||||
|
||||
assert(!ctx->gs_ngg_scratch);
|
||||
LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, scratch_size);
|
||||
LLVMTypeRef ai32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
|
||||
ctx->gs_ngg_scratch =
|
||||
LLVMAddGlobalInAddressSpace(ctx->ac.module, ai32, "ngg_scratch", AC_ADDR_SPACE_LDS);
|
||||
LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(ai32));
|
||||
@@ -1425,7 +1421,7 @@ static bool si_build_main_function(struct si_shader_context *ctx, struct si_shad
|
||||
* compaction is enabled.
|
||||
*/
|
||||
if (!ctx->gs_ngg_scratch && (sel->so.num_outputs || shader->key.opt.ngg_culling)) {
|
||||
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, 8);
|
||||
LLVMTypeRef asi32 = LLVMArrayType(ctx->ac.i32, gfx10_ngg_get_scratch_dw_size(shader));
|
||||
ctx->gs_ngg_scratch =
|
||||
LLVMAddGlobalInAddressSpace(ctx->ac.module, asi32, "ngg_scratch", AC_ADDR_SPACE_LDS);
|
||||
LLVMSetInitializer(ctx->gs_ngg_scratch, LLVMGetUndef(asi32));
|
||||
|
@@ -219,6 +219,7 @@ void gfx10_emit_ngg_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LL
|
||||
void gfx10_ngg_gs_emit_vertex(struct si_shader_context *ctx, unsigned stream, LLVMValueRef *addrs);
|
||||
void gfx10_ngg_gs_emit_prologue(struct si_shader_context *ctx);
|
||||
void gfx10_ngg_gs_emit_epilogue(struct si_shader_context *ctx);
|
||||
unsigned gfx10_ngg_get_scratch_dw_size(struct si_shader *shader);
|
||||
bool gfx10_ngg_calculate_subgroup_info(struct si_shader *shader);
|
||||
|
||||
/* si_shader_llvm.c */
|
||||
|
@@ -763,6 +763,7 @@ static void si_emit_clip_regs(struct si_context *sctx)
|
||||
unsigned initial_cdw = sctx->gfx_cs->current.cdw;
|
||||
unsigned pa_cl_cntl = S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0F) != 0) |
|
||||
S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xF0) != 0) |
|
||||
S_02881C_BYPASS_VTX_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
|
||||
S_02881C_BYPASS_PRIM_RATE_COMBINER_GFX103(sctx->chip_class >= GFX10_3) |
|
||||
clipdist_mask | (culldist_mask << 8);
|
||||
|
||||
@@ -3747,26 +3748,12 @@ static void gfx10_make_texture_descriptor(
|
||||
S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) |
|
||||
S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) |
|
||||
S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type);
|
||||
|
||||
if (res->target == PIPE_TEXTURE_1D ||
|
||||
res->target == PIPE_TEXTURE_2D) {
|
||||
/* 1D, 2D, and 2D_MSAA can set a custom pitch for shader resources
|
||||
* starting with gfx10.3 (ignored if pitch <= width). Other texture
|
||||
* targets can't. CB and DB can't set a custom pitch for any target.
|
||||
*/
|
||||
if (screen->info.chip_class >= GFX10_3)
|
||||
state[4] = S_00A010_DEPTH(tex->surface.u.gfx9.surf_pitch - 1);
|
||||
else
|
||||
state[4] = 0;
|
||||
} else {
|
||||
/* Depth is the last accessible layer on gfx9+. The hw doesn't need
|
||||
* to know the total number of layers.
|
||||
*/
|
||||
state[4] = S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ?
|
||||
depth - 1 : last_layer) |
|
||||
S_00A010_BASE_ARRAY(first_layer);
|
||||
}
|
||||
|
||||
/* Depth is the last accessible layer on gfx9+. The hw doesn't need
|
||||
* to know the total number of layers.
|
||||
*/
|
||||
state[4] =
|
||||
S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) |
|
||||
S_00A010_BASE_ARRAY(first_layer);
|
||||
state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) |
|
||||
S_00A014_MAX_MIP(res->nr_samples > 1 ? util_logbase2(res->nr_samples)
|
||||
: tex->buffer.b.b.last_level) |
|
||||
@@ -5367,6 +5354,7 @@ void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing)
|
||||
|
||||
if (sctx->chip_class >= GFX10_3) {
|
||||
si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL_GFX103, 0xff);
|
||||
si_pm4_set_reg(pm4, 0x28848, 1 << 9); /* This fixes sample shading. */
|
||||
}
|
||||
|
||||
sctx->cs_preamble_state = pm4;
|
||||
|
@@ -703,7 +703,7 @@ void gfx9_get_gs_info(struct si_shader_selector *es, struct si_shader_selector *
|
||||
out->gs_prims_per_subgroup = gs_prims;
|
||||
out->gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
|
||||
out->max_prims_per_subgroup = out->gs_inst_prims_in_subgroup * gs->gs_max_out_vertices;
|
||||
out->esgs_ring_size = 4 * esgs_lds_size;
|
||||
out->esgs_ring_size = esgs_lds_size;
|
||||
|
||||
assert(out->max_prims_per_subgroup <= max_out_prims);
|
||||
}
|
||||
|
@@ -1061,17 +1061,6 @@ gen_perf_wait_query(struct gen_perf_context *perf_ctx,
|
||||
perf_cfg->vtbl.batchbuffer_flush(perf_ctx->ctx, __FILE__, __LINE__);
|
||||
|
||||
perf_cfg->vtbl.bo_wait_rendering(bo);
|
||||
|
||||
/* Due to a race condition between the OA unit signaling report
|
||||
* availability and the report actually being written into memory,
|
||||
* we need to wait for all the reports to come in before we can
|
||||
* read them.
|
||||
*/
|
||||
if (query->queryinfo->kind == GEN_PERF_QUERY_TYPE_OA ||
|
||||
query->queryinfo->kind == GEN_PERF_QUERY_TYPE_RAW) {
|
||||
while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
@@ -1087,8 +1076,8 @@ gen_perf_is_query_ready(struct gen_perf_context *perf_ctx,
|
||||
return (query->oa.results_accumulated ||
|
||||
(query->oa.bo &&
|
||||
!perf_cfg->vtbl.batch_references(current_batch, query->oa.bo) &&
|
||||
!perf_cfg->vtbl.bo_busy(query->oa.bo) &&
|
||||
read_oa_samples_for_query(perf_ctx, query, current_batch)));
|
||||
!perf_cfg->vtbl.bo_busy(query->oa.bo)));
|
||||
|
||||
case GEN_PERF_QUERY_TYPE_PIPELINE:
|
||||
return (query->pipeline_stats.bo &&
|
||||
!perf_cfg->vtbl.batch_references(current_batch, query->pipeline_stats.bo) &&
|
||||
@@ -1513,6 +1502,7 @@ get_pipeline_stats_data(struct gen_perf_context *perf_ctx,
|
||||
void
|
||||
gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
void *current_batch,
|
||||
int data_size,
|
||||
unsigned *data,
|
||||
unsigned *bytes_written)
|
||||
@@ -1524,6 +1514,17 @@ gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
||||
case GEN_PERF_QUERY_TYPE_OA:
|
||||
case GEN_PERF_QUERY_TYPE_RAW:
|
||||
if (!query->oa.results_accumulated) {
|
||||
/* Due to the sampling frequency of the OA buffer by the i915-perf
|
||||
* driver, there can be a 5ms delay between Mesa seeing the query
|
||||
* complete and i915 making all the OA buffer reports available to us.
|
||||
* We need to wait for all the reports to come in before we can do
|
||||
* the post processing removing unrelated deltas.
|
||||
* There is an i915-perf series to address this issue, but it's
|
||||
* not been merged upstream yet.
|
||||
*/
|
||||
while (!read_oa_samples_for_query(perf_ctx, query, current_batch))
|
||||
;
|
||||
|
||||
read_gt_frequency(perf_ctx, query);
|
||||
uint32_t *begin_report = query->oa.map;
|
||||
uint32_t *end_report = query->oa.map + MI_RPC_BO_END_OFFSET_BYTES;
|
||||
|
@@ -76,6 +76,7 @@ void gen_perf_delete_query(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query);
|
||||
void gen_perf_get_query_data(struct gen_perf_context *perf_ctx,
|
||||
struct gen_perf_query_object *query,
|
||||
void *current_batch,
|
||||
int data_size,
|
||||
unsigned *data,
|
||||
unsigned *bytes_written);
|
||||
|
@@ -28,7 +28,7 @@
|
||||
/*
|
||||
* Gen-specific function declarations. This header must *not* be included
|
||||
* directly. Instead, it is included multiple times by anv_private.h.
|
||||
*
|
||||
*
|
||||
* In this header file, the usual genx() macro is available.
|
||||
*/
|
||||
|
||||
@@ -36,6 +36,16 @@
|
||||
#error This file is included by means other than anv_private.h
|
||||
#endif
|
||||
|
||||
extern const uint32_t genX(vk_to_gen_cullmode)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_gen_front_face)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_gen_primitive_type)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_gen_compare_op)[];
|
||||
|
||||
extern const uint32_t genX(vk_to_gen_stencil_op)[];
|
||||
|
||||
VkResult genX(init_device_state)(struct anv_device *device);
|
||||
|
||||
void genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer);
|
||||
@@ -62,8 +72,6 @@ void genX(flush_pipeline_select_gpgpu)(struct anv_cmd_buffer *cmd_buffer);
|
||||
void genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct gen_l3_config *cfg);
|
||||
|
||||
void genX(cmd_buffer_emit_clip)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
void genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer);
|
||||
void genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer);
|
||||
|
||||
|
@@ -1883,46 +1883,6 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
|
||||
pCreateInfo->pRasterizationState->frontFace;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE &&
|
||||
subpass->depth_stencil_attachment) {
|
||||
dynamic->depth_test_enable =
|
||||
pCreateInfo->pDepthStencilState->depthTestEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE &&
|
||||
subpass->depth_stencil_attachment) {
|
||||
dynamic->depth_write_enable =
|
||||
pCreateInfo->pDepthStencilState->depthWriteEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP &&
|
||||
subpass->depth_stencil_attachment) {
|
||||
dynamic->depth_compare_op =
|
||||
pCreateInfo->pDepthStencilState->depthCompareOp;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE &&
|
||||
subpass->depth_stencil_attachment) {
|
||||
dynamic->depth_bounds_test_enable =
|
||||
pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE &&
|
||||
subpass->depth_stencil_attachment) {
|
||||
dynamic->stencil_test_enable =
|
||||
pCreateInfo->pDepthStencilState->stencilTestEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP &&
|
||||
subpass->depth_stencil_attachment) {
|
||||
const VkPipelineDepthStencilStateCreateInfo *info =
|
||||
pCreateInfo->pDepthStencilState;
|
||||
memcpy(&dynamic->stencil_op.front, &info->front,
|
||||
sizeof(dynamic->stencil_op.front));
|
||||
memcpy(&dynamic->stencil_op.back, &info->back,
|
||||
sizeof(dynamic->stencil_op.back));
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
|
||||
assert(pCreateInfo->pInputAssemblyState);
|
||||
bool has_tess = false;
|
||||
@@ -2007,6 +1967,40 @@ copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
|
||||
dynamic->stencil_reference.back =
|
||||
pCreateInfo->pDepthStencilState->back.reference;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
|
||||
dynamic->depth_test_enable =
|
||||
pCreateInfo->pDepthStencilState->depthTestEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
|
||||
dynamic->depth_write_enable =
|
||||
pCreateInfo->pDepthStencilState->depthWriteEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
|
||||
dynamic->depth_compare_op =
|
||||
pCreateInfo->pDepthStencilState->depthCompareOp;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
|
||||
dynamic->depth_bounds_test_enable =
|
||||
pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
|
||||
dynamic->stencil_test_enable =
|
||||
pCreateInfo->pDepthStencilState->stencilTestEnable;
|
||||
}
|
||||
|
||||
if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
|
||||
const VkPipelineDepthStencilStateCreateInfo *info =
|
||||
pCreateInfo->pDepthStencilState;
|
||||
memcpy(&dynamic->stencil_op.front, &info->front,
|
||||
sizeof(dynamic->stencil_op.front));
|
||||
memcpy(&dynamic->stencil_op.back, &info->back,
|
||||
sizeof(dynamic->stencil_op.back));
|
||||
}
|
||||
}
|
||||
|
||||
const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
|
||||
|
@@ -405,14 +405,14 @@ anv_queue_submit_add_fence_bo(struct anv_queue_submit *submit,
|
||||
{
|
||||
if (submit->fence_bo_count >= submit->fence_bo_array_length) {
|
||||
uint32_t new_len = MAX2(submit->fence_bo_array_length * 2, 64);
|
||||
|
||||
submit->fence_bos =
|
||||
uintptr_t *new_fence_bos =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->fence_bos, new_len * sizeof(*submit->fence_bos),
|
||||
8, submit->alloc_scope);
|
||||
if (submit->fence_bos == NULL)
|
||||
if (new_fence_bos == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->fence_bos = new_fence_bos;
|
||||
submit->fence_bo_array_length = new_len;
|
||||
}
|
||||
|
||||
@@ -433,14 +433,14 @@ anv_queue_submit_add_syncobj(struct anv_queue_submit* submit,
|
||||
|
||||
if (submit->fence_count >= submit->fence_array_length) {
|
||||
uint32_t new_len = MAX2(submit->fence_array_length * 2, 64);
|
||||
|
||||
submit->fences =
|
||||
struct drm_i915_gem_exec_fence *new_fences =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->fences, new_len * sizeof(*submit->fences),
|
||||
8, submit->alloc_scope);
|
||||
if (submit->fences == NULL)
|
||||
if (new_fences == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->fences = new_fences;
|
||||
submit->fence_array_length = new_len;
|
||||
}
|
||||
|
||||
@@ -483,21 +483,24 @@ anv_queue_submit_add_timeline_wait(struct anv_queue_submit* submit,
|
||||
{
|
||||
if (submit->wait_timeline_count >= submit->wait_timeline_array_length) {
|
||||
uint32_t new_len = MAX2(submit->wait_timeline_array_length * 2, 64);
|
||||
|
||||
submit->wait_timelines =
|
||||
struct anv_timeline **new_wait_timelines =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->wait_timelines, new_len * sizeof(*submit->wait_timelines),
|
||||
8, submit->alloc_scope);
|
||||
if (submit->wait_timelines == NULL)
|
||||
if (new_wait_timelines == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->wait_timeline_values =
|
||||
submit->wait_timelines = new_wait_timelines;
|
||||
|
||||
uint64_t *new_wait_timeline_values =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->wait_timeline_values, new_len * sizeof(*submit->wait_timeline_values),
|
||||
8, submit->alloc_scope);
|
||||
if (submit->wait_timeline_values == NULL)
|
||||
if (new_wait_timeline_values == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->wait_timeline_values = new_wait_timeline_values;
|
||||
|
||||
submit->wait_timeline_array_length = new_len;
|
||||
}
|
||||
|
||||
@@ -519,21 +522,24 @@ anv_queue_submit_add_timeline_signal(struct anv_queue_submit* submit,
|
||||
|
||||
if (submit->signal_timeline_count >= submit->signal_timeline_array_length) {
|
||||
uint32_t new_len = MAX2(submit->signal_timeline_array_length * 2, 64);
|
||||
|
||||
submit->signal_timelines =
|
||||
struct anv_timeline **new_signal_timelines =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->signal_timelines, new_len * sizeof(*submit->signal_timelines),
|
||||
8, submit->alloc_scope);
|
||||
if (submit->signal_timelines == NULL)
|
||||
if (new_signal_timelines == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->signal_timeline_values =
|
||||
submit->signal_timelines = new_signal_timelines;
|
||||
|
||||
uint64_t *new_signal_timeline_values =
|
||||
vk_realloc(submit->alloc,
|
||||
submit->signal_timeline_values, new_len * sizeof(*submit->signal_timeline_values),
|
||||
8, submit->alloc_scope);
|
||||
if (submit->signal_timeline_values == NULL)
|
||||
if (new_signal_timeline_values == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
submit->signal_timeline_values = new_signal_timeline_values;
|
||||
|
||||
submit->signal_timeline_array_length = new_len;
|
||||
}
|
||||
|
||||
|
@@ -198,39 +198,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
|
||||
|
||||
static const uint32_t vk_to_gen_cullmode[] = {
|
||||
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
|
||||
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
|
||||
[VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
|
||||
[VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
|
||||
};
|
||||
static const uint32_t vk_to_gen_front_face[] = {
|
||||
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
|
||||
[VK_FRONT_FACE_CLOCKWISE] = 0
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_compare_op[] = {
|
||||
[VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
|
||||
[VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
|
||||
[VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
|
||||
[VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL,
|
||||
[VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
|
||||
[VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
|
||||
[VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL,
|
||||
[VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_stencil_op[] = {
|
||||
[VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
|
||||
[VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
|
||||
[VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
|
||||
[VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
|
||||
[VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
|
||||
[VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
|
||||
[VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
|
||||
[VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
|
||||
};
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_RENDER_TARGETS |
|
||||
ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |
|
||||
@@ -245,8 +212,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
.GlobalDepthOffsetConstant = d->depth_bias.bias,
|
||||
.GlobalDepthOffsetScale = d->depth_bias.slope,
|
||||
.GlobalDepthOffsetClamp = d->depth_bias.clamp,
|
||||
.FrontWinding = vk_to_gen_front_face[d->front_face],
|
||||
.CullMode = vk_to_gen_cullmode[d->cull_mode],
|
||||
.FrontWinding = genX(vk_to_gen_front_face)[d->front_face],
|
||||
.CullMode = genX(vk_to_gen_cullmode)[d->cull_mode],
|
||||
};
|
||||
GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
|
||||
|
||||
@@ -307,16 +274,16 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
.DepthTestEnable = d->depth_test_enable,
|
||||
.DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
|
||||
.DepthTestFunction = vk_to_gen_compare_op[d->depth_compare_op],
|
||||
.DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op],
|
||||
.StencilTestEnable = d->stencil_test_enable,
|
||||
.StencilFailOp = vk_to_gen_stencil_op[d->stencil_op.front.fail_op],
|
||||
.StencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.front.pass_op],
|
||||
.StencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.front.depth_fail_op],
|
||||
.StencilTestFunction = vk_to_gen_compare_op[d->stencil_op.front.compare_op],
|
||||
.BackfaceStencilFailOp = vk_to_gen_stencil_op[d->stencil_op.back.fail_op],
|
||||
.BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.back.pass_op],
|
||||
.BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.back.depth_fail_op],
|
||||
.BackfaceStencilTestFunction = vk_to_gen_compare_op[d->stencil_op.back.compare_op],
|
||||
.StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op],
|
||||
.StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op],
|
||||
.StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op],
|
||||
.StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op],
|
||||
.BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op],
|
||||
.BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op],
|
||||
.BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op],
|
||||
.BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op],
|
||||
};
|
||||
GENX(DEPTH_STENCIL_STATE_pack)(NULL, depth_stencil_dw, &depth_stencil);
|
||||
|
||||
@@ -359,26 +326,13 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
static const uint32_t vk_to_gen_primitive_type[] = {
|
||||
[VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
|
||||
};
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)) {
|
||||
uint32_t topology;
|
||||
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
|
||||
topology = d->primitive_topology;
|
||||
else
|
||||
topology = vk_to_gen_primitive_type[d->primitive_topology];
|
||||
topology = genX(vk_to_gen_primitive_type)[d->primitive_topology];
|
||||
|
||||
cmd_buffer->state.gfx.primitive_topology = topology;
|
||||
}
|
||||
|
@@ -439,51 +439,6 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf);
|
||||
}
|
||||
|
||||
static const uint32_t vk_to_gen_cullmode[] = {
|
||||
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
|
||||
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
|
||||
[VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
|
||||
[VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
|
||||
};
|
||||
static const uint32_t vk_to_gen_front_face[] = {
|
||||
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
|
||||
[VK_FRONT_FACE_CLOCKWISE] = 0
|
||||
};
|
||||
static const uint32_t vk_to_gen_primitive_type[] = {
|
||||
[VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_compare_op[] = {
|
||||
[VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
|
||||
[VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
|
||||
[VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
|
||||
[VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROPLEQUAL,
|
||||
[VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
|
||||
[VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
|
||||
[VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROPGEQUAL,
|
||||
[VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_stencil_op[] = {
|
||||
[VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
|
||||
[VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
|
||||
[VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
|
||||
[VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
|
||||
[VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
|
||||
[VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
|
||||
[VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
|
||||
[VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
|
||||
};
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |
|
||||
ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |
|
||||
@@ -494,8 +449,8 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
.GlobalDepthOffsetConstant = d->depth_bias.bias,
|
||||
.GlobalDepthOffsetScale = d->depth_bias.slope,
|
||||
.GlobalDepthOffsetClamp = d->depth_bias.clamp,
|
||||
.CullMode = vk_to_gen_cullmode[d->cull_mode],
|
||||
.FrontWinding = vk_to_gen_front_face[d->front_face],
|
||||
.CullMode = genX(vk_to_gen_cullmode)[d->cull_mode],
|
||||
.FrontWinding = genX(vk_to_gen_front_face)[d->front_face],
|
||||
};
|
||||
GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
|
||||
@@ -556,16 +511,16 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
.DepthTestEnable = d->depth_test_enable,
|
||||
.DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
|
||||
.DepthTestFunction = vk_to_gen_compare_op[d->depth_compare_op],
|
||||
.DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op],
|
||||
.StencilTestEnable = d->stencil_test_enable,
|
||||
.StencilFailOp = vk_to_gen_stencil_op[d->stencil_op.front.fail_op],
|
||||
.StencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.front.pass_op],
|
||||
.StencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.front.depth_fail_op],
|
||||
.StencilTestFunction = vk_to_gen_compare_op[d->stencil_op.front.compare_op],
|
||||
.BackfaceStencilFailOp = vk_to_gen_stencil_op[d->stencil_op.back.fail_op],
|
||||
.BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.back.pass_op],
|
||||
.BackfaceStencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.back.depth_fail_op],
|
||||
.BackfaceStencilTestFunction = vk_to_gen_compare_op[d->stencil_op.back.compare_op],
|
||||
.StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op],
|
||||
.StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op],
|
||||
.StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op],
|
||||
.StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op],
|
||||
.BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op],
|
||||
.BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op],
|
||||
.BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op],
|
||||
.BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op],
|
||||
};
|
||||
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
|
||||
&wm_depth_stencil);
|
||||
@@ -625,16 +580,16 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
|
||||
.DepthTestEnable = d->depth_test_enable,
|
||||
.DepthBufferWriteEnable = d->depth_test_enable && d->depth_write_enable,
|
||||
.DepthTestFunction = vk_to_gen_compare_op[d->depth_compare_op],
|
||||
.DepthTestFunction = genX(vk_to_gen_compare_op)[d->depth_compare_op],
|
||||
.StencilTestEnable = d->stencil_test_enable,
|
||||
.StencilFailOp = vk_to_gen_stencil_op[d->stencil_op.front.fail_op],
|
||||
.StencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.front.pass_op],
|
||||
.StencilPassDepthFailOp = vk_to_gen_stencil_op[d->stencil_op.front.depth_fail_op],
|
||||
.StencilTestFunction = vk_to_gen_compare_op[d->stencil_op.front.compare_op],
|
||||
.BackfaceStencilFailOp = vk_to_gen_stencil_op[d->stencil_op.back.fail_op],
|
||||
.BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[d->stencil_op.back.pass_op],
|
||||
.BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[d->stencil_op.back.depth_fail_op],
|
||||
.BackfaceStencilTestFunction = vk_to_gen_compare_op[d->stencil_op.back.compare_op],
|
||||
.StencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.fail_op],
|
||||
.StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.pass_op],
|
||||
.StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.front.depth_fail_op],
|
||||
.StencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.front.compare_op],
|
||||
.BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.fail_op],
|
||||
.BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.pass_op],
|
||||
.BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[d->stencil_op.back.depth_fail_op],
|
||||
.BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[d->stencil_op.back.compare_op],
|
||||
|
||||
};
|
||||
GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dwords, &wm_depth_stencil);
|
||||
@@ -684,7 +639,7 @@ genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
|
||||
topology = d->primitive_topology;
|
||||
else
|
||||
topology = vk_to_gen_primitive_type[d->primitive_topology];
|
||||
topology = genX(vk_to_gen_primitive_type)[d->primitive_topology];
|
||||
|
||||
cmd_buffer->state.gfx.primitive_topology = topology;
|
||||
|
||||
|
@@ -3263,26 +3263,44 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
|
||||
cmd_buffer->state.push_constants_dirty &= ~flushed;
|
||||
}
|
||||
|
||||
void
|
||||
genX(cmd_buffer_emit_clip)(struct anv_cmd_buffer *cmd_buffer)
|
||||
static void
|
||||
cmd_buffer_emit_clip(struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
const uint32_t clip_states =
|
||||
#if GEN_GEN <= 7
|
||||
ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
|
||||
ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |
|
||||
#endif
|
||||
ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
|
||||
ANV_CMD_DIRTY_PIPELINE;
|
||||
|
||||
if ((cmd_buffer->state.gfx.dirty & clip_states) == 0)
|
||||
return;
|
||||
|
||||
#if GEN_GEN <= 7
|
||||
const struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
|
||||
#endif
|
||||
struct GENX(3DSTATE_CLIP) clip = {
|
||||
GENX(3DSTATE_CLIP_header),
|
||||
#if GEN_GEN <= 7
|
||||
.FrontWinding = genX(vk_to_gen_front_face)[d->front_face],
|
||||
.CullMode = genX(vk_to_gen_cullmode)[d->cull_mode],
|
||||
#endif
|
||||
};
|
||||
uint32_t dwords[GENX(3DSTATE_CLIP_length)];
|
||||
|
||||
struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
|
||||
ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)) {
|
||||
uint32_t dwords[GENX(3DSTATE_CLIP_length)];
|
||||
int32_t count =
|
||||
const struct brw_vue_prog_data *last =
|
||||
anv_pipeline_get_last_vue_prog_data(pipeline);
|
||||
if (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
|
||||
clip.MaximumVPIndex =
|
||||
cmd_buffer->state.gfx.dynamic.viewport.count > 0 ?
|
||||
cmd_buffer->state.gfx.dynamic.viewport.count - 1 : 0;
|
||||
|
||||
struct GENX(3DSTATE_CLIP) clip = {
|
||||
GENX(3DSTATE_CLIP_header),
|
||||
.MaximumVPIndex = count,
|
||||
};
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords,
|
||||
pipeline->gen7.clip);
|
||||
cmd_buffer->state.gfx.dynamic.viewport.count - 1 : 0;
|
||||
}
|
||||
|
||||
GENX(3DSTATE_CLIP_pack)(NULL, dwords, &clip);
|
||||
anv_batch_emit_merge(&cmd_buffer->batch, dwords,
|
||||
pipeline->gen7.clip);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -3469,10 +3487,10 @@ genX(cmd_buffer_flush_state)(struct anv_cmd_buffer *cmd_buffer)
|
||||
if (dirty)
|
||||
cmd_buffer_emit_descriptor_pointers(cmd_buffer, dirty);
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
|
||||
genX(cmd_buffer_emit_clip)(cmd_buffer);
|
||||
cmd_buffer_emit_clip(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT)
|
||||
gen8_cmd_buffer_emit_viewport(cmd_buffer);
|
||||
}
|
||||
|
||||
if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |
|
||||
ANV_CMD_DIRTY_PIPELINE)) {
|
||||
|
@@ -438,24 +438,6 @@ emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
|
||||
#endif
|
||||
}
|
||||
|
||||
static const uint32_t vk_to_gen_cullmode[] = {
|
||||
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
|
||||
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
|
||||
[VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
|
||||
[VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_fillmode[] = {
|
||||
[VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
|
||||
[VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
|
||||
[VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_front_face[] = {
|
||||
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
|
||||
[VK_FRONT_FACE_CLOCKWISE] = 0
|
||||
};
|
||||
|
||||
static VkLineRasterizationModeEXT
|
||||
vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
|
||||
const VkPipelineMultisampleStateCreateInfo *ms_info)
|
||||
@@ -574,6 +556,24 @@ gen7_ms_rast_mode(struct anv_graphics_pipeline *pipeline,
|
||||
}
|
||||
#endif
|
||||
|
||||
const uint32_t genX(vk_to_gen_cullmode)[] = {
|
||||
[VK_CULL_MODE_NONE] = CULLMODE_NONE,
|
||||
[VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
|
||||
[VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
|
||||
[VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
|
||||
};
|
||||
|
||||
const uint32_t genX(vk_to_gen_fillmode)[] = {
|
||||
[VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
|
||||
[VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
|
||||
[VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
|
||||
};
|
||||
|
||||
const uint32_t genX(vk_to_gen_front_face)[] = {
|
||||
[VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
|
||||
[VK_FRONT_FACE_CLOCKWISE] = 0
|
||||
};
|
||||
|
||||
static void
|
||||
emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
||||
const VkPipelineInputAssemblyStateCreateInfo *ia_info,
|
||||
@@ -681,13 +681,13 @@ emit_rs_state(struct anv_graphics_pipeline *pipeline,
|
||||
|
||||
raster.FrontWinding =
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE ?
|
||||
0 : vk_to_gen_front_face[rs_info->frontFace];
|
||||
0 : genX(vk_to_gen_front_face)[rs_info->frontFace];
|
||||
raster.CullMode =
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE ?
|
||||
0 : vk_to_gen_cullmode[rs_info->cullMode];
|
||||
0 : genX(vk_to_gen_cullmode)[rs_info->cullMode];
|
||||
|
||||
raster.FrontFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
|
||||
raster.BackFaceFillMode = vk_to_gen_fillmode[rs_info->polygonMode];
|
||||
raster.FrontFaceFillMode = genX(vk_to_gen_fillmode)[rs_info->polygonMode];
|
||||
raster.BackFaceFillMode = genX(vk_to_gen_fillmode)[rs_info->polygonMode];
|
||||
raster.ScissorRectangleEnable = true;
|
||||
|
||||
#if GEN_GEN >= 9
|
||||
@@ -843,7 +843,7 @@ static const uint32_t vk_to_gen_blend_op[] = {
|
||||
[VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_compare_op[] = {
|
||||
const uint32_t genX(vk_to_gen_compare_op)[] = {
|
||||
[VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
|
||||
[VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
|
||||
[VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
|
||||
@@ -854,7 +854,7 @@ static const uint32_t vk_to_gen_compare_op[] = {
|
||||
[VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
|
||||
};
|
||||
|
||||
static const uint32_t vk_to_gen_stencil_op[] = {
|
||||
const uint32_t genX(vk_to_gen_stencil_op)[] = {
|
||||
[VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
|
||||
[VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
|
||||
[VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
|
||||
@@ -865,6 +865,19 @@ static const uint32_t vk_to_gen_stencil_op[] = {
|
||||
[VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
|
||||
};
|
||||
|
||||
const uint32_t genX(vk_to_gen_primitive_type)[] = {
|
||||
[VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
|
||||
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
|
||||
};
|
||||
|
||||
/* This function sanitizes the VkStencilOpState by looking at the compare ops
|
||||
* and trying to determine whether or not a given stencil op can ever actually
|
||||
* occur. Stencil ops which can never occur are set to VK_STENCIL_OP_KEEP.
|
||||
@@ -1056,7 +1069,7 @@ emit_ds_state(struct anv_graphics_pipeline *pipeline,
|
||||
|
||||
.DepthTestFunction =
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP ?
|
||||
0 : vk_to_gen_compare_op[info.depthCompareOp],
|
||||
0 : genX(vk_to_gen_compare_op)[info.depthCompareOp],
|
||||
|
||||
.DoubleSidedStencilEnable = true,
|
||||
|
||||
@@ -1064,14 +1077,14 @@ emit_ds_state(struct anv_graphics_pipeline *pipeline,
|
||||
dynamic_states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE ?
|
||||
0 : info.stencilTestEnable,
|
||||
|
||||
.StencilFailOp = vk_to_gen_stencil_op[info.front.failOp],
|
||||
.StencilPassDepthPassOp = vk_to_gen_stencil_op[info.front.passOp],
|
||||
.StencilPassDepthFailOp = vk_to_gen_stencil_op[info.front.depthFailOp],
|
||||
.StencilTestFunction = vk_to_gen_compare_op[info.front.compareOp],
|
||||
.BackfaceStencilFailOp = vk_to_gen_stencil_op[info.back.failOp],
|
||||
.BackfaceStencilPassDepthPassOp = vk_to_gen_stencil_op[info.back.passOp],
|
||||
.BackfaceStencilPassDepthFailOp =vk_to_gen_stencil_op[info.back.depthFailOp],
|
||||
.BackfaceStencilTestFunction = vk_to_gen_compare_op[info.back.compareOp],
|
||||
.StencilFailOp = genX(vk_to_gen_stencil_op)[info.front.failOp],
|
||||
.StencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[info.front.passOp],
|
||||
.StencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[info.front.depthFailOp],
|
||||
.StencilTestFunction = genX(vk_to_gen_compare_op)[info.front.compareOp],
|
||||
.BackfaceStencilFailOp = genX(vk_to_gen_stencil_op)[info.back.failOp],
|
||||
.BackfaceStencilPassDepthPassOp = genX(vk_to_gen_stencil_op)[info.back.passOp],
|
||||
.BackfaceStencilPassDepthFailOp = genX(vk_to_gen_stencil_op)[info.back.depthFailOp],
|
||||
.BackfaceStencilTestFunction = genX(vk_to_gen_compare_op)[info.back.compareOp],
|
||||
};
|
||||
|
||||
if (dynamic_stencil_op) {
|
||||
@@ -1339,8 +1352,8 @@ emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
|
||||
!(last->vue_map.slots_valid & VARYING_BIT_LAYER);
|
||||
|
||||
#if GEN_GEN == 7
|
||||
clip.FrontWinding = vk_to_gen_front_face[rs_info->frontFace];
|
||||
clip.CullMode = vk_to_gen_cullmode[rs_info->cullMode];
|
||||
clip.FrontWinding = genX(vk_to_gen_front_face)[rs_info->frontFace];
|
||||
clip.CullMode = genX(vk_to_gen_cullmode)[rs_info->cullMode];
|
||||
clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
|
||||
clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
|
||||
clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
|
||||
|
@@ -323,7 +323,7 @@ brw_get_perf_query_data(struct gl_context *ctx,
|
||||
*/
|
||||
assert(o->Ready);
|
||||
|
||||
gen_perf_get_query_data(brw->perf_ctx, obj,
|
||||
gen_perf_get_query_data(brw->perf_ctx, obj, &brw->batch,
|
||||
data_size, data, bytes_written);
|
||||
}
|
||||
|
||||
|
@@ -97,9 +97,12 @@ dst_register(struct st_translate *t, gl_register_file file, GLuint index)
|
||||
else
|
||||
assert(index < VARYING_SLOT_MAX);
|
||||
|
||||
assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
|
||||
|
||||
return t->outputs[t->outputMapping[index]];
|
||||
if (t->outputMapping[index] < ARRAY_SIZE(t->outputs))
|
||||
return t->outputs[t->outputMapping[index]];
|
||||
else {
|
||||
assert(t->procType == PIPE_SHADER_VERTEX);
|
||||
return ureg_dst(ureg_DECL_constant(t->ureg, 0));
|
||||
}
|
||||
|
||||
case PROGRAM_ADDRESS:
|
||||
return t->address[index];
|
||||
@@ -149,8 +152,12 @@ src_register(struct st_translate *t,
|
||||
}
|
||||
|
||||
case PROGRAM_OUTPUT:
|
||||
assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
|
||||
return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
|
||||
if (t->outputMapping[index] < ARRAY_SIZE(t->outputs))
|
||||
return ureg_src(t->outputs[t->outputMapping[index]]);
|
||||
else {
|
||||
assert(t->procType == PIPE_SHADER_VERTEX);
|
||||
return ureg_DECL_constant(t->ureg, 0);
|
||||
}
|
||||
|
||||
case PROGRAM_ADDRESS:
|
||||
return ureg_src(t->address[index]);
|
||||
|
@@ -214,7 +214,7 @@ DRI_CONF_OPT_END
|
||||
|
||||
#define DRI_CONF_FORCE_GL_VENDOR(def) \
|
||||
DRI_CONF_OPT_BEGIN(force_gl_vendor, string, def) \
|
||||
DRI_CONF_DESC("Allow GPU vendor to be overridden.") \
|
||||
DRI_CONF_DESC("Override GPU vendor string.") \
|
||||
DRI_CONF_OPT_END
|
||||
|
||||
#define DRI_CONF_FORCE_COMPAT_PROFILE(def) \
|
||||
|
Reference in New Issue
Block a user