Compare commits
26 Commits
mesa-20.0.
...
mesa-20.0.
Author | SHA1 | Date | |
---|---|---|---|
|
6bbbef9699 | ||
|
fa0dcef2ef | ||
|
4558bdb95a | ||
|
75ea9c808d | ||
|
e5f13bca20 | ||
|
f93c8d8598 | ||
|
c4e1dd07eb | ||
|
f86e27156d | ||
|
9724b0f32c | ||
|
a3bd400c14 | ||
|
32dc7fff47 | ||
|
027f9c887c | ||
|
f3f4751851 | ||
|
a25c7674aa | ||
|
1d17f42732 | ||
|
e393404ff1 | ||
|
1f8db81632 | ||
|
8f29aaa2cf | ||
|
1e0cc313ba | ||
|
d96f0faacf | ||
|
203710e94c | ||
|
d189ab9fcc | ||
|
bd934ff613 | ||
|
51f7d81dd2 | ||
|
06a9d51f27 | ||
|
419c992e65 |
1233
.pick_status.json
1233
.pick_status.json
File diff suppressed because it is too large
Load Diff
@@ -374,13 +374,16 @@ wait_imm kill(Instruction* instr, wait_ctx& ctx)
|
||||
|
||||
imm.combine(parse_wait_instr(ctx, instr));
|
||||
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
/* Seems to be required on GFX10 to achieve correct behaviour.
|
||||
* It shouldn't cost anything anyways since we're about to do s_endpgm.
|
||||
*/
|
||||
if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb)
|
||||
imm.lgkm = 0;
|
||||
|
||||
/* It's required to wait for scalar stores before "writing back" data.
|
||||
* It shouldn't cost anything anyways since we're about to do s_endpgm.
|
||||
*/
|
||||
if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb) {
|
||||
assert(ctx.chip_class >= GFX8);
|
||||
imm.lgkm = 0;
|
||||
}
|
||||
|
||||
if (ctx.chip_class >= GFX10) {
|
||||
/* GFX10: A store followed by a load at the same address causes a problem because
|
||||
* the load doesn't load the correct values unless we wait for the store first.
|
||||
* This is NOT mitigated by an s_nop.
|
||||
|
@@ -2657,6 +2657,7 @@ void load_lds(isel_context *ctx, unsigned elem_size_bytes, Temp dst,
|
||||
unsigned total_bytes = num_components * elem_size_bytes;
|
||||
std::array<Temp, NIR_MAX_VEC_COMPONENTS> result;
|
||||
bool large_ds_read = ctx->options->chip_class >= GFX7;
|
||||
bool usable_read2 = ctx->options->chip_class >= GFX7;
|
||||
|
||||
while (bytes_read < total_bytes) {
|
||||
unsigned todo = total_bytes - bytes_read;
|
||||
@@ -2668,7 +2669,7 @@ void load_lds(isel_context *ctx, unsigned elem_size_bytes, Temp dst,
|
||||
if (todo >= 16 && aligned16 && large_ds_read) {
|
||||
op = aco_opcode::ds_read_b128;
|
||||
todo = 16;
|
||||
} else if (todo >= 16 && aligned8) {
|
||||
} else if (todo >= 16 && aligned8 && usable_read2) {
|
||||
op = aco_opcode::ds_read2_b64;
|
||||
read2 = true;
|
||||
todo = 16;
|
||||
@@ -2678,7 +2679,7 @@ void load_lds(isel_context *ctx, unsigned elem_size_bytes, Temp dst,
|
||||
} else if (todo >= 8 && aligned8) {
|
||||
op = aco_opcode::ds_read_b64;
|
||||
todo = 8;
|
||||
} else if (todo >= 8) {
|
||||
} else if (todo >= 8 && usable_read2) {
|
||||
op = aco_opcode::ds_read2_b32;
|
||||
read2 = true;
|
||||
todo = 8;
|
||||
@@ -2783,6 +2784,7 @@ void ds_write_helper(isel_context *ctx, Operand m, Temp address, Temp data, unsi
|
||||
Builder bld(ctx->program, ctx->block);
|
||||
unsigned bytes_written = 0;
|
||||
bool large_ds_write = ctx->options->chip_class >= GFX7;
|
||||
bool usable_write2 = ctx->options->chip_class >= GFX7;
|
||||
|
||||
while (bytes_written < total_size * 4) {
|
||||
unsigned todo = total_size * 4 - bytes_written;
|
||||
@@ -2795,7 +2797,7 @@ void ds_write_helper(isel_context *ctx, Operand m, Temp address, Temp data, unsi
|
||||
if (todo >= 16 && aligned16 && large_ds_write) {
|
||||
op = aco_opcode::ds_write_b128;
|
||||
size = 4;
|
||||
} else if (todo >= 16 && aligned8) {
|
||||
} else if (todo >= 16 && aligned8 && usable_write2) {
|
||||
op = aco_opcode::ds_write2_b64;
|
||||
write2 = true;
|
||||
size = 4;
|
||||
@@ -2805,7 +2807,7 @@ void ds_write_helper(isel_context *ctx, Operand m, Temp address, Temp data, unsi
|
||||
} else if (todo >= 8 && aligned8) {
|
||||
op = aco_opcode::ds_write_b64;
|
||||
size = 2;
|
||||
} else if (todo >= 8) {
|
||||
} else if (todo >= 8 && usable_write2) {
|
||||
op = aco_opcode::ds_write2_b32;
|
||||
write2 = true;
|
||||
size = 2;
|
||||
|
@@ -1844,6 +1844,9 @@ bool combine_salu_n2(opt_ctx& ctx, aco_ptr<Instruction>& instr)
|
||||
if (instr->definitions[1].isTemp() && ctx.uses[instr->definitions[1].tempId()])
|
||||
return false;
|
||||
|
||||
if (instr->definitions[0].isTemp() && ctx.info[instr->definitions[0].tempId()].is_uniform_bool())
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
Instruction *op2_instr = follow_operand(ctx, instr->operands[i]);
|
||||
if (!op2_instr || (op2_instr->opcode != aco_opcode::s_not_b32 && op2_instr->opcode != aco_opcode::s_not_b64))
|
||||
@@ -2601,7 +2604,7 @@ void select_instruction(opt_ctx &ctx, aco_ptr<Instruction>& instr)
|
||||
continue;
|
||||
/* if one of the operands is sgpr, we cannot add a literal somewhere else on pre-GFX10 or operands other than the 1st */
|
||||
if (instr->operands[i].getTemp().type() == RegType::sgpr && (i > 0 || ctx.program->chip_class < GFX10)) {
|
||||
if (ctx.info[instr->operands[i].tempId()].is_literal()) {
|
||||
if (!sgpr_used && ctx.info[instr->operands[i].tempId()].is_literal()) {
|
||||
literal_uses = ctx.uses[instr->operands[i].tempId()];
|
||||
literal_idx = i;
|
||||
} else {
|
||||
|
@@ -114,6 +114,11 @@ void setup_reduce_temp(Program* program)
|
||||
}
|
||||
}
|
||||
|
||||
if (op == gfx10_wave64_bpermute) {
|
||||
instr->operands[1] = Operand(reduceTmp);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* same as before, except for the vector temporary instead of the reduce temporary */
|
||||
unsigned cluster_size = static_cast<Pseudo_reduction_instruction *>(instr)->cluster_size;
|
||||
bool need_vtmp = op == imul32 || op == fadd64 || op == fmul64 ||
|
||||
@@ -121,7 +126,7 @@ void setup_reduce_temp(Program* program)
|
||||
op == umax64 || op == imin64 || op == imax64 ||
|
||||
op == imul64;
|
||||
|
||||
if (program->chip_class >= GFX10 && cluster_size == 64 && op != gfx10_wave64_bpermute)
|
||||
if (program->chip_class >= GFX10 && cluster_size == 64)
|
||||
need_vtmp = true;
|
||||
if (program->chip_class >= GFX10 && op == iadd64)
|
||||
need_vtmp = true;
|
||||
|
@@ -1279,7 +1279,7 @@ for bit_size in [8, 16, 32, 64]:
|
||||
('bcsel', ('ilt', a, ('isub', a, b)), intmin, ('isub', a, b))), 'options->lower_add_sat'),
|
||||
]
|
||||
|
||||
invert = OrderedDict([('feq', 'fne'), ('fne', 'feq'), ('fge', 'flt'), ('flt', 'fge')])
|
||||
invert = OrderedDict([('feq', 'fne'), ('fne', 'feq')])
|
||||
|
||||
for left, right in itertools.combinations_with_replacement(invert.keys(), 2):
|
||||
optimizations.append((('inot', ('ior(is_used_once)', (left, a, b), (right, c, d))),
|
||||
|
@@ -305,6 +305,12 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
|
||||
|
||||
reg = ir3_reg_clone(ctx->shader, reg);
|
||||
|
||||
/* Half constant registers seems to handle only 32-bit values
|
||||
* within floating-point opcodes. So convert back to 32-bit values.
|
||||
*/
|
||||
if (f_opcode && (new_flags & IR3_REG_HALF))
|
||||
reg->uim_val = fui(_mesa_half_to_float(reg->uim_val));
|
||||
|
||||
/* in some cases, there are restrictions on (abs)/(neg) plus const..
|
||||
* so just evaluate those and clear the flags:
|
||||
*/
|
||||
@@ -350,12 +356,6 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags
|
||||
swiz = i % 4;
|
||||
idx = i / 4;
|
||||
|
||||
/* Half constant registers seems to handle only 32-bit values
|
||||
* within floating-point opcodes. So convert back to 32-bit values. */
|
||||
if (f_opcode && (new_flags & IR3_REG_HALF)) {
|
||||
reg->uim_val = fui(_mesa_half_to_float(reg->uim_val));
|
||||
}
|
||||
|
||||
const_state->immediates[idx].val[swiz] = reg->uim_val;
|
||||
const_state->immediates_count = idx + 1;
|
||||
const_state->immediate_idx++;
|
||||
|
@@ -685,7 +685,7 @@ static void si_bind_blend_state(struct pipe_context *ctx, void *state)
|
||||
|
||||
if (old_blend->cb_target_mask != blend->cb_target_mask ||
|
||||
old_blend->dual_src_blend != blend->dual_src_blend ||
|
||||
(old_blend->blend_enable_4bit != blend->blend_enable_4bit &&
|
||||
(old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit &&
|
||||
sctx->framebuffer.nr_samples >= 2 &&
|
||||
sctx->screen->dcc_msaa_allowed))
|
||||
si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state);
|
||||
@@ -2242,13 +2242,6 @@ static bool si_is_format_supported(struct pipe_screen *screen,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (util_format_get_num_planes(format) >= 2) {
|
||||
return util_format_planar_is_supported(screen, format, target,
|
||||
sample_count,
|
||||
storage_sample_count,
|
||||
usage);
|
||||
}
|
||||
|
||||
if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
|
||||
return false;
|
||||
|
||||
|
@@ -207,6 +207,8 @@ static const struct vgpu10_format_entry format_conversion_table[] =
|
||||
[ PIPE_FORMAT_L32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32_SINT, TF_XXX1 },
|
||||
[ PIPE_FORMAT_L32A32_SINT ] = { SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_R32G32_SINT, TF_XXXY },
|
||||
[ PIPE_FORMAT_R10G10B10A2_UINT ] = { SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, SVGA3D_R10G10B10A2_UINT, 0 },
|
||||
/* Must specify following entry to give the sense of size of format_conversion_table[] */
|
||||
[ PIPE_FORMAT_COUNT ] = {SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, SVGA3D_FORMAT_INVALID, 0 },
|
||||
};
|
||||
|
||||
|
||||
|
@@ -133,25 +133,26 @@ svga_transfer_dma(struct svga_context *svga,
|
||||
}
|
||||
}
|
||||
else {
|
||||
int y, h, srcy;
|
||||
int y, h, y_max;
|
||||
unsigned blockheight =
|
||||
util_format_get_blockheight(st->base.resource->format);
|
||||
|
||||
h = st->hw_nblocksy * blockheight;
|
||||
srcy = 0;
|
||||
y_max = st->box.y + st->box.h;
|
||||
|
||||
for (y = 0; y < st->box.h; y += h) {
|
||||
for (y = st->box.y; y < y_max; y += h) {
|
||||
unsigned offset, length;
|
||||
void *hw, *sw;
|
||||
|
||||
if (y + h > st->box.h)
|
||||
h = st->box.h - y;
|
||||
if (y + h > y_max)
|
||||
h = y_max - y;
|
||||
|
||||
/* Transfer band must be aligned to pixel block boundaries */
|
||||
assert(y % blockheight == 0);
|
||||
assert(h % blockheight == 0);
|
||||
|
||||
offset = y * st->base.stride / blockheight;
|
||||
/* First band starts at the top of the SW buffer. */
|
||||
offset = (y - st->box.y) * st->base.stride / blockheight;
|
||||
length = h * st->base.stride / blockheight;
|
||||
|
||||
sw = (uint8_t *) st->swbuf + offset;
|
||||
@@ -159,9 +160,9 @@ svga_transfer_dma(struct svga_context *svga,
|
||||
if (transfer == SVGA3D_WRITE_HOST_VRAM) {
|
||||
unsigned usage = PIPE_TRANSFER_WRITE;
|
||||
|
||||
/* Wait for the previous DMAs to complete */
|
||||
/* TODO: keep one DMA (at half the size) in the background */
|
||||
if (y) {
|
||||
/* Don't write to an in-flight DMA buffer. Synchronize or
|
||||
* discard in-flight storage. */
|
||||
if (y != st->box.y) {
|
||||
svga_context_flush(svga, NULL);
|
||||
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
|
||||
}
|
||||
@@ -177,7 +178,7 @@ svga_transfer_dma(struct svga_context *svga,
|
||||
svga_transfer_dma_band(svga, st, transfer,
|
||||
st->box.x, y, st->box.z,
|
||||
st->box.w, h, st->box.d,
|
||||
0, srcy, 0, flags);
|
||||
0, 0, 0, flags);
|
||||
|
||||
/*
|
||||
* Prevent the texture contents to be discarded on the next band
|
||||
|
@@ -131,7 +131,7 @@ emulate_point_sprite(struct svga_context *svga,
|
||||
tgsi_dump(new_tokens, 0);
|
||||
}
|
||||
|
||||
templ.tokens = new_tokens;
|
||||
pipe_shader_state_from_tgsi(&templ, new_tokens);
|
||||
templ.stream_output.num_outputs = 0;
|
||||
|
||||
if (streamout) {
|
||||
|
@@ -43,7 +43,7 @@
|
||||
//========================================================
|
||||
void KnobBase::autoExpandEnvironmentVariables(std::string& text)
|
||||
{
|
||||
#if (__GNUC__) && (GCC_VERSION < 409000)
|
||||
#if (__GNUC__) && (GCC_VERSION < 40900)
|
||||
// <regex> isn't implemented prior to gcc-4.9.0
|
||||
// unix style variable replacement
|
||||
size_t start;
|
||||
|
@@ -182,7 +182,7 @@ static INLINE void _mm256_storeu2_m128i(__m128i* hi, __m128i* lo, __m256i a)
|
||||
}
|
||||
|
||||
// gcc prior to 4.9 doesn't have _mm*_undefined_*
|
||||
#if (__GNUC__) && (GCC_VERSION < 409000)
|
||||
#if (__GNUC__) && (GCC_VERSION < 40900)
|
||||
#define _mm_undefined_si128 _mm_setzero_si128
|
||||
#define _mm256_undefined_ps _mm256_setzero_ps
|
||||
#endif
|
||||
|
@@ -1395,15 +1395,11 @@ dri2_query_dma_buf_modifiers(__DRIscreen *_screen, int fourcc, int max,
|
||||
(pscreen->is_format_supported(pscreen, format, screen->target, 0, 0,
|
||||
PIPE_BIND_RENDER_TARGET) ||
|
||||
pscreen->is_format_supported(pscreen, format, screen->target, 0, 0,
|
||||
PIPE_BIND_SAMPLER_VIEW))) {
|
||||
PIPE_BIND_SAMPLER_VIEW) ||
|
||||
dri2_yuv_dma_buf_supported(screen, map))) {
|
||||
pscreen->query_dmabuf_modifiers(pscreen, format, max, modifiers,
|
||||
external_only, count);
|
||||
return true;
|
||||
} else if (dri2_yuv_dma_buf_supported(screen, map)) {
|
||||
*count = 1;
|
||||
if (modifiers)
|
||||
modifiers[0] = DRM_FORMAT_MOD_NONE;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@@ -1030,7 +1030,8 @@ try_immediate_source(const nir_alu_instr *instr, src_reg *op,
|
||||
} else {
|
||||
uint8_t vf_values[4] = { 0, 0, 0, 0 };
|
||||
|
||||
for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(vf_values); i++) {
|
||||
|
||||
if (op[idx].abs)
|
||||
f[i] = fabs(f[i]);
|
||||
|
||||
|
@@ -1447,7 +1447,7 @@ gen_get_device_info_from_fd(int fd, struct gen_device_info *devinfo)
|
||||
return false;
|
||||
|
||||
if (!getparam(fd, I915_PARAM_REVISION, &devinfo->revision))
|
||||
return false;
|
||||
devinfo->revision = 0;
|
||||
|
||||
if (!query_topology(devinfo, fd)) {
|
||||
if (devinfo->gen >= 10) {
|
||||
|
@@ -2833,6 +2833,10 @@ cmd_buffer_emit_push_constant(struct anv_cmd_buffer *cmd_buffer,
|
||||
const struct anv_pipeline_bind_map *bind_map =
|
||||
&pipeline->shaders[stage]->bind_map;
|
||||
|
||||
#if GEN_GEN >= 12
|
||||
c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
|
||||
#endif
|
||||
|
||||
#if GEN_GEN >= 8 || GEN_IS_HASWELL
|
||||
/* The Skylake PRM contains the following restriction:
|
||||
*
|
||||
@@ -2893,6 +2897,7 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
|
||||
if (count == 0) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CONSTANT_ALL), c) {
|
||||
c.ShaderUpdateEnable = shader_mask;
|
||||
c.MOCS = cmd_buffer->device->isl_dev.mocs.internal;
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -2923,7 +2928,8 @@ cmd_buffer_emit_push_constant_all(struct anv_cmd_buffer *cmd_buffer,
|
||||
dw = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
|
||||
GENX(3DSTATE_CONSTANT_ALL),
|
||||
.ShaderUpdateEnable = shader_mask,
|
||||
.PointerBufferMask = buffers);
|
||||
.PointerBufferMask = buffers,
|
||||
.MOCS = cmd_buffer->device->isl_dev.mocs.internal);
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
const struct anv_push_range *range = &bind_map->push_ranges[i];
|
||||
|
@@ -49,7 +49,7 @@ enum {
|
||||
PAN_SYSVAL_SSBO = 4,
|
||||
PAN_SYSVAL_NUM_WORK_GROUPS = 5,
|
||||
PAN_SYSVAL_SAMPLER = 7,
|
||||
} pan_sysval;
|
||||
};
|
||||
|
||||
#define PAN_TXS_SYSVAL_ID(texidx, dim, is_array) \
|
||||
((texidx) | ((dim) << 7) | ((is_array) ? (1 << 9) : 0))
|
||||
@@ -65,7 +65,7 @@ enum {
|
||||
PAN_VERTEX_ID = 16,
|
||||
PAN_INSTANCE_ID = 17,
|
||||
PAN_MAX_ATTRIBUTE
|
||||
} pan_special_attributes;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int work_register_count;
|
||||
|
@@ -379,27 +379,16 @@ def parse(filename):
|
||||
channel.shift = le_shift
|
||||
le_shift += channel.size
|
||||
|
||||
be_shift = 0
|
||||
for channel in be_channels[3::-1]:
|
||||
channel.shift = be_shift
|
||||
be_shift += channel.size
|
||||
|
||||
assert le_shift == be_shift
|
||||
for i in range(4):
|
||||
assert (le_swizzles[i] != SWIZZLE_NONE) == (be_swizzles[i] != SWIZZLE_NONE)
|
||||
|
||||
format = Format(name, layout, block_width, block_height, block_depth, le_channels, le_swizzles, be_channels, be_swizzles, colorspace)
|
||||
|
||||
if format.is_array() and not format.is_bitmask():
|
||||
# Formats accessed as arrays by the pack functions (R32G32_FLOAT or
|
||||
# R8G8B8_UNORM, for example) should not be channel-ordering-reversed
|
||||
# for BE.
|
||||
# Note that __eq__ on channels ignores .shift!
|
||||
assert(format.be_channels == format.le_channels)
|
||||
assert(format.be_swizzles == format.le_swizzles)
|
||||
format.be_channels = format.le_channels
|
||||
else:
|
||||
be_shift = 0
|
||||
for channel in format.be_channels[3::-1]:
|
||||
channel.shift = be_shift
|
||||
be_shift += channel.size
|
||||
|
||||
assert le_shift == be_shift
|
||||
|
||||
formats.append(format)
|
||||
return formats
|
||||
|
||||
|
@@ -1319,43 +1319,56 @@ static struct overlay_draw *render_swapchain_display(struct swapchain_data *data
|
||||
|
||||
device_data->vtable.CmdEndRenderPass(draw->command_buffer);
|
||||
|
||||
/* Bounce the image to display back to present layout. */
|
||||
imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
imb.pNext = nullptr;
|
||||
imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
imb.oldLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
|
||||
imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
imb.image = data->images[image_index];
|
||||
imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
imb.subresourceRange.baseMipLevel = 0;
|
||||
imb.subresourceRange.levelCount = 1;
|
||||
imb.subresourceRange.baseArrayLayer = 0;
|
||||
imb.subresourceRange.layerCount = 1;
|
||||
imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index;
|
||||
imb.dstQueueFamilyIndex = present_queue->family_index;
|
||||
device_data->vtable.CmdPipelineBarrier(draw->command_buffer,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
|
||||
0, /* dependency flags */
|
||||
0, nullptr, /* memory barriers */
|
||||
0, nullptr, /* buffer memory barriers */
|
||||
1, &imb); /* image memory barriers */
|
||||
if (device_data->graphic_queue->family_index != present_queue->family_index)
|
||||
{
|
||||
/* Transfer the image back to the present queue family
|
||||
* image layout was already changed to present by the render pass
|
||||
*/
|
||||
imb.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
|
||||
imb.pNext = nullptr;
|
||||
imb.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
imb.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
|
||||
imb.oldLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
imb.newLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;
|
||||
imb.image = data->images[image_index];
|
||||
imb.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
|
||||
imb.subresourceRange.baseMipLevel = 0;
|
||||
imb.subresourceRange.levelCount = 1;
|
||||
imb.subresourceRange.baseArrayLayer = 0;
|
||||
imb.subresourceRange.layerCount = 1;
|
||||
imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index;
|
||||
imb.dstQueueFamilyIndex = present_queue->family_index;
|
||||
device_data->vtable.CmdPipelineBarrier(draw->command_buffer,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
|
||||
0, /* dependency flags */
|
||||
0, nullptr, /* memory barriers */
|
||||
0, nullptr, /* buffer memory barriers */
|
||||
1, &imb); /* image memory barriers */
|
||||
}
|
||||
|
||||
device_data->vtable.EndCommandBuffer(draw->command_buffer);
|
||||
|
||||
VkPipelineStageFlags *stages_wait = (VkPipelineStageFlags*) malloc(sizeof(VkPipelineStageFlags) * n_wait_semaphores);
|
||||
for (unsigned i = 0; i < n_wait_semaphores; i++)
|
||||
{
|
||||
// wait in the fragment stage until the swapchain image is ready
|
||||
stages_wait[i] = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
|
||||
}
|
||||
|
||||
VkSubmitInfo submit_info = {};
|
||||
VkPipelineStageFlags stage_wait = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
||||
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submit_info.commandBufferCount = 1;
|
||||
submit_info.pCommandBuffers = &draw->command_buffer;
|
||||
submit_info.pWaitDstStageMask = &stage_wait;
|
||||
submit_info.pWaitDstStageMask = stages_wait;
|
||||
submit_info.waitSemaphoreCount = n_wait_semaphores;
|
||||
submit_info.pWaitSemaphores = wait_semaphores;
|
||||
submit_info.signalSemaphoreCount = 1;
|
||||
submit_info.pSignalSemaphores = &draw->semaphore;
|
||||
|
||||
device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence);
|
||||
|
||||
free(stages_wait);
|
||||
|
||||
return draw;
|
||||
}
|
||||
@@ -1890,15 +1903,18 @@ static VkResult overlay_QueuePresentKHR(
|
||||
struct swapchain_data *swapchain_data =
|
||||
FIND(struct swapchain_data, swapchain);
|
||||
|
||||
uint32_t image_index = pPresentInfo->pImageIndices[i];
|
||||
|
||||
before_present(swapchain_data,
|
||||
queue_data,
|
||||
pPresentInfo->pWaitSemaphores,
|
||||
pPresentInfo->waitSemaphoreCount,
|
||||
pPresentInfo->pImageIndices[i]);
|
||||
image_index);
|
||||
|
||||
VkPresentInfoKHR present_info = *pPresentInfo;
|
||||
present_info.swapchainCount = 1;
|
||||
present_info.pSwapchains = &swapchain;
|
||||
present_info.pImageIndices = &image_index;
|
||||
|
||||
uint64_t ts0 = os_time_get();
|
||||
result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
|
||||
@@ -1910,11 +1926,13 @@ static VkResult overlay_QueuePresentKHR(
|
||||
VkSwapchainKHR swapchain = pPresentInfo->pSwapchains[i];
|
||||
struct swapchain_data *swapchain_data =
|
||||
FIND(struct swapchain_data, swapchain);
|
||||
|
||||
uint32_t image_index = pPresentInfo->pImageIndices[i];
|
||||
|
||||
VkPresentInfoKHR present_info = *pPresentInfo;
|
||||
present_info.swapchainCount = 1;
|
||||
present_info.pSwapchains = &swapchain;
|
||||
|
||||
uint32_t image_index = pPresentInfo->pImageIndices[i];
|
||||
present_info.pImageIndices = &image_index;
|
||||
|
||||
struct overlay_draw *draw = before_present(swapchain_data,
|
||||
queue_data,
|
||||
|
Reference in New Issue
Block a user