Compare commits

60 Commits: 20.3-branc ... mesa-18.2.

SHA1:
86aa912dda
2ec87de498
54cd81dfc5
5457e58a64
1e9c422894
4320851198
f69fcede0a
26c07daf9d
f3fc2d40fe
4477635b69
bc6b6cb290
3ff3bfa3f5
c2268223c8
b9a97a8b88
dbb5396667
586ac9c237
f070d5a568
b1e0876a6b
dcd3786e6e
d82c36a4c7
8061ee5883
bbd95de921
b696ab172c
f7e8bc0f23
90278c7f95
0c1832765f
94da454726
dadc50add5
e91782ed55
9df3460724
8be5985e65
6606cacd3d
1378f33142
9dacf10ca8
7af6be8864
9ad14f71e6
6ae0a639ec
c709206977
33ac5fb678
f0ae95492a
a42afc8504
adfbf1fe84
4a25d8b623
4a769c8850
d39fb6d157
ed117c27e1
fdbbe4c50c
3c3589a0ba
37fa81f631
71aa72d695
c8d41bc58d
c3b1a6d7fa
cce78368df
b6e9ef1556
c18ed873a5
88c36f4379
bbeb78620c
9ddff68f6f
2e903df72f
cb542ac550
bin/.cherry-ignore (new file, 3 lines)

@@ -0,0 +1,3 @@
# fixes: This commit has more than one Fixes tag but the commit it
# addresses didn't land in branch.
6ff1c479968819b93c46d24bd898e89ce14ac401 autotools: don't ship the git_sha1.h generated in git in the tarballs
@@ -43,13 +43,15 @@ def main():
    master = os.path.join(to, os.path.basename(args.megadriver))

    if not os.path.exists(to):
        if os.path.lexists(to):
            os.unlink(to)
        os.makedirs(to)
    shutil.copy(args.megadriver, master)

    for driver in args.drivers:
        abs_driver = os.path.join(to, driver)

        if os.path.exists(abs_driver):
        if os.path.lexists(abs_driver):
            os.unlink(abs_driver)
        print('installing {} to {}'.format(args.megadriver, abs_driver))
        os.link(master, abs_driver)
@@ -60,7 +62,7 @@ def main():

        name, ext = os.path.splitext(driver)
        while ext != '.so':
            if os.path.exists(name):
            if os.path.lexists(name):
                os.unlink(name)
            os.symlink(driver, name)
            name, ext = os.path.splitext(name)
configure.ac (26 lines changed)

@@ -1503,15 +1503,15 @@ fi
AC_ARG_WITH([gl-lib-name],
  [AS_HELP_STRING([--with-gl-lib-name@<:@=NAME@:>@],
    [specify GL library name @<:@default=GL@:>@])],
  [GL_LIB=$withval],
  [GL_LIB="$DEFAULT_GL_LIB_NAME"])
  [AC_MSG_ERROR([--with-gl-lib-name is no longer supported. Rename the library manually if needed.])],
  [])
AC_ARG_WITH([osmesa-lib-name],
  [AS_HELP_STRING([--with-osmesa-lib-name@<:@=NAME@:>@],
    [specify OSMesa library name @<:@default=OSMesa@:>@])],
  [OSMESA_LIB=$withval],
  [OSMESA_LIB=OSMesa])
AS_IF([test "x$GL_LIB" = xyes], [GL_LIB="$DEFAULT_GL_LIB_NAME"])
AS_IF([test "x$OSMESA_LIB" = xyes], [OSMESA_LIB=OSMesa])
  [AC_MSG_ERROR([--with-osmesa-lib-name is no longer supported. Rename the library manually if needed.])],
  [])
GL_LIB="$DEFAULT_GL_LIB_NAME"
OSMESA_LIB=OSMesa

dnl
dnl Mangled Mesa support
@@ -1523,6 +1523,9 @@ AC_ARG_ENABLE([mangling],
  [enable_mangling=no]
)
if test "x${enable_mangling}" = "xyes" ; then
  if test "x$enable_libglvnd" = xyes; then
    AC_MSG_ERROR([Conflicting options --enable-mangling and --enable-libglvnd.])
  fi
  DEFINES="${DEFINES} -DUSE_MGL_NAMESPACE"
  GL_LIB="Mangled${GL_LIB}"
  OSMESA_LIB="Mangled${OSMESA_LIB}"
@@ -1530,6 +1533,15 @@ fi
AC_SUBST([GL_LIB])
AC_SUBST([OSMESA_LIB])

dnl HACK when building glx + glvnd we ship gl.pc, despite that glvnd should do it
dnl Thus we need to use GL as a DSO name.
if test "x$enable_libglvnd" = xyes -a "x$enable_glx" != xno; then
    GL_PKGCONF_LIB="GL"
else
    GL_PKGCONF_LIB="$GL_LIB"
fi
AC_SUBST([GL_PKGCONF_LIB])

# Check for libdrm
PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
                  [have_libdrm=yes], [have_libdrm=no])
@@ -1658,6 +1670,8 @@ xxlib | xgallium-xlib)
xdri)
    # DRI-based GLX

    require_dri_shared_libs_and_glapi "GLX"

    # find the DRI deps for libGL
    dri_modules="x11 xext xdamage >= $XDAMAGE_REQUIRED xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED"
@@ -989,7 +989,7 @@ if cc.links('''
      freelocale(loc);
      return 0;
    }''',
    extra_args : pre_args,
    args : pre_args,
    name : 'strtod has locale support')
  pre_args += '-DHAVE_STRTOD_L'
endif
@@ -27,4 +27,6 @@ include $(LOCAL_PATH)/Makefile.sources

include $(LOCAL_PATH)/Android.addrlib.mk
include $(LOCAL_PATH)/Android.common.mk
ifneq ($(filter radeonsi,$(BOARD_GPU_DRIVERS)),)
include $(LOCAL_PATH)/vulkan/Android.mk
endif
@@ -149,7 +149,8 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
	char features[256];
	const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
	LLVMTargetRef target = ac_get_llvm_target(triple);
	bool barrier_does_waitcnt = family != CHIP_VEGA20;
	bool barrier_does_waitcnt = (tm_options & AC_TM_AUTO_WAITCNT_BEFORE_BARRIER) &&
				    family != CHIP_VEGA20;

	snprintf(features, sizeof(features),
		 "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
@@ -65,6 +65,7 @@ enum ac_target_machine_options {
	AC_TM_CHECK_IR = (1 << 5),
	AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
	AC_TM_CREATE_LOW_OPT = (1 << 7),
	AC_TM_AUTO_WAITCNT_BEFORE_BARRIER = (1 << 8),
};

enum ac_float_mode {
@@ -62,6 +62,7 @@ LOCAL_SRC_FILES := \
	$(VULKAN_FILES)

LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions
LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR

$(call mesa-build-with-llvm)

@@ -140,6 +141,7 @@ LOCAL_SRC_FILES := \
	$(VULKAN_ANDROID_FILES)

LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions
LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR

$(call mesa-build-with-llvm)
@@ -124,7 +124,7 @@ VULKAN_LIB_DEPS += \
endif

if HAVE_PLATFORM_ANDROID
AM_CPPFLAGS += $(ANDROID_CPPFLAGS)
AM_CPPFLAGS += $(ANDROID_CPPFLAGS) -DVK_USE_PLATFORM_ANDROID_KHR
AM_CFLAGS += $(ANDROID_CFLAGS)
VULKAN_LIB_DEPS += $(ANDROID_LIBS)
VULKAN_SOURCES += $(VULKAN_ANDROID_FILES)
@@ -2307,6 +2307,7 @@ VkResult radv_BeginCommandBuffer(
	cmd_buffer->state.last_num_instances = -1;
	cmd_buffer->state.last_vertex_offset = -1;
	cmd_buffer->state.last_first_instance = -1;
	cmd_buffer->state.predication_type = -1;
	cmd_buffer->usage_flags = pBeginInfo->flags;

	/* setup initial configuration into command buffer */
@@ -4126,15 +4127,18 @@ static void radv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer,

	if (radv_image_has_dcc(image)) {
		uint32_t value = 0xffffffffu; /* Fully expanded mode. */
		bool need_decompress_pass = false;

		if (radv_layout_dcc_compressed(image, dst_layout,
					       dst_queue_mask)) {
			value = 0x20202020u;
			need_decompress_pass = true;
		}

		radv_initialize_dcc(cmd_buffer, image, value);

		radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image, false);
		radv_set_dcc_need_cmask_elim_pred(cmd_buffer, image,
						  need_decompress_pass);
	}

	if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
@@ -480,6 +480,9 @@ radv_handle_per_app_options(struct radv_instance *instance,
			 */
			instance->perftest_flags |= RADV_PERFTEST_SISCHED;
		}
	} else if (!strcmp(name, "DOOM_VFR")) {
		/* Work around a Doom VFR game bug */
		instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
	}
}

@@ -105,7 +105,7 @@ EXTENSIONS = [
    Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
    Extension('VK_EXT_shader_viewport_index_layer', 1, True),
    Extension('VK_EXT_shader_stencil_export', 1, True),
    Extension('VK_EXT_vertex_attribute_divisor', 1, True),
    Extension('VK_EXT_vertex_attribute_divisor', 2, True),
    Extension('VK_AMD_draw_indirect_count', 1, True),
    Extension('VK_AMD_gcn_shader', 1, True),
    Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'),
@@ -612,7 +612,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
	}

	if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
	    physical_device->rad_info.chip_class < GFX9 &&
	    physical_device->rad_info.family != CHIP_VEGA10 &&
	    physical_device->rad_info.family != CHIP_RAVEN &&
	    physical_device->rad_info.family != CHIP_STONEY) {
		out_properties->linearTilingFeatures = linear;
		out_properties->optimalTilingFeatures = tiled;
@@ -603,7 +603,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
		pipeline = cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
	}

	if (radv_image_has_dcc(image)) {
	if (!decompress_dcc && radv_image_has_dcc(image)) {
		old_predicating = cmd_buffer->state.predicating;

		radv_emit_set_predication_state_from_image(cmd_buffer, image, true);
@@ -671,7 +671,7 @@ radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer,
					   &cmd_buffer->pool->alloc);

	}
	if (radv_image_has_dcc(image)) {
	if (!decompress_dcc && radv_image_has_dcc(image)) {
		cmd_buffer->state.predicating = old_predicating;

		radv_emit_set_predication_state_from_image(cmd_buffer, image, false);
@@ -1991,8 +1991,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
		uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];

		if (divisor) {
			buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
						    ctx->abi.start_instance, "");
			buffer_index = ctx->abi.instance_id;

			if (divisor != 1) {
				buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
@@ -2009,6 +2008,8 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
		} else {
			buffer_index = ctx->ac.i32_0;
		}

		buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.start_instance, buffer_index, "");
	} else
		buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
					    ctx->abi.base_vertex, "");
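The hunk above changes where the start instance enters the instance-rate index computation: the instance id is divided by the divisor first, and start_instance is added afterwards. A minimal standalone C sketch of that arithmetic, with a hypothetical helper name and plain integers standing in for the LLVM IR values built above:

#include <stdint.h>

/* Hypothetical helper, for illustration only: computes the vertex-buffer
 * fetch index for an instanced attribute with a divisor, mirroring the
 * order of operations in the hunk above. */
static uint32_t
instanced_fetch_index(uint32_t instance_id, uint32_t start_instance,
                      uint32_t divisor)
{
        uint32_t index;

        if (divisor == 0) {
                /* Divisor 0: every instance reads element 0 of the attribute. */
                index = 0;
        } else {
                index = instance_id;
                if (divisor != 1)
                        index /= divisor;      /* the LLVMBuildUDiv above */
        }

        /* start_instance is added after the division, not before it. */
        return start_instance + index;
}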
@@ -673,7 +673,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
			if (!cs->num_buffers)
				continue;

			if (unique_bo_count == 0) {
			if (unique_bo_count == 0 && !cs->num_virtual_buffers) {
				memcpy(handles, cs->handles, cs->num_buffers * sizeof(amdgpu_bo_handle));
				unique_bo_count = cs->num_buffers;
				continue;
@@ -528,6 +528,16 @@
    <field name="number of attribute arrays" size="5" start="0" type="uint"/>
  </packet>

  <packet code="71" name="VCM Cache Size" min_ver="41">
    <field name="Number of 16-vertex batches for rendering" size="4" start="4" type="uint"/>
    <field name="Number of 16-vertex batches for binning" size="4" start="0" type="uint"/>
  </packet>

  <packet code="73" name="VCM Cache Size" max_ver="33">
    <field name="Number of 16-vertex batches for rendering" size="4" start="4" type="uint"/>
    <field name="Number of 16-vertex batches for binning" size="4" start="0" type="uint"/>
  </packet>

  <packet code="73" name="Transform Feedback Buffer" min_ver="41">
    <field name="Buffer Address" size="32" start="32" type="address"/>
    <field name="Buffer Size in 32-bit words" size="30" start="2" type="uint"/>
@@ -27,13 +27,14 @@
#include <stdint.h>

/**
 * Struct for tracking features of the V3D chip. This is where we'll store
 * boolean flags for features in a specific version, but for now it's just the
 * version
 * Struct for tracking features of the V3D chip across driver and compiler.
 */
struct v3d_device_info {
        /** Simple V3D version: major * 10 + minor */
        uint8_t ver;

        /** Size of the VPM, in bytes. */
        int vpm_size;
};

#endif
@@ -462,6 +462,7 @@ struct choose_scoreboard {
        int last_magic_sfu_write_tick;
        int last_ldvary_tick;
        int last_uniforms_reset_tick;
        int last_thrsw_tick;
        bool tlb_locked;
};

@@ -1095,10 +1096,16 @@ qpu_instruction_valid_in_thrend_slot(struct v3d_compile *c,
}

static bool
valid_thrsw_sequence(struct v3d_compile *c,
valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard,
                     struct qinst *qinst, int instructions_in_sequence,
                     bool is_thrend)
{
        /* No emitting our thrsw while the previous thrsw hasn't happened yet. */
        if (scoreboard->last_thrsw_tick + 3 >
            scoreboard->tick - instructions_in_sequence) {
                return false;
        }

        for (int slot = 0; slot < instructions_in_sequence; slot++) {
                /* No scheduling SFU when the result would land in the other
                 * thread. The simulator complains for safety, though it
@@ -1159,7 +1166,8 @@ emit_thrsw(struct v3d_compile *c,
                if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig))
                        break;

                if (!valid_thrsw_sequence(c, prev_inst, slots_filled + 1,
                if (!valid_thrsw_sequence(c, scoreboard,
                                          prev_inst, slots_filled + 1,
                                          is_thrend)) {
                        break;
                }
@@ -1173,7 +1181,9 @@ emit_thrsw(struct v3d_compile *c,
        if (merge_inst) {
                merge_inst->qpu.sig.thrsw = true;
                needs_free = true;
                scoreboard->last_thrsw_tick = scoreboard->tick - slots_filled;
        } else {
                scoreboard->last_thrsw_tick = scoreboard->tick;
                insert_scheduled_instruction(c, block, scoreboard, inst);
                time++;
                slots_filled++;
@@ -1475,6 +1485,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
        scoreboard.last_ldvary_tick = -10;
        scoreboard.last_magic_sfu_write_tick = -10;
        scoreboard.last_uniforms_reset_tick = -10;
        scoreboard.last_thrsw_tick = -10;

        if (debug) {
                fprintf(stderr, "Pre-schedule instructions\n");
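The new last_thrsw_tick bookkeeping above exists to keep thread switches far enough apart that the previous one has taken effect before the next sequence starts. A tiny self-contained illustration of the same predicate, with invented tick values:

#include <stdbool.h>
#include <stdio.h>

/* Same check as the hunk above, pulled out for illustration: a new thrsw
 * sequence is only valid if the previous thrsw (which takes effect 3 ticks
 * after it is emitted) has happened by the sequence's first slot. */
static bool
thrsw_spacing_ok(int last_thrsw_tick, int tick, int instructions_in_sequence)
{
        return !(last_thrsw_tick + 3 > tick - instructions_in_sequence);
}

int main(void)
{
        /* Invented values: previous thrsw at tick 10. */
        printf("%d\n", thrsw_spacing_ok(10, 14, 2)); /* 0: 13 > 12, too close */
        printf("%d\n", thrsw_spacing_ok(10, 16, 2)); /* 1: 13 <= 14, far enough */
        return 0;
}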
@@ -648,6 +648,9 @@ struct v3d_vs_prog_data {

        /* Total number of components written, for the shader state record. */
        uint32_t vpm_output_size;

        /* Value to be programmed in VCM_CACHE_SIZE. */
        uint8_t vcm_cache_size;
};

struct v3d_fs_prog_data {
@@ -928,7 +931,7 @@ VIR_A_ALU2(OR)
VIR_A_ALU2(XOR)
VIR_A_ALU2(VADD)
VIR_A_ALU2(VSUB)
VIR_A_ALU2(STVPMV)
VIR_A_NODST_2(STVPMV)
VIR_A_ALU1(NOT)
VIR_A_ALU1(NEG)
VIR_A_ALU1(FLAPUSH)
@@ -452,6 +452,16 @@ vir_emit_def(struct v3d_compile *c, struct qinst *inst)
{
        assert(inst->dst.file == QFILE_NULL);

        /* If we're emitting an instruction that's a def, it had better be
         * writing a register.
         */
        if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
                assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
                       v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
                assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
                       v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
        }

        inst->dst = vir_get_temp(c);

        if (inst->dst.file == QFILE_TEMP)
@@ -746,10 +756,28 @@ uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
        if (prog_data->uses_iid)
                prog_data->vpm_input_size++;

        /* Input/output segment size are in 8x32-bit multiples. */
        /* Input/output segment size are in sectors (8 rows of 32 bits per
         * channel).
         */
        prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
        prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;

        /* Compute VCM cache size. We set up our program to take up less than
         * half of the VPM, so that any set of bin and render programs won't
         * run out of space. We need space for at least one input segment,
         * and then allocate the rest to output segments (one for the current
         * program, the rest to VCM). The valid range of the VCM cache size
         * field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
         * batches.
         */
        assert(c->devinfo->vpm_size);
        int sector_size = 16 * sizeof(uint32_t) * 8;
        int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
        int half_vpm = vpm_size_in_sectors / 2;
        int vpm_output_batches = half_vpm - prog_data->vpm_input_size;
        assert(vpm_output_batches >= 2);
        prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);

        return v3d_return_qpu_insts(c, final_assembly_size);
}

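The VCM sizing logic above is mostly unit conversion. A small self-contained sketch of the same arithmetic, using made-up VPM and input-segment sizes rather than values read from real hardware:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define CLAMP(x, lo, hi) ((x) < (lo) ? (lo) : ((x) > (hi) ? (hi) : (x)))

int main(void)
{
        /* Hypothetical inputs: an 8 KB VPM and a 1-sector VS input segment. */
        int vpm_size = 8192;
        int vpm_input_size = 1;

        /* A sector is 8 rows of one 32-bit value for each of 16 channels. */
        int sector_size = 16 * sizeof(uint32_t) * 8;              /* 512 bytes */
        int vpm_size_in_sectors = vpm_size / sector_size;         /* 16 */
        int half_vpm = vpm_size_in_sectors / 2;                   /* 8 */
        int vpm_output_batches = half_vpm - vpm_input_size;       /* 7 */
        assert(vpm_output_batches >= 2);                          /* GFXH-1744 */
        int vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4); /* 6 -> 4 */

        printf("vcm_cache_size = %d\n", vcm_cache_size);
        return 0;
}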
@@ -94,6 +94,15 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                        }
                }

                /* Refuse to spill a ldvary's dst, because that means
                 * that ldvary's r5 would end up being used across a
                 * thrsw.
                 */
                if (inst->qpu.sig.ldvary) {
                        assert(inst->dst.file == QFILE_TEMP);
                        BITSET_CLEAR(c->spillable, inst->dst.index);
                }

                if (inst->is_last_thrsw)
                        started_last_seg = true;

@@ -102,7 +111,7 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                        started_last_seg = true;

                /* Track when we're in between a TMU setup and the
                 * final LDTMU from that TMU setup. We can't
                 * final LDTMU or TMUWT from that TMU setup. We can't
                 * spill/fill any temps during that time, because that
                 * involves inserting a new TMU setup/LDTMU sequence.
                 */
@@ -110,6 +119,10 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
                    is_last_ldtmu(inst, block))
                        in_tmu_operation = false;

                if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
                    inst->qpu.alu.add.op == V3D_QPU_A_TMUWT)
                        in_tmu_operation = false;

                if (v3d_qpu_writes_tmu(&inst->qpu))
                        in_tmu_operation = true;
        }
@@ -206,6 +219,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
                                  inst->dst);
                        v3d_emit_spill_tmua(c, spill_offset);
                        vir_emit_thrsw(c);
                        vir_TMUWT(c);
                        c->spills++;
                }

@@ -1928,6 +1928,11 @@ ast_expression::do_hir(exec_list *instructions,

      error_emitted = op[0]->type->is_error() || op[1]->type->is_error();

      if (error_emitted) {
         result = ir_rvalue::error_value(ctx);
         break;
      }

      type = arithmetic_result_type(op[0], op[1], false, state, & loc);

      ir_rvalue *temp_rhs;
@@ -826,7 +826,7 @@ ir_dereference_array::constant_expression_value(void *mem_ctx,
         const unsigned component = idx->value.u[0];

         return new(mem_ctx) ir_constant(array, component);
      } else {
      } else if (array->type->is_array()) {
         const unsigned index = idx->value.u[0];
         return array->get_array_element(index)->clone(mem_ctx, NULL);
      }
@@ -1134,6 +1134,25 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
   return (config_count != 0);
}

#ifdef HAVE_DRM_GRALLOC
static int
droid_open_device_drm_gralloc(struct dri2_egl_display *dri2_dpy)
{
   int fd = -1, err = -EINVAL;

   if (dri2_dpy->gralloc->perform)
      err = dri2_dpy->gralloc->perform(dri2_dpy->gralloc,
                                       GRALLOC_MODULE_PERFORM_GET_DRM_FD,
                                       &fd);
   if (err || fd < 0) {
      _eglLog(_EGL_WARNING, "fail to get drm fd");
      fd = -1;
   }

   return (fd >= 0) ? fcntl(fd, F_DUPFD_CLOEXEC, 3) : -1;
}
#endif /* HAVE_DRM_GRALLOC */

static const struct dri2_egl_display_vtbl droid_display_vtbl = {
   .authenticate = NULL,
   .create_window_surface = droid_create_window_surface,
@@ -1384,7 +1403,11 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *disp)

   disp->DriverData = (void *) dri2_dpy;

#ifdef HAVE_DRM_GRALLOC
   dri2_dpy->fd = droid_open_device_drm_gralloc(dri2_dpy);
#else
   dri2_dpy->fd = droid_open_device(disp);
#endif
   if (dri2_dpy->fd < 0) {
      err = "DRI2: failed to open device";
      goto cleanup;
@@ -201,6 +201,17 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
   struct dri2_egl_display *dri2_dpy =
      dri2_egl_display(dri2_surf->base.Resource.Display);

   /* Update the surface size as soon as native window is resized; from user
    * pov, this makes the effect that resize is done inmediately after native
    * window resize, without requiring to wait until the first draw.
    *
    * A more detailed and lengthy explanation can be found at
    * https://lists.freedesktop.org/archives/mesa-dev/2018-June/196474.html
    */
   if (!dri2_surf->back) {
      dri2_surf->base.Width = wl_win->width;
      dri2_surf->base.Height = wl_win->height;
   }
   dri2_dpy->flush->invalidate(dri2_surf->dri_drawable);
}

@@ -258,6 +269,9 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
      goto cleanup_surf;
   }

   dri2_surf->base.Width = window->width;
   dri2_surf->base.Height = window->height;

   visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config);
   assert(visual_idx != -1);

@@ -577,8 +591,8 @@ update_buffers(struct dri2_egl_surface *dri2_surf)
   struct dri2_egl_display *dri2_dpy =
      dri2_egl_display(dri2_surf->base.Resource.Display);

   if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
       dri2_surf->base.Height != dri2_surf->wl_win->height) {
   if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
       dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {

      dri2_wl_release_buffers(dri2_surf);

@@ -1632,8 +1646,8 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
   if (dri2_surf->back)
      return 0;

   if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
       dri2_surf->base.Height != dri2_surf->wl_win->height) {
   if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
       dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {

      dri2_wl_release_buffers(dri2_surf);

@@ -107,12 +107,17 @@ static const struct loader_dri3_vtable egl_dri3_vtable = {
static EGLBoolean
dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
{
   struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
   struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
   xcb_drawable_t drawable = dri3_surf->loader_drawable.drawable;

   (void) drv;

   loader_dri3_drawable_fini(&dri3_surf->loader_drawable);

   if (surf->Type == EGL_PBUFFER_BIT)
      xcb_free_pixmap (dri2_dpy->conn, drawable);

   dri2_fini_surface(surf);
   free(surf);

@@ -99,10 +99,10 @@ endif

if with_platform_x11
  files_egl += files('drivers/dri2/platform_x11.c')
  incs_for_egl += inc_loader
  if with_dri3
    files_egl += files('drivers/dri2/platform_x11_dri3.c')
    link_for_egl += libloader_dri3_helper
    incs_for_egl += inc_loader
  endif
  deps_for_egl += [dep_x11_xcb, dep_xcb_dri2, dep_xcb_xfixes]
endif

@@ -1131,6 +1131,31 @@ static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
|
||||
mgr->dirty_real_vb_mask = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
|
||||
unsigned *indirect_data, unsigned stride,
|
||||
unsigned draw_count)
|
||||
{
|
||||
assert(info->index_size);
|
||||
info->indirect = NULL;
|
||||
|
||||
for (unsigned i = 0; i < draw_count; i++) {
|
||||
unsigned offset = i * stride / 4;
|
||||
|
||||
info->count = indirect_data[offset + 0];
|
||||
info->instance_count = indirect_data[offset + 1];
|
||||
|
||||
if (!info->count || !info->instance_count)
|
||||
continue;
|
||||
|
||||
info->start = indirect_data[offset + 2];
|
||||
info->index_bias = indirect_data[offset + 3];
|
||||
info->start_instance = indirect_data[offset + 4];
|
||||
|
||||
u_vbuf_draw_vbo(mgr, info);
|
||||
}
|
||||
}
|
||||
|
||||
void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
|
||||
{
|
||||
struct pipe_context *pipe = mgr->pipe;
|
||||
@@ -1160,33 +1185,163 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
|
||||
|
||||
new_info = *info;
|
||||
|
||||
/* Fallback. We need to know all the parameters. */
|
||||
/* Handle indirect (multi)draws. */
|
||||
if (new_info.indirect) {
|
||||
struct pipe_transfer *transfer = NULL;
|
||||
int *data;
|
||||
const struct pipe_draw_indirect_info *indirect = new_info.indirect;
|
||||
unsigned draw_count = 0;
|
||||
|
||||
if (new_info.index_size) {
|
||||
data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
|
||||
new_info.indirect->offset, 20,
|
||||
PIPE_TRANSFER_READ, &transfer);
|
||||
new_info.index_bias = data[3];
|
||||
new_info.start_instance = data[4];
|
||||
}
|
||||
else {
|
||||
data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
|
||||
new_info.indirect->offset, 16,
|
||||
PIPE_TRANSFER_READ, &transfer);
|
||||
new_info.start_instance = data[3];
|
||||
/* Get the number of draws. */
|
||||
if (indirect->indirect_draw_count) {
|
||||
pipe_buffer_read(pipe, indirect->indirect_draw_count,
|
||||
indirect->indirect_draw_count_offset,
|
||||
4, &draw_count);
|
||||
} else {
|
||||
draw_count = indirect->draw_count;
|
||||
}
|
||||
|
||||
new_info.count = data[0];
|
||||
new_info.instance_count = data[1];
|
||||
new_info.start = data[2];
|
||||
pipe_buffer_unmap(pipe, transfer);
|
||||
new_info.indirect = NULL;
|
||||
|
||||
if (!new_info.count)
|
||||
if (!draw_count)
|
||||
return;
|
||||
|
||||
unsigned data_size = (draw_count - 1) * indirect->stride +
|
||||
(new_info.index_size ? 20 : 16);
|
||||
unsigned *data = malloc(data_size);
|
||||
if (!data)
|
||||
return; /* report an error? */
|
||||
|
||||
/* Read the used buffer range only once, because the read can be
|
||||
* uncached.
|
||||
*/
|
||||
pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
|
||||
data);
|
||||
|
||||
if (info->index_size) {
|
||||
/* Indexed multidraw. */
|
||||
unsigned index_bias0 = data[3];
|
||||
bool index_bias_same = true;
|
||||
|
||||
/* If we invoke the translate path, we have to split the multidraw. */
|
||||
if (incompatible_vb_mask ||
|
||||
mgr->ve->incompatible_elem_mask) {
|
||||
u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
|
||||
indirect->stride, draw_count);
|
||||
free(data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* See if index_bias is the same for all draws. */
|
||||
for (unsigned i = 1; i < draw_count; i++) {
|
||||
if (data[i * indirect->stride / 4 + 3] != index_bias0) {
|
||||
index_bias_same = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Split the multidraw if index_bias is different. */
|
||||
if (!index_bias_same) {
|
||||
u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
|
||||
indirect->stride, draw_count);
|
||||
free(data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we don't need to use the translate path and index_bias is
|
||||
* the same, we can process the multidraw with the time complexity
|
||||
* equal to 1 draw call (except for the index range computation).
|
||||
* We only need to compute the index range covering all draw calls
|
||||
* of the multidraw.
|
||||
*
|
||||
* The driver will not look at these values because indirect != NULL.
|
||||
* These values determine the user buffer bounds to upload.
|
||||
*/
|
||||
new_info.index_bias = index_bias0;
|
||||
new_info.min_index = ~0u;
|
||||
new_info.max_index = 0;
|
||||
new_info.start_instance = ~0u;
|
||||
unsigned end_instance = 0;
|
||||
|
||||
struct pipe_transfer *transfer = NULL;
|
||||
const uint8_t *indices;
|
||||
|
||||
if (info->has_user_indices) {
|
||||
indices = (uint8_t*)info->index.user;
|
||||
} else {
|
||||
indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
|
||||
PIPE_TRANSFER_READ, &transfer);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < draw_count; i++) {
|
||||
unsigned offset = i * indirect->stride / 4;
|
||||
unsigned start = data[offset + 2];
|
||||
unsigned count = data[offset + 0];
|
||||
unsigned start_instance = data[offset + 4];
|
||||
unsigned instance_count = data[offset + 1];
|
||||
|
||||
if (!count || !instance_count)
|
||||
continue;
|
||||
|
||||
/* Update the ranges of instances. */
|
||||
new_info.start_instance = MIN2(new_info.start_instance,
|
||||
start_instance);
|
||||
end_instance = MAX2(end_instance, start_instance + instance_count);
|
||||
|
||||
/* Update the index range. */
|
||||
unsigned min, max;
|
||||
new_info.count = count; /* only used by get_minmax_index */
|
||||
u_vbuf_get_minmax_index_mapped(&new_info,
|
||||
indices +
|
||||
new_info.index_size * start,
|
||||
&min, &max);
|
||||
|
||||
new_info.min_index = MIN2(new_info.min_index, min);
|
||||
new_info.max_index = MAX2(new_info.max_index, max);
|
||||
}
|
||||
free(data);
|
||||
|
||||
if (transfer)
|
||||
pipe_buffer_unmap(pipe, transfer);
|
||||
|
||||
/* Set the final instance count. */
|
||||
new_info.instance_count = end_instance - new_info.start_instance;
|
||||
|
||||
if (new_info.start_instance == ~0u || !new_info.instance_count)
|
||||
return;
|
||||
} else {
|
||||
/* Non-indexed multidraw.
|
||||
*
|
||||
* Keep the draw call indirect and compute minimums & maximums,
|
||||
* which will determine the user buffer bounds to upload, but
|
||||
* the driver will not look at these values because indirect != NULL.
|
||||
*
|
||||
* This efficiently processes the multidraw with the time complexity
|
||||
* equal to 1 draw call.
|
||||
*/
|
||||
new_info.start = ~0u;
|
||||
new_info.start_instance = ~0u;
|
||||
unsigned end_vertex = 0;
|
||||
unsigned end_instance = 0;
|
||||
|
||||
for (unsigned i = 0; i < draw_count; i++) {
|
||||
unsigned offset = i * indirect->stride / 4;
|
||||
unsigned start = data[offset + 2];
|
||||
unsigned count = data[offset + 0];
|
||||
unsigned start_instance = data[offset + 3];
|
||||
unsigned instance_count = data[offset + 1];
|
||||
|
||||
new_info.start = MIN2(new_info.start, start);
|
||||
new_info.start_instance = MIN2(new_info.start_instance,
|
||||
start_instance);
|
||||
|
||||
end_vertex = MAX2(end_vertex, start + count);
|
||||
end_instance = MAX2(end_instance, start_instance + instance_count);
|
||||
}
|
||||
|
||||
/* Set the final counts. */
|
||||
new_info.count = end_vertex - new_info.start;
|
||||
new_info.instance_count = end_instance - new_info.start_instance;
|
||||
|
||||
if (new_info.start == ~0u || !new_info.count || !new_info.instance_count)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (new_info.index_size) {
|
||||
@@ -1211,7 +1366,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
|
||||
* We would have to break this drawing operation into several ones. */
|
||||
/* Use some heuristic to see if unrolling indices improves
|
||||
* performance. */
|
||||
if (!new_info.primitive_restart &&
|
||||
if (!info->indirect &&
|
||||
!new_info.primitive_restart &&
|
||||
num_vertices > new_info.count*2 &&
|
||||
num_vertices - new_info.count > 32 &&
|
||||
!u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
|
||||
|
@@ -2151,13 +2151,36 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su)
|
||||
{
|
||||
if (!su->getPredicate())
|
||||
return;
|
||||
|
||||
bld.setPosition(su, true);
|
||||
|
||||
for (unsigned i = 0; su->defExists(i); ++i) {
|
||||
ValueDef &def = su->def(i);
|
||||
|
||||
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||
assert(su->cc == CC_NOT_P);
|
||||
mov->setPredicate(CC_P, su->getPredicate());
|
||||
Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0));
|
||||
|
||||
def.replace(uni->getDef(0), false);
|
||||
uni->setSrc(0, def.get());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
|
||||
{
|
||||
processSurfaceCoordsNVE4(su);
|
||||
|
||||
if (su->op == OP_SULDP)
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
|
||||
assert(su->getPredicate());
|
||||
@@ -2267,8 +2290,10 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
|
||||
|
||||
processSurfaceCoordsNVC0(su);
|
||||
|
||||
if (su->op == OP_SULDP)
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
|
||||
const int dim = su->tex.target.getDim();
|
||||
@@ -2370,8 +2395,10 @@ NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
|
||||
{
|
||||
processSurfaceCoordsGM107(su);
|
||||
|
||||
if (su->op == OP_SULDP)
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDP) {
|
||||
Value *def = su->getDef(0);
|
||||
|
@@ -172,6 +172,7 @@ private:
|
||||
void processSurfaceCoordsNVE4(TexInstruction *);
|
||||
void processSurfaceCoordsNVC0(TexInstruction *);
|
||||
void convertSurfaceFormat(TexInstruction *);
|
||||
void insertOOBSurfaceOpResult(TexInstruction *);
|
||||
Value *calculateSampleOffset(Value *sampleID);
|
||||
|
||||
protected:
|
||||
|
@@ -715,7 +715,6 @@ static void compute_emit_cs(struct r600_context *rctx,
|
||||
rctx->cmd_buf_is_compute = true;
|
||||
}
|
||||
|
||||
r600_need_cs_space(rctx, 0, true);
|
||||
if (rctx->cs_shader_state.shader->ir_type == PIPE_SHADER_IR_TGSI) {
|
||||
r600_shader_select(&rctx->b.b, rctx->cs_shader_state.shader->sel, &compute_dirty);
|
||||
current = rctx->cs_shader_state.shader->sel->current;
|
||||
@@ -742,16 +741,22 @@ static void compute_emit_cs(struct r600_context *rctx,
|
||||
}
|
||||
rctx->cs_block_grid_sizes[3] = rctx->cs_block_grid_sizes[7] = 0;
|
||||
rctx->driver_consts[PIPE_SHADER_COMPUTE].cs_block_grid_size_dirty = true;
|
||||
|
||||
evergreen_emit_atomic_buffer_setup_count(rctx, current, combined_atomics, &atomic_used_mask);
|
||||
r600_need_cs_space(rctx, 0, true, util_bitcount(atomic_used_mask));
|
||||
|
||||
if (need_buf_const) {
|
||||
eg_setup_buffer_constants(rctx, PIPE_SHADER_COMPUTE);
|
||||
}
|
||||
r600_update_driver_const_buffers(rctx, true);
|
||||
|
||||
if (evergreen_emit_atomic_buffer_setup(rctx, current, combined_atomics, &atomic_used_mask)) {
|
||||
evergreen_emit_atomic_buffer_setup(rctx, true, combined_atomics, atomic_used_mask);
|
||||
if (atomic_used_mask) {
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
|
||||
}
|
||||
}
|
||||
} else
|
||||
r600_need_cs_space(rctx, 0, true, 0);
|
||||
|
||||
/* Initialize all the compute-related registers.
|
||||
*
|
||||
|
@@ -109,7 +109,7 @@ void evergreen_cp_dma_clear_buffer(struct r600_context *rctx,
|
||||
|
||||
r600_need_cs_space(rctx,
|
||||
10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
|
||||
R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
|
||||
R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
|
||||
|
||||
/* Flush the caches for the first copy only. */
|
||||
if (rctx->b.flags) {
|
||||
|
@@ -4030,7 +4030,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
|
||||
|
||||
if (!buffers || !buffers[idx].buffer) {
|
||||
pipe_resource_reference(&abuf->buffer, NULL);
|
||||
astate->enabled_mask &= ~(1 << i);
|
||||
continue;
|
||||
}
|
||||
buf = &buffers[idx];
|
||||
@@ -4038,7 +4037,6 @@ static void evergreen_set_hw_atomic_buffers(struct pipe_context *ctx,
|
||||
pipe_resource_reference(&abuf->buffer, buf->buffer);
|
||||
abuf->buffer_offset = buf->buffer_offset;
|
||||
abuf->buffer_size = buf->buffer_size;
|
||||
astate->enabled_mask |= (1 << i);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4868,20 +4866,15 @@ static void cayman_write_count_to_gds(struct r600_context *rctx,
|
||||
radeon_emit(cs, reloc);
|
||||
}
|
||||
|
||||
bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
|
||||
struct r600_pipe_shader *cs_shader,
|
||||
struct r600_shader_atomic *combined_atomics,
|
||||
uint8_t *atomic_used_mask_p)
|
||||
void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
|
||||
struct r600_pipe_shader *cs_shader,
|
||||
struct r600_shader_atomic *combined_atomics,
|
||||
uint8_t *atomic_used_mask_p)
|
||||
{
|
||||
struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
|
||||
unsigned pkt_flags = 0;
|
||||
uint8_t atomic_used_mask = 0;
|
||||
int i, j, k;
|
||||
bool is_compute = cs_shader ? true : false;
|
||||
|
||||
if (is_compute)
|
||||
pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
|
||||
|
||||
for (i = 0; i < (is_compute ? 1 : EG_NUM_HW_STAGES); i++) {
|
||||
uint8_t num_atomic_stage;
|
||||
struct r600_pipe_shader *pshader;
|
||||
@@ -4914,8 +4907,25 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
|
||||
}
|
||||
}
|
||||
}
|
||||
*atomic_used_mask_p = atomic_used_mask;
|
||||
}
|
||||
|
||||
void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
|
||||
bool is_compute,
|
||||
struct r600_shader_atomic *combined_atomics,
|
||||
uint8_t atomic_used_mask)
|
||||
{
|
||||
struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
|
||||
unsigned pkt_flags = 0;
|
||||
uint32_t mask;
|
||||
|
||||
if (is_compute)
|
||||
pkt_flags = RADEON_CP_PACKET3_COMPUTE_MODE;
|
||||
|
||||
mask = atomic_used_mask;
|
||||
if (!mask)
|
||||
return;
|
||||
|
||||
uint32_t mask = atomic_used_mask;
|
||||
while (mask) {
|
||||
unsigned atomic_index = u_bit_scan(&mask);
|
||||
struct r600_shader_atomic *atomic = &combined_atomics[atomic_index];
|
||||
@@ -4927,8 +4937,6 @@ bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
|
||||
else
|
||||
evergreen_emit_set_append_cnt(rctx, atomic, resource, pkt_flags);
|
||||
}
|
||||
*atomic_used_mask_p = atomic_used_mask;
|
||||
return true;
|
||||
}
|
||||
|
||||
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
|
||||
@@ -4940,7 +4948,7 @@ void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
|
||||
struct r600_atomic_buffer_state *astate = &rctx->atomic_buffer_state;
|
||||
uint32_t pkt_flags = 0;
|
||||
uint32_t event = EVENT_TYPE_PS_DONE;
|
||||
uint32_t mask = astate->enabled_mask;
|
||||
uint32_t mask;
|
||||
uint64_t dst_offset;
|
||||
unsigned reloc;
|
||||
|
||||
|
@@ -31,7 +31,7 @@
|
||||
|
||||
|
||||
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
|
||||
boolean count_draw_in)
|
||||
boolean count_draw_in, unsigned num_atomics)
|
||||
{
|
||||
/* Flush the DMA IB if it's not empty. */
|
||||
if (radeon_emitted(ctx->b.dma.cs, 0))
|
||||
@@ -61,6 +61,9 @@ void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw,
|
||||
num_dw += R600_MAX_FLUSH_CS_DWORDS + R600_MAX_DRAW_CS_DWORDS;
|
||||
}
|
||||
|
||||
/* add atomic counters, 8 pre + 8 post per counter + 16 post if any counters */
|
||||
num_dw += (num_atomics * 16) + (num_atomics ? 16 : 0);
|
||||
|
||||
/* Count in r600_suspend_queries. */
|
||||
num_dw += ctx->b.num_cs_dw_queries_suspend;
|
||||
|
||||
@@ -526,7 +529,7 @@ void r600_cp_dma_copy_buffer(struct r600_context *rctx,
|
||||
|
||||
r600_need_cs_space(rctx,
|
||||
10 + (rctx->b.flags ? R600_MAX_FLUSH_CS_DWORDS : 0) +
|
||||
3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE);
|
||||
3 + R600_MAX_PFP_SYNC_ME_DWORDS, FALSE, 0);
|
||||
|
||||
/* Flush the caches for the first copy only. */
|
||||
if (rctx->b.flags) {
|
||||
|
@@ -446,8 +446,6 @@ struct r600_shader_state {
|
||||
};
|
||||
|
||||
struct r600_atomic_buffer_state {
|
||||
uint32_t enabled_mask;
|
||||
uint32_t dirty_mask;
|
||||
struct pipe_shader_buffer buffer[EG_MAX_ATOMIC_BUFFERS];
|
||||
};
|
||||
|
||||
@@ -773,7 +771,7 @@ void r600_context_gfx_flush(void *context, unsigned flags,
|
||||
struct pipe_fence_handle **fence);
|
||||
void r600_begin_new_cs(struct r600_context *ctx);
|
||||
void r600_flush_emit(struct r600_context *ctx);
|
||||
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in);
|
||||
void r600_need_cs_space(struct r600_context *ctx, unsigned num_dw, boolean count_draw_in, unsigned num_atomics);
|
||||
void r600_emit_pfp_sync_me(struct r600_context *rctx);
|
||||
void r600_cp_dma_copy_buffer(struct r600_context *rctx,
|
||||
struct pipe_resource *dst, uint64_t dst_offset,
|
||||
@@ -1067,10 +1065,14 @@ void r600_delete_shader_selector(struct pipe_context *ctx,
|
||||
struct r600_pipe_shader_selector *sel);
|
||||
|
||||
struct r600_shader_atomic;
|
||||
bool evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
|
||||
struct r600_pipe_shader *cs_shader,
|
||||
void evergreen_emit_atomic_buffer_setup_count(struct r600_context *rctx,
|
||||
struct r600_pipe_shader *cs_shader,
|
||||
struct r600_shader_atomic *combined_atomics,
|
||||
uint8_t *atomic_used_mask_p);
|
||||
void evergreen_emit_atomic_buffer_setup(struct r600_context *rctx,
|
||||
bool is_compute,
|
||||
struct r600_shader_atomic *combined_atomics,
|
||||
uint8_t *atomic_used_mask_p);
|
||||
uint8_t atomic_used_mask);
|
||||
void evergreen_emit_atomic_buffer_save(struct r600_context *rctx,
|
||||
bool is_compute,
|
||||
struct r600_shader_atomic *combined_atomics,
|
||||
|
@@ -2085,8 +2085,9 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
||||
: (rctx->tes_shader)? rctx->tes_shader->info.properties[TGSI_PROPERTY_TES_PRIM_MODE]
|
||||
: info->mode;
|
||||
|
||||
if (rctx->b.chip_class >= EVERGREEN)
|
||||
evergreen_emit_atomic_buffer_setup(rctx, NULL, combined_atomics, &atomic_used_mask);
|
||||
if (rctx->b.chip_class >= EVERGREEN) {
|
||||
evergreen_emit_atomic_buffer_setup_count(rctx, NULL, combined_atomics, &atomic_used_mask);
|
||||
}
|
||||
|
||||
if (index_size) {
|
||||
index_offset += info->start * index_size;
|
||||
@@ -2172,7 +2173,7 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
||||
evergreen_setup_tess_constants(rctx, info, &num_patches);
|
||||
|
||||
/* Emit states. */
|
||||
r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE);
|
||||
r600_need_cs_space(rctx, has_user_indices ? 5 : 0, TRUE, util_bitcount(atomic_used_mask));
|
||||
r600_flush_emit(rctx);
|
||||
|
||||
mask = rctx->dirty_atoms;
|
||||
@@ -2180,6 +2181,10 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
||||
r600_emit_atom(rctx, rctx->atoms[u_bit_scan64(&mask)]);
|
||||
}
|
||||
|
||||
if (rctx->b.chip_class >= EVERGREEN) {
|
||||
evergreen_emit_atomic_buffer_setup(rctx, false, combined_atomics, atomic_used_mask);
|
||||
}
|
||||
|
||||
if (rctx->b.chip_class == CAYMAN) {
|
||||
/* Copied from radeonsi. */
|
||||
unsigned primgroup_size = 128; /* recommended without a GS */
|
||||
@@ -3284,7 +3289,7 @@ static void r600_set_active_query_state(struct pipe_context *ctx, boolean enable
|
||||
static void r600_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
|
||||
bool include_draw_vbo)
|
||||
{
|
||||
r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo);
|
||||
r600_need_cs_space((struct r600_context*)ctx, num_dw, include_draw_vbo, 0);
|
||||
}
|
||||
|
||||
/* keep this at the end of this file, please */
|
||||
|
@@ -114,6 +114,7 @@ static void si_init_compiler(struct si_screen *sscreen,
|
||||
sscreen->info.chip_class <= VI;
|
||||
|
||||
enum ac_target_machine_options tm_options =
|
||||
AC_TM_AUTO_WAITCNT_BEFORE_BARRIER |
|
||||
(sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
|
||||
(sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
|
||||
(sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
|
||||
|
@@ -37,7 +37,7 @@ extern "C" {
|
||||
struct pipe_screen *swr_create_screen(struct sw_winsys *winsys);
|
||||
|
||||
// arch-specific dll entry point
|
||||
PUBLIC struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
|
||||
struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
|
||||
|
||||
// cleanup for failed screen creation
|
||||
void swr_destroy_screen_internal(struct swr_screen **screen);
|
||||
|
@@ -1143,12 +1143,10 @@ swr_validate_env_options(struct swr_screen *screen)
|
||||
}
|
||||
|
||||
|
||||
PUBLIC
|
||||
struct pipe_screen *
|
||||
swr_create_screen_internal(struct sw_winsys *winsys)
|
||||
{
|
||||
struct swr_screen *screen = CALLOC_STRUCT(swr_screen);
|
||||
memset(screen, 0, sizeof(struct swr_screen));
|
||||
|
||||
if (!screen)
|
||||
return NULL;
|
||||
|
@@ -585,6 +585,8 @@ v3d_get_device_info(struct v3d_screen *screen)
|
||||
uint32_t minor = (ident1.value >> 0) & 0xf;
|
||||
screen->devinfo.ver = major * 10 + minor;
|
||||
|
||||
screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 1024;
|
||||
|
||||
switch (screen->devinfo.ver) {
|
||||
case 33:
|
||||
case 41:
|
||||
|
@@ -306,6 +306,13 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
||||
}
|
||||
}
|
||||
|
||||
cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) {
|
||||
vcm.number_of_16_vertex_batches_for_binning =
|
||||
v3d->prog.cs->prog_data.vs->vcm_cache_size;
|
||||
vcm.number_of_16_vertex_batches_for_rendering =
|
||||
v3d->prog.vs->prog_data.vs->vcm_cache_size;
|
||||
}
|
||||
|
||||
cl_emit(&job->bcl, GL_SHADER_STATE, state) {
|
||||
state.address = cl_address(job->indirect.bo, shader_rec_offset);
|
||||
state.number_of_attribute_arrays = num_elements_to_emit;
|
||||
|
@@ -222,6 +222,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
|
||||
attr.coordinate_shader_vpm_offset = 0;
|
||||
attr.vertex_shader_vpm_offset = 0;
|
||||
}
|
||||
|
||||
vc4_bo_unreference(&bo);
|
||||
}
|
||||
|
||||
cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
|
||||
|
@@ -121,7 +121,8 @@ vc4_fence_server_sync(struct pipe_context *pctx,
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
struct vc4_fence *fence = vc4_fence(pfence);
|
||||
|
||||
sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);
|
||||
if (fence->fd >= 0)
|
||||
sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -142,8 +143,12 @@ vc4_fence_context_init(struct vc4_context *vc4)
|
||||
/* Since we initialize the in_fence_fd to -1 (no wait necessary),
|
||||
* we also need to initialize our in_syncobj as signaled.
|
||||
*/
|
||||
return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
|
||||
&vc4->in_syncobj);
|
||||
if (vc4->screen->has_syncobj) {
|
||||
return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
|
||||
&vc4->in_syncobj);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -38,6 +38,7 @@
|
||||
#include "vc4_context.h"
|
||||
#include "vc4_qpu.h"
|
||||
#include "vc4_qir.h"
|
||||
#include "mesa/state_tracker/st_glsl_types.h"
|
||||
|
||||
static struct qreg
|
||||
ntq_get_src(struct vc4_compile *c, nir_src src, int i);
|
||||
@@ -50,6 +51,12 @@ type_size(const struct glsl_type *type)
|
||||
return glsl_count_attribute_slots(type, false);
|
||||
}
|
||||
|
||||
static int
|
||||
uniforms_type_size(const struct glsl_type *type)
|
||||
{
|
||||
return st_glsl_storage_type_size(type, false);
|
||||
}
|
||||
|
||||
static void
|
||||
resize_qreg_array(struct vc4_compile *c,
|
||||
struct qreg **regs,
|
||||
@@ -1685,7 +1692,7 @@ static void
|
||||
ntq_setup_uniforms(struct vc4_compile *c)
|
||||
{
|
||||
nir_foreach_variable(var, &c->s->uniforms) {
|
||||
uint32_t vec4_count = type_size(var->type);
|
||||
uint32_t vec4_count = uniforms_type_size(var->type);
|
||||
unsigned vec4_size = 4 * sizeof(float);
|
||||
|
||||
declare_uniform_range(c, var->data.driver_location * vec4_size,
|
||||
@@ -2469,9 +2476,13 @@ vc4_shader_state_create(struct pipe_context *pctx,
|
||||
*/
|
||||
s = cso->ir.nir;
|
||||
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size,
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform,
|
||||
type_size,
|
||||
(nir_lower_io_options)0);
|
||||
} else {
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_uniform,
|
||||
uniforms_type_size,
|
||||
(nir_lower_io_options)0);
|
||||
} else {
|
||||
assert(cso->type == PIPE_SHADER_IR_TGSI);
|
||||
|
||||
if (vc4_debug & VC4_DEBUG_TGSI) {
|
||||
|
@@ -614,7 +614,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
}
|
||||
|
||||
so->texture_p0 =
|
||||
(VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
|
||||
(VC4_SET_FIELD((rsc->slices[0].offset +
|
||||
cso->u.tex.first_layer *
|
||||
rsc->cube_map_stride) >> 12, VC4_TEX_P0_OFFSET) |
|
||||
VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
|
||||
VC4_SET_FIELD(so->force_first_level ?
|
||||
cso->u.tex.last_level :
|
||||
|
@@ -26,8 +26,12 @@
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#if !defined(ANDROID) || ANDROID_API_LEVEL >= 26
|
||||
/* Android's libc began supporting shm in Oreo */
|
||||
#define HAVE_SHM
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "pipe/p_format.h"
|
||||
@@ -83,6 +87,7 @@ dri_sw_is_displaytarget_format_supported( struct sw_winsys *ws,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SHM
|
||||
static char *
|
||||
alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size)
|
||||
{
|
||||
@@ -101,6 +106,7 @@ alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size)
|
||||
|
||||
return addr;
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct sw_displaytarget *
|
||||
dri_sw_displaytarget_create(struct sw_winsys *winsys,
|
||||
@@ -131,8 +137,11 @@ dri_sw_displaytarget_create(struct sw_winsys *winsys,
|
||||
size = dri_sw_dt->stride * nblocksy;
|
||||
|
||||
dri_sw_dt->shmid = -1;
|
||||
|
||||
#ifdef HAVE_SHM
|
||||
if (ws->lf->put_image_shm)
|
||||
dri_sw_dt->data = alloc_shm(dri_sw_dt, size);
|
||||
#endif
|
||||
|
||||
if(!dri_sw_dt->data)
|
||||
dri_sw_dt->data = align_malloc(size, alignment);
|
||||
@@ -156,8 +165,10 @@ dri_sw_displaytarget_destroy(struct sw_winsys *ws,
|
||||
struct dri_sw_displaytarget *dri_sw_dt = dri_sw_displaytarget(dt);
|
||||
|
||||
if (dri_sw_dt->shmid >= 0) {
|
||||
#ifdef HAVE_SHM
|
||||
shmdt(dri_sw_dt->data);
|
||||
shmctl(dri_sw_dt->shmid, IPC_RMID, 0);
|
||||
#endif
|
||||
} else {
|
||||
align_free(dri_sw_dt->data);
|
||||
}
|
||||
|
@@ -176,6 +176,8 @@ kms_sw_displaytarget_create(struct sw_winsys *ws,
|
||||
|
||||
list_inithead(&kms_sw_dt->planes);
|
||||
kms_sw_dt->ref_count = 1;
|
||||
kms_sw_dt->mapped = MAP_FAILED;
|
||||
kms_sw_dt->ro_mapped = MAP_FAILED;
|
||||
|
||||
kms_sw_dt->format = format;
|
||||
|
||||
@@ -262,7 +264,7 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,
|
||||
|
||||
prot = (flags == PIPE_TRANSFER_READ) ? PROT_READ : (PROT_READ | PROT_WRITE);
|
||||
void **ptr = (flags == PIPE_TRANSFER_READ) ? &kms_sw_dt->ro_mapped : &kms_sw_dt->mapped;
|
||||
if (!*ptr) {
|
||||
if (*ptr == MAP_FAILED) {
|
||||
void *tmp = mmap(0, kms_sw_dt->size, prot, MAP_SHARED,
|
||||
kms_sw->fd, map_req.offset);
|
||||
if (tmp == MAP_FAILED)
|
||||
@@ -332,6 +334,8 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
|
||||
FREE(kms_sw_dt);
|
||||
return NULL;
|
||||
}
|
||||
kms_sw_dt->mapped = MAP_FAILED;
|
||||
kms_sw_dt->ro_mapped = MAP_FAILED;
|
||||
kms_sw_dt->size = lseek_ret;
|
||||
kms_sw_dt->ref_count = 1;
|
||||
kms_sw_dt->handle = handle;
|
||||
@@ -368,10 +372,14 @@ kms_sw_displaytarget_unmap(struct sw_winsys *ws,
|
||||
DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->mapped);
|
||||
DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->ro_mapped);
|
||||
|
||||
munmap(kms_sw_dt->mapped, kms_sw_dt->size);
|
||||
kms_sw_dt->mapped = NULL;
|
||||
munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size);
|
||||
kms_sw_dt->ro_mapped = NULL;
|
||||
if (kms_sw_dt->mapped != MAP_FAILED) {
|
||||
munmap(kms_sw_dt->mapped, kms_sw_dt->size);
|
||||
kms_sw_dt->mapped = MAP_FAILED;
|
||||
}
|
||||
if (kms_sw_dt->ro_mapped != MAP_FAILED) {
|
||||
munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size);
|
||||
kms_sw_dt->ro_mapped = MAP_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
static struct sw_displaytarget *
|
||||
|
@@ -19,9 +19,6 @@
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
if HAVE_SHARED_GLAPI
|
||||
SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
|
||||
endif
|
||||
|
||||
SUBDIRS =
|
||||
|
||||
@@ -181,7 +178,7 @@ GL_LIBS = \
|
||||
$(LIBDRM_LIBS) \
|
||||
libglx.la \
|
||||
$(top_builddir)/src/mapi/glapi/libglapi.la \
|
||||
$(SHARED_GLAPI_LIB) \
|
||||
$(top_builddir)/src/mapi/shared-glapi/libglapi.la \
|
||||
$(GL_LIB_DEPS)
|
||||
|
||||
GL_LDFLAGS = \
|
||||
|
@@ -152,7 +152,7 @@ static const struct extension_info known_glx_extensions[] = {
|
||||
{ GLX(ATI_pixel_format_float), VER(0,0), N, N, N, N },
|
||||
{ GLX(INTEL_swap_event), VER(0,0), Y, N, N, N },
|
||||
{ GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N },
|
||||
{ GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
|
||||
{ GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, N, Y },
|
||||
{ GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y },
|
||||
{ GLX(MESA_swap_control), VER(0,0), Y, N, N, Y },
|
||||
{ GLX(NV_float_buffer), VER(0,0), N, N, N, N },
|
||||
|
@@ -21,7 +21,9 @@
|
||||
|
||||
noinst_PROGRAMS += \
|
||||
tools/aubinator \
|
||||
tools/aubinator_error_decode
|
||||
tools/aubinator_error_decode \
|
||||
tools/error2aub
|
||||
|
||||
|
||||
tools_aubinator_SOURCES = \
|
||||
tools/aubinator.c \
|
||||
@@ -59,3 +61,23 @@ tools_aubinator_error_decode_LDADD = \
|
||||
tools_aubinator_error_decode_CFLAGS = \
|
||||
$(AM_CFLAGS) \
|
||||
$(ZLIB_CFLAGS)
|
||||
|
||||
|
||||
tools_error2aub_SOURCES = \
|
||||
tools/gen_context.h \
|
||||
tools/gen8_context.h \
|
||||
tools/gen10_context.h \
|
||||
tools/aub_write.h \
|
||||
tools/aub_write.c \
|
||||
tools/error2aub.c
|
||||
|
||||
tools_error2aub_CFLAGS = \
|
||||
$(AM_CFLAGS) \
|
||||
$(ZLIB_CFLAGS)
|
||||
|
||||
tools_error2aub_LDADD = \
|
||||
dev/libintel_dev.la \
|
||||
$(PTHREAD_LIBS) \
|
||||
$(DLOPEN_LIBS) \
|
||||
$(ZLIB_LIBS) \
|
||||
-lm
|
||||
|
@@ -75,18 +75,6 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
|
||||
if (format == ISL_FORMAT_UNSUPPORTED)
|
||||
format = surf->surf->format;
|
||||
|
||||
if (format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
|
||||
/* Unfortunately, ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as
|
||||
* a render target, which would prevent us from blitting to 24-bit
|
||||
* depth. The miptree consists of 32 bits per pixel, arranged as 24-bit
|
||||
* depth values interleaved with 8 "don't care" bits. Since depth
|
||||
* values don't require any blending, it doesn't matter how we interpret
|
||||
* the bit pattern as long as we copy the right amount of data, so just
|
||||
* map it as 8-bit BGRA.
|
||||
*/
|
||||
format = ISL_FORMAT_B8G8R8A8_UNORM;
|
||||
}
|
||||
|
||||
info->surf = *surf->surf;
|
||||
info->addr = surf->addr;
|
||||
|
||||
|
@@ -776,6 +776,14 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
    * grid of samples with in a pixel. Sample number layout shows the
    * rectangular grid of samples roughly corresponding to the real sample
    * locations with in a pixel.
    *
    * In the case of 2x MSAA, the layout of sample indices is reversed from
    * the layout of sample numbers:
    *
    * sample index layout :  ---------    sample number layout :  ---------
    *                        | 0 | 1 |                            | 1 | 0 |
    *                        ---------                            ---------
    *
    * In case of 4x MSAA, layout of sample indices matches the layout of
    * sample numbers:
    *           ---------
@@ -819,7 +827,9 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
                                     key->x_scale * key->y_scale));
   sample = nir_f2i32(b, sample);

   if (tex_samples == 8) {
   if (tex_samples == 2) {
      sample = nir_isub(b, nir_imm_int(b, 1), sample);
   } else if (tex_samples == 8) {
      sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573),
                                    nir_ishl(b, sample, nir_imm_int(b, 2))),
                        nir_imm_int(b, 0xf));
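
For reference, the added 8x MSAA path packs the index-to-sample-number mapping into the constant 0x64210573, four bits per sample, while the 2x path simply swaps indices 0 and 1. A minimal standalone sketch of that arithmetic (the helper and test below are illustrative, not part of the patch):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* CPU-side illustration of the remap the NIR code emits: for 2x MSAA the
 * sample number is (1 - index); for 8x MSAA the mapping {3, 7, 5, 0, 1, 2,
 * 4, 6} is packed as nibbles into 0x64210573 and extracted with a shift
 * and mask. */
static uint32_t
remap_sample_index(uint32_t index, unsigned tex_samples)
{
   if (tex_samples == 2)
      return 1 - index;
   if (tex_samples == 8)
      return (0x64210573u >> (index * 4)) & 0xf;
   return index;
}

int
main(void)
{
   /* Same table as map_8x in gen6_set_sample_maps() further down. */
   static const uint32_t map_8x[8] = {3, 7, 5, 0, 1, 2, 4, 6};

   for (uint32_t i = 0; i < 8; i++)
      assert(remap_sample_index(i, 8) == map_8x[i]);
   assert(remap_sample_index(0, 2) == 1 && remap_sample_index(1, 2) == 0);

   printf("sample remap tables verified\n");
   return 0;
}
```
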
@@ -984,14 +994,14 @@ convert_color(struct nir_builder *b, nir_ssa_def *color,
   nir_ssa_def *value;

   if (key->dst_format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
      /* The destination image is bound as R32_UNORM but the data needs to be
      /* The destination image is bound as R32_UINT but the data needs to be
       * in R24_UNORM_X8_TYPELESS. The bottom 24 are the actual data and the
       * top 8 need to be zero. We can accomplish this by simply multiplying
       * by a factor to scale things down.
       */
      float factor = (float)((1 << 24) - 1) / (float)UINT32_MAX;
      value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)),
                       nir_imm_float(b, factor));
      unsigned factor = (1 << 24) - 1;
      value = nir_fsat(b, nir_channel(b, color, 0));
      value = nir_f2i32(b, nir_fmul(b, value, nir_imm_float(b, factor)));
   } else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) {
      value = nir_format_linear_to_srgb(b, nir_channel(b, color, 0));
   } else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
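
The replacement shader math clamps the red channel to [0, 1], scales it by (1 << 24) - 1, and converts it to an integer, so the low 24 bits carry the depth value and the top 8 bits of the R32_UINT destination stay zero. A minimal CPU-side sketch of the same arithmetic (the helper name is hypothetical):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the shader path: clamp to [0, 1], scale by 2^24 - 1, convert
 * to an integer. The result always fits in the low 24 bits, leaving the
 * upper 8 bits of the R32_UINT destination zero. */
static uint32_t
pack_depth24(float depth)
{
   const float factor = (float)((1u << 24) - 1);
   if (depth < 0.0f) depth = 0.0f;
   if (depth > 1.0f) depth = 1.0f;
   return (uint32_t)(depth * factor);
}

int
main(void)
{
   assert(pack_depth24(0.0f) == 0u);
   assert(pack_depth24(1.0f) == 0xffffffu);   /* top 8 bits are zero */
   assert(pack_depth24(2.0f) == 0xffffffu);   /* saturation */
   printf("0.5 -> 0x%06x\n", pack_depth24(0.5f));
   return 0;
}
```
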
@@ -1976,7 +1986,7 @@ try_blorp_blit(struct blorp_batch *batch,
         isl_format_rgbx_to_rgba(params->dst.view.format);
   } else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
      wm_prog_key->dst_format = params->dst.view.format;
      params->dst.view.format = ISL_FORMAT_R32_UNORM;
      params->dst.view.format = ISL_FORMAT_R32_UINT;
   } else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) {
      params->dst.view.swizzle =
         isl_swizzle_compose(params->dst.view.swizzle,
@@ -2240,6 +2250,17 @@ blorp_blit(struct blorp_batch *batch,
      }
   }

   /* ISL_FORMAT_R24_UNORM_X8_TYPELESS it isn't supported as a render target,
    * which requires shader math to render to it. Blitting Z24X8 to Z24X8
    * is fairly common though, so we'd like to avoid it. Since we don't need
    * to blend depth values, we can simply pick a renderable format with the
    * right number of bits-per-pixel, like 8-bit BGRA.
    */
   if (dst_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS &&
       src_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
      src_format = dst_format = ISL_FORMAT_B8G8R8A8_UNORM;
   }

   brw_blorp_surface_info_init(batch->blorp, &params.src, src_surf, src_level,
                               src_layer, src_format, false);
   brw_blorp_surface_info_init(batch->blorp, &params.dst, dst_surf, dst_level,
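
Both this comment and the earlier one in brw_blorp_surface_info_init rest on the same observation: a Z24X8 texel and a B8G8R8A8 texel are both 32 bits, so a raw copy through the renderable color format leaves the depth payload untouched. A trivial sanity check of that reasoning (plain C, illustrative only):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
   /* A Z24X8 texel: 24 bits of depth in the low bits, 8 "don't care" bits
    * on top. Both Z24X8 and B8G8R8A8 are 32 bits per pixel, so a blit that
    * copies raw texels through the color format cannot lose depth data as
    * long as no blending or format conversion is applied. */
   uint32_t z24x8 = (0xABu << 24) | 0x123456u;   /* padding 0xAB, depth 0x123456 */
   uint32_t bgra  = z24x8;                       /* the blit copies all 32 bits */

   assert((bgra & 0x00ffffffu) == 0x123456u);    /* depth payload intact */
   printf("depth bits after round trip: 0x%06x\n", bgra & 0x00ffffffu);
   return 0;
}
```
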
@@ -42,10 +42,10 @@ prefix##0YOffset = 0.5;
 * c 1
 */
#define GEN_SAMPLE_POS_2X(prefix) \
prefix##0XOffset = 0.25; \
prefix##0YOffset = 0.25; \
prefix##1XOffset = 0.75; \
prefix##1YOffset = 0.75;
prefix##0XOffset = 0.75; \
prefix##0YOffset = 0.75; \
prefix##1XOffset = 0.25; \
prefix##1YOffset = 0.25;

/**
 * Sample positions:
@@ -5115,6 +5115,25 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
      }
   }

   if (devinfo->gen < 6) {
      /* From the G45 PRM, Volume 4 Page 361:
       *
       *    "Operand Alignment Rule: With the exceptions listed below, a
       *     source/destination operand in general should be aligned to even
       *     256-bit physical register with a region size equal to two 256-bit
       *     physical registers."
       *
       * Normally we enforce this by allocating virtual registers to the
       * even-aligned class. But we need to handle payload registers.
       */
      for (unsigned i = 0; i < inst->sources; i++) {
         if (inst->src[i].file == FIXED_GRF && (inst->src[i].nr & 1) &&
             inst->size_read(i) > REG_SIZE) {
            max_width = MIN2(max_width, 8);
         }
      }
   }

   /* From the IVB PRMs:
    * "When an instruction is SIMD32, the low 16 bits of the execution mask
    *  are applied for both halves of the SIMD32 instruction. If different
@@ -6321,6 +6340,7 @@ fs_visitor::optimize()
   if (OPT(lower_load_payload)) {
      split_virtual_grfs();
      OPT(register_coalesce);
      OPT(lower_simd_width);
      OPT(compute_to_mrf);
      OPT(dead_code_eliminate);
   }
@@ -713,18 +713,6 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
   nir_validate_shader(*producer);
   nir_validate_shader(*consumer);

   const bool p_is_scalar =
      compiler->scalar_stage[(*producer)->info.stage];
   const bool c_is_scalar =
      compiler->scalar_stage[(*consumer)->info.stage];

   if (p_is_scalar && c_is_scalar) {
      NIR_PASS_V(*producer, nir_lower_io_to_scalar_early, nir_var_shader_out);
      NIR_PASS_V(*consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);
      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);
      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
   }

   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);

@@ -741,7 +729,12 @@ brw_nir_link_shaders(const struct brw_compiler *compiler,
      NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
                 brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));

      const bool p_is_scalar =
         compiler->scalar_stage[(*producer)->info.stage];
      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);

      const bool c_is_scalar =
         compiler->scalar_stage[(*consumer)->info.stage];
      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
   }
}
@@ -590,7 +590,7 @@ handle_memtrace_reg_write(uint32_t *p)
   uint32_t pphwsp_addr = context_descriptor & 0xfffff000;
   struct gen_batch_decode_bo pphwsp_bo = get_ggtt_batch_bo(NULL, pphwsp_addr);
   uint32_t *context = (uint32_t *)((uint8_t *)pphwsp_bo.map +
                                    (pphwsp_bo.addr - pphwsp_addr) +
                                    (pphwsp_addr - pphwsp_bo.addr) +
                                    pphwsp_size);

   uint32_t ring_buffer_head = context[5];
@@ -601,7 +601,7 @@ handle_memtrace_reg_write(uint32_t *p)
   struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL,
                                                          ring_buffer_start);
   assert(ring_bo.size > 0);
   void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - ring_buffer_start);
   void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr);

   if (context_descriptor & 0x100 /* ppgtt */) {
      batch_ctx.get_bo = get_ppgtt_batch_bo;
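
Both hunks fix the same sign error: the byte offset of a GPU address inside a mapped BO is (address - bo.addr), not the reverse, and that offset is what gets added to the CPU-side map pointer. A minimal sketch of the corrected arithmetic (toy types, not the aubinator structures):

```c
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for a decoded BO: a base GPU address plus a CPU mapping of
 * the same bytes. */
struct toy_bo {
   uint64_t addr;   /* GPU address where the BO starts */
   uint64_t size;
   void *map;       /* CPU pointer to the BO contents */
};

/* The offset of a GPU address inside the BO is (addr - bo->addr); adding it
 * to the CPU map gives the matching CPU pointer. The pre-fix code computed
 * (bo->addr - addr), which is only right when the two addresses coincide. */
static void *
gtt_addr_to_ptr(const struct toy_bo *bo, uint64_t addr)
{
   assert(addr >= bo->addr && addr < bo->addr + bo->size);
   return (uint8_t *)bo->map + (addr - bo->addr);
}

int
main(void)
{
   uint8_t storage[4096] = {0};
   storage[0x500] = 0xaa;

   struct toy_bo bo = { .addr = 0x100000, .size = sizeof(storage), .map = storage };
   uint8_t *p = gtt_addr_to_ptr(&bo, 0x100500);

   assert(*p == 0xaa);
   printf("GPU address 0x100500 -> map offset 0x%zx\n", (size_t)(p - storage));
   return 0;
}
```
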
@@ -205,7 +205,7 @@ main(int argc, char *argv[])
      BO_TYPE_UNKNOWN = 0,
      BO_TYPE_BATCH,
      BO_TYPE_USER,
   } bo_type;
   } bo_type = BO_TYPE_UNKNOWN;
   uint64_t bo_addr;

   char *line = NULL;
@@ -340,18 +340,16 @@ try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
   if (binding->immutable_samplers == NULL)
      return false;

   unsigned texture_index = tex->texture_index;
   assert(tex->texture_index == 0);
   unsigned array_index = 0;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      nir_const_value *const_index = nir_src_as_const_value(deref->arr.index);
      if (!const_index)
         return false;
      size_t hw_binding_size =
         anv_descriptor_set_binding_layout_get_hw_size(binding);
      texture_index += MIN2(const_index->u32[0], hw_binding_size - 1);
      array_index = MIN2(const_index->u32[0], binding->array_size - 1);
   }
   const struct anv_sampler *sampler =
      binding->immutable_samplers[texture_index];
   const struct anv_sampler *sampler = binding->immutable_samplers[array_index];

   if (sampler->conversion == NULL)
      return false;
@@ -496,7 +496,6 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
                  uint32_t stride)
{
   struct brw_bo *bo;
   unsigned int page_size = getpagesize();
   int ret;
   struct bo_cache_bucket *bucket;
   bool alloc_from_cache;
@@ -522,12 +521,12 @@ bo_alloc_internal(struct brw_bufmgr *bufmgr,
    * allocation up.
    */
   if (bucket == NULL) {
      bo_size = size;
      if (bo_size < page_size)
         bo_size = page_size;
      unsigned int page_size = getpagesize();
      bo_size = size == 0 ? page_size : ALIGN(size, page_size);
   } else {
      bo_size = bucket->size;
   }
   assert(bo_size);

   mtx_lock(&bufmgr->lock);
   /* Get a buffer out of the cache if available */
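
For sizes that do not fall into a cache bucket, the rewritten path rounds every request up to a whole number of pages (and still gives a zero-byte request one page), whereas the old code only bumped sub-page sizes and left, say, a 5000-byte request at 5000 bytes. A small sketch of the new rounding (the helper name is hypothetical; Mesa's ALIGN() macro performs the same round-up):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Same rounding the fixed code performs via ALIGN(): round size up to a
 * multiple of the page size, and give zero-byte requests one full page,
 * since GEM buffer objects are allocated in whole pages anyway. */
static uint64_t
round_bo_size(uint64_t size, uint64_t page_size)
{
   if (size == 0)
      return page_size;
   return (size + page_size - 1) / page_size * page_size;
}

int
main(void)
{
   const uint64_t page = 4096;

   assert(round_bo_size(0, page) == 4096);
   assert(round_bo_size(100, page) == 4096);
   /* The old code left a 5000-byte request at 5000 bytes; the fixed code
    * asks for two full pages. */
   assert(round_bo_size(5000, page) == 8192);
   assert(round_bo_size(8192, page) == 8192);
   printf("all sizes page-aligned\n");
   return 0;
}
```
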
@@ -695,7 +695,7 @@ brw_initialize_context_constants(struct brw_context *brw)
   /* ARB_viewport_array, OES_viewport_array */
   if (devinfo->gen >= 6) {
      ctx->Const.MaxViewports = GEN6_NUM_VIEWPORTS;
      ctx->Const.ViewportSubpixelBits = 0;
      ctx->Const.ViewportSubpixelBits = 8;

      /* Cast to float before negating because MaxViewportWidth is unsigned.
       */
@@ -38,13 +38,13 @@
/**
 * 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8).
 *
 * 2x MSAA sample positions are (0.25, 0.25) and (0.75, 0.75):
 * 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25):
 *   4 c
 * 4 0
 * c 1
 * 4 1
 * c 0
 */
static const uint32_t
brw_multisample_positions_1x_2x = 0x0088cc44;
brw_multisample_positions_1x_2x = 0x008844cc;

/**
 * Sample positions:
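
The packed constant appears to hold one byte per sample, each nibble a sub-pixel offset in 1/16ths of a pixel (0x4 = 0.25, 0x8 = 0.5, 0xc = 0.75), so the fix swaps which position sample 0 and sample 1 receive. A small decode sketch under that assumption (field layout inferred from the surrounding code and comments, not from hardware documentation):

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed layout: byte 0 is 2x sample 0, byte 1 is 2x sample 1, byte 2 is
 * the single 1x sample; each nibble is an offset in 1/16ths of a pixel. */
static float
nibble_to_offset(uint32_t nibble)
{
   return nibble / 16.0f;
}

int
main(void)
{
   const uint32_t positions = 0x008844cc;   /* value from the fixed code */

   uint32_t s0  = (positions >> 0) & 0xff;  /* 2x sample 0: 0xcc */
   uint32_t s1  = (positions >> 8) & 0xff;  /* 2x sample 1: 0x44 */
   uint32_t s1x = (positions >> 16) & 0xff; /* 1x sample:   0x88 */

   assert(nibble_to_offset(s0 & 0xf) == 0.75f && nibble_to_offset(s0 >> 4) == 0.75f);
   assert(nibble_to_offset(s1 & 0xf) == 0.25f && nibble_to_offset(s1 >> 4) == 0.25f);
   assert(nibble_to_offset(s1x & 0xf) == 0.5f && nibble_to_offset(s1x >> 4) == 0.5f);

   printf("2x sample 0 at (0.75, 0.75), sample 1 at (0.25, 0.25)\n");
   return 0;
}
```
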
@@ -68,10 +68,10 @@ gen6_get_sample_position(struct gl_context *ctx,
 * index layout in case of 2X and 4x MSAA, but they are different in
 * case of 8X MSAA.
 *
 * 2X MSAA sample index / number layout
 *           ---------
 *           | 0 | 1 |
 *           ---------
 * 8X MSAA sample index layout    8x MSAA sample number layout
 *           ---------                      ---------
 *           | 0 | 1 |                      | 1 | 0 |
 *           ---------                      ---------
 *
 * 4X MSAA sample index / number layout
 *           ---------
@@ -107,7 +107,7 @@ gen6_get_sample_position(struct gl_context *ctx,
void
gen6_set_sample_maps(struct gl_context *ctx)
{
   uint8_t map_2x[2] = {0, 1};
   uint8_t map_2x[2] = {1, 0};
   uint8_t map_4x[4] = {0, 1, 2, 3};
   uint8_t map_8x[8] = {3, 7, 5, 0, 1, 2, 4, 6};
   uint8_t map_16x[16] = { 15, 10, 9, 7, 4, 1, 3, 13,
@@ -7,7 +7,7 @@ Name: gl
Description: Mesa OpenGL library
Requires.private: @GL_PC_REQ_PRIV@
Version: @PACKAGE_VERSION@
Libs: -L${libdir} -l@GL_LIB@
Libs: -L${libdir} -l@GL_PKGCONF_LIB@
Libs.private: @GL_PC_LIB_PRIV@
Cflags: -I${includedir} @GL_PC_CFLAGS@
glx_tls: @GLX_TLS@
@@ -1229,7 +1229,7 @@ void st_init_extensions(struct pipe_screen *screen,
      screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_UNORM,
                                  PIPE_TEXTURE_2D, 0, 0,
                                  PIPE_BIND_SAMPLER_VIEW) &&
      screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_SRGB,
      screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB,
                                  PIPE_TEXTURE_2D, 0, 0,
                                  PIPE_BIND_SAMPLER_VIEW) &&
      screen->is_format_supported(screen, PIPE_FORMAT_R16_UNORM,
@@ -120,6 +120,10 @@ TODO: document the other workarounds.
            <option name="allow_glsl_extension_directive_midshader" value="true" />
        </application>

        <application name="Metro 2033 Redux / Metro Last Night Redux" executable="metro">
            <option name="allow_glsl_extension_directive_midshader" value="true" />
        </application>

        <application name="Worms W.M.D" executable="Worms W.M.Dx64">
            <option name="allow_higher_compat_version" value="true" />
        </application>