Compare commits
42 Commits
explicit-s
...
mesa-18.2.
Author | SHA1 | Date | |
---|---|---|---|
|
dcd3786e6e | ||
|
d82c36a4c7 | ||
|
8061ee5883 | ||
|
bbd95de921 | ||
|
b696ab172c | ||
|
f7e8bc0f23 | ||
|
90278c7f95 | ||
|
0c1832765f | ||
|
94da454726 | ||
|
dadc50add5 | ||
|
e91782ed55 | ||
|
9df3460724 | ||
|
8be5985e65 | ||
|
6606cacd3d | ||
|
1378f33142 | ||
|
9dacf10ca8 | ||
|
7af6be8864 | ||
|
9ad14f71e6 | ||
|
6ae0a639ec | ||
|
c709206977 | ||
|
33ac5fb678 | ||
|
f0ae95492a | ||
|
a42afc8504 | ||
|
adfbf1fe84 | ||
|
4a25d8b623 | ||
|
4a769c8850 | ||
|
d39fb6d157 | ||
|
ed117c27e1 | ||
|
fdbbe4c50c | ||
|
3c3589a0ba | ||
|
37fa81f631 | ||
|
71aa72d695 | ||
|
c8d41bc58d | ||
|
c3b1a6d7fa | ||
|
cce78368df | ||
|
b6e9ef1556 | ||
|
c18ed873a5 | ||
|
88c36f4379 | ||
|
bbeb78620c | ||
|
9ddff68f6f | ||
|
2e903df72f | ||
|
cb542ac550 |
@@ -43,13 +43,15 @@ def main():
|
||||
master = os.path.join(to, os.path.basename(args.megadriver))
|
||||
|
||||
if not os.path.exists(to):
|
||||
if os.path.lexists(to):
|
||||
os.unlink(to)
|
||||
os.makedirs(to)
|
||||
shutil.copy(args.megadriver, master)
|
||||
|
||||
for driver in args.drivers:
|
||||
abs_driver = os.path.join(to, driver)
|
||||
|
||||
if os.path.exists(abs_driver):
|
||||
if os.path.lexists(abs_driver):
|
||||
os.unlink(abs_driver)
|
||||
print('installing {} to {}'.format(args.megadriver, abs_driver))
|
||||
os.link(master, abs_driver)
|
||||
@@ -60,7 +62,7 @@ def main():
|
||||
|
||||
name, ext = os.path.splitext(driver)
|
||||
while ext != '.so':
|
||||
if os.path.exists(name):
|
||||
if os.path.lexists(name):
|
||||
os.unlink(name)
|
||||
os.symlink(driver, name)
|
||||
name, ext = os.path.splitext(name)
|
||||
|
26
configure.ac
26
configure.ac
@@ -1503,15 +1503,15 @@ fi
|
||||
AC_ARG_WITH([gl-lib-name],
|
||||
[AS_HELP_STRING([--with-gl-lib-name@<:@=NAME@:>@],
|
||||
[specify GL library name @<:@default=GL@:>@])],
|
||||
[GL_LIB=$withval],
|
||||
[GL_LIB="$DEFAULT_GL_LIB_NAME"])
|
||||
[AC_MSG_ERROR([--with-gl-lib-name is no longer supported. Rename the library manually if needed.])],
|
||||
[])
|
||||
AC_ARG_WITH([osmesa-lib-name],
|
||||
[AS_HELP_STRING([--with-osmesa-lib-name@<:@=NAME@:>@],
|
||||
[specify OSMesa library name @<:@default=OSMesa@:>@])],
|
||||
[OSMESA_LIB=$withval],
|
||||
[OSMESA_LIB=OSMesa])
|
||||
AS_IF([test "x$GL_LIB" = xyes], [GL_LIB="$DEFAULT_GL_LIB_NAME"])
|
||||
AS_IF([test "x$OSMESA_LIB" = xyes], [OSMESA_LIB=OSMesa])
|
||||
[AC_MSG_ERROR([--with-osmesa-lib-name is no longer supported. Rename the library manually if needed.])],
|
||||
[])
|
||||
GL_LIB="$DEFAULT_GL_LIB_NAME"
|
||||
OSMESA_LIB=OSMesa
|
||||
|
||||
dnl
|
||||
dnl Mangled Mesa support
|
||||
@@ -1523,6 +1523,9 @@ AC_ARG_ENABLE([mangling],
|
||||
[enable_mangling=no]
|
||||
)
|
||||
if test "x${enable_mangling}" = "xyes" ; then
|
||||
if test "x$enable_libglvnd" = xyes; then
|
||||
AC_MSG_ERROR([Conflicting options --enable-mangling and --enable-libglvnd.])
|
||||
fi
|
||||
DEFINES="${DEFINES} -DUSE_MGL_NAMESPACE"
|
||||
GL_LIB="Mangled${GL_LIB}"
|
||||
OSMESA_LIB="Mangled${OSMESA_LIB}"
|
||||
@@ -1530,6 +1533,15 @@ fi
|
||||
AC_SUBST([GL_LIB])
|
||||
AC_SUBST([OSMESA_LIB])
|
||||
|
||||
dnl HACK when building glx + glvnd we ship gl.pc, despite that glvnd should do it
|
||||
dnl Thus we need to use GL as a DSO name.
|
||||
if test "x$enable_libglvnd" = xyes -a "x$enable_glx" != xno; then
|
||||
GL_PKGCONF_LIB="GL"
|
||||
else
|
||||
GL_PKGCONF_LIB="$GL_LIB"
|
||||
fi
|
||||
AC_SUBST([GL_PKGCONF_LIB])
|
||||
|
||||
# Check for libdrm
|
||||
PKG_CHECK_MODULES([LIBDRM], [libdrm >= $LIBDRM_REQUIRED],
|
||||
[have_libdrm=yes], [have_libdrm=no])
|
||||
@@ -1658,6 +1670,8 @@ xxlib | xgallium-xlib)
|
||||
xdri)
|
||||
# DRI-based GLX
|
||||
|
||||
require_dri_shared_libs_and_glapi "GLX"
|
||||
|
||||
# find the DRI deps for libGL
|
||||
dri_modules="x11 xext xdamage >= $XDAMAGE_REQUIRED xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED"
|
||||
|
||||
|
@@ -989,7 +989,7 @@ if cc.links('''
|
||||
freelocale(loc);
|
||||
return 0;
|
||||
}''',
|
||||
extra_args : pre_args,
|
||||
args : pre_args,
|
||||
name : 'strtod has locale support')
|
||||
pre_args += '-DHAVE_STRTOD_L'
|
||||
endif
|
||||
|
@@ -27,4 +27,6 @@ include $(LOCAL_PATH)/Makefile.sources
|
||||
|
||||
include $(LOCAL_PATH)/Android.addrlib.mk
|
||||
include $(LOCAL_PATH)/Android.common.mk
|
||||
ifneq ($(filter radeonsi,$(BOARD_GPU_DRIVERS)),)
|
||||
include $(LOCAL_PATH)/vulkan/Android.mk
|
||||
endif
|
||||
|
@@ -62,6 +62,7 @@ LOCAL_SRC_FILES := \
|
||||
$(VULKAN_FILES)
|
||||
|
||||
LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions
|
||||
LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR
|
||||
|
||||
$(call mesa-build-with-llvm)
|
||||
|
||||
@@ -140,6 +141,7 @@ LOCAL_SRC_FILES := \
|
||||
$(VULKAN_ANDROID_FILES)
|
||||
|
||||
LOCAL_CFLAGS += -DFORCE_BUILD_AMDGPU # instructs LLVM to declare LLVMInitializeAMDGPU* functions
|
||||
LOCAL_CFLAGS += -DVK_USE_PLATFORM_ANDROID_KHR
|
||||
|
||||
$(call mesa-build-with-llvm)
|
||||
|
||||
|
@@ -124,7 +124,7 @@ VULKAN_LIB_DEPS += \
|
||||
endif
|
||||
|
||||
if HAVE_PLATFORM_ANDROID
|
||||
AM_CPPFLAGS += $(ANDROID_CPPFLAGS)
|
||||
AM_CPPFLAGS += $(ANDROID_CPPFLAGS) -DVK_USE_PLATFORM_ANDROID_KHR
|
||||
AM_CFLAGS += $(ANDROID_CFLAGS)
|
||||
VULKAN_LIB_DEPS += $(ANDROID_LIBS)
|
||||
VULKAN_SOURCES += $(VULKAN_ANDROID_FILES)
|
||||
|
@@ -105,7 +105,7 @@ EXTENSIONS = [
|
||||
Extension('VK_EXT_sampler_filter_minmax', 1, 'device->rad_info.chip_class >= CIK'),
|
||||
Extension('VK_EXT_shader_viewport_index_layer', 1, True),
|
||||
Extension('VK_EXT_shader_stencil_export', 1, True),
|
||||
Extension('VK_EXT_vertex_attribute_divisor', 1, True),
|
||||
Extension('VK_EXT_vertex_attribute_divisor', 2, True),
|
||||
Extension('VK_AMD_draw_indirect_count', 1, True),
|
||||
Extension('VK_AMD_gcn_shader', 1, True),
|
||||
Extension('VK_AMD_rasterization_order', 1, 'device->has_out_of_order_rast'),
|
||||
|
@@ -612,7 +612,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
|
||||
}
|
||||
|
||||
if (desc->layout == VK_FORMAT_LAYOUT_ETC &&
|
||||
physical_device->rad_info.chip_class < GFX9 &&
|
||||
physical_device->rad_info.family != CHIP_VEGA10 &&
|
||||
physical_device->rad_info.family != CHIP_RAVEN &&
|
||||
physical_device->rad_info.family != CHIP_STONEY) {
|
||||
out_properties->linearTilingFeatures = linear;
|
||||
out_properties->optimalTilingFeatures = tiled;
|
||||
|
@@ -1991,8 +1991,7 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
|
||||
uint32_t divisor = ctx->options->key.vs.instance_rate_divisors[attrib_index];
|
||||
|
||||
if (divisor) {
|
||||
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.instance_id,
|
||||
ctx->abi.start_instance, "");
|
||||
buffer_index = ctx->abi.instance_id;
|
||||
|
||||
if (divisor != 1) {
|
||||
buffer_index = LLVMBuildUDiv(ctx->ac.builder, buffer_index,
|
||||
@@ -2007,8 +2006,10 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
|
||||
MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
|
||||
}
|
||||
} else {
|
||||
buffer_index = ctx->ac.i32_0;
|
||||
unreachable("Invalid vertex attribute divisor of 0.");
|
||||
}
|
||||
|
||||
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.start_instance, buffer_index, "");
|
||||
} else
|
||||
buffer_index = LLVMBuildAdd(ctx->ac.builder, ctx->abi.vertex_id,
|
||||
ctx->abi.base_vertex, "");
|
||||
|
@@ -528,6 +528,16 @@
|
||||
<field name="number of attribute arrays" size="5" start="0" type="uint"/>
|
||||
</packet>
|
||||
|
||||
<packet code="71" name="VCM Cache Size" min_ver="41">
|
||||
<field name="Number of 16-vertex batches for rendering" size="4" start="4" type="uint"/>
|
||||
<field name="Number of 16-vertex batches for binning" size="4" start="0" type="uint"/>
|
||||
</packet>
|
||||
|
||||
<packet code="73" name="VCM Cache Size" max_ver="33">
|
||||
<field name="Number of 16-vertex batches for rendering" size="4" start="4" type="uint"/>
|
||||
<field name="Number of 16-vertex batches for binning" size="4" start="0" type="uint"/>
|
||||
</packet>
|
||||
|
||||
<packet code="73" name="Transform Feedback Buffer" min_ver="41">
|
||||
<field name="Buffer Address" size="32" start="32" type="address"/>
|
||||
<field name="Buffer Size in 32-bit words" size="30" start="2" type="uint"/>
|
||||
|
@@ -27,13 +27,14 @@
|
||||
#include <stdint.h>
|
||||
|
||||
/**
|
||||
* Struct for tracking features of the V3D chip. This is where we'll store
|
||||
* boolean flags for features in a specific version, but for now it's just the
|
||||
* version
|
||||
* Struct for tracking features of the V3D chip across driver and compiler.
|
||||
*/
|
||||
struct v3d_device_info {
|
||||
/** Simple V3D version: major * 10 + minor */
|
||||
uint8_t ver;
|
||||
|
||||
/** Size of the VPM, in bytes. */
|
||||
int vpm_size;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@@ -462,6 +462,7 @@ struct choose_scoreboard {
|
||||
int last_magic_sfu_write_tick;
|
||||
int last_ldvary_tick;
|
||||
int last_uniforms_reset_tick;
|
||||
int last_thrsw_tick;
|
||||
bool tlb_locked;
|
||||
};
|
||||
|
||||
@@ -1095,10 +1096,16 @@ qpu_instruction_valid_in_thrend_slot(struct v3d_compile *c,
|
||||
}
|
||||
|
||||
static bool
|
||||
valid_thrsw_sequence(struct v3d_compile *c,
|
||||
valid_thrsw_sequence(struct v3d_compile *c, struct choose_scoreboard *scoreboard,
|
||||
struct qinst *qinst, int instructions_in_sequence,
|
||||
bool is_thrend)
|
||||
{
|
||||
/* No emitting our thrsw while the previous thrsw hasn't happened yet. */
|
||||
if (scoreboard->last_thrsw_tick + 3 >
|
||||
scoreboard->tick - instructions_in_sequence) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (int slot = 0; slot < instructions_in_sequence; slot++) {
|
||||
/* No scheduling SFU when the result would land in the other
|
||||
* thread. The simulator complains for safety, though it
|
||||
@@ -1159,7 +1166,8 @@ emit_thrsw(struct v3d_compile *c,
|
||||
if (!v3d_qpu_sig_pack(c->devinfo, &sig, &packed_sig))
|
||||
break;
|
||||
|
||||
if (!valid_thrsw_sequence(c, prev_inst, slots_filled + 1,
|
||||
if (!valid_thrsw_sequence(c, scoreboard,
|
||||
prev_inst, slots_filled + 1,
|
||||
is_thrend)) {
|
||||
break;
|
||||
}
|
||||
@@ -1173,7 +1181,9 @@ emit_thrsw(struct v3d_compile *c,
|
||||
if (merge_inst) {
|
||||
merge_inst->qpu.sig.thrsw = true;
|
||||
needs_free = true;
|
||||
scoreboard->last_thrsw_tick = scoreboard->tick - slots_filled;
|
||||
} else {
|
||||
scoreboard->last_thrsw_tick = scoreboard->tick;
|
||||
insert_scheduled_instruction(c, block, scoreboard, inst);
|
||||
time++;
|
||||
slots_filled++;
|
||||
@@ -1475,6 +1485,7 @@ v3d_qpu_schedule_instructions(struct v3d_compile *c)
|
||||
scoreboard.last_ldvary_tick = -10;
|
||||
scoreboard.last_magic_sfu_write_tick = -10;
|
||||
scoreboard.last_uniforms_reset_tick = -10;
|
||||
scoreboard.last_thrsw_tick = -10;
|
||||
|
||||
if (debug) {
|
||||
fprintf(stderr, "Pre-schedule instructions\n");
|
||||
|
@@ -648,6 +648,9 @@ struct v3d_vs_prog_data {
|
||||
|
||||
/* Total number of components written, for the shader state record. */
|
||||
uint32_t vpm_output_size;
|
||||
|
||||
/* Value to be programmed in VCM_CACHE_SIZE. */
|
||||
uint8_t vcm_cache_size;
|
||||
};
|
||||
|
||||
struct v3d_fs_prog_data {
|
||||
@@ -928,7 +931,7 @@ VIR_A_ALU2(OR)
|
||||
VIR_A_ALU2(XOR)
|
||||
VIR_A_ALU2(VADD)
|
||||
VIR_A_ALU2(VSUB)
|
||||
VIR_A_ALU2(STVPMV)
|
||||
VIR_A_NODST_2(STVPMV)
|
||||
VIR_A_ALU1(NOT)
|
||||
VIR_A_ALU1(NEG)
|
||||
VIR_A_ALU1(FLAPUSH)
|
||||
|
@@ -452,6 +452,16 @@ vir_emit_def(struct v3d_compile *c, struct qinst *inst)
|
||||
{
|
||||
assert(inst->dst.file == QFILE_NULL);
|
||||
|
||||
/* If we're emitting an instruction that's a def, it had better be
|
||||
* writing a register.
|
||||
*/
|
||||
if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
|
||||
assert(inst->qpu.alu.add.op == V3D_QPU_A_NOP ||
|
||||
v3d_qpu_add_op_has_dst(inst->qpu.alu.add.op));
|
||||
assert(inst->qpu.alu.mul.op == V3D_QPU_M_NOP ||
|
||||
v3d_qpu_mul_op_has_dst(inst->qpu.alu.mul.op));
|
||||
}
|
||||
|
||||
inst->dst = vir_get_temp(c);
|
||||
|
||||
if (inst->dst.file == QFILE_TEMP)
|
||||
@@ -746,10 +756,28 @@ uint64_t *v3d_compile_vs(const struct v3d_compiler *compiler,
|
||||
if (prog_data->uses_iid)
|
||||
prog_data->vpm_input_size++;
|
||||
|
||||
/* Input/output segment size are in 8x32-bit multiples. */
|
||||
/* Input/output segment size are in sectors (8 rows of 32 bits per
|
||||
* channel).
|
||||
*/
|
||||
prog_data->vpm_input_size = align(prog_data->vpm_input_size, 8) / 8;
|
||||
prog_data->vpm_output_size = align(c->num_vpm_writes, 8) / 8;
|
||||
|
||||
/* Compute VCM cache size. We set up our program to take up less than
|
||||
* half of the VPM, so that any set of bin and render programs won't
|
||||
* run out of space. We need space for at least one input segment,
|
||||
* and then allocate the rest to output segments (one for the current
|
||||
* program, the rest to VCM). The valid range of the VCM cache size
|
||||
* field is 1-4 16-vertex batches, but GFXH-1744 limits us to 2-4
|
||||
* batches.
|
||||
*/
|
||||
assert(c->devinfo->vpm_size);
|
||||
int sector_size = 16 * sizeof(uint32_t) * 8;
|
||||
int vpm_size_in_sectors = c->devinfo->vpm_size / sector_size;
|
||||
int half_vpm = vpm_size_in_sectors / 2;
|
||||
int vpm_output_batches = half_vpm - prog_data->vpm_input_size;
|
||||
assert(vpm_output_batches >= 2);
|
||||
prog_data->vcm_cache_size = CLAMP(vpm_output_batches - 1, 2, 4);
|
||||
|
||||
return v3d_return_qpu_insts(c, final_assembly_size);
|
||||
}
|
||||
|
||||
|
@@ -94,6 +94,15 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
|
||||
}
|
||||
}
|
||||
|
||||
/* Refuse to spill a ldvary's dst, because that means
|
||||
* that ldvary's r5 would end up being used across a
|
||||
* thrsw.
|
||||
*/
|
||||
if (inst->qpu.sig.ldvary) {
|
||||
assert(inst->dst.file == QFILE_TEMP);
|
||||
BITSET_CLEAR(c->spillable, inst->dst.index);
|
||||
}
|
||||
|
||||
if (inst->is_last_thrsw)
|
||||
started_last_seg = true;
|
||||
|
||||
@@ -102,7 +111,7 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
|
||||
started_last_seg = true;
|
||||
|
||||
/* Track when we're in between a TMU setup and the
|
||||
* final LDTMU from that TMU setup. We can't
|
||||
* final LDTMU or TMUWT from that TMU setup. We can't
|
||||
* spill/fill any temps during that time, because that
|
||||
* involves inserting a new TMU setup/LDTMU sequence.
|
||||
*/
|
||||
@@ -110,6 +119,10 @@ v3d_choose_spill_node(struct v3d_compile *c, struct ra_graph *g,
|
||||
is_last_ldtmu(inst, block))
|
||||
in_tmu_operation = false;
|
||||
|
||||
if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU &&
|
||||
inst->qpu.alu.add.op == V3D_QPU_A_TMUWT)
|
||||
in_tmu_operation = false;
|
||||
|
||||
if (v3d_qpu_writes_tmu(&inst->qpu))
|
||||
in_tmu_operation = true;
|
||||
}
|
||||
@@ -206,6 +219,7 @@ v3d_spill_reg(struct v3d_compile *c, int spill_temp)
|
||||
inst->dst);
|
||||
v3d_emit_spill_tmua(c, spill_offset);
|
||||
vir_emit_thrsw(c);
|
||||
vir_TMUWT(c);
|
||||
c->spills++;
|
||||
}
|
||||
|
||||
|
@@ -1928,6 +1928,11 @@ ast_expression::do_hir(exec_list *instructions,
|
||||
|
||||
error_emitted = op[0]->type->is_error() || op[1]->type->is_error();
|
||||
|
||||
if (error_emitted) {
|
||||
result = ir_rvalue::error_value(ctx);
|
||||
break;
|
||||
}
|
||||
|
||||
type = arithmetic_result_type(op[0], op[1], false, state, & loc);
|
||||
|
||||
ir_rvalue *temp_rhs;
|
||||
|
@@ -201,6 +201,17 @@ resize_callback(struct wl_egl_window *wl_win, void *data)
|
||||
struct dri2_egl_display *dri2_dpy =
|
||||
dri2_egl_display(dri2_surf->base.Resource.Display);
|
||||
|
||||
/* Update the surface size as soon as native window is resized; from user
|
||||
* pov, this makes the effect that resize is done inmediately after native
|
||||
* window resize, without requiring to wait until the first draw.
|
||||
*
|
||||
* A more detailed and lengthy explanation can be found at
|
||||
* https://lists.freedesktop.org/archives/mesa-dev/2018-June/196474.html
|
||||
*/
|
||||
if (!dri2_surf->back) {
|
||||
dri2_surf->base.Width = wl_win->width;
|
||||
dri2_surf->base.Height = wl_win->height;
|
||||
}
|
||||
dri2_dpy->flush->invalidate(dri2_surf->dri_drawable);
|
||||
}
|
||||
|
||||
@@ -258,6 +269,9 @@ dri2_wl_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
|
||||
goto cleanup_surf;
|
||||
}
|
||||
|
||||
dri2_surf->base.Width = window->width;
|
||||
dri2_surf->base.Height = window->height;
|
||||
|
||||
visual_idx = dri2_wl_visual_idx_from_config(dri2_dpy, config);
|
||||
assert(visual_idx != -1);
|
||||
|
||||
@@ -577,8 +591,8 @@ update_buffers(struct dri2_egl_surface *dri2_surf)
|
||||
struct dri2_egl_display *dri2_dpy =
|
||||
dri2_egl_display(dri2_surf->base.Resource.Display);
|
||||
|
||||
if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
|
||||
dri2_surf->base.Height != dri2_surf->wl_win->height) {
|
||||
if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
|
||||
dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {
|
||||
|
||||
dri2_wl_release_buffers(dri2_surf);
|
||||
|
||||
@@ -1632,8 +1646,8 @@ swrast_update_buffers(struct dri2_egl_surface *dri2_surf)
|
||||
if (dri2_surf->back)
|
||||
return 0;
|
||||
|
||||
if (dri2_surf->base.Width != dri2_surf->wl_win->width ||
|
||||
dri2_surf->base.Height != dri2_surf->wl_win->height) {
|
||||
if (dri2_surf->base.Width != dri2_surf->wl_win->attached_width ||
|
||||
dri2_surf->base.Height != dri2_surf->wl_win->attached_height) {
|
||||
|
||||
dri2_wl_release_buffers(dri2_surf);
|
||||
|
||||
|
@@ -107,12 +107,17 @@ static const struct loader_dri3_vtable egl_dri3_vtable = {
|
||||
static EGLBoolean
|
||||
dri3_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
|
||||
{
|
||||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
|
||||
struct dri3_egl_surface *dri3_surf = dri3_egl_surface(surf);
|
||||
xcb_drawable_t drawable = dri3_surf->loader_drawable.drawable;
|
||||
|
||||
(void) drv;
|
||||
|
||||
loader_dri3_drawable_fini(&dri3_surf->loader_drawable);
|
||||
|
||||
if (surf->Type == EGL_PBUFFER_BIT)
|
||||
xcb_free_pixmap (dri2_dpy->conn, drawable);
|
||||
|
||||
dri2_fini_surface(surf);
|
||||
free(surf);
|
||||
|
||||
|
@@ -1131,6 +1131,31 @@ static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
|
||||
mgr->dirty_real_vb_mask = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
|
||||
unsigned *indirect_data, unsigned stride,
|
||||
unsigned draw_count)
|
||||
{
|
||||
assert(info->index_size);
|
||||
info->indirect = NULL;
|
||||
|
||||
for (unsigned i = 0; i < draw_count; i++) {
|
||||
unsigned offset = i * stride / 4;
|
||||
|
||||
info->count = indirect_data[offset + 0];
|
||||
info->instance_count = indirect_data[offset + 1];
|
||||
|
||||
if (!info->count || !info->instance_count)
|
||||
continue;
|
||||
|
||||
info->start = indirect_data[offset + 2];
|
||||
info->index_bias = indirect_data[offset + 3];
|
||||
info->start_instance = indirect_data[offset + 4];
|
||||
|
||||
u_vbuf_draw_vbo(mgr, info);
|
||||
}
|
||||
}
|
||||
|
||||
void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
|
||||
{
|
||||
struct pipe_context *pipe = mgr->pipe;
|
||||
@@ -1160,33 +1185,163 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
|
||||
|
||||
new_info = *info;
|
||||
|
||||
/* Fallback. We need to know all the parameters. */
|
||||
/* Handle indirect (multi)draws. */
|
||||
if (new_info.indirect) {
|
||||
struct pipe_transfer *transfer = NULL;
|
||||
int *data;
|
||||
const struct pipe_draw_indirect_info *indirect = new_info.indirect;
|
||||
unsigned draw_count = 0;
|
||||
|
||||
if (new_info.index_size) {
|
||||
data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
|
||||
new_info.indirect->offset, 20,
|
||||
PIPE_TRANSFER_READ, &transfer);
|
||||
new_info.index_bias = data[3];
|
||||
new_info.start_instance = data[4];
|
||||
}
|
||||
else {
|
||||
data = pipe_buffer_map_range(pipe, new_info.indirect->buffer,
|
||||
new_info.indirect->offset, 16,
|
||||
PIPE_TRANSFER_READ, &transfer);
|
||||
new_info.start_instance = data[3];
|
||||
/* Get the number of draws. */
|
||||
if (indirect->indirect_draw_count) {
|
||||
pipe_buffer_read(pipe, indirect->indirect_draw_count,
|
||||
indirect->indirect_draw_count_offset,
|
||||
4, &draw_count);
|
||||
} else {
|
||||
draw_count = indirect->draw_count;
|
||||
}
|
||||
|
||||
new_info.count = data[0];
|
||||
new_info.instance_count = data[1];
|
||||
new_info.start = data[2];
|
||||
pipe_buffer_unmap(pipe, transfer);
|
||||
new_info.indirect = NULL;
|
||||
|
||||
if (!new_info.count)
|
||||
if (!draw_count)
|
||||
return;
|
||||
|
||||
unsigned data_size = (draw_count - 1) * indirect->stride +
|
||||
(new_info.index_size ? 20 : 16);
|
||||
unsigned *data = malloc(data_size);
|
||||
if (!data)
|
||||
return; /* report an error? */
|
||||
|
||||
/* Read the used buffer range only once, because the read can be
|
||||
* uncached.
|
||||
*/
|
||||
pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
|
||||
data);
|
||||
|
||||
if (info->index_size) {
|
||||
/* Indexed multidraw. */
|
||||
unsigned index_bias0 = data[3];
|
||||
bool index_bias_same = true;
|
||||
|
||||
/* If we invoke the translate path, we have to split the multidraw. */
|
||||
if (incompatible_vb_mask ||
|
||||
mgr->ve->incompatible_elem_mask) {
|
||||
u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
|
||||
indirect->stride, draw_count);
|
||||
free(data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* See if index_bias is the same for all draws. */
|
||||
for (unsigned i = 1; i < draw_count; i++) {
|
||||
if (data[i * indirect->stride / 4 + 3] != index_bias0) {
|
||||
index_bias_same = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Split the multidraw if index_bias is different. */
|
||||
if (!index_bias_same) {
|
||||
u_vbuf_split_indexed_multidraw(mgr, &new_info, data,
|
||||
indirect->stride, draw_count);
|
||||
free(data);
|
||||
return;
|
||||
}
|
||||
|
||||
/* If we don't need to use the translate path and index_bias is
|
||||
* the same, we can process the multidraw with the time complexity
|
||||
* equal to 1 draw call (except for the index range computation).
|
||||
* We only need to compute the index range covering all draw calls
|
||||
* of the multidraw.
|
||||
*
|
||||
* The driver will not look at these values because indirect != NULL.
|
||||
* These values determine the user buffer bounds to upload.
|
||||
*/
|
||||
new_info.index_bias = index_bias0;
|
||||
new_info.min_index = ~0u;
|
||||
new_info.max_index = 0;
|
||||
new_info.start_instance = ~0u;
|
||||
unsigned end_instance = 0;
|
||||
|
||||
struct pipe_transfer *transfer = NULL;
|
||||
const uint8_t *indices;
|
||||
|
||||
if (info->has_user_indices) {
|
||||
indices = (uint8_t*)info->index.user;
|
||||
} else {
|
||||
indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
|
||||
PIPE_TRANSFER_READ, &transfer);
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < draw_count; i++) {
|
||||
unsigned offset = i * indirect->stride / 4;
|
||||
unsigned start = data[offset + 2];
|
||||
unsigned count = data[offset + 0];
|
||||
unsigned start_instance = data[offset + 4];
|
||||
unsigned instance_count = data[offset + 1];
|
||||
|
||||
if (!count || !instance_count)
|
||||
continue;
|
||||
|
||||
/* Update the ranges of instances. */
|
||||
new_info.start_instance = MIN2(new_info.start_instance,
|
||||
start_instance);
|
||||
end_instance = MAX2(end_instance, start_instance + instance_count);
|
||||
|
||||
/* Update the index range. */
|
||||
unsigned min, max;
|
||||
new_info.count = count; /* only used by get_minmax_index */
|
||||
u_vbuf_get_minmax_index_mapped(&new_info,
|
||||
indices +
|
||||
new_info.index_size * start,
|
||||
&min, &max);
|
||||
|
||||
new_info.min_index = MIN2(new_info.min_index, min);
|
||||
new_info.max_index = MAX2(new_info.max_index, max);
|
||||
}
|
||||
free(data);
|
||||
|
||||
if (transfer)
|
||||
pipe_buffer_unmap(pipe, transfer);
|
||||
|
||||
/* Set the final instance count. */
|
||||
new_info.instance_count = end_instance - new_info.start_instance;
|
||||
|
||||
if (new_info.start_instance == ~0u || !new_info.instance_count)
|
||||
return;
|
||||
} else {
|
||||
/* Non-indexed multidraw.
|
||||
*
|
||||
* Keep the draw call indirect and compute minimums & maximums,
|
||||
* which will determine the user buffer bounds to upload, but
|
||||
* the driver will not look at these values because indirect != NULL.
|
||||
*
|
||||
* This efficiently processes the multidraw with the time complexity
|
||||
* equal to 1 draw call.
|
||||
*/
|
||||
new_info.start = ~0u;
|
||||
new_info.start_instance = ~0u;
|
||||
unsigned end_vertex = 0;
|
||||
unsigned end_instance = 0;
|
||||
|
||||
for (unsigned i = 0; i < draw_count; i++) {
|
||||
unsigned offset = i * indirect->stride / 4;
|
||||
unsigned start = data[offset + 2];
|
||||
unsigned count = data[offset + 0];
|
||||
unsigned start_instance = data[offset + 3];
|
||||
unsigned instance_count = data[offset + 1];
|
||||
|
||||
new_info.start = MIN2(new_info.start, start);
|
||||
new_info.start_instance = MIN2(new_info.start_instance,
|
||||
start_instance);
|
||||
|
||||
end_vertex = MAX2(end_vertex, start + count);
|
||||
end_instance = MAX2(end_instance, start_instance + instance_count);
|
||||
}
|
||||
|
||||
/* Set the final counts. */
|
||||
new_info.count = end_vertex - new_info.start;
|
||||
new_info.instance_count = end_instance - new_info.start_instance;
|
||||
|
||||
if (new_info.start == ~0u || !new_info.count || !new_info.instance_count)
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (new_info.index_size) {
|
||||
@@ -1211,7 +1366,8 @@ void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info)
|
||||
* We would have to break this drawing operation into several ones. */
|
||||
/* Use some heuristic to see if unrolling indices improves
|
||||
* performance. */
|
||||
if (!new_info.primitive_restart &&
|
||||
if (!info->indirect &&
|
||||
!new_info.primitive_restart &&
|
||||
num_vertices > new_info.count*2 &&
|
||||
num_vertices - new_info.count > 32 &&
|
||||
!u_vbuf_mapping_vertex_buffer_blocks(mgr)) {
|
||||
|
@@ -2151,13 +2151,36 @@ NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::insertOOBSurfaceOpResult(TexInstruction *su)
|
||||
{
|
||||
if (!su->getPredicate())
|
||||
return;
|
||||
|
||||
bld.setPosition(su, true);
|
||||
|
||||
for (unsigned i = 0; su->defExists(i); ++i) {
|
||||
ValueDef &def = su->def(i);
|
||||
|
||||
Instruction *mov = bld.mkMov(bld.getSSA(), bld.loadImm(NULL, 0));
|
||||
assert(su->cc == CC_NOT_P);
|
||||
mov->setPredicate(CC_P, su->getPredicate());
|
||||
Instruction *uni = bld.mkOp2(OP_UNION, TYPE_U32, bld.getSSA(), NULL, mov->getDef(0));
|
||||
|
||||
def.replace(uni->getDef(0), false);
|
||||
uni->setSrc(0, def.get());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
NVC0LoweringPass::handleSurfaceOpNVE4(TexInstruction *su)
|
||||
{
|
||||
processSurfaceCoordsNVE4(su);
|
||||
|
||||
if (su->op == OP_SULDP)
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
|
||||
assert(su->getPredicate());
|
||||
@@ -2267,8 +2290,10 @@ NVC0LoweringPass::handleSurfaceOpNVC0(TexInstruction *su)
|
||||
|
||||
processSurfaceCoordsNVC0(su);
|
||||
|
||||
if (su->op == OP_SULDP)
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDB || su->op == OP_SUREDP) {
|
||||
const int dim = su->tex.target.getDim();
|
||||
@@ -2370,8 +2395,10 @@ NVC0LoweringPass::handleSurfaceOpGM107(TexInstruction *su)
|
||||
{
|
||||
processSurfaceCoordsGM107(su);
|
||||
|
||||
if (su->op == OP_SULDP)
|
||||
if (su->op == OP_SULDP) {
|
||||
convertSurfaceFormat(su);
|
||||
insertOOBSurfaceOpResult(su);
|
||||
}
|
||||
|
||||
if (su->op == OP_SUREDP) {
|
||||
Value *def = su->getDef(0);
|
||||
|
@@ -172,6 +172,7 @@ private:
|
||||
void processSurfaceCoordsNVE4(TexInstruction *);
|
||||
void processSurfaceCoordsNVC0(TexInstruction *);
|
||||
void convertSurfaceFormat(TexInstruction *);
|
||||
void insertOOBSurfaceOpResult(TexInstruction *);
|
||||
Value *calculateSampleOffset(Value *sampleID);
|
||||
|
||||
protected:
|
||||
|
@@ -37,7 +37,7 @@ extern "C" {
|
||||
struct pipe_screen *swr_create_screen(struct sw_winsys *winsys);
|
||||
|
||||
// arch-specific dll entry point
|
||||
PUBLIC struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
|
||||
struct pipe_screen *swr_create_screen_internal(struct sw_winsys *winsys);
|
||||
|
||||
// cleanup for failed screen creation
|
||||
void swr_destroy_screen_internal(struct swr_screen **screen);
|
||||
|
@@ -1143,12 +1143,10 @@ swr_validate_env_options(struct swr_screen *screen)
|
||||
}
|
||||
|
||||
|
||||
PUBLIC
|
||||
struct pipe_screen *
|
||||
swr_create_screen_internal(struct sw_winsys *winsys)
|
||||
{
|
||||
struct swr_screen *screen = CALLOC_STRUCT(swr_screen);
|
||||
memset(screen, 0, sizeof(struct swr_screen));
|
||||
|
||||
if (!screen)
|
||||
return NULL;
|
||||
|
@@ -585,6 +585,8 @@ v3d_get_device_info(struct v3d_screen *screen)
|
||||
uint32_t minor = (ident1.value >> 0) & 0xf;
|
||||
screen->devinfo.ver = major * 10 + minor;
|
||||
|
||||
screen->devinfo.vpm_size = (ident1.value >> 28 & 0xf) * 1024;
|
||||
|
||||
switch (screen->devinfo.ver) {
|
||||
case 33:
|
||||
case 41:
|
||||
|
@@ -306,6 +306,13 @@ v3d_emit_gl_shader_state(struct v3d_context *v3d,
|
||||
}
|
||||
}
|
||||
|
||||
cl_emit(&job->bcl, VCM_CACHE_SIZE, vcm) {
|
||||
vcm.number_of_16_vertex_batches_for_binning =
|
||||
v3d->prog.cs->prog_data.vs->vcm_cache_size;
|
||||
vcm.number_of_16_vertex_batches_for_rendering =
|
||||
v3d->prog.vs->prog_data.vs->vcm_cache_size;
|
||||
}
|
||||
|
||||
cl_emit(&job->bcl, GL_SHADER_STATE, state) {
|
||||
state.address = cl_address(job->indirect.bo, shader_rec_offset);
|
||||
state.number_of_attribute_arrays = num_elements_to_emit;
|
||||
|
@@ -222,6 +222,8 @@ vc4_emit_gl_shader_state(struct vc4_context *vc4,
|
||||
attr.coordinate_shader_vpm_offset = 0;
|
||||
attr.vertex_shader_vpm_offset = 0;
|
||||
}
|
||||
|
||||
vc4_bo_unreference(&bo);
|
||||
}
|
||||
|
||||
cl_emit(&job->bcl, GL_SHADER_STATE, shader_state) {
|
||||
|
@@ -121,7 +121,8 @@ vc4_fence_server_sync(struct pipe_context *pctx,
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
struct vc4_fence *fence = vc4_fence(pfence);
|
||||
|
||||
sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);
|
||||
if (fence->fd >= 0)
|
||||
sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -142,8 +143,12 @@ vc4_fence_context_init(struct vc4_context *vc4)
|
||||
/* Since we initialize the in_fence_fd to -1 (no wait necessary),
|
||||
* we also need to initialize our in_syncobj as signaled.
|
||||
*/
|
||||
return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
|
||||
&vc4->in_syncobj);
|
||||
if (vc4->screen->has_syncobj) {
|
||||
return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,
|
||||
&vc4->in_syncobj);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -38,6 +38,7 @@
|
||||
#include "vc4_context.h"
|
||||
#include "vc4_qpu.h"
|
||||
#include "vc4_qir.h"
|
||||
#include "mesa/state_tracker/st_glsl_types.h"
|
||||
|
||||
static struct qreg
|
||||
ntq_get_src(struct vc4_compile *c, nir_src src, int i);
|
||||
@@ -50,6 +51,12 @@ type_size(const struct glsl_type *type)
|
||||
return glsl_count_attribute_slots(type, false);
|
||||
}
|
||||
|
||||
static int
|
||||
uniforms_type_size(const struct glsl_type *type)
|
||||
{
|
||||
return st_glsl_storage_type_size(type, false);
|
||||
}
|
||||
|
||||
static void
|
||||
resize_qreg_array(struct vc4_compile *c,
|
||||
struct qreg **regs,
|
||||
@@ -1685,7 +1692,7 @@ static void
|
||||
ntq_setup_uniforms(struct vc4_compile *c)
|
||||
{
|
||||
nir_foreach_variable(var, &c->s->uniforms) {
|
||||
uint32_t vec4_count = type_size(var->type);
|
||||
uint32_t vec4_count = uniforms_type_size(var->type);
|
||||
unsigned vec4_size = 4 * sizeof(float);
|
||||
|
||||
declare_uniform_range(c, var->data.driver_location * vec4_size,
|
||||
@@ -2469,9 +2476,13 @@ vc4_shader_state_create(struct pipe_context *pctx,
|
||||
*/
|
||||
s = cso->ir.nir;
|
||||
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_all, type_size,
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_all & ~nir_var_uniform,
|
||||
type_size,
|
||||
(nir_lower_io_options)0);
|
||||
} else {
|
||||
NIR_PASS_V(s, nir_lower_io, nir_var_uniform,
|
||||
uniforms_type_size,
|
||||
(nir_lower_io_options)0);
|
||||
} else {
|
||||
assert(cso->type == PIPE_SHADER_IR_TGSI);
|
||||
|
||||
if (vc4_debug & VC4_DEBUG_TGSI) {
|
||||
|
@@ -614,7 +614,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
}
|
||||
|
||||
so->texture_p0 =
|
||||
(VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
|
||||
(VC4_SET_FIELD((rsc->slices[0].offset +
|
||||
cso->u.tex.first_layer *
|
||||
rsc->cube_map_stride) >> 12, VC4_TEX_P0_OFFSET) |
|
||||
VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
|
||||
VC4_SET_FIELD(so->force_first_level ?
|
||||
cso->u.tex.last_level :
|
||||
|
@@ -26,8 +26,12 @@
|
||||
*
|
||||
**************************************************************************/
|
||||
|
||||
#if !defined(ANDROID) || ANDROID_API_LEVEL >= 26
|
||||
/* Android's libc began supporting shm in Oreo */
|
||||
#define HAVE_SHM
|
||||
#include <sys/ipc.h>
|
||||
#include <sys/shm.h>
|
||||
#endif
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "pipe/p_format.h"
|
||||
@@ -83,6 +87,7 @@ dri_sw_is_displaytarget_format_supported( struct sw_winsys *ws,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
#ifdef HAVE_SHM
|
||||
static char *
|
||||
alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size)
|
||||
{
|
||||
@@ -101,6 +106,7 @@ alloc_shm(struct dri_sw_displaytarget *dri_sw_dt, unsigned size)
|
||||
|
||||
return addr;
|
||||
}
|
||||
#endif
|
||||
|
||||
static struct sw_displaytarget *
|
||||
dri_sw_displaytarget_create(struct sw_winsys *winsys,
|
||||
@@ -131,8 +137,11 @@ dri_sw_displaytarget_create(struct sw_winsys *winsys,
|
||||
size = dri_sw_dt->stride * nblocksy;
|
||||
|
||||
dri_sw_dt->shmid = -1;
|
||||
|
||||
#ifdef HAVE_SHM
|
||||
if (ws->lf->put_image_shm)
|
||||
dri_sw_dt->data = alloc_shm(dri_sw_dt, size);
|
||||
#endif
|
||||
|
||||
if(!dri_sw_dt->data)
|
||||
dri_sw_dt->data = align_malloc(size, alignment);
|
||||
@@ -156,8 +165,10 @@ dri_sw_displaytarget_destroy(struct sw_winsys *ws,
|
||||
struct dri_sw_displaytarget *dri_sw_dt = dri_sw_displaytarget(dt);
|
||||
|
||||
if (dri_sw_dt->shmid >= 0) {
|
||||
#ifdef HAVE_SHM
|
||||
shmdt(dri_sw_dt->data);
|
||||
shmctl(dri_sw_dt->shmid, IPC_RMID, 0);
|
||||
#endif
|
||||
} else {
|
||||
align_free(dri_sw_dt->data);
|
||||
}
|
||||
|
@@ -19,9 +19,6 @@
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
if HAVE_SHARED_GLAPI
|
||||
SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
|
||||
endif
|
||||
|
||||
SUBDIRS =
|
||||
|
||||
@@ -181,7 +178,7 @@ GL_LIBS = \
|
||||
$(LIBDRM_LIBS) \
|
||||
libglx.la \
|
||||
$(top_builddir)/src/mapi/glapi/libglapi.la \
|
||||
$(SHARED_GLAPI_LIB) \
|
||||
$(top_builddir)/src/mapi/shared-glapi/libglapi.la \
|
||||
$(GL_LIB_DEPS)
|
||||
|
||||
GL_LDFLAGS = \
|
||||
|
@@ -152,7 +152,7 @@ static const struct extension_info known_glx_extensions[] = {
|
||||
{ GLX(ATI_pixel_format_float), VER(0,0), N, N, N, N },
|
||||
{ GLX(INTEL_swap_event), VER(0,0), Y, N, N, N },
|
||||
{ GLX(MESA_copy_sub_buffer), VER(0,0), Y, N, N, N },
|
||||
{ GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, Y, N },
|
||||
{ GLX(MESA_multithread_makecurrent),VER(0,0), Y, N, N, Y },
|
||||
{ GLX(MESA_query_renderer), VER(0,0), Y, N, N, Y },
|
||||
{ GLX(MESA_swap_control), VER(0,0), Y, N, N, Y },
|
||||
{ GLX(NV_float_buffer), VER(0,0), N, N, N, N },
|
||||
|
@@ -21,7 +21,9 @@
|
||||
|
||||
noinst_PROGRAMS += \
|
||||
tools/aubinator \
|
||||
tools/aubinator_error_decode
|
||||
tools/aubinator_error_decode \
|
||||
tools/error2aub
|
||||
|
||||
|
||||
tools_aubinator_SOURCES = \
|
||||
tools/aubinator.c \
|
||||
@@ -59,3 +61,23 @@ tools_aubinator_error_decode_LDADD = \
|
||||
tools_aubinator_error_decode_CFLAGS = \
|
||||
$(AM_CFLAGS) \
|
||||
$(ZLIB_CFLAGS)
|
||||
|
||||
|
||||
tools_error2aub_SOURCES = \
|
||||
tools/gen_context.h \
|
||||
tools/gen8_context.h \
|
||||
tools/gen10_context.h \
|
||||
tools/aub_write.h \
|
||||
tools/aub_write.c \
|
||||
tools/error2aub.c
|
||||
|
||||
tools_error2aub_CFLAGS = \
|
||||
$(AM_CFLAGS) \
|
||||
$(ZLIB_CFLAGS)
|
||||
|
||||
tools_error2aub_LDADD = \
|
||||
dev/libintel_dev.la \
|
||||
$(PTHREAD_LIBS) \
|
||||
$(DLOPEN_LIBS) \
|
||||
$(ZLIB_LIBS) \
|
||||
-lm
|
||||
|
@@ -75,18 +75,6 @@ brw_blorp_surface_info_init(struct blorp_context *blorp,
|
||||
if (format == ISL_FORMAT_UNSUPPORTED)
|
||||
format = surf->surf->format;
|
||||
|
||||
if (format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
|
||||
/* Unfortunately, ISL_FORMAT_R24_UNORM_X8_TYPELESS isn't supported as
|
||||
* a render target, which would prevent us from blitting to 24-bit
|
||||
* depth. The miptree consists of 32 bits per pixel, arranged as 24-bit
|
||||
* depth values interleaved with 8 "don't care" bits. Since depth
|
||||
* values don't require any blending, it doesn't matter how we interpret
|
||||
* the bit pattern as long as we copy the right amount of data, so just
|
||||
* map it as 8-bit BGRA.
|
||||
*/
|
||||
format = ISL_FORMAT_B8G8R8A8_UNORM;
|
||||
}
|
||||
|
||||
info->surf = *surf->surf;
|
||||
info->addr = surf->addr;
|
||||
|
||||
|
@@ -776,6 +776,14 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
|
||||
* grid of samples within a pixel. Sample number layout shows the
|
||||
* rectangular grid of samples roughly corresponding to the real sample
|
||||
* locations within a pixel.
|
||||
*
|
||||
* In the case of 2x MSAA, the layout of sample indices is reversed from
|
||||
* the layout of sample numbers:
|
||||
*
|
||||
* sample index layout : --------- sample number layout : ---------
|
||||
* | 0 | 1 | | 1 | 0 |
|
||||
* --------- ---------
|
||||
*
|
||||
* In case of 4x MSAA, layout of sample indices matches the layout of
|
||||
* sample numbers:
|
||||
* ---------
|
||||
@@ -819,7 +827,9 @@ blorp_nir_manual_blend_bilinear(nir_builder *b, nir_ssa_def *pos,
|
||||
key->x_scale * key->y_scale));
|
||||
sample = nir_f2i32(b, sample);
|
||||
|
||||
if (tex_samples == 8) {
|
||||
if (tex_samples == 2) {
|
||||
sample = nir_isub(b, nir_imm_int(b, 1), sample);
|
||||
} else if (tex_samples == 8) {
|
||||
sample = nir_iand(b, nir_ishr(b, nir_imm_int(b, 0x64210573),
|
||||
nir_ishl(b, sample, nir_imm_int(b, 2))),
|
||||
nir_imm_int(b, 0xf));
|
||||
@@ -984,14 +994,14 @@ convert_color(struct nir_builder *b, nir_ssa_def *color,
|
||||
nir_ssa_def *value;
|
||||
|
||||
if (key->dst_format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
|
||||
/* The destination image is bound as R32_UNORM but the data needs to be
|
||||
/* The destination image is bound as R32_UINT but the data needs to be
|
||||
* in R24_UNORM_X8_TYPELESS. The bottom 24 are the actual data and the
|
||||
* top 8 need to be zero. We can accomplish this by simply multiplying
|
||||
* by a factor to scale things down.
|
||||
*/
|
||||
float factor = (float)((1 << 24) - 1) / (float)UINT32_MAX;
|
||||
value = nir_fmul(b, nir_fsat(b, nir_channel(b, color, 0)),
|
||||
nir_imm_float(b, factor));
|
||||
unsigned factor = (1 << 24) - 1;
|
||||
value = nir_fsat(b, nir_channel(b, color, 0));
|
||||
value = nir_f2i32(b, nir_fmul(b, value, nir_imm_float(b, factor)));
|
||||
} else if (key->dst_format == ISL_FORMAT_L8_UNORM_SRGB) {
|
||||
value = nir_format_linear_to_srgb(b, nir_channel(b, color, 0));
|
||||
} else if (key->dst_format == ISL_FORMAT_R8G8B8_UNORM_SRGB) {
|
||||
@@ -1976,7 +1986,7 @@ try_blorp_blit(struct blorp_batch *batch,
|
||||
isl_format_rgbx_to_rgba(params->dst.view.format);
|
||||
} else if (params->dst.view.format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
|
||||
wm_prog_key->dst_format = params->dst.view.format;
|
||||
params->dst.view.format = ISL_FORMAT_R32_UNORM;
|
||||
params->dst.view.format = ISL_FORMAT_R32_UINT;
|
||||
} else if (params->dst.view.format == ISL_FORMAT_A4B4G4R4_UNORM) {
|
||||
params->dst.view.swizzle =
|
||||
isl_swizzle_compose(params->dst.view.swizzle,
|
||||
@@ -2240,6 +2250,17 @@ blorp_blit(struct blorp_batch *batch,
|
||||
}
|
||||
}
|
||||
|
||||
/* ISL_FORMAT_R24_UNORM_X8_TYPELESS isn't supported as a render target,
|
||||
* which requires shader math to render to it. Blitting Z24X8 to Z24X8
|
||||
* is fairly common though, so we'd like to avoid it. Since we don't need
|
||||
* to blend depth values, we can simply pick a renderable format with the
|
||||
* right number of bits-per-pixel, like 8-bit BGRA.
|
||||
*/
|
||||
if (dst_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS &&
|
||||
src_surf->surf->format == ISL_FORMAT_R24_UNORM_X8_TYPELESS) {
|
||||
src_format = dst_format = ISL_FORMAT_B8G8R8A8_UNORM;
|
||||
}
|
||||
|
||||
brw_blorp_surface_info_init(batch->blorp, ¶ms.src, src_surf, src_level,
|
||||
src_layer, src_format, false);
|
||||
brw_blorp_surface_info_init(batch->blorp, ¶ms.dst, dst_surf, dst_level,
|
||||
|
@@ -42,10 +42,10 @@ prefix##0YOffset = 0.5;
|
||||
* c 1
|
||||
*/
|
||||
#define GEN_SAMPLE_POS_2X(prefix) \
|
||||
prefix##0XOffset = 0.25; \
|
||||
prefix##0YOffset = 0.25; \
|
||||
prefix##1XOffset = 0.75; \
|
||||
prefix##1YOffset = 0.75;
|
||||
prefix##0XOffset = 0.75; \
|
||||
prefix##0YOffset = 0.75; \
|
||||
prefix##1XOffset = 0.25; \
|
||||
prefix##1YOffset = 0.25;
|
||||
|
||||
/**
|
||||
* Sample positions:
|
||||
|
@@ -5115,6 +5115,25 @@ get_fpu_lowered_simd_width(const struct gen_device_info *devinfo,
|
||||
}
|
||||
}
|
||||
|
||||
if (devinfo->gen < 6) {
|
||||
/* From the G45 PRM, Volume 4 Page 361:
|
||||
*
|
||||
* "Operand Alignment Rule: With the exceptions listed below, a
|
||||
* source/destination operand in general should be aligned to even
|
||||
* 256-bit physical register with a region size equal to two 256-bit
|
||||
* physical registers."
|
||||
*
|
||||
* Normally we enforce this by allocating virtual registers to the
|
||||
* even-aligned class. But we need to handle payload registers.
|
||||
*/
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == FIXED_GRF && (inst->src[i].nr & 1) &&
|
||||
inst->size_read(i) > REG_SIZE) {
|
||||
max_width = MIN2(max_width, 8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* From the IVB PRMs:
|
||||
* "When an instruction is SIMD32, the low 16 bits of the execution mask
|
||||
* are applied for both halves of the SIMD32 instruction. If different
|
||||
@@ -6321,6 +6340,7 @@ fs_visitor::optimize()
|
||||
if (OPT(lower_load_payload)) {
|
||||
split_virtual_grfs();
|
||||
OPT(register_coalesce);
|
||||
OPT(lower_simd_width);
|
||||
OPT(compute_to_mrf);
|
||||
OPT(dead_code_eliminate);
|
||||
}
|
||||
|
@@ -590,7 +590,7 @@ handle_memtrace_reg_write(uint32_t *p)
|
||||
uint32_t pphwsp_addr = context_descriptor & 0xfffff000;
|
||||
struct gen_batch_decode_bo pphwsp_bo = get_ggtt_batch_bo(NULL, pphwsp_addr);
|
||||
uint32_t *context = (uint32_t *)((uint8_t *)pphwsp_bo.map +
|
||||
(pphwsp_bo.addr - pphwsp_addr) +
|
||||
(pphwsp_addr - pphwsp_bo.addr) +
|
||||
pphwsp_size);
|
||||
|
||||
uint32_t ring_buffer_head = context[5];
|
||||
@@ -601,7 +601,7 @@ handle_memtrace_reg_write(uint32_t *p)
|
||||
struct gen_batch_decode_bo ring_bo = get_ggtt_batch_bo(NULL,
|
||||
ring_buffer_start);
|
||||
assert(ring_bo.size > 0);
|
||||
void *commands = (uint8_t *)ring_bo.map + (ring_bo.addr - ring_buffer_start);
|
||||
void *commands = (uint8_t *)ring_bo.map + (ring_buffer_start - ring_bo.addr);
|
||||
|
||||
if (context_descriptor & 0x100 /* ppgtt */) {
|
||||
batch_ctx.get_bo = get_ppgtt_batch_bo;
|
||||
|
@@ -205,7 +205,7 @@ main(int argc, char *argv[])
|
||||
BO_TYPE_UNKNOWN = 0,
|
||||
BO_TYPE_BATCH,
|
||||
BO_TYPE_USER,
|
||||
} bo_type;
|
||||
} bo_type = BO_TYPE_UNKNOWN;
|
||||
uint64_t bo_addr;
|
||||
|
||||
char *line = NULL;
|
||||
|
@@ -38,13 +38,13 @@
|
||||
/**
|
||||
* 1x MSAA has a single sample at the center: (0.5, 0.5) -> (0x8, 0x8).
|
||||
*
|
||||
* 2x MSAA sample positions are (0.25, 0.25) and (0.75, 0.75):
|
||||
* 2x MSAA sample positions are (0.75, 0.75) and (0.25, 0.25):
|
||||
* 4 c
|
||||
* 4 0
|
||||
* c 1
|
||||
* 4 1
|
||||
* c 0
|
||||
*/
|
||||
static const uint32_t
|
||||
brw_multisample_positions_1x_2x = 0x0088cc44;
|
||||
brw_multisample_positions_1x_2x = 0x008844cc;
|
||||
|
||||
/**
|
||||
* Sample positions:
|
||||
|
@@ -68,10 +68,10 @@ gen6_get_sample_position(struct gl_context *ctx,
|
||||
* index layout in case of 2X and 4x MSAA, but they are different in
|
||||
* case of 8X MSAA.
|
||||
*
|
||||
* 2X MSAA sample index / number layout
|
||||
* ---------
|
||||
* | 0 | 1 |
|
||||
* ---------
|
||||
* 2X MSAA sample index layout 2x MSAA sample number layout
|
||||
* --------- ---------
|
||||
* | 0 | 1 | | 1 | 0 |
|
||||
* --------- ---------
|
||||
*
|
||||
* 4X MSAA sample index / number layout
|
||||
* ---------
|
||||
@@ -107,7 +107,7 @@ gen6_get_sample_position(struct gl_context *ctx,
|
||||
void
|
||||
gen6_set_sample_maps(struct gl_context *ctx)
|
||||
{
|
||||
uint8_t map_2x[2] = {0, 1};
|
||||
uint8_t map_2x[2] = {1, 0};
|
||||
uint8_t map_4x[4] = {0, 1, 2, 3};
|
||||
uint8_t map_8x[8] = {3, 7, 5, 0, 1, 2, 4, 6};
|
||||
uint8_t map_16x[16] = { 15, 10, 9, 7, 4, 1, 3, 13,
|
||||
|
@@ -7,7 +7,7 @@ Name: gl
|
||||
Description: Mesa OpenGL library
|
||||
Requires.private: @GL_PC_REQ_PRIV@
|
||||
Version: @PACKAGE_VERSION@
|
||||
Libs: -L${libdir} -l@GL_LIB@
|
||||
Libs: -L${libdir} -l@GL_PKGCONF_LIB@
|
||||
Libs.private: @GL_PC_LIB_PRIV@
|
||||
Cflags: -I${includedir} @GL_PC_CFLAGS@
|
||||
glx_tls: @GLX_TLS@
|
||||
|
@@ -1229,7 +1229,7 @@ void st_init_extensions(struct pipe_screen *screen,
|
||||
screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_UNORM,
|
||||
PIPE_TEXTURE_2D, 0, 0,
|
||||
PIPE_BIND_SAMPLER_VIEW) &&
|
||||
screen->is_format_supported(screen, PIPE_FORMAT_B8G8R8A8_SRGB,
|
||||
screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB,
|
||||
PIPE_TEXTURE_2D, 0, 0,
|
||||
PIPE_BIND_SAMPLER_VIEW) &&
|
||||
screen->is_format_supported(screen, PIPE_FORMAT_R16_UNORM,
|
||||
|
@@ -120,6 +120,10 @@ TODO: document the other workarounds.
|
||||
<option name="allow_glsl_extension_directive_midshader" value="true" />
|
||||
</application>
|
||||
|
||||
<application name="Metro 2033 Redux / Metro Last Night Redux" executable="metro">
|
||||
<option name="allow_glsl_extension_directive_midshader" value="true" />
|
||||
</application>
|
||||
|
||||
<application name="Worms W.M.D" executable="Worms W.M.Dx64">
|
||||
<option name="allow_higher_compat_version" value="true" />
|
||||
</application>
|
||||
|
Reference in New Issue
Block a user