Compare commits
50 Commits
mesa-24.2.
...
mesa-24.2.
Author | SHA1 | Date | |
---|---|---|---|
|
22fafc9824 | ||
|
a2d4bd10c3 | ||
|
14f6b72604 | ||
|
f11e04e331 | ||
|
a4b0f0f765 | ||
|
5854ff2dd9 | ||
|
9f8856c5af | ||
|
10dfd5d13b | ||
|
b01adb2118 | ||
|
27b6484317 | ||
|
df083003ab | ||
|
89dbb1ca29 | ||
|
a8ea86d2e8 | ||
|
56bef9de05 | ||
|
7335dbb895 | ||
|
f6b2fe8455 | ||
|
db297c6534 | ||
|
46ad101f67 | ||
|
fbba6b7b8d | ||
|
fef78c34aa | ||
|
0ffd6a87d0 | ||
|
3dc242cb5e | ||
|
cf393b4076 | ||
|
6d680b5d39 | ||
|
75a131315f | ||
|
88b8d72234 | ||
|
3cf68b1295 | ||
|
239fb0bdd2 | ||
|
cd2ea3a45c | ||
|
7ce99a3f63 | ||
|
fef088fd5d | ||
|
6f625b1b95 | ||
|
c9f1f288b6 | ||
|
2ff3011b02 | ||
|
8fd12baaaa | ||
|
ec87b9cb5c | ||
|
0a8b665c8f | ||
|
3fcb2db345 | ||
|
6e51ea0001 | ||
|
3f8d9068e1 | ||
|
3610b26a3e | ||
|
5c6f02805f | ||
|
4194c3b925 | ||
|
c7dfe51eb3 | ||
|
54386fe91a | ||
|
7880933b15 | ||
|
34ad0f8bd7 | ||
|
19e29aca4b | ||
|
f55d119d6a | ||
|
30645ecbf8 |
@@ -84,6 +84,7 @@ VARS=(
|
||||
MESA_IMAGE_PATH
|
||||
MESA_IMAGE_TAG
|
||||
MESA_LOADER_DRIVER_OVERRIDE
|
||||
MESA_SPIRV_LOG_LEVEL
|
||||
MESA_TEMPLATES_COMMIT
|
||||
MESA_VK_ABORT_ON_DEVICE_LOSS
|
||||
MESA_VK_IGNORE_CONFORMANCE_WARNING
|
||||
|
@@ -18,7 +18,7 @@ DEPS=(
|
||||
bash
|
||||
bison
|
||||
ccache
|
||||
clang16-dev
|
||||
clang${LLVM_VERSION}-dev
|
||||
cmake
|
||||
clang-dev
|
||||
coreutils
|
||||
@@ -31,8 +31,8 @@ DEPS=(
|
||||
glslang
|
||||
graphviz
|
||||
linux-headers
|
||||
llvm16-static
|
||||
llvm16-dev
|
||||
llvm${LLVM_VERSION}-static
|
||||
llvm${LLVM_VERSION}-dev
|
||||
meson
|
||||
mold
|
||||
musl-dev
|
||||
|
3670
.pick_status.json
3670
.pick_status.json
File diff suppressed because it is too large
Load Diff
@@ -157,9 +157,9 @@ endif
|
||||
endef
|
||||
|
||||
ifneq ($(strip $(BOARD_MESA3D_GALLIUM_DRIVERS)),)
|
||||
# Module 'libgallium_dri', produces '/vendor/lib{64}/dri/libgallium_dri.so'
|
||||
# Module 'libgallium_dri', produces '/vendor/lib{64}/libgallium_dri.so'
|
||||
# This module also trigger DRI symlinks creation process
|
||||
$(eval $(call mesa3d-lib,libgallium_dri,dri,MESA3D_GALLIUM_DRI_BIN))
|
||||
$(eval $(call mesa3d-lib,libgallium_dri,,MESA3D_GALLIUM_BIN))
|
||||
# Module 'libglapi', produces '/vendor/lib{64}/libglapi.so'
|
||||
$(eval $(call mesa3d-lib,libglapi,,MESA3D_LIBGLAPI_BIN))
|
||||
|
||||
|
@@ -63,8 +63,8 @@ MESON_OUT_DIR := $($(M_TARGET_PREFIX)TARGET_OUT_INTER
|
||||
MESON_GEN_DIR := $(MESON_OUT_DIR)_GEN
|
||||
MESON_GEN_FILES_TARGET := $(MESON_GEN_DIR)/.timestamp
|
||||
|
||||
MESA3D_GALLIUM_DRI_DIR := $(MESON_OUT_DIR)/install/usr/local/lib/dri
|
||||
$(M_TARGET_PREFIX)MESA3D_GALLIUM_DRI_BIN := $(MESON_OUT_DIR)/install/usr/local/lib/libgallium_dri.so
|
||||
MESA3D_GALLIUM_DIR := $(MESON_OUT_DIR)/install/usr/local/lib
|
||||
$(M_TARGET_PREFIX)MESA3D_GALLIUM_BIN := $(MESON_OUT_DIR)/install/usr/local/lib/libgallium_dri.so
|
||||
$(M_TARGET_PREFIX)MESA3D_LIBEGL_BIN := $(MESON_OUT_DIR)/install/usr/local/lib/libEGL.so
|
||||
$(M_TARGET_PREFIX)MESA3D_LIBGLESV1_BIN := $(MESON_OUT_DIR)/install/usr/local/lib/libGLESv1_CM.so
|
||||
$(M_TARGET_PREFIX)MESA3D_LIBGLESV2_BIN := $(MESON_OUT_DIR)/install/usr/local/lib/libGLESv2.so
|
||||
@@ -73,6 +73,7 @@ $(M_TARGET_PREFIX)MESA3D_LIBGBM_BIN := $(MESON_OUT_DIR)/install/usr/local/l
|
||||
|
||||
|
||||
MESA3D_GLES_BINS := \
|
||||
$($(M_TARGET_PREFIX)MESA3D_GALLIUM_BIN) \
|
||||
$($(M_TARGET_PREFIX)MESA3D_LIBEGL_BIN) \
|
||||
$($(M_TARGET_PREFIX)MESA3D_LIBGLESV1_BIN) \
|
||||
$($(M_TARGET_PREFIX)MESA3D_LIBGLESV2_BIN) \
|
||||
@@ -284,16 +285,11 @@ endif
|
||||
$(MESON_BUILD)
|
||||
touch $@
|
||||
|
||||
MESON_COPY_LIBGALLIUM := \
|
||||
cp `ls -1 $(MESA3D_GALLIUM_DRI_DIR)/* | head -1` $($(M_TARGET_PREFIX)MESA3D_GALLIUM_DRI_BIN)
|
||||
|
||||
$(MESON_OUT_DIR)/install/.install.timestamp: MESON_COPY_LIBGALLIUM:=$(MESON_COPY_LIBGALLIUM)
|
||||
$(MESON_OUT_DIR)/install/.install.timestamp: MESON_BUILD:=$(MESON_BUILD)
|
||||
$(MESON_OUT_DIR)/install/.install.timestamp: $(MESON_OUT_DIR)/.build.timestamp
|
||||
rm -rf $(dir $@)
|
||||
mkdir -p $(dir $@)
|
||||
DESTDIR=$(call relative-to-absolute,$(dir $@)) $(MESON_BUILD) install
|
||||
$(if $(BOARD_MESA3D_GALLIUM_DRIVERS),$(MESON_COPY_LIBGALLIUM))
|
||||
touch $@
|
||||
|
||||
$($(M_TARGET_PREFIX)MESA3D_LIBGBM_BIN) $(MESA3D_GLES_BINS): $(MESON_OUT_DIR)/install/.install.timestamp
|
||||
@@ -308,14 +304,3 @@ $(MESON_OUT_DIR)/install/usr/local/lib/libvulkan_$(MESA_VK_LIB_SUFFIX_$1).so: $(
|
||||
endef
|
||||
|
||||
$(foreach driver,$(BOARD_MESA3D_VULKAN_DRIVERS), $(eval $(call vulkan_target,$(driver))))
|
||||
|
||||
$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.symlinks.timestamp: MESA3D_GALLIUM_DRI_DIR:=$(MESA3D_GALLIUM_DRI_DIR)
|
||||
$($(M_TARGET_PREFIX)TARGET_OUT_VENDOR_SHARED_LIBRARIES)/dri/.symlinks.timestamp: $(MESON_OUT_DIR)/install/.install.timestamp
|
||||
# Create Symlinks
|
||||
mkdir -p $(dir $@)
|
||||
ls -1 $(MESA3D_GALLIUM_DRI_DIR)/ | PATH=/usr/bin:$$PATH xargs -I{} ln -s -f libgallium_dri.so $(dir $@)/{}
|
||||
touch $@
|
||||
|
||||
$($(M_TARGET_PREFIX)MESA3D_GALLIUM_DRI_BIN): $(TARGET_OUT_VENDOR)/$(MESA3D_LIB_DIR)/dri/.symlinks.timestamp
|
||||
echo "Build $@"
|
||||
touch $@
|
||||
|
50
meson.build
50
meson.build
@@ -501,22 +501,28 @@ if not have_mtls_dialect
|
||||
if meson.is_cross_build() and not meson.can_run_host_binaries()
|
||||
warning('cannot auto-detect -mtls-dialect when cross-compiling, using compiler default')
|
||||
else
|
||||
# -fpic to force dynamic tls, otherwise TLS relaxation defeats check
|
||||
gnu2_test = cc.run('int __thread x; int main() { return x; }',
|
||||
args: ['-mtls-dialect=gnu2', '-fpic'],
|
||||
name: '-mtls-dialect=gnu2')
|
||||
if gnu2_test.returncode() == 0 and (
|
||||
# check for lld 13 bug: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5665
|
||||
host_machine.cpu_family() != 'x86_64' or
|
||||
# get_linker_id misses LDFLAGS=-fuse-ld=lld: https://github.com/mesonbuild/meson/issues/6377
|
||||
#cc.get_linker_id() != 'ld.lld' or
|
||||
cc.links('''int __thread x; int y; int main() { __asm__(
|
||||
"leaq x@TLSDESC(%rip), %rax\n"
|
||||
"movq y@GOTPCREL(%rip), %rdx\n"
|
||||
"call *x@TLSCALL(%rax)\n"); }''', name: 'split TLSDESC')
|
||||
)
|
||||
c_cpp_args += '-mtls-dialect=gnu2'
|
||||
endif
|
||||
# The way to specify the TLSDESC dialect is architecture-specific.
|
||||
# We probe both because there is not a fallback guaranteed to work for all
|
||||
# future architectures.
|
||||
foreach tlsdesc_arg : ['-mtls-dialect=gnu2', '-mtls-dialect=desc']
|
||||
# -fpic to force dynamic tls, otherwise TLS relaxation defeats check
|
||||
tlsdesc_test = cc.run('int __thread x; int main() { return x; }',
|
||||
args: [tlsdesc_arg, '-fpic'],
|
||||
name: tlsdesc_arg)
|
||||
if tlsdesc_test.returncode() == 0 and (
|
||||
# check for lld 13 bug: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5665
|
||||
host_machine.cpu_family() != 'x86_64' or
|
||||
# get_linker_id misses LDFLAGS=-fuse-ld=lld: https://github.com/mesonbuild/meson/issues/6377
|
||||
#cc.get_linker_id() != 'ld.lld' or
|
||||
cc.links('''int __thread x; int y; int main() { __asm__(
|
||||
"leaq x@TLSDESC(%rip), %rax\n"
|
||||
"movq y@GOTPCREL(%rip), %rdx\n"
|
||||
"call *x@TLSCALL(%rax)\n"); }''', name: 'split TLSDESC')
|
||||
)
|
||||
c_cpp_args += tlsdesc_arg
|
||||
break
|
||||
endif
|
||||
endforeach
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -1013,6 +1019,7 @@ endforeach
|
||||
_attributes = [
|
||||
'const', 'flatten', 'malloc', 'pure', 'unused', 'warn_unused_result',
|
||||
'weak', 'format', 'packed', 'returns_nonnull', 'alias', 'noreturn',
|
||||
'optimize',
|
||||
]
|
||||
foreach a : cc.get_supported_function_attributes(_attributes)
|
||||
pre_args += '-DHAVE_FUNC_ATTRIBUTE_@0@'.format(a.to_upper())
|
||||
@@ -1753,7 +1760,6 @@ if with_clc
|
||||
llvm_optional_modules += ['all-targets', 'windowsdriver', 'frontendhlsl', 'frontenddriver']
|
||||
endif
|
||||
draw_with_llvm = get_option('draw-use-llvm')
|
||||
llvm_with_orcjit = get_option('llvm-orcjit')
|
||||
if draw_with_llvm
|
||||
llvm_modules += 'native'
|
||||
# lto is needded with LLVM>=15, but we don't know what LLVM verrsion we are using yet
|
||||
@@ -1761,6 +1767,12 @@ if draw_with_llvm
|
||||
endif
|
||||
amd_with_llvm = get_option('amd-use-llvm')
|
||||
|
||||
# MCJIT is deprecated in LLVM and will not accept new architecture ports,
|
||||
# so any architecture not in the exhaustive list will have to rely on LLVM
|
||||
# ORCJIT for llvmpipe functionality.
|
||||
llvm_has_mcjit = host_machine.cpu_family() in ['aarch64', 'arm', 'ppc', 'ppc64', 's390x', 'x86', 'x86_64']
|
||||
llvm_with_orcjit = get_option('llvm-orcjit') or not llvm_has_mcjit
|
||||
|
||||
if with_amd_vk or with_gallium_radeonsi or with_clc or llvm_with_orcjit
|
||||
_llvm_version = '>= 15.0.0'
|
||||
elif with_gallium_clover
|
||||
@@ -1797,8 +1809,8 @@ if with_llvm
|
||||
pre_args += '-DMESA_LLVM_VERSION_STRING="@0@"'.format(dep_llvm.version())
|
||||
pre_args += '-DLLVM_IS_SHARED=@0@'.format(_shared_llvm.to_int())
|
||||
|
||||
if with_swrast_vk and not draw_with_llvm
|
||||
error('Lavapipe requires LLVM draw support.')
|
||||
if (with_swrast_vk or with_gallium_llvmpipe) and not draw_with_llvm
|
||||
error('Lavapipe and llvmpipe require LLVM draw support.')
|
||||
endif
|
||||
|
||||
if with_gallium_r600 and not amd_with_llvm
|
||||
|
@@ -65,6 +65,14 @@ option(
|
||||
description : 'Location to install dri drivers. Default: $libdir/dri.'
|
||||
)
|
||||
|
||||
option(
|
||||
'unversion-libgallium',
|
||||
type : 'boolean',
|
||||
value : false,
|
||||
description : 'Do not include mesa version in libgallium DSO filename. ' +
|
||||
'Do not enable unless you know what you are doing. Default: false'
|
||||
)
|
||||
|
||||
option(
|
||||
'dri-search-path',
|
||||
type : 'string',
|
||||
@@ -436,7 +444,10 @@ option (
|
||||
'llvm-orcjit',
|
||||
type : 'boolean',
|
||||
value : false,
|
||||
description: 'Build llvmpipe with LLVM ORCJIT support.'
|
||||
description: 'Build llvmpipe with LLVM ORCJIT support. Has no effect when ' +
|
||||
'building for architectures without LLVM MCJIT support -- ' +
|
||||
'ORCJIT is the only choice on such architectures and will ' +
|
||||
'always be enabled.'
|
||||
)
|
||||
|
||||
option(
|
||||
|
@@ -3015,7 +3015,7 @@ static bool gfx12_compute_hiz_his_info(struct ac_addrlib *addrlib, const struct
|
||||
{
|
||||
assert(surf_in->flags.depth != surf_in->flags.stencil);
|
||||
|
||||
if (surf->flags & RADEON_SURF_NO_HTILE)
|
||||
if (surf->flags & RADEON_SURF_NO_HTILE || (info->gfx_level == GFX12 && info->chip_rev == 0))
|
||||
return true;
|
||||
|
||||
ADDR3_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
|
||||
|
@@ -199,6 +199,21 @@ process_live_temps_per_block(live_ctx& ctx, Block* block)
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx.program->gfx_level >= GFX10 && insn->isVALU() &&
|
||||
insn->definitions.back().regClass() == s2) {
|
||||
/* RDNA2 ISA doc, 6.2.4. Wave64 Destination Restrictions:
|
||||
* The first pass of a wave64 VALU instruction may not overwrite a scalar value used by
|
||||
* the second half.
|
||||
*/
|
||||
bool carry_in = insn->opcode == aco_opcode::v_addc_co_u32 ||
|
||||
insn->opcode == aco_opcode::v_subb_co_u32 ||
|
||||
insn->opcode == aco_opcode::v_subbrev_co_u32;
|
||||
for (unsigned op_idx = 0; op_idx < (carry_in ? 2 : insn->operands.size()); op_idx++) {
|
||||
if (insn->operands[op_idx].isOfType(RegType::sgpr))
|
||||
insn->operands[op_idx].setLateKill(true);
|
||||
}
|
||||
}
|
||||
|
||||
/* we need to do this in a separate loop because the next one can
|
||||
* setKill() for several operands at once and we don't want to
|
||||
* overwrite that in a later iteration */
|
||||
|
@@ -2398,6 +2398,24 @@ ast_function_expression::hir(exec_list *instructions,
|
||||
|
||||
ir_rvalue *result = convert_component(ir, desired_type);
|
||||
|
||||
/* If the bindless packing constructors are used directly as function
|
||||
* params to bultin functions the compiler doesn't know what to do
|
||||
* with them. To avoid this make sure we always copy the results from
|
||||
* the pack to a temp first.
|
||||
*/
|
||||
if (result->as_expression() &&
|
||||
result->as_expression()->operation == ir_unop_pack_sampler_2x32) {
|
||||
ir_variable *var =
|
||||
new(ctx) ir_variable(desired_type, "sampler_ctor",
|
||||
ir_var_temporary);
|
||||
instructions->push_tail(var);
|
||||
|
||||
ir_dereference *lhs = new(ctx) ir_dereference_variable(var);
|
||||
ir_instruction *assignment = new(ctx) ir_assignment(lhs, result);
|
||||
instructions->push_tail(assignment);
|
||||
result = lhs;
|
||||
}
|
||||
|
||||
/* Attempt to convert the parameter to a constant valued expression.
|
||||
* After doing so, track whether or not all the parameters to the
|
||||
* constructor are trivially constant valued expressions.
|
||||
|
@@ -683,24 +683,13 @@ lower_ufind_msb64(nir_builder *b, nir_def *x)
|
||||
nir_def *lo_count = nir_ufind_msb(b, x_lo);
|
||||
nir_def *hi_count = nir_ufind_msb(b, x_hi);
|
||||
|
||||
if (b->shader->options->lower_uadd_sat) {
|
||||
nir_def *valid_hi_bits = nir_ine_imm(b, x_hi, 0);
|
||||
nir_def *hi_res = nir_iadd_imm(b, hi_count, 32);
|
||||
return nir_bcsel(b, valid_hi_bits, hi_res, lo_count);
|
||||
} else {
|
||||
/* If hi_count was -1, it will still be -1 after this uadd_sat. As a
|
||||
* result, hi_count is either -1 or the correct return value for 64-bit
|
||||
* ufind_msb.
|
||||
*/
|
||||
nir_def *hi_res = nir_uadd_sat(b, nir_imm_intN_t(b, 32, 32), hi_count);
|
||||
|
||||
/* hi_res is either -1 or a value in the range [63, 32]. lo_count is
|
||||
* either -1 or a value in the range [31, 0]. The imax will pick
|
||||
* lo_count only when hi_res is -1. In those cases, lo_count is
|
||||
* guaranteed to be the correct answer.
|
||||
*/
|
||||
return nir_imax(b, hi_res, lo_count);
|
||||
}
|
||||
/* hi_count is either -1 or a value in the range [31, 0]. lo_count is
|
||||
* the same. The imax will pick lo_count only when hi_count is -1. In those
|
||||
* cases, lo_count is guaranteed to be the correct answer.
|
||||
* The ior 32 is always safe here as with -1 the value won't change,
|
||||
* otherwise it adds 32, which is what we want anyway.
|
||||
*/
|
||||
return nir_imax(b, lo_count, nir_ior_imm(b, hi_count, 32));
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
@@ -713,11 +702,9 @@ lower_find_lsb64(nir_builder *b, nir_def *x)
|
||||
|
||||
/* Use umin so that -1 (no bits found) becomes larger (0xFFFFFFFF)
|
||||
* than any actual bit position, so we return a found bit instead.
|
||||
* This is similar to the ufind_msb lowering. If you need this lowering
|
||||
* without uadd_sat, add code like in lower_ufind_msb64.
|
||||
* This is similar to the ufind_msb lowering.
|
||||
*/
|
||||
assert(!b->shader->options->lower_uadd_sat);
|
||||
return nir_umin(b, lo_lsb, nir_uadd_sat(b, hi_lsb, nir_imm_int(b, 32)));
|
||||
return nir_umin(b, lo_lsb, nir_ior_imm(b, hi_lsb, 32));
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
|
@@ -2976,8 +2976,10 @@ dri2_initialize_wayland_swrast(_EGLDisplay *disp)
|
||||
dri2_dpy->formats.num_formats))
|
||||
goto cleanup;
|
||||
|
||||
if (disp->Options.Zink)
|
||||
dri2_initialize_wayland_drm_extensions(dri2_dpy);
|
||||
if (disp->Options.Zink) {
|
||||
if (!dri2_initialize_wayland_drm_extensions(dri2_dpy) && !disp->Options.ForceSoftware)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
dri2_dpy->driver_name = strdup(disp->Options.Zink ? "zink" : "swrast");
|
||||
if (!dri2_load_driver_swrast(disp))
|
||||
|
@@ -1778,7 +1778,7 @@ dri2_initialize_x11_swrast(_EGLDisplay *disp)
|
||||
if (disp->Options.Zink &&
|
||||
!debug_get_bool_option("LIBGL_DRI3_DISABLE", false) &&
|
||||
!debug_get_bool_option("LIBGL_KOPPER_DRI2", false))
|
||||
dri3_x11_connect(dri2_dpy, disp->Options.ForceSoftware);
|
||||
dri3_x11_connect(dri2_dpy, disp->Options.Zink, disp->Options.ForceSoftware);
|
||||
#endif
|
||||
if (!dri2_load_driver_swrast(disp))
|
||||
goto cleanup;
|
||||
@@ -1863,7 +1863,7 @@ dri2_initialize_x11_dri3(_EGLDisplay *disp)
|
||||
if (!dri2_get_xcb_connection(disp, dri2_dpy))
|
||||
goto cleanup;
|
||||
|
||||
status = dri3_x11_connect(dri2_dpy, disp->Options.ForceSoftware);
|
||||
status = dri3_x11_connect(dri2_dpy, disp->Options.Zink, disp->Options.ForceSoftware);
|
||||
if (status != DRI2_EGL_DRIVER_LOADED)
|
||||
goto cleanup;
|
||||
|
||||
|
@@ -527,7 +527,7 @@ struct dri2_egl_display_vtbl dri3_x11_display_vtbl = {
|
||||
};
|
||||
|
||||
enum dri2_egl_driver_fail
|
||||
dri3_x11_connect(struct dri2_egl_display *dri2_dpy, bool swrast)
|
||||
dri3_x11_connect(struct dri2_egl_display *dri2_dpy, bool zink, bool swrast)
|
||||
{
|
||||
dri2_dpy->fd_render_gpu =
|
||||
loader_dri3_open(dri2_dpy->conn, dri2_dpy->screen->root, 0);
|
||||
@@ -549,15 +549,16 @@ dri3_x11_connect(struct dri2_egl_display *dri2_dpy, bool swrast)
|
||||
if (!dri2_dpy->driver_name)
|
||||
dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd_render_gpu);
|
||||
|
||||
if (!strcmp(dri2_dpy->driver_name, "zink") &&
|
||||
!debug_get_bool_option("LIBGL_KOPPER_DISABLE", false)) {
|
||||
if (!zink && !strcmp(dri2_dpy->driver_name, "zink")) {
|
||||
close(dri2_dpy->fd_render_gpu);
|
||||
dri2_dpy->fd_render_gpu = -1;
|
||||
return DRI2_EGL_DRIVER_PREFER_ZINK;
|
||||
}
|
||||
|
||||
if (!dri2_dpy->driver_name) {
|
||||
_eglLog(_EGL_WARNING, "DRI3: No driver found");
|
||||
close(dri2_dpy->fd_render_gpu);
|
||||
dri2_dpy->fd_render_gpu = -1;
|
||||
return DRI2_EGL_DRIVER_FAILED;
|
||||
}
|
||||
|
||||
|
@@ -36,6 +36,6 @@ extern const __DRIimageLoaderExtension dri3_image_loader_extension;
|
||||
extern struct dri2_egl_display_vtbl dri3_x11_display_vtbl;
|
||||
|
||||
enum dri2_egl_driver_fail
|
||||
dri3_x11_connect(struct dri2_egl_display *dri2_dpy, bool swrast);
|
||||
dri3_x11_connect(struct dri2_egl_display *dri2_dpy, bool zink, bool swrast);
|
||||
|
||||
#endif
|
||||
|
@@ -24,9 +24,12 @@ tu_wsi_proc_addr(VkPhysicalDevice physicalDevice, const char *pName)
|
||||
static bool
|
||||
tu_wsi_can_present_on_device(VkPhysicalDevice physicalDevice, int fd)
|
||||
{
|
||||
#ifdef HAVE_LIBDRM
|
||||
VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
|
||||
|
||||
return wsi_common_drm_devices_equal(fd, pdevice->local_fd);
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
VkResult
|
||||
|
@@ -10,6 +10,7 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <mutex>
|
||||
#include <cstdlib>
|
||||
#include "lp_bld.h"
|
||||
#include "lp_bld_debug.h"
|
||||
#include "lp_bld_init.h"
|
||||
@@ -57,7 +58,7 @@
|
||||
/* conflict with ObjectLinkingLayer.h */
|
||||
#include "util/u_memory.h"
|
||||
|
||||
#if DETECT_ARCH_RISCV64 == 1 || DETECT_ARCH_RISCV32 == 1 || (defined(_WIN32) && LLVM_VERSION_MAJOR >= 15)
|
||||
#if DETECT_ARCH_RISCV64 == 1 || DETECT_ARCH_RISCV32 == 1 || DETECT_ARCH_LOONGARCH64 == 1 || (defined(_WIN32) && LLVM_VERSION_MAJOR >= 15)
|
||||
/* use ObjectLinkingLayer (JITLINK backend) */
|
||||
#define USE_JITLINK
|
||||
#endif
|
||||
@@ -102,6 +103,8 @@ public:
|
||||
|
||||
class LPJit;
|
||||
|
||||
void lpjit_exit();
|
||||
|
||||
class LLVMEnsureMultithreaded {
|
||||
public:
|
||||
LLVMEnsureMultithreaded()
|
||||
@@ -270,15 +273,19 @@ private:
|
||||
LPJit(const LPJit&) = delete;
|
||||
LPJit& operator=(const LPJit&) = delete;
|
||||
|
||||
friend void lpjit_exit();
|
||||
|
||||
static void init_native_targets();
|
||||
llvm::orc::JITTargetMachineBuilder create_jtdb();
|
||||
|
||||
static void init_lpjit() {
|
||||
jit = new LPJit;
|
||||
std::atexit(lpjit_exit);
|
||||
}
|
||||
static LPJit* jit;
|
||||
|
||||
std::unique_ptr<llvm::orc::LLJIT> lljit;
|
||||
std::unique_ptr<llvm::TargetMachine> tm_unique;
|
||||
/* avoid name conflict */
|
||||
unsigned jit_dylib_count;
|
||||
|
||||
@@ -292,6 +299,11 @@ private:
|
||||
|
||||
LPJit* LPJit::jit = NULL;
|
||||
|
||||
void lpjit_exit()
|
||||
{
|
||||
delete LPJit::jit;
|
||||
}
|
||||
|
||||
LLVMErrorRef module_transform(void *Ctx, LLVMModuleRef mod) {
|
||||
struct lp_passmgr *mgr;
|
||||
|
||||
@@ -318,7 +330,8 @@ LPJit::LPJit() :jit_dylib_count(0) {
|
||||
|
||||
init_native_targets();
|
||||
JITTargetMachineBuilder JTMB = create_jtdb();
|
||||
tm = wrap(ExitOnErr(JTMB.createTargetMachine()).release());
|
||||
tm_unique = ExitOnErr(JTMB.createTargetMachine());
|
||||
tm = wrap(tm_unique.get());
|
||||
|
||||
/* Create an LLJIT instance with an ObjectLinkingLayer (JITLINK)
|
||||
* or RuntimeDyld as the base layer.
|
||||
@@ -410,6 +423,14 @@ llvm::orc::JITTargetMachineBuilder LPJit::create_jtdb() {
|
||||
#else
|
||||
#error "GALLIVM: unknown target riscv float abi"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if DETECT_ARCH_LOONGARCH64 == 1
|
||||
#if defined(__loongarch_lp64) && defined(__loongarch_double_float)
|
||||
options.MCOptions.ABIName = "lp64d";
|
||||
#else
|
||||
#error "GALLIVM: unknown target loongarch float abi"
|
||||
#endif
|
||||
#endif
|
||||
|
||||
JTMB.setOptions(options);
|
||||
|
@@ -414,6 +414,24 @@ lp_build_fill_mattrs(std::vector<std::string> &MAttrs)
|
||||
*/
|
||||
MAttrs = {"+m","+c","+a","+d","+f"};
|
||||
#endif
|
||||
|
||||
#if DETECT_ARCH_LOONGARCH64 == 1
|
||||
/*
|
||||
* TODO: Implement util_get_cpu_caps()
|
||||
*
|
||||
* No FPU-less LoongArch64 systems are ever shipped yet, and LP64D is
|
||||
* the default ABI, so FPU is enabled here.
|
||||
*
|
||||
* The Software development convention defaults to have "128-bit
|
||||
* vector", so LSX is enabled here, see
|
||||
* https://github.com/loongson/la-softdev-convention/releases/download/v0.1/la-softdev-convention.pdf
|
||||
*/
|
||||
MAttrs = {"+f","+d"};
|
||||
#if LLVM_VERSION_MAJOR == 17
|
||||
/* LLVM 17's LSX support is incomplete, so explicitly mask it */
|
||||
MAttrs.push_back("-lsx");
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -3848,7 +3848,7 @@ atomic_emit(
|
||||
LLVMValueRef atom_res = lp_build_alloca(gallivm,
|
||||
uint_bld->vec_type, "");
|
||||
|
||||
LLVMValueRef ssbo_limit;
|
||||
LLVMValueRef ssbo_limit = NULL;
|
||||
if (!is_shared) {
|
||||
ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
|
||||
ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
|
||||
|
@@ -1090,6 +1090,7 @@ d3d12_video_encoder_convert_profile_to_d3d12_enc_profile_h264(enum pipe_video_pr
|
||||
{
|
||||
switch (profile) {
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
|
||||
{
|
||||
return D3D12_VIDEO_ENCODER_PROFILE_H264_MAIN;
|
||||
|
@@ -873,6 +873,7 @@ d3d12_has_video_encode_support(struct pipe_screen *pscreen,
|
||||
switch (profile) {
|
||||
#if VIDEO_CODEC_H264ENC
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH10:
|
||||
|
@@ -47,6 +47,7 @@
|
||||
#include "pan_cmdstream.h"
|
||||
#include "pan_context.h"
|
||||
#include "pan_csf.h"
|
||||
#include "pan_format.h"
|
||||
#include "pan_indirect_dispatch.h"
|
||||
#include "pan_jm.h"
|
||||
#include "pan_job.h"
|
||||
@@ -195,7 +196,12 @@ panfrost_create_sampler_state(struct pipe_context *pctx,
|
||||
* swizzle derived from the format, to allow more formats than the
|
||||
* hardware otherwise supports. When packing border colours, we need to
|
||||
* undo this bijection, by swizzling with its inverse.
|
||||
* On v10+, watch out for depth+stencil formats, because those have a
|
||||
* swizzle that doesn't really apply to the border color
|
||||
*/
|
||||
#if PAN_ARCH >= 10
|
||||
if (!util_format_is_depth_and_stencil(cso->border_color_format)) {
|
||||
#endif
|
||||
unsigned mali_format =
|
||||
GENX(panfrost_format_from_pipe_format)(cso->border_color_format)->hw;
|
||||
enum mali_rgb_component_order order = mali_format & BITFIELD_MASK(12);
|
||||
@@ -207,6 +213,10 @@ panfrost_create_sampler_state(struct pipe_context *pctx,
|
||||
util_format_apply_color_swizzle(&so->base.border_color, &cso->border_color,
|
||||
inverted_swizzle,
|
||||
false /* is_integer (irrelevant) */);
|
||||
#if PAN_ARCH >= 10
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
bool using_nearest = cso->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST;
|
||||
@@ -378,6 +388,17 @@ panfrost_emit_blend(struct panfrost_batch *batch, void *rts,
|
||||
panfrost_dithered_format_from_pipe_format)(format, dithered);
|
||||
cfg.fixed_function.rt = i;
|
||||
|
||||
#if PAN_ARCH >= 7
|
||||
if (cfg.mode == MALI_BLEND_MODE_FIXED_FUNCTION &&
|
||||
(cfg.fixed_function.conversion.memory_format & 0xff) ==
|
||||
MALI_RGB_COMPONENT_ORDER_RGB1) {
|
||||
/* fixed function does not like RGB1 as the component order */
|
||||
/* force this field to be the default 0 (RGBA) */
|
||||
cfg.fixed_function.conversion.memory_format &= ~0xff;
|
||||
cfg.fixed_function.conversion.memory_format |=
|
||||
MALI_RGB_COMPONENT_ORDER_RGBA;
|
||||
}
|
||||
#endif
|
||||
#if PAN_ARCH <= 7
|
||||
if (!info.opaque) {
|
||||
cfg.fixed_function.alpha_zero_nop = info.alpha_zero_nop;
|
||||
|
@@ -208,7 +208,7 @@ panfrost_get_param(struct pipe_screen *screen, enum pipe_cap param)
|
||||
* handles this but we need to fix up the border colour.
|
||||
*/
|
||||
case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
|
||||
if (dev->arch == 7)
|
||||
if (dev->arch == 7 || dev->arch >= 10)
|
||||
return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_FREEDRENO;
|
||||
else
|
||||
return 0;
|
||||
|
@@ -627,7 +627,10 @@ static int peephole_mad_presub_bias(
|
||||
if (rc_inline_to_float(src1_reg.Index) != 2.0f)
|
||||
return 0;
|
||||
} else {
|
||||
struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index];
|
||||
if (src1_reg.File != RC_FILE_CONSTANT)
|
||||
return 0;
|
||||
|
||||
struct rc_constant *constant = &c->Program.Constants.Constants[src1_reg.Index];
|
||||
if (constant->Type != RC_CONSTANT_IMMEDIATE)
|
||||
return 0;
|
||||
for (i = 0; i < 4; i++) {
|
||||
|
@@ -2924,8 +2924,14 @@ begin_rendering(struct zink_context *ctx, bool check_msaa_expand)
|
||||
if (has_swapchain) {
|
||||
ASSERTED struct zink_resource *res = zink_resource(ctx->fb_state.cbufs[0]->texture);
|
||||
zink_render_fixup_swapchain(ctx);
|
||||
if (res->use_damage)
|
||||
if (res->use_damage) {
|
||||
ctx->dynamic_fb.info.renderArea = res->damage;
|
||||
} else {
|
||||
ctx->dynamic_fb.info.renderArea.offset.x = 0;
|
||||
ctx->dynamic_fb.info.renderArea.offset.y = 0;
|
||||
ctx->dynamic_fb.info.renderArea.extent.width = ctx->fb_state.width;
|
||||
ctx->dynamic_fb.info.renderArea.extent.height = ctx->fb_state.height;
|
||||
}
|
||||
/* clamp for late swapchain resize */
|
||||
if (res->base.b.width0 < ctx->dynamic_fb.info.renderArea.extent.width)
|
||||
ctx->dynamic_fb.info.renderArea.extent.width = res->base.b.width0;
|
||||
|
@@ -416,7 +416,7 @@ init_program_db(struct zink_screen *screen, struct zink_program *pg, enum zink_d
|
||||
{
|
||||
VkDeviceSize val;
|
||||
VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, dsl, &val);
|
||||
pg->dd.db_size[type] = val;
|
||||
pg->dd.db_size[type] = align64(val, screen->info.db_props.descriptorBufferOffsetAlignment);
|
||||
pg->dd.db_offset[type] = rzalloc_array(pg, uint32_t, num_bindings);
|
||||
for (unsigned i = 0; i < num_bindings; i++) {
|
||||
VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, dsl, bindings[i].binding, &val);
|
||||
@@ -740,7 +740,7 @@ zink_descriptor_shader_init(struct zink_screen *screen, struct zink_shader *shad
|
||||
shader->precompile.num_bindings = num_bindings;
|
||||
VkDeviceSize val;
|
||||
VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, shader->precompile.dsl, &val);
|
||||
shader->precompile.db_size = val;
|
||||
shader->precompile.db_size = align64(val, screen->info.db_props.descriptorBufferOffsetAlignment);
|
||||
shader->precompile.db_offset = rzalloc_array(shader, uint32_t, num_bindings);
|
||||
for (unsigned i = 0; i < num_bindings; i++) {
|
||||
VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, shader->precompile.dsl, bindings[i].binding, &val);
|
||||
@@ -1146,6 +1146,7 @@ update_separable(struct zink_context *ctx, struct zink_program *pg)
|
||||
}
|
||||
bs->dd.cur_db_offset[use_buffer] = bs->dd.db_offset;
|
||||
bs->dd.db_offset += zs->precompile.db_size;
|
||||
|
||||
/* TODO: maybe compile multiple variants for different set counts for compact mode? */
|
||||
int set_idx = screen->info.have_EXT_shader_object ? j : j == MESA_SHADER_FRAGMENT;
|
||||
VKCTX(CmdSetDescriptorBufferOffsetsEXT)(bs->cmdbuf, VK_PIPELINE_BIND_POINT_GRAPHICS, pg->layout, set_idx, 1, &use_buffer, &offset);
|
||||
@@ -1633,7 +1634,7 @@ zink_descriptors_init(struct zink_context *ctx)
|
||||
VkDeviceSize val;
|
||||
for (unsigned i = 0; i < 2; i++) {
|
||||
VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, ctx->dd.push_dsl[i]->layout, &val);
|
||||
ctx->dd.db_size[i] = val;
|
||||
ctx->dd.db_size[i] = align64(val, screen->info.db_props.descriptorBufferOffsetAlignment);
|
||||
}
|
||||
for (unsigned i = 0; i < ZINK_GFX_SHADER_COUNT; i++) {
|
||||
VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, i, &val);
|
||||
@@ -1709,7 +1710,7 @@ zink_descriptor_util_init_fbfetch(struct zink_context *ctx)
|
||||
if (zink_descriptor_mode == ZINK_DESCRIPTOR_MODE_DB) {
|
||||
VkDeviceSize val;
|
||||
VKSCR(GetDescriptorSetLayoutSizeEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, &val);
|
||||
ctx->dd.db_size[0] = val;
|
||||
ctx->dd.db_size[0] = align64(val, screen->info.db_props.descriptorBufferOffsetAlignment);
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(ctx->dd.db_offset); i++) {
|
||||
VKSCR(GetDescriptorSetLayoutBindingOffsetEXT)(screen->dev, ctx->dd.push_dsl[0]->layout, i, &val);
|
||||
ctx->dd.db_offset[i] = val;
|
||||
|
@@ -887,6 +887,8 @@ zink_kopper_present_queue(struct zink_screen *screen, struct zink_resource *res,
|
||||
kopper_present(cpi, screen, -1);
|
||||
}
|
||||
res->obj->indefinite_acquire = false;
|
||||
res->use_damage = false;
|
||||
memset(&res->damage, 0, sizeof(res->damage));
|
||||
cdt->swapchain->images[res->obj->dt_idx].acquired = NULL;
|
||||
res->obj->dt_idx = UINT32_MAX;
|
||||
}
|
||||
|
@@ -1542,10 +1542,25 @@ zink_set_damage_region(struct pipe_screen *pscreen, struct pipe_resource *pres,
|
||||
|
||||
for (unsigned i = 0; i < nrects; i++) {
|
||||
int y = pres->height0 - rects[i].y - rects[i].height;
|
||||
res->damage.extent.width = MAX2(res->damage.extent.width, rects[i].x + rects[i].width);
|
||||
res->damage.extent.height = MAX2(res->damage.extent.height, y + rects[i].height);
|
||||
res->damage.offset.x = MIN2(res->damage.offset.x, rects[i].x);
|
||||
res->damage.offset.y = MIN2(res->damage.offset.y, y);
|
||||
/* convert back to coord-based rects to use coordinate calcs */
|
||||
struct u_rect currect = {
|
||||
.x0 = res->damage.offset.x,
|
||||
.y0 = res->damage.offset.y,
|
||||
.x1 = res->damage.offset.x + res->damage.extent.width,
|
||||
.y1 = res->damage.offset.y + res->damage.extent.height,
|
||||
};
|
||||
struct u_rect newrect = {
|
||||
.x0 = rects[i].x,
|
||||
.y0 = y,
|
||||
.x1 = rects[i].x + rects[i].width,
|
||||
.y1 = y + rects[i].height,
|
||||
};
|
||||
struct u_rect u;
|
||||
u_rect_union(&u, &currect, &newrect);
|
||||
res->damage.extent.width = u.y1 - u.y0;
|
||||
res->damage.extent.height = u.x1 - u.x0;
|
||||
res->damage.offset.x = u.x0;
|
||||
res->damage.offset.y = u.y0;
|
||||
}
|
||||
|
||||
res->use_damage = nrects > 0;
|
||||
|
@@ -2346,7 +2346,7 @@ dri_swrast_kms_init_screen(struct dri_screen *screen, bool driver_name_is_inferr
|
||||
#endif
|
||||
|
||||
if (!pscreen)
|
||||
goto fail;
|
||||
return NULL;
|
||||
|
||||
dri_init_options(screen);
|
||||
dri2_init_screen_extensions(screen, pscreen, true);
|
||||
@@ -2364,7 +2364,7 @@ dri_swrast_kms_init_screen(struct dri_screen *screen, bool driver_name_is_inferr
|
||||
return configs;
|
||||
|
||||
fail:
|
||||
dri_release_screen(screen);
|
||||
pipe_loader_release(&screen->dev, 1);
|
||||
|
||||
#endif // HAVE_SWRAST
|
||||
return NULL;
|
||||
|
@@ -35,7 +35,7 @@ pub static DISPATCH: cl_icd_dispatch = cl_icd_dispatch {
|
||||
clRetainCommandQueue: Some(clRetainCommandQueue),
|
||||
clReleaseCommandQueue: Some(clReleaseCommandQueue),
|
||||
clGetCommandQueueInfo: Some(clGetCommandQueueInfo),
|
||||
clSetCommandQueueProperty: None,
|
||||
clSetCommandQueueProperty: Some(clSetCommandQueueProperty),
|
||||
clCreateBuffer: Some(clCreateBuffer),
|
||||
clCreateImage2D: Some(clCreateImage2D),
|
||||
clCreateImage3D: Some(clCreateImage3D),
|
||||
|
@@ -367,7 +367,14 @@ fn set_kernel_arg(
|
||||
return Err(CL_INVALID_ARG_SIZE);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
|
||||
KernelArgType::Sampler => {
|
||||
if arg_size != std::mem::size_of::<cl_sampler>() {
|
||||
return Err(CL_INVALID_ARG_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
KernelArgType::Constant => {
|
||||
if arg.size != arg_size {
|
||||
return Err(CL_INVALID_ARG_SIZE);
|
||||
}
|
||||
|
@@ -2192,13 +2192,20 @@ fn enqueue_unmap_mem_object(
|
||||
|
||||
// SAFETY: it's required that applications do not cause data races
|
||||
let mapped_ptr = unsafe { MutMemoryPtr::from_ptr(mapped_ptr) };
|
||||
let needs_sync = m.unmap(mapped_ptr)?;
|
||||
create_and_queue(
|
||||
q,
|
||||
CL_COMMAND_UNMAP_MEM_OBJECT,
|
||||
evs,
|
||||
event,
|
||||
false,
|
||||
Box::new(move |q, ctx| m.unmap(q, ctx, mapped_ptr)),
|
||||
Box::new(move |q, ctx| {
|
||||
if needs_sync {
|
||||
m.sync_unmap(q, ctx, mapped_ptr)
|
||||
} else {
|
||||
Ok(())
|
||||
}
|
||||
}),
|
||||
)
|
||||
}
|
||||
|
||||
|
@@ -41,6 +41,22 @@ impl CLInfo<cl_command_queue_info> for cl_command_queue {
|
||||
}
|
||||
}
|
||||
|
||||
#[cl_entrypoint(clSetCommandQueueProperty)]
|
||||
fn set_command_queue_property(
|
||||
_command_queue: cl_command_queue,
|
||||
_properties: cl_command_queue_properties,
|
||||
_enable: cl_bool,
|
||||
_old_properties: *mut cl_command_queue_properties,
|
||||
) -> CLResult<()> {
|
||||
// clSetCommandQueueProperty may unconditionally return an error if no devices in the context
|
||||
// associated with command_queue support modifying the properties of a command-queue. Support
|
||||
// for modifying the properties of a command-queue is required only for OpenCL 1.0 devices.
|
||||
//
|
||||
// CL_INVALID_OPERATION if no devices in the context associated with command_queue support
|
||||
// modifying the properties of a command-queue.
|
||||
Err(CL_INVALID_OPERATION)
|
||||
}
|
||||
|
||||
fn valid_command_queue_properties(properties: cl_command_queue_properties) -> bool {
|
||||
let valid_flags = cl_bitfield::from(
|
||||
CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
|
||||
|
@@ -39,6 +39,8 @@ struct Mapping<T> {
|
||||
layout: Layout,
|
||||
writes: bool,
|
||||
ptr: Option<MutMemoryPtr>,
|
||||
/// reference count from the API perspective. Once it reaches 0, we need to write back the
|
||||
/// mappings content to the GPU resource.
|
||||
count: u32,
|
||||
inner: T,
|
||||
}
|
||||
@@ -152,10 +154,17 @@ impl Mem {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
match self {
|
||||
Self::Buffer(b) => b.unmap(q, ctx, ptr),
|
||||
Self::Image(i) => i.unmap(q, ctx, ptr),
|
||||
Self::Buffer(b) => b.sync_unmap(q, ctx, ptr),
|
||||
Self::Image(i) => i.sync_unmap(q, ctx, ptr),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
|
||||
match self {
|
||||
Self::Buffer(b) => b.unmap(ptr),
|
||||
Self::Image(i) => i.unmap(ptr),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -712,7 +721,9 @@ impl MemBase {
|
||||
|
||||
fn is_pure_user_memory(&self, d: &Device) -> CLResult<bool> {
|
||||
let r = self.get_res_of_dev(d)?;
|
||||
Ok(r.is_user())
|
||||
// 1Dbuffer objects are weird. The parent memory object can be a host_ptr thing, but we are
|
||||
// not allowed to actually return a pointer based on the host_ptr when mapping.
|
||||
Ok(r.is_user() && !self.host_ptr().is_null())
|
||||
}
|
||||
|
||||
fn map<T>(
|
||||
@@ -912,7 +923,9 @@ impl Buffer {
|
||||
}
|
||||
|
||||
fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
|
||||
self.maps.lock().unwrap().contains_key(ptr as usize)
|
||||
let mut maps = self.maps.lock().unwrap();
|
||||
let entry = maps.entry(ptr as usize);
|
||||
matches!(entry, Entry::Occupied(entry) if entry.get().count > 0)
|
||||
}
|
||||
|
||||
pub fn map(&self, size: usize, offset: usize, writes: bool) -> CLResult<MutMemoryPtr> {
|
||||
@@ -993,6 +1006,31 @@ impl Buffer {
|
||||
self.read(q, ctx, mapping.offset, ptr, mapping.size())
|
||||
}
|
||||
|
||||
pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
// no need to update
|
||||
if self.is_pure_user_memory(q.device)? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
|
||||
Entry::Vacant(_) => Err(CL_INVALID_VALUE),
|
||||
Entry::Occupied(entry) => {
|
||||
let mapping = entry.get();
|
||||
|
||||
if mapping.writes {
|
||||
self.write(q, ctx, mapping.offset, ptr.into(), mapping.size())?;
|
||||
}
|
||||
|
||||
// only remove if the mapping wasn't reused in the meantime
|
||||
if mapping.count == 0 {
|
||||
entry.remove();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn tx<'a>(
|
||||
&self,
|
||||
q: &Queue,
|
||||
@@ -1014,22 +1052,16 @@ impl Buffer {
|
||||
}
|
||||
|
||||
// TODO: only sync on unmap when the memory is not mapped for writing
|
||||
pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
let mapping = match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
|
||||
Entry::Vacant(_) => return Err(CL_INVALID_VALUE),
|
||||
pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
|
||||
match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
|
||||
Entry::Vacant(_) => Err(CL_INVALID_VALUE),
|
||||
Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().count -= 1;
|
||||
(entry.get().count == 0).then(|| entry.remove())
|
||||
let entry = entry.get_mut();
|
||||
debug_assert!(entry.count > 0);
|
||||
entry.count -= 1;
|
||||
Ok(entry.count == 0)
|
||||
}
|
||||
};
|
||||
|
||||
if let Some(mapping) = mapping {
|
||||
if mapping.writes && !self.is_pure_user_memory(q.device)? {
|
||||
self.write(q, ctx, mapping.offset, ptr.into(), mapping.size())?;
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn write(
|
||||
@@ -1289,7 +1321,9 @@ impl Image {
|
||||
}
|
||||
|
||||
fn is_mapped_ptr(&self, ptr: *mut c_void) -> bool {
|
||||
self.maps.lock().unwrap().contains_key(ptr as usize)
|
||||
let mut maps = self.maps.lock().unwrap();
|
||||
let entry = maps.entry(ptr as usize);
|
||||
matches!(entry, Entry::Occupied(entry) if entry.get().count > 0)
|
||||
}
|
||||
|
||||
pub fn is_parent_buffer(&self) -> bool {
|
||||
@@ -1309,8 +1343,33 @@ impl Image {
|
||||
*row_pitch = self.image_desc.row_pitch()? as usize;
|
||||
*slice_pitch = self.image_desc.slice_pitch();
|
||||
|
||||
let (offset, size) =
|
||||
CLVec::calc_offset_size(origin, region, [pixel_size, *row_pitch, *slice_pitch]);
|
||||
let offset = CLVec::calc_offset(origin, [pixel_size, *row_pitch, *slice_pitch]);
|
||||
|
||||
// From the CL Spec:
|
||||
//
|
||||
// The pointer returned maps a 1D, 2D or 3D region starting at origin and is at least
|
||||
// region[0] pixels in size for a 1D image, 1D image buffer or 1D image array,
|
||||
// (image_row_pitch × region[1]) pixels in size for a 2D image or 2D image array, and
|
||||
// (image_slice_pitch × region[2]) pixels in size for a 3D image. The result of a memory
|
||||
// access outside this region is undefined.
|
||||
//
|
||||
// It's not guaranteed that the row_pitch is taken into account for 1D images, but the CL
|
||||
// CTS relies on this behavior.
|
||||
//
|
||||
// Also note, that the spec wording is wrong in regards to arrays, which need to take the
|
||||
// image_slice_pitch into account.
|
||||
let size = if self.image_desc.is_array() || self.image_desc.dims() == 3 {
|
||||
debug_assert_ne!(*slice_pitch, 0);
|
||||
// the slice count is in region[1] for 1D array images
|
||||
if self.mem_type == CL_MEM_OBJECT_IMAGE1D_ARRAY {
|
||||
region[1] * *slice_pitch
|
||||
} else {
|
||||
region[2] * *slice_pitch
|
||||
}
|
||||
} else {
|
||||
debug_assert_ne!(*row_pitch, 0);
|
||||
region[1] * *row_pitch
|
||||
};
|
||||
|
||||
let layout;
|
||||
unsafe {
|
||||
@@ -1418,6 +1477,41 @@ impl Image {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn sync_unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
// no need to update
|
||||
if self.is_pure_user_memory(q.device)? {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
|
||||
Entry::Vacant(_) => Err(CL_INVALID_VALUE),
|
||||
Entry::Occupied(entry) => {
|
||||
let mapping = entry.get();
|
||||
let row_pitch = self.image_desc.row_pitch()? as usize;
|
||||
let slice_pitch = self.image_desc.slice_pitch();
|
||||
|
||||
if mapping.writes {
|
||||
self.write(
|
||||
ptr.into(),
|
||||
q,
|
||||
ctx,
|
||||
&mapping.region,
|
||||
row_pitch,
|
||||
slice_pitch,
|
||||
&mapping.origin,
|
||||
)?;
|
||||
}
|
||||
|
||||
// only remove if the mapping wasn't reused in the meantime
|
||||
if mapping.count == 0 {
|
||||
entry.remove();
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn tx_image<'a>(
|
||||
&self,
|
||||
q: &Queue,
|
||||
@@ -1430,33 +1524,16 @@ impl Image {
|
||||
}
|
||||
|
||||
// TODO: only sync on unmap when the memory is not mapped for writing
|
||||
pub fn unmap(&self, q: &Queue, ctx: &PipeContext, ptr: MutMemoryPtr) -> CLResult<()> {
|
||||
let mapping = match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
|
||||
Entry::Vacant(_) => return Err(CL_INVALID_VALUE),
|
||||
pub fn unmap(&self, ptr: MutMemoryPtr) -> CLResult<bool> {
|
||||
match self.maps.lock().unwrap().entry(ptr.as_ptr() as usize) {
|
||||
Entry::Vacant(_) => Err(CL_INVALID_VALUE),
|
||||
Entry::Occupied(mut entry) => {
|
||||
entry.get_mut().count -= 1;
|
||||
(entry.get().count == 0).then(|| entry.remove())
|
||||
}
|
||||
};
|
||||
|
||||
let row_pitch = self.image_desc.row_pitch()? as usize;
|
||||
let slice_pitch = self.image_desc.slice_pitch();
|
||||
|
||||
if let Some(mapping) = mapping {
|
||||
if mapping.writes && !self.is_pure_user_memory(q.device)? {
|
||||
self.write(
|
||||
ptr.into(),
|
||||
q,
|
||||
ctx,
|
||||
&mapping.region,
|
||||
row_pitch,
|
||||
slice_pitch,
|
||||
&mapping.origin,
|
||||
)?;
|
||||
let entry = entry.get_mut();
|
||||
debug_assert!(entry.count > 0);
|
||||
entry.count -= 1;
|
||||
Ok(entry.count == 0)
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn write(
|
||||
|
@@ -22,8 +22,14 @@ if with_ld_dynamic_list
|
||||
gallium_dri_link_depends += files('../dri.dyn')
|
||||
endif
|
||||
|
||||
if get_option('unversion-libgallium') or with_platform_android
|
||||
libgallium_name = 'gallium_dri'
|
||||
else
|
||||
libgallium_name = 'gallium-@0@'.format(meson.project_version())
|
||||
endif
|
||||
|
||||
libgallium_dri = shared_library(
|
||||
'gallium-@0@'.format(meson.project_version()),
|
||||
libgallium_name,
|
||||
files('dri_target.c'),
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux, inc_util, inc_gallium_drivers,
|
||||
|
@@ -593,7 +593,10 @@ blorp_emit_cc_viewport(struct blorp_batch *batch)
|
||||
{
|
||||
uint32_t cc_vp_offset;
|
||||
|
||||
if (batch->blorp->config.use_cached_dynamic_states) {
|
||||
/* Somehow reusing CC_VIEWPORT on Gfx9 is causing issues :
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647
|
||||
*/
|
||||
if (GFX_VER != 9 && batch->blorp->config.use_cached_dynamic_states) {
|
||||
cc_vp_offset = blorp_get_dynamic_state(batch, BLORP_DYNAMIC_STATE_CC_VIEWPORT);
|
||||
} else {
|
||||
blorp_emit_dynamic(batch, GENX(CC_VIEWPORT), vp, 32, &cc_vp_offset) {
|
||||
|
@@ -75,6 +75,9 @@ opt_saturate_propagation_local(fs_visitor &s, bblock_t *block)
|
||||
!scan_inst->can_change_types()))
|
||||
break;
|
||||
|
||||
if (scan_inst->flags_written(s.devinfo) != 0)
|
||||
break;
|
||||
|
||||
if (scan_inst->saturate) {
|
||||
inst->saturate = false;
|
||||
progress = true;
|
||||
|
@@ -24,6 +24,24 @@
|
||||
#include "brw_nir_rt.h"
|
||||
#include "brw_nir_rt_builder.h"
|
||||
|
||||
static nir_def *
|
||||
nir_build_vec3_mat_mult_col_major(nir_builder *b, nir_def *vec,
|
||||
nir_def *matrix[], bool translation)
|
||||
{
|
||||
nir_def *result_components[3] = {
|
||||
nir_channel(b, matrix[3], 0),
|
||||
nir_channel(b, matrix[3], 1),
|
||||
nir_channel(b, matrix[3], 2),
|
||||
};
|
||||
for (unsigned i = 0; i < 3; ++i) {
|
||||
for (unsigned j = 0; j < 3; ++j) {
|
||||
nir_def *v = nir_fmul(b, nir_channels(b, vec, 1 << j), nir_channels(b, matrix[j], 1 << i));
|
||||
result_components[i] = (translation || j) ? nir_fadd(b, result_components[i], v) : v;
|
||||
}
|
||||
}
|
||||
return nir_vec(b, result_components, 3);
|
||||
}
|
||||
|
||||
static nir_def *
|
||||
build_leaf_is_procedural(nir_builder *b, struct brw_nir_rt_mem_hit_defs *hit)
|
||||
{
|
||||
@@ -163,11 +181,27 @@ lower_rt_intrinsics_impl(nir_function_impl *impl,
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_object_origin:
|
||||
sysval = object_ray_in.orig;
|
||||
if (stage == MESA_SHADER_CLOSEST_HIT) {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
|
||||
sysval = nir_build_vec3_mat_mult_col_major(
|
||||
b, world_ray_in.orig, leaf.world_to_object, true);
|
||||
} else {
|
||||
sysval = object_ray_in.orig;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_object_direction:
|
||||
sysval = object_ray_in.dir;
|
||||
if (stage == MESA_SHADER_CLOSEST_HIT) {
|
||||
struct brw_nir_rt_bvh_instance_leaf_defs leaf;
|
||||
brw_nir_rt_load_bvh_instance_leaf(b, &leaf, hit_in.inst_leaf_ptr);
|
||||
|
||||
sysval = nir_build_vec3_mat_mult_col_major(
|
||||
b, world_ray_in.dir, leaf.world_to_object, false);
|
||||
} else {
|
||||
sysval = object_ray_in.dir;
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_ray_t_min:
|
||||
|
@@ -45,7 +45,8 @@ using namespace elk;
|
||||
*/
|
||||
|
||||
static bool
|
||||
opt_saturate_propagation_local(const fs_live_variables &live, elk_bblock_t *block)
|
||||
opt_saturate_propagation_local(const intel_device_info *devinfo,
|
||||
const fs_live_variables &live, elk_bblock_t *block)
|
||||
{
|
||||
bool progress = false;
|
||||
int ip = block->end_ip + 1;
|
||||
@@ -74,6 +75,16 @@ opt_saturate_propagation_local(const fs_live_variables &live, elk_bblock_t *bloc
|
||||
!scan_inst->can_change_types()))
|
||||
break;
|
||||
|
||||
/* min and max pseudo ops modify the flags on Gfx4 and Gfx5, but
|
||||
* it's not based on the result of the operation. This is the one
|
||||
* case where it is always safe to propagate a saturate to an
|
||||
* instruction that writes the flags.
|
||||
*/
|
||||
if (scan_inst->flags_written(devinfo) != 0 &&
|
||||
scan_inst->opcode != ELK_OPCODE_SEL) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (scan_inst->saturate) {
|
||||
inst->saturate = false;
|
||||
progress = true;
|
||||
@@ -156,7 +167,7 @@ elk_fs_visitor::opt_saturate_propagation()
|
||||
bool progress = false;
|
||||
|
||||
foreach_block (block, cfg) {
|
||||
progress = opt_saturate_propagation_local(live, block) || progress;
|
||||
progress = opt_saturate_propagation_local(devinfo, live, block) || progress;
|
||||
}
|
||||
|
||||
/* Live intervals are still valid. */
|
||||
|
@@ -2023,15 +2023,15 @@ intel_device_info_wa_stepping(struct intel_device_info *devinfo)
|
||||
uint32_t
|
||||
intel_device_info_get_max_slm_size(const struct intel_device_info *devinfo)
|
||||
{
|
||||
uint32_t k_bytes = 0;
|
||||
uint32_t bytes = 0;
|
||||
|
||||
if (devinfo->verx10 >= 200) {
|
||||
k_bytes = intel_device_info_get_max_preferred_slm_size(devinfo);
|
||||
bytes = intel_device_info_get_max_preferred_slm_size(devinfo);
|
||||
} else {
|
||||
k_bytes = 64;
|
||||
bytes = 64 * 1024;
|
||||
}
|
||||
|
||||
return k_bytes * 1024;
|
||||
return bytes;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
|
@@ -1825,7 +1825,13 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC)) {
|
||||
/* Force CC_VIEWPORT reallocation on Gfx9 when reprogramming
|
||||
* 3DSTATE_VIEWPORT_STATE_POINTERS_CC :
|
||||
* https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647
|
||||
*/
|
||||
if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
|
||||
(GFX_VER == 9 &&
|
||||
BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR))) {
|
||||
hw_state->vp_cc.state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
hw_state->vp_cc.count * 8, 32);
|
||||
|
@@ -110,7 +110,8 @@ genX(emit_simpler_shader_init_fragment)(struct anv_simple_shader *state)
|
||||
|
||||
genX(emit_l3_config)(batch, device, state->l3_config);
|
||||
|
||||
state->cmd_buffer->state.current_l3_config = state->l3_config;
|
||||
if (state->cmd_buffer)
|
||||
state->cmd_buffer->state.current_l3_config = state->l3_config;
|
||||
|
||||
enum intel_urb_deref_block_size deref_block_size;
|
||||
genX(emit_urb_setup)(device, batch, state->l3_config,
|
||||
|
@@ -13,6 +13,8 @@
|
||||
|
||||
#include "vk_format.h"
|
||||
|
||||
#include "clb097.h"
|
||||
|
||||
VkFormatFeatureFlags2
|
||||
nvk_get_buffer_format_features(struct nvk_physical_device *pdev,
|
||||
VkFormat vk_format)
|
||||
@@ -29,6 +31,8 @@ nvk_get_buffer_format_features(struct nvk_physical_device *pdev,
|
||||
if (nil_format_supports_storage(&pdev->info, p_format)) {
|
||||
features |= VK_FORMAT_FEATURE_2_STORAGE_TEXEL_BUFFER_BIT |
|
||||
VK_FORMAT_FEATURE_2_STORAGE_WRITE_WITHOUT_FORMAT_BIT;
|
||||
if (pdev->info.cls_eng3d >= MAXWELL_A)
|
||||
features |= VK_FORMAT_FEATURE_2_STORAGE_READ_WITHOUT_FORMAT_BIT;
|
||||
}
|
||||
|
||||
if (p_format == PIPE_FORMAT_R32_UINT || p_format == PIPE_FORMAT_R32_SINT)
|
||||
|
@@ -267,6 +267,9 @@ vk_image_usage_to_format_features(VkImageUsageFlagBits usage_flag)
|
||||
return VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT;
|
||||
case VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT:
|
||||
return VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
case VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT:
|
||||
return VK_FORMAT_FEATURE_2_COLOR_ATTACHMENT_BIT |
|
||||
VK_FORMAT_FEATURE_2_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
|
@@ -30,18 +30,26 @@
|
||||
|
||||
/* Convenience */
|
||||
|
||||
#define MALI_BLEND_AU_R8G8B8A8 (MALI_RGBA8_TB << 12)
|
||||
#define MALI_BLEND_PU_R8G8B8A8 (MALI_RGBA8_TB << 12)
|
||||
#define MALI_BLEND_AU_R10G10B10A2 (MALI_RGB10_A2_TB << 12)
|
||||
#define MALI_BLEND_PU_R10G10B10A2 (MALI_RGB10_A2_TB << 12)
|
||||
#define MALI_BLEND_AU_R8G8B8A2 (MALI_RGB8_A2_AU << 12)
|
||||
#define MALI_BLEND_PU_R8G8B8A2 (MALI_RGB8_A2_PU << 12)
|
||||
#define MALI_BLEND_AU_R4G4B4A4 (MALI_RGBA4_AU << 12)
|
||||
#define MALI_BLEND_PU_R4G4B4A4 (MALI_RGBA4_PU << 12)
|
||||
#define MALI_BLEND_AU_R5G6B5A0 (MALI_R5G6B5_AU << 12)
|
||||
#define MALI_BLEND_PU_R5G6B5A0 (MALI_R5G6B5_PU << 12)
|
||||
#define MALI_BLEND_AU_R5G5B5A1 (MALI_RGB5_A1_AU << 12)
|
||||
#define MALI_BLEND_PU_R5G5B5A1 (MALI_RGB5_A1_PU << 12)
|
||||
#if PAN_ARCH == 6
|
||||
#define MALI_RGBA_SWIZZLE PAN_V6_SWIZZLE(R, G, B, A)
|
||||
#define MALI_RGB1_SWIZZLE PAN_V6_SWIZZLE(R, G, B, A)
|
||||
#else
|
||||
#define MALI_RGBA_SWIZZLE MALI_RGB_COMPONENT_ORDER_RGBA
|
||||
#define MALI_RGB1_SWIZZLE MALI_RGB_COMPONENT_ORDER_RGB1
|
||||
#endif
|
||||
|
||||
/* Blend format words: the format value shifted left by 12, OR'd with the
 * component swizzle. Each replacement list is fully parenthesized so the
 * macros keep their value when combined with operators that bind tighter
 * than '|' (e.g. '&', '==', '<<') at the use site.
 */
#define MALI_BLEND_AU_R8G8B8A8    ((MALI_RGBA8_TB << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_PU_R8G8B8A8    ((MALI_RGBA8_TB << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_AU_R10G10B10A2 ((MALI_RGB10_A2_TB << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_PU_R10G10B10A2 ((MALI_RGB10_A2_TB << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_AU_R8G8B8A2    ((MALI_RGB8_A2_AU << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_PU_R8G8B8A2    ((MALI_RGB8_A2_PU << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_AU_R4G4B4A4    ((MALI_RGBA4_AU << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_PU_R4G4B4A4    ((MALI_RGBA4_PU << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_AU_R5G6B5A0    ((MALI_R5G6B5_AU << 12) | MALI_RGB1_SWIZZLE)
#define MALI_BLEND_PU_R5G6B5A0    ((MALI_R5G6B5_PU << 12) | MALI_RGB1_SWIZZLE)
#define MALI_BLEND_AU_R5G5B5A1    ((MALI_RGB5_A1_AU << 12) | MALI_RGBA_SWIZZLE)
#define MALI_BLEND_PU_R5G5B5A1    ((MALI_RGB5_A1_PU << 12) | MALI_RGBA_SWIZZLE)
|
||||
|
||||
#if PAN_ARCH <= 5
|
||||
#define BFMT2(pipe, internal, writeback, srgb) \
|
||||
@@ -50,18 +58,6 @@
|
||||
MALI_COLOR_FORMAT_##writeback, \
|
||||
{ 0, 0 }, \
|
||||
}
|
||||
#elif PAN_ARCH == 6
|
||||
#define BFMT2(pipe, internal, writeback, srgb) \
|
||||
[PIPE_FORMAT_##pipe] = { \
|
||||
MALI_COLOR_BUFFER_INTERNAL_FORMAT_##internal, \
|
||||
MALI_COLOR_FORMAT_##writeback, \
|
||||
{ \
|
||||
MALI_BLEND_PU_##internal | (srgb ? (1 << 20) : 0) | \
|
||||
PAN_V6_SWIZZLE(R, G, B, A), \
|
||||
MALI_BLEND_AU_##internal | (srgb ? (1 << 20) : 0) | \
|
||||
PAN_V6_SWIZZLE(R, G, B, A), \
|
||||
}, \
|
||||
}
|
||||
#else
|
||||
#define BFMT2(pipe, internal, writeback, srgb) \
|
||||
[PIPE_FORMAT_##pipe] = { \
|
||||
|
@@ -112,6 +112,14 @@
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* LoongArch: DETECT_ARCH_LOONGARCH64 is set when the compiler reports the
 * lp64 base ABI; any other LoongArch base ABI is rejected at compile time.
 */
#if defined(__loongarch__) && defined(__loongarch_lp64)
#  define DETECT_ARCH_LOONGARCH64 1
#elif defined(__loongarch__)
#  error "detect_arch: unknown target loongarch base ABI type"
#endif
|
||||
|
||||
#ifndef DETECT_ARCH_X86
|
||||
#define DETECT_ARCH_X86 0
|
||||
#endif
|
||||
@@ -168,4 +176,8 @@
|
||||
#define DETECT_ARCH_RISCV64 0
|
||||
#endif
|
||||
|
||||
/* Default to 0 so DETECT_ARCH_LOONGARCH64 can always be tested with #if. */
#if !defined(DETECT_ARCH_LOONGARCH64)
#  define DETECT_ARCH_LOONGARCH64 0
#endif
|
||||
|
||||
#endif /* UTIL_DETECT_ARCH_H_ */
|
||||
|
@@ -240,6 +240,12 @@ do { \
|
||||
# endif
|
||||
#endif
|
||||
|
||||
/* ATTRIBUTE_OPTIMIZE(flags): applies the compiler's per-function "optimize"
 * attribute with the given flags when HAVE_FUNC_ATTRIBUTE_OPTIMIZE is
 * defined, and expands to nothing otherwise.
 */
#if defined(HAVE_FUNC_ATTRIBUTE_OPTIMIZE)
#  define ATTRIBUTE_OPTIMIZE(flags) __attribute__((__optimize__((flags))))
#else
#  define ATTRIBUTE_OPTIMIZE(flags)
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
/**
|
||||
* Macro function that evaluates to true if T is a trivially
|
||||
|
@@ -49,7 +49,8 @@ func_b(void)
|
||||
debug_backtrace_dump(backtrace, 16);
|
||||
}
|
||||
|
||||
static void ATTRIBUTE_NOINLINE
|
||||
/* This function must emit a stack frame for the unit test to work */
|
||||
static void ATTRIBUTE_NOINLINE ATTRIBUTE_OPTIMIZE("no-omit-frame-pointer")
|
||||
func_c(struct debug_stack_frame *frames)
|
||||
{
|
||||
debug_backtrace_capture(frames, 0, 16);
|
||||
|
@@ -166,10 +166,9 @@ u_printf_impl(FILE *out, const char *buffer, size_t buffer_size,
|
||||
int arg_size = fmt->arg_sizes[i];
|
||||
size_t spec_pos = util_printf_next_spec_pos(format, 0);
|
||||
|
||||
if (spec_pos == -1) {
|
||||
u_printf_plain(out, format);
|
||||
continue;
|
||||
}
|
||||
/* If we hit an unused argument we skip all remaining ones */
|
||||
if (spec_pos == -1)
|
||||
break;
|
||||
|
||||
const char *token = util_printf_prev_tok(&format[spec_pos]);
|
||||
const char *next_format = &format[spec_pos + 1];
|
||||
|
@@ -58,6 +58,10 @@ static const struct debug_control debug_control[] = {
|
||||
{ NULL, },
|
||||
};
|
||||
|
||||
/* can_present_on_device callback that rejects every device/fd pair;
 * both arguments are ignored.
 */
static bool
present_false(VkPhysicalDevice pdevice, int fd)
{
   return false;
}
|
||||
|
||||
VkResult
|
||||
wsi_device_init(struct wsi_device *wsi,
|
||||
VkPhysicalDevice pdevice,
|
||||
@@ -270,6 +274,21 @@ wsi_device_init(struct wsi_device *wsi,
|
||||
}
|
||||
}
|
||||
|
||||
/* can_present_on_device is a function pointer used to determine if images
|
||||
* can be presented directly on a given device file descriptor (fd).
|
||||
* If HAVE_LIBDRM is defined, it will be initialized to a platform-specific
|
||||
* function (wsi_device_matches_drm_fd). Otherwise, it is initialized to
|
||||
* present_false to ensure that it always returns false, preventing potential
|
||||
* segmentation faults from unchecked calls.
|
||||
* Drivers for non-PCI based GPUs are expected to override this after calling
|
||||
* wsi_device_init().
|
||||
*/
|
||||
#ifdef HAVE_LIBDRM
|
||||
wsi->can_present_on_device = wsi_device_matches_drm_fd;
|
||||
#else
|
||||
wsi->can_present_on_device = present_false;
|
||||
#endif
|
||||
|
||||
return VK_SUCCESS;
|
||||
fail:
|
||||
wsi_device_finish(wsi, alloc);
|
||||
|
@@ -1100,7 +1100,7 @@ wsi_display_surface_get_present_rectangles(VkIcdSurfaceBase *surface_base,
|
||||
wsi_display_mode *mode = wsi_display_mode_from_handle(surface->displayMode);
|
||||
VK_OUTARRAY_MAKE_TYPED(VkRect2D, out, pRects, pRectCount);
|
||||
|
||||
if (wsi_device_matches_drm_fd(wsi_device, mode->connector->wsi->fd)) {
|
||||
if (wsi_device->can_present_on_device(wsi_device->pdevice, mode->connector->wsi->fd)) {
|
||||
vk_outarray_append_typed(VkRect2D, &out, rect) {
|
||||
*rect = (VkRect2D) {
|
||||
.offset = { 0, 0 },
|
||||
@@ -3114,7 +3114,7 @@ wsi_AcquireDrmDisplayEXT(VkPhysicalDevice physicalDevice,
|
||||
VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice);
|
||||
struct wsi_device *wsi_device = pdevice->wsi_device;
|
||||
|
||||
if (!wsi_device_matches_drm_fd(wsi_device, drmFd))
|
||||
if (!wsi_device->can_present_on_device(wsi_device->pdevice, drmFd))
|
||||
return VK_ERROR_UNKNOWN;
|
||||
|
||||
struct wsi_display *wsi =
|
||||
@@ -3148,7 +3148,7 @@ wsi_GetDrmDisplayEXT(VkPhysicalDevice physicalDevice,
|
||||
VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice);
|
||||
struct wsi_device *wsi_device = pdevice->wsi_device;
|
||||
|
||||
if (!wsi_device_matches_drm_fd(wsi_device, drmFd)) {
|
||||
if (!wsi_device->can_present_on_device(wsi_device->pdevice, drmFd)) {
|
||||
*pDisplay = VK_NULL_HANDLE;
|
||||
return VK_ERROR_UNKNOWN;
|
||||
}
|
||||
|
@@ -440,10 +440,10 @@ wsi_common_drm_devices_equal(int fd_a, int fd_b)
|
||||
}
|
||||
|
||||
bool
|
||||
wsi_device_matches_drm_fd(const struct wsi_device *wsi, int drm_fd)
|
||||
wsi_device_matches_drm_fd(VkPhysicalDevice physicalDevice, int drm_fd)
|
||||
{
|
||||
if (wsi->can_present_on_device)
|
||||
return wsi->can_present_on_device(wsi->pdevice, drm_fd);
|
||||
VK_FROM_HANDLE(vk_physical_device, pdevice, physicalDevice);
|
||||
const struct wsi_device *wsi = pdevice->wsi_device;
|
||||
|
||||
drmDevicePtr fd_device;
|
||||
int ret = drmGetDevice2(drm_fd, 0, &fd_device);
|
||||
|
@@ -225,7 +225,7 @@ struct wsi_swapchain {
|
||||
};
|
||||
|
||||
bool
|
||||
wsi_device_matches_drm_fd(const struct wsi_device *wsi, int drm_fd);
|
||||
wsi_device_matches_drm_fd(VkPhysicalDevice pdevice, int drm_fd);
|
||||
|
||||
void
|
||||
wsi_wl_surface_destroy(VkIcdSurfaceBase *icd_surface, VkInstance _instance,
|
||||
|
@@ -160,7 +160,7 @@ wsi_x11_check_dri3_compatible(const struct wsi_device *wsi_dev,
|
||||
if (dri3_fd == -1)
|
||||
return true;
|
||||
|
||||
bool match = wsi_device_matches_drm_fd(wsi_dev, dri3_fd);
|
||||
bool match = wsi_dev->can_present_on_device(wsi_dev->pdevice, dri3_fd);
|
||||
|
||||
close(dri3_fd);
|
||||
|
||||
@@ -1071,9 +1071,11 @@ struct x11_image {
|
||||
* We need to keep track of them when considering present ID. */
|
||||
|
||||
/* This is arbitrarily chosen. With IMMEDIATE on a 3 deep swapchain,
|
||||
* we allow up to 48 outstanding presentations per vblank, which is more than enough
|
||||
* for any reasonable application. */
|
||||
#define X11_SWAPCHAIN_MAX_PENDING_COMPLETIONS 16
|
||||
* we allow over 300 outstanding presentations per vblank, which is more than enough
|
||||
* for any reasonable application.
|
||||
* This used to be 16, but it regressed benchmarks that did 15k+ FPS.
|
||||
* This should allow over 25k FPS on a 60 Hz monitor. Any more than this is comical. */
|
||||
#define X11_SWAPCHAIN_MAX_PENDING_COMPLETIONS 128
|
||||
uint32_t present_queued_count;
|
||||
struct x11_image_pending_completion pending_completions[X11_SWAPCHAIN_MAX_PENDING_COMPLETIONS];
|
||||
#ifdef HAVE_DRI3_EXPLICIT_SYNC
|
||||
|
Reference in New Issue
Block a user