Compare commits

...

39 Commits

Author SHA1 Message Date
Eric Engestrom
e716b08f86 VERSION: bump for 24.0.0-rc2 2024-01-17 22:28:20 +00:00
Friedrich Vock
9d1a064663 radv/rt: Add workaround to make leaves always active
DOOM Eternal builds acceleration structures with inactive primitives and
tries to make them active in later AS updates. This is disallowed by the
spec and triggers a GPU hang. Fix the hang by working around the bug.

Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27034>
(cherry picked from commit a9831caa14)
2024-01-17 21:42:02 +00:00
Boris Brezillon
f7f823c787 panvk: Fix access to uninitialized panvk_pipeline_layout::num_sets field
Commit 73eecffabd ("panvk: Use the vk_pipeline_layout base struct")
reworked the panvk logic to use vk_pipeline_layout, which contains the
number of descriptor set layouts referenced by a pipeline layout, thus
deprecating panvk_pipeline_layout::num_sets.

Make panvk_fill_non_vs_attribs() use vk_pipeline_layout::set_count
instead of panvk_pipeline_layout::num_sets and kill the latter so we
can't introduce new users.

Fixes: 73eecffabd ("panvk: Use the vk_pipeline_layout base struct")
Cc: mesa-stable
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Constantine Shablya <constantine.shablya@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27107>
(cherry picked from commit b18bfed2c5)
2024-01-17 21:41:44 +00:00
Boris Brezillon
b65d7520f6 panvk: Fix tracing
pandecode_next_frame() takes a decode context. Passing NULL leads to a
NULL deref.

Fixes: 56be9a55be ("pan/decode: handle more than one panfrost_device")
Cc: mesa-stable
Signed-off-by: Boris Brezillon <boris.brezillon@collabora.com>
Reviewed-by: Constantine Shablya <constantine.shablya@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27107>
(cherry picked from commit 35a02560c8)
2024-01-17 21:39:08 +00:00
Sviatoslav Peleshko
1246e54f1c nir: Use alu source components count in nir_alu_srcs_negative_equal
When we use source from ALU instruction directly, the default swizzle array
should be populated with the same amount of components as the src has.

Otherwise, if we use nir_ssa_alu_instr_src_components, it can return
the destination components count that is lower than component index
actually used in that source. This can lead to false equality
between 0 (uninitialized) and 0 (.x) in swizzle comparison below.

Fixes: c6ee46a7 ("nir: Add nir_alu_srcs_negative_equal")
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/8704
Signed-off-by: Sviatoslav Peleshko <sviatoslav.peleshko@globallogic.com>
Reviewed-by: Ian Romanick <ian.d.romanick@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/22655>
(cherry picked from commit 6b0bfdfa9e)
2024-01-17 21:39:06 +00:00
Erico Nunes
4175b4d547 Revert "ci: lima farm is down"
This reverts commit 601b826a5e.

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26905>
(cherry picked from commit 8bd4cae768)
2024-01-17 21:39:02 +00:00
Yonggang Luo
9732d1bdcd compiler/spirv: The spirv shader is binary, should write in binary mode
Fixes: 53265c8798 ("spirv: Add a mechanism for dumping failing shaders")

Signed-off-by: Yonggang Luo <luoyonggang@gmail.com>
Reviewed-by: Jesse Natalie <jenatali@microsoft.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26775>
(cherry picked from commit fd11818828)
2024-01-17 21:39:00 +00:00
Yiwei Zhang
8974222433 vulkan/wsi/wayland: fix returns and avoid leaks for failed swapchain
Cc: mesa-stable
Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org>
Tested-by: Eric Engestrom <eric@engestrom.ch>
Reviewed-by: Ryan Neph <ryanneph@google.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27080>
(cherry picked from commit dc5725ee29)
2024-01-17 21:38:56 +00:00
Eric Engestrom
ce34ec41cd ci: fix job dependency error in MRs for bin/ci/* scripts
'debian/x86_64_build' job needs 'debian/x86_64_build-base' job, but 'debian/x86_64_build-base' is not in any previous stage

Fixes: f298a0e709 ("ci: make sure we evaluate the python-test rules first")
Fixes: 2c9fdaa830 ("ci: fix python-test dependency error on merge requests")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27042>
(cherry picked from commit 2ce0b5ab0a)
2024-01-17 21:38:53 +00:00
Eric Engestrom
3dabc03b58 .pick_status.json: Update to 10e2dbb63b 2024-01-17 21:36:44 +00:00
David Rosca
25ae9134dd radeonsi/vcn: Fix H264 slice header when encoding I frames
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27029>
(cherry picked from commit 865abfde63)
2024-01-16 18:41:37 +00:00
Patrick Lerda
43a00ad0fa glsl/nir: fix gl_nir_cross_validate_outputs_to_inputs() memory leak
For instance, this issue is triggered with
vs-to-fs-overlap.shader_test -auto -fbo:
Direct leak of 24 byte(s) in 1 object(s) allocated from:
    #0 0x7fe64f58e9a7 in calloc (/usr/lib64/libasan.so.6+0xb19a7)
    #1 0x7fe642ca2839 in _mesa_symbol_table_ctor ../src/mesa/program/symbol_table.c:286
    #2 0x7fe642ff003d in gl_nir_cross_validate_outputs_to_inputs ../src/compiler/glsl/gl_nir_link_varyings.c:728
    #3 0x7fe642d7c7d8 in gl_nir_link_glsl ../src/compiler/glsl/gl_nir_linker.c:1357
    #4 0x7fe642be6931 in st_link_glsl_to_nir ../src/mesa/state_tracker/st_glsl_to_nir.cpp:562
    #5 0x7fe642be6931 in st_link_shader ../src/mesa/state_tracker/st_glsl_to_nir.cpp:944
    #6 0x7fe642acab55 in link_program ../src/mesa/main/shaderapi.c:1336
    #7 0x7fe642acab55 in link_program_error ../src/mesa/main/shaderapi.c:1447
    #8 0x7fe6424aa389 in _mesa_unmarshal_LinkProgram src/mapi/glapi/gen/marshal_generated2.c:1911
    #9 0x7fe641fd912b in glthread_unmarshal_batch ../src/mesa/main/glthread.c:139
    #10 0x7fe641f48d48 in util_queue_thread_func ../src/util/u_queue.c:309
    #11 0x7fe641fa442a in impl_thrd_routine ../src/c11/impl/threads_posix.c:67

Fixes: 7d1948e9b5 ("glsl: implement cross_validate_outputs_to_inputs() in nir linker")
Signed-off-by: Patrick Lerda <patrick9876@free.fr>
Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27071>
(cherry picked from commit bacace8634)
2024-01-16 18:41:36 +00:00
Karol Herbst
78fd14d938 rusticl/kernel: run opt/lower_memcpy later to fix a crash
nir_opt_memcpy requires explicit types to function properly. So run them
after lowering vars to explicit types.

Cc: mesa-stable
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27068>
(cherry picked from commit f896659894)
2024-01-16 18:41:35 +00:00
Tatsuyuki Ishi
c5b8590e6d radv: never set DISABLE_WR_CONFIRM for CP DMA clears and copies
This mirrors the changes in 69ff9c16bb ("radeonsi: never set
DISABLE_WR_CONFIRM for CP DMA clears and copies").

Cc: mesa-stable
Suggested-by: Vitaliy Triang3l Kuzmin <triang3l@yandex.ru>
Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27053>
(cherry picked from commit 43fb43ba2c)
2024-01-16 18:41:34 +00:00
Lucas Stach
9888a95130 etnaviv: disable 64bpp render/sampler formats
Vivante hardware handles 64bpp render targets and samplers in an odd way
by splitting the buffer and using a pair of texture samplers or a pair
of MRT outputs to access those resources. This isn't implemented in the
driver right now, so we should not advertise support for those formats.

CC: mesa-stable
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
Reviewed-by: Christian Gmeiner <cgmeiner@igalia.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26982>
(cherry picked from commit e481c1269c)
2024-01-16 18:41:33 +00:00
Eric Engestrom
05ff891088 .pick_status.json: Update to ff84aef116 2024-01-16 18:41:30 +00:00
Tapani Pälli
fc4180339c anv: check for wa 16013994831 in emit_so_memcpy_end
We are toggling preemption on/off during streamout, this is also
happening on gfx12 platforms, not just dg2.

Cc: mesa-stable
Signed-off-by: Tapani Pälli <tapani.palli@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27002>
(cherry picked from commit 36f428f1de)
2024-01-15 21:57:32 +00:00
Vinson Lee
b39ee4d766 intel/disasm: Remove duplicate variable reg_file
Fix defects reported by Coverity Scan.

Evaluation order violation (EVALUATION_ORDER)
write_write_typo: In reg_file = reg_file = brw_inst_dpas_3src_dst_reg_file(devinfo, inst),
reg_file is written twice with the same value.

Fixes: 1c92dad5cb ("intel/disasm: Disassembly support for DPAS")
Signed-off-by: Vinson Lee <vlee@freedesktop.org>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27056>
(cherry picked from commit 73835874a8)
2024-01-15 21:57:31 +00:00
Lionel Landwerlin
5b8984f32f anv: hide vendor ID for The Finals
XeSS workaround.

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10436
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27057>
(cherry picked from commit a34a113059)
2024-01-15 21:57:30 +00:00
Lionel Landwerlin
eb3d73073f intel/aux_map: fix fallback unmapping range on failure
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: 7c6faa1efe ("intel/aux_map: introduce ref count of L1 entries")
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27057>
(cherry picked from commit ff6041afdf)
2024-01-15 21:57:29 +00:00
Jesse Natalie
f19b7d8dfc mesa: Consider mesa format in addition to internal format for mip/cube completeness
Prior to 06b526de, the mesa format was used for these completeness checks.
That was to address the case where a *different* internal format selected
the *same* mesa format, and the texture shouldn't be considered compatible.
But this didn't address the case where the *same* internal format selected
a *different* mesa format, e.g. because the type passed to the TexImage
API was different.

An old WGL demo app called TexFilter.exe tries to redefine a mipped RGBA16
texture as RGBA8. This incorrect logic caused Mesa to try to copy the RGBA16
data from the smaller mips into the newly created RGBA8 data, because it
thought that the texture was still mip-complete, despite the format changing.

Cc: mesa-stable
Reviewed-By: Mike Blumenkrantz <michael.blumenkrantz@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27023>
(cherry picked from commit 4cb9c77e8e)
2024-01-15 21:57:28 +00:00
José Roberto de Souza
04ffe4771e anv: Fix PAT entry for userptr in integrated GPUs
Fixes: 060439bdf0 ("anv: Add ANV_BO_ALLOC_IMPORTED")
Signed-off-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27040>
(cherry picked from commit 49fe060b5f)
2024-01-15 21:57:27 +00:00
Yiwei Zhang
0ebdd39d85 venus: populate oom from ring submit alloc failures
ring_seqno_valid indicates a successful ring cmd submission, and can be
used to avoid invalid reply decoding due to failed submit alloc.
Otherwise, the garbled VkResult will mislead into initialization failure
instead of oom.

Below cts failure is fixed:
dEQP-VK.api.device_init.create_instance_device_intentional_alloc_fail.basic

Fixes: ec131c6e55 ("venus: use instance allocator for ring allocs")
Signed-off-by: Yiwei Zhang <zzyiwei@chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27026>
(cherry picked from commit ecd50e70d4)
2024-01-15 21:57:24 +00:00
Matt Turner
fcd78c5281 util/tests: Disable half-float NaN test on hppa/old-mips
Bug: https://bugs.gentoo.org/908079
Fixes: 067023dce2 ("util: Add some unit tests of the half-float conversions.")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26991>
(cherry picked from commit 5b7c733902)
2024-01-15 21:56:38 +00:00
Matt Turner
97ebcff41c util: Add DETECT_ARCH_HPPA macro
Cc: mesa-stable
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26991>
(cherry picked from commit 0540c9de44)
2024-01-15 21:56:37 +00:00
Pierre-Eric Pelloux-Prayer
6febac5c96 Revert "ci/radeonsi: disable VA-API testing on raven"
This reverts commit 9017852de4.

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: David Heidelberg <david.heidelberg@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26947>
(cherry picked from commit e2f39e8aca)
2024-01-15 21:56:36 +00:00
Pierre-Eric Pelloux-Prayer
ab960ee0bf radeonsi: compute epitch when modifying surf_pitch
In the linear case with no mipmaps addrlib sets epitch to surf_pitch - 1
so let's do the same thing here.

The change in si_descriptors.c looks like it's papering over a bug but I
couldn't find any other changes that wouldn't break at least one use case.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10375
Fixes: 115b61e51f ("ac/surface: don't oversize surf_size")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26947>
(cherry picked from commit 4e76c4ecb4)
2024-01-15 21:56:10 +00:00
Tatsuyuki Ishi
fc11cbb37e radv: Recompute max_waves after postprocessing RT config
The max waves for RT prolog need to be recalculated after merging the
resource usage of all shaders invoked from it.

Note that there is no need to panic, as the info was only used to
calculate maximum scratch size and with the RT prolog being low
footprint, this likely only caused overestimation rather than
underestimation.

Fixes: 533ec9843e ("radv: Precompute shader max_waves.")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26998>
(cherry picked from commit 63827751e1)
2024-01-15 21:56:09 +00:00
Mike Blumenkrantz
1f5604ed45 zink: fix separate shader patch variable location adjustment
in spirv, these start at location 0, not location 32

fixes #10414

Fixes: d9942442f2 ("zink: handle patch variable locations for separate shaders better")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26981>
(cherry picked from commit 565ee4fafc)
2024-01-15 21:56:07 +00:00
Lionel Landwerlin
cc677d7c30 anv: fix disabled Wa_14017076903/18022508906
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin@intel.com>
Fixes: d0669f3ede ("intel/dev: switch defect identifiers to use lineage numbers")
Reviewed-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Tapani Pälli <tapani.palli@intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27008>
(cherry picked from commit 695b4a2992)
2024-01-15 21:56:05 +00:00
Eric Engestrom
f575e2b9f1 ci: make sure we evaluate the python-test rules first
Fixes: 2c9fdaa830 ("ci: fix python-test dependency error on merge requests")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26984>
(cherry picked from commit f298a0e709)
2024-01-15 21:56:02 +00:00
Timur Kristóf
3753919715 radv: Correctly select SDMA support for PRIME blit.
Cc: mesa-stable
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/10317
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Signed-off-by: Timur Kristóf <timur.kristof@gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27015>
(cherry picked from commit 436b89e838)
2024-01-15 21:56:00 +00:00
Pavel Ondračka
757192b046 r300: fix reusing of color varying slots for generic ones
This was broken when I added texcoord support, the problem is that we
failed to properly count the number of used fs inputs and thus we failed
to make the proper decision when to reuse the color varying slot.
Also fix the error messages, they were incorrect after the rewrite as
well. This fixes a bunch of piglits.

Fixes: d4b8e8a481

Signed-off-by: Pavel Ondračka <pavel.ondracka@gmail.com>
Reviewed-by: Filip Gawin <filip.gawin@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/27003>
(cherry picked from commit 53c17d85ab)
2024-01-15 21:55:56 +00:00
Mike Blumenkrantz
02b5a2348d lavapipe: fix devenv icd filename
fixes #10408

cc: mesa-stable

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26985>
(cherry picked from commit 465e26dd98)
2024-01-15 21:55:52 +00:00
Mike Blumenkrantz
3c36933195 lavapipe: use pushconstants2 for dgc
Fixes: ec656e1984 ("lavapipe: maint6")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26977>
(cherry picked from commit bf729063c3)
2024-01-15 21:55:51 +00:00
Mike Blumenkrantz
ae5c0e6600 vk/cmdbuf: add back deleted maint6 workgraph bits
this otherwise breaks workgraph support in lavapipe

Fixes: ec656e1984 ("lavapipe: maint6")
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26977>
(cherry picked from commit b6bfa73dc7)
2024-01-15 21:22:38 +00:00
Eric Engestrom
f1064107e9 .pick_status.json: Mark 0557f0d59c as denominated 2024-01-15 09:44:39 +00:00
Eric Engestrom
6b4f639474 .pick_status.json: Update to 4fe5f06d40 2024-01-15 09:43:41 +00:00
Eric Engestrom
26a96af808 VERSION: bump for 24.0.0-rc1 2024-01-11 14:19:21 +00:00
54 changed files with 3250 additions and 185 deletions

View File

@@ -68,6 +68,10 @@ debian/x86_64_build-base:
- .debian-container
variables:
MESA_IMAGE_TAG: &debian-x86_64_build-base "${DEBIAN_BASE_TAG}--${PKG_REPO_REV}"
rules:
# python-test requires debian/x86_64_build, which requires this job
- !reference [python-test, rules]
- !reference [.container, rules]
.use-debian/x86_64_build-base:
extends:
@@ -88,9 +92,9 @@ debian/x86_64_build:
variables:
MESA_IMAGE_TAG: &debian-x86_64_build ${DEBIAN_BUILD_TAG}
rules:
- !reference [.use-debian/x86_64_build-base, rules]
# python-test requires this job
- !reference [python-test, rules]
- !reference [.use-debian/x86_64_build-base, rules]
.use-debian/x86_64_build:
extends:

2912
.pick_status.json Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1 +1 @@
24.0.0-devel
24.0.0-rc2

View File

@@ -94,7 +94,7 @@ radeonsi-raven-piglit-quick_shader:x86_64:
PIGLIT_PROFILES: quick_shader
PIGLIT_FRACTION: 2
.radeonsi-raven-va:x86_64:
radeonsi-raven-va:x86_64:
extends:
- .lava-test-deqp:x86_64
- .radeonsi-raven-test:x86_64
@@ -117,7 +117,7 @@ radeonsi-raven-piglit-quick_shader:x86_64:
radeonsi-raven-va-full:x86_64:
extends:
- .radeonsi-raven-va:x86_64
- radeonsi-raven-va:x86_64
- .radeonsi-vaapi-manual-rules
variables:
JOB_TIMEOUT: 60

View File

@@ -1883,12 +1883,16 @@ static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_
util_next_power_of_two(LINEAR_PITCH_ALIGNMENT / surf->bpe);
if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) {
/* Adjust surf_pitch to be in elements units not in pixels */
surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR &&
in->numMipLevels == 1) {
/* Divide surf_pitch (= pitch in pixels) by blk_w to get a
* pitch in elements instead because that's what the hardware needs
* in resource descriptors.
* See the comment in si_descriptors.c.
*/
surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w,
linear_alignment);
surf->u.gfx9.epitch =
MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);
surf->u.gfx9.epitch = surf->u.gfx9.surf_pitch - 1;
/* Adjust surf_slice_size and surf_size to reflect the change made to surf_pitch. */
surf->u.gfx9.surf_slice_size = (uint64_t)surf->u.gfx9.surf_pitch * out.height * surf->bpe;
surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;

View File

@@ -156,6 +156,7 @@
#define VK_GEOMETRY_TYPE_TRIANGLES_KHR 0
#define VK_GEOMETRY_TYPE_AABBS_KHR 1
#define VK_GEOMETRY_TYPE_INSTANCES_KHR 2
#define VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR 1
#define VK_GEOMETRY_INSTANCE_TRIANGLE_FLIP_FACING_BIT_KHR 2

View File

@@ -87,6 +87,14 @@ main(void)
is_active = build_instance(bounds, src_ptr, dst_ptr, global_id);
}
#if ALWAYS_ACTIVE
if (!is_active && args.geom_data.geometry_type != VK_GEOMETRY_TYPE_INSTANCES_KHR) {
bounds.min = vec3(0.0);
bounds.max = vec3(0.0);
is_active = true;
}
#endif
if (is_active) {
REF(radv_ir_node) ir_node = INDEX(radv_ir_node, args.ir, primitive_id);
DEREF(ir_node).aabb = bounds;

View File

@@ -53,7 +53,12 @@ bvh_shaders = [
[
'leaf.comp',
'leaf',
[],
['ALWAYS_ACTIVE=0'],
],
[
'leaf.comp',
'leaf_always_active',
['ALWAYS_ACTIVE=1'],
],
[
'morton.comp',

View File

@@ -41,6 +41,10 @@ static const uint32_t leaf_spv[] = {
#include "bvh/leaf.spv.h"
};
static const uint32_t leaf_always_active_spv[] = {
#include "bvh/leaf_always_active.spv.h"
};
static const uint32_t morton_spv[] = {
#include "bvh/morton.spv.h"
};
@@ -538,9 +542,14 @@ radv_device_init_accel_struct_build_state(struct radv_device *device)
if (device->meta_state.accel_struct_build.radix_sort)
goto exit;
result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
&device->meta_state.accel_struct_build.leaf_pipeline,
&device->meta_state.accel_struct_build.leaf_p_layout);
if (device->instance->drirc.force_active_accel_struct_leaves)
result = create_build_pipeline_spv(device, leaf_always_active_spv, sizeof(leaf_always_active_spv),
sizeof(struct leaf_args), &device->meta_state.accel_struct_build.leaf_pipeline,
&device->meta_state.accel_struct_build.leaf_p_layout);
else
result = create_build_pipeline_spv(device, leaf_spv, sizeof(leaf_spv), sizeof(struct leaf_args),
&device->meta_state.accel_struct_build.leaf_pipeline,
&device->meta_state.accel_struct_build.leaf_p_layout);
if (result != VK_SUCCESS)
goto exit;

View File

@@ -554,7 +554,7 @@ radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *
image_info->surf_index = NULL;
}
if (create_info->prime_blit_src && device->physical_device->rad_info.gfx_level == GFX9) {
if (create_info->prime_blit_src && !device->physical_device->rad_info.sdma_supports_compression) {
/* Older SDMA hw can't handle DCC */
image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;
}

View File

@@ -160,6 +160,7 @@ static const driOptionDescription radv_dri_options[] = {
DRI_CONF_RADV_OVERRIDE_COMPUTE_SHADER_VERSION(0)
DRI_CONF_RADV_OVERRIDE_RAY_TRACING_SHADER_VERSION(0)
DRI_CONF_RADV_SSBO_NON_UNIFORM(false)
DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(false)
DRI_CONF_RADV_APP_LAYER()
DRI_CONF_SECTION_END
};
@@ -251,6 +252,9 @@ radv_init_dri_options(struct radv_instance *instance)
instance->drirc.vk_require_etc2 = driQueryOptionb(&instance->drirc.options, "vk_require_etc2");
instance->drirc.vk_require_astc = driQueryOptionb(&instance->drirc.options, "vk_require_astc");
instance->drirc.force_active_accel_struct_leaves =
driQueryOptionb(&instance->drirc.options, "radv_force_active_accel_struct_leaves");
}
static const struct vk_instance_extension_table radv_instance_extensions_supported = {

View File

@@ -732,6 +732,8 @@ compile_rt_prolog(struct radv_device *device, struct radv_ray_tracing_pipeline *
combine_config(config, &pipeline->base.base.shaders[MESA_SHADER_INTERSECTION]->config);
postprocess_rt_config(config, device->physical_device->rad_info.gfx_level, device->physical_device->rt_wave_size);
pipeline->prolog->max_waves = radv_get_max_waves(device, config, &pipeline->prolog->info);
}
static VkResult

View File

@@ -386,6 +386,7 @@ struct radv_instance {
bool report_llvm9_version_string;
bool vk_require_etc2;
bool vk_require_astc;
bool force_active_accel_struct_leaves;
char *app_layer;
uint8_t override_graphics_shader_version;
uint8_t override_compute_shader_version;

View File

@@ -2051,7 +2051,7 @@ radv_shader_upload(struct radv_device *device, struct radv_shader *shader, const
return true;
}
static unsigned
unsigned
radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
const struct radv_shader_info *info)
{

View File

@@ -803,6 +803,9 @@ struct radv_shader_part *radv_shader_part_cache_get(struct radv_device *device,
uint64_t radv_shader_get_va(const struct radv_shader *shader);
struct radv_shader *radv_find_shader(struct radv_device *device, uint64_t pc);
unsigned radv_get_max_waves(const struct radv_device *device, const struct ac_shader_config *conf,
const struct radv_shader_info *info);
unsigned radv_get_max_scratch_waves(const struct radv_device *device, struct radv_shader *shader);
const char *radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage);

View File

@@ -1606,12 +1606,6 @@ radv_cs_emit_cp_dma(struct radv_device *device, struct radeon_cmdbuf *cs, bool p
/* Sync flags. */
if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1);
else {
if (device->physical_device->rad_info.gfx_level >= GFX9)
command |= S_415_DISABLE_WR_CONFIRM_GFX9(1);
else
command |= S_415_DISABLE_WR_CONFIRM_GFX6(1);
}
if (flags & CP_DMA_RAW_WAIT)
command |= S_415_RAW_WAIT(1);

View File

@@ -745,7 +745,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
if (!validate_explicit_variable_location(consts,
output_explicit_locations,
var, prog, producer)) {
return;
goto out;
}
}
}
@@ -799,7 +799,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
if (!validate_explicit_variable_location(consts,
input_explicit_locations,
input, prog, consumer)) {
return;
goto out;
}
while (idx < slot_limit) {
@@ -807,7 +807,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
linker_error(prog,
"Invalid location %u in %s shader\n", idx,
_mesa_shader_stage_to_string(consumer->Stage));
return;
goto out;
}
output = output_explicit_locations[idx][input->data.location_frac].var;
@@ -870,6 +870,7 @@ gl_nir_cross_validate_outputs_to_inputs(const struct gl_constants *consts,
}
}
out:
_mesa_symbol_table_dtor(table);
}

View File

@@ -441,7 +441,7 @@ nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
} else {
alu1_actual_src = alu1->src[src1].src;
for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++)
for (unsigned i = 0; i < nir_src_num_components(alu1_actual_src); i++)
alu1_swizzle[i] = i;
}
@@ -458,7 +458,7 @@ nir_alu_srcs_negative_equal(const nir_alu_instr *alu1,
} else {
alu2_actual_src = alu2->src[src2].src;
for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu2, src2); i++)
for (unsigned i = 0; i < nir_src_num_components(alu2_actual_src); i++)
alu2_swizzle[i] = i;
}

View File

@@ -579,3 +579,95 @@ TEST_F(comparison_pre_test, non_scalar_add_result)
EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
}
/* Checks that nir_opt_comparison_pre_impl() reports no progress when the
 * comparison and the negation read different components of the same vec4
 * value: the flt uses ssa_1.w while the fneg uses ssa_1.x.
 */
TEST_F(comparison_pre_test, multi_comps_load)
{
/* Before:
*
* vec1 32 ssa_0 = load_ubo (...)
* vec4 32 ssa_1 = load_ubo (...)
* vec1 1 ssa_2 = flt ssa_0, ssa_1.w
*
* if ssa_2 {
* vec1 32 ssa_3 = fneg ssa_1.x
* vec1 32 ssa_4 = fadd ssa_0, ssa_3
* } else {
* }
*/
/* ssa_0 is a 1-component load, ssa_1 a 4-component load. */
nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32,
nir_imm_int(&bld, 0),
nir_imm_int(&bld, 0));
nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32,
nir_imm_int(&bld, 1),
nir_imm_int(&bld, 0));
/* The flt is assembled by hand so each source swizzle can be set
 * explicitly: source 0 gets the xxxx swizzle, source 1 gets wwww.
 */
nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt);
flt->src[0].src = nir_src_for_ssa(ssa_0);
flt->src[1].src = nir_src_for_ssa(ssa_1);
memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx));
memcpy(&flt->src[1].swizzle, wwww, sizeof(wwww));
nir_builder_alu_instr_finish_and_insert(&bld, flt);
/* Override the result width to 1 after insertion, matching the
 * "vec1 1 ssa_2" line of the listing above.
 */
flt->def.num_components = 1;
nir_def *ssa_2 = &flt->def;
nir_if *nif = nir_push_if(&bld, ssa_2);
{
/* Inside the then-branch: negate ssa_1 with the xxxx swizzle (a
 * different component than the comparison read) and add it to ssa_0.
 */
nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg);
fneg->src[0].src = nir_src_for_ssa(ssa_1);
memcpy(&fneg->src[0].swizzle, xxxx, sizeof(xxxx));
nir_builder_alu_instr_finish_and_insert(&bld, fneg);
fneg->def.num_components = 1;
nir_def *ssa_3 = &fneg->def;
nir_fadd(&bld, ssa_0, ssa_3);
}
nir_pop_if(&bld, nif);
/* The pass is expected to return false (no progress) for this shader. */
EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
}
/* Mirror of the multi_comps_load case with the swizzles swapped: here the
 * flt reads ssa_1.x and the fneg reads ssa_1.w. The pass is again expected
 * to report no progress.
 */
TEST_F(comparison_pre_test, multi_comps_load2)
{
/* Before:
*
* vec1 32 ssa_0 = load_ubo (...)
* vec4 32 ssa_1 = load_ubo (...)
* vec1 1 ssa_2 = flt ssa_0, ssa_1.x
*
* if ssa_2 {
* vec1 32 ssa_3 = fneg ssa_1.w
* vec1 32 ssa_4 = fadd ssa_0, ssa_3
* } else {
* }
*/
/* ssa_0 is a 1-component load, ssa_1 a 4-component load. */
nir_def *ssa_0 = nir_load_ubo(&bld, 1, 32,
nir_imm_int(&bld, 0),
nir_imm_int(&bld, 0));
nir_def *ssa_1 = nir_load_ubo(&bld, 4, 32,
nir_imm_int(&bld, 1),
nir_imm_int(&bld, 0));
/* The flt is assembled by hand so each source swizzle can be set
 * explicitly: both sources get the xxxx swizzle.
 */
nir_alu_instr *flt = nir_alu_instr_create(bld.shader, nir_op_flt);
flt->src[0].src = nir_src_for_ssa(ssa_0);
flt->src[1].src = nir_src_for_ssa(ssa_1);
memcpy(&flt->src[0].swizzle, xxxx, sizeof(xxxx));
memcpy(&flt->src[1].swizzle, xxxx, sizeof(xxxx));
nir_builder_alu_instr_finish_and_insert(&bld, flt);
/* Override the result width to 1 after insertion, matching the
 * "vec1 1 ssa_2" line of the listing above.
 */
flt->def.num_components = 1;
nir_def *ssa_2 = &flt->def;
nir_if *nif = nir_push_if(&bld, ssa_2);
{
/* Inside the then-branch: negate ssa_1 with the wwww swizzle (a
 * different component than the comparison read) and add it to ssa_0.
 */
nir_alu_instr *fneg = nir_alu_instr_create(bld.shader, nir_op_fneg);
fneg->src[0].src = nir_src_for_ssa(ssa_1);
memcpy(&fneg->src[0].swizzle, wwww, sizeof(wwww));
nir_builder_alu_instr_finish_and_insert(&bld, fneg);
fneg->def.num_components = 1;
nir_def *ssa_3 = &fneg->def;
nir_fadd(&bld, ssa_0, ssa_3);
}
nir_pop_if(&bld, nif);
/* The pass is expected to return false (no progress) for this shader. */
EXPECT_FALSE(nir_opt_comparison_pre_impl(bld.impl));
}

View File

@@ -156,7 +156,7 @@ vtn_dump_shader(struct vtn_builder *b, const char *path, const char *prefix)
if (len < 0 || len >= sizeof(filename))
return;
FILE *f = fopen(filename, "w");
FILE *f = fopen(filename, "wb");
if (f == NULL)
return;

View File

@@ -458,6 +458,11 @@ gpu_supports_texture_format(struct etna_screen *screen, uint32_t fmt,
{
bool supported = true;
/* Requires split sampler support, which the driver doesn't support, yet. */
if (!util_format_is_compressed(format) &&
util_format_get_blocksizebits(format) > 32)
return false;
if (fmt == TEXTURE_FORMAT_ETC1)
supported = VIV_FEATURE(screen, chipFeatures, ETC1_TEXTURE_COMPRESSION);
@@ -500,6 +505,10 @@ gpu_supports_render_format(struct etna_screen *screen, enum pipe_format format,
if (fmt == ETNA_NO_MATCH)
return false;
/* Requires split target support, which the driver doesn't support, yet. */
if (util_format_get_blocksizebits(format) > 32)
return false;
if (sample_count > 1) {
/* Explicitly enabled. */
if (!DBG_ENABLED(ETNA_DBG_MSAA))

View File

@@ -368,8 +368,6 @@ shaders@glsl-bug-110796,Fail
shaders@glsl-fs-bug25902,Fail
shaders@glsl-fwidth,Fail
shaders@glsl-lod-bias,Fail
shaders@glsl-max-varyings,Fail
shaders@glsl-max-varyings >max_varying_components,Fail
shaders@glsl-orangebook-ch06-bump,Fail
shaders@glsl-uniform-interstage-limits@subdivide 5,Fail
shaders@glsl-uniform-interstage-limits@subdivide 5- statechanges,Fail
@@ -841,33 +839,6 @@ spec@glsl-1.10@execution@loops@glsl-vs-loop-300,Fail
spec@glsl-1.10@execution@variable-indexing@vs-output-array-vec2-index-wr-no-unroll,Fail
spec@glsl-1.10@execution@varying-packing@simple float array,Fail
spec@glsl-1.10@execution@varying-packing@simple float separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat2 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat2 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat2x3 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat2x3 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat2x4 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat2x4 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat3 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat3 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat3x2 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat3x2 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat3x4 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat3x4 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat4 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat4 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat4x2 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat4x2 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple mat4x3 array,Fail
spec@glsl-1.10@execution@varying-packing@simple mat4x3 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple vec2 array,Fail
spec@glsl-1.10@execution@varying-packing@simple vec2 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple vec3 array,Fail
spec@glsl-1.10@execution@varying-packing@simple vec3 separate,Fail
spec@glsl-1.10@execution@varying-packing@simple vec4 array,Fail
spec@glsl-1.10@execution@varying-packing@simple vec4 separate,Fail
spec@glsl-1.20@execution@clipping@vs-clip-vertex-const-accept,Fail
spec@glsl-1.20@execution@clipping@vs-clip-vertex-different-from-position,Fail
spec@glsl-1.20@execution@clipping@vs-clip-vertex-homogeneity,Fail

View File

@@ -65,11 +65,13 @@ void r300_shader_read_fs_inputs(struct tgsi_shader_info* info,
case TGSI_SEMANTIC_TEXCOORD:
assert(index < ATTR_TEXCOORD_COUNT);
fs_inputs->texcoord[index] = i;
fs_inputs->num_texcoord++;
break;
case TGSI_SEMANTIC_GENERIC:
assert(index < ATTR_GENERIC_COUNT);
fs_inputs->generic[index] = i;
fs_inputs->num_generic++;
break;
case TGSI_SEMANTIC_FOG:

View File

@@ -541,6 +541,14 @@ static void r300_update_rs_block(struct r300_context *r300)
}
}
for (; i < ATTR_GENERIC_COUNT; i++) {
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
"not enough hardware slots (it's not a bug, do not "
"report it).\n", i);
}
}
gen_offset = 0;
/* Re-use color varyings for texcoords if possible.
*
@@ -645,6 +653,14 @@ static void r300_update_rs_block(struct r300_context *r300)
}
}
for (; i < ATTR_TEXCOORD_COUNT; i++) {
if (fs_inputs->texcoord[i] != ATTR_UNUSED) {
fprintf(stderr, "r300: ERROR: FS input texcoord %i unassigned, "
"not enough hardware slots (it's not a bug, do not "
"report it).\n", i);
}
}
/* Rasterize pointcoord. */
if (fs_inputs->pcoord != ATTR_UNUSED && tex_count < 8) {
@@ -666,14 +682,6 @@ static void r300_update_rs_block(struct r300_context *r300)
tex_ptr += 2;
}
for (; i < ATTR_GENERIC_COUNT; i++) {
if (fs_inputs->generic[i] != ATTR_UNUSED) {
fprintf(stderr, "r300: ERROR: FS input generic %i unassigned, "
"not enough hardware slots (it's not a bug, do not "
"report it).\n", i);
}
}
/* Rasterize fog coordinates. */
if (vs_outputs->fog != ATTR_UNUSED && tex_count < 8) {
/* Set up the fog coordinates in VAP. */

View File

@@ -920,7 +920,8 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5);
/* ref_pic_list_modification() */
if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) {
if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR &&
enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) {
radeon_enc_code_fixed_bits(enc, 0x0, 1);
/* long-term reference */
@@ -962,6 +963,7 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
}
if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) &&
(enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) &&
(enc->enc_pic.spec_misc.cabac_enable))
radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);

View File

@@ -301,7 +301,8 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
radeon_enc_code_fixed_bits(enc, 0x1, 1); /* direct_spatial_mv_pred_flag */
/* ref_pic_list_modification() */
if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) {
if (enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR &&
enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) {
radeon_enc_code_fixed_bits(enc, 0x0, 1);
/* long-term reference */
@@ -347,6 +348,7 @@ static void radeon_enc_slice_header(struct radeon_encoder *enc)
}
if ((enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_IDR) &&
(enc->enc_pic.picture_type != PIPE_H2645_ENC_PICTURE_TYPE_I) &&
(enc->enc_pic.spec_misc.cabac_enable))
radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);

View File

@@ -382,17 +382,33 @@ void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture
state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch);
} else {
uint16_t epitch = tex->surface.u.gfx9.epitch;
if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM &&
block_width == 1) {
/* epitch is patched in ac_surface for sdma/vcn blocks to get
* a value expressed in elements unit.
* But here the texture is used with block_width == 1 so we
* need epitch in pixel units.
*/
epitch = (epitch + 1) / tex->surface.blk_w - 1;
}
state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
uint32_t hw_format = G_008F14_DATA_FORMAT(state[1]);
uint16_t epitch = tex->surface.u.gfx9.epitch;
/* epitch is surf_pitch - 1 and is expressed in element units.
* For some reason I don't understand, when a packed YUV format
* like UYUV is used, we have to double epitch (making it a pixel
* pitch instead of an element pitch). Note that it's only done
* when sampling the texture using its native format; we don't
* need to do this when sampling it as UINT32 (as done by
* SI_IMAGE_ACCESS_BLOCK_FORMAT_AS_UINT).
* This looks broken, so it's possible that surf_pitch / epitch
* are computed incorrectly, but that's the only way I found
* to get these use cases to work properly:
* - yuyv dmabuf import (#6131)
* - jpeg vaapi decode
* - yuyv texture sampling (!26947)
* - jpeg vaapi get image (#10375)
*/
if ((tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM ||
tex->buffer.b.b.format == PIPE_FORMAT_G8R8_B8R8_UNORM) &&
(hw_format == V_008F14_IMG_DATA_FORMAT_GB_GR ||
hw_format == V_008F14_IMG_DATA_FORMAT_BG_RG)) {
epitch = (epitch + 1) * 2 - 1;
}
state[4] |= S_008F20_PITCH(epitch);
}

View File

@@ -4909,7 +4909,7 @@ fixup_io_locations(nir_shader *nir)
if (var->data.location == VARYING_SLOT_VAR0)
var->data.driver_location = 0;
else if (var->data.patch)
var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
else
var->data.driver_location = var->data.location;
}
@@ -4936,7 +4936,7 @@ fixup_io_locations(nir_shader *nir)
size += glsl_count_vec4_slots(var->type, false, false);
}
if (var->data.patch)
var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
else
var->data.driver_location = slot;
found = true;

View File

@@ -2404,7 +2404,7 @@ lvp_nv_dgc_token_to_cmd_type(const VkIndirectCommandsLayoutTokenNV *token)
assert(!"unknown token type!");
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
return VK_CMD_PUSH_CONSTANTS;
return VK_CMD_PUSH_CONSTANTS2_KHR;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:
return VK_CMD_BIND_INDEX_BUFFER;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_VERTEX_BUFFER_NV:
@@ -2447,7 +2447,7 @@ VKAPI_ATTR void VKAPI_CALL lvp_GetGeneratedCommandsMemoryRequirementsNV(
size += sizeof(*cmd->u.bind_vertex_buffers2.sizes) + sizeof(*cmd->u.bind_vertex_buffers2.strides);
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV:
size += token->pushconstantSize;
size += token->pushconstantSize + sizeof(VkPushConstantsInfoKHR);
break;
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_SHADER_GROUP_NV:
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV:

View File

@@ -3841,15 +3841,17 @@ process_sequence(struct rendering_state *state,
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_PUSH_CONSTANT_NV: {
uint32_t *data = input;
cmd_size += token->pushconstantSize;
cmd_size += token->pushconstantSize + sizeof(VkPushConstantsInfoKHR);
if (max_size < size + cmd_size)
abort();
cmd->u.push_constants.layout = token->pushconstantPipelineLayout;
cmd->u.push_constants.stage_flags = token->pushconstantShaderStageFlags;
cmd->u.push_constants.offset = token->pushconstantOffset;
cmd->u.push_constants.size = token->pushconstantSize;
cmd->u.push_constants.values = (void*)cmdptr;
memcpy(cmd->u.push_constants.values, data, token->pushconstantSize);
cmd->u.push_constants2_khr.push_constants_info = (void*)cmdptr;
VkPushConstantsInfoKHR *pci = cmd->u.push_constants2_khr.push_constants_info;
pci->layout = token->pushconstantPipelineLayout;
pci->stageFlags = token->pushconstantShaderStageFlags;
pci->offset = token->pushconstantOffset;
pci->size = token->pushconstantSize;
pci->pValues = (void*)((uint8_t*)cmdptr + sizeof(VkPushConstantsInfoKHR));
memcpy((void*)pci->pValues, data, token->pushconstantSize);
break;
}
case VK_INDIRECT_COMMANDS_TOKEN_TYPE_INDEX_BUFFER_NV: {

View File

@@ -317,7 +317,7 @@ where
res
}
fn opt_nir(nir: &mut NirShader, dev: &Device) {
fn opt_nir(nir: &mut NirShader, dev: &Device, has_explicit_types: bool) {
let nir_options = unsafe {
&*dev
.screen
@@ -342,7 +342,9 @@ fn opt_nir(nir: &mut NirShader, dev: &Device) {
}
progress |= nir_pass!(nir, nir_opt_deref);
progress |= nir_pass!(nir, nir_opt_memcpy);
if has_explicit_types {
progress |= nir_pass!(nir, nir_opt_memcpy);
}
progress |= nir_pass!(nir, nir_opt_dce);
progress |= nir_pass!(nir, nir_opt_undef);
progress |= nir_pass!(nir, nir_opt_constant_folding);
@@ -451,11 +453,10 @@ fn lower_and_optimize_nir(
printf_opts.max_buffer_size = dev.printf_buffer_size() as u32;
nir_pass!(nir, nir_lower_printf, &printf_opts);
opt_nir(nir, dev);
opt_nir(nir, dev, false);
let mut args = KernelArg::from_spirv_nir(args, nir);
let mut internal_args = Vec::new();
nir_pass!(nir, nir_lower_memcpy);
let dv_opts = nir_remove_dead_variables_options {
can_remove_var: Some(can_remove_var),
@@ -626,7 +627,8 @@ fn lower_and_optimize_nir(
Some(glsl_get_cl_type_size_align),
);
opt_nir(nir, dev);
opt_nir(nir, dev, true);
nir_pass!(nir, nir_lower_memcpy);
nir_pass!(
nir,
@@ -655,7 +657,7 @@ fn lower_and_optimize_nir(
nir_pass!(nir, nir_lower_convert_alu_types, None);
opt_nir(nir, dev);
opt_nir(nir, dev, true);
/* before passing it into drivers, assign locations as drivers might remove nir_variables or
* other things we depend on

View File

@@ -50,7 +50,7 @@ _dev_icd = custom_target(
command : [
prog_python, '@INPUT0@',
'--api-version', '1.1', '--xml', '@INPUT1@',
'--lib-path', meson.current_build_dir() / 'libvulkan_lvp.so',
'--lib-path', meson.current_build_dir() / icd_file_name,
'--out', '@OUTPUT@',
],
build_by_default : true,

View File

@@ -728,7 +728,7 @@ intel_aux_map_add_mapping(struct intel_aux_map_context *ctx, uint64_t main_addre
if (!success && (main_inc_addr - main_address) > 0) {
/* If the mapping failed, remove the mapped portion. */
remove_mapping_locked(ctx, main_address,
main_size_B - (main_inc_addr - main_address),
main_inc_addr - main_address,
false /* reset_refcount */, &state_changed);
}
pthread_mutex_unlock(&ctx->mutex);

View File

@@ -1055,8 +1055,7 @@ static int
dest_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
const brw_inst *inst)
{
uint32_t reg_file =
reg_file = brw_inst_dpas_3src_dst_reg_file(devinfo, inst);
uint32_t reg_file = brw_inst_dpas_3src_dst_reg_file(devinfo, inst);
if (reg(file, reg_file, brw_inst_dpas_3src_dst_reg_nr(devinfo, inst)) == -1)
return 0;
@@ -1551,8 +1550,7 @@ static int
src0_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
const brw_inst *inst)
{
uint32_t reg_file =
reg_file = brw_inst_dpas_3src_src0_reg_file(devinfo, inst);
uint32_t reg_file = brw_inst_dpas_3src_src0_reg_file(devinfo, inst);
if (reg(file, reg_file, brw_inst_dpas_3src_src0_reg_nr(devinfo, inst)) == -1)
return 0;
@@ -1573,8 +1571,7 @@ static int
src1_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
const brw_inst *inst)
{
uint32_t reg_file =
reg_file = brw_inst_dpas_3src_src1_reg_file(devinfo, inst);
uint32_t reg_file = brw_inst_dpas_3src_src1_reg_file(devinfo, inst);
if (reg(file, reg_file, brw_inst_dpas_3src_src1_reg_nr(devinfo, inst)) == -1)
return 0;
@@ -1595,8 +1592,7 @@ static int
src2_dpas_3src(FILE *file, const struct intel_device_info *devinfo,
const brw_inst *inst)
{
uint32_t reg_file =
reg_file = brw_inst_dpas_3src_src2_reg_file(devinfo, inst);
uint32_t reg_file = brw_inst_dpas_3src_src2_reg_file(devinfo, inst);
if (reg(file, reg_file, brw_inst_dpas_3src_src2_reg_nr(devinfo, inst)) == -1)
return 0;

View File

@@ -5116,13 +5116,15 @@ const struct intel_device_info_pat_entry *
anv_device_get_pat_entry(struct anv_device *device,
enum anv_bo_alloc_flags alloc_flags)
{
if (alloc_flags & ANV_BO_ALLOC_IMPORTED)
return &device->info->pat.cached_coherent;
/* PAT indexes have no actual effect on DG2 and DG1: smem caches will always
* be snooped by the GPU and lmem will always be WC.
* This might change in future discrete platforms.
*/
if (anv_physical_device_has_vram(device->physical)) {
if ((alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM) ||
(alloc_flags & ANV_BO_ALLOC_IMPORTED))
if (alloc_flags & ANV_BO_ALLOC_NO_LOCAL_MEM)
return &device->info->pat.cached_coherent;
return &device->info->pat.writecombining;
}

View File

@@ -1479,6 +1479,7 @@ struct anv_gfx_dynamic_state {
bool RenderingDisable;
uint32_t RenderStreamSelect;
uint32_t ReorderMode;
uint32_t ForceRendering;
} so;
/* 3DSTATE_SAMPLE_MASK */

View File

@@ -496,8 +496,8 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);
#if INTEL_NEEDS_WA_14017076903
/* Wa_14017076903 :
#if INTEL_NEEDS_WA_18022508906
/* Wa_18022508906 :
*
* SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
*
@@ -525,8 +525,9 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
* Here we force rendering to get SOL_INT::Render_Enable when occlusion
* queries are active.
*/
if (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0)
SET(STREAMOUT, so.ForceRendering, Force_on);
SET(STREAMOUT, so.ForceRendering,
(!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
Force_on : 0);
#endif
switch (dyn->rs.provoking_vertex) {
@@ -1526,6 +1527,7 @@ cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
SET(so, so, RenderingDisable);
SET(so, so, RenderStreamSelect);
SET(so, so, ReorderMode);
SET(so, so, ForceRendering);
}
}

View File

@@ -272,7 +272,7 @@ genX(emit_so_memcpy_fini)(struct anv_memcpy_state *state)
void
genX(emit_so_memcpy_end)(struct anv_memcpy_state *state)
{
if (intel_device_info_is_dg2(state->device->info))
if (intel_needs_workaround(state->device->info, 16013994831))
genX(batch_set_preemption)(state->batch, state->device->info, _3D, true);
anv_batch_emit(state->batch, GENX(MI_BATCH_BUFFER_END), end);

View File

@@ -817,7 +817,8 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
return;
}
if (t->Image[face][baseLevel]->InternalFormat !=
baseImage->InternalFormat) {
baseImage->InternalFormat ||
t->Image[face][baseLevel]->TexFormat != baseImage->TexFormat) {
incomplete(t, BASE, "Cube face format mismatch");
return;
}
@@ -876,7 +877,8 @@ _mesa_test_texobj_completeness( const struct gl_context *ctx,
incomplete(t, MIPMAP, "TexImage[%d] is missing", i);
return;
}
if (img->InternalFormat != baseImage->InternalFormat) {
if (img->InternalFormat != baseImage->InternalFormat ||
img->TexFormat != baseImage->TexFormat) {
incomplete(t, MIPMAP, "Format[i] != Format[baseLevel]");
return;
}

View File

@@ -446,36 +446,7 @@ dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.float32,Crash
dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.int32,Crash
dEQP-VK.spirv_assembly.instruction.compute.workgroup_memory.uint32,Crash
dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_uint_pack32,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.a2b10g10r10_unorm_pack32,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_sint_pack32,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_uint_pack32,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.a8b8g8r8_unorm_pack32,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sfloat,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_sint,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.r16g16b16a16_uint,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_sint,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_uint,Fail
dEQP-VK.api.buffer_view.access.storage_texel_buffer.r8g8b8a8_unorm,Fail
dEQP-VK.api.command_buffers.record_many_draws_secondary_2,Fail
dEQP-VK.binding_model.descriptor_copy.compute.mix_1,Fail
dEQP-VK.binding_model.descriptor_copy.compute.mix_array0,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_0,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_1,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_2,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_4,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_5,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array0,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array1,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_image_array2,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_0,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_1,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_2,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_4,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_5,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array0,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array1,Fail
dEQP-VK.binding_model.descriptor_copy.compute.storage_texel_buffer_array2,Fail
dEQP-VK.glsl.operator.sequence.no_side_effects.highp_bool_vec2_fragment,Fail
dEQP-VK.glsl.operator.sequence.no_side_effects.highp_float_uint_fragment,Fail
dEQP-VK.glsl.operator.sequence.no_side_effects.highp_vec4_ivec4_bvec4_fragment,Fail
@@ -488,27 +459,6 @@ dEQP-VK.glsl.operator.sequence.side_effects.highp_vec4_fragment,Fail
dEQP-VK.glsl.operator.sequence.side_effects.mediump_bool_vec2_fragment,Fail
dEQP-VK.glsl.operator.sequence.side_effects.mediump_float_uint_fragment,Fail
dEQP-VK.glsl.operator.sequence.side_effects.mediump_vec4_fragment,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_storage_read.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.a2b10g10r10_unorm_pack32.oob_uniform_read.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_storage_read,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.out_of_alloc.oob_uniform_read,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_read.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_storage_write.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sfloat.oob_uniform_read.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_read.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_storage_write.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_sint.oob_uniform_read.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_read.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_storage_write.range_3_texels,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_1_texel,Fail
dEQP-VK.robustness.buffer_access.compute.texel_copy.r32g32b32a32_uint.oob_uniform_read.range_3_texels,Fail
dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_linear,Fail
dEQP-VK.texture.explicit_lod.2d.derivatives.linear_linear_mipmap_nearest,Fail
dEQP-VK.texture.explicit_lod.2d.derivatives.linear_nearest_mipmap_linear,Fail

View File

@@ -398,7 +398,6 @@ struct panvk_pipeline_layout {
unsigned num_dyn_ubos;
unsigned num_dyn_ssbos;
uint32_t num_imgs;
uint32_t num_sets;
struct {
uint32_t size;

View File

@@ -514,7 +514,7 @@ panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
for (unsigned s = 0; s < pipeline->layout->vk.set_count; s++) {
const struct panvk_descriptor_set *set = desc_state->sets[s];
if (!set)

View File

@@ -121,7 +121,7 @@ panvk_queue_submit_batch(struct panvk_queue *queue, struct panvk_batch *batch,
}
if (debug & PANVK_DEBUG_TRACE)
pandecode_next_frame(0);
pandecode_next_frame(pdev->decode_ctx);
batch->issued = true;
}

View File

@@ -1199,6 +1199,9 @@ TODO: document the other workarounds.
<application name="Baldur's Gate 3" executable="bg3.exe">
<option name="anv_disable_fcv" value="true" />
</application>
<application name="The Finals" executable="Discovery.exe">
<option name="force_vk_vendor" value="-1" />
</application>
<!--
Disable 16-bit feature on zink and angle so that GLES mediump doesn't
lower to our inefficient 16-bit shader support. No need to do so for

View File

@@ -106,6 +106,7 @@ Application bugs worked around in this file:
<application name="DOOM Eternal" application_name_match="DOOMEternal">
<option name="radv_zero_vram" value="true" />
<option name="radv_legacy_sparse_binding" value="true" />
<option name="radv_force_active_accel_struct_leaves" value="true" />
</application>
<application name="No Man's Sky" application_name_match="No Man's Sky">

View File

@@ -97,6 +97,10 @@
#define DETECT_ARCH_MIPS 1
#endif
#if defined(__hppa__)
#define DETECT_ARCH_HPPA 1
#endif
#ifndef DETECT_ARCH_X86
#define DETECT_ARCH_X86 0
#endif
@@ -137,4 +141,8 @@
#define DETECT_ARCH_MIPS 0
#endif
#ifndef DETECT_ARCH_HPPA
#define DETECT_ARCH_HPPA 0
#endif
#endif /* UTIL_DETECT_ARCH_H_ */

View File

@@ -716,6 +716,10 @@
#define DRI_CONF_RADV_CLEAR_LDS(def) \
DRI_CONF_OPT_B(radv_clear_lds, def, "Clear LDS at the end of shaders. Might decrease performance.")
#define DRI_CONF_RADV_FORCE_ACTIVE_ACCEL_STRUCT_LEAVES(def) \
DRI_CONF_OPT_B(radv_force_active_accel_struct_leaves, def, \
"Force leaf nodes of acceleration structures to be marked active.")
/**
* \brief ANV specific configuration options
*/

View File

@@ -46,18 +46,35 @@ static bool issignaling(float x)
}
#endif
/* Sanity test our test values */
TEST(half_to_float_test, nan_test)
/* The sign of the bit for signaling is different on some old processors
* (PA-RISC, old MIPS without IEEE-754-2008 support).
*
* Disable the tests on those platforms, because it's not clear how to
* correctly handle NaNs when the CPU and GPU differ in their convention.
*/
#if DETECT_ARCH_HPPA || ((DETECT_ARCH_MIPS || DETECT_ARCH_MIPS64) && !defined __mips_nan2008)
#define IEEE754_2008_NAN 0
#else
#define IEEE754_2008_NAN 1
#endif
/* Sanity test our inf test values */
TEST(half_to_float_test, inf_test)
{
EXPECT_TRUE(isinf(TEST_POS_INF));
EXPECT_TRUE(isinf(TEST_NEG_INF));
}
/* Make sure that our 32-bit float nan test value we're using is a
* non-signaling NaN.
*/
#if IEEE754_2008_NAN
TEST(half_to_float_test, nan_test)
#else
TEST(half_to_float_test, DISABLED_nan_test)
#endif
{
EXPECT_TRUE(isnan(TEST_NAN));
/* Make sure that our 32-bit float nan test value we're using is a
* non-signaling NaN. The sign of the bit for signaling was apparently
* different on some old processors (PA-RISC, MIPS?). This test value should
* cover Intel, ARM, and PPC, for sure.
*/
EXPECT_FALSE(issignaling(TEST_NAN));
}
@@ -82,12 +99,20 @@ test_half_to_float_limits(float (*func)(uint16_t))
}
/* Test the optionally HW instruction-using path. */
#if IEEE754_2008_NAN
TEST(half_to_float_test, half_to_float_test)
#else
TEST(half_to_float_test, DISABLED_half_to_float_test)
#endif
{
test_half_to_float_limits(_mesa_half_to_float);
}
#if IEEE754_2008_NAN
TEST(half_to_float_test, half_to_float_slow_test)
#else
TEST(half_to_float_test, DISABLED_half_to_float_slow_test)
#endif
{
test_half_to_float_limits(_mesa_half_to_float_slow);
}

View File

@@ -633,7 +633,6 @@ vn_ring_submit_command(struct vn_ring *ring,
vn_cs_encoder_commit(&submit->command);
size_t reply_offset = 0;
submit->reply_shmem = NULL;
if (submit->reply_size) {
submit->reply_shmem = vn_instance_reply_shmem_alloc(
ring->instance, submit->reply_size, &reply_offset);
@@ -653,11 +652,16 @@ vn_ring_submit_command(struct vn_ring *ring,
mtx_unlock(&ring->mutex);
if (submit->reply_size) {
void *reply_ptr = submit->reply_shmem->mmap_ptr + reply_offset;
submit->reply =
VN_CS_DECODER_INITIALIZER(reply_ptr, submit->reply_size);
if (submit->ring_seqno_valid)
if (likely(submit->ring_seqno_valid)) {
void *reply_ptr = submit->reply_shmem->mmap_ptr + reply_offset;
submit->reply =
VN_CS_DECODER_INITIALIZER(reply_ptr, submit->reply_size);
vn_ring_wait_seqno(ring, submit->ring_seqno);
} else {
vn_renderer_shmem_unref(ring->instance->renderer,
submit->reply_shmem);
submit->reply_shmem = NULL;
}
}
}

View File

@@ -77,7 +77,7 @@ struct vn_ring_submit_command {
struct vn_renderer_shmem *reply_shmem;
struct vn_cs_decoder reply;
/* valid when instance ring submission succeeds */
/* valid when ring submission succeeds */
bool ring_seqno_valid;
uint32_t ring_seqno;
};
@@ -95,6 +95,8 @@ vn_ring_submit_command_init(struct vn_ring *ring,
submit->reply_size = reply_size;
submit->reply_shmem = NULL;
submit->ring_seqno_valid = false;
return &submit->command;
}

View File

@@ -200,6 +200,10 @@ VkShaderStageFlags
vk_shader_stages_from_bind_point(VkPipelineBindPoint pipelineBindPoint)
{
switch (pipelineBindPoint) {
#ifdef VK_ENABLE_BETA_EXTENSIONS
case VK_PIPELINE_BIND_POINT_EXECUTION_GRAPH_AMDX:
return VK_SHADER_STAGE_COMPUTE_BIT | MESA_VK_SHADER_STAGE_WORKGRAPH_HACK_BIT_FIXME;
#endif
case VK_PIPELINE_BIND_POINT_COMPUTE:
return VK_SHADER_STAGE_COMPUTE_BIT;
case VK_PIPELINE_BIND_POINT_GRAPHICS:

View File

@@ -98,6 +98,8 @@ enum mesa_vk_command_buffer_state {
MESA_VK_COMMAND_BUFFER_STATE_PENDING,
};
/* this needs spec fixes */
#define MESA_VK_SHADER_STAGE_WORKGRAPH_HACK_BIT_FIXME (1<<30)
VkShaderStageFlags vk_shader_stages_from_bind_point(VkPipelineBindPoint pipelineBindPoint);
struct vk_command_buffer {

View File

@@ -2262,7 +2262,8 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
*/
if (wsi_wl_surface->chain &&
wsi_swapchain_to_handle(&wsi_wl_surface->chain->base) != pCreateInfo->oldSwapchain) {
return VK_ERROR_NATIVE_WINDOW_IN_USE_KHR;
result = VK_ERROR_NATIVE_WINDOW_IN_USE_KHR;
goto fail;
}
if (pCreateInfo->oldSwapchain) {
VK_FROM_HANDLE(wsi_wl_swapchain, old_chain, pCreateInfo->oldSwapchain);
@@ -2376,16 +2377,20 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
uint64_t *drm_modifiers_copy =
vk_alloc(pAllocator, sizeof(*drm_modifiers) * num_drm_modifiers, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!drm_modifiers_copy)
goto fail;
if (!drm_modifiers_copy) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail_free_wl_chain;
}
typed_memcpy(drm_modifiers_copy, drm_modifiers, num_drm_modifiers);
chain->drm_modifiers = drm_modifiers_copy;
}
if (chain->wsi_wl_surface->display->wp_presentation_notwrapped) {
if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced))
goto fail;
if (!wsi_init_pthread_cond_monotonic(&chain->present_ids.list_advanced)) {
result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail_free_wl_chain;
}
pthread_mutex_init(&chain->present_ids.lock, NULL);
wl_list_init(&chain->present_ids.outstanding_list);
@@ -2403,7 +2408,7 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
result = wsi_wl_image_init(chain, &chain->images[i],
pCreateInfo, pAllocator);
if (result != VK_SUCCESS)
goto fail_image_init;
goto fail_free_wl_images;
chain->images[i].busy = false;
}
@@ -2411,14 +2416,15 @@ wsi_wl_surface_create_swapchain(VkIcdSurfaceBase *icd_surface,
return VK_SUCCESS;
fail_image_init:
fail_free_wl_images:
wsi_wl_swapchain_images_free(chain);
fail_free_wl_chain:
wsi_wl_swapchain_chain_free(chain, pAllocator);
fail:
vk_free(pAllocator, chain);
wsi_wl_surface->chain = NULL;
assert(result != VK_SUCCESS);
return result;
}