Compare commits: mesa-25.0. ... mesa-25.0.
49 Commits

SHA1:
4fa244fddf
45be2424ec
e4831adc20
ef610a0d25
52e3f30992
f18483d265
b90c99c3dc
1a911f3d75
1d6206a82c
6482efdaba
b948e3f3a6
3445cf4f96
8154790767
a026515817
ceaf6b2231
9b60c38646
f5bace5bf6
5c65587861
d8ffce96d2
3194cae6d0
19e2eed688
70bb670e9f
27b7056835
961a3fc760
0cef98b71a
b6ffd0cd80
dc633a3560
05f1528235
eae4213ccb
a21604ce78
8ef1017e36
af2b8d745f
81fe589ccb
e1f713bf63
3106363a95
85f4342382
7e54da043a
b0a094edfa
e074dcbbbb
3a9d9099d4
5f2343889d
e2232c0be4
399de9dd00
b0891768d5
df3ad61978
90e72c54d8
b01077c27a
58540dd004
7445240551
@@ -72,6 +72,8 @@
      optional: true
    - job: debian-testing-asan
      optional: true
    - job: debian-testing-ubsan
      optional: true
    - job: debian-build-testing
      optional: true
    - job: debian-arm32
@@ -64,7 +64,8 @@ yaml-toml-shell-py-test:
    - !reference [.disable-farm-mr-rules, rules]
    - !reference [.never-post-merge-rules, rules]
    - !reference [.no_scheduled_pipelines-rules, rules]
    - if: $GITLAB_USER_LOGIN == "marge-bot"
    # merge pipeline
    - if: $GITLAB_USER_LOGIN == "marge-bot" && $CI_PIPELINE_SOURCE == "merge_request_event"
      changes: &lint_files
        - .gitlab-ci/test/gitlab-ci.yml
        - .gitlab-ci/**/*.sh
@@ -74,6 +75,14 @@ yaml-toml-shell-py-test:
        - .gitlab-ci/tests/**/*
        - bin/ci/**/*
      when: on_success
    # direct pushes that bypassed the CI
    - if: $CI_PROJECT_NAMESPACE == "mesa" && $CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_REF_NAME == $CI_DEFAULT_BRANCH
      changes: *lint_files
      when: on_success
    # direct pushes from release manager
    - if: $CI_PROJECT_NAMESPACE == "mesa" && $CI_PIPELINE_SOURCE == "push" && $CI_COMMIT_REF_NAME =~ /^staging\//
      changes: *lint_files
      when: on_success
    - changes: *lint_files
      when: manual
  tags:
.pick_status.json (3112 lines): file diff suppressed because it is too large.
@@ -3,6 +3,7 @@ Release Notes

The release notes summarize what's new or changed in each Mesa release.

- :doc:`25.0.0 release notes <relnotes/25.0.0>`
- :doc:`24.3.4 release notes <relnotes/24.3.4>`
- :doc:`24.3.3 release notes <relnotes/24.3.3>`
- :doc:`24.3.2 release notes <relnotes/24.3.2>`
@@ -442,6 +443,7 @@ The release notes summarize what's new or changed in each Mesa release.
   :maxdepth: 1
   :hidden:

   25.0.0 <relnotes/25.0.0>
   24.3.4 <relnotes/24.3.4>
   24.3.3 <relnotes/24.3.3>
   24.3.2 <relnotes/24.3.2>
docs/relnotes/25.0.0.rst (new file, 4609 lines): file diff suppressed because it is too large.
@@ -1,40 +0,0 @@
cl_khr_depth_images in rusticl
Vulkan 1.4 on radv/gfx8+
VK_KHR_dedicated_allocation on panvk
VK_KHR_global_priority on panvk
VK_KHR_index_type_uint8 on panvk
VK_KHR_map_memory2 on panvk
VK_KHR_multiview on panvk/v10+
VK_KHR_shader_non_semantic_info on panvk
VK_KHR_shader_relaxed_extended_instruction on panvk
VK_KHR_vertex_attribute_divisor on panvk
VK_KHR_zero_initialize_workgroup_memory on panvk
VK_KHR_shader_draw_parameters on panvk
VK_KHR_shader_float16_int8 on panvk
VK_KHR_8bit_storage on panvk
VK_EXT_4444_formats on panvk
VK_EXT_global_priority on panvk
VK_EXT_global_priority_query on panvk
VK_EXT_host_query_reset on panvk
VK_EXT_image_robustness on panvk
VK_EXT_pipeline_robustness on panvk
VK_EXT_provoking_vertex on panvk
VK_EXT_queue_family_foreign on panvk
VK_EXT_sampler_filter_minmax on panvk
VK_EXT_scalar_block_layout on panvk
VK_EXT_tooling_info on panvk
depthClamp on panvk
depthBiasClamp on panvk
drawIndirectFirstInstance on panvk
fragmentStoresAndAtomics on panvk/v10+
sampleRateShading on panvk
occlusionQueryPrecise on panvk
shaderInt16 on panvk
shaderInt64 on panvk
imageCubeArray on panvk
VK_KHR_depth_clamp_zero_one on RADV
VK_KHR_maintenance8 on radv
VK_KHR_shader_subgroup_rotate on panvk/v10+
Vulkan 1.1 on panvk/v10+
VK_EXT_subgroup_size_control on panvk/v10+
initial GFX12 (RDNA4) support on RADV
@@ -652,13 +652,17 @@ struct drm_amdgpu_gem_userptr {
/* GFX12 and later: */
#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0
#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7
/* These are DCC recompression setting for memory management: */
/* These are DCC recompression settings for memory management: */
#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_SHIFT 3
#define AMDGPU_TILING_GFX12_DCC_MAX_COMPRESSED_BLOCK_MASK 0x3 /* 0:64B, 1:128B, 2:256B */
#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_SHIFT 5
#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7 /* CB_COLOR0_INFO.NUMBER_TYPE */
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */
/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata
 * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */
#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14
#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1
/* bit gap */
#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63
#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1
@@ -1450,6 +1450,11 @@ bool ac_query_gpu_info(int fd, void *dev_p, struct radeon_info *info,
    */
   info->gfx12_supports_display_dcc = info->gfx_level >= GFX12 && info->drm_minor >= 58;

   /* AMDGPU always enables DCC compressed writes when a BO is moved back to
    * VRAM until .60.
    */
   info->gfx12_supports_dcc_write_compress_disable = info->gfx_level >= GFX12 && info->drm_minor >= 60;

   info->has_stable_pstate = info->drm_minor >= 45;

   if (info->gfx_level >= GFX12) {
@@ -161,6 +161,7 @@ struct radeon_info {
   /* Allocate both aligned and unaligned DCC and use the retile blit. */
   bool use_display_dcc_with_retile_blit;
   bool gfx12_supports_display_dcc;
   bool gfx12_supports_dcc_write_compress_disable;

   /* Memory info. */
   uint32_t pte_fragment_size;
@@ -65,6 +65,10 @@
#define AMDGPU_TILING_GFX12_DCC_NUMBER_TYPE_MASK 0x7
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_SHIFT 8
#define AMDGPU_TILING_GFX12_DCC_DATA_FORMAT_MASK 0x3f
/* When clearing the buffer or moving it from VRAM to GTT, don't compress and set DCC metadata
 * to uncompressed. Set when parts of an allocation bypass DCC and read raw data. */
#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_SHIFT 14
#define AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE_MASK 0x1
#define AMDGPU_TILING_SET(field, value) \
   (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
#define AMDGPU_TILING_GET(value, field) \
@@ -3517,6 +3521,8 @@ void ac_surface_apply_bo_metadata(enum amd_gfx_level gfx_level, struct radeon_su
         AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
      surf->u.gfx9.color.dcc_number_type =
         AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
      surf->u.gfx9.color.dcc_write_compress_disable =
         AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
      scanout = AMDGPU_TILING_GET(tiling_flags, GFX12_SCANOUT);
   } else if (gfx_level >= GFX9) {
      surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
@@ -3564,6 +3570,7 @@ void ac_surface_compute_bo_metadata(const struct radeon_info *info, struct radeo
                                     surf->u.gfx9.color.dcc.max_compressed_block_size);
      *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_NUMBER_TYPE, surf->u.gfx9.color.dcc_number_type);
      *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_DATA_FORMAT, surf->u.gfx9.color.dcc_data_format);
      *tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_WRITE_COMPRESS_DISABLE, surf->u.gfx9.color.dcc_write_compress_disable);
      *tiling_flags |= AMDGPU_TILING_SET(GFX12_SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
   } else if (info->gfx_level >= GFX9) {
      uint64_t dcc_offset = 0;
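The ac_surface hunks above rely on AMDGPU_TILING_SET/AMDGPU_TILING_GET to pack and unpack the per-field tiling flags; the GET body is cut off in the extract, so the version below is a reconstruction of the usual shift-and-mask inverse rather than a verbatim quote. A minimal standalone sketch of how a GFX12 tiling-flags word round-trips through these macros:

```c
#include <stdint.h>

typedef uint64_t __u64; /* stand-in for the kernel type the macros use */

#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_SHIFT 0
#define AMDGPU_TILING_GFX12_SWIZZLE_MODE_MASK 0x7
#define AMDGPU_TILING_GFX12_SCANOUT_SHIFT 63
#define AMDGPU_TILING_GFX12_SCANOUT_MASK 0x1

#define AMDGPU_TILING_SET(field, value) \
   (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
/* Reconstructed inverse of AMDGPU_TILING_SET (its body is truncated above). */
#define AMDGPU_TILING_GET(value, field) \
   (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)

int main(void)
{
   uint64_t tiling_flags = 0;

   /* Pack two fields, then read them back. */
   tiling_flags |= AMDGPU_TILING_SET(GFX12_SWIZZLE_MODE, 3);
   tiling_flags |= AMDGPU_TILING_SET(GFX12_SCANOUT, 1);

   return (AMDGPU_TILING_GET(tiling_flags, GFX12_SWIZZLE_MODE) == 3 &&
           AMDGPU_TILING_GET(tiling_flags, GFX12_SCANOUT) == 1) ? 0 : 1;
}
```

The same pattern is what the radv_amdgpu_winsys_bo_set_metadata()/..._get_metadata() hunks further down use to mirror the new DCC fields into and out of the kernel's BO metadata.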
@@ -275,6 +275,7 @@ struct gfx9_surf_layout {
    */
   uint8_t dcc_number_type; /* CB_COLOR0_INFO.NUMBER_TYPE */
   uint8_t dcc_data_format; /* [0:4]:CB_COLOR0_INFO.FORMAT, [5]:MM */
   bool dcc_write_compress_disable;

   /* Displayable DCC. This is always rb_aligned=0 and pipe_aligned=0.
    * The 3D engine doesn't support that layout except for chips with 1 RB.
@@ -301,11 +301,21 @@ radv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSi
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
   bool old_predicating;

   /* VK_EXT_conditional_rendering says that copy commands should not be
    * affected by conditional rendering.
    */
   old_predicating = cmd_buffer->state.predicating;
   cmd_buffer->state.predicating = false;

   fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize) & ~3ull;

   radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo,
                    radv_buffer_get_va(dst_buffer->bo) + dst_buffer->offset + dstOffset, fillSize, data);

   /* Restore conditional rendering. */
   cmd_buffer->state.predicating = old_predicating;
}

static void
@@ -369,6 +379,7 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   VK_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   bool old_predicating;
   uint64_t va = radv_buffer_get_va(dst_buffer->bo);
   va += dstOffset + dst_buffer->offset;

@@ -378,6 +389,12 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
   if (!dataSize)
      return;

   /* VK_EXT_conditional_rendering says that copy commands should not be
    * affected by conditional rendering.
    */
   old_predicating = cmd_buffer->state.predicating;
   cmd_buffer->state.predicating = false;

   if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD && cmd_buffer->qf != RADV_QUEUE_TRANSFER) {
      radv_cs_add_buffer(device->ws, cmd_buffer->cs, dst_buffer->bo);
      radv_update_buffer_cp(cmd_buffer, va, pData, dataSize);
@@ -387,4 +404,7 @@ radv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDevice
      radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, buf_offset,
                       dstOffset + dst_buffer->offset, dataSize);
   }

   /* Restore conditional rendering. */
   cmd_buffer->state.predicating = old_predicating;
}
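Both radv_CmdFillBuffer() and radv_CmdUpdateBuffer() above use the same save/disable/restore dance around cmd_buffer->state.predicating so that transfer commands ignore an active conditional rendering block, as VK_EXT_conditional_rendering requires. A minimal sketch of that pattern with local stand-in types; the helper is hypothetical, not an actual RADV function:

```c
#include <stdbool.h>

/* Minimal stand-ins so the sketch compiles on its own; in RADV these are the
 * real radv_cmd_buffer and its state.predicating flag. */
struct cmd_state { bool predicating; };
struct cmd_buffer { struct cmd_state state; };

/* Hypothetical helper; the driver open-codes this in each entry point above. */
static void run_without_predication(struct cmd_buffer *cmd_buffer,
                                    void (*op)(struct cmd_buffer *))
{
   /* VK_EXT_conditional_rendering: transfer/copy commands must not be
    * skipped by an active conditional rendering block. */
   const bool old_predicating = cmd_buffer->state.predicating;
   cmd_buffer->state.predicating = false;

   op(cmd_buffer);

   /* Restore predication for the draws/dispatches that follow. */
   cmd_buffer->state.predicating = old_predicating;
}
```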
@@ -55,18 +55,18 @@ blit_surf_for_image_level_layer(struct radv_image *image, VkImageLayout layout,
static bool
alloc_transfer_temp_bo(struct radv_cmd_buffer *cmd_buffer)
{
   if (cmd_buffer->transfer.copy_temp)
      return true;

   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const VkResult r =
      radv_bo_create(device, &cmd_buffer->vk.base, RADV_SDMA_TRANSFER_TEMP_BYTES, 4096, RADEON_DOMAIN_VRAM,
                     RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_SCRATCH, 0, true,
                     &cmd_buffer->transfer.copy_temp);

   if (r != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, r);
      return false;
   if (!cmd_buffer->transfer.copy_temp) {
      const VkResult r =
         radv_bo_create(device, &cmd_buffer->vk.base, RADV_SDMA_TRANSFER_TEMP_BYTES, 4096, RADEON_DOMAIN_VRAM,
                        RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING, RADV_BO_PRIORITY_SCRATCH, 0,
                        true, &cmd_buffer->transfer.copy_temp);

      if (r != VK_SUCCESS) {
         vk_command_buffer_set_error(&cmd_buffer->vk, r);
         return false;
      }
   }

   radv_cs_add_buffer(device->ws, cmd_buffer->cs, cmd_buffer->transfer.copy_temp);
@@ -4618,6 +4618,8 @@ radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
      const struct radv_image_view *vrs_iview = render->vrs_att.iview;
      struct radv_image *vrs_image = vrs_iview->image;

      radv_cs_add_buffer(device->ws, cmd_buffer->cs, vrs_image->bindings[0].bo);

      va = radv_image_get_va(vrs_image, 0);
      va |= vrs_image->planes[0].surface.tile_swizzle << 8;
@@ -11,6 +11,7 @@
#include "radv_device_memory.h"
#include "radv_android.h"
#include "radv_buffer.h"
#include "radv_debug.h"
#include "radv_entrypoints.h"
#include "radv_image.h"
#include "radv_rmv.h"
@@ -216,6 +217,17 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
   if (instance->drirc.zero_vram)
      flags |= RADEON_FLAG_ZERO_VRAM;

   /* On GFX12, DCC is transparent to the userspace driver and PTE.DCC is
    * set per buffer allocation. Only VRAM can have DCC. When the kernel
    * moves a buffer from VRAM->GTT it decompresses. When the kernel moves
    * it from GTT->VRAM it recompresses but only if WRITE_COMPRESS_DISABLE=0
    * (see DCC tiling flags).
    */
   if (pdev->info.gfx_level >= GFX12 && pdev->info.gfx12_supports_dcc_write_compress_disable &&
       domain == RADEON_DOMAIN_VRAM && !(instance->debug_flags & RADV_DEBUG_NO_DCC)) {
      flags |= RADEON_FLAG_GFX12_ALLOW_DCC;
   }

   if (device->overallocation_disallowed) {
      uint64_t total_size = pdev->memory_properties.memoryHeaps[heap_index].size;

@@ -241,6 +253,28 @@ radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAlloc
         goto fail;
      }

      if (flags & RADEON_FLAG_GFX12_ALLOW_DCC) {
         if (mem->image) {
            /* Set BO metadata (including DCC tiling flags) for dedicated
             * allocations because compressed writes are enabled and the kernel
             * requires a DCC view for recompression.
             */
            radv_image_bo_set_metadata(device, mem->image, mem->bo);
         } else {
            /* Otherwise, disable compressed writes to prevent recompression
             * when the BO is moved back to VRAM because it's not yet possible
             * to set DCC tiling flags per range for suballocations. The only
             * problem is that we will loose DCC after migration but that
             * should happen rarely.
             */
            struct radeon_bo_metadata md = {0};

            md.u.gfx12.dcc_write_compress_disable = true;

            device->ws->buffer_set_metadata(device->ws, mem->bo, &md);
         }
      }

      mem->heap_index = heap_index;
      mem->alloc_size = alloc_size;
   }
@@ -479,6 +479,7 @@ radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf
      surface->u.gfx9.color.dcc.max_compressed_block_size = md->u.gfx12.dcc_max_compressed_block;
      surface->u.gfx9.color.dcc_data_format = md->u.gfx12.dcc_data_format;
      surface->u.gfx9.color.dcc_number_type = md->u.gfx12.dcc_number_type;
      surface->u.gfx9.color.dcc_write_compress_disable = md->u.gfx12.dcc_write_compress_disable;
   } else if (pdev->info.gfx_level >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
@@ -777,6 +778,7 @@ radv_image_bo_set_metadata(struct radv_device *device, struct radv_image *image,
      md.u.gfx12.dcc_max_compressed_block = surface->u.gfx9.color.dcc.max_compressed_block_size;
      md.u.gfx12.dcc_number_type = surface->u.gfx9.color.dcc_number_type;
      md.u.gfx12.dcc_data_format = surface->u.gfx9.color.dcc_data_format;
      md.u.gfx12.dcc_write_compress_disable = surface->u.gfx9.color.dcc_write_compress_disable;
      md.u.gfx12.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else if (pdev->info.gfx_level >= GFX9) {
      uint64_t dcc_offset =
@@ -1198,6 +1200,16 @@ radv_image_create_layout(struct radv_device *device, struct radv_image_create_in
         ac_surface_zero_dcc_fields(&image->planes[0].surface);
      }

      if (pdev->info.gfx_level >= GFX12 &&
          (!radv_surface_has_scanout(device, &create_info) || pdev->info.gfx12_supports_display_dcc)) {
         const enum pipe_format format = vk_format_to_pipe_format(image->vk.format);

         /* Set DCC tilings for both color and depth/stencil. */
         image->planes[plane].surface.u.gfx9.color.dcc_number_type = ac_get_cb_number_type(format);
         image->planes[plane].surface.u.gfx9.color.dcc_data_format = ac_get_cb_format(pdev->info.gfx_level, format);
         image->planes[plane].surface.u.gfx9.color.dcc_write_compress_disable = false;
      }

      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_apply_umd_metadata(&pdev->info, &image->planes[plane].surface, image->vk.samples,
                                         image->vk.mip_levels, create_info.bo_metadata->size_metadata,
@@ -55,19 +55,19 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
                                 bool is_storage_image, bool disable_compression, bool enable_write_compression,
                                 uint32_t *state, const struct ac_surf_nbc_view *nbc_view, uint64_t offset)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   struct radv_image_plane *plane = &image->planes[plane_id];
   const uint32_t bind_idx = image->disjoint ? plane_id : 0;
   struct radv_image_binding *binding = &image->bindings[bind_idx];
   uint64_t gpu_address = binding->bo ? radv_image_get_va(image, bind_idx) + offset : 0;
   const struct radv_physical_device *pdev = radv_device_physical(device);
   const bool dcc_enabled = pdev->info.gfx_level >= GFX12 || radv_dcc_enabled(image, first_level);

   const struct ac_mutable_tex_state ac_state = {
      .surf = &plane->surface,
      .va = gpu_address,
      .gfx10 =
         {
            .write_compress_enable =
               radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression,
            .write_compress_enable = dcc_enabled && is_storage_image && enable_write_compression,
            .iterate_256 = radv_image_get_iterate256(device, image),
         },
      .gfx9 =
@@ -81,7 +81,7 @@ radv_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *
            .block_width = block_width,
         },
      .is_stencil = is_stencil,
      .dcc_enabled = !disable_compression && radv_dcc_enabled(image, first_level),
      .dcc_enabled = !disable_compression && dcc_enabled,
      .tc_compat_htile_enabled = !disable_compression && radv_image_is_tc_compat_htile(image),
   };

@@ -152,6 +152,7 @@ struct radeon_bo_metadata {
         unsigned dcc_max_compressed_block : 3;
         unsigned dcc_data_format : 6;
         unsigned dcc_number_type : 3;
         bool dcc_write_compress_disable;
         bool scanout;
      } gfx12;
   } u;
@@ -2544,9 +2544,6 @@ radv_CmdBeginVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoBeginCod

   cmd_buffer->video.vid = vid;
   cmd_buffer->video.params = params;

   if (vid->encode)
      radv_video_enc_begin_coding(cmd_buffer);
}

static void
@@ -2628,12 +2625,6 @@ radv_CmdControlVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoCoding
VKAPI_ATTR void VKAPI_CALL
radv_CmdEndVideoCodingKHR(VkCommandBuffer commandBuffer, const VkVideoEndCodingInfoKHR *pEndCodingInfo)
{
   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   if (cmd_buffer->video.vid->encode) {
      radv_video_enc_end_coding(cmd_buffer);
      return;
   }
}

static void
@@ -77,8 +77,6 @@ void radv_vcn_write_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event
void radv_init_physical_device_encoder(struct radv_physical_device *pdevice);
void radv_probe_video_decode(struct radv_physical_device *pdev);
void radv_probe_video_encode(struct radv_physical_device *pdev);
void radv_video_enc_begin_coding(struct radv_cmd_buffer *cmd_buffer);
void radv_video_enc_end_coding(struct radv_cmd_buffer *cmd_buffer);
void radv_video_enc_control_video_coding(struct radv_cmd_buffer *cmd_buffer,
                                         const VkVideoCodingControlInfoKHR *pCodingControlInfo);
VkResult radv_video_get_encode_session_memory_requirements(struct radv_device *device, struct radv_video_session *vid,
@@ -1522,6 +1522,13 @@ radv_enc_op_preset(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInfoKH
   struct radv_video_session *vid = cmd_buffer->video.vid;
   uint32_t preset_mode;

   if (vid->enc_preset_mode == RENCODE_PRESET_MODE_QUALITY)
      preset_mode = RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE;
   else if (vid->enc_preset_mode == RENCODE_PRESET_MODE_BALANCE)
      preset_mode = RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE;
   else
      preset_mode = RENCODE_IB_OP_SET_SPEED_ENCODING_MODE;

   switch (vid->vk.op) {
   case VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR: {
      const struct VkVideoEncodeH265PictureInfoKHR *h265_picture_info =
@@ -1529,22 +1536,14 @@
      const StdVideoEncodeH265PictureInfo *pic = h265_picture_info->pStdPictureInfo;
      const StdVideoH265SequenceParameterSet *sps =
         vk_video_find_h265_enc_std_sps(&cmd_buffer->video.params->vk, pic->pps_seq_parameter_set_id);
      if (sps->flags.sample_adaptive_offset_enabled_flag && vid->enc_preset_mode == RENCODE_PRESET_MODE_SPEED) {
      if (sps->flags.sample_adaptive_offset_enabled_flag && vid->enc_preset_mode == RENCODE_PRESET_MODE_SPEED)
         preset_mode = RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE;
         return;
      }
      break;
   }
   default:
      break;
   }

   if (vid->enc_preset_mode == RENCODE_PRESET_MODE_QUALITY)
      preset_mode = RENCODE_IB_OP_SET_QUALITY_ENCODING_MODE;
   else if (vid->enc_preset_mode == RENCODE_PRESET_MODE_BALANCE)
      preset_mode = RENCODE_IB_OP_SET_BALANCE_ENCODING_MODE;
   else
      preset_mode = RENCODE_IB_OP_SET_SPEED_ENCODING_MODE;
   ENC_BEGIN;
   radeon_emit(cs, preset_mode);
   ENC_END;
@@ -1683,6 +1682,11 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
      return;
   }

   radeon_check_space(device->ws, cmd_buffer->cs, 1024);

   if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
      radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false);

   const struct VkVideoInlineQueryInfoKHR *inline_queries = NULL;
   if (vid->vk.flags & VK_VIDEO_SESSION_CREATE_INLINE_QUERIES_BIT_KHR) {
      inline_queries = vk_find_struct_const(enc_info->pNext, VIDEO_INLINE_QUERY_INFO_KHR);
@@ -1756,6 +1760,9 @@ radv_vcn_encode_video(struct radv_cmd_buffer *cmd_buffer, const VkVideoEncodeInf
   radv_enc_op_enc(cmd_buffer);

   radeon_emit_direct(cmd_buffer->cs, enc->task_size_offset, enc->total_task_size);

   if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
      radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
}

static void
@@ -2038,26 +2045,6 @@ radv_GetEncodedVideoSessionParametersKHR(VkDevice device,
   return VK_SUCCESS;
}

void
radv_video_enc_begin_coding(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   radeon_check_space(device->ws, cmd_buffer->cs, 1024);

   if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
      radv_vcn_sq_header(cmd_buffer->cs, &cmd_buffer->video.sq, RADEON_VCN_ENGINE_TYPE_ENCODE, false);
}

void
radv_video_enc_end_coding(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   if (pdev->enc_hw_ver >= RADV_VIDEO_ENC_HW_4)
      radv_vcn_sq_tail(cmd_buffer->cs, &cmd_buffer->video.sq);
}

#define VCN_ENC_SESSION_SIZE 128 * 1024

VkResult
@@ -985,6 +985,7 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys *_ws, struct radeon_wins
      tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_MAX_COMPRESSED_BLOCK, md->u.gfx12.dcc_max_compressed_block);
      tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_NUMBER_TYPE, md->u.gfx12.dcc_number_type);
      tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_DATA_FORMAT, md->u.gfx12.dcc_data_format);
      tiling_flags |= AMDGPU_TILING_SET(GFX12_DCC_WRITE_COMPRESS_DISABLE, md->u.gfx12.dcc_write_compress_disable);
      tiling_flags |= AMDGPU_TILING_SET(GFX12_SCANOUT, md->u.gfx12.scanout);
   } else if (ws->info.gfx_level >= GFX9) {
      tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
@@ -1042,6 +1043,7 @@ radv_amdgpu_winsys_bo_get_metadata(struct radeon_winsys *_ws, struct radeon_wins
      md->u.gfx12.dcc_max_compressed_block = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK);
      md->u.gfx12.dcc_data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT);
      md->u.gfx12.dcc_number_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE);
      md->u.gfx12.dcc_write_compress_disable = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE);
      md->u.gfx12.scanout = AMDGPU_TILING_GET(tiling_flags, GFX12_SCANOUT);
   } else if (ws->info.gfx_level >= GFX9) {
      md->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
@@ -594,6 +594,7 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
   case nir_op_fabs:
   case nir_op_fexp2:
   case nir_op_frcp:
   case nir_op_frsq:
   case nir_op_fneg:
   case nir_op_fsat:
   case nir_op_fsign:
@@ -1048,14 +1049,25 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
      break;
   }

   case nir_op_frcp:
      r = (struct ssa_result_range){
         unpack_data(src_res[0]).range,
         false,
         false, /* Various cases can result in NaN, so assume the worst. */
         false  /* " " " " " " " " " " */
      };
   case nir_op_frcp: {
      const struct ssa_result_range left = unpack_data(src_res[0]);

      /* Only rcp(NaN) is NaN. */
      r.is_a_number = left.is_a_number;

      /* rcp can be zero for large values if denorms are flushed, or for Inf.
       * Also, rcp(-0) is -Inf and rcp(+0) is Inf.
       */
      if (left.range == gt_zero)
         r.range = ge_zero;
      else if (left.range == lt_zero)
         r.range = le_zero;

      if (left.range == gt_zero || left.range == lt_zero || left.range == ne_zero)
         r.is_finite = left.is_a_number;

      break;
   }

   case nir_op_mov:
      r = unpack_data(src_res[0]);
@@ -1110,10 +1122,28 @@ process_fp_query(struct analysis_state *state, struct analysis_query *aq, uint32
      break;

   case nir_op_fsqrt:
   case nir_op_frsq:
      r = (struct ssa_result_range){ ge_zero, false, false, false };
      break;

   case nir_op_frsq: {
      const struct ssa_result_range left = unpack_data(src_res[0]);

      /* rsq(NaN) and rsq(< 0) is NaN. */
      if (left.range == eq_zero || left.range == ge_zero || left.range == gt_zero)
         r.is_a_number = left.is_a_number;

      /* rsq(-0) is -Inf and rsq(+0) is +Inf */
      if (left.range == gt_zero || left.range == ne_zero) {
         if (left.is_finite)
            r.range = gt_zero;
         else
            r.range = ge_zero;
         r.is_finite = r.is_a_number;
      }

      break;
   }

   case nir_op_ffloor: {
      const struct ssa_result_range left = unpack_data(src_res[0]);

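The new nir_op_frcp case above replaces the old "assume the worst" result with real interval reasoning. The standalone helper below restates that logic outside of NIR with its own small result struct; the enum names mirror the ranges used in the hunk, but the code is illustrative, not the driver's implementation:

```c
#include <stdbool.h>

/* Local copies for the sketch; nir_range_analysis has its own definitions. */
enum fp_range { unknown, lt_zero, le_zero, gt_zero, ge_zero, ne_zero, eq_zero };

struct fp_result {
   enum fp_range range;
   bool is_a_number; /* known not to be NaN */
   bool is_finite;   /* known not to be +/-Inf */
};

static struct fp_result frcp_range(struct fp_result src)
{
   struct fp_result r = { unknown, false, false };

   /* Only rcp(NaN) is NaN. */
   r.is_a_number = src.is_a_number;

   /* rcp keeps the sign, but the bound becomes inclusive: rcp of Inf (or of a
    * huge value with flushed denorms) is 0, and rcp(+/-0) is +/-Inf. */
   if (src.range == gt_zero)
      r.range = ge_zero;
   else if (src.range == lt_zero)
      r.range = le_zero;

   /* If the source cannot be zero, the result cannot be infinite. */
   if (src.range == gt_zero || src.range == lt_zero || src.range == ne_zero)
      r.is_finite = src.is_a_number;

   return r;
}
```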
@@ -4893,7 +4893,8 @@ store_cp_blit(struct tu_cmd_buffer *cmd,
                 .samples_average = !util_format_is_pure_integer(dst_format) &&
                                    !util_format_is_depth_or_stencil(dst_format),
                 .unk20 = 1,
                 .unk22 = 1),
                 .unk22 = 1,
                 .mutableen = src_iview->view.is_mutable),
   SP_PS_2D_SRC_SIZE(CHIP,
                     .width = dst_iview->vk.extent.width,
                     .height = dst_iview->vk.extent.height),
@@ -5037,18 +5038,10 @@ tu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a)
           (y2 % phys_dev->info->gmem_align_h && need_y2_align));
}

/* The fast path cannot handle the corner case where GMEM and sysmem
 * attachments have different swap if the GMEM attachment is mutable, which
 * can happen when a mutable color attachment is being resolved into a
 * non-mutable resolve attachment. In such a case, if the format is a swapped
 * format like BGRA8, the color attachment will be stored in GMEM swapped but
 * the resolve attachment in sysmem will not be swapped and there's no way to
 * express that in the hardware because it computes the GMEM swap from the
 * sysmem swap.
 */
/* The fast path cannot handle mismatched mutability. */
static bool
tu_attachment_store_mismatched_swap(struct tu_cmd_buffer *cmd, uint32_t a,
                                    uint32_t gmem_a)
tu_attachment_store_mismatched_mutability(struct tu_cmd_buffer *cmd, uint32_t a,
                                          uint32_t gmem_a)
{
   if (a == gmem_a)
      return false;
@@ -5056,8 +5049,7 @@ tu_attachment_store_mismatched_swap(struct tu_cmd_buffer *cmd, uint32_t a,
   const struct tu_image_view *dst_iview = cmd->state.attachments[a];
   const struct tu_image_view *src_iview = cmd->state.attachments[gmem_a];

   return src_iview->view.is_mutable &&
          dst_iview->view.color_swap != src_iview->view.color_swap;
   return dst_iview->view.is_mutable != src_iview->view.is_mutable;
}

/* Choose the GMEM layout (use the CCU space or not) based on whether the
@@ -5099,7 +5091,7 @@ tu_choose_gmem_layout(struct tu_cmd_buffer *cmd)
            j == subpass->color_count ?
               subpass->depth_stencil_attachment.attachment :
               subpass->color_attachments[j].attachment;
         if (tu_attachment_store_mismatched_swap(cmd, a, gmem_a))
         if (tu_attachment_store_mismatched_mutability(cmd, a, gmem_a))
            cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
      }
   }
@@ -5161,7 +5153,7 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
   struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
   const struct tu_image_view *dst_iview = cmd->state.attachments[a];
   struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
   const struct tu_image_view *src_iview = cmd->state.attachments[a];
   const struct tu_image_view *src_iview = cmd->state.attachments[gmem_a];
   const VkClearValue *clear_value = &cmd->state.clear_values[gmem_a];
   bool resolve = a != gmem_a;
   if (resolve)
@@ -5171,7 +5163,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
      return;

   bool unaligned = tu_attachment_store_unaligned(cmd, a);
   bool mismatched_swap = tu_attachment_store_mismatched_swap(cmd, a, gmem_a);
   bool mismatched_mutability =
      tu_attachment_store_mismatched_mutability(cmd, a, gmem_a);

   /* D32_SFLOAT_S8_UINT is quite special format: it has two planes,
    * one for depth and other for stencil. When resolving a MSAA
@@ -5191,7 +5184,8 @@ tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
   bool store_common = dst->store && !resolve_d32s8_s8;
   bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8;

   bool use_fast_path = !unaligned && !mismatched_swap && !resolve_d24s8_s8 &&
   bool use_fast_path = !unaligned && !mismatched_mutability &&
                        !resolve_d24s8_s8 &&
                        (a == gmem_a || blit_can_resolve(dst->format));

   trace_start_gmem_store(&cmd->trace, cs, dst->format, use_fast_path, unaligned);
@@ -150,8 +150,6 @@ void lp_exec_mask_update(struct lp_exec_mask *mask)
void
lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
{
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = &mask->function_stack[function_idx];

   ctx->cond_stack_size = 0;
@@ -162,13 +160,6 @@ lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx)
   if (function_idx == 0) {
      ctx->ret_mask = mask->ret_mask;
   }

   ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm,
                                       int_type, "looplimiter");
   LLVMBuildStore(
      builder,
      LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false),
      ctx->loop_limiter);
}

void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld)
@@ -293,9 +284,8 @@ void lp_exec_endloop(struct gallivm_state *gallivm,
   LLVMBuilderRef builder = exec_mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(exec_mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(exec_mask->bld->gallivm->context);
   LLVMTypeRef mask_type = LLVMIntTypeInContext(exec_mask->bld->gallivm->context, exec_mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;
   LLVMValueRef icond;

   assert(exec_mask->break_mask);

@@ -318,17 +308,6 @@ void lp_exec_endloop(struct gallivm_state *gallivm,
    */
   LLVMBuildStore(builder, LLVMBuildLoad2(builder, exec_mask->int_vec_type, exec_mask->break_mask, ""), ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad2(builder, int_type, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   LLVMValueRef end_mask = exec_mask->exec_mask;
   if (mask)
      end_mask = LLVMBuildAnd(builder, exec_mask->exec_mask, lp_build_mask_value(mask), "");
@@ -336,22 +315,12 @@ void lp_exec_endloop(struct gallivm_state *gallivm,
   end_mask = LLVMBuildBitCast(builder, end_mask, mask_type, "");

   /* i1cond = (end_mask != 0) */
   i1cond = LLVMBuildICmp(
   icond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      end_mask,
      LLVMConstNull(mask_type), "i1cond");

   /* i2cond = (looplimiter > 0) */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(exec_mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
@@ -85,7 +85,6 @@ struct lp_exec_mask {
   bool switch_in_default;   /* if switch exec is currently in default */
   unsigned switch_pc;       /* when used points to default or endswitch-1 */

   LLVMValueRef loop_limiter;
   LLVMBasicBlockRef loop_block;
   LLVMValueRef break_var;
   struct {
@@ -77,12 +77,6 @@
 */
#define LP_MAX_TGSI_NESTING 80

/**
 * Maximum iterations before loop termination
 * Shared between every loop in a TGSI shader
 */
#define LP_MAX_TGSI_LOOP_ITERATIONS 65535

static inline bool
lp_has_fp16(void)
{
@@ -2198,9 +2198,13 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld,
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context uint_coord_bld;

   LLVMValueRef size0, row_stride0_vec, img_stride0_vec;
   LLVMValueRef data_ptr0, mipoff0 = NULL;

   lp_build_context_init(&uint_coord_bld, gallivm, lp_uint_type(int_coord_bld->type));

   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
@@ -2243,7 +2247,9 @@ lp_build_sample_aniso(struct lp_build_sample_context *bld,

   /* Number of samples used for averaging. */
   LLVMValueRef N = lp_build_iceil(coord_bld, lp_build_max(coord_bld, rho_x, rho_y));
   N = lp_build_min(int_coord_bld, N, lp_build_const_int_vec(gallivm, int_coord_bld->type, bld->static_sampler_state->aniso));

   /* Use uint min so in case of NaNs/overflows loop iterations are clamped to max aniso */
   N = lp_build_min(&uint_coord_bld, N, lp_build_const_int_vec(gallivm, int_coord_bld->type, bld->static_sampler_state->aniso));
   LLVMValueRef wave_max_N = NULL;
   for (uint32_t i = 0; i < coord_bld->type.length; i++) {
      LLVMValueRef invocation_N = LLVMBuildExtractElement(builder, N, lp_build_const_int32(gallivm, i), "");
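The llvmpipe change above switches the clamp of N to an unsigned minimum because converting a NaN or out-of-range rho to an integer typically comes back from the SIMD float-to-int conversion as 0x80000000. Interpreted as signed, that value would win the min and produce a bogus loop bound; interpreted as unsigned, it loses to the aniso limit. A small host-side illustration (the 0x80000000 bit pattern is an assumption about the conversion result, noted in the comment):

```c
#include <stdint.h>
#include <stdio.h>

#define MIN_S(a, b) ((int32_t)(a) < (int32_t)(b) ? (a) : (b))
#define MIN_U(a, b) ((uint32_t)(a) < (uint32_t)(b) ? (a) : (b))

int main(void)
{
   /* Assumption: the SIMD float->int conversion of a NaN/overflowing rho
    * yields the 0x80000000 "integer indefinite" pattern. */
   const uint32_t n_from_nan = 0x80000000u;
   const uint32_t max_aniso = 16;

   printf("signed min:   %d\n", (int32_t)MIN_S(n_from_nan, max_aniso)); /* INT32_MIN: bogus loop bound */
   printf("unsigned min: %u\n", MIN_U(n_from_nan, max_aniso));          /* 16: clamped to max aniso */
   return 0;
}
```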
@@ -233,6 +233,7 @@ static int si_init_surface(struct si_screen *sscreen, struct radeon_surf *surfac
      /* These should be set for both color and Z/S. */
      surface->u.gfx9.color.dcc_number_type = ac_get_cb_number_type(format);
      surface->u.gfx9.color.dcc_data_format = ac_get_cb_format(sscreen->info.gfx_level, format);
      surface->u.gfx9.color.dcc_write_compress_disable = false;
   }

   if (modifier == DRM_FORMAT_MOD_INVALID &&
@@ -803,10 +804,12 @@ static bool si_texture_get_handle(struct pipe_screen *screen, struct pipe_contex
   }

   const bool debug_disable_dcc = sscreen->debug_flags & DBG(NO_EXPORTED_DCC);
   /* Since shader image stores don't support DCC on GFX9 and older,
    * disable it for external clients that want write access.
   /* Disable DCC for external clients that might use shader image stores.
    * They don't support DCC on GFX9 and older. GFX10/10.3 is also problematic
    * if the view formats between clients are incompatible or if DCC clear is
    * used.
    */
   const bool shader_write = sscreen->info.gfx_level <= GFX9 &&
   const bool shader_write = sscreen->info.gfx_level < GFX11 &&
                             usage & PIPE_HANDLE_USAGE_SHADER_WRITE &&
                             !tex->is_depth &&
                             tex->surface.meta_offset;
@@ -40,9 +40,6 @@ dEQP-VK.pipeline.shader_object_linked_spirv.multisample.multisampled_render_to_s
dEQP-VK.pipeline.shader_object_linked_spirv.multisample.multisampled_render_to_single_sampled.multi_renderpass.r8g8b8a8_unorm_r16g16b16a16_sfloat_r16g16b16a16_sint_d32_sfloat_s8_uint.random_119,Fail
dEQP-VK.pipeline.shader_object_linked_spirv.multisample.multisampled_render_to_single_sampled.multi_subpass.r8g8b8a8_unorm_r16g16b16a16_sfloat_r16g16b16a16_sint_d32_sfloat_s8_uint.random_119,Fail

# The test has a loop that exceeds LP_MAX_TGSI_LOOP_ITERATIONS
dEQP-VK.sparse_resources.buffer.ssbo.sparse_residency.buffer_size_2_24,Fail

dEQP-VK.dgc.ext.graphics.draw.token_draw.shader_objects_with_geom_preprocess_same_state_cmd_buffer_unordered,Fail
dEQP-VK.pipeline.fast_linked_library.multisample.multisampled_render_to_single_sampled.multi_subpass.r8g8b8a8_unorm_r16g16b16a16_sfloat_r16g16b16a16_sint_s8_uint.random_529,Fail
dEQP-VK.pipeline.monolithic.multisample.multisampled_render_to_single_sampled.dynamic_rendering.multi_renderpass.r8g8b8a8_unorm_r16g16b16a16_sfloat_r16g16b16a16_sint_s8_uint.random_641,Fail
@@ -3,9 +3,6 @@
dEQP-VK.rasterization.provoking_vertex.transform_feedback.first.line_strip_with_adjacency,Fail
dEQP-VK.rasterization.provoking_vertex.transform_feedback.per_pipeline.triangle_strip_with_adjacency,Fail

# The test has a loop that exceeds LP_MAX_TGSI_LOOP_ITERATIONS
dEQP-VK.sparse_resources.buffer.ssbo.sparse_residency.buffer_size_2_24,Fail

dEQP-VK.api.maintenance7.total_dynamic_buffers_properties,Fail

dEQP-VK.dynamic_rendering.complete_secondary_cmd_buff.suballocation.unused_attachment.loadopclear.storeopdontcare.stencilloadopdontcare.stencilstoreopdontcare,Fail
@@ -1078,11 +1078,17 @@ gbm_dri_surface_create(struct gbm_device *gbm,
                       uint32_t format, uint32_t flags,
                       const uint64_t *modifiers, const unsigned count)
{
   struct gbm_dri_device *dri = gbm_dri_device(gbm);
   struct gbm_dri_surface *surf;

   if (count)
      assert(modifiers);

   if (count > 0 && !dri->screen->base.screen->resource_create_with_modifiers) {
      errno = ENOSYS;
      return NULL;
   }

   /* It's acceptable to create an image with INVALID modifier in the list,
    * but it cannot be on the only modifier (since it will certainly fail
    * later). While we could easily catch this after modifier creation, doing
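With the gbm hunk above, gbm_dri_surface_create() now fails with ENOSYS when a modifier list is passed but the driver has no resource_create_with_modifiers hook, instead of silently ignoring the modifiers. A hedged caller-side sketch of the usual fallback; the format, usage flags and the LINEAR-only modifier list are illustrative choices, not requirements of the change:

```c
#include <errno.h>
#include <stdint.h>
#include <gbm.h>

static struct gbm_surface *
create_surface_with_fallback(struct gbm_device *gbm, uint32_t w, uint32_t h)
{
   const uint64_t modifiers[] = { 0 /* DRM_FORMAT_MOD_LINEAR */ };

   struct gbm_surface *surf =
      gbm_surface_create_with_modifiers(gbm, w, h, GBM_FORMAT_XRGB8888,
                                        modifiers, 1);
   if (surf == NULL && errno == ENOSYS) {
      /* Driver cannot honor explicit modifiers; retry without them. */
      surf = gbm_surface_create(gbm, w, h, GBM_FORMAT_XRGB8888,
                                GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING);
   }
   return surf;
}
```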
@@ -162,8 +162,8 @@ typedef struct brw_reg {
      unsigned negate:1;       /* source only */
      unsigned abs:1;          /* source only */
      unsigned address_mode:1; /* relative addressing, hopefully! */
      unsigned pad0:16;
      unsigned subnr:5;        /* :1 in align16 */
      unsigned pad0:15;
      unsigned subnr:6;        /* :1 in align16 */
   };
   uint32_t bits;
};
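The brw_reg hunk grows subnr from 5 to 6 bits and takes the extra bit out of pad0, so the anonymous bitfield struct stays exactly 32 bits wide and still overlays the uint32_t bits member. A toy sketch of how that invariant can be checked at compile time; the field set is abbreviated and is not the real brw_reg layout:

```c
#include <assert.h>
#include <stdint.h>

/* Toy stand-in: only the size invariant matters here. */
typedef union toy_reg {
   struct {
      unsigned negate:1;
      unsigned abs:1;
      unsigned address_mode:1;
      unsigned pad0:15;  /* was 16 before subnr grew */
      unsigned subnr:6;  /* was 5 */
      unsigned other:8;  /* remaining bits of the dword */
   };
   uint32_t bits;
} toy_reg;

/* 1 + 1 + 1 + 15 + 6 + 8 == 32, so the bitfields still fit the aliased word. */
static_assert(sizeof(toy_reg) == sizeof(uint32_t),
              "bitfields must not spill past the 32-bit overlay");
```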
@@ -4312,7 +4312,12 @@ cmd_buffer_accumulate_barrier_bits(struct anv_cmd_buffer *cmd_buffer,
    * barriers within renderpass are operating with consistent layouts.
    */
   if (!cmd_buffer->vk.runtime_rp_barrier &&
       cmd_buffer->vk.render_pass != NULL) {
       cmd_buffer->vk.render_pass != NULL &&
       old_layout == VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT) {
      /* Those assert are here to recognize the changes made by the
       * runtime. If we fail them, we need to investigate what is going
       * on.
       */
      assert(anv_cmd_graphics_state_has_image_as_attachment(&cmd_buffer->state.gfx,
                                                            image));
      VkImageLayout subpass_att_layout, subpass_stencil_att_layout;
@@ -4596,26 +4601,6 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
   if (cmd_buffer->state.current_pipeline == pipeline)
      return;

#if GFX_VER >= 20
   /* While PIPELINE_SELECT is not needed on Xe2+, our current assumption
    * is that the pipelined flushes in the 3D pipeline are not getting
    * synchronized with the compute dispatches (and vice versa). So we need
    * a CS_STALL prior the next set of commands to ensure the flushes have
    * completed.
    *
    * The new RESOURCE_BARRIER instruction has support for synchronizing
    * 3D/Compute and once we switch to that we should be able to get rid of
    * this CS_STALL.
    */
   anv_add_pending_pipe_bits(cmd_buffer, ANV_PIPE_CS_STALL_BIT, "pipeline switch stall");

   /* Since we are not stalling/flushing caches explicitly while switching
    * between the pipelines, we need to apply data dependency flushes recorded
    * previously on the resource.
    */
   genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
#else

#if GFX_VER == 9
   /* From the Broadwell PRM, Volume 2a: Instructions, PIPELINE_SELECT:
    *
@@ -4783,7 +4768,6 @@ genX(flush_pipeline_select)(struct anv_cmd_buffer *cmd_buffer,
   if (pipeline == GPGPU)
      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_COMPUTE_BIT;
#endif
#endif /* else of if GFX_VER >= 20 */
   cmd_buffer->state.current_pipeline = pipeline;
}

@@ -2013,6 +2013,9 @@ genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
static void
emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer)
{
   /* For Wa_16012775297, ensure VF_STATISTICS is emitted before 3DSTATE_VF
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero);
#if GFX_VERx10 >= 125
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
      vfg.DistributionMode = RR_STRICT;
@@ -2034,7 +2037,6 @@ emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer)
      rr.BackFaceFillMode = FILL_MODE_SOLID;
   }

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), zero);

#if GFX_VER >= 11
@@ -77,6 +77,7 @@ emit_common_so_memcpy(struct anv_memcpy_state *state,
      vfi.InstancingEnable = false;
      vfi.VertexElementIndex = 0;
   }
   anv_batch_emit(batch, GENX(3DSTATE_VF_STATISTICS), vfs);
   anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS), sgvs);
#if GFX_VER >= 11
   anv_batch_emit(batch, GENX(3DSTATE_VF_SGVS_2), sgvs);
@@ -1749,6 +1749,13 @@ impl<'a> ShaderFromNir<'a> {
|
||||
let mut mask = u8::try_from(mask).unwrap();
|
||||
if flags.is_sparse() {
|
||||
mask &= !(1 << (tex.def.num_components - 1));
|
||||
if mask == 0 {
|
||||
// This can happen if only the sparse predicate is used. In
|
||||
// that case, we need at least one result register.
|
||||
mask = 1;
|
||||
}
|
||||
} else {
|
||||
debug_assert!(mask != 0);
|
||||
}
|
||||
|
||||
let dst_comps = u8::try_from(mask.count_ones()).unwrap();
|
||||
@@ -2298,6 +2305,15 @@ impl<'a> ShaderFromNir<'a> {
|
||||
let coord = self.get_image_coord(intrin, dim);
|
||||
// let sample = self.get_src(&srcs[2]);
|
||||
|
||||
let mem_order = if intrin.intrinsic
|
||||
== nir_intrinsic_load_global_constant
|
||||
|| (intrin.access() & ACCESS_CAN_REORDER) != 0
|
||||
{
|
||||
MemOrder::Constant
|
||||
} else {
|
||||
MemOrder::Strong(MemScope::System)
|
||||
};
|
||||
|
||||
let comps = intrin.num_components;
|
||||
assert!(intrin.def.bit_size() == 32);
|
||||
assert!(comps == 1 || comps == 2 || comps == 4);
|
||||
@@ -2308,7 +2324,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||
dst: dst.into(),
|
||||
fault: Dst::None,
|
||||
image_dim: dim,
|
||||
mem_order: MemOrder::Strong(MemScope::System),
|
||||
mem_order,
|
||||
mem_eviction_priority: self
|
||||
.get_eviction_priority(intrin.access()),
|
||||
mask: (1 << comps) - 1,
|
||||
@@ -2323,6 +2339,15 @@ impl<'a> ShaderFromNir<'a> {
|
||||
let coord = self.get_image_coord(intrin, dim);
|
||||
// let sample = self.get_src(&srcs[2]);
|
||||
|
||||
let mem_order = if intrin.intrinsic
|
||||
== nir_intrinsic_load_global_constant
|
||||
|| (intrin.access() & ACCESS_CAN_REORDER) != 0
|
||||
{
|
||||
MemOrder::Constant
|
||||
} else {
|
||||
MemOrder::Strong(MemScope::System)
|
||||
};
|
||||
|
||||
let comps = intrin.num_components;
|
||||
assert!(intrin.def.bit_size() == 32);
|
||||
assert!(comps == 5);
|
||||
@@ -2334,7 +2359,7 @@ impl<'a> ShaderFromNir<'a> {
|
||||
dst: dst.into(),
|
||||
fault: fault.into(),
|
||||
image_dim: dim,
|
||||
mem_order: MemOrder::Strong(MemScope::System),
|
||||
mem_order,
|
||||
mem_eviction_priority: self
|
||||
.get_eviction_priority(intrin.access()),
|
||||
mask: (1 << (comps - 1)) - 1,
|
||||
|
@@ -28,6 +28,9 @@ nvk_get_buffer_alignment(const struct nvk_physical_device *pdev,
|
||||
VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR))
|
||||
alignment = MAX2(alignment, NVK_MIN_TEXEL_BUFFER_ALIGNMENT);
|
||||
|
||||
if (usage_flags & VK_BUFFER_USAGE_2_PREPROCESS_BUFFER_BIT_EXT)
|
||||
alignment = MAX2(alignment, NVK_DGC_ALIGN);
|
||||
|
||||
if (create_flags & (VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
|
||||
VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT))
|
||||
alignment = MAX2(alignment, pdev->nvkmd->bind_align_B);
|
||||
|
@@ -365,6 +365,7 @@ nvk_cmd_buffer_get_cbuf_descriptor_addr(struct nvk_cmd_buffer *cmd,
|
||||
const struct nvk_cbuf *cbuf);
|
||||
|
||||
VkResult nvk_cmd_flush_cs_qmd(struct nvk_cmd_buffer *cmd,
|
||||
const struct nvk_cmd_state *state,
|
||||
uint32_t global_size[3],
|
||||
uint64_t *qmd_addr_out,
|
||||
uint64_t *root_desc_addr_out);
|
||||
|
@@ -145,54 +145,56 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
|
||||
|
||||
memcpy(root_desc_map, root, sizeof(*root));
|
||||
|
||||
struct nak_qmd_info qmd_info = {
|
||||
.addr = shader->hdr_addr,
|
||||
.smem_size = shader->info.cs.smem_size,
|
||||
.smem_max = NVK_MAX_SHARED_SIZE,
|
||||
.global_size = {
|
||||
global_size[0],
|
||||
global_size[1],
|
||||
global_size[2],
|
||||
},
|
||||
};
|
||||
uint64_t qmd_addr = 0;
|
||||
if (shader != NULL) {
|
||||
struct nak_qmd_info qmd_info = {
|
||||
.addr = shader->hdr_addr,
|
||||
.smem_size = shader->info.cs.smem_size,
|
||||
.smem_max = NVK_MAX_SHARED_SIZE,
|
||||
.global_size = {
|
||||
global_size[0],
|
||||
global_size[1],
|
||||
global_size[2],
|
||||
},
|
||||
};
|
||||
|
||||
assert(shader->cbuf_map.cbuf_count <= ARRAY_SIZE(qmd_info.cbufs));
|
||||
for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
|
||||
const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];
|
||||
assert(shader->cbuf_map.cbuf_count <= ARRAY_SIZE(qmd_info.cbufs));
|
||||
for (uint32_t c = 0; c < shader->cbuf_map.cbuf_count; c++) {
|
||||
const struct nvk_cbuf *cbuf = &shader->cbuf_map.cbufs[c];
|
||||
|
||||
struct nvk_buffer_address ba;
|
||||
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) {
|
||||
ba = (struct nvk_buffer_address) {
|
||||
.base_addr = root_desc_addr,
|
||||
.size = sizeof(*root),
|
||||
};
|
||||
} else {
|
||||
ASSERTED bool direct_descriptor =
|
||||
nvk_cmd_buffer_get_cbuf_addr(cmd, desc, shader, cbuf, &ba);
|
||||
assert(direct_descriptor);
|
||||
struct nvk_buffer_address ba;
|
||||
if (cbuf->type == NVK_CBUF_TYPE_ROOT_DESC) {
|
||||
ba = (struct nvk_buffer_address) {
|
||||
.base_addr = root_desc_addr,
|
||||
.size = sizeof(*root),
|
||||
};
|
||||
} else {
|
||||
ASSERTED bool direct_descriptor =
|
||||
nvk_cmd_buffer_get_cbuf_addr(cmd, desc, shader, cbuf, &ba);
|
||||
assert(direct_descriptor);
|
||||
}
|
||||
|
||||
if (ba.size > 0) {
|
||||
assert(ba.base_addr % min_cbuf_alignment == 0);
|
||||
ba.size = align(ba.size, min_cbuf_alignment);
|
||||
ba.size = MIN2(ba.size, NVK_MAX_CBUF_SIZE);
|
||||
|
||||
qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) {
|
||||
.index = c,
|
||||
.addr = ba.base_addr,
|
||||
.size = ba.size,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if (ba.size > 0) {
|
||||
assert(ba.base_addr % min_cbuf_alignment == 0);
|
||||
ba.size = align(ba.size, min_cbuf_alignment);
|
||||
ba.size = MIN2(ba.size, NVK_MAX_CBUF_SIZE);
|
||||
uint32_t qmd[64];
|
||||
nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
|
||||
|
||||
qmd_info.cbufs[qmd_info.num_cbufs++] = (struct nak_qmd_cbuf) {
|
||||
.index = c,
|
||||
.addr = ba.base_addr,
|
||||
.size = ba.size,
|
||||
};
|
||||
}
|
||||
result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
}
|
||||
|
||||
uint32_t qmd[64];
|
||||
nak_fill_qmd(&pdev->info, &shader->info, &qmd_info, qmd, sizeof(qmd));
|
||||
|
||||
uint64_t qmd_addr;
|
||||
result = nvk_cmd_buffer_upload_data(cmd, qmd, sizeof(qmd), 0x100, &qmd_addr);
|
||||
if (unlikely(result != VK_SUCCESS))
|
||||
return result;
|
||||
|
||||
*qmd_addr_out = qmd_addr;
|
||||
if (root_desc_addr_out != NULL)
|
||||
*root_desc_addr_out = root_desc_addr;
|
||||
@@ -202,13 +204,14 @@ nvk_cmd_upload_qmd(struct nvk_cmd_buffer *cmd,
|
||||
|
||||
VkResult
|
||||
nvk_cmd_flush_cs_qmd(struct nvk_cmd_buffer *cmd,
|
||||
const struct nvk_cmd_state *state,
|
||||
uint32_t global_size[3],
|
||||
uint64_t *qmd_addr_out,
|
||||
uint64_t *root_desc_addr_out)
|
||||
{
|
||||
struct nvk_descriptor_state *desc = &cmd->state.cs.descriptors;
|
||||
const struct nvk_descriptor_state *desc = &state->cs.descriptors;
|
||||
|
||||
return nvk_cmd_upload_qmd(cmd, cmd->state.cs.shader,
|
||||
return nvk_cmd_upload_qmd(cmd, state->cs.shader,
|
||||
desc, (void *)desc->root, global_size,
|
||||
qmd_addr_out, root_desc_addr_out);
|
||||
}
|
||||
@@ -257,7 +260,8 @@ nvk_CmdDispatchBase(VkCommandBuffer commandBuffer,
|
||||
nvk_flush_compute_state(cmd, base_workgroup, global_size);
|
||||
|
||||
uint64_t qmd_addr = 0;
|
||||
VkResult result = nvk_cmd_flush_cs_qmd(cmd, global_size, &qmd_addr, NULL);
|
||||
VkResult result = nvk_cmd_flush_cs_qmd(cmd, &cmd->state, global_size,
|
||||
&qmd_addr, NULL);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
@@ -496,8 +500,8 @@ nvk_CmdDispatchIndirect(VkCommandBuffer commandBuffer,
|
||||
nvk_flush_compute_state(cmd, base_workgroup, global_size);
|
||||
|
||||
uint64_t qmd_addr = 0, root_desc_addr = 0;
|
||||
VkResult result = nvk_cmd_flush_cs_qmd(cmd, global_size, &qmd_addr,
|
||||
&root_desc_addr);
|
||||
VkResult result = nvk_cmd_flush_cs_qmd(cmd, &cmd->state, global_size,
|
||||
&qmd_addr, &root_desc_addr);
|
||||
if (result != VK_SUCCESS) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
|
@@ -239,6 +239,8 @@ static_assert(sizeof(struct nvk_ies_cs_qmd) % QMD_ALIGN == 0,
|
||||
"QMD size is not properly algined");
|
||||
static_assert(sizeof(struct nvk_root_descriptor_table) % QMD_ALIGN == 0,
|
||||
"Root descriptor table size is not aligned");
|
||||
static_assert(NVK_DGC_ALIGN >= QMD_ALIGN,
|
||||
"QMD alignment requirement is a lower bound of DGC alignment");
|
||||
|
||||
static void
|
||||
copy_repl_global_dw(nir_builder *b, nir_def *dst_addr, nir_def *src_addr,
|
||||
@@ -945,8 +947,8 @@ nvk_cmd_process_cmds(struct nvk_cmd_buffer *cmd,
|
||||
uint64_t qmd_addr = 0;
|
||||
if (layout->stages & VK_SHADER_STAGE_COMPUTE_BIT) {
|
||||
uint32_t global_size[3] = { 0, 0, 0 };
|
||||
VkResult result = nvk_cmd_flush_cs_qmd(cmd, global_size, &qmd_addr,
|
||||
&push.root_addr);
|
||||
VkResult result = nvk_cmd_flush_cs_qmd(cmd, state, global_size,
|
||||
&qmd_addr, &push.root_addr);
|
||||
if (unlikely(result != VK_SUCCESS)) {
|
||||
vk_command_buffer_set_error(&cmd->vk, result);
|
||||
return;
|
||||
|
@@ -37,7 +37,7 @@ desc_ubo_data(struct nvk_descriptor_set *set, uint32_t binding,
if (size_out != NULL)
*size_out = set->size - offset;

return (char *)set->mapped_ptr + offset;
return (char *)set->map + offset;
}

static void

@@ -173,7 +173,7 @@ ubo_desc(struct nvk_physical_device *pdev,
assert(addr_range.addr % min_cbuf_alignment == 0);
assert(addr_range.range <= NVK_MAX_CBUF_SIZE);

addr_range.addr = align64(addr_range.addr, min_cbuf_alignment);
addr_range.addr = ROUND_DOWN_TO(addr_range.addr, min_cbuf_alignment);
addr_range.range = align(addr_range.range, min_cbuf_alignment);

if (nvk_use_bindless_cbuf(&pdev->info)) {

@@ -225,7 +225,7 @@ ssbo_desc(struct nvk_addr_range addr_range)
assert(addr_range.addr % NVK_MIN_SSBO_ALIGNMENT == 0);
assert(addr_range.range <= UINT32_MAX);

addr_range.addr = align64(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT);
addr_range.addr = ROUND_DOWN_TO(addr_range.addr, NVK_MIN_SSBO_ALIGNMENT);
addr_range.range = align(addr_range.range, NVK_SSBO_BOUNDS_CHECK_ALIGNMENT);

return (union nvk_buffer_descriptor) { .addr = {

@@ -449,7 +449,7 @@ nvk_push_descriptor_set_update(struct nvk_device *dev,
struct nvk_descriptor_set set = {
.layout = layout,
.size = sizeof(push_set->data),
.mapped_ptr = push_set->data,
.map = push_set->data,
};

for (uint32_t w = 0; w < write_count; w++) {

@@ -537,9 +537,14 @@ nvk_destroy_descriptor_pool(struct nvk_device *dev,
if (pool->mem != NULL)
nvkmd_mem_unref(pool->mem);

if (pool->host_mem != NULL)
vk_free2(&dev->vk.alloc, pAllocator, pool->host_mem);

vk_object_free(&dev->vk, pAllocator, pool);
}

#define HOST_ONLY_ADDR 0xc0ffee0000000000ull

VKAPI_ATTR VkResult VKAPI_CALL
nvk_CreateDescriptorPool(VkDevice _device,
const VkDescriptorPoolCreateInfo *pCreateInfo,

@@ -601,24 +606,39 @@ nvk_CreateDescriptorPool(VkDevice _device,
*/
mem_size += nvk_min_cbuf_alignment(&pdev->info) * pCreateInfo->maxSets;

if (mem_size) {
result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
mem_size, 0, NVKMD_MEM_LOCAL,
NVKMD_MEM_MAP_WR, &pool->mem);
if (result != VK_SUCCESS) {
nvk_destroy_descriptor_pool(dev, pAllocator, pool);
return result;
}
if (mem_size > 0) {
if (pCreateInfo->flags & VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT) {
pool->host_mem = vk_zalloc2(&dev->vk.alloc, pAllocator, mem_size,
16, VK_OBJECT_TYPE_DESCRIPTOR_POOL);
if (pool->host_mem == NULL) {
nvk_destroy_descriptor_pool(dev, pAllocator, pool);
return vk_error(dev, VK_ERROR_OUT_OF_HOST_MEMORY);
}

/* The BO may be larger thanks to GPU page alignment. We may as well
* make that extra space available to the client.
*/
assert(pool->mem->size_B >= mem_size);
util_vma_heap_init(&pool->heap, pool->mem->va->addr, pool->mem->size_B);
util_vma_heap_init(&pool->heap, HOST_ONLY_ADDR, mem_size);
} else {
result = nvkmd_dev_alloc_mapped_mem(dev->nvkmd, &dev->vk.base,
mem_size, 0, NVKMD_MEM_LOCAL,
NVKMD_MEM_MAP_WR, &pool->mem);
if (result != VK_SUCCESS) {
nvk_destroy_descriptor_pool(dev, pAllocator, pool);
return result;
}

/* The BO may be larger thanks to GPU page alignment. We may as well
* make that extra space available to the client.
*/
assert(pool->mem->size_B >= mem_size);
mem_size = pool->mem->size_B;

util_vma_heap_init(&pool->heap, pool->mem->va->addr, mem_size);
}
} else {
util_vma_heap_init(&pool->heap, 0, 0);
}

pool->mem_size_B = mem_size;

*pDescriptorPool = nvk_descriptor_pool_to_handle(pool);
return VK_SUCCESS;
}

@@ -638,12 +658,22 @@ nvk_descriptor_pool_alloc(struct nvk_descriptor_pool *pool,
if (addr == 0)
return VK_ERROR_FRAGMENTED_POOL;

assert(addr >= pool->mem->va->addr);
assert(addr + size <= pool->mem->va->addr + pool->mem->size_B);
uint64_t offset = addr - pool->mem->va->addr;
if (pool->host_mem != NULL) {
/* In this case, the address is a host address */
assert(addr >= HOST_ONLY_ADDR);
assert(addr + size <= HOST_ONLY_ADDR + pool->mem_size_B);
uint64_t offset = addr - HOST_ONLY_ADDR;

*addr_out = addr;
*map_out = pool->mem->map + offset;
*addr_out = addr;
*map_out = pool->host_mem + offset;
} else {
assert(addr >= pool->mem->va->addr);
assert(addr + size <= pool->mem->va->addr + pool->mem_size_B);
uint64_t offset = addr - pool->mem->va->addr;

*addr_out = addr;
*map_out = pool->mem->map + offset;
}

return VK_SUCCESS;
}

@@ -653,8 +683,13 @@ nvk_descriptor_pool_free(struct nvk_descriptor_pool *pool,
uint64_t addr, uint64_t size)
{
assert(size > 0);
assert(addr >= pool->mem->va->addr);
assert(addr + size <= pool->mem->va->addr + pool->mem->size_B);
if (pool->host_mem != NULL) {
assert(addr >= HOST_ONLY_ADDR);
assert(addr + size <= HOST_ONLY_ADDR + pool->mem_size_B);
} else {
assert(addr >= pool->mem->va->addr);
assert(addr + size <= pool->mem->va->addr + pool->mem_size_B);
}
util_vma_heap_free(&pool->heap, addr, size);
}

@@ -691,7 +726,7 @@ nvk_descriptor_set_create(struct nvk_device *dev,

if (set->size > 0) {
result = nvk_descriptor_pool_alloc(pool, set->size, alignment,
&set->addr, &set->mapped_ptr);
&set->addr, &set->map);
if (result != VK_SUCCESS) {
vk_object_free(&dev->vk, NULL, set);
return result;

@@ -952,7 +987,7 @@ nvk_push_descriptor_set_update_template(
struct nvk_descriptor_set tmp_set = {
.layout = layout,
.size = sizeof(push_set->data),
.mapped_ptr = push_set->data,
.map = push_set->data,
};
nvk_descriptor_set_write_template(dev, &tmp_set, template, data);
}
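The ubo_desc/ssbo_desc hunks above assert that the bound address is already aligned and then switch from align64() to ROUND_DOWN_TO(), clamping the base down defensively while the range is still rounded up. A standalone sketch, with locally defined power-of-two helpers rather than Mesa's util macros, of why rounding a base down and a size up preserves coverage of the requested range:

#include <assert.h>
#include <stdint.h>

static uint64_t round_down_pot(uint64_t v, uint64_t align)
{
   return v & ~(align - 1);            /* align must be a power of two */
}

static uint64_t align_up_pot(uint64_t v, uint64_t align)
{
   return (v + align - 1) & ~(align - 1);
}

int main(void)
{
   const uint64_t min_align = 64;      /* illustrative, not a HW value */
   uint64_t addr = 0x12345;            /* arbitrary unaligned request */
   uint64_t range = 100;

   uint64_t base = round_down_pot(addr, min_align);
   uint64_t size = align_up_pot(addr + range - base, min_align);

   /* The aligned window still covers every byte that was requested. */
   assert(base <= addr);
   assert(base + size >= addr + range);
   assert(base % min_align == 0 && size % min_align == 0);
   return 0;
}
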
@@ -24,7 +24,9 @@ struct nvk_descriptor_pool {

struct list_head sets;

uint64_t mem_size_B;
struct nvkmd_mem *mem;
void *host_mem;
struct util_vma_heap heap;
};

@@ -38,7 +40,7 @@ struct nvk_descriptor_set {
struct list_head link;

struct nvk_descriptor_set_layout *layout;
void *mapped_ptr;
void *map;
uint64_t addr;
uint32_t size;
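The descriptor-pool hunks above let a host-only pool (VK_DESCRIPTOR_POOL_CREATE_HOST_ONLY_BIT_EXT) reuse the same suballocation path as a GPU-backed pool by seeding the VMA heap with the HOST_ONLY_ADDR sentinel and translating returned addresses back into the host allocation by offset. A toy model of that scheme, using a trivial bump allocator as a stand-in for util_vma_heap and an illustrative sentinel value:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

#define DEMO_HOST_ONLY_ADDR 0xc0ffee0000000000ull

struct demo_pool {
   void *host_mem;        /* backing storage for the host-only pool */
   uint64_t base;         /* sentinel "VA" the heap allocates from */
   uint64_t size, used;   /* trivial bump-allocation state */
};

static int demo_pool_init(struct demo_pool *pool, uint64_t size)
{
   pool->host_mem = calloc(1, size);
   if (pool->host_mem == NULL)
      return -1;
   pool->base = DEMO_HOST_ONLY_ADDR;
   pool->size = size;
   pool->used = 0;
   return 0;
}

static void *demo_pool_alloc(struct demo_pool *pool, uint64_t size,
                             uint64_t *addr_out)
{
   if (pool->used + size > pool->size)
      return NULL;

   uint64_t addr = pool->base + pool->used;  /* fake VA, never dereferenced */
   pool->used += size;

   /* Translate the fake VA back into the host mapping by offset, mirroring
    * what nvk_descriptor_pool_alloc() does with HOST_ONLY_ADDR above. */
   uint64_t offset = addr - pool->base;
   *addr_out = addr;
   return (char *)pool->host_mem + offset;
}

int main(void)
{
   struct demo_pool pool;
   uint64_t addr;
   if (demo_pool_init(&pool, 4096) != 0)
      return 1;
   char *map = demo_pool_alloc(&pool, 256, &addr);
   assert(map != NULL && addr >= DEMO_HOST_ONLY_ADDR);
   map[0] = 0;   /* descriptor writes land in plain host memory */
   free(pool.host_mem);
   return 0;
}
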
@@ -963,8 +963,10 @@ is_edb_buffer_view(nir_deref_instr *deref,
nir_variable *var = nir_deref_instr_get_variable(deref);
uint8_t set = var->data.descriptor_set;

return ctx->set_layouts[set]->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT;
return (ctx->set_layouts[set]->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_DESCRIPTOR_BUFFER_BIT_EXT) &&
!(ctx->set_layouts[set]->flags &
VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT);
}

static nir_def *

@@ -1047,6 +1049,7 @@ lower_edb_buffer_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,

switch (intrin->intrinsic) {
case nir_intrinsic_image_deref_load:
case nir_intrinsic_image_deref_sparse_load:
case nir_intrinsic_image_deref_store:
case nir_intrinsic_image_deref_atomic:
case nir_intrinsic_image_deref_atomic_swap: {

@@ -1060,7 +1063,8 @@ lower_edb_buffer_image_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
pos = nir_vector_insert_imm(b, pos, new_x, 0);
nir_src_rewrite(&intrin->src[1], pos);

if (intrin->intrinsic == nir_intrinsic_image_deref_load) {
if (intrin->intrinsic == nir_intrinsic_image_deref_load ||
intrin->intrinsic == nir_intrinsic_image_deref_sparse_load) {
b->cursor = nir_after_instr(&intrin->instr);
nir_def *res = &intrin->def;
res = fixup_edb_buffer_view_result(b, desc, in_bounds, res,

@@ -1216,17 +1220,35 @@ lower_edb_buffer_tex_instr(nir_builder *b, nir_tex_instr *tex,
nir_def *in_bounds = edb_buffer_view_coord_is_in_bounds(b, desc, coord);

nir_def *index = edb_buffer_view_index(b, desc, in_bounds);
nir_src_rewrite(&tex->src[texture_src_idx].src, index);
tex->src[texture_src_idx].src_type = nir_tex_src_texture_handle;

nir_def *new_coord = adjust_edb_buffer_view_coord(b, desc, coord);
nir_src_rewrite(&tex->src[coord_src_idx].src, new_coord);
nir_def *u = nir_undef(b, 1, 32);

/* The tricks we play for EDB use very large texel buffer views. These
* don't seem to play nicely with the tld instruction which thinks
* buffers are a 1D texture. However, suld seems fine with it so we'll
* rewrite to use that.
*/
nir_def *res = nir_bindless_image_load(b, tex->def.num_components,
tex->def.bit_size,
index,
nir_vec4(b, new_coord, u, u, u),
u, /* sample_id */
nir_imm_int(b, 0), /* LOD */
.image_dim = GLSL_SAMPLER_DIM_BUF,
.image_array = false,
.format = PIPE_FORMAT_NONE,
.access = ACCESS_NON_WRITEABLE |
ACCESS_CAN_REORDER,
.dest_type = tex->dest_type);
if (tex->is_sparse) {
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(res->parent_instr);
intr->intrinsic = nir_intrinsic_bindless_image_sparse_load;
}

b->cursor = nir_after_instr(&tex->instr);
nir_def *res = &tex->def;
res = fixup_edb_buffer_view_result(b, desc, in_bounds,
res, tex->dest_type);
nir_def_rewrite_uses_after(&tex->def, res, res->parent_instr);

nir_def_rewrite_uses(&tex->def, res);
break;
}
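The EDB lowering above follows a common robust-access shape: compute a bounds predicate up front, route the access through an index/coordinate that is safe either way, then patch the loaded value afterwards. A scalar C sketch of that pattern (plain C rather than NIR builder code; names and the zero-on-OOB policy are illustrative, in the same spirit as fixup_edb_buffer_view_result() above):

#include <assert.h>
#include <stdint.h>

static uint32_t robust_buffer_load(const uint32_t *texels, uint32_t num_texels,
                                   uint32_t coord)
{
   int in_bounds = coord < num_texels;

   /* Redirect out-of-bounds coordinates somewhere harmless instead of
    * letting the access fault. */
   uint32_t safe_coord = in_bounds ? coord : 0;
   uint32_t raw = texels[safe_coord];

   /* Fix up the result afterwards; here OOB reads are made to return 0. */
   return in_bounds ? raw : 0;
}

int main(void)
{
   uint32_t buf[4] = { 10, 11, 12, 13 };
   assert(robust_buffer_load(buf, 4, 2) == 12);
   assert(robust_buffer_load(buf, 4, 9) == 0);
   return 0;
}
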
@@ -552,7 +552,7 @@ nvk_get_device_features(const struct nv_device_info *info,

.descriptorBuffer = true,
.descriptorBufferCaptureReplay = true,
.descriptorBufferImageLayoutIgnored = true,
.descriptorBufferPushDescriptors = false,
.descriptorBufferPushDescriptors = true,

/* VK_EXT_device_generated_commands */
.deviceGeneratedCommands = true,

@@ -984,7 +984,7 @@ nvk_get_device_properties(const struct nvk_instance *instance,

/* VK_EXT_descriptor_buffer */
.combinedImageSamplerDescriptorSingleArray = true,
.bufferlessPushDescriptors = false,
.bufferlessPushDescriptors = true,
.allowSamplerImageViewPostSubmitCreation = false,
.descriptorBufferOffsetAlignment = nvk_min_cbuf_alignment(info),
.maxDescriptorBufferBindings = 32,

@@ -35,6 +35,9 @@
/* Max size of a bound cbuf */
#define NVK_MAX_CBUF_SIZE (1u << 16)

/* Device Generated Commands */
#define NVK_DGC_ALIGN 0x100

struct nvk_addr_range {
uint64_t addr;
uint64_t range;

@@ -396,6 +396,18 @@ bi_optimizer_var_tex(bi_context *ctx, bi_instr *var, bi_instr *tex)
return true;
}

static void
bi_record_use(bi_instr **uses, BITSET_WORD *multiple, bi_instr *I, unsigned s)
{
unsigned v = I->src[s].value;

assert(I->src[s].type == BI_INDEX_NORMAL);
if (uses[v] && uses[v] != I)
BITSET_SET(multiple, v);
else
uses[v] = I;
}

void
bi_opt_mod_prop_backward(bi_context *ctx)
{

@@ -403,45 +415,65 @@ bi_opt_mod_prop_backward(bi_context *ctx)
bi_instr **uses = calloc(count, sizeof(*uses));
BITSET_WORD *multiple = calloc(BITSET_WORDS(count), sizeof(*multiple));

bi_foreach_instr_global_rev(ctx, I) {
bi_foreach_ssa_src(I, s) {
unsigned v = I->src[s].value;

if (uses[v] && uses[v] != I)
BITSET_SET(multiple, v);
else
uses[v] = I;
bi_foreach_block_rev(ctx, block) {
/* Watch out for PHI instructions in loops!
* PHI sources are logically read at the end of the predecessor,
* so process our source in successor phis first
*/
bi_foreach_successor(block, succ) {
unsigned s = bi_predecessor_index(succ, block);
bi_foreach_instr_in_block(succ, phi) {
/* the PHIs are all at the start of the block, so stop
* when we see a non-PHI
*/
if (phi->op != BI_OPCODE_PHI)
break;
if (phi->src[s].type == BI_INDEX_NORMAL)
bi_record_use(uses, multiple, phi, s);
}
}

if (!I->nr_dests)
continue;

bi_instr *use = uses[I->dest[0].value];

if (!use || BITSET_TEST(multiple, I->dest[0].value))
continue;

/* Destination has a single use, try to propagate */
bool propagated =
bi_optimizer_clamp(I, use) || bi_optimizer_result_type(I, use);

if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM &&
use->op == BI_OPCODE_SPLIT_I32) {
/* Need to see through the split in a
* ld_var_imm/split/var_tex sequence
*/
bi_instr *tex = uses[use->dest[0].value];

if (!tex || BITSET_TEST(multiple, use->dest[0].value))
/* now go through the instructions backwards */
bi_foreach_instr_in_block_rev(block, I) {
/* skip PHIs, they are handled specially */
if (I->op == BI_OPCODE_PHI)
continue;

use = tex;
propagated = bi_optimizer_var_tex(ctx, I, use);
}
/* record uses */
bi_foreach_ssa_src(I, s) {
bi_record_use(uses, multiple, I, s);
}

if (propagated) {
bi_remove_instruction(use);
continue;
/* check for definitions */
if (I->nr_dests != 1)
continue;

bi_instr *use = uses[I->dest[0].value];

if (!use || BITSET_TEST(multiple, I->dest[0].value))
continue;

/* Destination has a single use, try to propagate */
bool propagated =
bi_optimizer_clamp(I, use) || bi_optimizer_result_type(I, use);

if (!propagated && I->op == BI_OPCODE_LD_VAR_IMM &&
use->op == BI_OPCODE_SPLIT_I32) {
/* Need to see through the split in a
* ld_var_imm/split/var_tex sequence
*/
bi_instr *tex = uses[use->dest[0].value];

if (!tex || BITSET_TEST(multiple, use->dest[0].value))
continue;

use = tex;
propagated = bi_optimizer_var_tex(ctx, I, use);
}

if (propagated) {
bi_remove_instruction(use);
continue;
}
}
}

@@ -137,7 +137,7 @@ const struct pan_blendable_format
#define YUV_NO_SWAP (0)
#define YUV_SWAP (1)

#if PAN_ARCH <= 9
#if PAN_ARCH < 14
#define MALI_YUV_CR_SITING_CENTER_422 (MALI_YUV_CR_SITING_CENTER_Y)
#else
#define MALI_YUV_CR_SITING_CENTER_422 (MALI_YUV_CR_SITING_CENTER_X)

@@ -434,6 +434,15 @@ get_vk_version(unsigned arch)
return VK_MAKE_API_VERSION(0, 1, 0, VK_HEADER_VERSION);
}

static VkConformanceVersion
get_conformance_version(unsigned arch)
{
if (arch == 10)
return (VkConformanceVersion){1, 4, 1, 2};

return (VkConformanceVersion){0, 0, 0, 0};
}

static void
get_device_properties(const struct panvk_instance *instance,
const struct panvk_physical_device *device,

@@ -723,7 +732,7 @@ get_device_properties(const struct panvk_instance *instance,
.independentResolve = true,
/* VK_KHR_driver_properties */
.driverID = VK_DRIVER_ID_MESA_PANVK,
.conformanceVersion = (VkConformanceVersion){0, 0, 0, 0},
.conformanceVersion = get_conformance_version(arch),
/* XXX: VK_KHR_shader_float_controls */
.denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
.roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,

@@ -32,6 +32,9 @@ vn_buffer_get_cache_index(const VkBufferCreateInfo *create_info,
*
* Btw, we assume VkBufferCreateFlagBits won't exhaust all 32bits, at least
* no earlier than VkBufferUsageFlagBits.
*
* TODO: extend cache to cover VkBufferUsageFlags2CreateInfo (introduced in
* VK_KHR_maintenance5 and promoted to 1.4).
*/
assert(!(create_info->flags & 0x80000000));

@@ -492,6 +492,7 @@ vn_physical_device_init_properties(struct vn_physical_device *physical_dev)
VkPhysicalDeviceTexelBufferAlignmentProperties texel_buffer_alignment;

/* KHR */
VkPhysicalDeviceMaintenance5PropertiesKHR maintenance_5;
VkPhysicalDevicePushDescriptorPropertiesKHR push_descriptor;
VkPhysicalDeviceFragmentShadingRatePropertiesKHR fragment_shading_rate;

@@ -552,6 +553,7 @@ vn_physical_device_init_properties(struct vn_physical_device *physical_dev)
}

/* KHR */
VN_ADD_PNEXT_EXT(props2, MAINTENANCE_5_PROPERTIES_KHR, local_props.maintenance_5, exts->KHR_maintenance5);
VN_ADD_PNEXT_EXT(props2, FRAGMENT_SHADING_RATE_PROPERTIES_KHR, local_props.fragment_shading_rate, exts->KHR_fragment_shading_rate);
VN_ADD_PNEXT_EXT(props2, PUSH_DESCRIPTOR_PROPERTIES_KHR, local_props.push_descriptor, exts->KHR_push_descriptor);

@@ -614,6 +616,7 @@ vn_physical_device_init_properties(struct vn_physical_device *physical_dev)

/* KHR */
VN_SET_VK_PROPS_EXT(props, &local_props.fragment_shading_rate, exts->KHR_fragment_shading_rate);
VN_SET_VK_PROPS_EXT(props, &local_props.maintenance_5, exts->KHR_maintenance5);
VN_SET_VK_PROPS_EXT(props, &local_props.push_descriptor, exts->KHR_push_descriptor);

/* EXT */

@@ -221,10 +221,10 @@ struct vn_graphics_pipeline_fix_tmp {
/* Fixing the pNext chain
*
* TODO: extend when below or more extensions are supported:
* - VK_KHR_maintenance5
* - VK_EXT_pipeline_robustness
*/
VkGraphicsPipelineLibraryCreateInfoEXT *gpl_infos;
VkPipelineCreateFlags2CreateInfo *flags2_infos;
VkPipelineCreationFeedbackCreateInfo *feedback_infos;
VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_infos;
VkPipelineLibraryCreateInfoKHR *library_infos;

@@ -626,8 +626,8 @@ vn_destroy_failed_pipeline_handles(struct vn_device *dev,
}

#define VN_PIPELINE_CREATE_SYNC_MASK \
(VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT | \
VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT)
(VK_PIPELINE_CREATE_2_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT | \
VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT)

static struct vn_graphics_pipeline_fix_tmp *
vn_graphics_pipeline_fix_tmp_alloc(const VkAllocationCallbacks *alloc,

@@ -641,6 +641,7 @@ vn_graphics_pipeline_fix_tmp_alloc(const VkAllocationCallbacks *alloc,

/* for pNext */
VkGraphicsPipelineLibraryCreateInfoEXT *gpl_infos;
VkPipelineCreateFlags2CreateInfo *flags2_infos;
VkPipelineCreationFeedbackCreateInfo *feedback_infos;
VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_infos;
VkPipelineLibraryCreateInfoKHR *library_infos;

@@ -656,6 +657,8 @@ vn_graphics_pipeline_fix_tmp_alloc(const VkAllocationCallbacks *alloc,

if (alloc_pnext) {
vk_multialloc_add(&ma, &gpl_infos, __typeof__(*gpl_infos), info_count);
vk_multialloc_add(&ma, &flags2_infos, __typeof__(*flags2_infos),
info_count);
vk_multialloc_add(&ma, &feedback_infos, __typeof__(*feedback_infos),
info_count);
vk_multialloc_add(&ma, &fsr_infos, __typeof__(*fsr_infos), info_count);

@@ -674,6 +677,7 @@ vn_graphics_pipeline_fix_tmp_alloc(const VkAllocationCallbacks *alloc,

if (alloc_pnext) {
tmp->gpl_infos = gpl_infos;
tmp->flags2_infos = flags2_infos;
tmp->feedback_infos = feedback_infos;
tmp->fsr_infos = fsr_infos;
tmp->library_infos = library_infos;

@@ -702,6 +706,7 @@ vn_graphics_pipeline_fix_tmp_alloc(const VkAllocationCallbacks *alloc,
static void
vn_graphics_pipeline_library_state_update(
const VkGraphicsPipelineCreateInfo *info,
VkPipelineCreateFlags2 flags2,
struct vn_graphics_pipeline_library_state *restrict gpl)
{
const VkGraphicsPipelineLibraryCreateInfoEXT *gpl_info =

@@ -713,7 +718,7 @@ vn_graphics_pipeline_library_state_update(

if (gpl_info) {
gpl->mask |= gpl_info->flags;
} else if ((info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR) ||
} else if ((flags2 & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR) ||
lib_count > 0) {
gpl->mask |= 0;
} else {

@@ -1019,6 +1024,7 @@ vn_graphics_pipeline_state_merge(
static void
vn_graphics_pipeline_state_fill(
const VkGraphicsPipelineCreateInfo *info,
VkPipelineCreateFlags2 flags2,
struct vn_graphics_pipeline_state *restrict state,
struct vn_graphics_pipeline_fix_desc *out_fix_desc)
{

@@ -1066,7 +1072,7 @@ vn_graphics_pipeline_state_fill(
* directly (without linking).
*/
struct vn_graphics_pipeline_library_state direct_gpl = { 0 };
vn_graphics_pipeline_library_state_update(info, &direct_gpl);
vn_graphics_pipeline_library_state_update(info, flags2, &direct_gpl);

/* From the Vulkan 1.3.251 spec:
* VUID-VkGraphicsPipelineCreateInfo-pLibraries-06611

@@ -1186,7 +1192,7 @@ vn_graphics_pipeline_state_fill(
valid.self.depth_stencil_state = true;
} else if (state->render_pass.attachment_aspects ==
VK_IMAGE_ASPECT_METADATA_BIT &&
(info->flags & VK_PIPELINE_CREATE_LIBRARY_BIT_KHR)) {
(flags2 & VK_PIPELINE_CREATE_2_LIBRARY_BIT_KHR)) {
/* The app has not yet provided render pass info, neither directly
* in this VkGraphicsPipelineCreateInfo nor in any linked pipeline
* libraries. Therefore we do not know if the final complete

@@ -1255,7 +1261,7 @@ vn_graphics_pipeline_state_fill(
* basePipelineIndex is -1, basePipelineHandle must be a valid graphics
* VkPipeline handle
*/
if ((info->flags & VK_PIPELINE_CREATE_DERIVATIVE_BIT) &&
if ((flags2 & VK_PIPELINE_CREATE_2_DERIVATIVE_BIT) &&
info->basePipelineIndex == -1)
valid.self.base_pipeline_handle = true;

@@ -1393,6 +1399,7 @@ vn_graphics_pipeline_create_info_pnext_init(
uint32_t index)
{
VkGraphicsPipelineLibraryCreateInfoEXT *gpl = &fix_tmp->gpl_infos[index];
VkPipelineCreateFlags2CreateInfo *flags2 = &fix_tmp->flags2_infos[index];
VkPipelineCreationFeedbackCreateInfo *feedback =
&fix_tmp->feedback_infos[index];
VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr =

@@ -1410,6 +1417,10 @@ vn_graphics_pipeline_create_info_pnext_init(
memcpy(gpl, src, sizeof(*gpl));
next = gpl;
break;
case VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO:
memcpy(flags2, src, sizeof(*flags2));
next = flags2;
break;
case VK_STRUCTURE_TYPE_PIPELINE_CREATION_FEEDBACK_CREATE_INFO:
memcpy(feedback, src, sizeof(*feedback));
next = feedback;

@@ -1528,6 +1539,14 @@ vn_invalidate_pipeline_creation_feedback(const VkBaseInStructure *chain)
feedback_info->pPipelineStageCreationFeedbacks[i].flags = 0;
}

static inline VkPipelineCreateFlags2
vn_pipeline_create_flags2(const void *pnext, VkPipelineCreateFlags flags)
{
const VkPipelineCreateFlags2CreateInfo *flags2 =
vk_find_struct_const(pnext, PIPELINE_CREATE_FLAGS_2_CREATE_INFO);
return flags2 ? flags2->flags : flags;
}

VkResult
vn_CreateGraphicsPipelines(VkDevice device,
VkPipelineCache pipelineCache,

@@ -1558,8 +1577,14 @@ vn_CreateGraphicsPipelines(VkDevice device,
for (uint32_t i = 0; i < createInfoCount; i++) {
struct vn_graphics_pipeline *pipeline =
vn_graphics_pipeline_from_handle(pPipelines[i]);
vn_graphics_pipeline_state_fill(&pCreateInfos[i], &pipeline->state,
&fix_descs[i]);

const VkPipelineCreateFlags2 flags2 = vn_pipeline_create_flags2(
pCreateInfos[i].pNext, pCreateInfos[i].flags);
if (flags2 & VN_PIPELINE_CREATE_SYNC_MASK)
want_sync = true;

vn_graphics_pipeline_state_fill(&pCreateInfos[i], flags2,
&pipeline->state, &fix_descs[i]);
}

struct vn_graphics_pipeline_fix_tmp *fix_tmp = NULL;

@@ -1580,9 +1605,6 @@ vn_CreateGraphicsPipelines(VkDevice device,
pipeline->layout = vn_pipeline_layout_ref(dev, layout);
}

if ((pCreateInfos[i].flags & VN_PIPELINE_CREATE_SYNC_MASK))
want_sync = true;

vn_invalidate_pipeline_creation_feedback(
(const VkBaseInStructure *)pCreateInfos[i].pNext);
}

@@ -1646,7 +1668,10 @@ vn_CreateComputePipelines(VkDevice device,
layout->has_push_constant_ranges) {
pipeline->layout = vn_pipeline_layout_ref(dev, layout);
}
if ((pCreateInfos[i].flags & VN_PIPELINE_CREATE_SYNC_MASK))

if (vn_pipeline_create_flags2(pCreateInfos[i].pNext,
pCreateInfos[i].flags) &
VN_PIPELINE_CREATE_SYNC_MASK)
want_sync = true;

vn_invalidate_pipeline_creation_feedback(
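The vn_pipeline hunks above consolidate pipeline-create-flag handling on vn_pipeline_create_flags2(), which prefers the 64-bit flags from a VkPipelineCreateFlags2CreateInfo in the pNext chain and falls back to the legacy 32-bit flags. A standalone sketch of the same idiom written as a plain pNext walk instead of Mesa's vk_find_struct_const() helper; it assumes a Vulkan header new enough to provide the non-KHR names (Vulkan 1.4, where VK_KHR_maintenance5 is promoted):

#include <vulkan/vulkan_core.h>

static VkPipelineCreateFlags2
demo_pipeline_create_flags2(const void *pnext, VkPipelineCreateFlags legacy)
{
   /* Walk the pNext chain looking for the flags2 struct. */
   for (const VkBaseInStructure *s = pnext; s != NULL; s = s->pNext) {
      if (s->sType == VK_STRUCTURE_TYPE_PIPELINE_CREATE_FLAGS_2_CREATE_INFO)
         return ((const VkPipelineCreateFlags2CreateInfo *)s)->flags;
   }

   /* The promoted legacy bits keep their positions in the 64-bit flags,
    * so a plain widening fallback is sufficient. */
   return (VkPipelineCreateFlags2)legacy;
}

A caller would typically resolve the flags once per VkGraphicsPipelineCreateInfo or VkComputePipelineCreateInfo and then test bits such as VK_PIPELINE_CREATE_2_EARLY_RETURN_ON_FAILURE_BIT against the resolved value, as the diff does with VN_PIPELINE_CREATE_SYNC_MASK.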