Compare commits
47 Commits
mesa-19.0. ... mesa-17.3.

SHA1
19b62847e0
d5cc7e47a8
9b44ef94b4
a12ca3b231
9710fbbcdf
b4bf9f6a41
2516c3217d
383b360348
71571aab14
13bfb83b31
4c82f2c3a9
14c40ebd0f
77839e9ba8
7826bc9538
f0951a6aa9
3eb187f376
0c20849f9c
fb09360ea5
1664322838
c7e625df69
5addf041ef
f4b6883ebc
70ee0a4525
17d988bfaa
03cf1953ad
4fb6b83968
26b44eadac
e22cf6e9b4
7df1b901b9
cbb8aec81c
ff8c152640
0fef0c7deb
66603bff6f
b0082632eb
3da6dd8003
2e33d68046
3b699fdd19
a2123968fa
1ce3fbeb91
8f2bc19856
b6f0c16a89
5c8eb88553
afdb9da492
b8f10fdf34
ea132f9265
08b41e70dd
ae720e2873
@@ -3631,15 +3631,17 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
MAYBE_UNUSED int length;

bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;

switch (instr->intrinsic) {
case nir_intrinsic_image_atomic_add:
atomic_name = "add";
break;
case nir_intrinsic_image_atomic_min:
atomic_name = "smin";
atomic_name = is_unsigned ? "umin" : "smin";
break;
case nir_intrinsic_image_atomic_max:
atomic_name = "smax";
atomic_name = is_unsigned ? "umax" : "smax";
break;
case nir_intrinsic_image_atomic_and:
atomic_name = "and";
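The hunk above makes the image atomic min/max intrinsics honour the image's sampler result type instead of always emitting the signed variants. A minimal standalone C sketch (illustrative only, not driver code) of why the signed/unsigned distinction matters for the same 32-bit pattern:

/* Illustrative only: why imageAtomicMin() must pick smin vs. umin.
 * 0xFFFFFFFF is -1 when read as signed but UINT32_MAX when read as
 * unsigned, so the two "min" operations disagree on the result. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t mem_bits = 0xFFFFFFFFu;   /* value already stored in the image */
    uint32_t operand  = 1u;            /* value passed to the atomic op */

    int32_t  smin = ((int32_t)mem_bits < (int32_t)operand)
                        ? (int32_t)mem_bits : (int32_t)operand;   /* -1 */
    uint32_t umin = (mem_bits < operand) ? mem_bits : operand;    /*  1 */

    printf("smin = %d, umin = %u\n", (int)smin, (unsigned)umin);
    return 0;
}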
@@ -927,9 +927,11 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
in->numSamples == 1) {
ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};

din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
dout.pMipInfo = meta_mip_info;

din.dccKeyFlags.pipeAligned = 1;
din.dccKeyFlags.rbAligned = 1;
@@ -955,21 +957,37 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
surf->dcc_alignment = dout.dccRamBaseAlign;
surf->num_dcc_levels = in->numMipLevels;

/* Disable DCC for the smallest levels. It seems to be
* required for DCC readability between CB and shaders
* when TC L2 isn't flushed. This was guessed.
/* Disable DCC for levels that are in the mip tail.
*
* There are two issues that this is intended to
* address:
*
* 1. Multiple mip levels may share a cache line. This
* can lead to corruption when switching between
* rendering to different mip levels because the
* RBs don't maintain coherency.
*
* 2. Texturing with metadata after rendering sometimes
* fails with corruption, probably for a similar
* reason.
*
* Working around these issues for all levels in the
* mip tail may be overly conservative, but it's what
* Vulkan does.
*
* Alternative solutions that also work but are worse:
* - Disable DCC.
* - Disable DCC entirely.
* - Flush TC L2 after rendering.
*/
for (unsigned i = 1; i < in->numMipLevels; i++) {
if (mip_info[i].pitch *
mip_info[i].height * surf->bpe < 1024) {
for (unsigned i = 0; i < in->numMipLevels; i++) {
if (meta_mip_info[i].inMiptail) {
surf->num_dcc_levels = i;
break;
}
}

if (!surf->num_dcc_levels)
surf->dcc_size = 0;
}

/* FMASK */
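The replaced heuristic above dropped DCC once a level's pixel data fell under 1024 bytes; the new check instead disables DCC for every level that addrlib reports as being in the mip tail. A throwaway C sketch (assuming a 512x512 image with 4 bytes per element; not the addrlib computation) of how quickly per-level sizes shrink, which is why several of the smallest levels can end up packed close enough to share a cache line:

/* Illustrative only: raw byte size of each mip level. The last few
 * levels are tiny compared to a cache line once packed together. */
#include <stdio.h>

int main(void)
{
    unsigned w = 512, h = 512, bpe = 4;

    for (unsigned level = 0; w || h; level++) {
        unsigned lw = w ? w : 1, lh = h ? h : 1;
        printf("level %2u: %4ux%-4u = %7u bytes\n", level, lw, lh, lw * lh * bpe);
        w >>= 1;
        h >>= 1;
    }
    return 0;
}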
@@ -104,6 +104,75 @@ get_chip_name(enum radeon_family family)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_physical_device_init_mem_types(struct radv_physical_device *device)
|
||||
{
|
||||
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
|
||||
uint64_t visible_vram_size = MIN2(device->rad_info.vram_size,
|
||||
device->rad_info.vram_vis_size);
|
||||
|
||||
int vram_index = -1, visible_vram_index = -1, gart_index = -1;
|
||||
device->memory_properties.memoryHeapCount = 0;
|
||||
if (device->rad_info.vram_size - visible_vram_size > 0) {
|
||||
vram_index = device->memory_properties.memoryHeapCount++;
|
||||
device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
|
||||
.size = device->rad_info.vram_size - visible_vram_size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
};
|
||||
}
|
||||
if (visible_vram_size) {
|
||||
visible_vram_index = device->memory_properties.memoryHeapCount++;
|
||||
device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
|
||||
.size = visible_vram_size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
};
|
||||
}
|
||||
if (device->rad_info.gart_size > 0) {
|
||||
gart_index = device->memory_properties.memoryHeapCount++;
|
||||
device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
|
||||
.size = device->rad_info.gart_size,
|
||||
.flags = 0,
|
||||
};
|
||||
}
|
||||
|
||||
STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
|
||||
unsigned type_count = 0;
|
||||
if (vram_index >= 0) {
|
||||
device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM;
|
||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
.heapIndex = vram_index,
|
||||
};
|
||||
}
|
||||
if (gart_index >= 0) {
|
||||
device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
|
||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
.heapIndex = gart_index,
|
||||
};
|
||||
}
|
||||
if (visible_vram_index >= 0) {
|
||||
device->mem_type_indices[type_count] = RADV_MEM_TYPE_VRAM_CPU_ACCESS;
|
||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
.heapIndex = visible_vram_index,
|
||||
};
|
||||
}
|
||||
if (gart_index >= 0) {
|
||||
device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_CACHED;
|
||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
||||
.heapIndex = gart_index,
|
||||
};
|
||||
}
|
||||
device->memory_properties.memoryTypeCount = type_count;
|
||||
}
|
||||
|
||||
static VkResult
|
||||
radv_physical_device_init(struct radv_physical_device *device,
|
||||
struct radv_instance *instance,
|
||||
@@ -152,6 +221,8 @@ radv_physical_device_init(struct radv_physical_device *device,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
device->name = get_chip_name(device->rad_info.family);
|
||||
|
||||
if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
|
||||
radv_finish_wsi(device);
|
||||
device->ws->destroy(device->ws);
|
||||
@@ -168,12 +239,11 @@ radv_physical_device_init(struct radv_physical_device *device,
|
||||
/* The gpu id is already embeded in the uuid so we just pass "radv"
|
||||
* when creating the cache.
|
||||
*/
|
||||
char buf[VK_UUID_SIZE + 1];
|
||||
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE);
|
||||
device->disk_cache = disk_cache_create("radv", buf, shader_env_flags);
|
||||
char buf[VK_UUID_SIZE * 2 + 1];
|
||||
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
|
||||
device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
|
||||
|
||||
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
|
||||
device->name = get_chip_name(device->rad_info.family);
|
||||
|
||||
radv_get_driver_uuid(&device->device_uuid);
|
||||
radv_get_device_uuid(&device->rad_info, &device->device_uuid);
|
||||
@@ -189,6 +259,7 @@ radv_physical_device_init(struct radv_physical_device *device,
|
||||
*/
|
||||
device->has_clear_state = device->rad_info.chip_class >= CIK;
|
||||
|
||||
radv_physical_device_init_mem_types(device);
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail:
|
||||
@@ -779,49 +850,7 @@ void radv_GetPhysicalDeviceMemoryProperties(
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
|
||||
|
||||
STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES);
|
||||
|
||||
pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT;
|
||||
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
|
||||
.heapIndex = RADV_MEM_HEAP_VRAM,
|
||||
};
|
||||
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
.heapIndex = RADV_MEM_HEAP_GTT,
|
||||
};
|
||||
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
.heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS,
|
||||
};
|
||||
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
||||
.heapIndex = RADV_MEM_HEAP_GTT,
|
||||
};
|
||||
|
||||
STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS);
|
||||
uint64_t visible_vram_size = MIN2(physical_device->rad_info.vram_size,
|
||||
physical_device->rad_info.vram_vis_size);
|
||||
|
||||
pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
|
||||
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
|
||||
.size = physical_device->rad_info.vram_size -
|
||||
visible_vram_size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
};
|
||||
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
|
||||
.size = visible_vram_size,
|
||||
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
};
|
||||
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
|
||||
.size = physical_device->rad_info.gart_size,
|
||||
.flags = 0,
|
||||
};
|
||||
*pMemoryProperties = physical_device->memory_properties;
|
||||
}
|
||||
|
||||
void radv_GetPhysicalDeviceMemoryProperties2KHR(
|
||||
@@ -2059,6 +2088,7 @@ VkResult radv_alloc_memory(VkDevice _device,
|
||||
VkResult result;
|
||||
enum radeon_bo_domain domain;
|
||||
uint32_t flags = 0;
|
||||
enum radv_mem_type mem_type_index = device->physical_device->mem_type_indices[pAllocateInfo->memoryTypeIndex];
|
||||
|
||||
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
|
||||
|
||||
@@ -2101,18 +2131,18 @@ VkResult radv_alloc_memory(VkDevice _device,
|
||||
}
|
||||
|
||||
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
|
||||
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
|
||||
pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
|
||||
if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
|
||||
mem_type_index == RADV_MEM_TYPE_GTT_CACHED)
|
||||
domain = RADEON_DOMAIN_GTT;
|
||||
else
|
||||
domain = RADEON_DOMAIN_VRAM;
|
||||
|
||||
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM)
|
||||
if (mem_type_index == RADV_MEM_TYPE_VRAM)
|
||||
flags |= RADEON_FLAG_NO_CPU_ACCESS;
|
||||
else
|
||||
flags |= RADEON_FLAG_CPU_ACCESS;
|
||||
|
||||
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
|
||||
if (mem_type_index == RADV_MEM_TYPE_GTT_WRITE_COMBINE)
|
||||
flags |= RADEON_FLAG_GTT_WC;
|
||||
|
||||
if (mem_flags & RADV_MEM_IMPLICIT_SYNC)
|
||||
@@ -2125,7 +2155,7 @@ VkResult radv_alloc_memory(VkDevice _device,
|
||||
result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
mem->type_index = pAllocateInfo->memoryTypeIndex;
|
||||
mem->type_index = mem_type_index;
|
||||
out_success:
|
||||
*pMem = radv_device_memory_to_handle(mem);
|
||||
|
||||
@@ -2218,13 +2248,14 @@ VkResult radv_InvalidateMappedMemoryRanges(
|
||||
}
|
||||
|
||||
void radv_GetBufferMemoryRequirements(
|
||||
VkDevice device,
|
||||
VkDevice _device,
|
||||
VkBuffer _buffer,
|
||||
VkMemoryRequirements* pMemoryRequirements)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
|
||||
|
||||
pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
|
||||
pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
|
||||
|
||||
if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
|
||||
pMemoryRequirements->alignment = 4096;
|
||||
@@ -2258,13 +2289,14 @@ void radv_GetBufferMemoryRequirements2KHR(
|
||||
}
|
||||
|
||||
void radv_GetImageMemoryRequirements(
|
||||
VkDevice device,
|
||||
VkDevice _device,
|
||||
VkImage _image,
|
||||
VkMemoryRequirements* pMemoryRequirements)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_image, image, _image);
|
||||
|
||||
pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;
|
||||
pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
|
||||
|
||||
pMemoryRequirements->size = image->size;
|
||||
pMemoryRequirements->alignment = image->alignment;
|
||||
|
@@ -1766,6 +1766,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
stage ? stage->pName : "main", i,
|
||||
stage ? stage->pSpecializationInfo : NULL);
|
||||
pipeline->active_stages |= mesa_to_vk_shader_stage(i);
|
||||
/* We don't want to alter meta shaders IR directly so clone it
|
||||
* first.
|
||||
*/
|
||||
if (nir[i]->info.name) {
|
||||
nir[i] = nir_shader_clone(NULL, nir[i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (nir[MESA_SHADER_TESS_CTRL]) {
|
||||
@@ -1779,6 +1786,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
|
||||
radv_link_shaders(pipeline, nir);
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
|
||||
continue;
|
||||
|
||||
if (modules[i])
|
||||
nir_print_shader(nir[i], stderr);
|
||||
}
|
||||
|
||||
if (nir[MESA_SHADER_FRAGMENT]) {
|
||||
if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
|
||||
pipeline->shaders[MESA_SHADER_FRAGMENT] =
|
||||
@@ -1863,7 +1878,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
free(codes[i]);
|
||||
if (modules[i] && !modules[i]->nir && !pipeline->device->trace_bo)
|
||||
if (modules[i] && !pipeline->device->trace_bo)
|
||||
ralloc_free(nir[i]);
|
||||
}
|
||||
|
||||
|
@@ -170,81 +170,6 @@ radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
|
||||
return entry;
|
||||
}
|
||||
|
||||
bool
|
||||
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
|
||||
struct radv_pipeline_cache *cache,
|
||||
const unsigned char *sha1,
|
||||
struct radv_shader_variant **variants)
|
||||
{
|
||||
struct cache_entry *entry;
|
||||
|
||||
if (!cache)
|
||||
cache = device->mem_cache;
|
||||
|
||||
pthread_mutex_lock(&cache->mutex);
|
||||
|
||||
entry = radv_pipeline_cache_search_unlocked(cache, sha1);
|
||||
|
||||
if (!entry) {
|
||||
if (!device->physical_device->disk_cache ||
|
||||
(device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t disk_sha1[20];
|
||||
disk_cache_compute_key(device->physical_device->disk_cache,
|
||||
sha1, 20, disk_sha1);
|
||||
entry = (struct cache_entry *)
|
||||
disk_cache_get(device->physical_device->disk_cache,
|
||||
disk_sha1, NULL);
|
||||
if (!entry) {
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
char *p = entry->code;
|
||||
for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
if (!entry->variants[i] && entry->code_sizes[i]) {
|
||||
struct radv_shader_variant *variant;
|
||||
struct cache_entry_variant_info info;
|
||||
|
||||
variant = calloc(1, sizeof(struct radv_shader_variant));
|
||||
if (!variant) {
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(&info, p, sizeof(struct cache_entry_variant_info));
|
||||
p += sizeof(struct cache_entry_variant_info);
|
||||
|
||||
variant->config = info.config;
|
||||
variant->info = info.variant_info;
|
||||
variant->rsrc1 = info.rsrc1;
|
||||
variant->rsrc2 = info.rsrc2;
|
||||
variant->code_size = entry->code_sizes[i];
|
||||
variant->ref_count = 1;
|
||||
|
||||
void *ptr = radv_alloc_shader_memory(device, variant);
|
||||
memcpy(ptr, p, entry->code_sizes[i]);
|
||||
p += entry->code_sizes[i];
|
||||
|
||||
entry->variants[i] = variant;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
|
||||
if (entry->variants[i])
|
||||
p_atomic_inc(&entry->variants[i]->ref_count);
|
||||
|
||||
memcpy(variants, entry->variants, sizeof(entry->variants));
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
|
||||
struct cache_entry *entry)
|
||||
@@ -314,6 +239,97 @@ radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
|
||||
radv_pipeline_cache_set_entry(cache, entry);
|
||||
}
|
||||
|
||||
bool
|
||||
radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
|
||||
struct radv_pipeline_cache *cache,
|
||||
const unsigned char *sha1,
|
||||
struct radv_shader_variant **variants)
|
||||
{
|
||||
struct cache_entry *entry;
|
||||
|
||||
if (!cache)
|
||||
cache = device->mem_cache;
|
||||
|
||||
pthread_mutex_lock(&cache->mutex);
|
||||
|
||||
entry = radv_pipeline_cache_search_unlocked(cache, sha1);
|
||||
|
||||
if (!entry) {
|
||||
if (!device->physical_device->disk_cache ||
|
||||
(device->instance->debug_flags & RADV_DEBUG_NO_CACHE)) {
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
uint8_t disk_sha1[20];
|
||||
disk_cache_compute_key(device->physical_device->disk_cache,
|
||||
sha1, 20, disk_sha1);
|
||||
entry = (struct cache_entry *)
|
||||
disk_cache_get(device->physical_device->disk_cache,
|
||||
disk_sha1, NULL);
|
||||
if (!entry) {
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return false;
|
||||
} else {
|
||||
size_t size = entry_size(entry);
|
||||
struct cache_entry *new_entry = vk_alloc(&cache->alloc, size, 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
|
||||
if (!new_entry) {
|
||||
free(entry);
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(new_entry, entry, entry_size(entry));
|
||||
free(entry);
|
||||
entry = new_entry;
|
||||
|
||||
radv_pipeline_cache_add_entry(cache, new_entry);
|
||||
}
|
||||
}
|
||||
|
||||
char *p = entry->code;
|
||||
for(int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
if (!entry->variants[i] && entry->code_sizes[i]) {
|
||||
struct radv_shader_variant *variant;
|
||||
struct cache_entry_variant_info info;
|
||||
|
||||
variant = calloc(1, sizeof(struct radv_shader_variant));
|
||||
if (!variant) {
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(&info, p, sizeof(struct cache_entry_variant_info));
|
||||
p += sizeof(struct cache_entry_variant_info);
|
||||
|
||||
variant->config = info.config;
|
||||
variant->info = info.variant_info;
|
||||
variant->rsrc1 = info.rsrc1;
|
||||
variant->rsrc2 = info.rsrc2;
|
||||
variant->code_size = entry->code_sizes[i];
|
||||
variant->ref_count = 1;
|
||||
|
||||
void *ptr = radv_alloc_shader_memory(device, variant);
|
||||
memcpy(ptr, p, entry->code_sizes[i]);
|
||||
p += entry->code_sizes[i];
|
||||
|
||||
entry->variants[i] = variant;
|
||||
} else if (entry->code_sizes[i]) {
|
||||
p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i)
|
||||
if (entry->variants[i])
|
||||
p_atomic_inc(&entry->variants[i]->ref_count);
|
||||
|
||||
memcpy(variants, entry->variants, sizeof(entry->variants));
|
||||
pthread_mutex_unlock(&cache->mutex);
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
radv_pipeline_cache_insert_shaders(struct radv_device *device,
|
||||
struct radv_pipeline_cache *cache,
|
||||
|
@@ -282,6 +282,9 @@ struct radv_physical_device {
* the pipeline cache defined by apps.
*/
struct disk_cache * disk_cache;

VkPhysicalDeviceMemoryProperties memory_properties;
enum radv_mem_type mem_type_indices[RADV_MEM_TYPE_COUNT];
};

struct radv_instance {
@@ -291,9 +291,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
nir_remove_dead_variables(nir, nir_var_local);
radv_optimize_nir(nir);

if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)
nir_print_shader(nir, stderr);

return nir;
}
@@ -194,12 +194,26 @@ radv_wsi_image_create(VkDevice device_h,
.image = image_h
};

/* Find the first VRAM memory type, or GART for PRIME images. */
int memory_type_index = -1;
for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
if ((linear && !is_local) || (!linear && is_local)) {
memory_type_index = i;
break;
}
}

/* fallback */
if (memory_type_index == -1)
memory_type_index = 0;

result = radv_alloc_memory(device_h,
&(VkMemoryAllocateInfo) {
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = &ded_alloc,
.allocationSize = image->size,
.memoryTypeIndex = linear ? 1 : 0,
.memoryTypeIndex = memory_type_index,
},
NULL /* XXX: pAllocator */,
RADV_MEM_IMPLICIT_SYNC,
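The WSI change above stops hard-coding memoryTypeIndex 0/1 and instead scans the advertised memory types for the first device-local (or, for linear PRIME images, non-device-local) type. The same selection pattern written against the core Vulkan API as an application would use it; this is a generic sketch, and find_memory_type is not a radv function:

#include <stdbool.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

/* Returns the first memory type allowed by allowed_type_bits
 * (VkMemoryRequirements::memoryTypeBits) that has every bit of
 * required_flags set, or -1 if none matches. */
static int32_t find_memory_type(VkPhysicalDevice physical_device,
                                uint32_t allowed_type_bits,
                                VkMemoryPropertyFlags required_flags)
{
    VkPhysicalDeviceMemoryProperties props;
    vkGetPhysicalDeviceMemoryProperties(physical_device, &props);

    for (uint32_t i = 0; i < props.memoryTypeCount; i++) {
        bool allowed = (allowed_type_bits & (1u << i)) != 0;
        bool has_all_flags =
            (props.memoryTypes[i].propertyFlags & required_flags) == required_flags;
        if (allowed && has_all_flags)
            return (int32_t)i;
    }
    return -1;
}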
@@ -85,6 +85,7 @@ LIBGLSL_FILES = \
glsl/lower_buffer_access.cpp \
glsl/lower_buffer_access.h \
glsl/lower_const_arrays_to_uniforms.cpp \
glsl/lower_cs_derived.cpp \
glsl/lower_discard.cpp \
glsl/lower_discard_flow.cpp \
glsl/lower_distance.cpp \
@@ -1295,15 +1295,10 @@ builtin_variable_generator::generate_cs_special_vars()
|
||||
uvec3_t, "gl_LocalGroupSizeARB");
|
||||
}
|
||||
|
||||
if (state->ctx->Const.LowerCsDerivedVariables) {
|
||||
add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
|
||||
add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
|
||||
} else {
|
||||
add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
|
||||
uvec3_t, "gl_GlobalInvocationID");
|
||||
add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
|
||||
uint_t, "gl_LocalInvocationIndex");
|
||||
}
|
||||
add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
|
||||
uvec3_t, "gl_GlobalInvocationID");
|
||||
add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
|
||||
uint_t, "gl_LocalInvocationIndex");
|
||||
}
|
||||
|
||||
|
||||
@@ -1474,84 +1469,3 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initialize compute shader variables with values that are derived from other
|
||||
* compute shader variable.
|
||||
*/
|
||||
static void
|
||||
initialize_cs_derived_variables(gl_shader *shader,
|
||||
ir_function_signature *const main_sig)
|
||||
{
|
||||
assert(shader->Stage == MESA_SHADER_COMPUTE);
|
||||
|
||||
ir_variable *gl_GlobalInvocationID =
|
||||
shader->symbols->get_variable("gl_GlobalInvocationID");
|
||||
assert(gl_GlobalInvocationID);
|
||||
ir_variable *gl_WorkGroupID =
|
||||
shader->symbols->get_variable("gl_WorkGroupID");
|
||||
assert(gl_WorkGroupID);
|
||||
ir_variable *gl_WorkGroupSize =
|
||||
shader->symbols->get_variable("gl_WorkGroupSize");
|
||||
if (gl_WorkGroupSize == NULL) {
|
||||
void *const mem_ctx = ralloc_parent(shader->ir);
|
||||
gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
|
||||
"gl_WorkGroupSize",
|
||||
ir_var_auto);
|
||||
gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
|
||||
gl_WorkGroupSize->data.read_only = true;
|
||||
shader->ir->push_head(gl_WorkGroupSize);
|
||||
}
|
||||
ir_variable *gl_LocalInvocationID =
|
||||
shader->symbols->get_variable("gl_LocalInvocationID");
|
||||
assert(gl_LocalInvocationID);
|
||||
|
||||
/* gl_GlobalInvocationID =
|
||||
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
|
||||
*/
|
||||
ir_instruction *inst =
|
||||
assign(gl_GlobalInvocationID,
|
||||
add(mul(gl_WorkGroupID, gl_WorkGroupSize),
|
||||
gl_LocalInvocationID));
|
||||
main_sig->body.push_head(inst);
|
||||
|
||||
/* gl_LocalInvocationIndex =
|
||||
* gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
|
||||
* gl_LocalInvocationID.y * gl_WorkGroupSize.x +
|
||||
* gl_LocalInvocationID.x;
|
||||
*/
|
||||
ir_expression *index_z =
|
||||
mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
|
||||
swizzle_y(gl_WorkGroupSize));
|
||||
ir_expression *index_y =
|
||||
mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
|
||||
ir_expression *index_y_plus_z = add(index_y, index_z);
|
||||
operand index_x(swizzle_x(gl_LocalInvocationID));
|
||||
ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
|
||||
ir_variable *gl_LocalInvocationIndex =
|
||||
shader->symbols->get_variable("gl_LocalInvocationIndex");
|
||||
assert(gl_LocalInvocationIndex);
|
||||
inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
|
||||
main_sig->body.push_head(inst);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initialize builtin variables with values based on other builtin variables.
|
||||
* These are initialized in the main function.
|
||||
*/
|
||||
void
|
||||
_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
|
||||
gl_shader *shader)
|
||||
{
|
||||
/* We only need to set CS variables currently. */
|
||||
if (shader->Stage == MESA_SHADER_COMPUTE &&
|
||||
ctx->Const.LowerCsDerivedVariables) {
|
||||
ir_function_signature *const main_sig =
|
||||
_mesa_get_main_function_signature(shader->symbols);
|
||||
|
||||
if (main_sig != NULL)
|
||||
initialize_cs_derived_variables(shader, main_sig);
|
||||
}
|
||||
}
|
||||
|
@@ -2009,8 +2009,6 @@ opt_shader_and_create_symbol_table(struct gl_context *ctx,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_glsl_initialize_derived_variables(ctx, shader);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -2412,10 +2412,6 @@ extern void
|
||||
_mesa_glsl_initialize_variables(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state);
|
||||
|
||||
extern void
|
||||
_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
|
||||
gl_shader *shader);
|
||||
|
||||
extern void
|
||||
reparent_ir(exec_list *list, void *mem_ctx);
|
||||
|
||||
|
@@ -166,6 +166,7 @@ void optimize_dead_builtin_variables(exec_list *instructions,
|
||||
bool lower_tess_level(gl_linked_shader *shader);
|
||||
|
||||
bool lower_vertex_id(gl_linked_shader *shader);
|
||||
bool lower_cs_derived(gl_linked_shader *shader);
|
||||
bool lower_blend_equation_advanced(gl_linked_shader *shader);
|
||||
|
||||
bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
|
||||
|
@@ -2374,6 +2374,9 @@ link_intrastage_shaders(void *mem_ctx,
|
||||
if (ctx->Const.VertexID_is_zero_based)
|
||||
lower_vertex_id(linked);
|
||||
|
||||
if (ctx->Const.LowerCsDerivedVariables)
|
||||
lower_cs_derived(linked);
|
||||
|
||||
#ifdef DEBUG
|
||||
/* Compute the source checksum. */
|
||||
linked->SourceChecksum = 0;
|
||||
|
234
src/compiler/glsl/lower_cs_derived.cpp
Normal file
234
src/compiler/glsl/lower_cs_derived.cpp
Normal file
@@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright © 2017 Ilia Mirkin
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file lower_cs_derived.cpp
|
||||
*
|
||||
* For hardware that does not support the gl_GlobalInvocationID and
|
||||
* gl_LocalInvocationIndex system values, replace them with fresh
|
||||
* globals. Note that we can't rely on gl_WorkGroupSize or
|
||||
* gl_LocalGroupSizeARB being available, since they may only have been defined
|
||||
* in a non-main shader.
|
||||
*
|
||||
* [ This can happen if only a secondary shader has the layout(local_size_*)
|
||||
* declaration. ]
|
||||
*
|
||||
* This is meant to be run post-linking.
|
||||
*/
|
||||
|
||||
#include "glsl_symbol_table.h"
|
||||
#include "ir_hierarchical_visitor.h"
|
||||
#include "ir.h"
|
||||
#include "ir_builder.h"
|
||||
#include "linker.h"
|
||||
#include "program/prog_statevars.h"
|
||||
#include "builtin_functions.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
namespace {
|
||||
|
||||
class lower_cs_derived_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
explicit lower_cs_derived_visitor(gl_linked_shader *shader)
|
||||
: progress(false),
|
||||
shader(shader),
|
||||
local_size_variable(shader->Program->info.cs.local_size_variable),
|
||||
gl_WorkGroupSize(NULL),
|
||||
gl_WorkGroupID(NULL),
|
||||
gl_LocalInvocationID(NULL),
|
||||
gl_GlobalInvocationID(NULL),
|
||||
gl_LocalInvocationIndex(NULL)
|
||||
{
|
||||
main_sig = _mesa_get_main_function_signature(shader->symbols);
|
||||
assert(main_sig);
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit(ir_dereference_variable *);
|
||||
|
||||
ir_variable *add_system_value(
|
||||
int slot, const glsl_type *type, const char *name);
|
||||
void find_sysvals();
|
||||
void make_gl_GlobalInvocationID();
|
||||
void make_gl_LocalInvocationIndex();
|
||||
|
||||
bool progress;
|
||||
|
||||
private:
|
||||
gl_linked_shader *shader;
|
||||
bool local_size_variable;
|
||||
ir_function_signature *main_sig;
|
||||
|
||||
ir_rvalue *gl_WorkGroupSize;
|
||||
ir_variable *gl_WorkGroupID;
|
||||
ir_variable *gl_LocalInvocationID;
|
||||
|
||||
ir_variable *gl_GlobalInvocationID;
|
||||
ir_variable *gl_LocalInvocationIndex;
|
||||
};
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
||||
ir_variable *
|
||||
lower_cs_derived_visitor::add_system_value(
|
||||
int slot, const glsl_type *type, const char *name)
|
||||
{
|
||||
ir_variable *var = new(shader) ir_variable(type, name, ir_var_system_value);
|
||||
var->data.how_declared = ir_var_declared_implicitly;
|
||||
var->data.read_only = true;
|
||||
var->data.location = slot;
|
||||
var->data.explicit_location = true;
|
||||
var->data.explicit_index = 0;
|
||||
shader->ir->push_head(var);
|
||||
|
||||
return var;
|
||||
}
|
||||
|
||||
void
|
||||
lower_cs_derived_visitor::find_sysvals()
|
||||
{
|
||||
if (gl_WorkGroupSize != NULL)
|
||||
return;
|
||||
|
||||
ir_variable *WorkGroupSize;
|
||||
if (local_size_variable)
|
||||
WorkGroupSize = shader->symbols->get_variable("gl_LocalGroupSizeARB");
|
||||
else
|
||||
WorkGroupSize = shader->symbols->get_variable("gl_WorkGroupSize");
|
||||
if (WorkGroupSize)
|
||||
gl_WorkGroupSize = new(shader) ir_dereference_variable(WorkGroupSize);
|
||||
gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
|
||||
gl_LocalInvocationID = shader->symbols->get_variable("gl_LocalInvocationID");
|
||||
|
||||
/*
|
||||
* These may be missing due to either dead code elimination, or, in the
|
||||
* case of the group size, due to the layout being declared in a non-main
|
||||
* shader. Re-create them.
|
||||
*/
|
||||
|
||||
if (!gl_WorkGroupID)
|
||||
gl_WorkGroupID = add_system_value(
|
||||
SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
|
||||
if (!gl_LocalInvocationID)
|
||||
gl_LocalInvocationID = add_system_value(
|
||||
SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
|
||||
"gl_LocalInvocationID");
|
||||
if (!WorkGroupSize) {
|
||||
if (local_size_variable) {
|
||||
gl_WorkGroupSize = new(shader) ir_dereference_variable(
|
||||
add_system_value(
|
||||
SYSTEM_VALUE_LOCAL_GROUP_SIZE, glsl_type::uvec3_type,
|
||||
"gl_LocalGroupSizeARB"));
|
||||
} else {
|
||||
ir_constant_data data;
|
||||
memset(&data, 0, sizeof(data));
|
||||
for (int i = 0; i < 3; i++)
|
||||
data.u[i] = shader->Program->info.cs.local_size[i];
|
||||
gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lower_cs_derived_visitor::make_gl_GlobalInvocationID()
|
||||
{
|
||||
if (gl_GlobalInvocationID != NULL)
|
||||
return;
|
||||
|
||||
find_sysvals();
|
||||
|
||||
/* gl_GlobalInvocationID =
|
||||
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
|
||||
*/
|
||||
gl_GlobalInvocationID = new(shader) ir_variable(
|
||||
glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
|
||||
shader->ir->push_head(gl_GlobalInvocationID);
|
||||
|
||||
ir_instruction *inst =
|
||||
assign(gl_GlobalInvocationID,
|
||||
add(mul(gl_WorkGroupID, gl_WorkGroupSize->clone(shader, NULL)),
|
||||
gl_LocalInvocationID));
|
||||
main_sig->body.push_head(inst);
|
||||
}
|
||||
|
||||
void
|
||||
lower_cs_derived_visitor::make_gl_LocalInvocationIndex()
|
||||
{
|
||||
if (gl_LocalInvocationIndex != NULL)
|
||||
return;
|
||||
|
||||
find_sysvals();
|
||||
|
||||
/* gl_LocalInvocationIndex =
|
||||
* gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
|
||||
* gl_LocalInvocationID.y * gl_WorkGroupSize.x +
|
||||
* gl_LocalInvocationID.x;
|
||||
*/
|
||||
gl_LocalInvocationIndex = new(shader)
|
||||
ir_variable(glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
|
||||
shader->ir->push_head(gl_LocalInvocationIndex);
|
||||
|
||||
ir_expression *index_z =
|
||||
mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
|
||||
swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
|
||||
ir_expression *index_y =
|
||||
mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
|
||||
ir_expression *index_y_plus_z = add(index_y, index_z);
|
||||
operand index_x(swizzle_x(gl_LocalInvocationID));
|
||||
ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
|
||||
ir_instruction *inst =
|
||||
assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
|
||||
main_sig->body.push_head(inst);
|
||||
}
|
||||
|
||||
ir_visitor_status
|
||||
lower_cs_derived_visitor::visit(ir_dereference_variable *ir)
|
||||
{
|
||||
if (ir->var->data.mode == ir_var_system_value &&
|
||||
ir->var->data.location == SYSTEM_VALUE_GLOBAL_INVOCATION_ID) {
|
||||
make_gl_GlobalInvocationID();
|
||||
ir->var = gl_GlobalInvocationID;
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (ir->var->data.mode == ir_var_system_value &&
|
||||
ir->var->data.location == SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) {
|
||||
make_gl_LocalInvocationIndex();
|
||||
ir->var = gl_LocalInvocationIndex;
|
||||
progress = true;
|
||||
}
|
||||
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
bool
|
||||
lower_cs_derived(gl_linked_shader *shader)
|
||||
{
|
||||
if (shader->Stage != MESA_SHADER_COMPUTE)
|
||||
return false;
|
||||
|
||||
lower_cs_derived_visitor v(shader);
|
||||
v.run(shader->ir);
|
||||
|
||||
return v.progress;
|
||||
}
|
@@ -124,6 +124,7 @@ files_libglsl = files(
'lower_buffer_access.cpp',
'lower_buffer_access.h',
'lower_const_arrays_to_uniforms.cpp',
'lower_cs_derived.cpp',
'lower_discard.cpp',
'lower_discard_flow.cpp',
'lower_distance.cpp',
@@ -62,23 +62,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
|
||||
* information, so removing these variables from the user shader will
|
||||
* cause problems later.
|
||||
*
|
||||
* For compute shaders, gl_GlobalInvocationID has some dependencies, so
|
||||
* we avoid removing these dependencies.
|
||||
*
|
||||
* We also avoid removing gl_GlobalInvocationID at this stage because it
|
||||
* might be used by a linked shader. In this case it still needs to be
|
||||
* initialized by the main function.
|
||||
*
|
||||
* gl_GlobalInvocationID =
|
||||
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
|
||||
*
|
||||
* Similarly, we initialize gl_LocalInvocationIndex in the main function:
|
||||
*
|
||||
* gl_LocalInvocationIndex =
|
||||
* gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
|
||||
* gl_LocalInvocationID.y * gl_WorkGroupSize.x +
|
||||
* gl_LocalInvocationID.x;
|
||||
*
|
||||
* Matrix uniforms with "Transpose" are not eliminated because there's
|
||||
* an optimization pass that can turn references to the regular matrix
|
||||
* into references to the transpose matrix. Eliminating the transpose
|
||||
@@ -90,11 +73,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
|
||||
*/
|
||||
if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
|
||||
|| strcmp(var->name, "gl_Vertex") == 0
|
||||
|| strcmp(var->name, "gl_WorkGroupID") == 0
|
||||
|| strcmp(var->name, "gl_WorkGroupSize") == 0
|
||||
|| strcmp(var->name, "gl_LocalInvocationID") == 0
|
||||
|| strcmp(var->name, "gl_GlobalInvocationID") == 0
|
||||
|| strcmp(var->name, "gl_LocalInvocationIndex") == 0
|
||||
|| strstr(var->name, "Transpose") != NULL)
|
||||
continue;
|
||||
|
||||
|
@@ -434,7 +434,7 @@ INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
/* src[] = { buffer_index, offset }. No const_index */
LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/* src[] = { offset }. const_index[] = { base, component } */
LOAD(output, 1, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
LOAD(output, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/* src[] = { vertex, offset }. const_index[] = { base, component } */
LOAD(per_vertex_output, 2, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
/* src[] = { offset }. const_index[] = { base } */
@@ -28,6 +28,26 @@
* \file nir_opt_intrinsics.c
*/

static nir_ssa_def *
high_subgroup_mask(nir_builder *b,
nir_ssa_def *count,
uint64_t base_mask)
{
/* group_mask could probably be calculated more efficiently but we want to
* be sure not to shift by 64 if the subgroup size is 64 because the GLSL
* shift operator is undefined in that case. In any case if we were worried
* about efficency this should probably be done further down because the
* subgroup size is likely to be known at compile time.
*/
nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);

return nir_iand(b, higher_bits, group_mask);
}

static bool
opt_intrinsics_impl(nir_function_impl *impl)
{
@@ -95,10 +115,10 @@ opt_intrinsics_impl(nir_function_impl *impl)
replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
break;
case nir_intrinsic_load_subgroup_ge_mask:
replacement = nir_ishl(&b, nir_imm_int64(&b, ~0ull), count);
replacement = high_subgroup_mask(&b, count, ~0ull);
break;
case nir_intrinsic_load_subgroup_gt_mask:
replacement = nir_ishl(&b, nir_imm_int64(&b, ~1ull), count);
replacement = high_subgroup_mask(&b, count, ~1ull);
break;
case nir_intrinsic_load_subgroup_le_mask:
replacement = nir_inot(&b, nir_ishl(&b, nir_imm_int64(&b, ~1ull), count));
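high_subgroup_mask() builds the ge/gt masks without ever shifting a 64-bit value by 64, which would be undefined when the subgroup size is 64. A scalar C model of the same computation, for a concrete invocation index and subgroup size (illustrative only, not NIR builder code):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t scalar_high_subgroup_mask(uint64_t base_mask,
                                          unsigned invocation_index,
                                          unsigned subgroup_size)
{
    /* Safe because subgroup_size is at least 1, so the shift is at most 63. */
    uint64_t group_mask  = ~0ull >> (64 - subgroup_size);
    uint64_t higher_bits = base_mask << invocation_index;
    return higher_bits & group_mask;
}

int main(void)
{
    /* ge_mask uses ~0ull as the base mask, gt_mask uses ~1ull. */
    printf("ge_mask = 0x%016" PRIx64 "\n", scalar_high_subgroup_mask(~0ull, 5, 32));
    printf("gt_mask = 0x%016" PRIx64 "\n", scalar_high_subgroup_mask(~1ull, 5, 32));
    return 0;
}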
@@ -2802,7 +2802,8 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,

case SpvOpMemoryModel:
assert(w[1] == SpvAddressingModelLogical);
assert(w[2] == SpvMemoryModelGLSL450);
assert(w[2] == SpvMemoryModelSimple ||
w[2] == SpvMemoryModelGLSL450);
break;

case SpvOpEntryPoint: {
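The relaxed assert above accepts both the Simple and GLSL450 memory models. A small C sketch of what w[1] and w[2] hold for this instruction, based on the standard SPIR-V encoding (assumed here, not taken from this patch):

/* Illustrative only: OpMemoryModel as it appears in a SPIR-V binary.
 *   word 0: (word_count << 16) | opcode   -> (3 << 16) | 14
 *   word 1: addressing model              -> 0 = Logical
 *   word 2: memory model                  -> 0 = Simple, 1 = GLSL450 */
#include <stdint.h>

static const uint32_t op_memory_model_simple[3]  = { (3u << 16) | 14u, 0u, 0u };
static const uint32_t op_memory_model_glsl450[3] = { (3u << 16) | 14u, 0u, 1u };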
@@ -21,7 +21,9 @@
c_args_for_egl = []
link_for_egl = []
deps_for_egl = []
incs_for_egl = []
incs_for_egl = [
inc_include, inc_src, inc_loader, inc_gbm, include_directories('main'),
]
files_egl = files(
'main/eglapi.c',
'main/eglapi.h',
@@ -159,10 +161,7 @@ libegl = shared_library(
'-D_EGL_BUILT_IN_DRIVER_DRI2',
'-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()),
],
include_directories : [
incs_for_egl, inc_include, inc_src, inc_loader, inc_gbm,
include_directories('main'),
],
include_directories : incs_for_egl,
link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util],
link_args : [ld_args_bsymbolic, ld_args_gc_sections],
dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread],
@@ -3,7 +3,7 @@ pkgconfig_DATA = wayland-egl.pc

AM_CFLAGS = $(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(WAYLAND_SERVER_CFLAGS)
$(WAYLAND_CLIENT_CFLAGS)

lib_LTLIBRARIES = libwayland-egl.la
noinst_HEADERS = wayland-egl-backend.h
@@ -24,6 +24,7 @@ libwayland_egl = shared_library(
'wayland-egl.c',
c_args : [c_vis_args],
link_args : ld_args_gc_sections,
dependencies : dep_wayland_client,
version : '1.0.0',
install : true,
)
@@ -555,6 +555,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
||||
}
|
||||
|
||||
/* Set up color TS to source surface before blit, if needed */
|
||||
bool source_ts_valid = false;
|
||||
if (src->levels[blit_info->src.level].ts_size &&
|
||||
src->levels[blit_info->src.level].ts_valid) {
|
||||
struct etna_reloc reloc;
|
||||
@@ -579,6 +580,8 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
||||
|
||||
etna_set_state(ctx->stream, VIVS_TS_COLOR_CLEAR_VALUE,
|
||||
src->levels[blit_info->src.level].clear_value);
|
||||
|
||||
source_ts_valid = true;
|
||||
} else {
|
||||
etna_set_state(ctx->stream, VIVS_TS_MEM_CONFIG, ts_mem_config);
|
||||
}
|
||||
@@ -593,6 +596,7 @@ etna_try_rs_blit(struct pipe_context *pctx,
|
||||
.source_stride = src_lev->stride,
|
||||
.source_padded_width = src_lev->padded_width,
|
||||
.source_padded_height = src_lev->padded_height,
|
||||
.source_ts_valid = source_ts_valid,
|
||||
.dest_format = translate_rs_format(dst_format),
|
||||
.dest_tiling = dst->layout,
|
||||
.dest = dst->bo,
|
||||
|
@@ -171,6 +171,10 @@ etna_submit_rs_state(struct etna_context *ctx,
|
||||
struct etna_cmd_stream *stream = ctx->stream;
|
||||
struct etna_coalesce coalesce;
|
||||
|
||||
if (cs->RS_KICKER_INPLACE && !cs->source_ts_valid)
|
||||
/* Inplace resolve is no-op if TS is not configured */
|
||||
return;
|
||||
|
||||
ctx->stats.rs_operations++;
|
||||
|
||||
if (cs->RS_KICKER_INPLACE) {
|
||||
|
@@ -133,6 +133,7 @@ etna_compile_rs_state(struct etna_context *ctx, struct compiled_rs_state *cs,
|
||||
/* Total number of tiles (same as for autodisable) */
|
||||
cs->RS_KICKER_INPLACE = rs->source_padded_width * rs->source_padded_height / 16;
|
||||
}
|
||||
cs->source_ts_valid = rs->source_ts_valid;
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -33,6 +33,7 @@
|
||||
struct rs_state {
|
||||
uint8_t downsample_x : 1; /* Downsample in x direction */
|
||||
uint8_t downsample_y : 1; /* Downsample in y direction */
|
||||
uint8_t source_ts_valid : 1;
|
||||
|
||||
uint8_t source_format; /* RS_FORMAT_XXX */
|
||||
uint8_t source_tiling; /* ETNA_LAYOUT_XXX */
|
||||
@@ -61,6 +62,7 @@ struct rs_state {
|
||||
|
||||
/* treat this as opaque structure */
|
||||
struct compiled_rs_state {
|
||||
uint8_t source_ts_valid : 1;
|
||||
uint32_t RS_CONFIG;
|
||||
uint32_t RS_SOURCE_STRIDE;
|
||||
uint32_t RS_DEST_STRIDE;
|
||||
|
@@ -216,6 +216,23 @@ void i915_update_derived(struct i915_context *i915)
|
||||
if (I915_DBG_ON(DBG_ATOMS))
|
||||
i915_dump_dirty(i915, __FUNCTION__);
|
||||
|
||||
if (!i915->fs) {
|
||||
i915->dirty &= ~(I915_NEW_FS_CONSTANTS | I915_NEW_FS);
|
||||
i915->hardware_dirty &= ~(I915_HW_PROGRAM | I915_HW_CONSTANTS);
|
||||
}
|
||||
|
||||
if (!i915->vs)
|
||||
i915->dirty &= ~I915_NEW_VS;
|
||||
|
||||
if (!i915->blend)
|
||||
i915->dirty &= ~I915_NEW_BLEND;
|
||||
|
||||
if (!i915->rasterizer)
|
||||
i915->dirty &= ~I915_NEW_RASTERIZER;
|
||||
|
||||
if (!i915->depth_stencil)
|
||||
i915->dirty &= ~I915_NEW_DEPTH_STENCIL;
|
||||
|
||||
for (i = 0; atoms[i]; i++)
|
||||
if (atoms[i]->dirty & i915->dirty)
|
||||
atoms[i]->update(i915);
|
||||
|
@@ -213,7 +213,8 @@ static void upload_STIPPLE(struct i915_context *i915)

/* I915_NEW_RASTERIZER
*/
st[1] |= i915->rasterizer->st;
if (i915->rasterizer)
st[1] |= i915->rasterizer->st;

/* I915_NEW_STIPPLE
*/
@@ -168,11 +168,13 @@ static void upload_S6(struct i915_context *i915)

/* I915_NEW_BLEND
*/
LIS6 |= i915->blend->LIS6;
if (i915->blend)
LIS6 |= i915->blend->LIS6;

/* I915_NEW_DEPTH
*/
LIS6 |= i915->depth_stencil->depth_LIS6;
if (i915->depth_stencil)
LIS6 |= i915->depth_stencil->depth_LIS6;

set_immediate(i915, I915_IMMEDIATE_S6, LIS6);
}
@@ -216,7 +216,7 @@ static void update_dst_buf_vars(struct i915_context *i915)
zformat = translate_depth_format(depth_surface->format);

if (is->is_i945 && tex->tiling != I915_TILE_NONE
&& !i915->fs->info.writes_z)
&& (i915->fs && !i915->fs->info.writes_z))
early_z = CLASSIC_EARLY_DEPTH;
} else
zformat = 0;
@@ -711,22 +711,24 @@ void alu_group_tracker::update_flags(alu_node* n) {
|
||||
}
|
||||
|
||||
int post_scheduler::run() {
|
||||
run_on(sh.root);
|
||||
return 0;
|
||||
return run_on(sh.root) ? 0 : 1;
|
||||
}
|
||||
|
||||
void post_scheduler::run_on(container_node* n) {
|
||||
|
||||
bool post_scheduler::run_on(container_node* n) {
|
||||
int r = true;
|
||||
for (node_riterator I = n->rbegin(), E = n->rend(); I != E; ++I) {
|
||||
if (I->is_container()) {
|
||||
if (I->subtype == NST_BB) {
|
||||
bb_node* bb = static_cast<bb_node*>(*I);
|
||||
schedule_bb(bb);
|
||||
r = schedule_bb(bb);
|
||||
} else {
|
||||
run_on(static_cast<container_node*>(*I));
|
||||
r = run_on(static_cast<container_node*>(*I));
|
||||
}
|
||||
if (!r)
|
||||
break;
|
||||
}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
void post_scheduler::init_uc_val(container_node *c, value *v) {
|
||||
@@ -758,7 +760,7 @@ unsigned post_scheduler::init_ucm(container_node *c, node *n) {
|
||||
return F == ucm.end() ? 0 : F->second;
|
||||
}
|
||||
|
||||
void post_scheduler::schedule_bb(bb_node* bb) {
|
||||
bool post_scheduler::schedule_bb(bb_node* bb) {
|
||||
PSC_DUMP(
|
||||
sblog << "scheduling BB " << bb->id << "\n";
|
||||
if (!pending.empty())
|
||||
@@ -791,8 +793,10 @@ void post_scheduler::schedule_bb(bb_node* bb) {
|
||||
|
||||
if (n->is_alu_clause()) {
|
||||
n->remove();
|
||||
process_alu(static_cast<container_node*>(n));
|
||||
continue;
|
||||
bool r = process_alu(static_cast<container_node*>(n));
|
||||
if (r)
|
||||
continue;
|
||||
return false;
|
||||
}
|
||||
|
||||
n->remove();
|
||||
@@ -800,6 +804,7 @@ void post_scheduler::schedule_bb(bb_node* bb) {
|
||||
}
|
||||
|
||||
this->cur_bb = NULL;
|
||||
return true;
|
||||
}
|
||||
|
||||
void post_scheduler::init_regmap() {
|
||||
@@ -933,10 +938,10 @@ void post_scheduler::process_fetch(container_node *c) {
|
||||
cur_bb->push_front(c);
|
||||
}
|
||||
|
||||
void post_scheduler::process_alu(container_node *c) {
|
||||
bool post_scheduler::process_alu(container_node *c) {
|
||||
|
||||
if (c->empty())
|
||||
return;
|
||||
return true;
|
||||
|
||||
ucm.clear();
|
||||
alu.reset();
|
||||
@@ -973,7 +978,7 @@ void post_scheduler::process_alu(container_node *c) {
|
||||
}
|
||||
}
|
||||
|
||||
schedule_alu(c);
|
||||
return schedule_alu(c);
|
||||
}
|
||||
|
||||
void post_scheduler::update_local_interferences() {
|
||||
@@ -1135,15 +1140,20 @@ void post_scheduler::emit_clause() {
|
||||
emit_index_registers();
|
||||
}
|
||||
|
||||
void post_scheduler::schedule_alu(container_node *c) {
|
||||
bool post_scheduler::schedule_alu(container_node *c) {
|
||||
|
||||
assert(!ready.empty() || !ready_copies.empty());
|
||||
|
||||
while (1) {
|
||||
|
||||
bool improving = true;
|
||||
int last_pending = pending.count();
|
||||
while (improving) {
|
||||
prev_regmap = regmap;
|
||||
|
||||
if (!prepare_alu_group()) {
|
||||
|
||||
int new_pending = pending.count();
|
||||
improving = (new_pending < last_pending) || (last_pending == 0);
|
||||
last_pending = new_pending;
|
||||
|
||||
if (alu.current_idx[0] || alu.current_idx[1]) {
|
||||
regmap = prev_regmap;
|
||||
emit_clause();
|
||||
@@ -1186,6 +1196,7 @@ void post_scheduler::schedule_alu(container_node *c) {
|
||||
dump::dump_op_list(&pending);
|
||||
assert(!"unscheduled pending instructions");
|
||||
}
|
||||
return improving;
|
||||
}
|
||||
|
||||
void post_scheduler::add_interferences(value *v, sb_bitset &rb, val_set &vs) {
|
||||
|
@@ -267,14 +267,14 @@ public:
|
||||
live(), ucm(), alu(sh), regmap(), cleared_interf() {}
|
||||
|
||||
virtual int run();
|
||||
void run_on(container_node *n);
|
||||
void schedule_bb(bb_node *bb);
|
||||
bool run_on(container_node *n);
|
||||
bool schedule_bb(bb_node *bb);
|
||||
|
||||
void load_index_register(value *v, unsigned idx);
|
||||
void process_fetch(container_node *c);
|
||||
|
||||
void process_alu(container_node *c);
|
||||
void schedule_alu(container_node *c);
|
||||
bool process_alu(container_node *c);
|
||||
bool schedule_alu(container_node *c);
|
||||
bool prepare_alu_group();
|
||||
|
||||
void release_op(node *n);
|
||||
|
@@ -182,8 +182,11 @@ void si_vid_join_surfaces(struct r600_common_context *rctx,

for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
surfaces[i]->u.legacy.level[j].offset += off;
} else
} else {
surfaces[i]->u.gfx9.surf_offset += off;
for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.gfx9.offset); ++j)
surfaces[i]->u.gfx9.offset[j] += off;
}

off += surfaces[i]->surf_size;
}
@@ -6,5 +6,5 @@ DRI_CONF_SECTION_PERFORMANCE
DRI_CONF_SECTION_END

DRI_CONF_SECTION_DEBUG
DRI_CONF_RADEONSI_CLEAR_DB_META_BEFORE_CLEAR("false")
DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
DRI_CONF_SECTION_END
@@ -901,16 +901,16 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
* corruption in ARK: Survival Evolved, but that may just be
* a coincidence and the root cause is elsewhere.
*
* The corruption can be fixed by putting the DB metadata flush
* before or after the depth clear. (suprisingly)
* The corruption can be fixed by putting the DB flush before
* or after the depth clear. (surprisingly)
*
* https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
*
* This hack decreases back-to-back ClearDepth performance.
*/
if (sctx->screen->clear_db_meta_before_clear)
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META |
SI_CONTEXT_PS_PARTIAL_FLUSH;
if (sctx->screen->clear_db_cache_before_clear) {
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
}
}

si_blitter_begin(ctx, SI_CLEAR);
@@ -1072,8 +1072,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
sscreen->commutative_blend_add =
driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
sscreen->clear_db_meta_before_clear =
driQueryOptionb(config->options, "radeonsi_clear_db_meta_before_clear");
sscreen->clear_db_cache_before_clear =
driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear");
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
sscreen->b.family <= CHIP_POLARIS12) ||
sscreen->b.family == CHIP_VEGA10 ||
@@ -98,7 +98,7 @@ struct si_screen {
bool has_out_of_order_rast;
bool assume_no_z_fights;
bool commutative_blend_add;
bool clear_db_meta_before_clear;
bool clear_db_cache_before_clear;
bool has_msaa_sample_loc_bug;
bool dpbb_allowed;
bool dfsm_allowed;
@@ -2015,14 +2015,21 @@ static LLVMValueRef fetch_constant(
|
||||
* code reducing SIMD wave occupancy from 8 to 2 in many cases.
|
||||
*
|
||||
* Using s_buffer_load_dword (x1) seems to be the best option right now.
|
||||
*
|
||||
* LLVM 5.0 on SI doesn't insert a required s_nop between SALU setting
|
||||
* a descriptor and s_buffer_load_dword using it, so we can't expand
|
||||
* the pointer into a full descriptor like below. We have to use
|
||||
* s_load_dword instead. The only case when LLVM 5.0 would select
|
||||
* s_buffer_load_dword (that we have to prevent) is when we use use
|
||||
* a literal offset where we don't need bounds checking.
|
||||
*/
|
||||
#if 0 /* keep this codepath disabled */
|
||||
if (!reg->Register.Indirect) {
|
||||
if (ctx->screen->b.chip_class == SI &&
|
||||
HAVE_LLVM < 0x0600 &&
|
||||
!reg->Register.Indirect) {
|
||||
addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
|
||||
return bitcast(bld_base, type, result);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Do the bounds checking with a descriptor, because
|
||||
* doing computation and manual bounds checking of 64-bit
|
||||
|
@@ -302,8 +302,7 @@ void si_nir_scan_shader(const struct nir_shader *nir,
info->num_written_clipdistance = nir->info.clip_distance_array_size;
info->num_written_culldistance = nir->info.cull_distance_array_size;
info->clipdist_writemask = u_bit_consecutive(0, info->num_written_clipdistance);
info->culldist_writemask = u_bit_consecutive(info->num_written_clipdistance,
info->num_written_culldistance);
info->culldist_writemask = u_bit_consecutive(0, info->num_written_culldistance);

if (info->processor == PIPE_SHADER_FRAGMENT)
info->uses_kill = nir->info.fs.uses_discard;
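The one-line change above makes culldist_writemask start at bit 0 rather than after the clip-distance bits. A small sketch with a stand-in for Mesa's u_bit_consecutive(start, count), assuming 2 clip and 3 cull distances, shows the before/after masks:

#include <stdint.h>
#include <stdio.h>

/* Stand-in for u_bit_consecutive(start, count), valid for count < 32. */
static uint32_t bit_consecutive(unsigned start, unsigned count)
{
    return ((1u << count) - 1u) << start;
}

int main(void)
{
    unsigned num_clip = 2, num_cull = 3;

    uint32_t old_mask = bit_consecutive(num_clip, num_cull); /* bits 2..4 -> 0x1c */
    uint32_t new_mask = bit_consecutive(0, num_cull);        /* bits 0..2 -> 0x07 */

    printf("old culldist_writemask = 0x%02x\n", old_mask);
    printf("new culldist_writemask = 0x%02x\n", new_mask);
    return 0;
}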
@@ -61,7 +61,7 @@ struct vc4_cl {
struct vc4_cl_out *next;
struct vc4_cl_out *reloc_next;
uint32_t size;
#ifdef DEBUG
#ifndef NDEBUG
uint32_t reloc_count;
#endif
};

@@ -163,8 +163,8 @@ static inline void
cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
{
assert(n == 1 || n == 2);
#ifdef DEBUG
assert(cl->reloc_count == 0);
#ifndef NDEBUG
cl->reloc_count = n;
#endif

@@ -177,8 +177,8 @@ cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
static inline struct vc4_cl_out *
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
{
#ifdef DEBUG
assert(cl->reloc_count == 0);
#ifndef NDEBUG
cl->reloc_count = n;
#endif
cl->reloc_next = cl->next;

@@ -196,7 +196,7 @@ cl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
cl_advance(&cl->reloc_next, 4);

#ifdef DEBUG
#ifndef NDEBUG
cl->reloc_count--;
#endif

@@ -211,7 +211,7 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
cl_advance(&cl->reloc_next, 4);

#ifdef DEBUG
#ifndef NDEBUG
cl->reloc_count--;
#endif

@@ -297,7 +297,7 @@ cl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *reloc)
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(cl->job, reloc->bo);
cl_advance(&cl->reloc_next, 4);

#ifdef DEBUG
#ifndef NDEBUG
cl->reloc_count--;
#endif
}
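The vc4 hunks above switch the reloc_count bookkeeping from #ifdef DEBUG to #ifndef NDEBUG, so the counter is compiled exactly when assert() is active. A standalone sketch of that pattern (struct and function names are illustrative, not the vc4 API):

#include <assert.h>
#include <stdint.h>

struct cl {
   uint32_t size;
#ifndef NDEBUG
   /* Only tracked when asserts are enabled, following assert()'s own
    * NDEBUG convention, so the field and its checks go away together. */
   uint32_t reloc_count;
#endif
};

static inline void cl_expect_relocs(struct cl *cl, uint32_t n)
{
#ifndef NDEBUG
   assert(cl->reloc_count == 0);
   cl->reloc_count = n;
#endif
   (void)cl;
   (void)n;
}
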
@@ -70,7 +70,6 @@ namespace {
make_kernel_args(const Module &mod, const std::string &kernel_name,
const clang::CompilerInstance &c) {
std::vector<module::argument> args;
const auto address_spaces = c.getTarget().getAddressSpaceMap();
const Function &f = *mod.getFunction(kernel_name);
::llvm::DataLayout dl(&mod);
const auto size_type =

@@ -128,8 +127,8 @@ namespace {
const unsigned address_space =
cast< ::llvm::PointerType>(actual_type)->getAddressSpace();

if (address_space == address_spaces[clang::LangAS::opencl_local
- compat::lang_as_offset]) {
if (address_space == compat::target_address_space(
c.getTarget(), clang::LangAS::opencl_local)) {
args.emplace_back(module::argument::local, arg_api_size,
target_size, target_align,
module::argument::zero_ext);
@@ -69,11 +69,19 @@ namespace clover {
typedef ::llvm::TargetLibraryInfo target_library_info;
#endif

template<typename T, typename AS>
unsigned target_address_space(const T &target, const AS lang_as) {
const auto &map = target.getAddressSpaceMap();
#if HAVE_LLVM >= 0x0500
return map[static_cast<unsigned>(lang_as)];
#else
return map[lang_as - clang::LangAS::Offset];
#endif
}

#if HAVE_LLVM >= 0x0500
const auto lang_as_offset = 0;
const clang::InputKind ik_opencl = clang::InputKind::OpenCL;
#else
const auto lang_as_offset = clang::LangAS::Offset;
const clang::InputKind ik_opencl = clang::IK_OpenCL;
#endif
@@ -68,8 +68,9 @@ LOCAL_SHARED_LIBRARIES += $(sort $(GALLIUM_SHARED_LIBS))
ifneq ($(filter 5 6 7, $(MESA_ANDROID_MAJOR_VERSION)),)
LOCAL_POST_INSTALL_CMD := \
$(foreach l, lib $(if $(filter true,$(TARGET_IS_64_BIT)),lib64), \
mkdir -p $(TARGET_OUT)/$(l)/$(MESA_DRI_MODULE_REL_PATH); \
$(foreach d, $(GALLIUM_TARGET_DRIVERS), ln -sf gallium_dri.so $(TARGET_OUT)/$(l)/$(MESA_DRI_MODULE_REL_PATH)/$(d)_dri.so;) \
$(eval MESA_DRI_MODULE_PATH := $(TARGET_OUT_VENDOR)/$(l)/$(MESA_DRI_MODULE_REL_PATH)) \
mkdir -p $(MESA_DRI_MODULE_PATH); \
$(foreach d, $(GALLIUM_TARGET_DRIVERS), ln -sf gallium_dri.so $(MESA_DRI_MODULE_PATH)/$(d)_dri.so;) \
)
else
LOCAL_MODULE_SYMLINKS := $(foreach d, $(GALLIUM_TARGET_DRIVERS), $(d)_dri.so)
@@ -1190,7 +1190,7 @@ brw_JMPI(struct brw_codegen *p, struct brw_reg index,
struct brw_reg ip = brw_ip_reg();
brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);

brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_2);
brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
brw_inst_set_pred_control(devinfo, inst, predicate_control);
@@ -47,7 +47,8 @@ cat(struct string *dest, const struct string src)
static bool
contains(const struct string haystack, const struct string needle)
{
return memmem(haystack.str, haystack.len, needle.str, needle.len) != NULL;
return haystack.str && memmem(haystack.str, haystack.len,
needle.str, needle.len) != NULL;
}
#define CONTAINS(haystack, needle) \
contains(haystack, (struct string){needle, strlen(needle)})
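The contains() change above guards against a NULL haystack before calling memmem(), since an empty string struct may carry a NULL pointer. A minimal sketch of the same guard (the struct name is illustrative):

#define _GNU_SOURCE /* memmem() is a GNU extension */
#include <string.h>
#include <stdbool.h>
#include <stddef.h>

struct str { const char *str; size_t len; };

static bool contains(struct str haystack, struct str needle)
{
   /* An empty message can have str == NULL; check it before handing
    * the pointer to memmem(). */
   return haystack.str &&
          memmem(haystack.str, haystack.len, needle.str, needle.len) != NULL;
}
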
@@ -2092,10 +2092,10 @@ fs_visitor::assign_constant_locations()
*/
uint32_t *param = stage_prog_data->param;
stage_prog_data->nr_params = num_push_constants;
stage_prog_data->param = ralloc_array(NULL, uint32_t, num_push_constants);
stage_prog_data->param = ralloc_array(mem_ctx, uint32_t, num_push_constants);
if (num_pull_constants > 0) {
stage_prog_data->nr_pull_params = num_pull_constants;
stage_prog_data->pull_param = ralloc_array(NULL, uint32_t,
stage_prog_data->pull_param = ralloc_array(mem_ctx, uint32_t,
num_pull_constants);
}
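The assign_constant_locations() fix above re-parents the new param arrays to mem_ctx instead of NULL, so they are released together with the rest of the compile data. A minimal sketch of that ownership pattern, assuming Mesa's util/ralloc.h API (ralloc_context, ralloc_array, ralloc_free):

#include <stdint.h>
#include "util/ralloc.h"

static uint32_t *alloc_params(void *mem_ctx, unsigned count)
{
   /* Parenting to mem_ctx (rather than NULL) means the array is freed
    * when the whole context is freed; no separate free is needed. */
   return ralloc_array(mem_ctx, uint32_t, count);
}

static void example(void)
{
   void *mem_ctx = ralloc_context(NULL);
   uint32_t *params = alloc_params(mem_ctx, 16);
   params[0] = 0;
   ralloc_free(mem_ctx); /* also frees params */
}
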
@@ -5013,7 +5013,9 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
{
return !(is_periodic(inst->src[i], lbld.dispatch_width()) ||
(inst->components_read(i) == 1 &&
lbld.dispatch_width() <= inst->exec_size));
lbld.dispatch_width() <= inst->exec_size)) ||
(inst->flags_written() &
flag_mask(inst->src[i], type_sz(inst->src[i].type)));
}

/**
@@ -6164,6 +6166,31 @@ fs_visitor::run_gs()
return !failed;
}

/* From the SKL PRM, Volume 16, Workarounds:
*
* 0877 3D Pixel Shader Hang possible when pixel shader dispatched with
* only header phases (R0-R2)
*
* WA: Enable a non-header phase (e.g. push constant) when dispatch would
* have been header only.
*
* Instead of enabling push constants one can alternatively enable one of the
* inputs. Here one simply chooses "layer" which shouldn't impose much
* overhead.
*/
static void
gen9_ps_header_only_workaround(struct brw_wm_prog_data *wm_prog_data)
{
if (wm_prog_data->num_varying_inputs)
return;

if (wm_prog_data->base.curb_read_length)
return;

wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
wm_prog_data->num_varying_inputs = 1;
}

bool
fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
{
@@ -6227,6 +6254,10 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
optimize();

assign_curb_setup();

if (devinfo->gen >= 9)
gen9_ps_header_only_workaround(wm_prog_data);

assign_urb_setup();

fixup_3src_null_dest();
@@ -402,7 +402,6 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);

int jmp = brw_JMPI(p, brw_imm_ud(0), BRW_PREDICATE_NORMAL) - p->store;
brw_inst_set_exec_size(p->devinfo, brw_last_inst, BRW_EXECUTE_1);
{
/* Don't send AA data */
fire_fb_write(inst, offset(payload, 1), implied_header, inst->mlen-1);
@@ -173,14 +173,13 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
NIR_PASS_V(nir, nir_propagate_invariant);
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
entry_point->impl, true, false);
NIR_PASS_V(nir, nir_lower_system_values);

/* Vulkan uses the separate-shader linking model */
nir->info.separate_shader = true;

nir = brw_preprocess_nir(compiler, nir);

NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
NIR_PASS_V(nir, nir_lower_system_values);

if (stage == MESA_SHADER_FRAGMENT)
NIR_PASS_V(nir, anv_nir_lower_input_attachments);
@@ -315,7 +315,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
src_format = dst_format = MESA_FORMAT_R_FLOAT32;
}

enum isl_format src_isl_format = brw_isl_format_for_mesa_format(src_format);
enum isl_format src_isl_format =
brw_blorp_to_isl_format(brw, src_format, false);
enum isl_aux_usage src_aux_usage =
intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
/* We do format workarounds for some depth formats so we can't reliably

@@ -328,8 +329,10 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
intel_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
src_aux_usage, src_clear_supported);

enum isl_format dst_isl_format =
brw_blorp_to_isl_format(brw, dst_format, true);
enum isl_aux_usage dst_aux_usage =
intel_miptree_render_aux_usage(brw, dst_mt, encode_srgb, false);
intel_miptree_render_aux_usage(brw, dst_mt, dst_isl_format, false);
const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE;
intel_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
dst_aux_usage, dst_clear_supported);

@@ -351,10 +354,9 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
struct blorp_batch batch;
blorp_batch_init(&brw->blorp, &batch, brw, 0);
blorp_blit(&batch, &src_surf, src_level, src_layer,
brw_blorp_to_isl_format(brw, src_format, false), src_isl_swizzle,
src_isl_format, src_isl_swizzle,
&dst_surf, dst_level, dst_layer,
brw_blorp_to_isl_format(brw, dst_format, true),
ISL_SWIZZLE_IDENTITY,
dst_isl_format, ISL_SWIZZLE_IDENTITY,
src_x0, src_y0, src_x1, src_y1,
dst_x0, dst_y0, dst_x1, dst_y1,
filter, mirror_x, mirror_y);

@@ -1157,6 +1159,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
mesa_format format = irb->Base.Base.Format;
if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
format = _mesa_get_srgb_format_linear(format);
enum isl_format isl_format = brw->mesa_to_isl_render_format[format];

x0 = fb->_Xmin;
x1 = fb->_Xmax;

@@ -1255,8 +1258,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,

struct blorp_batch batch;
blorp_batch_init(&brw->blorp, &batch, brw, 0);
blorp_fast_clear(&batch, &surf,
brw->mesa_to_isl_render_format[format],
blorp_fast_clear(&batch, &surf, isl_format,
level, irb->mt_layer, num_layers,
x0, y0, x1, y1);
blorp_batch_finish(&batch);

@@ -1275,9 +1277,9 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
irb->mt, irb->mt_level, irb->mt_layer, num_layers);

enum isl_aux_usage aux_usage =
intel_miptree_render_aux_usage(brw, irb->mt, encode_srgb, false);
intel_miptree_render_aux_usage(brw, irb->mt, isl_format, false);
intel_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer,
num_layers, encode_srgb, false);
num_layers, isl_format, false);

struct isl_surf isl_tmp[2];
struct blorp_surf surf;

@@ -1289,16 +1291,14 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,

struct blorp_batch batch;
blorp_batch_init(&brw->blorp, &batch, brw, 0);
blorp_clear(&batch, &surf,
brw->mesa_to_isl_render_format[format],
ISL_SWIZZLE_IDENTITY,
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
level, irb->mt_layer, num_layers,
x0, y0, x1, y1,
clear_color, color_write_disable);
blorp_batch_finish(&batch);

intel_miptree_finish_render(brw, irb->mt, level, irb->mt_layer,
num_layers, encode_srgb, false);
num_layers, isl_format, false);
}

return;
@@ -1072,6 +1072,12 @@ intelDestroyContext(__DRIcontext * driContextPriv)
if (brw->wm.base.scratch_bo)
brw_bo_unreference(brw->wm.base.scratch_bo);

brw_bo_unreference(brw->vs.base.push_const_bo);
brw_bo_unreference(brw->tcs.base.push_const_bo);
brw_bo_unreference(brw->tes.base.push_const_bo);
brw_bo_unreference(brw->gs.base.push_const_bo);
brw_bo_unreference(brw->wm.base.push_const_bo);

brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);

if (ctx->swrast_context) {
@@ -25,6 +25,7 @@
#include <sys/errno.h>

#include "main/blend.h"
#include "main/context.h"
#include "main/condrender.h"
#include "main/samplerobj.h"

@@ -503,9 +504,13 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw)
if (irb == NULL || irb->mt == NULL)
continue;

mesa_format mesa_format =
_mesa_get_render_format(ctx, intel_rb_format(irb));
enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);

intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
irb->mt_layer, irb->layer_count,
ctx->Color.sRGBEnabled,
isl_format,
ctx->Color.BlendEnabled & (1 << i));
}
}

@@ -571,10 +576,14 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
if (!irb)
continue;

mesa_format mesa_format =
_mesa_get_render_format(ctx, intel_rb_format(irb));
enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);

brw_render_cache_set_add_bo(brw, irb->mt->bo);
intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
irb->mt_layer, irb->layer_count,
ctx->Color.sRGBEnabled,
isl_format,
ctx->Color.BlendEnabled & (1 << i));
}
}

@@ -866,7 +875,6 @@ brw_draw_prims(struct gl_context *ctx,
struct brw_context *brw = brw_context(ctx);
const struct gl_vertex_array **arrays = ctx->Array._DrawArrays;
int predicate_state = brw->predicate.state;
int combine_op = MI_PREDICATE_COMBINEOP_SET;
struct brw_transform_feedback_object *xfb_obj =
(struct brw_transform_feedback_object *) gl_xfb_obj;
@@ -910,49 +918,35 @@ brw_draw_prims(struct gl_context *ctx,
* to it.
*/

if (brw->draw.draw_params_count_bo &&
predicate_state == BRW_PREDICATE_STATE_USE_BIT) {
/* We need to empty the MI_PREDICATE_DATA register since it might
* already be set.
*/

BEGIN_BATCH(4);
OUT_BATCH(MI_PREDICATE_DATA);
OUT_BATCH(0u);
OUT_BATCH(MI_PREDICATE_DATA + 4);
OUT_BATCH(0u);
ADVANCE_BATCH();

/* We need to combine the results of both predicates.*/
combine_op = MI_PREDICATE_COMBINEOP_AND;
}

for (i = 0; i < nr_prims; i++) {
/* Implementation of ARB_indirect_parameters via predicates */
if (brw->draw.draw_params_count_bo) {
struct brw_bo *draw_id_bo = NULL;
uint32_t draw_id_offset;

intel_upload_data(brw, &prims[i].draw_id, 4, 4, &draw_id_bo,
&draw_id_offset);

brw_emit_pipe_control_flush(brw, PIPE_CONTROL_FLUSH_ENABLE);

/* Upload the current draw count from the draw parameters buffer to
* MI_PREDICATE_SRC0.
*/
brw_load_register_mem(brw, MI_PREDICATE_SRC0,
brw->draw.draw_params_count_bo,
brw->draw.draw_params_count_offset);
brw_load_register_mem(brw, MI_PREDICATE_SRC1, draw_id_bo,
draw_id_offset);
/* Zero the top 32-bits of MI_PREDICATE_SRC0 */
brw_load_register_imm32(brw, MI_PREDICATE_SRC0 + 4, 0);
/* Upload the id of the current primitive to MI_PREDICATE_SRC1. */
brw_load_register_imm64(brw, MI_PREDICATE_SRC1, prims[i].draw_id);

BEGIN_BATCH(1);
OUT_BATCH(GEN7_MI_PREDICATE |
MI_PREDICATE_LOADOP_LOADINV | combine_op |
MI_PREDICATE_COMPAREOP_DELTAS_EQUAL);
if (i == 0 && brw->predicate.state != BRW_PREDICATE_STATE_USE_BIT) {
OUT_BATCH(GEN7_MI_PREDICATE | MI_PREDICATE_LOADOP_LOADINV |
MI_PREDICATE_COMBINEOP_SET |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
} else {
OUT_BATCH(GEN7_MI_PREDICATE |
MI_PREDICATE_LOADOP_LOAD | MI_PREDICATE_COMBINEOP_XOR |
MI_PREDICATE_COMPAREOP_SRCS_EQUAL);
}
ADVANCE_BATCH();

brw->predicate.state = BRW_PREDICATE_STATE_USE_BIT;

brw_bo_unreference(draw_id_bo);
}

brw_draw_single_prim(ctx, arrays, &prims[i], i, xfb_obj, stream,
@@ -101,30 +101,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
OUT_BATCH(0);
ADVANCE_BATCH();
}

/* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
* 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
*
* On Gen6-7.5, we use an execbuf parameter to do this for us.
* However, the kernel ignores that when execlists are in use.
* Fortunately, we can just write the registers from userspace
* on Gen8+, and they're context saved/restored.
*/
if (devinfo->gen >= 9) {
BEGIN_BATCH(3);
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
OUT_BATCH(CS_DEBUG_MODE2);
OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
ADVANCE_BATCH();
} else if (devinfo->gen == 8) {
BEGIN_BATCH(3);
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
OUT_BATCH(INSTPM);
OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
ADVANCE_BATCH();
}
}

static inline const struct brw_tracked_state *
@@ -213,11 +213,6 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;

enum isl_aux_usage aux_usage =
brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled,
ctx->Color.BlendEnabled & (1 << unit));

assert(brw_render_target_supported(brw, rb));

mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

@@ -225,9 +220,15 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
__func__, _mesa_get_format_name(rb_format));
}
enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];

enum isl_aux_usage aux_usage =
brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
intel_miptree_render_aux_usage(brw, mt, isl_format,
ctx->Color.BlendEnabled & (1 << unit));

struct isl_view view = {
.format = brw->mesa_to_isl_render_format[rb_format],
.format = isl_format,
.base_level = irb->mt_level - irb->mt->first_level,
.levels = 1,
.base_array_layer = irb->mt_layer,
@@ -241,6 +241,27 @@ intel_miptree_supports_hiz(const struct brw_context *brw,
}
}

/**
* Return true if the format that will be used to access the miptree is
* CCS_E-compatible with the miptree's linear/non-sRGB format.
*
* Why use the linear format? Well, although the miptree may be specified with
* an sRGB format, the usage of that color space/format can be toggled. Since
* our HW tends to support more linear formats than sRGB ones, we use this
* format variant to check for CCS_E compatibility.
*/
static bool
format_ccs_e_compat_with_miptree(const struct gen_device_info *devinfo,
const struct intel_mipmap_tree *mt,
enum isl_format access_format)
{
assert(mt->aux_usage == ISL_AUX_USAGE_CCS_E);

mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
return isl_formats_are_ccs_e_compatible(devinfo, isl_format, access_format);
}

static bool
intel_miptree_supports_ccs_e(struct brw_context *brw,
const struct intel_mipmap_tree *mt)
@@ -2549,6 +2570,7 @@ can_texture_with_ccs(struct brw_context *brw,
if (mt->aux_usage != ISL_AUX_USAGE_CCS_E)
return false;

/* TODO: Replace with format_ccs_e_compat_with_miptree for better perf. */
if (!isl_formats_are_ccs_e_compatible(&brw->screen->devinfo,
mt->surf.format, view_format)) {
perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",

@@ -2654,7 +2676,8 @@ intel_miptree_prepare_image(struct brw_context *brw,
enum isl_aux_usage
intel_miptree_render_aux_usage(struct brw_context *brw,
struct intel_mipmap_tree *mt,
bool srgb_enabled, bool blend_enabled)
enum isl_format render_format,
bool blend_enabled)
{
switch (mt->aux_usage) {
case ISL_AUX_USAGE_MCS:

@@ -2665,12 +2688,11 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE;

case ISL_AUX_USAGE_CCS_E: {
mesa_format mesa_format =
srgb_enabled ? mt->format :_mesa_get_srgb_format_linear(mt->format);
enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);

/* If the format supports CCS_E, then we can just use it */
if (isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format))
/* If the format supports CCS_E and is compatible with the miptree,
* then we can use it.
*/
if (format_ccs_e_compat_with_miptree(&brw->screen->devinfo,
mt, render_format))
return ISL_AUX_USAGE_CCS_E;

/* Otherwise, we have to fall back to CCS_D */

@@ -2679,8 +2701,8 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
* formats. However, there are issues with blending where it doesn't
* properly apply the sRGB curve to the clear color when blending.
*/
if (blend_enabled && isl_format_is_srgb(isl_format) &&
!isl_color_value_is_zero_one(mt->fast_clear_color, isl_format))
if (blend_enabled && isl_format_is_srgb(render_format) &&
!isl_color_value_is_zero_one(mt->fast_clear_color, render_format))
return ISL_AUX_USAGE_NONE;

return ISL_AUX_USAGE_CCS_D;

@@ -2695,10 +2717,11 @@ void
intel_miptree_prepare_render(struct brw_context *brw,
struct intel_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
bool srgb_enabled, bool blend_enabled)
enum isl_format render_format,
bool blend_enabled)
{
enum isl_aux_usage aux_usage =
intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
intel_miptree_render_aux_usage(brw, mt, render_format, blend_enabled);
intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
}

@@ -2707,12 +2730,13 @@ void
intel_miptree_finish_render(struct brw_context *brw,
struct intel_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
bool srgb_enabled, bool blend_enabled)
enum isl_format render_format,
bool blend_enabled)
{
assert(_mesa_is_format_color_format(mt->format));

enum isl_aux_usage aux_usage =
intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
intel_miptree_render_aux_usage(brw, mt, render_format, blend_enabled);
intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
aux_usage);
}
@@ -650,17 +650,20 @@ intel_miptree_prepare_image(struct brw_context *brw,
enum isl_aux_usage
intel_miptree_render_aux_usage(struct brw_context *brw,
struct intel_mipmap_tree *mt,
bool srgb_enabled, bool blend_enabled);
enum isl_format render_format,
bool blend_enabled);
void
intel_miptree_prepare_render(struct brw_context *brw,
struct intel_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
bool srgb_enabled, bool blend_enabled);
enum isl_format render_format,
bool blend_enabled);
void
intel_miptree_finish_render(struct brw_context *brw,
struct intel_mipmap_tree *mt, uint32_t level,
uint32_t start_layer, uint32_t layer_count,
bool srgb_enabled, bool blend_enabled);
enum isl_format render_format,
bool blend_enabled);
void
intel_miptree_prepare_depth(struct brw_context *brw,
struct intel_mipmap_tree *mt, uint32_t level,
@@ -2523,7 +2523,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
screen->compiler = brw_compiler_create(screen, devinfo);
screen->compiler->shader_debug_log = shader_debug_log_mesa;
screen->compiler->shader_perf_log = shader_perf_log_mesa;
screen->compiler->constant_buffer_0_is_relative = devinfo->gen < 8;
screen->compiler->constant_buffer_0_is_relative = true;
screen->compiler->supports_pull_constants = true;

screen->has_exec_fence =
@@ -3815,7 +3815,7 @@ bind_uniform_buffers(struct gl_context *ctx, GLuint first, GLsizei count,
}

set_buffer_multi_binding(ctx, buffers, i, caller,
binding, offset, size, !range,
binding, offset, size, range,
USAGE_UNIFORM_BUFFER);
}

@@ -3916,7 +3916,7 @@ bind_shader_storage_buffers(struct gl_context *ctx, GLuint first,
}

set_buffer_multi_binding(ctx, buffers, i, caller,
binding, offset, size, !range,
binding, offset, size, range,
USAGE_SHADER_STORAGE_BUFFER);
}

@@ -4238,7 +4238,7 @@ bind_atomic_buffers(struct gl_context *ctx,
}

set_buffer_multi_binding(ctx, buffers, i, caller,
binding, offset, size, !range,
binding, offset, size, range,
USAGE_ATOMIC_COUNTER_BUFFER);
}
@@ -330,6 +330,15 @@ get_fb0_attachment(struct gl_context *ctx, struct gl_framebuffer *fb,
return &fb->Attachment[BUFFER_BACK_LEFT];
case GL_BACK_RIGHT:
return &fb->Attachment[BUFFER_BACK_RIGHT];
case GL_BACK:
/* The ARB_ES3_1_compatibility spec says:
*
* "Since this command can only query a single framebuffer
* attachment, BACK is equivalent to BACK_LEFT."
*/
if (ctx->Extensions.ARB_ES3_1_compatibility)
return &fb->Attachment[BUFFER_BACK_LEFT];
return NULL;
case GL_AUX0:
if (fb->Visual.numAuxBuffers == 1) {
return &fb->Attachment[BUFFER_AUX0];
@@ -1110,7 +1110,7 @@ disk_cache_get(struct disk_cache *cache, const cache_key key, size_t *size)
* TODO: pass the metadata back to the caller and do some basic
* validation.
*/
cache_item_md_size += sizeof(cache_key);
cache_item_md_size += num_keys * sizeof(cache_key);
ret = lseek(fd, num_keys * sizeof(cache_key), SEEK_CUR);
if (ret == -1)
goto fail;
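The disk_cache_get() fix above sizes the metadata by the number of keys actually stored, matching the lseek() that skips over them. A small sketch of that bookkeeping; the 20-byte key size is an assumption standing in for the real cache_key definition:

#include <stddef.h>
#include <stdint.h>

typedef uint8_t cache_key[20]; /* assumed size, stand-in for the real cache_key */

static size_t metadata_size(size_t md_size_so_far, uint32_t num_keys)
{
   /* The entry stores num_keys keys, so the running size (and the
    * matching seek past them) must scale with num_keys instead of
    * adding sizeof(cache_key) once. */
   return md_size_so_far + (size_t)num_keys * sizeof(cache_key);
}
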
@@ -264,7 +264,7 @@ TODO: document the other workarounds.
</device>
<device driver="radeonsi">
<application name="ARK: Survival Evolved (and unintentionally the UE4 demo template)" executable="ShooterGame">
<option name="radeonsi_clear_db_meta_before_clear" value="true" />
<option name="radeonsi_clear_db_cache_before_clear" value="true" />
</application>
</device>
</driconf>
@@ -444,7 +444,7 @@ DRI_CONF_OPT_BEGIN_B(radeonsi_commutative_blend_add, def) \
DRI_CONF_DESC(en,gettext("Commutative additive blending optimizations (may cause rendering errors)")) \
DRI_CONF_OPT_END

#define DRI_CONF_RADEONSI_CLEAR_DB_META_BEFORE_CLEAR(def) \
DRI_CONF_OPT_BEGIN_B(radeonsi_clear_db_meta_before_clear, def) \
DRI_CONF_DESC(en,"Clear DB metadata cache before fast depth clear") \
#define DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR(def) \
DRI_CONF_OPT_BEGIN_B(radeonsi_clear_db_cache_before_clear, def) \
DRI_CONF_DESC(en,"Clear DB cache before fast depth clear") \
DRI_CONF_OPT_END