Compare commits
32 Commits
21.1
...
mesa-17.3.
Author | SHA1 | Date | |
---|---|---|---|
|
3eb187f376 | ||
|
0c20849f9c | ||
|
fb09360ea5 | ||
|
1664322838 | ||
|
c7e625df69 | ||
|
5addf041ef | ||
|
f4b6883ebc | ||
|
70ee0a4525 | ||
|
17d988bfaa | ||
|
03cf1953ad | ||
|
4fb6b83968 | ||
|
26b44eadac | ||
|
e22cf6e9b4 | ||
|
7df1b901b9 | ||
|
cbb8aec81c | ||
|
ff8c152640 | ||
|
0fef0c7deb | ||
|
66603bff6f | ||
|
b0082632eb | ||
|
3da6dd8003 | ||
|
2e33d68046 | ||
|
3b699fdd19 | ||
|
a2123968fa | ||
|
1ce3fbeb91 | ||
|
8f2bc19856 | ||
|
b6f0c16a89 | ||
|
5c8eb88553 | ||
|
afdb9da492 | ||
|
b8f10fdf34 | ||
|
ea132f9265 | ||
|
08b41e70dd | ||
|
ae720e2873 |
@@ -3631,15 +3631,17 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
|
||||
LLVMValueRef i1true = LLVMConstInt(ctx->ac.i1, 1, false);
|
||||
MAYBE_UNUSED int length;
|
||||
|
||||
bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
|
||||
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_image_atomic_add:
|
||||
atomic_name = "add";
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_min:
|
||||
atomic_name = "smin";
|
||||
atomic_name = is_unsigned ? "umin" : "smin";
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_max:
|
||||
atomic_name = "smax";
|
||||
atomic_name = is_unsigned ? "umax" : "smax";
|
||||
break;
|
||||
case nir_intrinsic_image_atomic_and:
|
||||
atomic_name = "and";
|
||||
|
@@ -927,9 +927,11 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
|
||||
in->numSamples == 1) {
|
||||
ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
|
||||
ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
|
||||
ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
|
||||
|
||||
din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
|
||||
dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
|
||||
dout.pMipInfo = meta_mip_info;
|
||||
|
||||
din.dccKeyFlags.pipeAligned = 1;
|
||||
din.dccKeyFlags.rbAligned = 1;
|
||||
@@ -955,21 +957,37 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
|
||||
surf->dcc_alignment = dout.dccRamBaseAlign;
|
||||
surf->num_dcc_levels = in->numMipLevels;
|
||||
|
||||
/* Disable DCC for the smallest levels. It seems to be
|
||||
* required for DCC readability between CB and shaders
|
||||
* when TC L2 isn't flushed. This was guessed.
|
||||
/* Disable DCC for levels that are in the mip tail.
|
||||
*
|
||||
* There are two issues that this is intended to
|
||||
* address:
|
||||
*
|
||||
* 1. Multiple mip levels may share a cache line. This
|
||||
* can lead to corruption when switching between
|
||||
* rendering to different mip levels because the
|
||||
* RBs don't maintain coherency.
|
||||
*
|
||||
* 2. Texturing with metadata after rendering sometimes
|
||||
* fails with corruption, probably for a similar
|
||||
* reason.
|
||||
*
|
||||
* Working around these issues for all levels in the
|
||||
* mip tail may be overly conservative, but it's what
|
||||
* Vulkan does.
|
||||
*
|
||||
* Alternative solutions that also work but are worse:
|
||||
* - Disable DCC.
|
||||
* - Disable DCC entirely.
|
||||
* - Flush TC L2 after rendering.
|
||||
*/
|
||||
for (unsigned i = 1; i < in->numMipLevels; i++) {
|
||||
if (mip_info[i].pitch *
|
||||
mip_info[i].height * surf->bpe < 1024) {
|
||||
for (unsigned i = 0; i < in->numMipLevels; i++) {
|
||||
if (meta_mip_info[i].inMiptail) {
|
||||
surf->num_dcc_levels = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!surf->num_dcc_levels)
|
||||
surf->dcc_size = 0;
|
||||
}
|
||||
|
||||
/* FMASK */
|
||||
|
@@ -152,6 +152,8 @@ radv_physical_device_init(struct radv_physical_device *device,
|
||||
goto fail;
|
||||
}
|
||||
|
||||
device->name = get_chip_name(device->rad_info.family);
|
||||
|
||||
if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
|
||||
radv_finish_wsi(device);
|
||||
device->ws->destroy(device->ws);
|
||||
@@ -168,12 +170,11 @@ radv_physical_device_init(struct radv_physical_device *device,
|
||||
/* The gpu id is already embedded in the uuid so we just pass "radv"
|
||||
* when creating the cache.
|
||||
*/
|
||||
char buf[VK_UUID_SIZE + 1];
|
||||
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE);
|
||||
device->disk_cache = disk_cache_create("radv", buf, shader_env_flags);
|
||||
char buf[VK_UUID_SIZE * 2 + 1];
|
||||
disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
|
||||
device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
|
||||
|
||||
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
|
||||
device->name = get_chip_name(device->rad_info.family);
|
||||
|
||||
radv_get_driver_uuid(&device->device_uuid);
|
||||
radv_get_device_uuid(&device->rad_info, &device->device_uuid);
|
||||
|
@@ -1766,6 +1766,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
stage ? stage->pName : "main", i,
|
||||
stage ? stage->pSpecializationInfo : NULL);
|
||||
pipeline->active_stages |= mesa_to_vk_shader_stage(i);
|
||||
/* We don't want to alter meta shaders IR directly so clone it
|
||||
* first.
|
||||
*/
|
||||
if (nir[i]->info.name) {
|
||||
nir[i] = nir_shader_clone(NULL, nir[i]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if (nir[MESA_SHADER_TESS_CTRL]) {
|
||||
@@ -1779,6 +1786,14 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
|
||||
radv_link_shaders(pipeline, nir);
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
if (!(device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS))
|
||||
continue;
|
||||
|
||||
if (modules[i])
|
||||
nir_print_shader(nir[i], stderr);
|
||||
}
|
||||
|
||||
if (nir[MESA_SHADER_FRAGMENT]) {
|
||||
if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
|
||||
pipeline->shaders[MESA_SHADER_FRAGMENT] =
|
||||
@@ -1863,7 +1878,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
|
||||
for (int i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
free(codes[i]);
|
||||
if (modules[i] && !modules[i]->nir && !pipeline->device->trace_bo)
|
||||
if (modules[i] && !pipeline->device->trace_bo)
|
||||
ralloc_free(nir[i]);
|
||||
}
|
||||
|
||||
|
@@ -231,6 +231,8 @@ radv_create_shader_variants_from_pipeline_cache(struct radv_device *device,
|
||||
p += entry->code_sizes[i];
|
||||
|
||||
entry->variants[i] = variant;
|
||||
} else if (entry->code_sizes[i]) {
|
||||
p += sizeof(struct cache_entry_variant_info) + entry->code_sizes[i];
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -291,9 +291,6 @@ radv_shader_compile_to_nir(struct radv_device *device,
|
||||
nir_remove_dead_variables(nir, nir_var_local);
|
||||
radv_optimize_nir(nir);
|
||||
|
||||
if (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADERS)
|
||||
nir_print_shader(nir, stderr);
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
||||
|
@@ -85,6 +85,7 @@ LIBGLSL_FILES = \
|
||||
glsl/lower_buffer_access.cpp \
|
||||
glsl/lower_buffer_access.h \
|
||||
glsl/lower_const_arrays_to_uniforms.cpp \
|
||||
glsl/lower_cs_derived.cpp \
|
||||
glsl/lower_discard.cpp \
|
||||
glsl/lower_discard_flow.cpp \
|
||||
glsl/lower_distance.cpp \
|
||||
|
@@ -1295,15 +1295,10 @@ builtin_variable_generator::generate_cs_special_vars()
|
||||
uvec3_t, "gl_LocalGroupSizeARB");
|
||||
}
|
||||
|
||||
if (state->ctx->Const.LowerCsDerivedVariables) {
|
||||
add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
|
||||
add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
|
||||
} else {
|
||||
add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
|
||||
uvec3_t, "gl_GlobalInvocationID");
|
||||
add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
|
||||
uint_t, "gl_LocalInvocationIndex");
|
||||
}
|
||||
add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
|
||||
uvec3_t, "gl_GlobalInvocationID");
|
||||
add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
|
||||
uint_t, "gl_LocalInvocationIndex");
|
||||
}
|
||||
|
||||
|
||||
@@ -1474,84 +1469,3 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initialize compute shader variables with values that are derived from other
|
||||
* compute shader variable.
|
||||
*/
|
||||
static void
|
||||
initialize_cs_derived_variables(gl_shader *shader,
|
||||
ir_function_signature *const main_sig)
|
||||
{
|
||||
assert(shader->Stage == MESA_SHADER_COMPUTE);
|
||||
|
||||
ir_variable *gl_GlobalInvocationID =
|
||||
shader->symbols->get_variable("gl_GlobalInvocationID");
|
||||
assert(gl_GlobalInvocationID);
|
||||
ir_variable *gl_WorkGroupID =
|
||||
shader->symbols->get_variable("gl_WorkGroupID");
|
||||
assert(gl_WorkGroupID);
|
||||
ir_variable *gl_WorkGroupSize =
|
||||
shader->symbols->get_variable("gl_WorkGroupSize");
|
||||
if (gl_WorkGroupSize == NULL) {
|
||||
void *const mem_ctx = ralloc_parent(shader->ir);
|
||||
gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type,
|
||||
"gl_WorkGroupSize",
|
||||
ir_var_auto);
|
||||
gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly;
|
||||
gl_WorkGroupSize->data.read_only = true;
|
||||
shader->ir->push_head(gl_WorkGroupSize);
|
||||
}
|
||||
ir_variable *gl_LocalInvocationID =
|
||||
shader->symbols->get_variable("gl_LocalInvocationID");
|
||||
assert(gl_LocalInvocationID);
|
||||
|
||||
/* gl_GlobalInvocationID =
|
||||
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
|
||||
*/
|
||||
ir_instruction *inst =
|
||||
assign(gl_GlobalInvocationID,
|
||||
add(mul(gl_WorkGroupID, gl_WorkGroupSize),
|
||||
gl_LocalInvocationID));
|
||||
main_sig->body.push_head(inst);
|
||||
|
||||
/* gl_LocalInvocationIndex =
|
||||
* gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
|
||||
* gl_LocalInvocationID.y * gl_WorkGroupSize.x +
|
||||
* gl_LocalInvocationID.x;
|
||||
*/
|
||||
ir_expression *index_z =
|
||||
mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)),
|
||||
swizzle_y(gl_WorkGroupSize));
|
||||
ir_expression *index_y =
|
||||
mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize));
|
||||
ir_expression *index_y_plus_z = add(index_y, index_z);
|
||||
operand index_x(swizzle_x(gl_LocalInvocationID));
|
||||
ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
|
||||
ir_variable *gl_LocalInvocationIndex =
|
||||
shader->symbols->get_variable("gl_LocalInvocationIndex");
|
||||
assert(gl_LocalInvocationIndex);
|
||||
inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
|
||||
main_sig->body.push_head(inst);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Initialize builtin variables with values based on other builtin variables.
|
||||
* These are initialized in the main function.
|
||||
*/
|
||||
void
|
||||
_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
|
||||
gl_shader *shader)
|
||||
{
|
||||
/* We only need to set CS variables currently. */
|
||||
if (shader->Stage == MESA_SHADER_COMPUTE &&
|
||||
ctx->Const.LowerCsDerivedVariables) {
|
||||
ir_function_signature *const main_sig =
|
||||
_mesa_get_main_function_signature(shader->symbols);
|
||||
|
||||
if (main_sig != NULL)
|
||||
initialize_cs_derived_variables(shader, main_sig);
|
||||
}
|
||||
}
|
||||
|
@@ -2009,8 +2009,6 @@ opt_shader_and_create_symbol_table(struct gl_context *ctx,
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_glsl_initialize_derived_variables(ctx, shader);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -2412,10 +2412,6 @@ extern void
|
||||
_mesa_glsl_initialize_variables(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state);
|
||||
|
||||
extern void
|
||||
_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
|
||||
gl_shader *shader);
|
||||
|
||||
extern void
|
||||
reparent_ir(exec_list *list, void *mem_ctx);
|
||||
|
||||
|
@@ -166,6 +166,7 @@ void optimize_dead_builtin_variables(exec_list *instructions,
|
||||
bool lower_tess_level(gl_linked_shader *shader);
|
||||
|
||||
bool lower_vertex_id(gl_linked_shader *shader);
|
||||
bool lower_cs_derived(gl_linked_shader *shader);
|
||||
bool lower_blend_equation_advanced(gl_linked_shader *shader);
|
||||
|
||||
bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state);
|
||||
|
@@ -2374,6 +2374,9 @@ link_intrastage_shaders(void *mem_ctx,
|
||||
if (ctx->Const.VertexID_is_zero_based)
|
||||
lower_vertex_id(linked);
|
||||
|
||||
if (ctx->Const.LowerCsDerivedVariables)
|
||||
lower_cs_derived(linked);
|
||||
|
||||
#ifdef DEBUG
|
||||
/* Compute the source checksum. */
|
||||
linked->SourceChecksum = 0;
|
||||
|
234
src/compiler/glsl/lower_cs_derived.cpp
Normal file
234
src/compiler/glsl/lower_cs_derived.cpp
Normal file
@@ -0,0 +1,234 @@
|
||||
/*
|
||||
* Copyright © 2017 Ilia Mirkin
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file lower_cs_derived.cpp
|
||||
*
|
||||
* For hardware that does not support the gl_GlobalInvocationID and
|
||||
* gl_LocalInvocationIndex system values, replace them with fresh
|
||||
* globals. Note that we can't rely on gl_WorkGroupSize or
|
||||
* gl_LocalGroupSizeARB being available, since they may only have been defined
|
||||
* in a non-main shader.
|
||||
*
|
||||
* [ This can happen if only a secondary shader has the layout(local_size_*)
|
||||
* declaration. ]
|
||||
*
|
||||
* This is meant to be run post-linking.
|
||||
*/
|
||||
|
||||
#include "glsl_symbol_table.h"
|
||||
#include "ir_hierarchical_visitor.h"
|
||||
#include "ir.h"
|
||||
#include "ir_builder.h"
|
||||
#include "linker.h"
|
||||
#include "program/prog_statevars.h"
|
||||
#include "builtin_functions.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
namespace {
|
||||
|
||||
class lower_cs_derived_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
explicit lower_cs_derived_visitor(gl_linked_shader *shader)
|
||||
: progress(false),
|
||||
shader(shader),
|
||||
local_size_variable(shader->Program->info.cs.local_size_variable),
|
||||
gl_WorkGroupSize(NULL),
|
||||
gl_WorkGroupID(NULL),
|
||||
gl_LocalInvocationID(NULL),
|
||||
gl_GlobalInvocationID(NULL),
|
||||
gl_LocalInvocationIndex(NULL)
|
||||
{
|
||||
main_sig = _mesa_get_main_function_signature(shader->symbols);
|
||||
assert(main_sig);
|
||||
}
|
||||
|
||||
virtual ir_visitor_status visit(ir_dereference_variable *);
|
||||
|
||||
ir_variable *add_system_value(
|
||||
int slot, const glsl_type *type, const char *name);
|
||||
void find_sysvals();
|
||||
void make_gl_GlobalInvocationID();
|
||||
void make_gl_LocalInvocationIndex();
|
||||
|
||||
bool progress;
|
||||
|
||||
private:
|
||||
gl_linked_shader *shader;
|
||||
bool local_size_variable;
|
||||
ir_function_signature *main_sig;
|
||||
|
||||
ir_rvalue *gl_WorkGroupSize;
|
||||
ir_variable *gl_WorkGroupID;
|
||||
ir_variable *gl_LocalInvocationID;
|
||||
|
||||
ir_variable *gl_GlobalInvocationID;
|
||||
ir_variable *gl_LocalInvocationIndex;
|
||||
};
|
||||
|
||||
} /* anonymous namespace */
|
||||
|
||||
/**
 * Create an implicitly declared, read-only system-value variable bound to
 * the given slot and push it at the head of the shader's IR list.
 *
 * \param slot  value stored in data.location
 * \param type  GLSL type of the new variable
 * \param name  variable name
 * \return the newly created variable
 */
ir_variable *
lower_cs_derived_visitor::add_system_value(
      int slot, const glsl_type *type, const char *name)
{
   ir_variable *const sysval =
      new(shader) ir_variable(type, name, ir_var_system_value);

   sysval->data.location = slot;
   sysval->data.explicit_location = true;
   sysval->data.explicit_index = 0;
   sysval->data.read_only = true;
   sysval->data.how_declared = ir_var_declared_implicitly;

   shader->ir->push_head(sysval);
   return sysval;
}
|
||||
|
||||
void
|
||||
lower_cs_derived_visitor::find_sysvals()
|
||||
{
|
||||
if (gl_WorkGroupSize != NULL)
|
||||
return;
|
||||
|
||||
ir_variable *WorkGroupSize;
|
||||
if (local_size_variable)
|
||||
WorkGroupSize = shader->symbols->get_variable("gl_LocalGroupSizeARB");
|
||||
else
|
||||
WorkGroupSize = shader->symbols->get_variable("gl_WorkGroupSize");
|
||||
if (WorkGroupSize)
|
||||
gl_WorkGroupSize = new(shader) ir_dereference_variable(WorkGroupSize);
|
||||
gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
|
||||
gl_LocalInvocationID = shader->symbols->get_variable("gl_LocalInvocationID");
|
||||
|
||||
/*
|
||||
* These may be missing due to either dead code elimination, or, in the
|
||||
* case of the group size, due to the layout being declared in a non-main
|
||||
* shader. Re-create them.
|
||||
*/
|
||||
|
||||
if (!gl_WorkGroupID)
|
||||
gl_WorkGroupID = add_system_value(
|
||||
SYSTEM_VALUE_WORK_GROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
|
||||
if (!gl_LocalInvocationID)
|
||||
gl_LocalInvocationID = add_system_value(
|
||||
SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
|
||||
"gl_LocalInvocationID");
|
||||
if (!WorkGroupSize) {
|
||||
if (local_size_variable) {
|
||||
gl_WorkGroupSize = new(shader) ir_dereference_variable(
|
||||
add_system_value(
|
||||
SYSTEM_VALUE_LOCAL_GROUP_SIZE, glsl_type::uvec3_type,
|
||||
"gl_LocalGroupSizeARB"));
|
||||
} else {
|
||||
ir_constant_data data;
|
||||
memset(&data, 0, sizeof(data));
|
||||
for (int i = 0; i < 3; i++)
|
||||
data.u[i] = shader->Program->info.cs.local_size[i];
|
||||
gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &data);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
lower_cs_derived_visitor::make_gl_GlobalInvocationID()
|
||||
{
|
||||
if (gl_GlobalInvocationID != NULL)
|
||||
return;
|
||||
|
||||
find_sysvals();
|
||||
|
||||
/* gl_GlobalInvocationID =
|
||||
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
|
||||
*/
|
||||
gl_GlobalInvocationID = new(shader) ir_variable(
|
||||
glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
|
||||
shader->ir->push_head(gl_GlobalInvocationID);
|
||||
|
||||
ir_instruction *inst =
|
||||
assign(gl_GlobalInvocationID,
|
||||
add(mul(gl_WorkGroupID, gl_WorkGroupSize->clone(shader, NULL)),
|
||||
gl_LocalInvocationID));
|
||||
main_sig->body.push_head(inst);
|
||||
}
|
||||
|
||||
void
|
||||
lower_cs_derived_visitor::make_gl_LocalInvocationIndex()
|
||||
{
|
||||
if (gl_LocalInvocationIndex != NULL)
|
||||
return;
|
||||
|
||||
find_sysvals();
|
||||
|
||||
/* gl_LocalInvocationIndex =
|
||||
* gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
|
||||
* gl_LocalInvocationID.y * gl_WorkGroupSize.x +
|
||||
* gl_LocalInvocationID.x;
|
||||
*/
|
||||
gl_LocalInvocationIndex = new(shader)
|
||||
ir_variable(glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
|
||||
shader->ir->push_head(gl_LocalInvocationIndex);
|
||||
|
||||
ir_expression *index_z =
|
||||
mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
|
||||
swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
|
||||
ir_expression *index_y =
|
||||
mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
|
||||
ir_expression *index_y_plus_z = add(index_y, index_z);
|
||||
operand index_x(swizzle_x(gl_LocalInvocationID));
|
||||
ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
|
||||
ir_instruction *inst =
|
||||
assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
|
||||
main_sig->body.push_head(inst);
|
||||
}
|
||||
|
||||
/**
 * Redirect a dereference of the gl_GlobalInvocationID or
 * gl_LocalInvocationIndex system value to its replacement temporary,
 * creating that temporary on first use.  Other dereferences are untouched.
 *
 * \return always visit_continue
 */
ir_visitor_status
lower_cs_derived_visitor::visit(ir_dereference_variable *ir)
{
   if (ir->var->data.mode != ir_var_system_value)
      return visit_continue;

   switch (ir->var->data.location) {
   case SYSTEM_VALUE_GLOBAL_INVOCATION_ID:
      make_gl_GlobalInvocationID();
      ir->var = gl_GlobalInvocationID;
      progress = true;
      break;

   case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
      make_gl_LocalInvocationIndex();
      ir->var = gl_LocalInvocationIndex;
      progress = true;
      break;

   default:
      break;
   }

   return visit_continue;
}
|
||||
|
||||
bool
|
||||
lower_cs_derived(gl_linked_shader *shader)
|
||||
{
|
||||
if (shader->Stage != MESA_SHADER_COMPUTE)
|
||||
return false;
|
||||
|
||||
lower_cs_derived_visitor v(shader);
|
||||
v.run(shader->ir);
|
||||
|
||||
return v.progress;
|
||||
}
|
@@ -124,6 +124,7 @@ files_libglsl = files(
|
||||
'lower_buffer_access.cpp',
|
||||
'lower_buffer_access.h',
|
||||
'lower_const_arrays_to_uniforms.cpp',
|
||||
'lower_cs_derived.cpp',
|
||||
'lower_discard.cpp',
|
||||
'lower_discard_flow.cpp',
|
||||
'lower_distance.cpp',
|
||||
|
@@ -62,23 +62,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
|
||||
* information, so removing these variables from the user shader will
|
||||
* cause problems later.
|
||||
*
|
||||
* For compute shaders, gl_GlobalInvocationID has some dependencies, so
|
||||
* we avoid removing these dependencies.
|
||||
*
|
||||
* We also avoid removing gl_GlobalInvocationID at this stage because it
|
||||
* might be used by a linked shader. In this case it still needs to be
|
||||
* initialized by the main function.
|
||||
*
|
||||
* gl_GlobalInvocationID =
|
||||
* gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
|
||||
*
|
||||
* Similarly, we initialize gl_LocalInvocationIndex in the main function:
|
||||
*
|
||||
* gl_LocalInvocationIndex =
|
||||
* gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
|
||||
* gl_LocalInvocationID.y * gl_WorkGroupSize.x +
|
||||
* gl_LocalInvocationID.x;
|
||||
*
|
||||
* Matrix uniforms with "Transpose" are not eliminated because there's
|
||||
* an optimization pass that can turn references to the regular matrix
|
||||
* into references to the transpose matrix. Eliminating the transpose
|
||||
@@ -90,11 +73,6 @@ optimize_dead_builtin_variables(exec_list *instructions,
|
||||
*/
|
||||
if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0
|
||||
|| strcmp(var->name, "gl_Vertex") == 0
|
||||
|| strcmp(var->name, "gl_WorkGroupID") == 0
|
||||
|| strcmp(var->name, "gl_WorkGroupSize") == 0
|
||||
|| strcmp(var->name, "gl_LocalInvocationID") == 0
|
||||
|| strcmp(var->name, "gl_GlobalInvocationID") == 0
|
||||
|| strcmp(var->name, "gl_LocalInvocationIndex") == 0
|
||||
|| strstr(var->name, "Transpose") != NULL)
|
||||
continue;
|
||||
|
||||
|
@@ -434,7 +434,7 @@ INTRINSIC(load_interpolated_input, 2, ARR(2, 1), true, 0, 0,
|
||||
/* src[] = { buffer_index, offset }. No const_index */
|
||||
LOAD(ssbo, 2, 0, xx, xx, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { offset }. const_index[] = { base, component } */
|
||||
LOAD(output, 1, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
LOAD(output, 1, 2, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { vertex, offset }. const_index[] = { base, component } */
|
||||
LOAD(per_vertex_output, 2, 1, BASE, COMPONENT, xx, NIR_INTRINSIC_CAN_ELIMINATE)
|
||||
/* src[] = { offset }. const_index[] = { base } */
|
||||
|
@@ -2802,7 +2802,8 @@ vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode,
|
||||
|
||||
case SpvOpMemoryModel:
|
||||
assert(w[1] == SpvAddressingModelLogical);
|
||||
assert(w[2] == SpvMemoryModelGLSL450);
|
||||
assert(w[2] == SpvMemoryModelSimple ||
|
||||
w[2] == SpvMemoryModelGLSL450);
|
||||
break;
|
||||
|
||||
case SpvOpEntryPoint: {
|
||||
|
@@ -21,7 +21,9 @@
|
||||
c_args_for_egl = []
|
||||
link_for_egl = []
|
||||
deps_for_egl = []
|
||||
incs_for_egl = []
|
||||
incs_for_egl = [
|
||||
inc_include, inc_src, inc_loader, inc_gbm, include_directories('main'),
|
||||
]
|
||||
files_egl = files(
|
||||
'main/eglapi.c',
|
||||
'main/eglapi.h',
|
||||
@@ -159,10 +161,7 @@ libegl = shared_library(
|
||||
'-D_EGL_BUILT_IN_DRIVER_DRI2',
|
||||
'-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()),
|
||||
],
|
||||
include_directories : [
|
||||
incs_for_egl, inc_include, inc_src, inc_loader, inc_gbm,
|
||||
include_directories('main'),
|
||||
],
|
||||
include_directories : incs_for_egl,
|
||||
link_with : [link_for_egl, libloader, libxmlconfig, libglapi, libmesa_util],
|
||||
link_args : [ld_args_bsymbolic, ld_args_gc_sections],
|
||||
dependencies : [deps_for_egl, dep_dl, dep_libdrm, dep_clock, dep_thread],
|
||||
|
@@ -3,7 +3,7 @@ pkgconfig_DATA = wayland-egl.pc
|
||||
|
||||
AM_CFLAGS = $(DEFINES) \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(WAYLAND_SERVER_CFLAGS)
|
||||
$(WAYLAND_CLIENT_CFLAGS)
|
||||
|
||||
lib_LTLIBRARIES = libwayland-egl.la
|
||||
noinst_HEADERS = wayland-egl-backend.h
|
||||
|
@@ -24,6 +24,7 @@ libwayland_egl = shared_library(
|
||||
'wayland-egl.c',
|
||||
c_args : [c_vis_args],
|
||||
link_args : ld_args_gc_sections,
|
||||
dependencies : dep_wayland_client,
|
||||
version : '1.0.0',
|
||||
install : true,
|
||||
)
|
||||
|
@@ -182,8 +182,11 @@ void si_vid_join_surfaces(struct r600_common_context *rctx,
|
||||
|
||||
for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.legacy.level); ++j)
|
||||
surfaces[i]->u.legacy.level[j].offset += off;
|
||||
} else
|
||||
} else {
|
||||
surfaces[i]->u.gfx9.surf_offset += off;
|
||||
for (j = 0; j < ARRAY_SIZE(surfaces[i]->u.gfx9.offset); ++j)
|
||||
surfaces[i]->u.gfx9.offset[j] += off;
|
||||
}
|
||||
|
||||
off += surfaces[i]->surf_size;
|
||||
}
|
||||
|
@@ -6,5 +6,5 @@ DRI_CONF_SECTION_PERFORMANCE
|
||||
DRI_CONF_SECTION_END
|
||||
|
||||
DRI_CONF_SECTION_DEBUG
|
||||
DRI_CONF_RADEONSI_CLEAR_DB_META_BEFORE_CLEAR("false")
|
||||
DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR("false")
|
||||
DRI_CONF_SECTION_END
|
||||
|
@@ -901,16 +901,16 @@ static void si_clear(struct pipe_context *ctx, unsigned buffers,
|
||||
* corruption in ARK: Survival Evolved, but that may just be
|
||||
* a coincidence and the root cause is elsewhere.
|
||||
*
|
||||
* The corruption can be fixed by putting the DB metadata flush
|
||||
* before or after the depth clear. (suprisingly)
|
||||
* The corruption can be fixed by putting the DB flush before
|
||||
* or after the depth clear. (surprisingly)
|
||||
*
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=102955 (apitrace)
|
||||
*
|
||||
* This hack decreases back-to-back ClearDepth performance.
|
||||
*/
|
||||
if (sctx->screen->clear_db_meta_before_clear)
|
||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META |
|
||||
SI_CONTEXT_PS_PARTIAL_FLUSH;
|
||||
if (sctx->screen->clear_db_cache_before_clear) {
|
||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
|
||||
}
|
||||
}
|
||||
|
||||
si_blitter_begin(ctx, SI_CLEAR);
|
||||
|
@@ -1072,8 +1072,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
|
||||
driQueryOptionb(config->options, "radeonsi_assume_no_z_fights");
|
||||
sscreen->commutative_blend_add =
|
||||
driQueryOptionb(config->options, "radeonsi_commutative_blend_add");
|
||||
sscreen->clear_db_meta_before_clear =
|
||||
driQueryOptionb(config->options, "radeonsi_clear_db_meta_before_clear");
|
||||
sscreen->clear_db_cache_before_clear =
|
||||
driQueryOptionb(config->options, "radeonsi_clear_db_cache_before_clear");
|
||||
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
|
||||
sscreen->b.family <= CHIP_POLARIS12) ||
|
||||
sscreen->b.family == CHIP_VEGA10 ||
|
||||
|
@@ -98,7 +98,7 @@ struct si_screen {
|
||||
bool has_out_of_order_rast;
|
||||
bool assume_no_z_fights;
|
||||
bool commutative_blend_add;
|
||||
bool clear_db_meta_before_clear;
|
||||
bool clear_db_cache_before_clear;
|
||||
bool has_msaa_sample_loc_bug;
|
||||
bool dpbb_allowed;
|
||||
bool dfsm_allowed;
|
||||
|
@@ -2015,14 +2015,21 @@ static LLVMValueRef fetch_constant(
|
||||
* code reducing SIMD wave occupancy from 8 to 2 in many cases.
|
||||
*
|
||||
* Using s_buffer_load_dword (x1) seems to be the best option right now.
|
||||
*
|
||||
* LLVM 5.0 on SI doesn't insert a required s_nop between SALU setting
|
||||
* a descriptor and s_buffer_load_dword using it, so we can't expand
|
||||
* the pointer into a full descriptor like below. We have to use
|
||||
* s_load_dword instead. The only case when LLVM 5.0 would select
|
||||
* s_buffer_load_dword (that we have to prevent) is when we use
|
||||
* a literal offset where we don't need bounds checking.
|
||||
*/
|
||||
#if 0 /* keep this codepath disabled */
|
||||
if (!reg->Register.Indirect) {
|
||||
if (ctx->screen->b.chip_class == SI &&
|
||||
HAVE_LLVM < 0x0600 &&
|
||||
!reg->Register.Indirect) {
|
||||
addr = LLVMBuildLShr(ctx->ac.builder, addr, LLVMConstInt(ctx->i32, 2, 0), "");
|
||||
LLVMValueRef result = ac_build_load_invariant(&ctx->ac, ptr, addr);
|
||||
return bitcast(bld_base, type, result);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Do the bounds checking with a descriptor, because
|
||||
* doing computation and manual bounds checking of 64-bit
|
||||
|
@@ -61,7 +61,7 @@ struct vc4_cl {
|
||||
struct vc4_cl_out *next;
|
||||
struct vc4_cl_out *reloc_next;
|
||||
uint32_t size;
|
||||
#ifdef DEBUG
|
||||
#ifndef NDEBUG
|
||||
uint32_t reloc_count;
|
||||
#endif
|
||||
};
|
||||
@@ -163,8 +163,8 @@ static inline void
|
||||
cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
|
||||
{
|
||||
assert(n == 1 || n == 2);
|
||||
#ifdef DEBUG
|
||||
assert(cl->reloc_count == 0);
|
||||
#ifndef NDEBUG
|
||||
cl->reloc_count = n;
|
||||
#endif
|
||||
|
||||
@@ -177,8 +177,8 @@ cl_start_reloc(struct vc4_cl *cl, struct vc4_cl_out **out, uint32_t n)
|
||||
static inline struct vc4_cl_out *
|
||||
cl_start_shader_reloc(struct vc4_cl *cl, uint32_t n)
|
||||
{
|
||||
#ifdef DEBUG
|
||||
assert(cl->reloc_count == 0);
|
||||
#ifndef NDEBUG
|
||||
cl->reloc_count = n;
|
||||
#endif
|
||||
cl->reloc_next = cl->next;
|
||||
@@ -196,7 +196,7 @@ cl_reloc(struct vc4_job *job, struct vc4_cl *cl, struct vc4_cl_out **cl_out,
|
||||
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
|
||||
cl_advance(&cl->reloc_next, 4);
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifndef NDEBUG
|
||||
cl->reloc_count--;
|
||||
#endif
|
||||
|
||||
@@ -211,7 +211,7 @@ cl_aligned_reloc(struct vc4_job *job, struct vc4_cl *cl,
|
||||
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(job, bo);
|
||||
cl_advance(&cl->reloc_next, 4);
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifndef NDEBUG
|
||||
cl->reloc_count--;
|
||||
#endif
|
||||
|
||||
@@ -297,7 +297,7 @@ cl_pack_emit_reloc(struct vc4_cl *cl, const struct vc4_cl_reloc *reloc)
|
||||
*(uint32_t *)cl->reloc_next = vc4_gem_hindex(cl->job, reloc->bo);
|
||||
cl_advance(&cl->reloc_next, 4);
|
||||
|
||||
#ifdef DEBUG
|
||||
#ifndef NDEBUG
|
||||
cl->reloc_count--;
|
||||
#endif
|
||||
}
|
||||
|
@@ -70,7 +70,6 @@ namespace {
|
||||
make_kernel_args(const Module &mod, const std::string &kernel_name,
|
||||
const clang::CompilerInstance &c) {
|
||||
std::vector<module::argument> args;
|
||||
const auto address_spaces = c.getTarget().getAddressSpaceMap();
|
||||
const Function &f = *mod.getFunction(kernel_name);
|
||||
::llvm::DataLayout dl(&mod);
|
||||
const auto size_type =
|
||||
@@ -128,8 +127,8 @@ namespace {
|
||||
const unsigned address_space =
|
||||
cast< ::llvm::PointerType>(actual_type)->getAddressSpace();
|
||||
|
||||
if (address_space == address_spaces[clang::LangAS::opencl_local
|
||||
- compat::lang_as_offset]) {
|
||||
if (address_space == compat::target_address_space(
|
||||
c.getTarget(), clang::LangAS::opencl_local)) {
|
||||
args.emplace_back(module::argument::local, arg_api_size,
|
||||
target_size, target_align,
|
||||
module::argument::zero_ext);
|
||||
|
@@ -69,11 +69,19 @@ namespace clover {
|
||||
typedef ::llvm::TargetLibraryInfo target_library_info;
|
||||
#endif
|
||||
|
||||
template<typename T, typename AS>
|
||||
unsigned target_address_space(const T &target, const AS lang_as) {
|
||||
const auto &map = target.getAddressSpaceMap();
|
||||
#if HAVE_LLVM >= 0x0500
|
||||
return map[static_cast<unsigned>(lang_as)];
|
||||
#else
|
||||
return map[lang_as - clang::LangAS::Offset];
|
||||
#endif
|
||||
}
|
||||
|
||||
#if HAVE_LLVM >= 0x0500
|
||||
const auto lang_as_offset = 0;
|
||||
const clang::InputKind ik_opencl = clang::InputKind::OpenCL;
|
||||
#else
|
||||
const auto lang_as_offset = clang::LangAS::Offset;
|
||||
const clang::InputKind ik_opencl = clang::IK_OpenCL;
|
||||
#endif
|
||||
|
||||
|
@@ -1190,7 +1190,7 @@ brw_JMPI(struct brw_codegen *p, struct brw_reg index,
|
||||
struct brw_reg ip = brw_ip_reg();
|
||||
brw_inst *inst = brw_alu2(p, BRW_OPCODE_JMPI, ip, ip, index);
|
||||
|
||||
brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_2);
|
||||
brw_inst_set_exec_size(devinfo, inst, BRW_EXECUTE_1);
|
||||
brw_inst_set_qtr_control(devinfo, inst, BRW_COMPRESSION_NONE);
|
||||
brw_inst_set_mask_control(devinfo, inst, BRW_MASK_DISABLE);
|
||||
brw_inst_set_pred_control(devinfo, inst, predicate_control);
|
||||
|
@@ -47,7 +47,8 @@ cat(struct string *dest, const struct string src)
|
||||
static bool
|
||||
contains(const struct string haystack, const struct string needle)
|
||||
{
|
||||
return memmem(haystack.str, haystack.len, needle.str, needle.len) != NULL;
|
||||
return haystack.str && memmem(haystack.str, haystack.len,
|
||||
needle.str, needle.len) != NULL;
|
||||
}
|
||||
#define CONTAINS(haystack, needle) \
|
||||
contains(haystack, (struct string){needle, strlen(needle)})
|
||||
|
@@ -5013,7 +5013,9 @@ needs_src_copy(const fs_builder &lbld, const fs_inst *inst, unsigned i)
|
||||
{
|
||||
return !(is_periodic(inst->src[i], lbld.dispatch_width()) ||
|
||||
(inst->components_read(i) == 1 &&
|
||||
lbld.dispatch_width() <= inst->exec_size));
|
||||
lbld.dispatch_width() <= inst->exec_size)) ||
|
||||
(inst->flags_written() &
|
||||
flag_mask(inst->src[i], type_sz(inst->src[i].type)));
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -402,7 +402,6 @@ fs_generator::generate_fb_write(fs_inst *inst, struct brw_reg payload)
|
||||
brw_inst_set_cond_modifier(p->devinfo, brw_last_inst, BRW_CONDITIONAL_NZ);
|
||||
|
||||
int jmp = brw_JMPI(p, brw_imm_ud(0), BRW_PREDICATE_NORMAL) - p->store;
|
||||
brw_inst_set_exec_size(p->devinfo, brw_last_inst, BRW_EXECUTE_1);
|
||||
{
|
||||
/* Don't send AA data */
|
||||
fire_fb_write(inst, offset(payload, 1), implied_header, inst->mlen-1);
|
||||
|
@@ -173,14 +173,13 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
|
||||
NIR_PASS_V(nir, nir_propagate_invariant);
|
||||
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
|
||||
entry_point->impl, true, false);
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
|
||||
/* Vulkan uses the separate-shader linking model */
|
||||
nir->info.separate_shader = true;
|
||||
|
||||
nir = brw_preprocess_nir(compiler, nir);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
|
||||
if (stage == MESA_SHADER_FRAGMENT)
|
||||
NIR_PASS_V(nir, anv_nir_lower_input_attachments);
|
||||
|
@@ -315,7 +315,8 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
|
||||
src_format = dst_format = MESA_FORMAT_R_FLOAT32;
|
||||
}
|
||||
|
||||
enum isl_format src_isl_format = brw_isl_format_for_mesa_format(src_format);
|
||||
enum isl_format src_isl_format =
|
||||
brw_blorp_to_isl_format(brw, src_format, false);
|
||||
enum isl_aux_usage src_aux_usage =
|
||||
intel_miptree_texture_aux_usage(brw, src_mt, src_isl_format);
|
||||
/* We do format workarounds for some depth formats so we can't reliably
|
||||
@@ -328,8 +329,10 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
|
||||
intel_miptree_prepare_access(brw, src_mt, src_level, 1, src_layer, 1,
|
||||
src_aux_usage, src_clear_supported);
|
||||
|
||||
enum isl_format dst_isl_format =
|
||||
brw_blorp_to_isl_format(brw, dst_format, true);
|
||||
enum isl_aux_usage dst_aux_usage =
|
||||
intel_miptree_render_aux_usage(brw, dst_mt, encode_srgb, false);
|
||||
intel_miptree_render_aux_usage(brw, dst_mt, dst_isl_format, false);
|
||||
const bool dst_clear_supported = dst_aux_usage != ISL_AUX_USAGE_NONE;
|
||||
intel_miptree_prepare_access(brw, dst_mt, dst_level, 1, dst_layer, 1,
|
||||
dst_aux_usage, dst_clear_supported);
|
||||
@@ -351,10 +354,9 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
|
||||
struct blorp_batch batch;
|
||||
blorp_batch_init(&brw->blorp, &batch, brw, 0);
|
||||
blorp_blit(&batch, &src_surf, src_level, src_layer,
|
||||
brw_blorp_to_isl_format(brw, src_format, false), src_isl_swizzle,
|
||||
src_isl_format, src_isl_swizzle,
|
||||
&dst_surf, dst_level, dst_layer,
|
||||
brw_blorp_to_isl_format(brw, dst_format, true),
|
||||
ISL_SWIZZLE_IDENTITY,
|
||||
dst_isl_format, ISL_SWIZZLE_IDENTITY,
|
||||
src_x0, src_y0, src_x1, src_y1,
|
||||
dst_x0, dst_y0, dst_x1, dst_y1,
|
||||
filter, mirror_x, mirror_y);
|
||||
@@ -1157,6 +1159,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
|
||||
mesa_format format = irb->Base.Base.Format;
|
||||
if (!encode_srgb && _mesa_get_format_color_encoding(format) == GL_SRGB)
|
||||
format = _mesa_get_srgb_format_linear(format);
|
||||
enum isl_format isl_format = brw->mesa_to_isl_render_format[format];
|
||||
|
||||
x0 = fb->_Xmin;
|
||||
x1 = fb->_Xmax;
|
||||
@@ -1255,8 +1258,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
|
||||
|
||||
struct blorp_batch batch;
|
||||
blorp_batch_init(&brw->blorp, &batch, brw, 0);
|
||||
blorp_fast_clear(&batch, &surf,
|
||||
brw->mesa_to_isl_render_format[format],
|
||||
blorp_fast_clear(&batch, &surf, isl_format,
|
||||
level, irb->mt_layer, num_layers,
|
||||
x0, y0, x1, y1);
|
||||
blorp_batch_finish(&batch);
|
||||
@@ -1275,9 +1277,9 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
|
||||
irb->mt, irb->mt_level, irb->mt_layer, num_layers);
|
||||
|
||||
enum isl_aux_usage aux_usage =
|
||||
intel_miptree_render_aux_usage(brw, irb->mt, encode_srgb, false);
|
||||
intel_miptree_render_aux_usage(brw, irb->mt, isl_format, false);
|
||||
intel_miptree_prepare_render(brw, irb->mt, level, irb->mt_layer,
|
||||
num_layers, encode_srgb, false);
|
||||
num_layers, isl_format, false);
|
||||
|
||||
struct isl_surf isl_tmp[2];
|
||||
struct blorp_surf surf;
|
||||
@@ -1289,16 +1291,14 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
|
||||
|
||||
struct blorp_batch batch;
|
||||
blorp_batch_init(&brw->blorp, &batch, brw, 0);
|
||||
blorp_clear(&batch, &surf,
|
||||
brw->mesa_to_isl_render_format[format],
|
||||
ISL_SWIZZLE_IDENTITY,
|
||||
blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
|
||||
level, irb->mt_layer, num_layers,
|
||||
x0, y0, x1, y1,
|
||||
clear_color, color_write_disable);
|
||||
blorp_batch_finish(&batch);
|
||||
|
||||
intel_miptree_finish_render(brw, irb->mt, level, irb->mt_layer,
|
||||
num_layers, encode_srgb, false);
|
||||
num_layers, isl_format, false);
|
||||
}
|
||||
|
||||
return;
|
||||
|
@@ -1072,6 +1072,12 @@ intelDestroyContext(__DRIcontext * driContextPriv)
|
||||
if (brw->wm.base.scratch_bo)
|
||||
brw_bo_unreference(brw->wm.base.scratch_bo);
|
||||
|
||||
brw_bo_unreference(brw->vs.base.push_const_bo);
|
||||
brw_bo_unreference(brw->tcs.base.push_const_bo);
|
||||
brw_bo_unreference(brw->tes.base.push_const_bo);
|
||||
brw_bo_unreference(brw->gs.base.push_const_bo);
|
||||
brw_bo_unreference(brw->wm.base.push_const_bo);
|
||||
|
||||
brw_destroy_hw_context(brw->bufmgr, brw->hw_ctx);
|
||||
|
||||
if (ctx->swrast_context) {
|
||||
|
@@ -25,6 +25,7 @@
|
||||
|
||||
#include <sys/errno.h>
|
||||
|
||||
#include "main/blend.h"
|
||||
#include "main/context.h"
|
||||
#include "main/condrender.h"
|
||||
#include "main/samplerobj.h"
|
||||
@@ -503,9 +504,13 @@ brw_predraw_resolve_framebuffer(struct brw_context *brw)
|
||||
if (irb == NULL || irb->mt == NULL)
|
||||
continue;
|
||||
|
||||
mesa_format mesa_format =
|
||||
_mesa_get_render_format(ctx, intel_rb_format(irb));
|
||||
enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
|
||||
|
||||
intel_miptree_prepare_render(brw, irb->mt, irb->mt_level,
|
||||
irb->mt_layer, irb->layer_count,
|
||||
ctx->Color.sRGBEnabled,
|
||||
isl_format,
|
||||
ctx->Color.BlendEnabled & (1 << i));
|
||||
}
|
||||
}
|
||||
@@ -571,10 +576,14 @@ brw_postdraw_set_buffers_need_resolve(struct brw_context *brw)
|
||||
if (!irb)
|
||||
continue;
|
||||
|
||||
mesa_format mesa_format =
|
||||
_mesa_get_render_format(ctx, intel_rb_format(irb));
|
||||
enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
|
||||
|
||||
brw_render_cache_set_add_bo(brw, irb->mt->bo);
|
||||
intel_miptree_finish_render(brw, irb->mt, irb->mt_level,
|
||||
irb->mt_layer, irb->layer_count,
|
||||
ctx->Color.sRGBEnabled,
|
||||
isl_format,
|
||||
ctx->Color.BlendEnabled & (1 << i));
|
||||
}
|
||||
}
|
||||
|
@@ -101,30 +101,6 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
|
||||
/* Set the "CONSTANT_BUFFER Address Offset Disable" bit, so
|
||||
* 3DSTATE_CONSTANT_XS buffer 0 is an absolute address.
|
||||
*
|
||||
* On Gen6-7.5, we use an execbuf parameter to do this for us.
|
||||
* However, the kernel ignores that when execlists are in use.
|
||||
* Fortunately, we can just write the registers from userspace
|
||||
* on Gen8+, and they're context saved/restored.
|
||||
*/
|
||||
if (devinfo->gen >= 9) {
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
|
||||
OUT_BATCH(CS_DEBUG_MODE2);
|
||||
OUT_BATCH(REG_MASK(CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
|
||||
CSDBG2_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
|
||||
ADVANCE_BATCH();
|
||||
} else if (devinfo->gen == 8) {
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
|
||||
OUT_BATCH(INSTPM);
|
||||
OUT_BATCH(REG_MASK(INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE) |
|
||||
INSTPM_CONSTANT_BUFFER_ADDRESS_OFFSET_DISABLE);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
}
|
||||
|
||||
static inline const struct brw_tracked_state *
|
||||
|
@@ -213,11 +213,6 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
|
||||
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
|
||||
struct intel_mipmap_tree *mt = irb->mt;
|
||||
|
||||
enum isl_aux_usage aux_usage =
|
||||
brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
|
||||
intel_miptree_render_aux_usage(brw, mt, ctx->Color.sRGBEnabled,
|
||||
ctx->Color.BlendEnabled & (1 << unit));
|
||||
|
||||
assert(brw_render_target_supported(brw, rb));
|
||||
|
||||
mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
|
||||
@@ -225,9 +220,15 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
|
||||
_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
|
||||
__func__, _mesa_get_format_name(rb_format));
|
||||
}
|
||||
enum isl_format isl_format = brw->mesa_to_isl_render_format[rb_format];
|
||||
|
||||
enum isl_aux_usage aux_usage =
|
||||
brw->draw_aux_buffer_disabled[unit] ? ISL_AUX_USAGE_NONE :
|
||||
intel_miptree_render_aux_usage(brw, mt, isl_format,
|
||||
ctx->Color.BlendEnabled & (1 << unit));
|
||||
|
||||
struct isl_view view = {
|
||||
.format = brw->mesa_to_isl_render_format[rb_format],
|
||||
.format = isl_format,
|
||||
.base_level = irb->mt_level - irb->mt->first_level,
|
||||
.levels = 1,
|
||||
.base_array_layer = irb->mt_layer,
|
||||
|
@@ -2654,7 +2654,8 @@ intel_miptree_prepare_image(struct brw_context *brw,
|
||||
enum isl_aux_usage
|
||||
intel_miptree_render_aux_usage(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
bool srgb_enabled, bool blend_enabled)
|
||||
enum isl_format render_format,
|
||||
bool blend_enabled)
|
||||
{
|
||||
switch (mt->aux_usage) {
|
||||
case ISL_AUX_USAGE_MCS:
|
||||
@@ -2665,12 +2666,8 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
|
||||
return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE;
|
||||
|
||||
case ISL_AUX_USAGE_CCS_E: {
|
||||
mesa_format mesa_format =
|
||||
srgb_enabled ? mt->format :_mesa_get_srgb_format_linear(mt->format);
|
||||
enum isl_format isl_format = brw_isl_format_for_mesa_format(mesa_format);
|
||||
|
||||
/* If the format supports CCS_E, then we can just use it */
|
||||
if (isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format))
|
||||
if (isl_format_supports_ccs_e(&brw->screen->devinfo, render_format))
|
||||
return ISL_AUX_USAGE_CCS_E;
|
||||
|
||||
/* Otherwise, we have to fall back to CCS_D */
|
||||
@@ -2679,8 +2676,8 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
|
||||
* formats. However, there are issues with blending where it doesn't
|
||||
* properly apply the sRGB curve to the clear color when blending.
|
||||
*/
|
||||
if (blend_enabled && isl_format_is_srgb(isl_format) &&
|
||||
!isl_color_value_is_zero_one(mt->fast_clear_color, isl_format))
|
||||
if (blend_enabled && isl_format_is_srgb(render_format) &&
|
||||
!isl_color_value_is_zero_one(mt->fast_clear_color, render_format))
|
||||
return ISL_AUX_USAGE_NONE;
|
||||
|
||||
return ISL_AUX_USAGE_CCS_D;
|
||||
@@ -2695,10 +2692,11 @@ void
|
||||
intel_miptree_prepare_render(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt, uint32_t level,
|
||||
uint32_t start_layer, uint32_t layer_count,
|
||||
bool srgb_enabled, bool blend_enabled)
|
||||
enum isl_format render_format,
|
||||
bool blend_enabled)
|
||||
{
|
||||
enum isl_aux_usage aux_usage =
|
||||
intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
|
||||
intel_miptree_render_aux_usage(brw, mt, render_format, blend_enabled);
|
||||
intel_miptree_prepare_access(brw, mt, level, 1, start_layer, layer_count,
|
||||
aux_usage, aux_usage != ISL_AUX_USAGE_NONE);
|
||||
}
|
||||
@@ -2707,12 +2705,13 @@ void
|
||||
intel_miptree_finish_render(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt, uint32_t level,
|
||||
uint32_t start_layer, uint32_t layer_count,
|
||||
bool srgb_enabled, bool blend_enabled)
|
||||
enum isl_format render_format,
|
||||
bool blend_enabled)
|
||||
{
|
||||
assert(_mesa_is_format_color_format(mt->format));
|
||||
|
||||
enum isl_aux_usage aux_usage =
|
||||
intel_miptree_render_aux_usage(brw, mt, srgb_enabled, blend_enabled);
|
||||
intel_miptree_render_aux_usage(brw, mt, render_format, blend_enabled);
|
||||
intel_miptree_finish_write(brw, mt, level, start_layer, layer_count,
|
||||
aux_usage);
|
||||
}
|
||||
|
@@ -650,17 +650,20 @@ intel_miptree_prepare_image(struct brw_context *brw,
|
||||
enum isl_aux_usage
|
||||
intel_miptree_render_aux_usage(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
bool srgb_enabled, bool blend_enabled);
|
||||
enum isl_format render_format,
|
||||
bool blend_enabled);
|
||||
void
|
||||
intel_miptree_prepare_render(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt, uint32_t level,
|
||||
uint32_t start_layer, uint32_t layer_count,
|
||||
bool srgb_enabled, bool blend_enabled);
|
||||
enum isl_format render_format,
|
||||
bool blend_enabled);
|
||||
void
|
||||
intel_miptree_finish_render(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt, uint32_t level,
|
||||
uint32_t start_layer, uint32_t layer_count,
|
||||
bool srgb_enabled, bool blend_enabled);
|
||||
enum isl_format render_format,
|
||||
bool blend_enabled);
|
||||
void
|
||||
intel_miptree_prepare_depth(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt, uint32_t level,
|
||||
|
@@ -2523,7 +2523,7 @@ __DRIconfig **intelInitScreen2(__DRIscreen *dri_screen)
|
||||
screen->compiler = brw_compiler_create(screen, devinfo);
|
||||
screen->compiler->shader_debug_log = shader_debug_log_mesa;
|
||||
screen->compiler->shader_perf_log = shader_perf_log_mesa;
|
||||
screen->compiler->constant_buffer_0_is_relative = devinfo->gen < 8;
|
||||
screen->compiler->constant_buffer_0_is_relative = true;
|
||||
screen->compiler->supports_pull_constants = true;
|
||||
|
||||
screen->has_exec_fence =
|
||||
|
@@ -3815,7 +3815,7 @@ bind_uniform_buffers(struct gl_context *ctx, GLuint first, GLsizei count,
|
||||
}
|
||||
|
||||
set_buffer_multi_binding(ctx, buffers, i, caller,
|
||||
binding, offset, size, !range,
|
||||
binding, offset, size, range,
|
||||
USAGE_UNIFORM_BUFFER);
|
||||
}
|
||||
|
||||
@@ -3916,7 +3916,7 @@ bind_shader_storage_buffers(struct gl_context *ctx, GLuint first,
|
||||
}
|
||||
|
||||
set_buffer_multi_binding(ctx, buffers, i, caller,
|
||||
binding, offset, size, !range,
|
||||
binding, offset, size, range,
|
||||
USAGE_SHADER_STORAGE_BUFFER);
|
||||
}
|
||||
|
||||
@@ -4238,7 +4238,7 @@ bind_atomic_buffers(struct gl_context *ctx,
|
||||
}
|
||||
|
||||
set_buffer_multi_binding(ctx, buffers, i, caller,
|
||||
binding, offset, size, !range,
|
||||
binding, offset, size, range,
|
||||
USAGE_ATOMIC_COUNTER_BUFFER);
|
||||
}
|
||||
|
||||
|
@@ -330,6 +330,15 @@ get_fb0_attachment(struct gl_context *ctx, struct gl_framebuffer *fb,
|
||||
return &fb->Attachment[BUFFER_BACK_LEFT];
|
||||
case GL_BACK_RIGHT:
|
||||
return &fb->Attachment[BUFFER_BACK_RIGHT];
|
||||
case GL_BACK:
|
||||
/* The ARB_ES3_1_compatibility spec says:
|
||||
*
|
||||
* "Since this command can only query a single framebuffer
|
||||
* attachment, BACK is equivalent to BACK_LEFT."
|
||||
*/
|
||||
if (ctx->Extensions.ARB_ES3_1_compatibility)
|
||||
return &fb->Attachment[BUFFER_BACK_LEFT];
|
||||
return NULL;
|
||||
case GL_AUX0:
|
||||
if (fb->Visual.numAuxBuffers == 1) {
|
||||
return &fb->Attachment[BUFFER_AUX0];
|
||||
|
@@ -264,7 +264,7 @@ TODO: document the other workarounds.
|
||||
</device>
|
||||
<device driver="radeonsi">
|
||||
<application name="ARK: Survival Evolved (and unintentionally the UE4 demo template)" executable="ShooterGame">
|
||||
<option name="radeonsi_clear_db_meta_before_clear" value="true" />
|
||||
<option name="radeonsi_clear_db_cache_before_clear" value="true" />
|
||||
</application>
|
||||
</device>
|
||||
</driconf>
|
||||
|
@@ -444,7 +444,7 @@ DRI_CONF_OPT_BEGIN_B(radeonsi_commutative_blend_add, def) \
|
||||
DRI_CONF_DESC(en,gettext("Commutative additive blending optimizations (may cause rendering errors)")) \
|
||||
DRI_CONF_OPT_END
|
||||
|
||||
#define DRI_CONF_RADEONSI_CLEAR_DB_META_BEFORE_CLEAR(def) \
|
||||
DRI_CONF_OPT_BEGIN_B(radeonsi_clear_db_meta_before_clear, def) \
|
||||
DRI_CONF_DESC(en,"Clear DB metadata cache before fast depth clear") \
|
||||
#define DRI_CONF_RADEONSI_CLEAR_DB_CACHE_BEFORE_CLEAR(def) \
|
||||
DRI_CONF_OPT_BEGIN_B(radeonsi_clear_db_cache_before_clear, def) \
|
||||
DRI_CONF_DESC(en,"Clear DB cache before fast depth clear") \
|
||||
DRI_CONF_OPT_END
|
||||
|
Reference in New Issue
Block a user