Compare commits
63 Commits
bisect-tem
...
mesa-12.0.
Author | SHA1 | Date | |
---|---|---|---|
|
a7649abe9f | ||
|
bcfda0a1fe | ||
|
998e503592 | ||
|
5e3e292502 | ||
|
3be5c6a9ec | ||
|
a26ca04fe3 | ||
|
1a5d6a232f | ||
|
702a1121c9 | ||
|
9a21315ea9 | ||
|
94630ce0c7 | ||
|
6ad61d90ea | ||
|
a136b8bfe2 | ||
|
52ba7abe1e | ||
|
28ecf2b90e | ||
|
ead833a395 | ||
|
ee77c4a099 | ||
|
a94be40ecc | ||
|
632d7ef148 | ||
|
5513300f59 | ||
|
33d0016836 | ||
|
169b700dfd | ||
|
33e985f8b9 | ||
|
c9de6190a0 | ||
|
05d88165d9 | ||
|
d1cf18497a | ||
|
5a44d36b46 | ||
|
0e54eebeed | ||
|
0ab1a3957a | ||
|
1398a9510f | ||
|
b265796c79 | ||
|
4a00da1662 | ||
|
e817522728 | ||
|
915cc490d7 | ||
|
683c6940d8 | ||
|
2d483256d5 | ||
|
8c627af1f0 | ||
|
86e367a572 | ||
|
64015c03bb | ||
|
99fcfd985e | ||
|
7bc29c784a | ||
|
b2b7f05da6 | ||
|
eb56a2f250 | ||
|
c1269825cf | ||
|
f651a4bb2e | ||
|
44029d4237 | ||
|
910fa7a824 | ||
|
3b78304025 | ||
|
dd96daa55e | ||
|
a6011c6fc6 | ||
|
2d83aad693 | ||
|
665f57c513 | ||
|
7e62585ee8 | ||
|
4dd70617a1 | ||
|
9ed6965749 | ||
|
aed6230269 | ||
|
f73a68ec37 | ||
|
0c12887764 | ||
|
145705e49c | ||
|
d3c92267e0 | ||
|
36e26f2ee2 | ||
|
9a56e7d25b | ||
|
7ad2cb6f08 | ||
|
a43a368457 |
@@ -95,6 +95,8 @@ SUBDIRS := \
|
||||
src/mesa \
|
||||
src/util \
|
||||
src/egl \
|
||||
src/intel/genxml \
|
||||
src/intel/isl \
|
||||
src/mesa/drivers/dri
|
||||
|
||||
INC_DIRS := $(call all-named-subdir-makefiles,$(SUBDIRS))
|
||||
|
@@ -99,7 +99,6 @@ AM_PROG_CC_C_O
|
||||
AM_PROG_AS
|
||||
AX_CHECK_GNU_MAKE
|
||||
AC_CHECK_PROGS([PYTHON2], [python2.7 python2 python])
|
||||
AC_CHECK_PROGS([PYTHON3], [python3.5 python3.4 python3])
|
||||
AC_PROG_SED
|
||||
AC_PROG_MKDIR_P
|
||||
|
||||
@@ -142,12 +141,6 @@ else
|
||||
fi
|
||||
fi
|
||||
|
||||
if test -z "$PYTHON3"; then
|
||||
if test ! -f "$srcdir/src/intel/genxml/gen9_pack.h"; then
|
||||
AC_MSG_ERROR([Python3 not found - unable to generate sources])
|
||||
fi
|
||||
fi
|
||||
|
||||
AC_PROG_INSTALL
|
||||
|
||||
dnl We need a POSIX shell for parts of the build. Assume we have one
|
||||
@@ -2874,7 +2867,6 @@ if test "x$MESA_LLVM" = x1; then
|
||||
echo ""
|
||||
fi
|
||||
echo " PYTHON2: $PYTHON2"
|
||||
echo " PYTHON3: $PYTHON3"
|
||||
|
||||
echo ""
|
||||
echo " Run '${MAKE-make}' to build Mesa"
|
||||
|
@@ -166,6 +166,7 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
|
||||
<li>vec4 - force vec4 mode in vertex shader</li>
|
||||
<li>spill_fs - force spilling of all registers in the scalar backend (useful to debug spilling code)</li>
|
||||
<li>spill_vec4 - force spilling of all registers in the vec4 backend (useful to debug spilling code)</li>
|
||||
<li>norbc - disable single sampled render buffer compression</li>
|
||||
</ul>
|
||||
</ul>
|
||||
|
||||
|
@@ -14,15 +14,15 @@
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.3.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 12.0.0 Release Notes / TBD</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.3.0 is a new development release.
|
||||
Mesa 12.0.0 is a new development release.
|
||||
People who are concerned with stability and reliability should stick
|
||||
with a previous release or wait for Mesa 11.3.1.
|
||||
with a previous release or wait for Mesa 12.0.1.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.3.0 implements the OpenGL 4.3 API, but the version reported by
|
||||
Mesa 12.0.0 implements the OpenGL 4.3 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.3. OpenGL
|
@@ -19,17 +19,39 @@
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
# IN THE SOFTWARE.
|
||||
|
||||
git_sha1.h:
|
||||
.PHONY: git_sha1.h.tmp
|
||||
git_sha1.h.tmp:
|
||||
@# Don't assume that $(top_srcdir)/.git is a directory. It may be
|
||||
@# a gitlink file if $(top_srcdir) is a submodule checkout or a linked
|
||||
@# worktree.
|
||||
@# If we are building from a release tarball copy the bundled header.
|
||||
@if test -e $(top_srcdir)/.git; then \
|
||||
if which git > /dev/null; then \
|
||||
git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \
|
||||
sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
|
||||
> git_sha1.h ; \
|
||||
> git_sha1.h.tmp ; \
|
||||
fi \
|
||||
else \
|
||||
cp $(srcdir)/git_sha1.h git_sha1.h.tmp ;\
|
||||
chmod u+w git_sha1.h.tmp; \
|
||||
fi
|
||||
|
||||
git_sha1.h: git_sha1.h.tmp
|
||||
@echo "updating git_sha1.h"
|
||||
@if ! cmp -s git_sha1.h.tmp git_sha1.h; then \
|
||||
mv git_sha1.h.tmp git_sha1.h ;\
|
||||
else \
|
||||
rm git_sha1.h.tmp ;\
|
||||
fi
|
||||
|
||||
BUILT_SOURCES = git_sha1.h
|
||||
|
||||
# We want to keep the srcdir file since we need it on rebuild from tarball.
|
||||
# At the same time `make distclean' gets angry at us if we don't cleanup the
|
||||
# builddir one.
|
||||
distclean-local:
|
||||
test $(top_srcdir) != $(top_builddir) && rm $(builddir)/git_sha1.h
|
||||
|
||||
SUBDIRS = . gtest util mapi/glapi/gen mapi
|
||||
|
||||
# include only conditionally ?
|
||||
|
@@ -38,13 +38,14 @@ LOCAL_SRC_FILES := \
|
||||
$(LIBGLSL_FILES) \
|
||||
|
||||
LOCAL_C_INCLUDES := \
|
||||
$(MESA_TOP)/src/compiler/nir \
|
||||
$(MESA_TOP)/src/mapi \
|
||||
$(MESA_TOP)/src/mesa \
|
||||
$(MESA_TOP)/src/gallium/include \
|
||||
$(MESA_TOP)/src/gallium/auxiliary
|
||||
|
||||
LOCAL_STATIC_LIBRARIES := libmesa_compiler
|
||||
LOCAL_STATIC_LIBRARIES := \
|
||||
libmesa_compiler \
|
||||
libmesa_nir
|
||||
|
||||
LOCAL_MODULE := libmesa_glsl
|
||||
|
||||
|
@@ -86,4 +86,5 @@ EXTRA_DIST += \
|
||||
nir/nir_opcodes_c.py \
|
||||
nir/nir_opcodes_h.py \
|
||||
nir/nir_opt_algebraic.py \
|
||||
nir/tests
|
||||
nir/tests \
|
||||
SConscript.nir
|
||||
|
@@ -3442,11 +3442,11 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
|
||||
if (qual->flags.q.explicit_xfb_offset) {
|
||||
unsigned qual_xfb_offset;
|
||||
unsigned component_size = var->type->contains_double() ? 8 : 4;
|
||||
const glsl_type *t = get_varying_type(var, state->stage);
|
||||
|
||||
if (process_qualifier_constant(state, loc, "xfb_offset",
|
||||
qual->offset, &qual_xfb_offset) &&
|
||||
validate_xfb_offset_qualifier(loc, state, (int) qual_xfb_offset,
|
||||
t, component_size)) {
|
||||
var->type, component_size)) {
|
||||
var->data.offset = qual_xfb_offset;
|
||||
var->data.explicit_xfb_offset = true;
|
||||
}
|
||||
@@ -7336,6 +7336,12 @@ ast_interface_block::hir(exec_list *instructions,
|
||||
packing,
|
||||
this->block_name);
|
||||
|
||||
unsigned component_size = block_type->contains_double() ? 8 : 4;
|
||||
int xfb_offset =
|
||||
layout.flags.q.explicit_xfb_offset ? (int) qual_xfb_offset : -1;
|
||||
validate_xfb_offset_qualifier(&loc, state, xfb_offset, block_type,
|
||||
component_size);
|
||||
|
||||
if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) {
|
||||
YYLTYPE loc = this->get_location();
|
||||
_mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' "
|
||||
@@ -7474,13 +7480,6 @@ ast_interface_block::hir(exec_list *instructions,
|
||||
var_mode);
|
||||
}
|
||||
|
||||
unsigned component_size = block_type->contains_double() ? 8 : 4;
|
||||
int xfb_offset =
|
||||
layout.flags.q.explicit_xfb_offset ? (int) qual_xfb_offset : -1;
|
||||
const glsl_type *t = get_varying_type(var, state->stage);
|
||||
validate_xfb_offset_qualifier(&loc, state, xfb_offset, t,
|
||||
component_size);
|
||||
|
||||
var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED
|
||||
? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout;
|
||||
|
||||
@@ -7531,12 +7530,6 @@ ast_interface_block::hir(exec_list *instructions,
|
||||
*/
|
||||
assert(this->array_specifier == NULL);
|
||||
|
||||
unsigned component_size = block_type->contains_double() ? 8 : 4;
|
||||
int xfb_offset =
|
||||
layout.flags.q.explicit_xfb_offset ? (int) qual_xfb_offset : -1;
|
||||
validate_xfb_offset_qualifier(&loc, state, xfb_offset, block_type,
|
||||
component_size);
|
||||
|
||||
for (unsigned i = 0; i < num_variables; i++) {
|
||||
ir_variable *var =
|
||||
new(state) ir_variable(fields[i].type,
|
||||
|
@@ -622,7 +622,7 @@ builtin_variable_generator::generate_constants()
|
||||
/* Max uniforms/varyings: GLSL ES counts these in units of vectors; desktop
|
||||
* GL counts them in units of "components" or "floats".
|
||||
*/
|
||||
if (state->es_shader) {
|
||||
if (state->is_version(410, 100)) {
|
||||
add_const("gl_MaxVertexUniformVectors",
|
||||
state->Const.MaxVertexUniformComponents / 4);
|
||||
add_const("gl_MaxFragmentUniformVectors",
|
||||
@@ -1201,8 +1201,15 @@ builtin_variable_generator::generate_cs_special_vars()
|
||||
"gl_LocalInvocationID");
|
||||
add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID");
|
||||
add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups");
|
||||
add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
|
||||
add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
|
||||
if (state->ctx->Const.LowerCsDerivedVariables) {
|
||||
add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0);
|
||||
add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0);
|
||||
} else {
|
||||
add_system_value(SYSTEM_VALUE_GLOBAL_INVOCATION_ID,
|
||||
uvec3_t, "gl_GlobalInvocationID");
|
||||
add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_INDEX,
|
||||
uint_t, "gl_LocalInvocationIndex");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1431,16 +1438,16 @@ initialize_cs_derived_variables(gl_shader *shader,
|
||||
* These are initialized in the main function.
|
||||
*/
|
||||
void
|
||||
_mesa_glsl_initialize_derived_variables(gl_shader *shader)
|
||||
_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
|
||||
gl_shader *shader)
|
||||
{
|
||||
/* We only need to set CS variables currently. */
|
||||
if (shader->Stage != MESA_SHADER_COMPUTE)
|
||||
return;
|
||||
if (shader->Stage == MESA_SHADER_COMPUTE &&
|
||||
ctx->Const.LowerCsDerivedVariables) {
|
||||
ir_function_signature *const main_sig =
|
||||
_mesa_get_main_function_signature(shader);
|
||||
|
||||
ir_function_signature *const main_sig =
|
||||
_mesa_get_main_function_signature(shader);
|
||||
if (main_sig == NULL)
|
||||
return;
|
||||
|
||||
initialize_cs_derived_variables(shader, main_sig);
|
||||
if (main_sig != NULL)
|
||||
initialize_cs_derived_variables(shader, main_sig);
|
||||
}
|
||||
}
|
||||
|
@@ -1687,7 +1687,7 @@ set_shader_inout_layout(struct gl_shader *shader,
|
||||
shader->TessEval.PointMode = state->in_qualifier->point_mode;
|
||||
break;
|
||||
case MESA_SHADER_GEOMETRY:
|
||||
shader->Geom.VerticesOut = 0;
|
||||
shader->Geom.VerticesOut = -1;
|
||||
if (state->out_qualifier->flags.q.max_vertices) {
|
||||
unsigned qual_max_vertices;
|
||||
if (state->out_qualifier->max_vertices->
|
||||
@@ -1907,7 +1907,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_glsl_initialize_derived_variables(shader);
|
||||
_mesa_glsl_initialize_derived_variables(ctx, shader);
|
||||
|
||||
delete state->symbols;
|
||||
ralloc_free(state);
|
||||
|
@@ -2021,26 +2021,3 @@ mode_string(const ir_variable *var)
|
||||
assert(!"Should not get here.");
|
||||
return "invalid variable";
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the varying type stripped of the outermost array if we're processing
|
||||
* a stage whose varyings are arrays indexed by a vertex number (such as
|
||||
* geometry shader inputs).
|
||||
*/
|
||||
const glsl_type *
|
||||
get_varying_type(const ir_variable *var, gl_shader_stage stage)
|
||||
{
|
||||
const glsl_type *type = var->type;
|
||||
|
||||
if (!var->data.patch &&
|
||||
((var->data.mode == ir_var_shader_out &&
|
||||
stage == MESA_SHADER_TESS_CTRL) ||
|
||||
(var->data.mode == ir_var_shader_in &&
|
||||
(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
|
||||
stage == MESA_SHADER_GEOMETRY)))) {
|
||||
assert(type->is_array());
|
||||
type = type->fields.array;
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
@@ -2562,7 +2562,8 @@ _mesa_glsl_initialize_variables(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state);
|
||||
|
||||
extern void
|
||||
_mesa_glsl_initialize_derived_variables(gl_shader *shader);
|
||||
_mesa_glsl_initialize_derived_variables(struct gl_context *ctx,
|
||||
gl_shader *shader);
|
||||
|
||||
extern void
|
||||
_mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
|
||||
@@ -2621,9 +2622,6 @@ is_gl_identifier(const char *s)
|
||||
return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
|
||||
}
|
||||
|
||||
const glsl_type *
|
||||
get_varying_type(const ir_variable *var, gl_shader_stage stage);
|
||||
|
||||
extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
|
||||
|
@@ -145,6 +145,8 @@ set_opaque_binding(void *mem_ctx, gl_shader_program *prog,
|
||||
storage->opaque[sh].active) {
|
||||
for (unsigned i = 0; i < elements; i++) {
|
||||
const unsigned index = storage->opaque[sh].index + i;
|
||||
if (index >= ARRAY_SIZE(shader->ImageUnits))
|
||||
break;
|
||||
shader->ImageUnits[index] = storage->storage[i].i;
|
||||
}
|
||||
}
|
||||
|
@@ -40,6 +40,29 @@
|
||||
#include "program.h"
|
||||
|
||||
|
||||
/**
|
||||
* Get the varying type stripped of the outermost array if we're processing
|
||||
* a stage whose varyings are arrays indexed by a vertex number (such as
|
||||
* geometry shader inputs).
|
||||
*/
|
||||
static const glsl_type *
|
||||
get_varying_type(const ir_variable *var, gl_shader_stage stage)
|
||||
{
|
||||
const glsl_type *type = var->type;
|
||||
|
||||
if (!var->data.patch &&
|
||||
((var->data.mode == ir_var_shader_out &&
|
||||
stage == MESA_SHADER_TESS_CTRL) ||
|
||||
(var->data.mode == ir_var_shader_in &&
|
||||
(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
|
||||
stage == MESA_SHADER_GEOMETRY)))) {
|
||||
assert(type->is_array());
|
||||
type = type->fields.array;
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
static void
|
||||
create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
|
||||
size_t name_length, unsigned *count,
|
||||
@@ -1094,21 +1117,23 @@ store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
|
||||
num_buffers++;
|
||||
buffer_stream_id = -1;
|
||||
continue;
|
||||
} else if (buffer_stream_id == -1) {
|
||||
/* First varying writing to this buffer: remember its stream */
|
||||
buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
|
||||
} else if (buffer_stream_id !=
|
||||
(int) tfeedback_decls[i].get_stream_id()) {
|
||||
/* Varying writes to the same buffer from a different stream */
|
||||
linker_error(prog,
|
||||
"Transform feedback can't capture varyings belonging "
|
||||
"to different vertex streams in a single buffer. "
|
||||
"Varying %s writes to buffer from stream %u, other "
|
||||
"varyings in the same buffer write from stream %u.",
|
||||
tfeedback_decls[i].name(),
|
||||
tfeedback_decls[i].get_stream_id(),
|
||||
buffer_stream_id);
|
||||
return false;
|
||||
} else if (tfeedback_decls[i].is_varying()) {
|
||||
if (buffer_stream_id == -1) {
|
||||
/* First varying writing to this buffer: remember its stream */
|
||||
buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
|
||||
} else if (buffer_stream_id !=
|
||||
(int) tfeedback_decls[i].get_stream_id()) {
|
||||
/* Varying writes to the same buffer from a different stream */
|
||||
linker_error(prog,
|
||||
"Transform feedback can't capture varyings belonging "
|
||||
"to different vertex streams in a single buffer. "
|
||||
"Varying %s writes to buffer from stream %u, other "
|
||||
"varyings in the same buffer write from stream %u.",
|
||||
tfeedback_decls[i].name(),
|
||||
tfeedback_decls[i].get_stream_id(),
|
||||
buffer_stream_id);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (has_xfb_qualifiers) {
|
||||
|
@@ -1980,7 +1980,7 @@ link_gs_inout_layout_qualifiers(struct gl_shader_program *prog,
|
||||
struct gl_shader **shader_list,
|
||||
unsigned num_shaders)
|
||||
{
|
||||
linked_shader->Geom.VerticesOut = 0;
|
||||
linked_shader->Geom.VerticesOut = -1;
|
||||
linked_shader->Geom.Invocations = 0;
|
||||
linked_shader->Geom.InputType = PRIM_UNKNOWN;
|
||||
linked_shader->Geom.OutputType = PRIM_UNKNOWN;
|
||||
@@ -2024,8 +2024,8 @@ link_gs_inout_layout_qualifiers(struct gl_shader_program *prog,
|
||||
linked_shader->Geom.OutputType = shader->Geom.OutputType;
|
||||
}
|
||||
|
||||
if (shader->Geom.VerticesOut != 0) {
|
||||
if (linked_shader->Geom.VerticesOut != 0 &&
|
||||
if (shader->Geom.VerticesOut != -1) {
|
||||
if (linked_shader->Geom.VerticesOut != -1 &&
|
||||
linked_shader->Geom.VerticesOut != shader->Geom.VerticesOut) {
|
||||
linker_error(prog, "geometry shader defined with conflicting "
|
||||
"output vertex count (%d and %d)\n",
|
||||
@@ -2067,7 +2067,7 @@ link_gs_inout_layout_qualifiers(struct gl_shader_program *prog,
|
||||
}
|
||||
prog->Geom.OutputType = linked_shader->Geom.OutputType;
|
||||
|
||||
if (linked_shader->Geom.VerticesOut == 0) {
|
||||
if (linked_shader->Geom.VerticesOut == -1) {
|
||||
linker_error(prog,
|
||||
"geometry shader didn't declare max_vertices\n");
|
||||
return;
|
||||
|
@@ -168,6 +168,7 @@ lower_distance_visitor::visit(ir_variable *ir)
|
||||
*new_var = ir->clone(ralloc_parent(ir), NULL);
|
||||
(*new_var)->name = ralloc_strdup(*new_var, GLSL_CLIP_VAR_NAME);
|
||||
(*new_var)->data.max_array_access = new_size - 1;
|
||||
(*new_var)->data.location = VARYING_SLOT_CLIP_DIST0;
|
||||
|
||||
if (!ir->type->fields.array->is_array()) {
|
||||
/* gl_ClipDistance (used for vertex, tessellation evaluation and
|
||||
|
@@ -1752,6 +1752,8 @@ nir_intrinsic_from_system_value(gl_system_value val)
|
||||
return nir_intrinsic_load_sample_mask_in;
|
||||
case SYSTEM_VALUE_LOCAL_INVOCATION_ID:
|
||||
return nir_intrinsic_load_local_invocation_id;
|
||||
case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX:
|
||||
return nir_intrinsic_load_local_invocation_index;
|
||||
case SYSTEM_VALUE_WORK_GROUP_ID:
|
||||
return nir_intrinsic_load_work_group_id;
|
||||
case SYSTEM_VALUE_NUM_WORK_GROUPS:
|
||||
@@ -1801,6 +1803,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin)
|
||||
return SYSTEM_VALUE_SAMPLE_MASK_IN;
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
return SYSTEM_VALUE_LOCAL_INVOCATION_ID;
|
||||
case nir_intrinsic_load_local_invocation_index:
|
||||
return SYSTEM_VALUE_LOCAL_INVOCATION_INDEX;
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
return SYSTEM_VALUE_NUM_WORK_GROUPS;
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
|
@@ -1682,6 +1682,8 @@ typedef struct nir_shader_compiler_options {
|
||||
|
||||
/* Indicates that the driver only has zero-based vertex id */
|
||||
bool vertex_id_zero_based;
|
||||
|
||||
bool lower_cs_local_index_from_id;
|
||||
} nir_shader_compiler_options;
|
||||
|
||||
typedef struct nir_shader_info {
|
||||
|
@@ -44,6 +44,7 @@ gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader)
|
||||
case nir_intrinsic_load_primitive_id:
|
||||
case nir_intrinsic_load_invocation_id:
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
case nir_intrinsic_load_local_invocation_index:
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
case nir_intrinsic_load_num_work_groups:
|
||||
shader->info.system_values_read |=
|
||||
|
@@ -299,10 +299,12 @@ SYSTEM_VALUE(tess_level_outer, 4, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(tess_level_inner, 2, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(patch_vertices_in, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(local_invocation_id, 3, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(local_invocation_index, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(work_group_id, 3, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(user_clip_plane, 4, 1, UCP_ID, xx, xx)
|
||||
SYSTEM_VALUE(num_work_groups, 3, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(helper_invocation, 1, 0, xx, xx, xx)
|
||||
SYSTEM_VALUE(channel_num, 1, 0, xx, xx, xx)
|
||||
|
||||
/*
|
||||
* Load operations pull data from some piece of GPU memory. All load
|
||||
|
@@ -48,7 +48,7 @@ convert_block(nir_block *block, nir_builder *b)
|
||||
|
||||
b->cursor = nir_after_instr(&load_var->instr);
|
||||
|
||||
nir_ssa_def *sysval;
|
||||
nir_ssa_def *sysval = NULL;
|
||||
switch (var->data.location) {
|
||||
case SYSTEM_VALUE_GLOBAL_INVOCATION_ID: {
|
||||
/* From the GLSL man page for gl_GlobalInvocationID:
|
||||
@@ -74,6 +74,12 @@ convert_block(nir_block *block, nir_builder *b)
|
||||
}
|
||||
|
||||
case SYSTEM_VALUE_LOCAL_INVOCATION_INDEX: {
|
||||
/* If lower_cs_local_index_from_id is true, then we derive the local
|
||||
* index from the local id.
|
||||
*/
|
||||
if (!b->shader->options->lower_cs_local_index_from_id)
|
||||
break;
|
||||
|
||||
/* From the GLSL man page for gl_LocalInvocationIndex:
|
||||
*
|
||||
* "The value of gl_LocalInvocationIndex is equal to
|
||||
@@ -111,12 +117,14 @@ convert_block(nir_block *block, nir_builder *b)
|
||||
nir_load_system_value(b, nir_intrinsic_load_base_instance, 0));
|
||||
break;
|
||||
|
||||
default: {
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (sysval == NULL) {
|
||||
nir_intrinsic_op sysval_op =
|
||||
nir_intrinsic_from_system_value(var->data.location);
|
||||
sysval = nir_load_system_value(b, sysval_op, 0);
|
||||
break;
|
||||
} /* default */
|
||||
}
|
||||
|
||||
nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval));
|
||||
|
@@ -331,7 +331,9 @@ validate_alu_dest(nir_alu_instr *instr, validate_state *state)
|
||||
* destinations of type float
|
||||
*/
|
||||
nir_alu_instr *alu = nir_instr_as_alu(state->instr);
|
||||
validate_assert(state, nir_op_infos[alu->op].output_type == nir_type_float ||
|
||||
validate_assert(state,
|
||||
(nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) ==
|
||||
nir_type_float) ||
|
||||
!dest->saturate);
|
||||
|
||||
unsigned bit_size = dest->dest.is_ssa ? dest->dest.ssa.bit_size
|
||||
|
@@ -814,10 +814,6 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
|
||||
|
||||
dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER;
|
||||
|
||||
dri2_dpy->extensions[0] = &droid_image_loader_extension.base;
|
||||
dri2_dpy->extensions[1] = &use_invalidate.base;
|
||||
dri2_dpy->extensions[2] = &image_lookup_extension.base;
|
||||
|
||||
/* render nodes cannot use Gem names, and thus do not support
|
||||
* the __DRI_DRI2_LOADER extension */
|
||||
if (!dri2_dpy->is_render_node) {
|
||||
@@ -827,10 +823,13 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
|
||||
dri2_dpy->dri2_loader_extension.flushFrontBuffer = droid_flush_front_buffer;
|
||||
dri2_dpy->dri2_loader_extension.getBuffersWithFormat =
|
||||
droid_get_buffers_with_format;
|
||||
dri2_dpy->extensions[3] = &dri2_dpy->dri2_loader_extension.base;
|
||||
dri2_dpy->extensions[4] = NULL;
|
||||
} else
|
||||
dri2_dpy->extensions[3] = NULL;
|
||||
dri2_dpy->extensions[0] = &dri2_dpy->dri2_loader_extension.base;
|
||||
} else {
|
||||
dri2_dpy->extensions[0] = &droid_image_loader_extension.base;
|
||||
}
|
||||
dri2_dpy->extensions[1] = &use_invalidate.base;
|
||||
dri2_dpy->extensions[2] = &image_lookup_extension.base;
|
||||
dri2_dpy->extensions[3] = NULL;
|
||||
|
||||
|
||||
if (!dri2_create_screen(dpy)) {
|
||||
|
@@ -1985,6 +1985,10 @@ CodeEmitterNVC0::emitMOV(const Instruction *i)
|
||||
opc |= i->lanes << 5;
|
||||
|
||||
emitForm_B(i, opc);
|
||||
|
||||
// Explicitly emit the predicate source as emitForm_B skips it.
|
||||
if (i->src(0).getFile() == FILE_PREDICATE)
|
||||
srcId(i->src(0), 20);
|
||||
} else {
|
||||
uint32_t imm;
|
||||
|
||||
|
@@ -365,6 +365,12 @@ int BitSet::findFreeRange(unsigned int count) const
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// If we couldn't find a position, we can have a left-over -1 in pos. Make
|
||||
// sure to abort in such a case.
|
||||
if (pos < 0)
|
||||
return -1;
|
||||
|
||||
pos += i * 32;
|
||||
|
||||
return ((pos + count) <= size) ? pos : -1;
|
||||
|
@@ -298,7 +298,7 @@ int rvid_get_video_param(struct pipe_screen *screen,
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_MAIN:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH:
|
||||
return 41;
|
||||
return (rscreen->family < CHIP_TONGA) ? 41 : 52;
|
||||
case PIPE_VIDEO_PROFILE_HEVC_MAIN:
|
||||
case PIPE_VIDEO_PROFILE_HEVC_MAIN_10:
|
||||
return 186;
|
||||
|
@@ -3692,7 +3692,10 @@ static void si_init_config(struct si_context *sctx)
|
||||
raster_config_1 = 0x0000002a;
|
||||
break;
|
||||
case CHIP_ICELAND:
|
||||
raster_config = 0x00000002;
|
||||
if (num_rb == 1)
|
||||
raster_config = 0x00000000;
|
||||
else
|
||||
raster_config = 0x00000002;
|
||||
raster_config_1 = 0x00000000;
|
||||
break;
|
||||
case CHIP_CARRIZO:
|
||||
|
82
src/intel/genxml/Android.mk
Normal file
82
src/intel/genxml/Android.mk
Normal file
@@ -0,0 +1,82 @@
|
||||
# Copyright © 2016 Intel Corporation
|
||||
# Copyright © 2016 Mauro Rossi <issor.oruam@gmail.com>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
LOCAL_PATH := $(call my-dir)
|
||||
|
||||
# Import variable GENERATED_FILES.
|
||||
include $(LOCAL_PATH)/Makefile.sources
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_MODULE := libmesa_genxml
|
||||
|
||||
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
|
||||
|
||||
intermediates := $(call local-generated-sources-dir)
|
||||
|
||||
# dummy.c source file is generated to meet the build system's rules.
|
||||
LOCAL_GENERATED_SOURCES += $(intermediates)/dummy.c
|
||||
|
||||
$(intermediates)/dummy.c:
|
||||
@mkdir -p $(dir $@)
|
||||
@echo "Gen Dummy: $(PRIVATE_MODULE) <= $(notdir $(@))"
|
||||
$(hide) touch $@
|
||||
|
||||
# This is the list of auto-generated files headers
|
||||
LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/genxml/, $(GENXML_GENERATED_FILES))
|
||||
|
||||
define header-gen
|
||||
@mkdir -p $(dir $@)
|
||||
@echo "Gen Header: $(PRIVATE_MODULE) <= $(notdir $(@))"
|
||||
$(hide) $(PRIVATE_SCRIPT) $(PRIVATE_XML) > $@
|
||||
endef
|
||||
|
||||
$(intermediates)/genxml/gen6_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(intermediates)/genxml/gen6_pack.h: PRIVATE_XML := $(LOCAL_PATH)/gen6.xml
|
||||
$(intermediates)/genxml/gen6_pack.h: $(LOCAL_PATH)/gen6.xml $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(call header-gen)
|
||||
|
||||
$(intermediates)/genxml/gen7_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(intermediates)/genxml/gen7_pack.h: PRIVATE_XML := $(LOCAL_PATH)/gen7.xml
|
||||
$(intermediates)/genxml/gen7_pack.h: $(LOCAL_PATH)/gen7.xml $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(call header-gen)
|
||||
|
||||
$(intermediates)/genxml/gen75_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(intermediates)/genxml/gen75_pack.h: PRIVATE_XML := $(LOCAL_PATH)/gen75.xml
|
||||
$(intermediates)/genxml/gen75_pack.h: $(LOCAL_PATH)/gen75.xml $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(call header-gen)
|
||||
|
||||
$(intermediates)/genxml/gen8_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(intermediates)/genxml/gen8_pack.h: PRIVATE_XML := $(LOCAL_PATH)/gen8.xml
|
||||
$(intermediates)/genxml/gen8_pack.h: $(LOCAL_PATH)/gen8.xml $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(call header-gen)
|
||||
|
||||
$(intermediates)/genxml/gen9_pack.h: PRIVATE_SCRIPT := $(MESA_PYTHON2) $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(intermediates)/genxml/gen9_pack.h: PRIVATE_XML := $(LOCAL_PATH)/gen9.xml
|
||||
$(intermediates)/genxml/gen9_pack.h: $(LOCAL_PATH)/gen9.xml $(LOCAL_PATH)/gen_pack_header.py
|
||||
$(call header-gen)
|
||||
|
||||
LOCAL_EXPORT_C_INCLUDE_DIRS := \
|
||||
$(MESA_TOP)/src/intel \
|
||||
$(intermediates)
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
@@ -23,14 +23,14 @@ include Makefile.sources
|
||||
|
||||
BUILT_SOURCES = $(GENXML_GENERATED_FILES)
|
||||
|
||||
PYTHON3_GEN = $(AM_V_GEN)$(PYTHON3) $(PYTHON_FLAGS)
|
||||
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
|
||||
|
||||
SUFFIXES = _pack.h .xml
|
||||
|
||||
$(BUILT_SOURCES): gen_pack_header.py
|
||||
|
||||
.xml_pack.h:
|
||||
$(PYTHON3_GEN) $(srcdir)/gen_pack_header.py $< > $@
|
||||
$(PYTHON_GEN) $(srcdir)/gen_pack_header.py $< > $@
|
||||
|
||||
CLEANFILES = $(BUILT_SOURCES)
|
||||
|
||||
|
@@ -1,5 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
#!/usr/bin/env python2
|
||||
#encoding=utf-8
|
||||
|
||||
from __future__ import (
|
||||
absolute_import, division, print_function, unicode_literals
|
||||
)
|
||||
import xml.parsers.expat
|
||||
import re
|
||||
import sys
|
||||
@@ -197,7 +201,7 @@ def to_alphanum(name):
|
||||
|
||||
def safe_name(name):
|
||||
name = to_alphanum(name)
|
||||
if not str.isalpha(name[0]):
|
||||
if not name[0].isalpha():
|
||||
name = '_' + name
|
||||
|
||||
return name
|
||||
@@ -209,9 +213,9 @@ def num_from_str(num_str):
|
||||
assert(not num_str.startswith('0') and 'octals numbers not allowed')
|
||||
return int(num_str)
|
||||
|
||||
class Field:
|
||||
ufixed_pattern = re.compile("u(\d+)\.(\d+)")
|
||||
sfixed_pattern = re.compile("s(\d+)\.(\d+)")
|
||||
class Field(object):
|
||||
ufixed_pattern = re.compile(r"u(\d+)\.(\d+)")
|
||||
sfixed_pattern = re.compile(r"s(\d+)\.(\d+)")
|
||||
|
||||
def __init__(self, parser, attrs):
|
||||
self.parser = parser
|
||||
@@ -278,7 +282,7 @@ class Field:
|
||||
for value in self.values:
|
||||
print("#define %-40s %d" % (prefix + value.name, value.value))
|
||||
|
||||
class Group:
|
||||
class Group(object):
|
||||
def __init__(self, parser, parent, start, count, size):
|
||||
self.parser = parser
|
||||
self.parent = parent
|
||||
@@ -466,12 +470,12 @@ class Group:
|
||||
print(" dw[%d] = %s;" % (index, v))
|
||||
print(" dw[%d] = %s >> 32;" % (index + 1, v))
|
||||
|
||||
class Value:
|
||||
class Value(object):
|
||||
def __init__(self, attrs):
|
||||
self.name = safe_name(attrs["name"])
|
||||
self.value = int(attrs["value"])
|
||||
|
||||
class Parser:
|
||||
class Parser(object):
|
||||
def __init__(self):
|
||||
self.parser = xml.parsers.expat.ParserCreate()
|
||||
self.parser.StartElementHandler = self.start_element
|
||||
|
155
src/intel/isl/Android.mk
Normal file
155
src/intel/isl/Android.mk
Normal file
@@ -0,0 +1,155 @@
|
||||
# Copyright © 2016 Intel Corporation
|
||||
# Copyright © 2016 Mauro Rossi <issor.oruam@gmail.com>
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
#
|
||||
|
||||
LOCAL_PATH := $(call my-dir)
|
||||
|
||||
# Import variables LIBISL_FILES.
|
||||
include $(LOCAL_PATH)/Makefile.sources
|
||||
|
||||
LIBISL_GENX_COMMON_INCLUDES := \
|
||||
$(MESA_TOP)/src/ \
|
||||
$(MESA_TOP)/src/mesa/drivers/dri/i965
|
||||
|
||||
# ---------------------------------------
|
||||
# Build libisl_gen7
|
||||
# ---------------------------------------
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_MODULE := libmesa_isl_gen7
|
||||
|
||||
LOCAL_SRC_FILES := $(ISL_GEN7_FILES)
|
||||
|
||||
LOCAL_CFLAGS := -DGEN_VERSIONx10=70
|
||||
|
||||
LOCAL_C_INCLUDES := $(LIBISL_GENX_COMMON_INCLUDES)
|
||||
|
||||
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
||||
|
||||
# ---------------------------------------
|
||||
# Build libisl_gen75
|
||||
# ---------------------------------------
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_MODULE := libmesa_isl_gen75
|
||||
|
||||
LOCAL_SRC_FILES := $(ISL_GEN75_FILES)
|
||||
|
||||
LOCAL_CFLAGS := -DGEN_VERSIONx10=75
|
||||
|
||||
LOCAL_C_INCLUDES := $(LIBISL_GENX_COMMON_INCLUDES)
|
||||
|
||||
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
||||
|
||||
# ---------------------------------------
|
||||
# Build libisl_gen8
|
||||
# ---------------------------------------
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_MODULE := libmesa_isl_gen8
|
||||
|
||||
LOCAL_SRC_FILES := $(ISL_GEN8_FILES)
|
||||
|
||||
LOCAL_CFLAGS := -DGEN_VERSIONx10=80
|
||||
|
||||
LOCAL_C_INCLUDES := $(LIBISL_GENX_COMMON_INCLUDES)
|
||||
|
||||
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
||||
|
||||
# ---------------------------------------
|
||||
# Build libisl_gen9
|
||||
# ---------------------------------------
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_MODULE := libmesa_isl_gen9
|
||||
|
||||
LOCAL_SRC_FILES := $(ISL_GEN9_FILES)
|
||||
|
||||
LOCAL_CFLAGS := -DGEN_VERSIONx10=90
|
||||
|
||||
LOCAL_C_INCLUDES := $(LIBISL_GENX_COMMON_INCLUDES)
|
||||
|
||||
LOCAL_WHOLE_STATIC_LIBRARIES := libmesa_genxml
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
||||
|
||||
# ---------------------------------------
|
||||
# Build libisl
|
||||
# ---------------------------------------
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_MODULE := libmesa_isl
|
||||
|
||||
LOCAL_SRC_FILES := $(ISL_FILES)
|
||||
|
||||
LOCAL_C_INCLUDES := \
|
||||
$(MESA_TOP)/src/mapi \
|
||||
$(MESA_TOP)/src/mesa \
|
||||
$(MESA_TOP)/src/mesa/drivers/dri/i965 \
|
||||
|
||||
LOCAL_EXPORT_C_INCLUDE_DIRS := $(MESA_TOP)/src/intel
|
||||
|
||||
LOCAL_WHOLE_STATIC_LIBRARIES := \
|
||||
libmesa_isl_gen7 \
|
||||
libmesa_isl_gen75 \
|
||||
libmesa_isl_gen8 \
|
||||
libmesa_isl_gen9
|
||||
|
||||
# Autogenerated sources
|
||||
|
||||
LOCAL_MODULE_CLASS := STATIC_LIBRARIES
|
||||
|
||||
intermediates := $(call local-generated-sources-dir)
|
||||
|
||||
LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, $(ISL_GENERATED_FILES))
|
||||
|
||||
define bash-gen
|
||||
@mkdir -p $(dir $@)
|
||||
@echo "Gen Bash: $(PRIVATE_MODULE) <= $(notdir $(@))"
|
||||
$(hide) $(PRIVATE_SCRIPT) < $(PRIVATE_CSV) > $@
|
||||
endef
|
||||
|
||||
isl_format_layout_deps := \
|
||||
$(LOCAL_PATH)/isl_format_layout_gen.bash \
|
||||
$(LOCAL_PATH)/isl_format_layout.csv
|
||||
|
||||
$(intermediates)/isl_format_layout.c: PRIVATE_SCRIPT := bash -c $(LOCAL_PATH)/isl_format_layout_gen.bash
|
||||
$(intermediates)/isl_format_layout.c: PRIVATE_CSV := $(LOCAL_PATH)/isl_format_layout.csv
|
||||
$(intermediates)/isl_format_layout.c: $(isl_format_layout_deps)
|
||||
$(call bash-gen)
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
@@ -50,7 +50,7 @@ libisl_la_CFLAGS = $(CFLAGS) -Wno-override-init
|
||||
|
||||
libisl_la_LIBADD = $(ISL_GEN_LIBS)
|
||||
|
||||
libisl_la_SOURCES = $(ISL_FILES)
|
||||
libisl_la_SOURCES = $(ISL_FILES) $(ISL_GENERATED_FILES)
|
||||
|
||||
libisl_gen7_la_SOURCES = $(ISL_GEN7_FILES)
|
||||
libisl_gen7_la_CFLAGS = $(libisl_la_CFLAGS) -DGEN_VERSIONx10=70
|
||||
|
@@ -2,7 +2,6 @@ ISL_FILES = \
|
||||
isl.c \
|
||||
isl.h \
|
||||
isl_format.c \
|
||||
isl_format_layout.c \
|
||||
isl_gen4.c \
|
||||
isl_gen4.h \
|
||||
isl_gen6.c \
|
||||
|
@@ -1065,24 +1065,14 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||
|
||||
const unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
|
||||
const unsigned push_constant_data_size =
|
||||
(local_id_dwords + prog_data->nr_params) * 4;
|
||||
const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
|
||||
const unsigned param_aligned_count =
|
||||
reg_aligned_constant_size / sizeof(uint32_t);
|
||||
|
||||
/* If we don't actually have any push constants, bail. */
|
||||
if (reg_aligned_constant_size == 0)
|
||||
if (cs_prog_data->push.total.size == 0)
|
||||
return (struct anv_state) { .offset = 0 };
|
||||
|
||||
const unsigned threads = pipeline->cs_thread_width_max;
|
||||
const unsigned total_push_constants_size =
|
||||
reg_aligned_constant_size * threads;
|
||||
const unsigned push_constant_alignment =
|
||||
cmd_buffer->device->info.gen < 8 ? 32 : 64;
|
||||
const unsigned aligned_total_push_constants_size =
|
||||
ALIGN(total_push_constants_size, push_constant_alignment);
|
||||
ALIGN(cs_prog_data->push.total.size, push_constant_alignment);
|
||||
struct anv_state state =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
|
||||
aligned_total_push_constants_size,
|
||||
@@ -1091,21 +1081,33 @@ anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer)
|
||||
/* Walk through the param array and fill the buffer with data */
|
||||
uint32_t *u32_map = state.map;
|
||||
|
||||
brw_cs_fill_local_id_payload(cs_prog_data, u32_map, threads,
|
||||
reg_aligned_constant_size);
|
||||
|
||||
/* Setup uniform data for the first thread */
|
||||
for (unsigned i = 0; i < prog_data->nr_params; i++) {
|
||||
uint32_t offset = (uintptr_t)prog_data->param[i];
|
||||
u32_map[local_id_dwords + i] = *(uint32_t *)((uint8_t *)data + offset);
|
||||
if (cs_prog_data->push.cross_thread.size > 0) {
|
||||
assert(cs_prog_data->thread_local_id_index < 0 ||
|
||||
cs_prog_data->thread_local_id_index >=
|
||||
cs_prog_data->push.cross_thread.dwords);
|
||||
for (unsigned i = 0;
|
||||
i < cs_prog_data->push.cross_thread.dwords;
|
||||
i++) {
|
||||
uint32_t offset = (uintptr_t)prog_data->param[i];
|
||||
u32_map[i] = *(uint32_t *)((uint8_t *)data + offset);
|
||||
}
|
||||
}
|
||||
|
||||
/* Copy uniform data from the first thread to every other thread */
|
||||
const size_t uniform_data_size = prog_data->nr_params * sizeof(uint32_t);
|
||||
for (unsigned t = 1; t < threads; t++) {
|
||||
memcpy(&u32_map[t * param_aligned_count + local_id_dwords],
|
||||
&u32_map[local_id_dwords],
|
||||
uniform_data_size);
|
||||
if (cs_prog_data->push.per_thread.size > 0) {
|
||||
for (unsigned t = 0; t < cs_prog_data->threads; t++) {
|
||||
unsigned dst =
|
||||
8 * (cs_prog_data->push.per_thread.regs * t +
|
||||
cs_prog_data->push.cross_thread.regs);
|
||||
unsigned src = cs_prog_data->push.cross_thread.dwords;
|
||||
for ( ; src < prog_data->nr_params; src++, dst++) {
|
||||
if (src != cs_prog_data->thread_local_id_index) {
|
||||
uint32_t offset = (uintptr_t)prog_data->param[src];
|
||||
u32_map[dst] = *(uint32_t *)((uint8_t *)data + offset);
|
||||
} else {
|
||||
u32_map[dst] = t * cs_prog_data->simd_size;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!cmd_buffer->device->info.has_llc)
|
||||
|
@@ -51,6 +51,20 @@ def hash(name):
|
||||
|
||||
return h
|
||||
|
||||
def print_guard_start(name):
|
||||
if "Wayland" in name:
|
||||
print "#ifdef VK_USE_PLATFORM_WAYLAND_KHR"
|
||||
if "Xcb" in name:
|
||||
print "#ifdef VK_USE_PLATFORM_XCB_KHR"
|
||||
return
|
||||
|
||||
def print_guard_end(name):
|
||||
if "Wayland" in name:
|
||||
print "#endif // VK_USE_PLATFORM_WAYLAND_KHR"
|
||||
if "Xcb" in name:
|
||||
print "#endif // VK_USE_PLATFORM_XCB_KHR"
|
||||
return
|
||||
|
||||
opt_header = False
|
||||
opt_code = False
|
||||
|
||||
@@ -86,7 +100,9 @@ if opt_header:
|
||||
print " struct {"
|
||||
|
||||
for type, name, args, num, h in entrypoints:
|
||||
print_guard_start(name)
|
||||
print " %s (*%s)%s;" % (type, name, args)
|
||||
print_guard_end(name)
|
||||
print " };\n"
|
||||
print " };\n"
|
||||
print "};\n"
|
||||
@@ -94,12 +110,14 @@ if opt_header:
|
||||
print "void anv_set_dispatch_devinfo(const struct brw_device_info *info);\n"
|
||||
|
||||
for type, name, args, num, h in entrypoints:
|
||||
print_guard_start(name)
|
||||
print "%s anv_%s%s;" % (type, name, args)
|
||||
print "%s gen7_%s%s;" % (type, name, args)
|
||||
print "%s gen75_%s%s;" % (type, name, args)
|
||||
print "%s gen8_%s%s;" % (type, name, args)
|
||||
print "%s gen9_%s%s;" % (type, name, args)
|
||||
print "%s anv_validate_%s%s;" % (type, name, args)
|
||||
print_guard_end(name)
|
||||
exit()
|
||||
|
||||
|
||||
@@ -146,9 +164,11 @@ static const char strings[] ="""
|
||||
offsets = []
|
||||
i = 0;
|
||||
for type, name, args, num, h in entrypoints:
|
||||
print_guard_start(name)
|
||||
print " \"vk%s\\0\"" % name
|
||||
offsets.append(i)
|
||||
i += 2 + len(name) + 1
|
||||
print_guard_end(name)
|
||||
print """ ;
|
||||
|
||||
/* Weak aliases for all potential validate functions. These will resolve to
|
||||
@@ -162,15 +182,21 @@ print """ ;
|
||||
|
||||
print "\nstatic const struct anv_entrypoint entrypoints[] = {"
|
||||
for type, name, args, num, h in entrypoints:
|
||||
print_guard_start(name)
|
||||
print " { %5d, 0x%08x }," % (offsets[num], h)
|
||||
print_guard_end(name)
|
||||
print "};\n"
|
||||
|
||||
for layer in [ "anv", "validate", "gen7", "gen75", "gen8", "gen9" ]:
|
||||
for type, name, args, num, h in entrypoints:
|
||||
print_guard_start(name)
|
||||
print "%s %s_%s%s __attribute__ ((weak));" % (type, layer, name, args)
|
||||
print_guard_end(name)
|
||||
print "\nconst struct anv_dispatch_table %s_layer = {" % layer
|
||||
for type, name, args, num, h in entrypoints:
|
||||
print_guard_start(name)
|
||||
print " .%s = %s_%s," % (name, layer, name)
|
||||
print_guard_end(name)
|
||||
print "};\n"
|
||||
|
||||
print """
|
||||
@@ -242,8 +268,10 @@ anv_resolve_entrypoint(uint32_t index)
|
||||
# lets the resolver look it up in the table.
|
||||
|
||||
for type, name, args, num, h in entrypoints:
|
||||
print_guard_start(name)
|
||||
print "static void *resolve_%s(void) { return anv_resolve_entrypoint(%d); }" % (name, num)
|
||||
print "%s vk%s%s\n __attribute__ ((ifunc (\"resolve_%s\"), visibility (\"default\")));\n" % (type, name, args, name)
|
||||
print_guard_end(name)
|
||||
|
||||
|
||||
# Now generate the hash table used for entry point look up. This is a
|
||||
|
@@ -338,6 +338,10 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
|
||||
pipeline->needs_data_cache = true;
|
||||
}
|
||||
|
||||
if (stage == MESA_SHADER_COMPUTE)
|
||||
((struct brw_cs_prog_data *)prog_data)->thread_local_id_index =
|
||||
prog_data->nr_params++; /* The CS Thread ID uniform */
|
||||
|
||||
if (nir->info.num_ssbos > 0)
|
||||
pipeline->needs_data_cache = true;
|
||||
|
||||
|
@@ -1474,7 +1474,6 @@ struct anv_pipeline {
|
||||
bool primitive_restart;
|
||||
uint32_t topology;
|
||||
|
||||
uint32_t cs_thread_width_max;
|
||||
uint32_t cs_right_mask;
|
||||
|
||||
struct {
|
||||
|
@@ -234,12 +234,6 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||
|
||||
unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
|
||||
unsigned push_constant_data_size =
|
||||
(prog_data->nr_params + local_id_dwords) * 4;
|
||||
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
|
||||
unsigned push_constant_regs = reg_aligned_constant_size / 32;
|
||||
|
||||
if (push_state.alloc_size) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
|
||||
curbe.CURBETotalDataLength = push_state.alloc_size;
|
||||
@@ -264,14 +258,17 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
|
||||
.BindingTablePointer = surfaces.offset,
|
||||
.SamplerStatePointer = samplers.offset,
|
||||
.ConstantURBEntryReadLength =
|
||||
push_constant_regs,
|
||||
#if !GEN_IS_HASWELL
|
||||
cs_prog_data->push.per_thread.regs,
|
||||
#if GEN_IS_HASWELL
|
||||
.CrossThreadConstantDataReadLength =
|
||||
cs_prog_data->push.cross_thread.regs,
|
||||
#else
|
||||
.ConstantURBEntryReadOffset = 0,
|
||||
#endif
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
.SharedLocalMemorySize = slm_size,
|
||||
.NumberofThreadsinGPGPUThreadGroup =
|
||||
pipeline->cs_thread_width_max);
|
||||
cs_prog_data->threads);
|
||||
|
||||
const uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
|
||||
anv_batch_emit(&cmd_buffer->batch,
|
||||
|
@@ -319,12 +319,6 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||
|
||||
unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
|
||||
unsigned push_constant_data_size =
|
||||
(prog_data->nr_params + local_id_dwords) * 4;
|
||||
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
|
||||
unsigned push_constant_regs = reg_aligned_constant_size / 32;
|
||||
|
||||
if (push_state.alloc_size) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MEDIA_CURBE_LOAD), curbe) {
|
||||
curbe.CURBETotalDataLength = push_state.alloc_size;
|
||||
@@ -351,12 +345,15 @@ flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
|
||||
.BindingTableEntryCount = 0,
|
||||
.SamplerStatePointer = samplers.offset,
|
||||
.SamplerCount = 0,
|
||||
.ConstantIndirectURBEntryReadLength = push_constant_regs,
|
||||
.ConstantIndirectURBEntryReadLength =
|
||||
cs_prog_data->push.per_thread.regs,
|
||||
.ConstantURBEntryReadOffset = 0,
|
||||
.BarrierEnable = cs_prog_data->uses_barrier,
|
||||
.SharedLocalMemorySize = slm_size,
|
||||
.NumberofThreadsinGPGPUThreadGroup =
|
||||
pipeline->cs_thread_width_max);
|
||||
cs_prog_data->threads,
|
||||
.CrossThreadConstantDataReadLength =
|
||||
cs_prog_data->push.cross_thread.regs);
|
||||
|
||||
uint32_t size = GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
|
||||
anv_batch_emit(&cmd_buffer->batch,
|
||||
|
@@ -773,7 +773,7 @@ void genX(CmdDispatch)(
|
||||
ggw.SIMDSize = prog_data->simd_size / 16;
|
||||
ggw.ThreadDepthCounterMaximum = 0;
|
||||
ggw.ThreadHeightCounterMaximum = 0;
|
||||
ggw.ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1;
|
||||
ggw.ThreadWidthCounterMaximum = prog_data->threads - 1;
|
||||
ggw.ThreadGroupIDXDimension = x;
|
||||
ggw.ThreadGroupIDYDimension = y;
|
||||
ggw.ThreadGroupIDZDimension = z;
|
||||
@@ -874,7 +874,7 @@ void genX(CmdDispatchIndirect)(
|
||||
ggw.SIMDSize = prog_data->simd_size / 16;
|
||||
ggw.ThreadDepthCounterMaximum = 0;
|
||||
ggw.ThreadHeightCounterMaximum = 0;
|
||||
ggw.ThreadWidthCounterMaximum = pipeline->cs_thread_width_max - 1;
|
||||
ggw.ThreadWidthCounterMaximum = prog_data->threads - 1;
|
||||
ggw.RightExecutionMask = pipeline->cs_right_mask;
|
||||
ggw.BottomExecutionMask = 0xffffffff;
|
||||
}
|
||||
|
@@ -87,18 +87,9 @@ genX(compute_pipeline_create)(
|
||||
anv_setup_pipeline_l3_config(pipeline);
|
||||
|
||||
const struct brw_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);
|
||||
const struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||
|
||||
unsigned local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
|
||||
unsigned push_constant_data_size =
|
||||
(prog_data->nr_params + local_id_dwords) * 4;
|
||||
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
|
||||
unsigned push_constant_regs = reg_aligned_constant_size / 32;
|
||||
|
||||
uint32_t group_size = cs_prog_data->local_size[0] *
|
||||
cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
|
||||
pipeline->cs_thread_width_max =
|
||||
DIV_ROUND_UP(group_size, cs_prog_data->simd_size);
|
||||
uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
|
||||
|
||||
if (remainder > 0)
|
||||
@@ -107,7 +98,8 @@ genX(compute_pipeline_create)(
|
||||
pipeline->cs_right_mask = ~0u >> (32 - cs_prog_data->simd_size);
|
||||
|
||||
const uint32_t vfe_curbe_allocation =
|
||||
push_constant_regs * pipeline->cs_thread_width_max;
|
||||
ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
|
||||
cs_prog_data->push.cross_thread.regs, 2);
|
||||
|
||||
anv_batch_emit(&pipeline->batch, GENX(MEDIA_VFE_STATE), vfe) {
|
||||
vfe.ScratchSpaceBasePointer = pipeline->scratch_start[MESA_SHADER_COMPUTE];
|
||||
|
@@ -52,7 +52,8 @@ LOCAL_SRC_FILES := \
|
||||
$(i965_FILES)
|
||||
|
||||
LOCAL_WHOLE_STATIC_LIBRARIES := \
|
||||
$(MESA_DRI_WHOLE_STATIC_LIBRARIES)
|
||||
$(MESA_DRI_WHOLE_STATIC_LIBRARIES) \
|
||||
libmesa_isl
|
||||
|
||||
LOCAL_SHARED_LIBRARIES := \
|
||||
$(MESA_DRI_SHARED_LIBRARIES) \
|
||||
|
@@ -46,6 +46,7 @@ i965_compiler_FILES = \
|
||||
brw_nir.c \
|
||||
brw_nir_analyze_boolean_resolves.c \
|
||||
brw_nir_attribute_workarounds.c \
|
||||
brw_nir_intrinsics.c \
|
||||
brw_nir_opt_peephole_ffma.c \
|
||||
brw_packed_float.c \
|
||||
brw_predicated_break.cpp \
|
||||
|
@@ -424,15 +424,28 @@ struct brw_wm_prog_data {
|
||||
int urb_setup[VARYING_SLOT_MAX];
|
||||
};
|
||||
|
||||
struct brw_push_const_block {
|
||||
unsigned dwords; /* Dword count, not reg aligned */
|
||||
unsigned regs;
|
||||
unsigned size; /* Bytes, register aligned */
|
||||
};
|
||||
|
||||
struct brw_cs_prog_data {
|
||||
struct brw_stage_prog_data base;
|
||||
|
||||
GLuint dispatch_grf_start_reg_16;
|
||||
unsigned local_size[3];
|
||||
unsigned simd_size;
|
||||
unsigned threads;
|
||||
bool uses_barrier;
|
||||
bool uses_num_work_groups;
|
||||
unsigned local_invocation_id_regs;
|
||||
int thread_local_id_index;
|
||||
|
||||
struct {
|
||||
struct brw_push_const_block cross_thread;
|
||||
struct brw_push_const_block per_thread;
|
||||
struct brw_push_const_block total;
|
||||
} push;
|
||||
|
||||
struct {
|
||||
/** @{
|
||||
@@ -817,13 +830,6 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
||||
unsigned *final_assembly_size,
|
||||
char **error_str);
|
||||
|
||||
/**
|
||||
* Fill out local id payload for compute shader according to cs_prog_data.
|
||||
*/
|
||||
void
|
||||
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *cs_prog_data,
|
||||
void *buffer, uint32_t threads, uint32_t stride);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
@@ -70,6 +70,7 @@
|
||||
#include "tnl/t_pipeline.h"
|
||||
#include "util/ralloc.h"
|
||||
#include "util/debug.h"
|
||||
#include "isl/isl.h"
|
||||
|
||||
/***************************************
|
||||
* Mesa's Driver Functions
|
||||
@@ -166,6 +167,38 @@ intel_update_framebuffer(struct gl_context *ctx,
|
||||
fb->DefaultGeometry.NumSamples);
|
||||
}
|
||||
|
||||
/* On Gen9 color buffers may be compressed by the hardware (lossless
|
||||
* compression). There are, however, format restrictions and care needs to be
|
||||
* taken that the sampler engine is capable of re-interpreting a buffer with
|
||||
* format different from the one the buffer was originally written with.
|
||||
*
|
||||
* For example, SRGB formats are not compressible and the sampler engine isn't
|
||||
* capable of treating RGBA_UNORM as SRGB_ALPHA. In such a case the underlying
|
||||
* color buffer needs to be resolved so that the sampling surface can be
|
||||
* sampled as non-compressed (i.e., without the auxiliary MCS buffer being
|
||||
* set).
|
||||
*/
|
||||
static bool
|
||||
intel_texture_view_requires_resolve(struct brw_context *brw,
|
||||
struct intel_texture_object *intel_tex)
|
||||
{
|
||||
if (brw->gen < 9 ||
|
||||
!intel_miptree_is_lossless_compressed(brw, intel_tex->mt))
|
||||
return false;
|
||||
|
||||
const uint32_t brw_format = brw_format_for_mesa_format(intel_tex->_Format);
|
||||
|
||||
if (isl_format_supports_lossless_compression(brw->intelScreen->devinfo,
|
||||
brw_format))
|
||||
return false;
|
||||
|
||||
perf_debug("Incompatible sampling format (%s) for rbc (%s)\n",
|
||||
_mesa_get_format_name(intel_tex->_Format),
|
||||
_mesa_get_format_name(intel_tex->mt->format));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_update_state(struct gl_context * ctx, GLuint new_state)
|
||||
{
|
||||
@@ -198,8 +231,9 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
|
||||
/* Sampling engine understands lossless compression and resolving
|
||||
* those surfaces should be skipped for performance reasons.
|
||||
*/
|
||||
intel_miptree_resolve_color(brw, tex_obj->mt,
|
||||
INTEL_MIPTREE_IGNORE_CCS_E);
|
||||
const int flags = intel_texture_view_requires_resolve(brw, tex_obj) ?
|
||||
0 : INTEL_MIPTREE_IGNORE_CCS_E;
|
||||
intel_miptree_resolve_color(brw, tex_obj->mt, flags);
|
||||
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
|
||||
}
|
||||
|
||||
|
@@ -93,6 +93,9 @@ brw_codegen_cs_prog(struct brw_context *brw,
|
||||
*/
|
||||
int param_count = cp->program.Base.nir->num_uniforms / 4;
|
||||
|
||||
/* The backend also sometimes adds a param for the thread local id. */
|
||||
prog_data.thread_local_id_index = param_count++;
|
||||
|
||||
/* The backend also sometimes adds params for texture size. */
|
||||
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
|
||||
prog_data.base.param =
|
||||
|
@@ -2943,6 +2943,9 @@ enum brw_wm_barycentric_interp_mode {
|
||||
# define MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(7, 0)
|
||||
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_SHIFT 0
|
||||
# define GEN8_MEDIA_GPGPU_THREAD_COUNT_MASK INTEL_MASK(9, 0)
|
||||
/* GEN7 DW6, GEN8+ DW7 */
|
||||
# define CROSS_THREAD_READ_LENGTH_SHIFT 0
|
||||
# define CROSS_THREAD_READ_LENGTH_MASK INTEL_MASK(7, 0)
|
||||
#define MEDIA_STATE_FLUSH 0x7004
|
||||
#define GPGPU_WALKER 0x7105
|
||||
/* GEN7 DW0 */
|
||||
|
@@ -2000,8 +2000,10 @@ void gen6_math(struct brw_codegen *p,
|
||||
|
||||
assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
if (devinfo->gen == 6) {
|
||||
assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
assert(has_scalar_region(src0) ||
|
||||
src0.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
assert(has_scalar_region(src1) ||
|
||||
src1.hstride == BRW_HORIZONTAL_STRIDE_1);
|
||||
}
|
||||
|
||||
if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
|
||||
|
@@ -2097,6 +2097,10 @@ fs_visitor::assign_constant_locations()
|
||||
bool contiguous[uniforms];
|
||||
memset(contiguous, 0, sizeof(contiguous));
|
||||
|
||||
int thread_local_id_index =
|
||||
(stage == MESA_SHADER_COMPUTE) ?
|
||||
((brw_cs_prog_data*)stage_prog_data)->thread_local_id_index : -1;
|
||||
|
||||
/* First, we walk through the instructions and do two things:
|
||||
*
|
||||
* 1) Figure out which uniforms are live.
|
||||
@@ -2141,6 +2145,9 @@ fs_visitor::assign_constant_locations()
|
||||
}
|
||||
}
|
||||
|
||||
if (thread_local_id_index >= 0 && !is_live[thread_local_id_index])
|
||||
thread_local_id_index = -1;
|
||||
|
||||
/* Only allow 16 registers (128 uniform components) as push constants.
|
||||
*
|
||||
* Just demote the end of the list. We could probably do better
|
||||
@@ -2149,7 +2156,9 @@ fs_visitor::assign_constant_locations()
|
||||
* If changing this value, note the limitation about total_regs in
|
||||
* brw_curbe.c.
|
||||
*/
|
||||
const unsigned int max_push_components = 16 * 8;
|
||||
unsigned int max_push_components = 16 * 8;
|
||||
if (thread_local_id_index >= 0)
|
||||
max_push_components--; /* Save a slot for the thread ID */
|
||||
|
||||
/* We push small arrays, but no bigger than 16 floats. This is big enough
|
||||
* for a vec4 but hopefully not large enough to push out other stuff. We
|
||||
@@ -2187,6 +2196,10 @@ fs_visitor::assign_constant_locations()
|
||||
if (!is_live[u] || is_live_64bit[u])
|
||||
continue;
|
||||
|
||||
/* Skip thread_local_id_index to put it in the last push register. */
|
||||
if (thread_local_id_index == (int)u)
|
||||
continue;
|
||||
|
||||
set_push_pull_constant_loc(u, &chunk_start, contiguous[u],
|
||||
push_constant_loc, pull_constant_loc,
|
||||
&num_push_constants, &num_pull_constants,
|
||||
@@ -2194,6 +2207,10 @@ fs_visitor::assign_constant_locations()
|
||||
stage_prog_data);
|
||||
}
|
||||
|
||||
/* Add the CS local thread ID uniform at the end of the push constants */
|
||||
if (thread_local_id_index >= 0)
|
||||
push_constant_loc[thread_local_id_index] = num_push_constants++;
|
||||
|
||||
/* As the uniforms are going to be reordered, take the data from a temporary
|
||||
* copy of the original param[].
|
||||
*/
|
||||
@@ -2212,6 +2229,7 @@ fs_visitor::assign_constant_locations()
|
||||
* push_constant_loc[i] <= i and we can do it in one smooth loop without
|
||||
* having to make a copy.
|
||||
*/
|
||||
int new_thread_local_id_index = -1;
|
||||
for (unsigned int i = 0; i < uniforms; i++) {
|
||||
const gl_constant_value *value = param[i];
|
||||
|
||||
@@ -2219,9 +2237,15 @@ fs_visitor::assign_constant_locations()
|
||||
stage_prog_data->pull_param[pull_constant_loc[i]] = value;
|
||||
} else if (push_constant_loc[i] != -1) {
|
||||
stage_prog_data->param[push_constant_loc[i]] = value;
|
||||
if (thread_local_id_index == (int)i)
|
||||
new_thread_local_id_index = push_constant_loc[i];
|
||||
}
|
||||
}
|
||||
ralloc_free(param);
|
||||
|
||||
if (stage == MESA_SHADER_COMPUTE)
|
||||
((brw_cs_prog_data*)stage_prog_data)->thread_local_id_index =
|
||||
new_thread_local_id_index;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -2767,6 +2791,20 @@ fs_visitor::opt_redundant_discard_jumps()
|
||||
return progress;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute a bitmask with GRF granularity with a bit set for each GRF starting
|
||||
* from \p r which overlaps the region starting at \p s and spanning \p n GRF
|
||||
* units.
|
||||
*/
|
||||
static inline unsigned
|
||||
mask_relative_to(const fs_reg &r, const fs_reg &s, unsigned n)
|
||||
{
|
||||
const int rel_offset = (reg_offset(s) - reg_offset(r)) / REG_SIZE;
|
||||
assert(reg_space(r) == reg_space(s) &&
|
||||
rel_offset >= 0 && rel_offset < int(8 * sizeof(unsigned)));
|
||||
return ((1 << n) - 1) << rel_offset;
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::compute_to_mrf()
|
||||
{
|
||||
@@ -2792,31 +2830,22 @@ fs_visitor::compute_to_mrf()
|
||||
inst->src[0].subreg_offset)
|
||||
continue;
|
||||
|
||||
/* Work out which hardware MRF registers are written by this
|
||||
* instruction.
|
||||
*/
|
||||
int mrf_low = inst->dst.nr & ~BRW_MRF_COMPR4;
|
||||
int mrf_high;
|
||||
if (inst->dst.nr & BRW_MRF_COMPR4) {
|
||||
mrf_high = mrf_low + 4;
|
||||
} else if (inst->exec_size == 16) {
|
||||
mrf_high = mrf_low + 1;
|
||||
} else {
|
||||
mrf_high = mrf_low;
|
||||
}
|
||||
|
||||
/* Can't compute-to-MRF this GRF if someone else was going to
|
||||
* read it later.
|
||||
*/
|
||||
if (this->virtual_grf_end[inst->src[0].nr] > ip)
|
||||
continue;
|
||||
|
||||
/* Found a move of a GRF to a MRF. Let's see if we can go
|
||||
* rewrite the thing that made this GRF to write into the MRF.
|
||||
/* Found a move of a GRF to a MRF. Let's see if we can go rewrite the
|
||||
* things that computed the value of all GRFs of the source region. The
|
||||
* regs_left bitset keeps track of the registers we haven't yet found a
|
||||
* generating instruction for.
|
||||
*/
|
||||
unsigned regs_left = (1 << inst->regs_read(0)) - 1;
|
||||
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (scan_inst->dst.file == VGRF &&
|
||||
scan_inst->dst.nr == inst->src[0].nr) {
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
|
||||
inst->src[0], inst->regs_read(0) * REG_SIZE)) {
|
||||
/* Found the last thing to write our reg we want to turn
|
||||
* into a compute-to-MRF.
|
||||
*/
|
||||
@@ -2824,15 +2853,18 @@ fs_visitor::compute_to_mrf()
|
||||
/* If this one instruction didn't populate all the
|
||||
* channels, bail. We might be able to rewrite everything
|
||||
* that writes that reg, but it would require smarter
|
||||
* tracking to delay the rewriting until complete success.
|
||||
* tracking.
|
||||
*/
|
||||
if (scan_inst->is_partial_write())
|
||||
break;
|
||||
|
||||
/* Things returning more than one register would need us to
|
||||
* understand coalescing out more than one MOV at a time.
|
||||
/* Handling things not fully contained in the source of the copy
|
||||
* would need us to understand coalescing out more than one MOV at
|
||||
* a time.
|
||||
*/
|
||||
if (scan_inst->regs_written > scan_inst->exec_size / 8)
|
||||
if (scan_inst->dst.reg_offset < inst->src[0].reg_offset ||
|
||||
scan_inst->dst.reg_offset + scan_inst->regs_written >
|
||||
inst->src[0].reg_offset + inst->regs_read(0))
|
||||
break;
|
||||
|
||||
/* SEND instructions can't have MRF as a destination. */
|
||||
@@ -2848,16 +2880,11 @@ fs_visitor::compute_to_mrf()
|
||||
}
|
||||
}
|
||||
|
||||
if (scan_inst->dst.reg_offset == inst->src[0].reg_offset) {
|
||||
/* Found the creator of our MRF's source value. */
|
||||
scan_inst->dst.file = MRF;
|
||||
scan_inst->dst.nr = inst->dst.nr;
|
||||
scan_inst->dst.reg_offset = 0;
|
||||
scan_inst->saturate |= inst->saturate;
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
/* Clear the bits for any registers this instruction overwrites. */
|
||||
regs_left &= ~mask_relative_to(
|
||||
inst->src[0], scan_inst->dst, scan_inst->regs_written);
|
||||
if (!regs_left)
|
||||
break;
|
||||
}
|
||||
|
||||
/* We don't handle control flow here. Most computation of
|
||||
@@ -2872,54 +2899,83 @@ fs_visitor::compute_to_mrf()
|
||||
*/
|
||||
bool interfered = false;
|
||||
for (int i = 0; i < scan_inst->sources; i++) {
|
||||
if (scan_inst->src[i].file == VGRF &&
|
||||
scan_inst->src[i].nr == inst->src[0].nr &&
|
||||
scan_inst->src[i].reg_offset == inst->src[0].reg_offset) {
|
||||
if (regions_overlap(scan_inst->src[i], scan_inst->regs_read(i) * REG_SIZE,
|
||||
inst->src[0], inst->regs_read(0) * REG_SIZE)) {
|
||||
interfered = true;
|
||||
}
|
||||
}
|
||||
if (interfered)
|
||||
break;
|
||||
|
||||
if (scan_inst->dst.file == MRF) {
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
|
||||
inst->dst, inst->regs_written * REG_SIZE)) {
|
||||
/* If somebody else writes our MRF here, we can't
|
||||
* compute-to-MRF before that.
|
||||
*/
|
||||
int scan_mrf_low = scan_inst->dst.nr & ~BRW_MRF_COMPR4;
|
||||
int scan_mrf_high;
|
||||
break;
|
||||
}
|
||||
|
||||
if (scan_inst->dst.nr & BRW_MRF_COMPR4) {
|
||||
scan_mrf_high = scan_mrf_low + 4;
|
||||
} else if (scan_inst->exec_size == 16) {
|
||||
scan_mrf_high = scan_mrf_low + 1;
|
||||
} else {
|
||||
scan_mrf_high = scan_mrf_low;
|
||||
}
|
||||
|
||||
if (mrf_low == scan_mrf_low ||
|
||||
mrf_low == scan_mrf_high ||
|
||||
mrf_high == scan_mrf_low ||
|
||||
mrf_high == scan_mrf_high) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1) {
|
||||
if (scan_inst->mlen > 0 && scan_inst->base_mrf != -1 &&
|
||||
regions_overlap(fs_reg(MRF, scan_inst->base_mrf), scan_inst->mlen * REG_SIZE,
|
||||
inst->dst, inst->regs_written * REG_SIZE)) {
|
||||
/* Found a SEND instruction, which means that there are
|
||||
* live values in MRFs from base_mrf to base_mrf +
|
||||
* scan_inst->mlen - 1. Don't go pushing our MRF write up
|
||||
* above it.
|
||||
*/
|
||||
if (mrf_low >= scan_inst->base_mrf &&
|
||||
mrf_low < scan_inst->base_mrf + scan_inst->mlen) {
|
||||
break;
|
||||
}
|
||||
if (mrf_high >= scan_inst->base_mrf &&
|
||||
mrf_high < scan_inst->base_mrf + scan_inst->mlen) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (regs_left)
|
||||
continue;
|
||||
|
||||
/* Found all generating instructions of our MRF's source value, so it
|
||||
* should be safe to rewrite them to point to the MRF directly.
|
||||
*/
|
||||
regs_left = (1 << inst->regs_read(0)) - 1;
|
||||
|
||||
foreach_inst_in_block_reverse_starting_from(fs_inst, scan_inst, inst) {
|
||||
if (regions_overlap(scan_inst->dst, scan_inst->regs_written * REG_SIZE,
|
||||
inst->src[0], inst->regs_read(0) * REG_SIZE)) {
|
||||
/* Clear the bits for any registers this instruction overwrites. */
|
||||
regs_left &= ~mask_relative_to(
|
||||
inst->src[0], scan_inst->dst, scan_inst->regs_written);
|
||||
|
||||
const unsigned rel_offset = (reg_offset(scan_inst->dst) -
|
||||
reg_offset(inst->src[0])) / REG_SIZE;
|
||||
|
||||
if (inst->dst.nr & BRW_MRF_COMPR4) {
|
||||
/* Apply the same address transformation done by the hardware
|
||||
* for COMPR4 MRF writes.
|
||||
*/
|
||||
assert(rel_offset < 2);
|
||||
scan_inst->dst.nr = inst->dst.nr + rel_offset * 4;
|
||||
|
||||
/* Clear the COMPR4 bit if the generating instruction is not
|
||||
* compressed.
|
||||
*/
|
||||
if (scan_inst->regs_written < 2)
|
||||
scan_inst->dst.nr &= ~BRW_MRF_COMPR4;
|
||||
|
||||
} else {
|
||||
/* Calculate the MRF number the result of this instruction is
|
||||
* ultimately written to.
|
||||
*/
|
||||
scan_inst->dst.nr = inst->dst.nr + rel_offset;
|
||||
}
|
||||
|
||||
scan_inst->dst.file = MRF;
|
||||
scan_inst->dst.reg_offset = 0;
|
||||
scan_inst->saturate |= inst->saturate;
|
||||
if (!regs_left)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
assert(!regs_left);
|
||||
inst->remove(block);
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if (progress)
|
||||
@@ -3080,18 +3136,18 @@ fs_visitor::remove_duplicate_mrf_writes()
|
||||
}
|
||||
|
||||
/* Clear out any MRF move records whose sources got overwritten. */
|
||||
if (inst->dst.file == VGRF) {
|
||||
for (unsigned int i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
|
||||
if (last_mrf_move[i] &&
|
||||
last_mrf_move[i]->src[0].nr == inst->dst.nr) {
|
||||
last_mrf_move[i] = NULL;
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(last_mrf_move); i++) {
|
||||
if (last_mrf_move[i] &&
|
||||
regions_overlap(inst->dst, inst->regs_written * REG_SIZE,
|
||||
last_mrf_move[i]->src[0],
|
||||
last_mrf_move[i]->regs_read(0) * REG_SIZE)) {
|
||||
last_mrf_move[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->opcode == BRW_OPCODE_MOV &&
|
||||
inst->dst.file == MRF &&
|
||||
inst->src[0].file == VGRF &&
|
||||
inst->src[0].file != ARF &&
|
||||
!inst->is_partial_write()) {
|
||||
last_mrf_move[inst->dst.nr] = inst;
|
||||
}
|
||||
@@ -4416,6 +4472,14 @@ lower_varying_pull_constant_logical_send(const fs_builder &bld, fs_inst *inst)
|
||||
const brw_device_info *devinfo = bld.shader->devinfo;
|
||||
|
||||
if (devinfo->gen >= 7) {
|
||||
/* We are switching the instruction from an ALU-like instruction to a
|
||||
* send-from-grf instruction. Since sends can't handle strides or
|
||||
* source modifiers, we have to make a copy of the offset source.
|
||||
*/
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD);
|
||||
bld.MOV(tmp, inst->src[1]);
|
||||
inst->src[1] = tmp;
|
||||
|
||||
inst->opcode = FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7;
|
||||
|
||||
} else {
|
||||
@@ -5517,31 +5581,6 @@ fs_visitor::setup_vs_payload()
|
||||
payload.num_regs = 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* We are building the local ID push constant data using the simplest possible
|
||||
* method. We simply push the local IDs directly as they should appear in the
|
||||
* registers for the uvec3 gl_LocalInvocationID variable.
|
||||
*
|
||||
* Therefore, for SIMD8, we use 3 full registers, and for SIMD16 we use 6
|
||||
* registers worth of push constant space.
|
||||
*
|
||||
* Note: Any updates to brw_cs_prog_local_id_payload_dwords,
|
||||
* fill_local_id_payload or fs_visitor::emit_cs_local_invocation_id_setup need
|
||||
* to be coordinated.
|
||||
*
|
||||
* FINISHME: There are a few easy optimizations to consider.
|
||||
*
|
||||
* 1. If gl_WorkGroupSize x, y or z is 1, we can just use zero, and there is
|
||||
* no need for using push constant space for that dimension.
|
||||
*
|
||||
* 2. Since GL_MAX_COMPUTE_WORK_GROUP_SIZE is currently 1024 or less, we can
|
||||
* easily use 16-bit words rather than 32-bit dwords in the push constant
|
||||
* data.
|
||||
*
|
||||
* 3. If gl_WorkGroupSize x, y or z is small, then we can use bytes for
|
||||
* conveying the data, and thereby reduce push constant usage.
|
||||
*
|
||||
*/
|
||||
void
|
||||
fs_visitor::setup_gs_payload()
|
||||
{
|
||||
@@ -5585,15 +5624,7 @@ void
|
||||
fs_visitor::setup_cs_payload()
|
||||
{
|
||||
assert(devinfo->gen >= 7);
|
||||
brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
|
||||
|
||||
payload.num_regs = 1;
|
||||
|
||||
if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
|
||||
prog_data->local_invocation_id_regs = dispatch_width * 3 / 8;
|
||||
payload.local_invocation_id_reg = payload.num_regs;
|
||||
payload.num_regs += prog_data->local_invocation_id_regs;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -6467,25 +6498,6 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data,
|
||||
return g.get_assembly(final_assembly_size);
|
||||
}
|
||||
|
||||
/**
 * Copy the three components of the local invocation ID out of the thread
 * payload into a newly allocated uvec3 VGRF and return that register.
 *
 * The first component is read from payload.local_invocation_id_reg; each
 * following component is read dispatch_width / 8 payload registers further
 * on.
 */
fs_reg *
fs_visitor::emit_cs_local_invocation_id_setup()
{
   assert(stage == MESA_SHADER_COMPUTE);

   fs_reg *local_id = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type));

   struct brw_reg payload_src =
      retype(brw_vec8_grf(payload.local_invocation_id_reg, 0),
             BRW_REGISTER_TYPE_UD);

   /* Component 0 goes to the start of the destination. */
   bld.MOV(*local_id, payload_src);

   /* Components 1 and 2: step the payload source, then copy. */
   for (unsigned comp = 1; comp < 3; comp++) {
      payload_src.nr += dispatch_width / 8;
      bld.MOV(offset(*local_id, bld, comp), payload_src);
   }

   return local_id;
}
|
||||
|
||||
fs_reg *
|
||||
fs_visitor::emit_cs_work_group_id_setup()
|
||||
{
|
||||
@@ -6504,6 +6516,70 @@ fs_visitor::emit_cs_work_group_id_setup()
|
||||
return reg;
|
||||
}
|
||||
|
||||
static void
|
||||
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
|
||||
{
|
||||
block->dwords = dwords;
|
||||
block->regs = DIV_ROUND_UP(dwords, 8);
|
||||
block->size = block->regs * 32;
|
||||
}
|
||||
|
||||
static void
|
||||
cs_fill_push_const_info(const struct brw_device_info *devinfo,
|
||||
struct brw_cs_prog_data *cs_prog_data)
|
||||
{
|
||||
const struct brw_stage_prog_data *prog_data =
|
||||
(struct brw_stage_prog_data*) cs_prog_data;
|
||||
bool fill_thread_id =
|
||||
cs_prog_data->thread_local_id_index >= 0 &&
|
||||
cs_prog_data->thread_local_id_index < (int)prog_data->nr_params;
|
||||
bool cross_thread_supported = devinfo->gen > 7 || devinfo->is_haswell;
|
||||
|
||||
/* The thread ID should be stored in the last param dword */
|
||||
assert(prog_data->nr_params > 0 || !fill_thread_id);
|
||||
assert(!fill_thread_id ||
|
||||
cs_prog_data->thread_local_id_index ==
|
||||
(int)prog_data->nr_params - 1);
|
||||
|
||||
unsigned cross_thread_dwords, per_thread_dwords;
|
||||
if (!cross_thread_supported) {
|
||||
cross_thread_dwords = 0u;
|
||||
per_thread_dwords = prog_data->nr_params;
|
||||
} else if (fill_thread_id) {
|
||||
/* Fill all but the last register with cross-thread payload */
|
||||
cross_thread_dwords = 8 * (cs_prog_data->thread_local_id_index / 8);
|
||||
per_thread_dwords = prog_data->nr_params - cross_thread_dwords;
|
||||
assert(per_thread_dwords > 0 && per_thread_dwords <= 8);
|
||||
} else {
|
||||
/* Fill all data using cross-thread payload */
|
||||
cross_thread_dwords = prog_data->nr_params;
|
||||
per_thread_dwords = 0u;
|
||||
}
|
||||
|
||||
fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
|
||||
fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
|
||||
|
||||
unsigned total_dwords =
|
||||
(cs_prog_data->push.per_thread.size * cs_prog_data->threads +
|
||||
cs_prog_data->push.cross_thread.size) / 4;
|
||||
fill_push_const_block_info(&cs_prog_data->push.total, total_dwords);
|
||||
|
||||
assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
|
||||
cs_prog_data->push.per_thread.size == 0);
|
||||
assert(cs_prog_data->push.cross_thread.dwords +
|
||||
cs_prog_data->push.per_thread.dwords ==
|
||||
prog_data->nr_params);
|
||||
}
|
||||
|
||||
static void
|
||||
cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)
|
||||
{
|
||||
cs_prog_data->simd_size = size;
|
||||
unsigned group_size = cs_prog_data->local_size[0] *
|
||||
cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
|
||||
cs_prog_data->threads = (group_size + size - 1) / size;
|
||||
}
|
||||
|
||||
const unsigned *
|
||||
brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
||||
void *mem_ctx,
|
||||
@@ -6519,6 +6595,16 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
||||
true);
|
||||
brw_nir_lower_cs_shared(shader);
|
||||
prog_data->base.total_shared += shader->num_shared;
|
||||
|
||||
/* Now that we cloned the nir_shader, we can update num_uniforms based on
|
||||
* the thread_local_id_index.
|
||||
*/
|
||||
assert(prog_data->thread_local_id_index >= 0);
|
||||
shader->num_uniforms =
|
||||
MAX2(shader->num_uniforms,
|
||||
(unsigned)4 * (prog_data->thread_local_id_index + 1));
|
||||
|
||||
brw_nir_lower_intrinsics(shader, &prog_data->base);
|
||||
shader = brw_postprocess_nir(shader, compiler->devinfo, true);
|
||||
|
||||
prog_data->local_size[0] = shader->info.cs.local_size[0];
|
||||
@@ -6544,7 +6630,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
||||
fail_msg = v8.fail_msg;
|
||||
} else {
|
||||
cfg = v8.cfg;
|
||||
prog_data->simd_size = 8;
|
||||
cs_set_simd_size(prog_data, 8);
|
||||
cs_fill_push_const_info(compiler->devinfo, prog_data);
|
||||
prog_data->base.dispatch_grf_start_reg = v8.payload.num_regs;
|
||||
}
|
||||
}
|
||||
@@ -6569,7 +6656,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
||||
}
|
||||
} else {
|
||||
cfg = v16.cfg;
|
||||
prog_data->simd_size = 16;
|
||||
cs_set_simd_size(prog_data, 16);
|
||||
cs_fill_push_const_info(compiler->devinfo, prog_data);
|
||||
prog_data->dispatch_grf_start_reg_16 = v16.payload.num_regs;
|
||||
}
|
||||
}
|
||||
@@ -6596,7 +6684,8 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
||||
}
|
||||
} else {
|
||||
cfg = v32.cfg;
|
||||
prog_data->simd_size = 32;
|
||||
cs_set_simd_size(prog_data, 32);
|
||||
cs_fill_push_const_info(compiler->devinfo, prog_data);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6623,39 +6712,3 @@ brw_compile_cs(const struct brw_compiler *compiler, void *log_data,
|
||||
|
||||
return g.get_assembly(final_assembly_size);
|
||||
}
|
||||
|
||||
void
|
||||
brw_cs_fill_local_id_payload(const struct brw_cs_prog_data *prog_data,
|
||||
void *buffer, uint32_t threads, uint32_t stride)
|
||||
{
|
||||
if (prog_data->local_invocation_id_regs == 0)
|
||||
return;
|
||||
|
||||
/* 'stride' should be an integer number of registers, that is, a multiple
|
||||
* of 32 bytes.
|
||||
*/
|
||||
assert(stride % 32 == 0);
|
||||
|
||||
unsigned x = 0, y = 0, z = 0;
|
||||
for (unsigned t = 0; t < threads; t++) {
|
||||
uint32_t *param = (uint32_t *) buffer + stride * t / 4;
|
||||
|
||||
for (unsigned i = 0; i < prog_data->simd_size; i++) {
|
||||
param[0 * prog_data->simd_size + i] = x;
|
||||
param[1 * prog_data->simd_size + i] = y;
|
||||
param[2 * prog_data->simd_size + i] = z;
|
||||
|
||||
x++;
|
||||
if (x == prog_data->local_size[0]) {
|
||||
x = 0;
|
||||
y++;
|
||||
if (y == prog_data->local_size[1]) {
|
||||
y = 0;
|
||||
z++;
|
||||
if (z == prog_data->local_size[2])
|
||||
z = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -267,7 +267,6 @@ public:
|
||||
unsigned base_offset, const nir_src &offset_src,
|
||||
unsigned num_components);
|
||||
void emit_cs_terminate();
|
||||
fs_reg *emit_cs_local_invocation_id_setup();
|
||||
fs_reg *emit_cs_work_group_id_setup();
|
||||
|
||||
void emit_barrier();
|
||||
|
@@ -621,20 +621,14 @@ namespace brw {
|
||||
src_reg
|
||||
fix_math_operand(const src_reg &src) const
|
||||
{
|
||||
/* Can't do hstride == 0 args on gen6 math, so expand it out. We
|
||||
* might be able to do better by doing execsize = 1 math and then
|
||||
* expanding that result out, but we would need to be careful with
|
||||
* masking.
|
||||
*
|
||||
* Gen6 hardware ignores source modifiers (negate and abs) on math
|
||||
/* Gen6 hardware ignores source modifiers (negate and abs) on math
|
||||
* instructions, so we also move to a temp to set those up.
|
||||
*
|
||||
* Gen7 relaxes most of the above restrictions, but still can't use IMM
|
||||
* operands to math
|
||||
*/
|
||||
if ((shader->devinfo->gen == 6 &&
|
||||
(src.file == IMM || src.file == UNIFORM ||
|
||||
src.abs || src.negate)) ||
|
||||
(src.file == IMM || src.abs || src.negate)) ||
|
||||
(shader->devinfo->gen == 7 && src.file == IMM)) {
|
||||
const dst_reg tmp = vgrf(src.type);
|
||||
MOV(tmp, src);
|
||||
|
@@ -147,8 +147,6 @@ struct table {
|
||||
static struct imm *
|
||||
find_imm(struct table *table, float val)
|
||||
{
|
||||
assert(signbit(val) == 0);
|
||||
|
||||
for (int i = 0; i < table->len; i++) {
|
||||
if (table->imm[i].val == val) {
|
||||
return &table->imm[i];
|
||||
@@ -220,7 +218,8 @@ fs_visitor::opt_combine_constants()
|
||||
inst->src[i].type != BRW_REGISTER_TYPE_F)
|
||||
continue;
|
||||
|
||||
float val = fabsf(inst->src[i].f);
|
||||
float val = !inst->can_do_source_mods(devinfo) ? inst->src[i].f :
|
||||
fabs(inst->src[i].f);
|
||||
struct imm *imm = find_imm(&table, val);
|
||||
|
||||
if (imm) {
|
||||
@@ -301,7 +300,7 @@ fs_visitor::opt_combine_constants()
|
||||
reg->stride = 0;
|
||||
reg->negate = signbit(reg->f) != signbit(table.imm[i].val);
|
||||
assert((isnan(reg->f) && isnan(table.imm[i].val)) ||
|
||||
fabsf(reg->f) == table.imm[i].val);
|
||||
fabsf(reg->f) == fabs(table.imm[i].val));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -578,14 +578,9 @@ fs_visitor::try_constant_propagate(fs_inst *inst, acp_entry *entry)
|
||||
break;
|
||||
/* fallthrough */
|
||||
case SHADER_OPCODE_POW:
|
||||
/* Allow constant propagation into src1 (except on Gen 6), and let
|
||||
* constant combining promote the constant on Gen < 8.
|
||||
*
|
||||
* While Gen 6 MATH can take a scalar source, its source and
|
||||
* destination offsets must be equal and we cannot ensure that.
|
||||
/* Allow constant propagation into src1, and let constant combining
|
||||
* promote the constant on Gen < 8.
|
||||
*/
|
||||
if (devinfo->gen == 6)
|
||||
break;
|
||||
/* fallthrough */
|
||||
case BRW_OPCODE_BFI1:
|
||||
case BRW_OPCODE_ASR:
|
||||
|
@@ -272,13 +272,6 @@ emit_system_values_block(nir_block *block, fs_visitor *v)
|
||||
*reg = *v->emit_samplemaskin_setup();
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_local_invocation_id:
|
||||
assert(v->stage == MESA_SHADER_COMPUTE);
|
||||
reg = &v->nir_system_values[SYSTEM_VALUE_LOCAL_INVOCATION_ID];
|
||||
if (reg->file == BAD_FILE)
|
||||
*reg = *v->emit_cs_local_invocation_id_setup();
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_work_group_id:
|
||||
assert(v->stage == MESA_SHADER_COMPUTE);
|
||||
reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID];
|
||||
@@ -1668,6 +1661,9 @@ fs_visitor::emit_gs_end_primitive(const nir_src &vertex_count_nir_src)
|
||||
struct brw_gs_prog_data *gs_prog_data =
|
||||
(struct brw_gs_prog_data *) prog_data;
|
||||
|
||||
if (gs_compile->control_data_header_size_bits == 0)
|
||||
return;
|
||||
|
||||
/* We can only do EndPrimitive() functionality when the control data
|
||||
* consists of cut bits. Fortunately, the only time it isn't is when the
|
||||
* output type is points, in which case EndPrimitive() is a no-op.
|
||||
@@ -2746,7 +2742,7 @@ fs_visitor::nir_emit_tes_intrinsic(const fs_builder &bld,
|
||||
break;
|
||||
case BRW_TESS_DOMAIN_ISOLINE:
|
||||
for (unsigned i = 0; i < 2; i++)
|
||||
bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 7 - i));
|
||||
bld.MOV(offset(dest, bld, i), component(fs_reg(ATTR, 0), 6 + i));
|
||||
break;
|
||||
}
|
||||
break;
|
||||
@@ -3873,6 +3869,21 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_channel_num: {
|
||||
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UW);
|
||||
dest = retype(dest, BRW_REGISTER_TYPE_UD);
|
||||
const fs_builder allbld8 = bld.group(8, 0).exec_all();
|
||||
allbld8.MOV(tmp, brw_imm_v(0x76543210));
|
||||
if (dispatch_width > 8)
|
||||
allbld8.ADD(byte_offset(tmp, 16), tmp, brw_imm_uw(8u));
|
||||
if (dispatch_width > 16) {
|
||||
const fs_builder allbld16 = bld.group(16, 0).exec_all();
|
||||
allbld16.ADD(byte_offset(tmp, 32), tmp, brw_imm_uw(16u));
|
||||
}
|
||||
bld.MOV(dest, tmp);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("unknown intrinsic");
|
||||
}
|
||||
|
@@ -204,9 +204,23 @@ reg_offset(const fs_reg &r)
|
||||
static inline bool
|
||||
regions_overlap(const fs_reg &r, unsigned dr, const fs_reg &s, unsigned ds)
|
||||
{
|
||||
return reg_space(r) == reg_space(s) &&
|
||||
!(reg_offset(r) + dr <= reg_offset(s) ||
|
||||
reg_offset(s) + ds <= reg_offset(r));
|
||||
if (r.file == MRF && (r.nr & BRW_MRF_COMPR4)) {
|
||||
fs_reg t = r;
|
||||
t.nr &= ~BRW_MRF_COMPR4;
|
||||
/* COMPR4 regions are translated by the hardware during decompression
|
||||
* into two separate half-regions 4 MRFs apart from each other.
|
||||
*/
|
||||
return regions_overlap(t, dr / 2, s, ds) ||
|
||||
regions_overlap(byte_offset(t, 4 * REG_SIZE), dr / 2, s, ds);
|
||||
|
||||
} else if (s.file == MRF && (s.nr & BRW_MRF_COMPR4)) {
|
||||
return regions_overlap(s, ds, r, dr);
|
||||
|
||||
} else {
|
||||
return reg_space(r) == reg_space(s) &&
|
||||
!(reg_offset(r) + dr <= reg_offset(s) ||
|
||||
reg_offset(s) + ds <= reg_offset(r));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -91,6 +91,8 @@ void brw_nir_analyze_boolean_resolves(nir_shader *nir);
|
||||
nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler,
|
||||
nir_shader *nir);
|
||||
|
||||
bool brw_nir_lower_intrinsics(nir_shader *nir,
|
||||
struct brw_stage_prog_data *prog_data);
|
||||
void brw_nir_lower_vs_inputs(nir_shader *nir,
|
||||
const struct brw_device_info *devinfo,
|
||||
bool is_scalar,
|
||||
|
179
src/mesa/drivers/dri/i965/brw_nir_intrinsics.c
Normal file
179
src/mesa/drivers/dri/i965/brw_nir_intrinsics.c
Normal file
@@ -0,0 +1,179 @@
|
||||
/*
|
||||
* Copyright (c) 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_nir.h"
|
||||
#include "compiler/nir/nir_builder.h"
|
||||
|
||||
struct lower_intrinsics_state {
|
||||
nir_shader *nir;
|
||||
union {
|
||||
struct brw_stage_prog_data *prog_data;
|
||||
struct brw_cs_prog_data *cs_prog_data;
|
||||
};
|
||||
nir_function_impl *impl;
|
||||
bool progress;
|
||||
nir_builder builder;
|
||||
bool cs_thread_id_used;
|
||||
};
|
||||
|
||||
static nir_ssa_def *
|
||||
read_thread_local_id(struct lower_intrinsics_state *state)
|
||||
{
|
||||
assert(state->cs_prog_data->thread_local_id_index >= 0);
|
||||
state->cs_thread_id_used = true;
|
||||
const int id_index = state->cs_prog_data->thread_local_id_index;
|
||||
|
||||
nir_builder *b = &state->builder;
|
||||
nir_shader *nir = state->nir;
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
|
||||
load->num_components = 1;
|
||||
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
|
||||
nir_intrinsic_set_base(load, id_index * sizeof(uint32_t));
|
||||
nir_intrinsic_set_range(load, sizeof(uint32_t));
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
return &load->dest.ssa;
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
|
||||
nir_block *block)
|
||||
{
|
||||
bool progress = false;
|
||||
nir_builder *b = &state->builder;
|
||||
nir_shader *nir = state->nir;
|
||||
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
|
||||
nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);
|
||||
|
||||
b->cursor = nir_after_instr(&intrinsic->instr);
|
||||
|
||||
nir_ssa_def *sysval;
|
||||
switch (intrinsic->intrinsic) {
|
||||
case nir_intrinsic_load_local_invocation_index: {
|
||||
assert(nir->stage == MESA_SHADER_COMPUTE);
|
||||
/* We construct the local invocation index from:
|
||||
*
|
||||
* gl_LocalInvocationIndex =
|
||||
* cs_thread_local_id + channel_num;
|
||||
*/
|
||||
nir_ssa_def *thread_local_id = read_thread_local_id(state);
|
||||
nir_ssa_def *channel =
|
||||
nir_load_system_value(b, nir_intrinsic_load_channel_num, 0);
|
||||
sysval = nir_iadd(b, channel, thread_local_id);
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_load_local_invocation_id: {
|
||||
assert(nir->stage == MESA_SHADER_COMPUTE);
|
||||
/* We lower gl_LocalInvocationID from gl_LocalInvocationIndex based
|
||||
* on this formula:
|
||||
*
|
||||
* gl_LocalInvocationID.x =
|
||||
* gl_LocalInvocationIndex % gl_WorkGroupSize.x;
|
||||
* gl_LocalInvocationID.y =
|
||||
* (gl_LocalInvocationIndex / gl_WorkGroupSize.x) %
|
||||
* gl_WorkGroupSize.y;
|
||||
* gl_LocalInvocationID.z =
|
||||
* (gl_LocalInvocationIndex /
|
||||
* (gl_WorkGroupSize.x * gl_WorkGroupSize.y)) %
|
||||
* gl_WorkGroupSize.z;
|
||||
*/
|
||||
unsigned *size = nir->info.cs.local_size;
|
||||
|
||||
nir_ssa_def *local_index =
|
||||
nir_load_system_value(b, nir_intrinsic_load_local_invocation_index, 0);
|
||||
|
||||
nir_const_value uvec3;
|
||||
uvec3.u32[0] = 1;
|
||||
uvec3.u32[1] = size[0];
|
||||
uvec3.u32[2] = size[0] * size[1];
|
||||
nir_ssa_def *div_val = nir_build_imm(b, 3, 32, uvec3);
|
||||
uvec3.u32[0] = size[0];
|
||||
uvec3.u32[1] = size[1];
|
||||
uvec3.u32[2] = size[2];
|
||||
nir_ssa_def *mod_val = nir_build_imm(b, 3, 32, uvec3);
|
||||
|
||||
sysval = nir_imod(b, nir_idiv(b, local_index, div_val), mod_val);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
continue;
|
||||
}
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, nir_src_for_ssa(sysval));
|
||||
nir_instr_remove(&intrinsic->instr);
|
||||
|
||||
state->progress = true;
|
||||
}
|
||||
|
||||
return progress;
|
||||
}
|
||||
|
||||
static void
|
||||
lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state)
|
||||
{
|
||||
nir_builder_init(&state->builder, state->impl);
|
||||
|
||||
nir_foreach_block(block, state->impl) {
|
||||
lower_cs_intrinsics_convert_block(state, block);
|
||||
}
|
||||
|
||||
nir_metadata_preserve(state->impl,
|
||||
nir_metadata_block_index | nir_metadata_dominance);
|
||||
}
|
||||
|
||||
bool
|
||||
brw_nir_lower_intrinsics(nir_shader *nir, struct brw_stage_prog_data *prog_data)
|
||||
{
|
||||
/* Currently we only lower intrinsics for compute shaders */
|
||||
if (nir->stage != MESA_SHADER_COMPUTE)
|
||||
return false;
|
||||
|
||||
bool progress = false;
|
||||
struct lower_intrinsics_state state;
|
||||
memset(&state, 0, sizeof(state));
|
||||
state.nir = nir;
|
||||
state.prog_data = prog_data;
|
||||
|
||||
do {
|
||||
state.progress = false;
|
||||
nir_foreach_function(function, nir) {
|
||||
if (function->impl) {
|
||||
state.impl = function->impl;
|
||||
lower_cs_intrinsics_convert_impl(&state);
|
||||
}
|
||||
}
|
||||
progress |= state.progress;
|
||||
} while (state.progress);
|
||||
|
||||
if (nir->stage == MESA_SHADER_COMPUTE && !state.cs_thread_id_used)
|
||||
state.cs_prog_data->thread_local_id_index = -1;
|
||||
|
||||
return progress;
|
||||
}
|
@@ -225,19 +225,24 @@ brw_codegen_tcs_prog(struct brw_context *brw,
|
||||
*/
|
||||
const float **param = (const float **) prog_data.base.base.param;
|
||||
static float zero = 0.0f;
|
||||
for (int i = 0; i < 4; i++) {
|
||||
param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i];
|
||||
}
|
||||
for (int i = 0; i < 8; i++)
|
||||
param[i] = &zero;
|
||||
|
||||
if (key->tes_primitive_mode == GL_QUADS) {
|
||||
for (int i = 0; i < 4; i++)
|
||||
param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i];
|
||||
|
||||
param[3] = &ctx->TessCtrlProgram.patch_default_inner_level[0];
|
||||
param[2] = &ctx->TessCtrlProgram.patch_default_inner_level[1];
|
||||
param[1] = &zero;
|
||||
param[0] = &zero;
|
||||
} else if (key->tes_primitive_mode == GL_TRIANGLES) {
|
||||
for (int i = 0; i < 3; i++)
|
||||
param[7 - i] = &ctx->TessCtrlProgram.patch_default_outer_level[i];
|
||||
|
||||
param[4] = &ctx->TessCtrlProgram.patch_default_inner_level[0];
|
||||
for (int i = 0; i < 4; i++)
|
||||
param[i] = &zero;
|
||||
} else {
|
||||
assert(key->tes_primitive_mode == GL_ISOLINES);
|
||||
param[7] = &ctx->TessCtrlProgram.patch_default_outer_level[1];
|
||||
param[6] = &ctx->TessCtrlProgram.patch_default_outer_level[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -539,6 +539,9 @@ vec4_gs_visitor::gs_end_primitive()
|
||||
return;
|
||||
}
|
||||
|
||||
if (c->control_data_header_size_bits == 0)
|
||||
return;
|
||||
|
||||
/* Cut bits use one bit per vertex. */
|
||||
assert(c->control_data_bits_per_vertex == 1);
|
||||
|
||||
|
@@ -33,17 +33,6 @@
|
||||
#include "program/prog_statevars.h"
|
||||
#include "compiler/glsl/ir_uniform.h"
|
||||
|
||||
static unsigned
|
||||
get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
|
||||
{
|
||||
const unsigned simd_size = cs_prog_data->simd_size;
|
||||
unsigned group_size = cs_prog_data->local_size[0] *
|
||||
cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
|
||||
|
||||
return (group_size + simd_size - 1) / simd_size;
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
brw_upload_cs_state(struct brw_context *brw)
|
||||
{
|
||||
@@ -53,7 +42,6 @@ brw_upload_cs_state(struct brw_context *brw)
|
||||
uint32_t offset;
|
||||
uint32_t *desc = (uint32_t*) brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
|
||||
8 * 4, 64, &offset);
|
||||
struct gl_program *prog = (struct gl_program *) brw->compute_program;
|
||||
struct brw_stage_state *stage_state = &brw->cs.base;
|
||||
struct brw_cs_prog_data *cs_prog_data = brw->cs.prog_data;
|
||||
struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
|
||||
@@ -70,17 +58,6 @@ brw_upload_cs_state(struct brw_context *brw)
|
||||
prog_data->binding_table.size_bytes,
|
||||
32, &stage_state->bind_bo_offset);
|
||||
|
||||
unsigned local_id_dwords = 0;
|
||||
|
||||
if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
|
||||
local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
|
||||
|
||||
unsigned push_constant_data_size =
|
||||
(prog_data->nr_params + local_id_dwords) * sizeof(gl_constant_value);
|
||||
unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
|
||||
unsigned push_constant_regs = reg_aligned_constant_size / 32;
|
||||
unsigned threads = get_cs_thread_count(cs_prog_data);
|
||||
|
||||
uint32_t dwords = brw->gen < 8 ? 8 : 9;
|
||||
BEGIN_BATCH(dwords);
|
||||
OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
|
||||
@@ -129,7 +106,9 @@ brw_upload_cs_state(struct brw_context *brw)
|
||||
*
|
||||
* Note: The constant data is built in brw_upload_cs_push_constants below.
|
||||
*/
|
||||
const uint32_t vfe_curbe_allocation = push_constant_regs * threads;
|
||||
const uint32_t vfe_curbe_allocation =
|
||||
ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
|
||||
cs_prog_data->push.cross_thread.regs, 2);
|
||||
OUT_BATCH(SET_FIELD(vfe_urb_allocation, MEDIA_VFE_STATE_URB_ALLOC) |
|
||||
SET_FIELD(vfe_curbe_allocation, MEDIA_VFE_STATE_CURBE_ALLOC));
|
||||
OUT_BATCH(0);
|
||||
@@ -137,11 +116,11 @@ brw_upload_cs_state(struct brw_context *brw)
|
||||
OUT_BATCH(0);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
if (reg_aligned_constant_size > 0) {
|
||||
if (cs_prog_data->push.total.size > 0) {
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(MEDIA_CURBE_LOAD << 16 | (4 - 2));
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(ALIGN(reg_aligned_constant_size * threads, 64));
|
||||
OUT_BATCH(ALIGN(cs_prog_data->push.total.size, 64));
|
||||
OUT_BATCH(stage_state->push_const_offset);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
@@ -160,12 +139,13 @@ brw_upload_cs_state(struct brw_context *brw)
|
||||
desc[dw++] = stage_state->sampler_offset |
|
||||
((stage_state->sampler_count + 3) / 4);
|
||||
desc[dw++] = stage_state->bind_bo_offset;
|
||||
desc[dw++] = SET_FIELD(push_constant_regs, MEDIA_CURBE_READ_LENGTH);
|
||||
desc[dw++] = SET_FIELD(cs_prog_data->push.per_thread.regs,
|
||||
MEDIA_CURBE_READ_LENGTH);
|
||||
const uint32_t media_threads =
|
||||
brw->gen >= 8 ?
|
||||
SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
|
||||
SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
|
||||
assert(threads <= brw->max_cs_threads);
|
||||
SET_FIELD(cs_prog_data->threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
|
||||
SET_FIELD(cs_prog_data->threads, MEDIA_GPGPU_THREAD_COUNT);
|
||||
assert(cs_prog_data->threads <= brw->max_cs_threads);
|
||||
|
||||
assert(prog_data->total_shared <= 64 * 1024);
|
||||
uint32_t slm_size = 0;
|
||||
@@ -182,6 +162,9 @@ brw_upload_cs_state(struct brw_context *brw)
|
||||
SET_FIELD(slm_size, MEDIA_SHARED_LOCAL_MEMORY_SIZE) |
|
||||
media_threads;
|
||||
|
||||
desc[dw++] =
|
||||
SET_FIELD(cs_prog_data->push.cross_thread.regs, CROSS_THREAD_READ_LENGTH);
|
||||
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
|
||||
OUT_BATCH(0);
|
||||
@@ -224,10 +207,6 @@ brw_upload_cs_push_constants(struct brw_context *brw,
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
const struct brw_stage_prog_data *prog_data =
|
||||
(struct brw_stage_prog_data*) cs_prog_data;
|
||||
unsigned local_id_dwords = 0;
|
||||
|
||||
if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID)
|
||||
local_id_dwords = cs_prog_data->local_invocation_id_regs * 8;
|
||||
|
||||
/* Updates the ParameterValues[i] pointers for all parameters of the
|
||||
* basic type of PROGRAM_STATE_VAR.
|
||||
@@ -235,42 +214,52 @@ brw_upload_cs_push_constants(struct brw_context *brw,
|
||||
/* XXX: Should this happen somewhere before to get our state flag set? */
|
||||
_mesa_load_state_parameters(ctx, prog->Parameters);
|
||||
|
||||
if (prog_data->nr_params == 0 && local_id_dwords == 0) {
|
||||
if (cs_prog_data->push.total.size == 0) {
|
||||
stage_state->push_const_size = 0;
|
||||
} else {
|
||||
gl_constant_value *param;
|
||||
unsigned i, t;
|
||||
return;
|
||||
}
|
||||
|
||||
const unsigned push_constant_data_size =
|
||||
(local_id_dwords + prog_data->nr_params) * sizeof(gl_constant_value);
|
||||
const unsigned reg_aligned_constant_size = ALIGN(push_constant_data_size, 32);
|
||||
const unsigned param_aligned_count =
|
||||
reg_aligned_constant_size / sizeof(*param);
|
||||
|
||||
unsigned threads = get_cs_thread_count(cs_prog_data);
|
||||
gl_constant_value *param = (gl_constant_value*)
|
||||
brw_state_batch(brw, type, ALIGN(cs_prog_data->push.total.size, 64),
|
||||
64, &stage_state->push_const_offset);
|
||||
assert(param);
|
||||
|
||||
param = (gl_constant_value*)
|
||||
brw_state_batch(brw, type,
|
||||
ALIGN(reg_aligned_constant_size * threads, 64),
|
||||
64, &stage_state->push_const_offset);
|
||||
assert(param);
|
||||
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
|
||||
|
||||
STATIC_ASSERT(sizeof(gl_constant_value) == sizeof(float));
|
||||
if (cs_prog_data->push.cross_thread.size > 0) {
|
||||
gl_constant_value *param_copy = param;
|
||||
assert(cs_prog_data->thread_local_id_index < 0 ||
|
||||
cs_prog_data->thread_local_id_index >=
|
||||
cs_prog_data->push.cross_thread.dwords);
|
||||
for (unsigned i = 0;
|
||||
i < cs_prog_data->push.cross_thread.dwords;
|
||||
i++) {
|
||||
param_copy[i] = *prog_data->param[i];
|
||||
}
|
||||
}
|
||||
|
||||
brw_cs_fill_local_id_payload(cs_prog_data, param, threads,
|
||||
reg_aligned_constant_size);
|
||||
|
||||
/* _NEW_PROGRAM_CONSTANTS */
|
||||
for (t = 0; t < threads; t++) {
|
||||
gl_constant_value *next_param =
|
||||
&param[t * param_aligned_count + local_id_dwords];
|
||||
for (i = 0; i < prog_data->nr_params; i++) {
|
||||
next_param[i] = *prog_data->param[i];
|
||||
gl_constant_value thread_id;
|
||||
if (cs_prog_data->push.per_thread.size > 0) {
|
||||
for (unsigned t = 0; t < cs_prog_data->threads; t++) {
|
||||
unsigned dst =
|
||||
8 * (cs_prog_data->push.per_thread.regs * t +
|
||||
cs_prog_data->push.cross_thread.regs);
|
||||
unsigned src = cs_prog_data->push.cross_thread.dwords;
|
||||
for ( ; src < prog_data->nr_params; src++, dst++) {
|
||||
if (src != cs_prog_data->thread_local_id_index)
|
||||
param[dst] = *prog_data->param[src];
|
||||
else {
|
||||
thread_id.u = t * cs_prog_data->simd_size;
|
||||
param[dst] = thread_id;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
|
||||
}
|
||||
|
||||
stage_state->push_const_size =
|
||||
cs_prog_data->push.cross_thread.regs +
|
||||
cs_prog_data->push.per_thread.regs;
|
||||
}
|
||||
|
||||
|
||||
|
@@ -123,7 +123,7 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
|
||||
const unsigned components = linked_xfb_info->Outputs[i].NumComponents;
|
||||
unsigned component_mask = (1 << components) - 1;
|
||||
unsigned stream_id = linked_xfb_info->Outputs[i].StreamId;
|
||||
|
||||
unsigned decl_buffer_slot = buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
|
||||
assert(stream_id < MAX_VERTEX_STREAMS);
|
||||
|
||||
/* gl_PointSize is stored in VARYING_SLOT_PSIZ.w
|
||||
@@ -145,7 +145,7 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
|
||||
|
||||
buffer_mask[stream_id] |= 1 << buffer;
|
||||
|
||||
decl |= buffer << SO_DECL_OUTPUT_BUFFER_SLOT_SHIFT;
|
||||
decl |= decl_buffer_slot;
|
||||
if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT) {
|
||||
decl |= vue_map->varying_to_slot[VARYING_SLOT_PSIZ] <<
|
||||
SO_DECL_REGISTER_INDEX_SHIFT;
|
||||
@@ -172,12 +172,14 @@ gen7_upload_3dstate_so_decl_list(struct brw_context *brw,
|
||||
next_offset[buffer] += skip_components;
|
||||
|
||||
while (skip_components >= 4) {
|
||||
so_decl[stream_id][decls[stream_id]++] = SO_DECL_HOLE_FLAG | 0xf;
|
||||
so_decl[stream_id][decls[stream_id]++] =
|
||||
SO_DECL_HOLE_FLAG | 0xf | decl_buffer_slot;
|
||||
skip_components -= 4;
|
||||
}
|
||||
if (skip_components > 0)
|
||||
so_decl[stream_id][decls[stream_id]++] =
|
||||
SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1);
|
||||
SO_DECL_HOLE_FLAG | ((1 << skip_components) - 1) |
|
||||
decl_buffer_slot;
|
||||
|
||||
assert(linked_xfb_info->Outputs[i].DstOffset == next_offset[buffer]);
|
||||
|
||||
|
@@ -40,6 +40,7 @@
|
||||
#include "brw_state.h"
|
||||
#include "brw_defines.h"
|
||||
#include "brw_wm.h"
|
||||
#include "isl/isl.h"
|
||||
|
||||
/**
|
||||
* Convert a swizzle enumeration (e.g. SWIZZLE_X) to one of the Gen7.5+
|
||||
@@ -254,8 +255,18 @@ gen8_emit_texture_surface_state(struct brw_context *brw,
|
||||
* the color buffer should always have been resolved before it is used as
|
||||
* a texture so there is no need for it. On Gen9 it will be uploaded when
|
||||
* the surface is losslessly compressed (CCS_E).
|
||||
* However, sampling engine is not capable of re-interpreting the
|
||||
* underlying color buffer in non-compressible formats when the surface
|
||||
* is configured as compressed. Therefore state upload has made sure the
|
||||
* buffer is in resolved state allowing the surface to be configured as
|
||||
* non-compressed.
|
||||
*/
|
||||
if (mt->num_samples <= 1 && aux_mode != GEN9_SURFACE_AUX_MODE_CCS_E) {
|
||||
if (mt->num_samples <= 1 &&
|
||||
(aux_mode != GEN9_SURFACE_AUX_MODE_CCS_E ||
|
||||
!isl_format_supports_lossless_compression(
|
||||
brw->intelScreen->devinfo, format))) {
|
||||
assert(!mt->mcs_mt ||
|
||||
mt->fast_clear_state == INTEL_FAST_CLEAR_STATE_RESOLVED);
|
||||
aux_mt = NULL;
|
||||
aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
|
||||
}
|
||||
|
@@ -80,6 +80,7 @@ static const struct debug_control debug_control[] = {
|
||||
{ "tes", DEBUG_TES },
|
||||
{ "l3", DEBUG_L3 },
|
||||
{ "do32", DEBUG_DO32 },
|
||||
{ "norbc", DEBUG_NO_RBC },
|
||||
{ NULL, 0 }
|
||||
};
|
||||
|
||||
|
@@ -73,6 +73,7 @@ extern uint64_t INTEL_DEBUG;
|
||||
#define DEBUG_TES (1ull << 37)
|
||||
#define DEBUG_L3 (1ull << 38)
|
||||
#define DEBUG_DO32 (1ull << 39)
|
||||
#define DEBUG_NO_RBC (1ull << 40)
|
||||
|
||||
#ifdef HAVE_ANDROID_PLATFORM
|
||||
#define LOG_TAG "INTEL-MESA"
|
||||
|
@@ -1620,7 +1620,9 @@ intel_miptree_alloc_non_msrt_mcs(struct brw_context *brw,
|
||||
* single-sampled buffers. Disabling compression allows us to skip
|
||||
* resolves.
|
||||
*/
|
||||
const bool lossless_compression_disabled = INTEL_DEBUG & DEBUG_NO_RBC;
|
||||
const bool is_lossless_compressed =
|
||||
unlikely(!lossless_compression_disabled) &&
|
||||
brw->gen >= 9 && !mt->is_scanout &&
|
||||
intel_miptree_supports_lossless_compressed(brw, mt);
|
||||
|
||||
|
@@ -1765,7 +1765,7 @@ _mesa_buffer_sub_data(struct gl_context *ctx, struct gl_buffer_object *bufObj,
|
||||
const char *func)
|
||||
{
|
||||
if (!buffer_object_subdata_range_good(ctx, bufObj, offset, size,
|
||||
false, func)) {
|
||||
true, func)) {
|
||||
/* error already recorded */
|
||||
return;
|
||||
}
|
||||
|
@@ -389,7 +389,8 @@ driver_RenderTexture_is_safe(const struct gl_renderbuffer_attachment *att)
|
||||
const struct gl_texture_image *const texImage =
|
||||
att->Texture->Image[att->CubeMapFace][att->TextureLevel];
|
||||
|
||||
if (texImage->Width == 0 || texImage->Height == 0 || texImage->Depth == 0)
|
||||
if (!texImage ||
|
||||
texImage->Width == 0 || texImage->Height == 0 || texImage->Depth == 0)
|
||||
return false;
|
||||
|
||||
if ((texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY
|
||||
|
@@ -3763,6 +3763,9 @@ struct gl_constants
|
||||
GLuint MaxTessControlTotalOutputComponents;
|
||||
bool LowerTessLevel; /**< Lower gl_TessLevel* from float[n] to vecn? */
|
||||
bool PrimitiveRestartForPatches;
|
||||
bool LowerCsDerivedVariables; /**< Lower gl_GlobalInvocationID and
|
||||
* gl_LocalInvocationIndex based on
|
||||
* other builtin variables. */
|
||||
};
|
||||
|
||||
|
||||
|
@@ -811,10 +811,8 @@ _mesa_SamplerParameteri(GLuint sampler, GLenum pname, GLint param)
|
||||
* "An INVALID_OPERATION error is generated if sampler is not the name
|
||||
* of a sampler object previously returned from a call to GenSamplers."
|
||||
*
|
||||
* In desktop GL, an GL_INVALID_VALUE is returned instead.
|
||||
*/
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glSamplerParameteri(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -904,10 +902,8 @@ _mesa_SamplerParameterf(GLuint sampler, GLenum pname, GLfloat param)
|
||||
* "An INVALID_OPERATION error is generated if sampler is not the name
|
||||
* of a sampler object previously returned from a call to GenSamplers."
|
||||
*
|
||||
* In desktop GL, an GL_INVALID_VALUE is returned instead.
|
||||
*/
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glSamplerParameterf(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -995,11 +991,8 @@ _mesa_SamplerParameteriv(GLuint sampler, GLenum pname, const GLint *params)
|
||||
*
|
||||
* "An INVALID_OPERATION error is generated if sampler is not the name
|
||||
* of a sampler object previously returned from a call to GenSamplers."
|
||||
*
|
||||
* In desktop GL, an GL_INVALID_VALUE is returned instead.
|
||||
*/
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glSamplerParameteriv(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -1096,10 +1089,8 @@ _mesa_SamplerParameterfv(GLuint sampler, GLenum pname, const GLfloat *params)
|
||||
* "An INVALID_OPERATION error is generated if sampler is not the name
|
||||
* of a sampler object previously returned from a call to GenSamplers."
|
||||
*
|
||||
* In desktop GL, an GL_INVALID_VALUE is returned instead.
|
||||
*/
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glSamplerParameterfv(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -1184,8 +1175,7 @@ _mesa_SamplerParameterIiv(GLuint sampler, GLenum pname, const GLint *params)
|
||||
|
||||
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
|
||||
if (!sampObj) {
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glSamplerParameterIiv(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -1271,8 +1261,7 @@ _mesa_SamplerParameterIuiv(GLuint sampler, GLenum pname, const GLuint *params)
|
||||
|
||||
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
|
||||
if (!sampObj) {
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glSamplerParameterIuiv(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -1362,10 +1351,8 @@ _mesa_GetSamplerParameteriv(GLuint sampler, GLenum pname, GLint *params)
|
||||
* "An INVALID_OPERATION error is generated if sampler is not the name
|
||||
* of a sampler object previously returned from a call to GenSamplers."
|
||||
*
|
||||
* In desktop GL, an GL_INVALID_VALUE is returned instead.
|
||||
*/
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glGetSamplerParameteriv(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -1456,10 +1443,8 @@ _mesa_GetSamplerParameterfv(GLuint sampler, GLenum pname, GLfloat *params)
|
||||
* "An INVALID_OPERATION error is generated if sampler is not the name
|
||||
* of a sampler object previously returned from a call to GenSamplers."
|
||||
*
|
||||
* In desktop GL, an GL_INVALID_VALUE is returned instead.
|
||||
*/
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glGetSamplerParameterfv(sampler %u)", sampler);
|
||||
return;
|
||||
}
|
||||
@@ -1533,8 +1518,7 @@ _mesa_GetSamplerParameterIiv(GLuint sampler, GLenum pname, GLint *params)
|
||||
|
||||
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
|
||||
if (!sampObj) {
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glGetSamplerParameterIiv(sampler %u)",
|
||||
sampler);
|
||||
return;
|
||||
@@ -1609,8 +1593,7 @@ _mesa_GetSamplerParameterIuiv(GLuint sampler, GLenum pname, GLuint *params)
|
||||
|
||||
sampObj = _mesa_lookup_samplerobj(ctx, sampler);
|
||||
if (!sampObj) {
|
||||
_mesa_error(ctx, (_mesa_is_gles(ctx) ?
|
||||
GL_INVALID_OPERATION : GL_INVALID_VALUE),
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION,
|
||||
"glGetSamplerParameterIuiv(sampler %u)",
|
||||
sampler);
|
||||
return;
|
||||
|
@@ -505,7 +505,7 @@ _mesa_program_resource_find_name(struct gl_shader_program *shProg,
|
||||
if (rname_last_square_bracket) {
|
||||
baselen_without_array_index -= strlen(rname_last_square_bracket);
|
||||
rname_has_array_index_zero =
|
||||
(strncmp(rname_last_square_bracket, "[0]\0", 4) == 0) &&
|
||||
(strcmp(rname_last_square_bracket, "[0]") == 0) &&
|
||||
(baselen_without_array_index == strlen(name));
|
||||
}
|
||||
|
||||
|
@@ -232,7 +232,7 @@ init_shader_program(struct gl_shader_program *prog)
|
||||
prog->FragDataBindings = string_to_uint_map_ctor();
|
||||
prog->FragDataIndexBindings = string_to_uint_map_ctor();
|
||||
|
||||
prog->Geom.VerticesOut = 0;
|
||||
prog->Geom.VerticesOut = -1;
|
||||
prog->Geom.InputType = GL_TRIANGLES;
|
||||
prog->Geom.OutputType = GL_TRIANGLE_STRIP;
|
||||
prog->Geom.UsesEndPrimitive = false;
|
||||
|
@@ -314,6 +314,7 @@ void st_init_limits(struct pipe_screen *screen,
|
||||
}
|
||||
|
||||
c->LowerTessLevel = true;
|
||||
c->LowerCsDerivedVariables = true;
|
||||
c->PrimitiveRestartForPatches =
|
||||
screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES);
|
||||
|
||||
|
Reference in New Issue
Block a user