Compare commits: mesa-23.1. ... mesa-17.0. (104 commits)

Commit list (SHA1 only; the Author and Date columns were empty in this export):

07571cd8cc 2fc362f147 89b51c7e43 ac2337ee38 77ec080710 eadbc95d64 69ec90ad24 7abecef5c3
5d470a68e6 3df060d953 34cd53ca8c 05d1c8aa02 ca222b7c18 6c89a728d9 f3b7a51383 9ecfbafedb
eaf311d90d bbb4562def 7083ca2625 8917af11f7 b7f7dc7231 301c9b96f2 06b9bc66d5 270597d13f
671dfe51a0 d7d772f903 522ee2cd7d 929b3bb6fe e6ea92b263 27e7e7e7e3 3919feee55 6ee946862c
4e20356a6c 5236ab7bac 4ea4e19ccb fad44e6aea a817d1e227 44ba34817c 586b009cfe 89ce0721eb
87fc95c94c 042b3445b2 0a1ad5c916 fe1b2f7341 c22ee800d2 e79043bbb9 f14926027c 23ffeed7e0
1e03b5e566 58952675f6 fe44c532b2 939c0c82e5 7c663b1d5e 2554c98d70 31715781c6 ebfe5e17ee
dcb3b24b86 5a806f7def c63652b8ac 651861d862 d701877fb0 b2bbfca79f 140ad270c8 517fc3ef78
450f6aa5b2 d940b91f94 63f169d5d0 d283ec0a7b 9577977266 8621961d43 7d5a98f106 4e6445caa9
e405d0d3c6 0c4b8c75e2 e35cfa15cf 34f902e17e e4cf4690d1 7f6c6b9101 23ead4c7b2 7856dfdbab
41b93b1fe0 8857256214 516b34908d 0645c0e0d4 6aade42111 4dc6ed53c1 cfe14ab39c 250b1cad3b
9318d81574 00cdbfe6ef 83deab2f6a de2dfa1dc3 5c2951c7f9 e3bfa959a8 a259b800df 45f13c2be0
b72f8de873 1cc5774e5e 50a607cf70 613154fc8f ff81869f0d 9cb066601c 45297f7e4a acc7837799

@@ -43,6 +43,7 @@ LOCAL_CFLAGS += \
-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)

LOCAL_CFLAGS += \
-DENABLE_SHADER_CACHE \
-D__STDC_LIMIT_MACROS \
-DHAVE___BUILTIN_EXPECT \
-DHAVE___BUILTIN_FFS \

@@ -77,10 +78,22 @@ endif

ifeq ($(MESA_ENABLE_LLVM),true)
LOCAL_CFLAGS += \
-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
-D__STDC_CONSTANT_MACROS \
-D__STDC_FORMAT_MACROS \
-D__STDC_LIMIT_MACROS

ifeq ($(MESA_ANDROID_MAJOR_VERSION),5)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2
ELF_INCLUDES := external/elfutils/0.153/libelf
endif
ifeq ($(MESA_ANDROID_MAJOR_VERSION),6)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0
ELF_INCLUDES := external/elfutils/src/libelf
endif
ifeq ($(MESA_ANDROID_MAJOR_VERSION),7)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0
ELF_INCLUDES := external/elfutils/libelf
endif
endif

ifneq ($(LOCAL_IS_HOST_MODULE),true)

configure.ac (50 changed lines)

@@ -1436,6 +1436,22 @@ if test "x$enable_gallium_osmesa" = xyes; then
fi
fi

require_dri_shared_libs_and_glapi() {
if test "x$enable_static" = xyes; then
AC_MSG_ERROR([$1 cannot be build as static library])
fi

if test "x$enable_dri" != xyes; then
# There is only a single backend which won't be build/used otherwise.
# XXX: Revisit this as the egl/haiku is a thing.
AC_MSG_ERROR([$1 requires --enable-dri])
fi

if test "x$enable_shared_glapi" != xyes; then
AC_MSG_ERROR([$1 requires --enable-shared-glapi])
fi
}

if test "x$enable_dri" = xyes; then
require_dri_shared_libs_and_glapi "DRI"

@@ -1722,7 +1738,7 @@ fi
AC_ARG_WITH([vulkan-drivers],
[AS_HELP_STRING([--with-vulkan-drivers@<:@=DIRS...@:>@],
[comma delimited Vulkan drivers list, e.g.
"intel"
"intel,radeon"
@<:@default=no@:>@])],
[with_vulkan_drivers="$withval"],
[with_vulkan_drivers="no"])

@@ -1766,6 +1782,7 @@ if test -n "$with_vulkan_drivers"; then
fi

DEFINES="$DEFINES -DENABLE_SHADER_CACHE"
AM_CONDITIONAL(NEED_MEGADRIVER, test -n "$DRI_DIRS")
AM_CONDITIONAL(NEED_LIBMESA, test "x$enable_glx" = xxlib -o \
"x$enable_osmesa" = xyes -o \

@@ -1814,22 +1831,6 @@ AC_SUBST([OSMESA_LIB_DEPS])
AC_SUBST([OSMESA_PC_REQ])
AC_SUBST([OSMESA_PC_LIB_PRIV])

require_dri_shared_libs_and_glapi() {
if test "x$enable_static" = xyes; then
AC_MSG_ERROR([$1 cannot be build as static library])
fi

if test "x$enable_dri" != xyes; then
# There is only a single backend which won't be build/used otherwise.
# XXX: Revisit this as the egl/haiku is a thing.
AC_MSG_ERROR([$1 requires --enable-dri])
fi

if test "x$enable_shared_glapi" != xyes; then
AC_MSG_ERROR([$1 requires --enable-shared-glapi])
fi
}

dnl
dnl gbm configuration
dnl

@@ -2211,6 +2212,19 @@ gallium_require_llvm() {
fi
}

dnl
dnl r300 doesn't strictly require LLVM, but for performance reasons we
dnl highly recommend LLVM usage. So require it at least on x86 and x86_64
dnl architectures.
dnl
r300_require_llvm() {
case "$host" in *gnux32) return;; esac
case "$host_cpu" in
i*86|x86_64|amd64) gallium_require_llvm $1
;;
esac
}

dnl
dnl DRM is needed by X, Wayland, and offscreen rendering.
dnl Surfaceless is an alternative for the last one.

@@ -2297,7 +2311,7 @@ if test -n "$with_gallium_drivers"; then
HAVE_GALLIUM_R300=yes
PKG_CHECK_MODULES([RADEON], [libdrm_radeon >= $LIBDRM_RADEON_REQUIRED])
require_libdrm "r300"
gallium_require_llvm "r300"
r300_require_llvm "r300"
;;
xr600)
HAVE_GALLIUM_R600=yes

@@ -55,7 +55,7 @@ LOCAL_C_INCLUDES := \
external/llvm/include \
external/llvm/device/include \
external/libcxx/include \
external/elfutils/$(if $(filter 5,$(MESA_ANDROID_MAJOR_VERSION)),0.153/,$(if $(filter 6,$(MESA_ANDROID_MAJOR_VERSION)),src/))libelf
$(ELF_INCLUDES)

LOCAL_STATIC_LIBRARIES := libLLVMCore

@@ -1267,6 +1267,9 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
src[1] = to_float(ctx, src[1]);
result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
break;
case nir_op_irem:
result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
break;
case nir_op_idiv:
result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
break;

@@ -1745,9 +1748,12 @@ static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef ptr;
LLVMValueRef ptr, addr;

ptr = build_gep0(ctx, ctx->push_constants, get_src(ctx, instr->src[0]));
addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");

ptr = build_gep0(ctx, ctx->push_constants, addr);
ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));

return LLVMBuildLoad(ctx->builder, ptr, "");

@@ -2238,7 +2244,7 @@ static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
}

static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr, bool add_frag_pos)
nir_intrinsic_instr *instr)
{
const struct glsl_type *type = instr->variables[0]->var->type;
if(instr->variables[0]->deref.child)

@@ -2253,6 +2259,8 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
LLVMValueRef res;
int count;
enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);

@@ -2378,12 +2386,11 @@ static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
} else {
bool is_da = glsl_sampler_type_is_array(type) ||
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
bool add_frag_pos = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS;
LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);

params[0] = get_image_coords(ctx, instr, add_frag_pos);
params[0] = get_image_coords(ctx, instr);
params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
if (HAVE_LLVM <= 0x0309) {

@@ -2442,7 +2449,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
LLVMValueRef slc = i1false;

params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
params[1] = get_image_coords(ctx, instr, false); /* coords */
params[1] = get_image_coords(ctx, instr); /* coords */
params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
if (HAVE_LLVM <= 0x0309) {

@@ -2502,7 +2509,7 @@ static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
bool da = glsl_sampler_type_is_array(type) ||
glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;

coords = params[param_count++] = get_image_coords(ctx, instr, false);
coords = params[param_count++] = get_image_coords(ctx, instr);
params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
params[param_count++] = i1false; /* r128 */
params[param_count++] = da ? i1true : i1false; /* da */

@@ -3154,6 +3161,15 @@ static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
*fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
}

static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
LLVMValueRef coord)
{
coord = to_float(ctx, coord);
coord = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
coord = to_integer(ctx, coord);
return coord;
}

static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
{
LLVMValueRef result = NULL;

@@ -3211,6 +3227,11 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
}
}

if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
result = get_buffer_size(ctx, res_ptr, false);
goto write_result;
}

if (instr->op == nir_texop_texture_samples) {
LLVMValueRef res, samples, is_msaa;
res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");

@@ -3310,15 +3331,16 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
/* Pack texture coordinates */
if (coord) {
address[count++] = coords[0];
if (instr->coord_components > 1)
if (instr->coord_components > 1) {
if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
coords[1] = apply_round_slice(ctx, coords[1]);
}
address[count++] = coords[1];
}
if (instr->coord_components > 2) {
/* This seems like a bit of a hack - but it passes Vulkan CTS with it */
if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
coords[2] = to_float(ctx, coords[2]);
coords[2] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coords[2],
1, 0);
coords[2] = to_integer(ctx, coords[2]);
coords[2] = apply_round_slice(ctx, coords[2]);
}
address[count++] = coords[2];
}

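Note on the apply_round_slice() helper factored out above: it simply rounds the array-slice coordinate to the nearest integer (via llvm.rint) before it is packed into the texture address. A minimal scalar model in plain C, under the assumption that llvm.rint.f32 behaves like rintf():

#include <math.h>
#include <stdint.h>

/* Scalar sketch of apply_round_slice(): round the array-layer coordinate
 * to the nearest layer before sampling (ties to even, as rintf does). */
static int32_t round_array_slice(float slice)
{
    return (int32_t)rintf(slice);
}
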
@@ -21,9 +21,7 @@

include Makefile.sources

vulkan_includedir = $(includedir)/vulkan

vulkan_include_HEADERS = \
noinst_HEADERS = \
$(top_srcdir)/include/vulkan/vk_platform.h \
$(top_srcdir)/include/vulkan/vulkan.h

@@ -32,9 +30,6 @@ lib_LTLIBRARIES = libvulkan_radeon.la
# The gallium includes are for the util/u_math.h include from main/macros.h

AM_CPPFLAGS = \
$(AMDGPU_CFLAGS) \
$(VALGRIND_CFLAGS) \
$(DEFINES) \
-I$(top_srcdir)/include \
-I$(top_builddir)/src \
-I$(top_srcdir)/src \

@@ -48,7 +43,10 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/include
-I$(top_srcdir)/src/gallium/include \
$(AMDGPU_CFLAGS) \
$(VALGRIND_CFLAGS) \
$(DEFINES)

AM_CFLAGS = \
$(VISIBILITY_CFLAGS) \

@@ -438,7 +438,8 @@ radv_emit_graphics_raster_state(struct radv_cmd_buffer *cmd_buffer,
raster->spi_interp_control);

radeon_set_context_reg_seq(cmd_buffer->cs, R_028A00_PA_SU_POINT_SIZE, 2);
radeon_emit(cmd_buffer->cs, 0);
unsigned tmp = (unsigned)(1.0 * 8.0);
radeon_emit(cmd_buffer->cs, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp));
radeon_emit(cmd_buffer->cs, S_028A04_MIN_SIZE(radv_pack_float_12p4(0)) |
S_028A04_MAX_SIZE(radv_pack_float_12p4(8192/2))); /* R_028A04_PA_SU_POINT_MINMAX */

@@ -2605,6 +2606,7 @@ void radv_CmdPipelineBarrier(
break;
case VK_ACCESS_COLOR_ATTACHMENT_READ_BIT:
case VK_ACCESS_TRANSFER_READ_BIT:
case VK_ACCESS_TRANSFER_WRITE_BIT:
case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
flush_bits |= RADV_CMD_FLUSH_AND_INV_FRAMEBUFFER | RADV_CMD_FLAG_INV_GLOBAL_L2;
default:

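Note on the point-size hunk above: the PA_SU_POINT_SIZE/MINMAX registers take 12.4 fixed-point half-sizes, which is why a 1.0-pixel point is written as (unsigned)(1.0 * 8.0). A hedged sketch of that packing; the clamp range is an assumption here, not something shown in this diff:

#include <stdint.h>

/* 12.4 fixed point: 12 integer bits, 4 fractional bits. */
static uint16_t pack_float_12p4(float x)
{
    if (x < 0.0f)
        x = 0.0f;
    if (x > 4095.9375f)        /* largest representable 12.4 value (assumed clamp) */
        x = 4095.9375f;
    return (uint16_t)(x * 16.0f);
}
/* A 1.0-pixel point has half-size 0.5; 0.5 * 16 == 8, matching the hunk. */
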
@@ -989,8 +989,7 @@ VkResult radv_QueueSubmit(
if (queue->device->trace_bo)
*queue->device->trace_id_ptr = 0;

ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array,
pSubmits[i].commandBufferCount,
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
(struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
b ? pSubmits[i].waitSemaphoreCount : 0,
(struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,

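The new cs_submit() call above submits `advance` command buffers starting at cs_array + j rather than the whole array at once; the surrounding loop is not part of this hunk. A sketch of the general chunking pattern only — the names, chunk size, and semaphore placement are assumptions, not the radv code itself:

#include <stdint.h>
#include <stddef.h>

#define MAX_CS_PER_SUBMIT 192                    /* assumed limit */
#define MIN2(a, b) ((a) < (b) ? (a) : (b))

/* Stub standing in for the winsys submit entry point. */
static void submit_chunk(void *const *cs, uint32_t count,
                         int wait_sem_count, int signal_sem_count)
{
    (void)cs; (void)count; (void)wait_sem_count; (void)signal_sem_count;
}

static void submit_in_chunks(void *const *cs_array, uint32_t count,
                             int wait_count, int signal_count)
{
    uint32_t j = 0;
    while (j < count) {
        uint32_t advance = MIN2(MAX_CS_PER_SUBMIT, count - j);
        /* wait semaphores only on the first chunk, signals only on the last */
        submit_chunk(cs_array + j, advance,
                     j == 0 ? wait_count : 0,
                     j + advance == count ? signal_count : 0);
        j += advance;
    }
}
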
@@ -30,7 +30,7 @@

/* Operations for lower_instructions() */
#define SUB_TO_ADD_NEG 0x01
#define DIV_TO_MUL_RCP 0x02
#define FDIV_TO_MUL_RCP 0x02
#define EXP_TO_EXP2 0x04
#define POW_TO_EXP2 0x08
#define LOG_TO_LOG2 0x10

@@ -49,6 +49,8 @@
#define FIND_LSB_TO_FLOAT_CAST 0x20000
#define FIND_MSB_TO_FLOAT_CAST 0x40000
#define IMUL_HIGH_TO_MUL 0x80000
#define DDIV_TO_MUL_RCP 0x100000
#define DIV_TO_MUL_RCP (FDIV_TO_MUL_RCP | DDIV_TO_MUL_RCP)

/**
* \see class lower_packing_builtins_visitor

@@ -535,7 +535,7 @@ private:
const char *str_end;
while((str_start = strchr(name_copy, '[')) &&
(str_end = strchr(name_copy, ']'))) {
memmove(str_start, str_end + 1, 1 + strlen(str_end));
memmove(str_start, str_end + 1, 1 + strlen(str_end + 1));
}

unsigned index = 0;

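The one-character fix above matters because the copy length must be the tail after ']' plus its terminating NUL, i.e. strlen(str_end + 1) + 1; strlen(str_end) + 1 copies one byte too many and reads past the end of the buffer. A standalone illustration (simplified from the linker code, not the code itself):

#include <stdio.h>
#include <string.h>

/* Strip "[...]" segments from a uniform name in place. */
static void strip_brackets(char *name)
{
    char *start, *end;
    while ((start = strchr(name, '[')) && (end = strchr(name, ']')))
        memmove(start, end + 1, 1 + strlen(end + 1));
}

int main(void)
{
    char name[] = "block[2].member[3]";
    strip_brackets(name);
    puts(name);               /* prints "block.member" */
    return 0;
}
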
@@ -54,8 +54,8 @@
* want to recognize add(op0, neg(op1)) or the other way around to
* produce a subtract anyway.
*
* DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP:
* --------------------------------------
* FDIV_TO_MUL_RCP, DDIV_TO_MUL_RCP, and INT_DIV_TO_MUL_RCP:
* ---------------------------------------------------------
* Breaks an ir_binop_div expression down to op0 * (rcp(op1)).
*
* Many GPUs don't have a divide instruction (945 and 965 included),

@@ -63,9 +63,11 @@
* reciprocal. By breaking the operation down, constant reciprocals
* can get constant folded.
*
* DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP
* handles the integer case, converting to and from floating point so that
* RCP is possible.
* FDIV_TO_MUL_RCP only lowers single-precision floating point division;
* DDIV_TO_MUL_RCP only lowers double-precision floating point division.
* DIV_TO_MUL_RCP is a convenience macro that sets both flags.
* INT_DIV_TO_MUL_RCP handles the integer case, converting to and from floating
* point so that RCP is possible.
*
* EXP_TO_EXP2 and LOG_TO_LOG2:
* ----------------------------

@@ -326,7 +328,8 @@ lower_instructions_visitor::mod_to_floor(ir_expression *ir)
/* Don't generate new IR that would need to be lowered in an additional
* pass.
*/
if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
if ((lowering(FDIV_TO_MUL_RCP) && ir->type->is_float()) ||
(lowering(DDIV_TO_MUL_RCP) && ir->type->is_double()))
div_to_mul_rcp(div_expr);

ir_expression *const floor_expr =

@@ -1599,8 +1602,8 @@ lower_instructions_visitor::visit_leave(ir_expression *ir)
case ir_binop_div:
if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP))
int_div_to_mul_rcp(ir);
else if ((ir->operands[1]->type->is_float() ||
ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP))
else if ((ir->operands[1]->type->is_float() && lowering(FDIV_TO_MUL_RCP)) ||
(ir->operands[1]->type->is_double() && lowering(DDIV_TO_MUL_RCP)))
div_to_mul_rcp(ir);
break;

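The comment block above spells out the new flag split. As a toy scalar model (illustration only, an assumption rather than the GLSL IR pass itself), the float and double lowerings rewrite a/b as a * (1/b), and a driver that can only do single-precision rcp would set FDIV_TO_MUL_RCP without DDIV_TO_MUL_RCP:

/* Toy scalar model of what the lowered expressions compute. */
static float  lower_fdiv(float a, float b)   { return a * (1.0f / b); }
static double lower_ddiv(double a, double b) { return a * (1.0 / b); }
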
|
@@ -37,6 +37,8 @@
|
||||
|
||||
bool error = false;
|
||||
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
|
||||
static void
|
||||
expect_equal(uint64_t actual, uint64_t expected, const char *test)
|
||||
{
|
||||
@@ -378,10 +380,12 @@ test_put_key_and_get_key(void)
|
||||
|
||||
disk_cache_destroy(cache);
|
||||
}
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
#ifdef ENABLE_SHADER_CACHE
|
||||
int err;
|
||||
|
||||
test_disk_cache_create();
|
||||
@@ -392,6 +396,7 @@ main(void)
|
||||
|
||||
err = rmrf_local(CACHE_TEST_TMP);
|
||||
expect_equal(err, 0, "Removing " CACHE_TEST_TMP " again");
|
||||
#endif /* ENABLE_SHADER_CACHE */
|
||||
|
||||
return error ? 1 : 0;
|
||||
}
|
||||
|
@@ -210,43 +210,27 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
return true;

case nir_type_int:
for (unsigned i = 0; i < num_components; ++i) {
int64_t val;
switch (load->def.bit_size) {
case 32:
val = load->value.i32[new_swizzle[i]];
break;
case 64:
val = load->value.i64[new_swizzle[i]];
break;
default:
unreachable("unknown bit size");
}

if (val != const_val->data.i)
return false;
}
return true;

case nir_type_uint:
case nir_type_bool32:
for (unsigned i = 0; i < num_components; ++i) {
uint64_t val;
switch (load->def.bit_size) {
case 32:
val = load->value.u32[new_swizzle[i]];
break;
case 64:
val = load->value.u64[new_swizzle[i]];
break;
default:
unreachable("unknown bit size");
switch (load->def.bit_size) {
case 32:
for (unsigned i = 0; i < num_components; ++i) {
if (load->value.u32[new_swizzle[i]] !=
(uint32_t)const_val->data.u)
return false;
}
return true;

if (val != const_val->data.u)
return false;
case 64:
for (unsigned i = 0; i < num_components; ++i) {
if (load->value.u64[new_swizzle[i]] != const_val->data.u)
return false;
}
return true;

default:
unreachable("unknown bit size");
}
return true;

default:
unreachable("Invalid alu source type");

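The restructured unsigned/bool case above switches on load->def.bit_size first and then compares every swizzled component; note that the 32-bit path truncates the search constant to uint32_t before comparing. A standalone sketch of that per-component check, with simplified stand-in parameters rather than the real nir types:

#include <stdbool.h>
#include <stdint.h>

/* Compare one swizzled component of a constant load against the search
 * value, honoring the load's bit size. */
static bool const_component_matches(unsigned bit_size,
                                    uint32_t u32_val, uint64_t u64_val,
                                    uint64_t search_val)
{
    switch (bit_size) {
    case 32: return u32_val == (uint32_t)search_val;
    case 64: return u64_val == search_val;
    default: return false;     /* "unknown bit size" in the real pass */
    }
}
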
@@ -1102,23 +1102,43 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
SpvOp opcode = get_specialization(b, val, w[3]);
switch (opcode) {
case SpvOpVectorShuffle: {
struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
unsigned len0 = glsl_get_vector_elements(v0->const_type);
unsigned len1 = glsl_get_vector_elements(v1->const_type);
struct vtn_value *v0 = &b->values[w[4]];
struct vtn_value *v1 = &b->values[w[5]];

assert(v0->value_type == vtn_value_type_constant ||
v0->value_type == vtn_value_type_undef);
assert(v1->value_type == vtn_value_type_constant ||
v1->value_type == vtn_value_type_undef);

unsigned len0 = v0->value_type == vtn_value_type_constant ?
glsl_get_vector_elements(v0->const_type) :
glsl_get_vector_elements(v0->type->type);
unsigned len1 = v1->value_type == vtn_value_type_constant ?
glsl_get_vector_elements(v1->const_type) :
glsl_get_vector_elements(v1->type->type);

assert(len0 + len1 < 16);

unsigned bit_size = glsl_get_bit_size(val->const_type);
assert(bit_size == glsl_get_bit_size(v0->const_type) &&
bit_size == glsl_get_bit_size(v1->const_type));
unsigned bit_size0 = v0->value_type == vtn_value_type_constant ?
glsl_get_bit_size(v0->const_type) :
glsl_get_bit_size(v0->type->type);
unsigned bit_size1 = v1->value_type == vtn_value_type_constant ?
glsl_get_bit_size(v1->const_type) :
glsl_get_bit_size(v1->type->type);

assert(bit_size == bit_size0 && bit_size == bit_size1);

if (bit_size == 64) {
uint64_t u64[8];
for (unsigned i = 0; i < len0; i++)
u64[i] = v0->constant->values[0].u64[i];
for (unsigned i = 0; i < len1; i++)
u64[len0 + i] = v1->constant->values[0].u64[i];
if (v0->value_type == vtn_value_type_constant) {
for (unsigned i = 0; i < len0; i++)
u64[i] = v0->constant->values[0].u64[i];
}
if (v1->value_type == vtn_value_type_constant) {
for (unsigned i = 0; i < len1; i++)
u64[len0 + i] = v1->constant->values[0].u64[i];
}

for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
uint32_t comp = w[i + 6];

@@ -1132,11 +1152,14 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
}
} else {
uint32_t u32[8];
for (unsigned i = 0; i < len0; i++)
u32[i] = v0->constant->values[0].u32[i];

for (unsigned i = 0; i < len1; i++)
u32[len0 + i] = v1->constant->values[0].u32[i];
if (v0->value_type == vtn_value_type_constant) {
for (unsigned i = 0; i < len0; i++)
u32[i] = v0->constant->values[0].u32[i];
}
if (v1->value_type == vtn_value_type_constant) {
for (unsigned i = 0; i < len1; i++)
u32[len0 + i] = v1->constant->values[0].u32[i];
}

for (unsigned i = 0, j = 0; i < count - 6; i++, j++) {
uint32_t comp = w[i + 6];

@@ -2902,6 +2925,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
vtn_handle_constant(b, opcode, w, count);
break;

case SpvOpUndef:
case SpvOpVariable:
vtn_handle_variables(b, opcode, w, count);
break;

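With the change above, OpSpecConstantOp VectorShuffle accepts operands that are OpUndef: only the constant operands' components are copied into the scratch u32/u64 arrays, and lanes sourced from an undef operand keep whatever placeholder value is there, since their result is undefined anyway. A small model of the 32-bit path, with invented names and a zero-filled scratch array for safety:

#include <stdbool.h>
#include <stdint.h>

/* comps[] indexes the concatenation of src0 (len0 lanes) and src1 (len1
 * lanes); 0xffffffff marks an undefined shuffle component in SPIR-V. */
static void shuffle_u32(uint32_t *dst, const uint32_t *comps, unsigned ncomps,
                        const uint32_t *src0, unsigned len0, bool src0_defined,
                        const uint32_t *src1, unsigned len1, bool src1_defined)
{
    uint32_t scratch[16] = {0};
    if (src0_defined)
        for (unsigned i = 0; i < len0; i++) scratch[i] = src0[i];
    if (src1_defined)
        for (unsigned i = 0; i < len1; i++) scratch[len0 + i] = src1[i];

    for (unsigned i = 0; i < ncomps; i++)
        dst[i] = (comps[i] == 0xffffffffu) ? 0 : scratch[comps[i]];
}
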
@@ -1199,7 +1199,8 @@ var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member,
is_vertex_input = false;
location += vtn_var->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0;
} else {
unreachable("Location must be on input or output variable");
vtn_warn("Location must be on input or output variable");
return;
}

if (vtn_var->var) {

@@ -1267,6 +1268,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count)
{
switch (opcode) {
case SpvOpUndef: {
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
break;
}

case SpvOpVariable: {
struct vtn_variable *var = rzalloc(b, struct vtn_variable);
var->type = vtn_value(b, w[1], vtn_value_type_type)->type;

@@ -96,8 +96,8 @@ AM_CFLAGS += \
-I$(top_srcdir)/src/egl/drivers/dri2 \
-I$(top_srcdir)/src/gbm/backends/dri \
-I$(top_srcdir)/src/egl/wayland/wayland-egl \
-I$(top_srcdir)/src/egl/wayland/wayland-drm \
-I$(top_builddir)/src/egl/wayland/wayland-drm \
-I$(top_srcdir)/src/egl/wayland/wayland-drm \
-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
-D_EGL_BUILT_IN_DRIVER_DRI2

@@ -34,7 +34,7 @@ LOCAL_C_INCLUDES += \
external/llvm/include \
external/llvm/device/include \
external/libcxx/include \
external/elfutils/$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),0.153/)libelf
$(ELF_INCLUDES)
endif

include $(MESA_COMMON_MK)

@@ -527,7 +527,7 @@ lp_build_gather(struct gallivm_state *gallivm,
if (vec_zext) {
res = LLVMBuildZExt(gallivm->builder, res, res_t, "");
if (vector_justify) {
#if PIPE_ARCH_BIG_ENDIAN
#ifdef PIPE_ARCH_BIG_ENDIAN
unsigned sv = dst_type.width - src_width;
res = LLVMBuildShl(gallivm->builder, res,
lp_build_const_int_vec(gallivm, res_type, sv), "");

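The change from #if to #ifdef above matters if PIPE_ARCH_BIG_ENDIAN is defined without a numeric value (an assumption about the Mesa config headers): #if needs a constant expression, while #ifdef only tests whether the macro exists. The preprocessor behaviour itself is standard C:

#define FEATURE_X                 /* defined, but with no value */

#ifdef FEATURE_X                  /* true: the macro is defined */
int have_feature_x = 1;
#endif

/* "#if FEATURE_X" would expand to "#if" with an empty expression and fail
 * to compile; for an *undefined* macro, "#if" silently evaluates it as 0. */
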
@@ -2624,7 +2624,6 @@ lp_set_default_actions_cpu(
bld_base->op_actions[TGSI_OPCODE_DSLT].emit = dslt_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_DSNE].emit = dsne_emit_cpu;

bld_base->op_actions[TGSI_OPCODE_DDIV].emit = div_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = drecip_sqrt_emit_cpu;
bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = dsqrt_emit_cpu;

@@ -149,6 +149,7 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
break;
case CPUFREQ_MAXIMUM:
snprintf(gr->name, sizeof(gr->name), "%s-Max", cfi->name);
break;
default:
return;
}

@@ -209,6 +209,16 @@ micro_dadd(union tgsi_double_channel *dst,
dst->d[3] = src[0].d[3] + src[1].d[3];
}

static void
micro_ddiv(union tgsi_double_channel *dst,
const union tgsi_double_channel *src)
{
dst->d[0] = src[0].d[0] / src[1].d[0];
dst->d[1] = src[0].d[1] / src[1].d[1];
dst->d[2] = src[0].d[2] / src[1].d[2];
dst->d[3] = src[0].d[3] / src[1].d[3];
}

static void
micro_ddx(union tgsi_exec_channel *dst,
const union tgsi_exec_channel *src)

@@ -5995,6 +6005,10 @@ exec_instruction(
exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
break;

case TGSI_OPCODE_DDIV:
exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
break;

case TGSI_OPCODE_DMUL:
exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
break;

@@ -1021,7 +1021,7 @@ label_mark_use(struct etna_compile *c, struct etna_compile_label *label)
static struct etna_compile_frame *
find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
{
for (unsigned sp = c->frame_sp; sp >= 0; sp--)
for (int sp = c->frame_sp; sp >= 0; sp--)
if (c->frame_stack[sp].type == type)
return &c->frame_stack[sp];

@@ -1444,7 +1444,42 @@ static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
if (c->specs->has_sin_cos_sqrt) {
if (c->specs->has_new_sin_cos) { /* Alternative SIN/COS */
/* On newer chips alternative SIN/COS instructions are implemented,
* which:
* - Need their input scaled by 1/pi instead of 2/pi
* - Output an x and y component, which need to be multiplied to
* get the result
*/
/* TGSI lowering should deal with SCS */
assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);

struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
emit_inst(c, &(struct etna_inst) {
.opcode = INST_OPCODE_MUL,
.sat = 0,
.dst = etna_native_to_dst(temp, INST_COMPS_Z),
.src[0] = src[0], /* any swizzling happens here */
.src[1] = alloc_imm_f32(c, 1.0f / M_PI),
});
emit_inst(c, &(struct etna_inst) {
.opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
? INST_OPCODE_COS
: INST_OPCODE_SIN,
.sat = 0,
.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
.src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
.tex = { .amode=1 }, /* Unknown bit needs to be set */
});
emit_inst(c, &(struct etna_inst) {
.opcode = INST_OPCODE_MUL,
.sat = inst->Instruction.Saturate,
.dst = convert_dst(c, &inst->Dst[0]),
.src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
.src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
});

} else if (c->specs->has_sin_cos_sqrt) {
/* TGSI lowering should deal with SCS */
assert(inst->Instruction.Opcode != TGSI_OPCODE_SCS);

@@ -491,6 +491,23 @@ etna_emit_state(struct etna_context *ctx)
/*00C14*/ EMIT_STATE(SE_DEPTH_BIAS, rasterizer->SE_DEPTH_BIAS);
/*00C18*/ EMIT_STATE(SE_CONFIG, rasterizer->SE_CONFIG);
}
if (unlikely(dirty & (ETNA_DIRTY_SCISSOR | ETNA_DIRTY_FRAMEBUFFER |
ETNA_DIRTY_RASTERIZER | ETNA_DIRTY_VIEWPORT))) {
struct etna_rasterizer_state *rasterizer = etna_rasterizer_state(ctx->rasterizer);

uint32_t clip_right =
MIN2(ctx->framebuffer.SE_CLIP_RIGHT, ctx->viewport.SE_CLIP_RIGHT);
uint32_t clip_bottom =
MIN2(ctx->framebuffer.SE_CLIP_BOTTOM, ctx->viewport.SE_CLIP_BOTTOM);

if (rasterizer->scissor) {
clip_right = MIN2(ctx->scissor.SE_CLIP_RIGHT, clip_right);
clip_bottom = MIN2(ctx->scissor.SE_CLIP_BOTTOM, clip_bottom);
}

/*00C20*/ EMIT_STATE_FIXP(SE_CLIP_RIGHT, clip_right);
/*00C24*/ EMIT_STATE_FIXP(SE_CLIP_BOTTOM, clip_bottom);
}
if (unlikely(dirty & (ETNA_DIRTY_SHADER))) {
/*00E00*/ EMIT_STATE(RA_CONTROL, ctx->shader_state.RA_CONTROL);
}

@@ -47,6 +47,17 @@
/* PE render targets must be aligned to 64 bytes */
#define ETNA_PE_ALIGNMENT (64)

/* These demarcate the margin (fixp16) between the computed sizes and the
value sent to the chip. These have been set to the numbers used by the
Vivante driver on gc2000. They used to be -1 for scissor right and bottom. I
am not sure whether older hardware was relying on these or they were just a
guess. But if so, these need to be moved to the _specs structure.
*/
#define ETNA_SE_SCISSOR_MARGIN_RIGHT (0x1119)
#define ETNA_SE_SCISSOR_MARGIN_BOTTOM (0x1111)
#define ETNA_SE_CLIP_MARGIN_RIGHT (0xffff)
#define ETNA_SE_CLIP_MARGIN_BOTTOM (0xffff)

/* GPU chip 3D specs */
struct etna_specs {
/* supports SUPERTILE (64x64) tiling? */

@@ -59,6 +70,8 @@ struct etna_specs {
unsigned has_sign_floor_ceil : 1;
/* can use VS_RANGE, PS_RANGE registers*/
unsigned has_shader_range_registers : 1;
/* has the new sin/cos functions */
unsigned has_new_sin_cos : 1;
/* can use any kind of wrapping mode on npot textures */
unsigned npot_tex_any_wrap;
/* number of bits per TS tile */

@@ -126,6 +139,8 @@ struct compiled_scissor_state {
uint32_t SE_SCISSOR_TOP;
uint32_t SE_SCISSOR_RIGHT;
uint32_t SE_SCISSOR_BOTTOM;
uint32_t SE_CLIP_RIGHT;
uint32_t SE_CLIP_BOTTOM;
};

/* Compiled pipe_viewport_state */

@@ -140,6 +155,8 @@ struct compiled_viewport_state {
uint32_t SE_SCISSOR_TOP;
uint32_t SE_SCISSOR_RIGHT;
uint32_t SE_SCISSOR_BOTTOM;
uint32_t SE_CLIP_RIGHT;
uint32_t SE_CLIP_BOTTOM;
uint32_t PE_DEPTH_NEAR;
uint32_t PE_DEPTH_FAR;
};

@@ -162,6 +179,8 @@ struct compiled_framebuffer_state {
uint32_t SE_SCISSOR_TOP;
uint32_t SE_SCISSOR_RIGHT;
uint32_t SE_SCISSOR_BOTTOM;
uint32_t SE_CLIP_RIGHT;
uint32_t SE_CLIP_BOTTOM;
uint32_t RA_MULTISAMPLE_UNK00E04;
uint32_t RA_MULTISAMPLE_UNK00E10[VIVS_RA_MULTISAMPLE_UNK00E10__LEN];
uint32_t RA_CENTROID_TABLE[VIVS_RA_CENTROID_TABLE__LEN];

@@ -201,7 +201,10 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,

size = setup_miptree(rsc, paddingX, paddingY, msaa_xscale, msaa_yscale);

struct etna_bo *bo = etna_bo_new(screen->dev, size, DRM_ETNA_GEM_CACHE_WC);
uint32_t flags = DRM_ETNA_GEM_CACHE_WC;
if (templat->bind & PIPE_BIND_VERTEX_BUFFER)
flags |= DRM_ETNA_GEM_FORCE_MMU;
struct etna_bo *bo = etna_bo_new(screen->dev, size, flags);
if (unlikely(bo == NULL)) {
BUG("Problem allocating video memory for resource");
return NULL;

@@ -469,8 +469,11 @@ etna_screen_is_format_supported(struct pipe_screen *pscreen,
return FALSE;

if (usage & PIPE_BIND_RENDER_TARGET) {
/* if render target, must be RS-supported format */
if (translate_rs_format(format) != ETNA_NO_MATCH) {
/* If render target, must be RS-supported format that is not rb swapped.
* Exposing rb swapped (or other swizzled) formats for rendering would
* involve swizzing in the pixel shader.
*/
if (translate_rs_format(format) != ETNA_NO_MATCH && !translate_rs_format_rb_swap(format)) {
/* Validate MSAA; number of samples must be allowed, and render target
* must have MSAA'able format. */
if (sample_count > 1) {

@@ -617,6 +620,8 @@ etna_get_specs(struct etna_screen *screen)
screen->model >= 0x1000 || screen->model == 0x880;
screen->specs.npot_tex_any_wrap =
VIV_FEATURE(screen, chipMinorFeatures1, NON_POWER_OF_TWO);
screen->specs.has_new_sin_cos =
VIV_FEATURE(screen, chipMinorFeatures3, HAS_FAST_TRANSCENDENTALS);

if (instruction_count > 256) { /* unified instruction memory? */
screen->specs.vs_offset = 0xC000;

@@ -323,8 +323,10 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
/* Scissor setup */
cs->SE_SCISSOR_LEFT = 0; /* affected by rasterizer and scissor state as well */
cs->SE_SCISSOR_TOP = 0;
cs->SE_SCISSOR_RIGHT = (sv->width << 16) - 1;
cs->SE_SCISSOR_BOTTOM = (sv->height << 16) - 1;
cs->SE_SCISSOR_RIGHT = (sv->width << 16) + ETNA_SE_SCISSOR_MARGIN_RIGHT;
cs->SE_SCISSOR_BOTTOM = (sv->height << 16) + ETNA_SE_SCISSOR_MARGIN_BOTTOM;
cs->SE_CLIP_RIGHT = (sv->width << 16) + ETNA_SE_CLIP_MARGIN_RIGHT;
cs->SE_CLIP_BOTTOM = (sv->height << 16) + ETNA_SE_CLIP_MARGIN_BOTTOM;

cs->TS_MEM_CONFIG = ts_mem_config;

@@ -345,13 +347,17 @@ etna_set_scissor_states(struct pipe_context *pctx, unsigned start_slot,
{
struct etna_context *ctx = etna_context(pctx);
struct compiled_scissor_state *cs = &ctx->scissor;
assert(ss->minx <= ss->maxx);
assert(ss->miny <= ss->maxy);

/* note that this state is only used when rasterizer_state->scissor is on */
ctx->scissor_s = *ss;
cs->SE_SCISSOR_LEFT = (ss->minx << 16);
cs->SE_SCISSOR_TOP = (ss->miny << 16);
cs->SE_SCISSOR_RIGHT = (ss->maxx << 16) - 1;
cs->SE_SCISSOR_BOTTOM = (ss->maxy << 16) - 1;
cs->SE_SCISSOR_RIGHT = (ss->maxx << 16) + ETNA_SE_SCISSOR_MARGIN_RIGHT;
cs->SE_SCISSOR_BOTTOM = (ss->maxy << 16) + ETNA_SE_SCISSOR_MARGIN_BOTTOM;
cs->SE_CLIP_RIGHT = (ss->maxx << 16) + ETNA_SE_CLIP_MARGIN_RIGHT;
cs->SE_CLIP_BOTTOM = (ss->maxy << 16) + ETNA_SE_CLIP_MARGIN_BOTTOM;

ctx->dirty |= ETNA_DIRTY_SCISSOR;
}

@@ -387,22 +393,14 @@ etna_set_viewport_states(struct pipe_context *pctx, unsigned start_slot,
/* Compute scissor rectangle (fixp) from viewport.
* Make sure left is always < right and top always < bottom.
*/
cs->SE_SCISSOR_LEFT = etna_f32_to_fixp16(MAX2(vs->translate[0] - vs->scale[0], 0.0f));
cs->SE_SCISSOR_TOP = etna_f32_to_fixp16(MAX2(vs->translate[1] - vs->scale[1], 0.0f));
cs->SE_SCISSOR_RIGHT = etna_f32_to_fixp16(MAX2(vs->translate[0] + vs->scale[0], 0.0f));
cs->SE_SCISSOR_BOTTOM = etna_f32_to_fixp16(MAX2(vs->translate[1] + vs->scale[1], 0.0f));

if (cs->SE_SCISSOR_LEFT > cs->SE_SCISSOR_RIGHT) {
uint32_t tmp = cs->SE_SCISSOR_RIGHT;
cs->SE_SCISSOR_RIGHT = cs->SE_SCISSOR_LEFT;
cs->SE_SCISSOR_LEFT = tmp;
}

if (cs->SE_SCISSOR_TOP > cs->SE_SCISSOR_BOTTOM) {
uint32_t tmp = cs->SE_SCISSOR_BOTTOM;
cs->SE_SCISSOR_BOTTOM = cs->SE_SCISSOR_TOP;
cs->SE_SCISSOR_TOP = tmp;
}
cs->SE_SCISSOR_LEFT = etna_f32_to_fixp16(MAX2(vs->translate[0] - fabsf(vs->scale[0]), 0.0f));
cs->SE_SCISSOR_TOP = etna_f32_to_fixp16(MAX2(vs->translate[1] - fabsf(vs->scale[1]), 0.0f));
uint32_t right_fixp = etna_f32_to_fixp16(MAX2(vs->translate[0] + fabsf(vs->scale[0]), 0.0f));
uint32_t bottom_fixp = etna_f32_to_fixp16(MAX2(vs->translate[1] + fabsf(vs->scale[1]), 0.0f));
cs->SE_SCISSOR_RIGHT = right_fixp + ETNA_SE_SCISSOR_MARGIN_RIGHT;
cs->SE_SCISSOR_BOTTOM = bottom_fixp + ETNA_SE_SCISSOR_MARGIN_BOTTOM;
cs->SE_CLIP_RIGHT = right_fixp + ETNA_SE_CLIP_MARGIN_RIGHT;
cs->SE_CLIP_BOTTOM = bottom_fixp + ETNA_SE_CLIP_MARGIN_BOTTOM;

cs->PE_DEPTH_NEAR = fui(0.0); /* not affected if depth mode is Z (as in GL) */
cs->PE_DEPTH_FAR = fui(1.0);

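Note on the viewport hunk above: the right/bottom edges are converted to 16.16 fixed point (fabsf makes flipped, negative-scale viewports work) and then the margin constants from etnaviv_internal.h are added. A small sketch of that computation, under the assumption that etna_f32_to_fixp16 is a plain round-to-16.16 conversion; the margin values are copied from the earlier hunk:

#include <math.h>
#include <stdint.h>

#define ETNA_SE_SCISSOR_MARGIN_RIGHT 0x1119
#define ETNA_SE_CLIP_MARGIN_RIGHT    0xffff

static uint32_t f32_to_fixp16(float f)      /* 16.16 fixed point (assumed) */
{
    return (uint32_t)lrintf(f * 65536.0f);
}

static void viewport_right_edges(float translate_x, float scale_x,
                                 uint32_t *scissor_right, uint32_t *clip_right)
{
    uint32_t right = f32_to_fixp16(fmaxf(translate_x + fabsf(scale_x), 0.0f));
    *scissor_right = right + ETNA_SE_SCISSOR_MARGIN_RIGHT;
    *clip_right    = right + ETNA_SE_CLIP_MARGIN_RIGHT;
}
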
@@ -9,6 +9,7 @@ AM_CFLAGS = \
$(GALLIUM_DRIVER_CFLAGS) \
$(FREEDRENO_CFLAGS)

MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
$(MKDIR_GEN)
$(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py > $@ || ($(RM) $@; false)

@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)

Copyright (C) 2013-2016 by the following authors:

@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)

Copyright (C) 2013-2016 by the following authors:

@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)

Copyright (C) 2013-2016 by the following authors:

@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)

Copyright (C) 2013-2017 by the following authors:

@@ -2028,6 +2028,8 @@ static inline uint32_t A5XX_GRAS_CL_VPORT_ZSCALE_0(float val)
}

#define REG_A5XX_GRAS_SU_CNTL 0x0000e090
#define A5XX_GRAS_SU_CNTL_CULL_FRONT 0x00000001
#define A5XX_GRAS_SU_CNTL_CULL_BACK 0x00000002
#define A5XX_GRAS_SU_CNTL_FRONT_CW 0x00000004
#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK 0x000007f8
#define A5XX_GRAS_SU_CNTL_LINEHALFWIDTH__SHIFT 3

@@ -2909,6 +2911,12 @@ static inline uint32_t A5XX_VPC_PACK_NUMNONPOSVAR(uint32_t val)
{
return ((val) << A5XX_VPC_PACK_NUMNONPOSVAR__SHIFT) & A5XX_VPC_PACK_NUMNONPOSVAR__MASK;
}
#define A5XX_VPC_PACK_PSIZELOC__MASK 0x0000ff00
#define A5XX_VPC_PACK_PSIZELOC__SHIFT 8
static inline uint32_t A5XX_VPC_PACK_PSIZELOC(uint32_t val)
{
return ((val) << A5XX_VPC_PACK_PSIZELOC__SHIFT) & A5XX_VPC_PACK_PSIZELOC__MASK;
}

#define REG_A5XX_VPC_FS_PRIMITIVEID_CNTL 0x0000e2a0

@@ -3049,19 +3057,15 @@ static inline uint32_t A5XX_VFD_DECODE_INSTR_IDX(uint32_t val)
{
return ((val) << A5XX_VFD_DECODE_INSTR_IDX__SHIFT) & A5XX_VFD_DECODE_INSTR_IDX__MASK;
}
#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000
#define A5XX_VFD_DECODE_INSTR_FORMAT__MASK 0x3ff00000
#define A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT 20
static inline uint32_t A5XX_VFD_DECODE_INSTR_FORMAT(enum a5xx_vtx_fmt val)
{
return ((val) << A5XX_VFD_DECODE_INSTR_FORMAT__SHIFT) & A5XX_VFD_DECODE_INSTR_FORMAT__MASK;
}
#define A5XX_VFD_DECODE_INSTR_SWAP__MASK 0xc0000000
#define A5XX_VFD_DECODE_INSTR_SWAP__SHIFT 30
static inline uint32_t A5XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val)
{
return ((val) << A5XX_VFD_DECODE_INSTR_SWAP__SHIFT) & A5XX_VFD_DECODE_INSTR_SWAP__MASK;
}
#define A5XX_VFD_DECODE_INSTR_INSTANCED 0x00020000
#define A5XX_VFD_DECODE_INSTR_UNK30 0x40000000
#define A5XX_VFD_DECODE_INSTR_FLOAT 0x80000000

static inline uint32_t REG_A5XX_VFD_DECODE_STEP_RATE(uint32_t i0) { return 0x0000e48b + 0x2*i0; }

@@ -3167,6 +3171,12 @@ static inline uint32_t A5XX_SP_GS_CONTROL_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A5XX_SP_FS_CONFIG_MAX_CONST 0x0000e58b

#define REG_A5XX_SP_VS_CTRL_REG0 0x0000e590
#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00000008
#define A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 3
static inline uint32_t A5XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A5XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
}
#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A5XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)

@@ -3259,6 +3269,12 @@ static inline uint32_t A5XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
#define REG_A5XX_SP_VS_OBJ_START_HI 0x0000e5ad

#define REG_A5XX_SP_FS_CTRL_REG0 0x0000e5c0
#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00000008
#define A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 3
static inline uint32_t A5XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A5XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT) & A5XX_SP_FS_CTRL_REG0_THREADSIZE__MASK;
}
#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)

@@ -3328,6 +3344,7 @@ static inline uint32_t A5XX_SP_FS_MRT_REG_COLOR_FORMAT(enum a5xx_color_fmt val)
{
return ((val) << A5XX_SP_FS_MRT_REG_COLOR_FORMAT__SHIFT) & A5XX_SP_FS_MRT_REG_COLOR_FORMAT__MASK;
}
#define A5XX_SP_FS_MRT_REG_COLOR_SRGB 0x00000400

#define REG_A5XX_UNKNOWN_E5DB 0x0000e5db

@@ -3381,6 +3398,12 @@ static inline uint32_t A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(enum a3xx_msaa_sample
#define REG_A5XX_TPL1_TP_FS_ROTATION_CNTL 0x0000e764

#define REG_A5XX_HLSQ_CONTROL_0_REG 0x0000e784
#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000001
#define A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 0
static inline uint32_t A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
}

#define REG_A5XX_HLSQ_CONTROL_1_REG 0x0000e785
#define A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD__MASK 0x0000003f

@@ -60,12 +60,6 @@ draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_RING(ring, info->primitive_restart ? /* PC_RESTART_INDEX */
info->restart_index : 0xffffffff);

/* points + psize -> spritelist: */
if (ctx->rasterizer->point_size_per_vertex &&
fd5_emit_get_vp(emit)->writes_psize &&
(info->mode == PIPE_PRIM_POINTS))
primtype = DI_PT_POINTLIST_PSIZE;

fd5_emit_render_cntl(ctx, false);
fd5_draw_emit(ctx->batch, ring, primtype,
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,

@@ -214,35 +208,44 @@ fd5_clear(struct fd_context *ctx, unsigned buffers,
if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
continue;

enum pipe_format pfmt = pfb->cbufs[i]->format;

// XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
float f[4];
switch (fd5_pipe2swap(pfb->cbufs[i]->format)) {
union pipe_color_union swapped;
switch (fd5_pipe2swap(pfmt)) {
case WZYX:
f[0] = color->f[0];
f[1] = color->f[1];
f[2] = color->f[2];
f[3] = color->f[3];
swapped.ui[0] = color->ui[0];
swapped.ui[1] = color->ui[1];
swapped.ui[2] = color->ui[2];
swapped.ui[3] = color->ui[3];
break;
case WXYZ:
f[2] = color->f[0];
f[1] = color->f[1];
f[0] = color->f[2];
f[3] = color->f[3];
swapped.ui[2] = color->ui[0];
swapped.ui[1] = color->ui[1];
swapped.ui[0] = color->ui[2];
swapped.ui[3] = color->ui[3];
break;
case ZYXW:
f[3] = color->f[0];
f[0] = color->f[1];
f[1] = color->f[2];
f[2] = color->f[3];
swapped.ui[3] = color->ui[0];
swapped.ui[0] = color->ui[1];
swapped.ui[1] = color->ui[2];
swapped.ui[2] = color->ui[3];
break;
case XYZW:
f[3] = color->f[0];
f[2] = color->f[1];
f[1] = color->f[2];
f[0] = color->f[3];
swapped.ui[3] = color->ui[0];
swapped.ui[2] = color->ui[1];
swapped.ui[1] = color->ui[2];
swapped.ui[0] = color->ui[3];
break;
}
util_pack_color(f, pfb->cbufs[i]->format, &uc);

if (util_format_is_pure_uint(pfmt)) {
util_format_write_4ui(pfmt, swapped.ui, 0, &uc, 0, 0, 0, 1, 1);
} else if (util_format_is_pure_sint(pfmt)) {
util_format_write_4i(pfmt, swapped.i, 0, &uc, 0, 0, 0, 1, 1);
} else {
util_pack_color(swapped.f, pfmt, &uc);
}

OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(BLIT_MRT0 + i));

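The per-swap assignments above are just a fixed channel permutation of the clear color before packing. A compact restatement as a lookup table (illustration only; the enum values here are stand-ins, and for WXYZ the hunk sets swapped.ui[0] = color->ui[2], hence perm[WXYZ][0] == 2):

#include <stdint.h>

enum swap_mode { WZYX, WXYZ, ZYXW, XYZW };

/* dst[i] = src[perm[mode][i]] reproduces the per-case assignments above. */
static const int perm[4][4] = {
    [WZYX] = { 0, 1, 2, 3 },
    [WXYZ] = { 2, 1, 0, 3 },
    [ZYXW] = { 1, 2, 3, 0 },
    [XYZW] = { 3, 2, 1, 0 },
};

static void swap_clear_color(uint32_t dst[4], const uint32_t src[4],
                             enum swap_mode mode)
{
    for (int i = 0; i < 4; i++)
        dst[i] = src[perm[mode][i]];
}
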
@@ -366,6 +366,7 @@ fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
struct fd_resource *rsc = fd_resource(vb->buffer);
enum pipe_format pfmt = elem->src_format;
enum a5xx_vtx_fmt fmt = fd5_pipe2vtx(pfmt);
bool isint = util_format_is_pure_integer(pfmt);
uint32_t off = vb->buffer_offset + elem->src_offset;
uint32_t size = fd_bo_size(rsc->bo) - off;
debug_assert(fmt != ~0);

@@ -379,7 +380,8 @@ fd5_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd5_emit *emit)
OUT_RING(ring, A5XX_VFD_DECODE_INSTR_IDX(j) |
A5XX_VFD_DECODE_INSTR_FORMAT(fmt) |
COND(elem->instance_divisor, A5XX_VFD_DECODE_INSTR_INSTANCED) |
0xc0000000); // XXX
A5XX_VFD_DECODE_INSTR_UNK30 |
COND(!isint, A5XX_VFD_DECODE_INSTR_FLOAT));
OUT_RING(ring, MAX2(1, elem->instance_divisor)); /* VFD_DECODE[j].STEP_RATE */

OUT_PKT4(ring, REG_A5XX_VFD_DEST_CNTL(j), 1);

@@ -109,7 +109,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
}

OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format));
OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

/* when we support UBWC, these would be the system memory
* addr/pitch/etc:

@@ -336,10 +336,14 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
uint32_t pos_regid, psize_regid, color_regid[8];
uint32_t face_regid, coord_regid, zwcoord_regid;
uint32_t vcoord_regid, vertex_regid, instance_regid;
enum a3xx_threadsize fssz;
uint8_t psize_loc = ~0;
int i, j;

setup_stages(emit, s);

fssz = (s[FS].i->max_reg >= 24) ? TWO_QUADS : FOUR_QUADS;

pos_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_POS);
psize_regid = ir3_find_output_regid(s[VS].v, VARYING_SLOT_PSIZ);
vertex_regid = ir3_find_sysval_regid(s[VS].v, SYSTEM_VALUE_VERTEX_ID);

@@ -364,7 +368,7 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
zwcoord_regid = s[FS].v->frag_coord ? regid(0,2) : regid(63,0);
vcoord_regid = (s[FS].v->total_in > 0) ? regid(0,0) : regid(63,0);
vcoord_regid = (s[FS].v->total_in > 0) ? s[FS].v->pos_regid : regid(63,0);

/* we could probably divide this up into things that need to be
* emitted if frag-prog is dirty vs if vert-prog is dirty..

@@ -472,8 +476,10 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
if (pos_regid != regid(63,0))
ir3_link_add(&l, pos_regid, 0xf, l.max_loc);

if (psize_regid != regid(63,0))
if (psize_regid != regid(63,0)) {
psize_loc = l.max_loc;
ir3_link_add(&l, psize_regid, 0x1, l.max_loc);
}

if ((s[VS].v->shader->stream_output.num_outputs > 0) &&
!emit->key.binning_pass) {

@@ -551,7 +557,8 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
}

OUT_PKT4(ring, REG_A5XX_HLSQ_CONTROL_0_REG, 5);
OUT_RING(ring, 0x00000881); /* XXX HLSQ_CONTROL_0 */
OUT_RING(ring, A5XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(fssz) |
0x00000880); /* XXX HLSQ_CONTROL_0 */
OUT_RING(ring, A5XX_HLSQ_CONTROL_1_REG_PRIMALLOCTHRESHOLD(63));
OUT_RING(ring, A5XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) |
0xfcfcfc00); /* XXX */

@@ -564,7 +571,8 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)
OUT_PKT4(ring, REG_A5XX_SP_FS_CTRL_REG0, 1);
OUT_RING(ring, COND(s[FS].v->total_in > 0, A5XX_SP_FS_CTRL_REG0_VARYING) |
COND(s[FS].v->frag_coord, A5XX_SP_FS_CTRL_REG0_VARYING) |
0x4000e | /* XXX set pretty much everywhere */
0x40006 | /* XXX set pretty much everywhere */
A5XX_SP_FS_CTRL_REG0_THREADSIZE(fssz) |
A5XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(s[FS].i->max_half_reg + 1) |
A5XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(s[FS].i->max_reg + 1) |
A5XX_SP_FS_CTRL_REG0_BRANCHSTACK(0x3) | // XXX need to figure this out somehow..

@@ -692,7 +700,7 @@ fd5_program_emit(struct fd_ringbuffer *ring, struct fd5_emit *emit)

OUT_PKT4(ring, REG_A5XX_VPC_PACK, 1);
OUT_RING(ring, A5XX_VPC_PACK_NUMNONPOSVAR(s[FS].v->total_in) |
(s[VS].v->writes_psize ? 0x0c00 : 0xff00)); // XXX
A5XX_VPC_PACK_PSIZELOC(psize_loc));

OUT_PKT4(ring, REG_A5XX_VPC_VARYING_INTERP_MODE(0), 8);
for (i = 0; i < 8; i++)

@@ -76,11 +76,11 @@ fd5_rasterizer_state_create(struct pipe_context *pctx,
// if (cso->fill_front != PIPE_POLYGON_MODE_FILL ||
// cso->fill_back != PIPE_POLYGON_MODE_FILL)
// so->pc_prim_vtx_cntl2 |= A5XX_PC_PRIM_VTX_CNTL2_POLYMODE_ENABLE;
//
// if (cso->cull_face & PIPE_FACE_FRONT)
// so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
// if (cso->cull_face & PIPE_FACE_BACK)
// so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;

if (cso->cull_face & PIPE_FACE_FRONT)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_FRONT;
if (cso->cull_face & PIPE_FACE_BACK)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_CULL_BACK;
if (!cso->front_ccw)
so->gras_su_cntl |= A5XX_GRAS_SU_CNTL_FRONT_CW;
// if (!cso->flatshade_first)

@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)

Copyright (C) 2013-2016 by the following authors:

@@ -15,7 +15,7 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 23277 bytes, from 2016-12-24 05:01:47)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 83840 bytes, from 2016-11-26 23:01:08)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 110757 bytes, from 2016-12-26 17:51:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 99436 bytes, from 2017-01-10 16:36:25)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a5xx.xml ( 100594 bytes, from 2017-01-20 23:03:30)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)

Copyright (C) 2013-2016 by the following authors:

@@ -2924,7 +2924,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
struct pipe_stream_output_info so = pipeshader->selector->so;
struct tgsi_full_immediate *immediate;
struct r600_shader_ctx ctx;
struct r600_bytecode_output output[32];
struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
unsigned output_done, noutput;
unsigned opcode;
int i, j, k, r = 0;
@@ -4185,41 +4185,63 @@ static int egcm_double_to_int(struct r600_shader_ctx *ctx)
return 0;
}

static int cayman_emit_unary_double_raw(struct r600_bytecode *bc,
unsigned op,
int dst_reg,
struct r600_shader_src *src,
bool abs)
{
struct r600_bytecode_alu alu;
const int last_slot = 3;
int r;

/* these have to write the result to X/Y by the looks of it */
for (int i = 0 ; i < last_slot; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = op;

r600_bytecode_src(&alu.src[0], src, 1);
r600_bytecode_src(&alu.src[1], src, 0);

if (abs)
r600_bytecode_src_set_abs(&alu.src[1]);

alu.dst.sel = dst_reg;
alu.dst.chan = i;
alu.dst.write = (i == 0 || i == 1);

if (bc->chip_class != CAYMAN || i == last_slot - 1)
alu.last = 1;
r = r600_bytecode_add_alu(bc, &alu);
if (r)
return r;
}

return 0;
}

static int cayman_emit_double_instr(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int i, r;
struct r600_bytecode_alu alu;
int last_slot = 3;
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int t1 = ctx->temp_reg;

/* these have to write the result to X/Y by the looks of it */
for (i = 0 ; i < last_slot; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
/* should only be one src regs */
assert(inst->Instruction.NumSrcRegs == 1);

/* should only be one src regs */
assert (inst->Instruction.NumSrcRegs == 1);
/* only support one double at a time */
assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);

r600_bytecode_src(&alu.src[0], &ctx->src[0], 1);
r600_bytecode_src(&alu.src[1], &ctx->src[0], 0);

/* RSQ should take the absolute value of src */
if (ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT) {
r600_bytecode_src_set_abs(&alu.src[1]);
}
alu.dst.sel = t1;
alu.dst.chan = i;
alu.dst.write = (i == 0 || i == 1);

if (ctx->bc->chip_class != CAYMAN || i == last_slot - 1)
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
r = cayman_emit_unary_double_raw(
ctx->bc, ctx->inst_info->op, t1,
&ctx->src[0],
ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DRSQ ||
ctx->parse.FullToken.FullInstruction.Instruction.Opcode == TGSI_OPCODE_DSQRT);
if (r)
return r;

for (i = 0 ; i <= lasti; i++) {
if (!(inst->Dst[0].Register.WriteMask & (1 << i)))
@@ -4326,25 +4348,27 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
int lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
int t1 = ctx->temp_reg;

for (k = 0; k < 2; k++) {
if (!(inst->Dst[0].Register.WriteMask & (0x3 << (k * 2))))
continue;
/* t1 would get overwritten below if we actually tried to
* multiply two pairs of doubles at a time. */
assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);

for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
}
alu.dst.sel = t1;
alu.dst.chan = i;
alu.dst.write = 1;
if (i == 3)
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;

for (i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ctx->inst_info->op;
for (j = 0; j < inst->Instruction.NumSrcRegs; j++) {
r600_bytecode_src(&alu.src[j], &ctx->src[j], k * 2 + ((i == 3) ? 0 : 1));
}
alu.dst.sel = t1;
alu.dst.chan = i;
alu.dst.write = 1;
if (i == 3)
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}

for (i = 0; i <= lasti; i++) {
@@ -4366,6 +4390,63 @@ static int cayman_mul_double_instr(struct r600_shader_ctx *ctx)
return 0;
}

/*
* Emit RECIP_64 + MUL_64 to implement division.
*/
static int cayman_ddiv_instr(struct r600_shader_ctx *ctx)
{
struct tgsi_full_instruction *inst = &ctx->parse.FullToken.FullInstruction;
int r;
struct r600_bytecode_alu alu;
int t1 = ctx->temp_reg;
int k;

/* Only support one double at a time. This is the same constraint as
* in DMUL lowering. */
assert(inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ||
inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_ZW);

k = inst->Dst[0].Register.WriteMask == TGSI_WRITEMASK_XY ? 0 : 1;

r = cayman_emit_unary_double_raw(ctx->bc, ALU_OP2_RECIP_64, t1, &ctx->src[1], false);
if (r)
return r;

for (int i = 0; i < 4; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP2_MUL_64;

r600_bytecode_src(&alu.src[0], &ctx->src[0], k * 2 + ((i == 3) ? 0 : 1));

alu.src[1].sel = t1;
alu.src[1].chan = (i == 3) ? 0 : 1;

alu.dst.sel = t1;
alu.dst.chan = i;
alu.dst.write = 1;
if (i == 3)
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}

for (int i = 0; i < 2; i++) {
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
alu.op = ALU_OP1_MOV;
alu.src[0].sel = t1;
alu.src[0].chan = i;
tgsi_dst(ctx, &inst->Dst[0], k * 2 + i, &alu.dst);
alu.dst.write = 1;
if (i == 1)
alu.last = 1;
r = r600_bytecode_add_alu(ctx->bc, &alu);
if (r)
return r;
}
return 0;
}
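
For reference, the DDIV lowering in this hunk is the usual reciprocal-based division: RECIP_64 approximates 1/b, MUL_64 multiplies that by a. A minimal scalar sketch of what the two ALU groups compute (an illustration only, not the r600 backend code):

```c
/* Illustrative model of the RECIP_64 + MUL_64 lowering above. */
static double ddiv_via_recip(double a, double b)
{
   double recip = 1.0 / b;   /* RECIP_64 */
   return a * recip;         /* MUL_64   */
}
```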

/*
* r600 - trunc to -PI..PI range
* r700 - normalize by dividing by 2PI
@@ -9376,6 +9457,7 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
[TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},
@@ -9598,6 +9680,7 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
[TGSI_OPCODE_DNEG] = { ALU_OP2_ADD_64, tgsi_dneg},
[TGSI_OPCODE_DADD] = { ALU_OP2_ADD_64, tgsi_op2_64},
[TGSI_OPCODE_DMUL] = { ALU_OP2_MUL_64, cayman_mul_double_instr},
[TGSI_OPCODE_DDIV] = { 0, cayman_ddiv_instr },
[TGSI_OPCODE_DMAX] = { ALU_OP2_MAX_64, tgsi_op2_64},
[TGSI_OPCODE_DMIN] = { ALU_OP2_MIN_64, tgsi_op2_64},
[TGSI_OPCODE_DSLT] = { ALU_OP2_SETGT_64, tgsi_op2_64_single_dest_s},

@@ -320,14 +320,21 @@ static void si_sampler_view_add_buffer(struct si_context *sctx,
if (resource->target == PIPE_BUFFER)
return;

/* Now add separate DCC if it's present. */
/* Now add separate DCC or HTILE. */
rtex = (struct r600_texture*)resource;
if (!rtex->dcc_separate_buffer)
return;
if (rtex->dcc_separate_buffer) {
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
rtex->dcc_separate_buffer, usage,
RADEON_PRIO_DCC, check_mem);
}

radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
rtex->dcc_separate_buffer, usage,
RADEON_PRIO_DCC, check_mem);
if (rtex->htile_buffer &&
rtex->tc_compatible_htile &&
!is_stencil_sampler) {
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
rtex->htile_buffer, usage,
RADEON_PRIO_HTILE, check_mem);
}
}

static void si_sampler_views_begin_new_cs(struct si_context *sctx,
@@ -653,7 +660,8 @@ si_mark_image_range_valid(const struct pipe_image_view *view)

static void si_set_shader_image(struct si_context *ctx,
unsigned shader,
unsigned slot, const struct pipe_image_view *view)
unsigned slot, const struct pipe_image_view *view,
bool skip_decompress)
{
struct si_screen *screen = ctx->screen;
struct si_images_info *images = &ctx->images[shader];
@@ -695,7 +703,7 @@ static void si_set_shader_image(struct si_context *ctx,
assert(!tex->is_depth);
assert(tex->fmask.size == 0);

if (uses_dcc &&
if (uses_dcc && !skip_decompress &&
(view->access & PIPE_IMAGE_ACCESS_WRITE ||
!vi_dcc_formats_compatible(res->b.b.format, view->format))) {
/* If DCC can't be disabled, at least decompress it.
@@ -769,10 +777,10 @@ si_set_shader_images(struct pipe_context *pipe,

if (views) {
for (i = 0, slot = start_slot; i < count; ++i, ++slot)
si_set_shader_image(ctx, shader, slot, &views[i]);
si_set_shader_image(ctx, shader, slot, &views[i], false);
} else {
for (i = 0, slot = start_slot; i < count; ++i, ++slot)
si_set_shader_image(ctx, shader, slot, NULL);
si_set_shader_image(ctx, shader, slot, NULL, false);
}

si_update_compressed_tex_shader_mask(ctx, shader);
@@ -1703,7 +1711,7 @@ void si_update_all_texture_descriptors(struct si_context *sctx)
view->resource->target == PIPE_BUFFER)
continue;

si_set_shader_image(sctx, shader, i, view);
si_set_shader_image(sctx, shader, i, view, true);
}

/* Sampler views. */

@@ -717,8 +717,10 @@ static void si_update_poly_offset_state(struct si_context *sctx)
{
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;

if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
si_pm4_bind_state(sctx, poly_offset, NULL);
return;
}

/* Use the user format, not db_render_format, so that the polygon
* offset behaves as expected by applications.
@@ -1363,11 +1365,17 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
case PIPE_FORMAT_Z16_UNORM:
return V_008F14_IMG_DATA_FORMAT_16;
case PIPE_FORMAT_X24S8_UINT:
case PIPE_FORMAT_S8X24_UINT:
/*
* Implemented as an 8_8_8_8 data format to fix texture
* gathers in stencil sampling. This affects at least
* GL45-CTS.texture_cube_map_array.sampling on VI.
*/
return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
case PIPE_FORMAT_Z24X8_UNORM:
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
return V_008F14_IMG_DATA_FORMAT_8_24;
case PIPE_FORMAT_X8Z24_UNORM:
case PIPE_FORMAT_S8X24_UINT:
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
return V_008F14_IMG_DATA_FORMAT_24_8;
case PIPE_FORMAT_S8_UINT:
@@ -2794,14 +2802,22 @@ si_make_texture_descriptor(struct si_screen *screen,
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0};
const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1};
const unsigned char swizzle_wwww[4] = {3, 3, 3, 3};

switch (pipe_format) {
case PIPE_FORMAT_S8_UINT_Z24_UNORM:
case PIPE_FORMAT_X24S8_UINT:
case PIPE_FORMAT_X32_S8X24_UINT:
case PIPE_FORMAT_X8Z24_UNORM:
util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle);
break;
case PIPE_FORMAT_X24S8_UINT:
/*
* X24S8 is implemented as an 8_8_8_8 data format, to
* fix texture gathers. This affects at least
* GL45-CTS.texture_cube_map_array.sampling on VI.
*/
util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle);
break;
default:
util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle);
}
@@ -3352,7 +3368,7 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
first_non_void = util_format_get_first_non_void_channel(elements[i].src_format);
data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void);
num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void);
channel = &desc->channel[first_non_void];
channel = first_non_void >= 0 ? &desc->channel[first_non_void] : NULL;

v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) |
S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) |
@@ -3374,12 +3390,12 @@ static void *si_create_vertex_elements(struct pipe_context *ctx,
/* This isn't actually used in OpenGL. */
v->fix_fetch |= (uint64_t)SI_FIX_FETCH_A2_SINT << (4 * i);
}
} else if (channel->type == UTIL_FORMAT_TYPE_FIXED) {
} else if (channel && channel->type == UTIL_FORMAT_TYPE_FIXED) {
if (desc->swizzle[3] == PIPE_SWIZZLE_1)
v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBX_32_FIXED << (4 * i);
else
v->fix_fetch |= (uint64_t)SI_FIX_FETCH_RGBA_32_FIXED << (4 * i);
} else if (channel->size == 32 && !channel->pure_integer) {
} else if (channel && channel->size == 32 && !channel->pure_integer) {
if (channel->type == UTIL_FORMAT_TYPE_SIGNED) {
if (channel->normalized) {
if (desc->swizzle[3] == PIPE_SWIZZLE_1)

@@ -850,11 +850,12 @@ void si_emit_cache_flush(struct si_context *sctx)
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 ||
(rctx->chip_class <= CIK &&
(rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
/* Invalidate L1 & L2. (L1 is always invalidated)
/* Invalidate L1 & L2. (L1 is always invalidated on SI)
* WB must be set on VI+ when TC_ACTION is set.
*/
si_emit_surface_sync(rctx, cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
cp_coher_cntl = 0;
sctx->b.num_L2_invalidates++;

@@ -217,6 +217,15 @@ void CalculateProcessorTopology(CPUNumaNodes& out_nodes, uint32_t& out_numThread
out_numThreadsPerProcGroup++;
}

/* Prune empty numa nodes */
for (auto it = out_nodes.begin(); it != out_nodes.end(); ) {
if ((*it).cores.size() == 0)
it = out_nodes.erase(it);
else
++it;
}

/* Prune empty core nodes */
for (uint32_t node = 0; node < out_nodes.size(); node++) {
auto& numaNode = out_nodes[node];
auto it = numaNode.cores.begin();

@@ -29,7 +29,7 @@
#include "swr_query.h"
#include "swr_screen.h"
#include "swr_state.h"

#include "common/os.h"

static struct swr_query *
swr_query(struct pipe_query *p)
@@ -45,7 +45,8 @@ swr_create_query(struct pipe_context *pipe, unsigned type, unsigned index)
assert(type < PIPE_QUERY_TYPES);
assert(index < MAX_SO_STREAMS);

pq = CALLOC_STRUCT(swr_query);
pq = (struct swr_query *) AlignedMalloc(sizeof(struct swr_query), 64);
memset(pq, 0, sizeof(*pq));

if (pq) {
pq->type = type;
@@ -67,7 +68,7 @@ swr_destroy_query(struct pipe_context *pipe, struct pipe_query *q)
swr_fence_reference(pipe->screen, &pq->fence, NULL);
}

FREE(pq);
AlignedFree(pq);
}

@@ -34,7 +34,7 @@ struct swr_query_result {
uint64_t timestamp_end;
};

struct swr_query {
OSALIGNLINE(struct) swr_query {
unsigned type; /* PIPE_QUERY_* */
unsigned index;

@@ -2,12 +2,12 @@ include Makefile.sources

AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_builddir)/src \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/drivers \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/gallium/winsys \
-I$(top_builddir)/src \
-I$(srcdir)

if HAVE_CLOVER_ICD

@@ -28,8 +28,8 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_builddir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(GALLIUM_CFLAGS) \
$(LIBDRM_CFLAGS) \
$(VISIBILITY_CFLAGS)

@@ -81,7 +81,7 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende
}

if (context->decoder->entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE)
context->decoder->begin_frame(context->decoder, context->target, &context->desc.base);
context->needs_begin_frame = true;

return VA_STATUS_SUCCESS;
}
@@ -178,6 +178,8 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *

if (!context->decoder)
return VA_STATUS_ERROR_ALLOCATION_FAILED;

context->needs_begin_frame = true;
}

return vaStatus;
@@ -308,8 +310,11 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
sizes[num_buffers] = buf->size;
++num_buffers;

context->decoder->begin_frame(context->decoder, context->target,
&context->desc.base);
if (context->needs_begin_frame) {
context->decoder->begin_frame(context->decoder, context->target,
&context->desc.base);
context->needs_begin_frame = false;
}
context->decoder->decode_bitstream(context->decoder, context->target, &context->desc.base,
num_buffers, (const void * const*)buffers, sizes);
}
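
The hunk above defers begin_frame() until the first slice actually arrives, driven by a per-context flag. A minimal sketch of that deferred-start pattern, with hypothetical names rather than the real VA-API state-tracker types:

```c
/* Hypothetical illustration of the needs_begin_frame pattern above:
 * mark the frame pending when parameters arrive, start it lazily once. */
struct dec_ctx {
   int needs_begin_frame;
};

static void on_picture_params(struct dec_ctx *c)
{
   c->needs_begin_frame = 1;          /* new frame state arrived */
}

static void on_slice_data(struct dec_ctx *c)
{
   if (c->needs_begin_frame) {
      /* begin_frame(...) would run here, exactly once per frame */
      c->needs_begin_frame = 0;
   }
   /* decode_bitstream(...) follows for every slice */
}
```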

@@ -261,6 +261,7 @@ typedef struct {
int target_id;
bool first_single_submitted;
int gop_coeff;
bool needs_begin_frame;
} vlVaContext;

typedef struct {

@@ -75,6 +75,13 @@ vlVdpOutputSurfaceCreate(VdpDevice device,

memset(&res_tmpl, 0, sizeof(res_tmpl));

/*
* The output won't look correct when this buffer is sent to X
* if the VDPAU RGB component order doesn't match the X11 one, so
* we only allow the X11 format.
*/
vlsurface->send_to_X = rgba_format == VDP_RGBA_FORMAT_B8G8R8A8;

res_tmpl.target = PIPE_TEXTURE_2D;
res_tmpl.format = VdpFormatRGBAToPipe(rgba_format);
res_tmpl.width0 = width;

@@ -231,7 +231,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
vscreen = pq->device->vscreen;

pipe_mutex_lock(pq->device->mutex);
if (vscreen->set_back_texture_from_output)
if (vscreen->set_back_texture_from_output && surf->send_to_X)
vscreen->set_back_texture_from_output(vscreen, surf->surface->texture, clip_width, clip_height);
tex = vscreen->texture_from_drawable(vscreen, (void *)pq->drawable);
if (!tex) {
@@ -239,7 +239,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
return VDP_STATUS_INVALID_HANDLE;
}

if (!vscreen->set_back_texture_from_output) {
if (!vscreen->set_back_texture_from_output || !surf->send_to_X) {
dirty_area = vscreen->get_dirty_area(vscreen);

memset(&surf_templ, 0, sizeof(surf_templ));
@@ -289,7 +289,7 @@ vlVdpPresentationQueueDisplay(VdpPresentationQueue presentation_queue,
framenum++;
}

if (!vscreen->set_back_texture_from_output) {
if (!vscreen->set_back_texture_from_output || !surf->send_to_X) {
pipe_resource_reference(&tex, NULL);
pipe_surface_reference(&surf_draw, NULL);
}

@@ -415,6 +415,7 @@ typedef struct
struct pipe_fence_handle *fence;
struct vl_compositor_state cstate;
struct u_rect dirty_area;
bool send_to_X;
} vlVdpOutputSurface;

typedef struct

@@ -27,8 +27,8 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/mapi/ \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
-I$(top_builddir)/src/mesa/drivers/dri/common/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
-I$(top_srcdir)/src/gallium/winsys \
-I$(top_srcdir)/src/gallium/state_trackers/nine \
$(GALLIUM_TARGET_CFLAGS) \

@@ -37,10 +37,10 @@ AM_CFLAGS = \
-I$(top_srcdir)/include/GL/internal \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/loader \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mapi/glapi \
-I$(top_builddir)/src/mapi \
-I$(top_srcdir)/src/mapi \
-I$(top_builddir)/src/mapi/glapi \
-I$(top_srcdir)/src/mapi/glapi \
$(VISIBILITY_CFLAGS) \
$(SHARED_GLAPI_CFLAGS) \
$(EXTRA_DEFINES_XF86VIDMODE) \

@@ -6,11 +6,11 @@ AM_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src/glx \
-I$(top_srcdir)/src/mesa \
-I$(top_builddir)/src/mesa \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mapi/glapi \
-I$(top_builddir)/src/mapi/glapi \
-I$(top_srcdir)/src/mapi/glapi \
$(VISIBILITY_CFLAGS) \
$(SHARED_GLAPI_CFLAGS) \
$(DEFINES) \

@@ -24,8 +24,8 @@ libwindowsglx_la_CFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/glx \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mapi/glapi \
-I$(top_builddir)/src/mapi/glapi \
-I$(top_srcdir)/src/mapi/glapi \
$(VISIBILITY_CFLAGS) \
$(SHARED_GLAPI_CFLAGS) \
$(DEFINES) \

@@ -26,6 +26,9 @@
#include "blorp_priv.h"
#include "brw_meta_util.h"

/* header-only include needed for _mesa_unorm_to_float and friends. */
#include "mesa/main/format_utils.h"

#define FILE_DEBUG_FLAG DEBUG_BLORP

static const bool split_blorp_blit_debug = false;
@@ -2204,6 +2207,75 @@ get_ccs_compatible_uint_format(const struct isl_format_layout *fmtl)
}
}

/* Takes an isl_color_value and returns a color value that is the original
* color value only bit-casted to a UINT format. This value, together with
* the format from get_ccs_compatible_uint_format, will yield the same bit
* value as the original color and format.
*/
static union isl_color_value
bitcast_color_value_to_uint(union isl_color_value color,
const struct isl_format_layout *fmtl)
{
/* All CCS formats have the same number of bits in each channel */
const struct isl_channel_layout *chan = &fmtl->channels.r;

union isl_color_value bits;
switch (chan->type) {
case ISL_UINT:
case ISL_SINT:
/* Hardware will ignore the high bits so there's no need to cast */
bits = color;
break;

case ISL_UNORM:
for (unsigned i = 0; i < 4; i++)
bits.u32[i] = _mesa_float_to_unorm(color.f32[i], chan->bits);
break;

case ISL_SNORM:
for (unsigned i = 0; i < 4; i++)
bits.i32[i] = _mesa_float_to_snorm(color.f32[i], chan->bits);
break;

case ISL_SFLOAT:
switch (chan->bits) {
case 16:
for (unsigned i = 0; i < 4; i++)
bits.u32[i] = _mesa_float_to_half(color.f32[i]);
break;

case 32:
bits = color;
break;

default:
unreachable("Invalid float format size");
}
break;

default:
unreachable("Invalid channel type");
}

switch (fmtl->format) {
case ISL_FORMAT_B8G8R8A8_UNORM:
case ISL_FORMAT_B8G8R8A8_UNORM_SRGB:
case ISL_FORMAT_B8G8R8X8_UNORM:
case ISL_FORMAT_B8G8R8X8_UNORM_SRGB: {
/* If it's a BGRA format, we need to swap blue and red */
uint32_t tmp = bits.u32[0];
bits.u32[0] = bits.u32[2];
bits.u32[2] = tmp;
break;
}

default:
break; /* Nothing to do */
}

return bits;
}
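
As a rough reference for what the UNORM branch above produces: an n-bit unorm channel is the float clamped to [0, 1] and scaled to the largest n-bit integer. A stand-alone sketch of that conversion (an illustration, not the _mesa_float_to_unorm implementation itself):

```c
#include <math.h>
#include <stdint.h>

/* Clamp to [0, 1] and scale to the n-bit maximum.  For example, 1.0f with
 * n_bits = 8 yields 255, and 0.5f yields 128 after rounding. */
static uint32_t float_to_unorm_sketch(float f, unsigned n_bits)
{
   uint32_t max = (1u << n_bits) - 1;

   if (f <= 0.0f)
      return 0;
   if (f >= 1.0f)
      return max;
   return (uint32_t)roundf(f * (float)max);
}
```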

static void
surf_convert_to_uncompressed(const struct isl_device *isl_dev,
struct brw_blorp_surface_info *info,
@@ -2320,6 +2392,16 @@ blorp_copy(struct blorp_batch *batch,
params.src.view.format = get_copy_format_for_bpb(isl_dev, src_fmtl->bpb);
}

if (params.src.aux_usage == ISL_AUX_USAGE_CCS_E) {
params.src.clear_color =
bitcast_color_value_to_uint(params.src.clear_color, src_fmtl);
}

if (params.dst.aux_usage == ISL_AUX_USAGE_CCS_E) {
params.dst.clear_color =
bitcast_color_value_to_uint(params.dst.clear_color, dst_fmtl);
}

wm_prog_key.src_bpc =
isl_format_get_layout(params.src.view.format)->channels.r.bits;
wm_prog_key.dst_bpc =

@@ -349,6 +349,29 @@ blorp_clear(struct blorp_batch *batch,
if (format == ISL_FORMAT_R9G9B9E5_SHAREDEXP) {
clear_color.u32[0] = float3_to_rgb9e5(clear_color.f32);
format = ISL_FORMAT_R32_UINT;
} else if (format == ISL_FORMAT_A4B4G4R4_UNORM) {
/* Broadwell and earlier cannot render to this format so we need to work
* around it by swapping the colors around and using B4G4R4A4 instead.
*/

/* First, we apply the swizzle. */
union isl_color_value old;
assert((unsigned)(swizzle.r - ISL_CHANNEL_SELECT_RED) < 4);
assert((unsigned)(swizzle.g - ISL_CHANNEL_SELECT_RED) < 4);
assert((unsigned)(swizzle.b - ISL_CHANNEL_SELECT_RED) < 4);
assert((unsigned)(swizzle.a - ISL_CHANNEL_SELECT_RED) < 4);
old.u32[swizzle.r - ISL_CHANNEL_SELECT_RED] = clear_color.u32[0];
old.u32[swizzle.g - ISL_CHANNEL_SELECT_RED] = clear_color.u32[1];
old.u32[swizzle.b - ISL_CHANNEL_SELECT_RED] = clear_color.u32[2];
old.u32[swizzle.a - ISL_CHANNEL_SELECT_RED] = clear_color.u32[3];
swizzle = ISL_SWIZZLE_IDENTITY;

/* Now we re-order for the new format */
clear_color.u32[0] = old.u32[1];
clear_color.u32[1] = old.u32[2];
clear_color.u32[2] = old.u32[3];
clear_color.u32[3] = old.u32[0];
format = ISL_FORMAT_B4G4R4A4_UNORM;
}
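
The re-order above is simply a rotate-by-one of the four channel dwords, i.e. dst[i] = src[(i + 1) % 4]; the mapping between RGBA components and the format's channel slots is handled by ISL in the real code. A minimal sketch of just that index rotation:

```c
#include <stdint.h>

/* Rotate the four channel dwords left by one slot, matching the
 * clear_color re-ordering above.  Illustration only. */
static void rotate_channels_left(uint32_t dst[4], const uint32_t src[4])
{
   for (unsigned i = 0; i < 4; i++)
      dst[i] = src[(i + 1) % 4];
}
```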

memcpy(&params.wm_inputs.clear_color, clear_color.f32, sizeof(float) * 4);

@@ -218,9 +218,10 @@ static const struct surface_format_info format_info[] = {
SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1)
SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM)
/* According to the PRM, A4B4G4R4_UNORM isn't supported until Sky Lake
* but empirical testing indicates that it works just fine on Broadwell.
* but empirical testing indicates that at least sampling works just fine
* on Broadwell.
*/
SF(80, 80, x, x, 80, x, x, x, x, x, A4B4G4R4_UNORM)
SF(80, 80, x, x, 90, x, x, x, x, x, A4B4G4R4_UNORM)
SF(90, x, x, x, x, x, x, x, x, x, L8A8_UINT)
SF(90, x, x, x, x, x, x, x, x, x, L8A8_SINT)
SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM)

@@ -232,9 +232,12 @@ VkResult anv_AllocateCommandBuffers(
break;
}

if (result != VK_SUCCESS)
if (result != VK_SUCCESS) {
anv_FreeCommandBuffers(_device, pAllocateInfo->commandPool,
i, pCommandBuffers);
for (i = 0; i < pAllocateInfo->commandBufferCount; i++)
pCommandBuffers[i] = VK_NULL_HANDLE;
}

return result;
}

@@ -329,18 +329,18 @@ VkResult anv_CreateDescriptorPool(
}
}

const size_t size =
sizeof(*pool) +
const size_t pool_size =
pCreateInfo->maxSets * sizeof(struct anv_descriptor_set) +
descriptor_count * sizeof(struct anv_descriptor) +
buffer_count * sizeof(struct anv_buffer_view);
const size_t total_size = sizeof(*pool) + pool_size;

pool = vk_alloc2(&device->alloc, pAllocator, size, 8,
pool = vk_alloc2(&device->alloc, pAllocator, total_size, 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
if (!pool)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

pool->size = size;
pool->size = pool_size;
pool->next = 0;
pool->free_list = EMPTY;
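
The sizing above is the familiar single-allocation layout: one block holding the pool header followed by the per-set, per-descriptor and per-buffer-view storage. A generic sketch of that header-plus-trailing-storage pattern, using hypothetical types rather than the anv structures:

```c
#include <stdlib.h>
#include <string.h>

/* One allocation: a small header followed by 'data_size' bytes of pooled
 * storage that later allocations are carved out of. */
struct pool_header {
   size_t data_size;   /* size of the storage that follows the header */
   size_t next;        /* bump-allocation offset into that storage    */
   char   data[];      /* flexible array member: the pooled storage   */
};

static struct pool_header *pool_create(size_t data_size)
{
   struct pool_header *p = malloc(sizeof(*p) + data_size);
   if (!p)
      return NULL;
   memset(p, 0, sizeof(*p));
   p->data_size = data_size;
   return p;
}
```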
@@ -75,8 +75,11 @@ choose_isl_surf_usage(VkImageUsageFlags vk_usage,
isl_usage |= ISL_SURF_USAGE_TEXTURE_BIT;
}

if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) {
/* blorp implements transfers by rendering into the destination image. */
if (vk_usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT &&
aspect == VK_IMAGE_ASPECT_COLOR_BIT) {
/* blorp implements transfers by rendering into the destination image.
* Only request this with color images, as we deal with depth/stencil
* formats differently. */
isl_usage |= ISL_SURF_USAGE_RENDER_TARGET_BIT;
}

@@ -100,11 +100,8 @@ try_lower_input_load(nir_function_impl *impl, nir_intrinsic_instr *load)

if (image_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) {
tex->op = nir_texop_txf_ms;

nir_ssa_def *sample_id =
nir_load_system_value(&b, nir_intrinsic_load_sample_id, 0);
tex->src[2].src_type = nir_tex_src_ms_index;
tex->src[2].src = nir_src_for_ssa(sample_id);
tex->src[2].src = load->src[1];
}

nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);

@@ -55,8 +55,6 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
{
struct anv_device *device = cmd_buffer->device;

/* XXX: Do we need this on more than just BDW? */
#if (GEN_GEN >= 8)
/* Emit a render target cache flush.
*
* This isn't documented anywhere in the PRM. However, it seems to be
@@ -65,9 +63,10 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
* clear depth, reset state base address, and then go render stuff.
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.DCFlushEnable = true;
pc.RenderTargetCacheFlushEnable = true;
pc.CommandStreamerStallEnable = true;
}
#endif

anv_batch_emit(&cmd_buffer->batch, GENX(STATE_BASE_ADDRESS), sba) {
sba.GeneralStateBaseAddress = (struct anv_address) { NULL, 0 };
@@ -148,6 +147,8 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
*/
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
pc.TextureCacheInvalidationEnable = true;
pc.ConstantCacheInvalidationEnable = true;
pc.StateCacheInvalidationEnable = true;
}
}

@@ -1177,9 +1178,9 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,

case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
assert(stage == MESA_SHADER_FRAGMENT);
if (desc->image_view->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
/* For stencil input attachments, we treat it like any old texture
* that a user may have bound.
if (desc->image_view->aspect_mask != VK_IMAGE_ASPECT_COLOR_BIT) {
/* For depth and stencil input attachments, we treat it like any
* old texture that a user may have bound.
*/
surface_state = desc->image_view->sampler_surface_state;
assert(surface_state.alloc_size);
@@ -1187,9 +1188,9 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
desc->image_view->image->aux_usage,
surface_state);
} else {
/* For depth and color input attachments, we create the surface
* state at vkBeginRenderPass time so that we can include aux
* and clear color information.
/* For color input attachments, we create the surface state at
* vkBeginRenderPass time so that we can include aux and clear
* color information.
*/
assert(binding->input_attachment_index < subpass->input_count);
const unsigned subpass_att = binding->input_attachment_index;

@@ -39,8 +39,8 @@ libloader_la_LIBADD =

if HAVE_DRICOMMON
libloader_la_CPPFLAGS += \
-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
-I$(top_builddir)/src/mesa/drivers/dri/common/ \
-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/mapi/ \
-DUSE_DRICONF

@@ -46,8 +46,8 @@ AM_CPPFLAGS = \
$(SELINUX_CFLAGS) \
-I$(top_srcdir)/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/mapi \
-I$(top_builddir)/src/mapi
-I$(top_builddir)/src/mapi \
-I$(top_srcdir)/src/mapi

include Makefile.sources

@@ -30,9 +30,9 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_builddir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(INTEL_CFLAGS)

@@ -30,21 +30,22 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_builddir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/intel/server \
-I$(top_srcdir)/src/gtest/include \
-I$(top_srcdir)/src/compiler/nir \
-I$(top_srcdir)/src/intel \
-I$(top_builddir)/src/compiler/glsl \
-I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/src/compiler/nir \
-I$(top_builddir)/src/intel \
-I$(top_builddir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/intel \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(INTEL_CFLAGS)

AM_CXXFLAGS = $(AM_CFLAGS)

MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
brw_nir_trig_workarounds.c: brw_nir_trig_workarounds.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
$(MKDIR_GEN)
$(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/brw_nir_trig_workarounds.py > $@ || ($(RM) $@; false)

@@ -284,8 +284,10 @@ brw_blorp_to_isl_format(struct brw_context *brw, mesa_format format,
case MESA_FORMAT_S_UINT8:
return ISL_FORMAT_R8_UINT;
case MESA_FORMAT_Z24_UNORM_X8_UINT:
case MESA_FORMAT_Z24_UNORM_S8_UINT:
return ISL_FORMAT_R24_UNORM_X8_TYPELESS;
case MESA_FORMAT_Z_FLOAT32:
case MESA_FORMAT_Z32_FLOAT_S8X24_UINT:
return ISL_FORMAT_R32_FLOAT;
case MESA_FORMAT_Z_UNORM16:
return ISL_FORMAT_R16_UNORM;
@@ -908,6 +910,17 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
blorp_batch_finish(&batch);
}

/*
* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
*
* Any transition from any value in {Clear, Render, Resolve} to a
* different value in {Clear, Render, Resolve} requires end of pipe
* synchronization.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);

return true;
}

@@ -975,6 +988,17 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
brw_blorp_to_isl_format(brw, format, true),
resolve_op);
blorp_batch_finish(&batch);

/*
* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
*
* Any transition from any value in {Clear, Render, Resolve} to a
* different value in {Clear, Render, Resolve} requires end of pipe
* synchronization.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
}

static void

@@ -36,6 +36,7 @@

#include "brw_context.h"
#include "brw_blorp.h"
#include "brw_defines.h"

#define FILE_DEBUG_FLAG DEBUG_BLIT

@@ -174,14 +175,46 @@ brw_fast_clear_depth(struct gl_context *ctx)
mt->depth_clear_value = depth_clear_value;
}

/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
*
* "If other rendering operations have preceded this clear, a
* PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
* must be issued before the rectangle primitive used for the depth
* buffer clear operation.
*/
brw_emit_mi_flush(brw);
if (brw->gen == 6) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
*
* "If other rendering operations have preceded this clear, a
* PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
* must be issued before the rectangle primitive used for the depth
* buffer clear operation.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
} else if (brw->gen >= 7) {
/*
* From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
*
* If other rendering operations have preceded this clear, a
* PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
* enabled must be issued before the rectangle primitive used for the
* depth buffer clear operation.
*
* Same applies for Gen8 and Gen9.
*
* In addition, from the Ivybridge PRM, volume 2, 1.10.4.1 PIPE_CONTROL,
* Depth Cache Flush Enable:
*
* This bit must not be set when Depth Stall Enable bit is set in
* this packet.
*
* This is confirmed to hold for real, HSW gets immediate gpu hangs.
*
* Therefore issue two pipe control flushes, one for cache flush and
* another for depth stall.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);

brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
}

if (fb->MaxNumLayers > 0) {
for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
@@ -201,7 +234,12 @@ brw_fast_clear_depth(struct gl_context *ctx)
* by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
* followed by Depth FLUSH'
*/
brw_emit_mi_flush(brw);
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_STALL);

brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
}

/* Now, the HiZ buffer contains data that needs to be resolved to the depth

@@ -910,6 +910,9 @@ brw_process_driconf_options(struct brw_context *brw)
ctx->Const.ForceGLSLExtensionsWarn =
driQueryOptionb(options, "force_glsl_extensions_warn");

ctx->Const.ForceGLSLVersion =
driQueryOptioni(options, "force_glsl_version");

ctx->Const.DisableGLSLLineContinuations =
driQueryOptionb(options, "disable_glsl_line_continuations");

@@ -508,7 +508,7 @@ fs_generator::generate_cs_terminate(fs_inst *inst, struct brw_reg payload)
insn = brw_next_insn(p, BRW_OPCODE_SEND);

brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_UW));
brw_set_src0(p, insn, payload);
brw_set_src0(p, insn, retype(payload, BRW_REGISTER_TYPE_UW));
brw_set_src1(p, insn, brw_imm_d(0));

/* Terminate a compute shader by sending a message to the thread spawner.

@@ -177,6 +177,49 @@ static struct gl_program *brwNewProgram(struct gl_context *ctx, GLenum target,
static void brwDeleteProgram( struct gl_context *ctx,
struct gl_program *prog )
{
struct brw_context *brw = brw_context(ctx);

/* Beware! prog's refcount has reached zero, and it's about to be freed.
*
* In brw_upload_pipeline_state(), we compare brw->foo_program to
* ctx->FooProgram._Current, and flag BRW_NEW_FOO_PROGRAM if the
* pointer has changed.
*
* We cannot leave brw->foo_program as a dangling pointer to the dead
* program. malloc() may allocate the same memory for a new gl_program,
* causing us to see matching pointers...but totally different programs.
*
* We cannot set brw->foo_program to NULL, either. If we've deleted the
* active program, Mesa may set ctx->FooProgram._Current to NULL. That
* would cause us to see matching pointers (NULL == NULL), and fail to
* detect that a program has changed since our last draw.
*
* So, set it to a bogus gl_program pointer that will never match,
* causing us to properly reevaluate the state on our next draw.
*
* Getting this wrong causes heisenbugs which are very hard to catch,
* as you need a very specific allocation pattern to hit the problem.
*/
static const struct gl_program deleted_program;

if (brw->vertex_program == prog)
brw->vertex_program = &deleted_program;

if (brw->tess_ctrl_program == prog)
brw->tess_ctrl_program = &deleted_program;

if (brw->tess_eval_program == prog)
brw->tess_eval_program = &deleted_program;

if (brw->geometry_program == prog)
brw->geometry_program = &deleted_program;

if (brw->fragment_program == prog)
brw->fragment_program = &deleted_program;

if (brw->compute_program == prog)
brw->compute_program = &deleted_program;

_mesa_delete_program( ctx, prog );
}

@@ -477,6 +477,18 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
break;
case BLORP_HIZ_OP_DEPTH_CLEAR:
dw1 |= GEN8_WM_HZ_DEPTH_CLEAR;

/* The "Clear Rectangle X Max" (and Y Max) fields are exclusive,
* rather than inclusive, and limited to 16383. This means that
* for a 16384x16384 render target, we would miss the last row
* or column of pixels along the edge.
*
* To work around this, we have to set the "Full Surface Depth
* and Stencil Clear" bit. We can do this in all cases because
* we always clear the full rectangle anyway. We'll need to
* change this if we ever add scissored clear support.
*/
dw1 |= GEN8_WM_HZ_FULL_SURFACE_DEPTH_CLEAR;
break;
case BLORP_HIZ_OP_NONE:
unreachable("Should not get here.");
@@ -511,6 +523,22 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
OUT_BATCH(0);
ADVANCE_BATCH();

/*
* From the Broadwell PRM, volume 7, "Depth Buffer Clear":
*
* Depth buffer clear pass using any of the methods (WM_STATE, 3DSTATE_WM
* or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL command with
* DEPTH_STALL bit and Depth FLUSH bits "set" before starting to render.
* DepthStall and DepthFlush are not needed between consecutive depth
* clear passes nor is it required if the depth clear pass was done with
* "full_surf_clear" bit set in the 3DSTATE_WM_HZ_OP.
*
* TODO: Such as the spec says, this could be conditional.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_DEPTH_STALL);

/* Mark this buffer as needing a TC flush, as we've rendered to it. */
brw_render_cache_set_add_bo(brw, mt->bo);

@@ -25,6 +25,7 @@

#include "intel_batchbuffer.h"
#include "intel_mipmap_tree.h"
#include "intel_fbo.h"

#include "brw_context.h"
#include "brw_state.h"
@@ -179,7 +180,9 @@ genX(blorp_exec)(struct blorp_batch *batch,
* data with different formats, which blorp does for stencil and depth
* data.
*/
brw_emit_mi_flush(brw);
if (params->src.enabled)
brw_render_cache_set_check_flush(brw, params->src.addr.buffer);
brw_render_cache_set_check_flush(brw, params->dst.addr.buffer);

brw_select_pipeline(brw, BRW_RENDER_PIPELINE);

@@ -256,8 +259,10 @@ retry:
brw->no_depth_or_stencil = false;
brw->ib.type = -1;

/* Flush the sampler cache so any texturing from the destination is
* coherent.
*/
brw_emit_mi_flush(brw);
if (params->dst.enabled)
brw_render_cache_set_add_bo(brw, params->dst.addr.buffer);
if (params->depth.enabled)
brw_render_cache_set_add_bo(brw, params->depth.addr.buffer);
if (params->stencil.enabled)
brw_render_cache_set_add_bo(brw, params->stencil.addr.buffer);
}

@@ -235,13 +235,9 @@ emit_miptree_blit(struct brw_context *brw,
* represented per scan line’s worth of graphics data depends on the
* color depth.
*
* Furthermore, intelEmitCopyBlit (which is called below) uses a signed
* 16-bit integer to represent buffer pitch, so it can only handle buffer
* pitches < 32k. However, the pitch is measured in bytes for linear buffers
* and dwords for tiled buffers.
*
* As a result of these two limitations, we can only use the blitter to do
* this copy when the miptree's pitch is less than 32k linear or 128k tiled.
* The blitter's pitch is a signed 16-bit integer, but measured in bytes
* for linear surfaces and DWords for tiled surfaces. So the maximum
* pitch is 32k linear and 128k tiled.
*/
if (blt_pitch(src_mt) >= 32768 || blt_pitch(dst_mt) >= 32768) {
perf_debug("Falling back due to >= 32k/128k pitch\n");
@@ -480,11 +476,11 @@ static bool
can_fast_copy_blit(struct brw_context *brw,
drm_intel_bo *src_buffer,
int16_t src_x, int16_t src_y,
uintptr_t src_offset, uint32_t src_pitch,
uintptr_t src_offset, int32_t src_pitch,
uint32_t src_tiling, uint32_t src_tr_mode,
drm_intel_bo *dst_buffer,
int16_t dst_x, int16_t dst_y,
uintptr_t dst_offset, uint32_t dst_pitch,
uintptr_t dst_offset, int32_t dst_pitch,
uint32_t dst_tiling, uint32_t dst_tr_mode,
int16_t w, int16_t h, uint32_t cpp,
GLenum logic_op)
@@ -520,10 +516,8 @@ can_fast_copy_blit(struct brw_context *brw,
if (!_mesa_is_pow_two(cpp) || cpp > 16)
return false;

/* For Fast Copy Blits the pitch cannot be a negative number. So, bit 15
* of the destination pitch must be zero.
*/
if ((src_pitch >> 15 & 1) != 0 || (dst_pitch >> 15 & 1) != 0)
/* For Fast Copy Blits the pitch cannot be a negative number. */
if (src_pitch < 0 || dst_pitch < 0)
return false;

/* For Linear surfaces, the pitch has to be an OWord (16byte) multiple. */
@@ -577,12 +571,12 @@ xy_blit_cmd(uint32_t src_tiling, uint32_t src_tr_mode,
bool
intelEmitCopyBlit(struct brw_context *brw,
GLuint cpp,
GLshort src_pitch,
int32_t src_pitch,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
uint32_t src_tr_mode,
GLshort dst_pitch,
int32_t dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,

@@ -31,12 +31,12 @@
bool
intelEmitCopyBlit(struct brw_context *brw,
GLuint cpp,
GLshort src_pitch,
int32_t src_pitch,
drm_intel_bo *src_buffer,
GLuint src_offset,
uint32_t src_tiling,
uint32_t src_tr_mode,
GLshort dst_pitch,
int32_t dst_pitch,
drm_intel_bo *dst_buffer,
GLuint dst_offset,
uint32_t dst_tiling,

@@ -79,6 +79,7 @@ DRI_CONF_BEGIN
DRI_CONF_ALWAYS_FLUSH_CACHE("false")
DRI_CONF_DISABLE_THROTTLING("false")
DRI_CONF_FORCE_GLSL_EXTENSIONS_WARN("false")
DRI_CONF_FORCE_GLSL_VERSION(0)
DRI_CONF_DISABLE_GLSL_LINE_CONTINUATIONS("false")
DRI_CONF_DISABLE_BLEND_FUNC_EXTENDED("false")
DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION("false")

@@ -34,9 +34,9 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_builddir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/r200/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(RADEON_CFLAGS)

@@ -35,9 +35,9 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_builddir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/radeon/server \
-I$(top_builddir)/src/mesa/drivers/dri/common \
$(DEFINES) \
$(VISIBILITY_CFLAGS) \
$(RADEON_CFLAGS)

@@ -30,8 +30,8 @@ AM_CFLAGS = \
-I$(top_srcdir)/src/mesa/ \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
-I$(top_builddir)/src/mesa/drivers/dri/common \
-I$(top_srcdir)/src/mesa/drivers/dri/common \
$(LIBDRM_CFLAGS) \
$(DEFINES) \
$(VISIBILITY_CFLAGS)

@@ -28,8 +28,8 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/gallium/include \
-I$(top_srcdir)/src/gallium/auxiliary \
-I$(top_srcdir)/src/mapi \
-I$(top_builddir)/src/mapi \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa/ \
$(DEFINES)
AM_CFLAGS = $(PTHREAD_CFLAGS) \

@@ -1071,7 +1071,8 @@ _mesa_PopAttrib(void)
if (ctx->Extensions.ARB_color_buffer_float)
_mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
color->ClampFragmentColor);
_mesa_ClampColor(GL_CLAMP_READ_COLOR_ARB, color->ClampReadColor);
if (ctx->Extensions.ARB_color_buffer_float || ctx->Version >= 30)
_mesa_ClampColor(GL_CLAMP_READ_COLOR_ARB, color->ClampReadColor);

/* GL_ARB_framebuffer_sRGB / GL_EXT_framebuffer_sRGB */
if (ctx->Extensions.EXT_framebuffer_sRGB)

@@ -363,7 +363,7 @@ EXT(OES_point_size_array , dummy_true
EXT(OES_point_sprite , ARB_point_sprite , x , x , ES1, x , 2004)
EXT(OES_primitive_bounding_box , OES_primitive_bounding_box , x , x , x , 31, 2014)
EXT(OES_query_matrix , dummy_true , x , x , ES1, x , 2003)
EXT(OES_read_format , dummy_true , GLL, GLC, ES1, x , 2003)
EXT(OES_read_format , dummy_true , GLL, x , ES1, x , 2003)
EXT(OES_rgb8_rgba8 , dummy_true , x , x , ES1, ES2, 2005)
EXT(OES_sample_shading , OES_sample_variables , x , x , x , 30, 2014)
EXT(OES_sample_variables , OES_sample_variables , x , x , x , 30, 2014)

@@ -1612,6 +1612,7 @@ _mesa_LinkProgram(GLuint programObj)
"glLinkProgram"));
}

#ifdef ENABLE_SHADER_CACHE
/**
* Generate a SHA-1 hash value string for given source string.
*/
@@ -1723,6 +1724,8 @@ read_shader(const gl_shader_stage stage, const char *source)
return buffer;
}

#endif /* ENABLE_SHADER_CACHE */

/**
* Called via glShaderSource() and glShaderSourceARB() API functions.
* Basically, concatenate the source code strings into one long string
@@ -1738,8 +1741,6 @@ _mesa_ShaderSource(GLuint shaderObj, GLsizei count,
GLcharARB *source;
struct gl_shader *sh;

GLcharARB *replacement;

sh = _mesa_lookup_shader_err(ctx, shaderObj, "glShaderSourceARB");
if (!sh)
return;
@@ -1795,6 +1796,9 @@ _mesa_ShaderSource(GLuint shaderObj, GLsizei count,
source[totalLength - 1] = '\0';
source[totalLength - 2] = '\0';

#ifdef ENABLE_SHADER_CACHE
GLcharARB *replacement;

/* Dump original shader source to MESA_SHADER_DUMP_PATH and replace
* if corresponding entry found from MESA_SHADER_READ_PATH.
*/
@@ -1805,6 +1809,7 @@ _mesa_ShaderSource(GLuint shaderObj, GLsizei count,
free(source);
source = replacement;
}
#endif /* ENABLE_SHADER_CACHE */

shader_source(sh, source);

@@ -4,8 +4,8 @@ AM_CPPFLAGS = \
-I$(top_srcdir)/src/gtest/include \
-I$(top_srcdir)/src \
-I$(top_srcdir)/src/mapi \
-I$(top_srcdir)/src/mesa \
-I$(top_builddir)/src/mesa \
-I$(top_srcdir)/src/mesa \
-I$(top_srcdir)/include \
$(DEFINES) $(INCLUDE_DIRS)

@@ -278,7 +278,7 @@ void st_invalidate_state(struct gl_context * ctx, GLbitfield new_state)


static void
-st_destroy_context_priv(struct st_context *st)
+st_destroy_context_priv(struct st_context *st, bool destroy_pipe)
{
   uint shader, i;

@@ -314,6 +314,10 @@ st_destroy_context_priv(struct st_context *st)
   st_invalidate_readpix_cache(st);

   cso_destroy_context(st->cso_context);

+   if (st->pipe && destroy_pipe)
+      st->pipe->destroy(st->pipe);

   free( st );
}

@@ -503,7 +507,7 @@ st_create_context_priv( struct gl_context *ctx, struct pipe_context *pipe,
   /* This can happen when a core profile was requested, but the driver
    * does not support some features of GL 3.1 or later.
    */
-   st_destroy_context_priv(st);
+   st_destroy_context_priv(st, false);
   return NULL;
}

@@ -579,7 +583,6 @@ destroy_tex_sampler_cb(GLuint id, void *data, void *userData)

void st_destroy_context( struct st_context *st )
{
-   struct pipe_context *pipe = st->pipe;
   struct gl_context *ctx = st->ctx;
   GLuint i;

@@ -608,11 +611,9 @@ void st_destroy_context( struct st_context *st )

   /* This will free the st_context too, so 'st' must not be accessed
    * afterwards. */
-   st_destroy_context_priv(st);
+   st_destroy_context_priv(st, true);
   st = NULL;

-   pipe->destroy( pipe );

   free(ctx);
}
@@ -955,7 +955,7 @@ glsl_to_tgsi_visitor::get_opcode(unsigned op,
   case3fid(MUL, UMUL, DMUL);
   case3fid(MAD, UMAD, DMAD);
   case3fid(FMA, UMAD, DFMA);
-   case3(DIV, IDIV, UDIV);
+   case4d(DIV, IDIV, UDIV, DDIV);
   case4d(MAX, IMAX, UMAX, DMAX);
   case4d(MIN, IMIN, UMIN, DMIN);
   case2iu(MOD, UMOD);
@@ -1710,10 +1710,7 @@ glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op)
      emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
      break;
   case ir_binop_div:
-      if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
-         assert(!"not reached: should be handled by ir_div_to_mul_rcp");
-      else
-         emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
+      emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
      break;
   case ir_binop_mod:
      if (result_dst.type == GLSL_TYPE_FLOAT)
@@ -6918,7 +6915,7 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)

      lower_instructions(ir,
                         MOD_TO_FLOOR |
-                        DIV_TO_MUL_RCP |
+                        FDIV_TO_MUL_RCP |
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2 |
                         LDEXP_TO_ARITH |
@@ -21,6 +21,8 @@
 * IN THE SOFTWARE.
 */

+#ifdef ENABLE_SHADER_CACHE
+
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
@@ -705,3 +707,5 @@ disk_cache_has_key(struct disk_cache *cache, cache_key key)

   return memcmp(entry, key, CACHE_KEY_SIZE) == 0;
}
+
+#endif /* ENABLE_SHADER_CACHE */
@@ -40,6 +40,8 @@ struct disk_cache;

/* Provide inlined stub functions if the shader cache is disabled. */

+#ifdef ENABLE_SHADER_CACHE
+
/**
 * Create a new cache object.
 *
@@ -129,6 +131,46 @@ disk_cache_put_key(struct disk_cache *cache, cache_key key);
bool
disk_cache_has_key(struct disk_cache *cache, cache_key key);

+#else
+
+static inline struct disk_cache *
+disk_cache_create(void)
+{
+   return NULL;
+}
+
+static inline void
+disk_cache_destroy(struct disk_cache *cache) {
+   return;
+}
+
+static inline void
+disk_cache_put(struct disk_cache *cache, cache_key key,
+               const void *data, size_t size)
+{
+   return;
+}
+
+static inline uint8_t *
+disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size)
+{
+   return NULL;
+}
+
+static inline void
+disk_cache_put_key(struct disk_cache *cache, cache_key key)
+{
+   return;
+}
+
+static inline bool
+disk_cache_has_key(struct disk_cache *cache, cache_key key)
+{
+   return false;
+}
+
+#endif /* ENABLE_SHADER_CACHE */
+
#ifdef __cplusplus
}
#endif
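The #else branch above provides inert stand-ins for every disk_cache entry point, so code that uses the cache needs no ENABLE_SHADER_CACHE guards of its own. Below is a minimal caller-side sketch under that assumption: the wrapper names are hypothetical, only the disk_cache_* prototypes come from the header shown, and cache_key is assumed to be the usual 20-byte SHA-1 digest type.

#include <stddef.h>
#include "util/disk_cache.h"   /* the header shown above; include path assumed */

/* Hypothetical wrappers, shown only to illustrate that a caller compiles the
 * same whether the real implementation or the stubs above are built in. */
static void
store_blob(struct disk_cache *cache, cache_key key,
           const void *blob, size_t blob_size)
{
   /* With the stubs, disk_cache_has_key() is always false and the put calls
    * are no-ops, so this quietly does nothing when the cache is disabled. */
   if (!disk_cache_has_key(cache, key))
      disk_cache_put_key(cache, key);
   disk_cache_put(cache, key, blob, blob_size);
}

static void *
lookup_blob(struct disk_cache *cache, cache_key key, size_t *blob_size)
{
   /* NULL on a miss (and always NULL with the stubs); on a hit the caller
    * is expected to free the returned buffer. */
   return disk_cache_get(cache, key, blob_size);
}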
@@ -31,7 +31,6 @@ void SHA1Pad(SHA1_CTX *);
void SHA1Transform(uint32_t [5], const uint8_t [SHA1_BLOCK_LENGTH]);
void SHA1Update(SHA1_CTX *, const uint8_t *, size_t);
void SHA1Final(uint8_t [SHA1_DIGEST_LENGTH], SHA1_CTX *);
__END_DECLS

#define HTONDIGEST(x) do { \
   x[0] = htonl(x[0]); \
@@ -379,7 +379,8 @@ wsi_wl_surface_get_capabilities(VkIcdSurfaceBase *surface,

   caps->currentExtent = (VkExtent2D) { -1, -1 };
   caps->minImageExtent = (VkExtent2D) { 1, 1 };
-   caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX };
+   /* This is the maximum supported size on Intel */
+   caps->maxImageExtent = (VkExtent2D) { 1 << 14, 1 << 14 };
   caps->supportedTransforms = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
   caps->currentTransform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR;
   caps->maxImageArrayLayers = 1;
@@ -409,25 +410,27 @@ wsi_wl_surface_get_formats(VkIcdSurfaceBase *icd_surface,
   if (!display)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

-   uint32_t count = u_vector_length(&display->formats);
-
   if (pSurfaceFormats == NULL) {
-      *pSurfaceFormatCount = count;
+      *pSurfaceFormatCount = u_vector_length(&display->formats);
      return VK_SUCCESS;
   }

-   assert(*pSurfaceFormatCount >= count);
-   *pSurfaceFormatCount = count;
-
+   uint32_t count = 0;
   VkFormat *f;
   u_vector_foreach(f, &display->formats) {
-      *(pSurfaceFormats++) = (VkSurfaceFormatKHR) {
+      if (count == *pSurfaceFormatCount)
+         return VK_INCOMPLETE;
+
+      pSurfaceFormats[count++] = (VkSurfaceFormatKHR) {
         .format = *f,
         /* TODO: We should get this from the compositor somehow */
         .colorSpace = VK_COLORSPACE_SRGB_NONLINEAR_KHR,
      };
   }

+   assert(*pSurfaceFormatCount <= count);
+   *pSurfaceFormatCount = count;

   return VK_SUCCESS;
}
@@ -441,11 +444,13 @@ wsi_wl_surface_get_present_modes(VkIcdSurfaceBase *surface,
      return VK_SUCCESS;
   }

-   assert(*pPresentModeCount >= ARRAY_SIZE(present_modes));
+   *pPresentModeCount = MIN2(*pPresentModeCount, ARRAY_SIZE(present_modes));
   typed_memcpy(pPresentModes, present_modes, *pPresentModeCount);
-   *pPresentModeCount = ARRAY_SIZE(present_modes);

-   return VK_SUCCESS;
+   if (*pPresentModeCount < ARRAY_SIZE(present_modes))
+      return VK_INCOMPLETE;
+   else
+      return VK_SUCCESS;
}

VkResult wsi_create_wl_surface(const VkAllocationCallbacks *pAllocator,
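The two Wayland hunks above rework the format and present-mode queries to follow Vulkan's two-call enumeration contract: with a NULL output array only the count is reported, otherwise at most *pCount entries are written and VK_INCOMPLETE is returned when the array is too small. A sketch of the matching application-side loop, assuming a valid VkPhysicalDevice and VkSurfaceKHR; the helper name is illustrative and not part of Mesa.

#include <stdlib.h>
#include <vulkan/vulkan.h>

/* Fetch the surface formats with the two-call idiom; the caller owns the
 * returned array. Illustrative only, error handling kept minimal. */
static VkSurfaceFormatKHR *
query_surface_formats(VkPhysicalDevice physical_device, VkSurfaceKHR surface,
                      uint32_t *count)
{
   VkSurfaceFormatKHR *formats = NULL;
   VkResult result;

   do {
      /* First call: NULL output array, the driver only reports the count. */
      vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface, count, NULL);

      free(formats);
      formats = malloc(*count * sizeof(*formats));
      if (formats == NULL && *count > 0)
         return NULL;

      /* Second call: the driver fills at most *count entries and returns
       * VK_INCOMPLETE if the surface gained formats in between. */
      result = vkGetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface,
                                                    count, formats);
   } while (result == VK_INCOMPLETE);

   return formats;
}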
@@ -265,7 +265,8 @@ VkBool32 wsi_get_physical_device_xcb_presentation_support(
      return false;

   if (!wsi_conn->has_dri3) {
-      fprintf(stderr, "vulkan: No DRI3 support\n");
+      fprintf(stderr, "vulkan: No DRI3 support detected - required for presentation\n");
+      fprintf(stderr, "Note: Buggy applications may crash, if they do please report to vendor\n");
      return false;
   }

@@ -313,7 +314,8 @@ x11_surface_get_support(VkIcdSurfaceBase *icd_surface,
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   if (!wsi_conn->has_dri3) {
-      fprintf(stderr, "vulkan: No DRI3 support\n");
+      fprintf(stderr, "vulkan: No DRI3 support detected - required for presentation\n");
+      fprintf(stderr, "Note: Buggy applications may crash, if they do please report to vendor\n");
      *pSupported = false;
      return VK_SUCCESS;
   }
@@ -368,7 +370,8 @@ x11_surface_get_capabilities(VkIcdSurfaceBase *icd_surface,
       */
      caps->currentExtent = (VkExtent2D) { -1, -1 };
      caps->minImageExtent = (VkExtent2D) { 1, 1 };
-      caps->maxImageExtent = (VkExtent2D) { INT16_MAX, INT16_MAX };
+      /* This is the maximum supported size on Intel */
+      caps->maxImageExtent = (VkExtent2D) { 1 << 14, 1 << 14 };
   }
   free(err);
   free(geom);