Compare commits
107 Commits
bisect-tem
...
mesa-10.2-
Author | SHA1 | Date | |
---|---|---|---|
|
697316fe06 | ||
|
bfaee5277a | ||
|
9a8f12ae03 | ||
|
a31062fcb3 | ||
|
a1fff38c96 | ||
|
1db3ebd8a5 | ||
|
7cf3a674ea | ||
|
d2521a44af | ||
|
00f2dcb791 | ||
|
ed1ffa0197 | ||
|
5d056f51ab | ||
|
6be7789e11 | ||
|
d6a4c3c29c | ||
|
9028b94670 | ||
|
085d6bd5e7 | ||
|
fd0ea5be9d | ||
|
27d4836f35 | ||
|
e306ba9a9b | ||
|
81fb9ef112 | ||
|
32549f3f17 | ||
|
9576e17804 | ||
|
cc65bc4d15 | ||
|
25e641213f | ||
|
e084f71548 | ||
|
cdd328639f | ||
|
6440561737 | ||
|
fb4461b7dc | ||
|
fec2b45d02 | ||
|
d0c813c40a | ||
|
a05c073d79 | ||
|
031ee21961 | ||
|
b7604eff4c | ||
|
80da86c650 | ||
|
3c0ca023dd | ||
|
516db26e1e | ||
|
d5d9984c2b | ||
|
6db6f05fae | ||
|
c338759051 | ||
|
47a6830e22 | ||
|
3ffc507c94 | ||
|
c94e339adc | ||
|
a5951d09a5 | ||
|
3475ca1f00 | ||
|
3733cc3e8f | ||
|
ac49f97f12 | ||
|
d0aa394741 | ||
|
4baf6f12a5 | ||
|
21792665c7 | ||
|
872ea423ac | ||
|
ad8ad99eff | ||
|
62f1509070 | ||
|
eb2ef1641c | ||
|
947b60d19e | ||
|
cb37016f89 | ||
|
43ea5f9347 | ||
|
b5e717a618 | ||
|
f5848ec2e4 | ||
|
79a34441d5 | ||
|
1041fb86c0 | ||
|
b1aa25907a | ||
|
5d6e822d03 | ||
|
26d5b22039 | ||
|
3171da3402 | ||
|
875fd92d16 | ||
|
fb5c68d312 | ||
|
0e7b0f2a0a | ||
|
5f495b85a0 | ||
|
253834cbf6 | ||
|
f5c083dbc3 | ||
|
2b4a871e05 | ||
|
27da0bbeb4 | ||
|
91e2808c41 | ||
|
6cad93daab | ||
|
71f78bb87e | ||
|
ab43a98fcf | ||
|
af228e999c | ||
|
725a27e04d | ||
|
b0609b715b | ||
|
ca549a0f19 | ||
|
407bff9db0 | ||
|
0e14b19492 | ||
|
a233f4c303 | ||
|
7a81788c67 | ||
|
13bb2bc84b | ||
|
98b66e8d96 | ||
|
0b3126bddd | ||
|
f2682b3b9f | ||
|
d259928a56 | ||
|
ec6bd21162 | ||
|
53a0f9d0ba | ||
|
0f0f1106b6 | ||
|
2b34277bbd | ||
|
e29daf82cc | ||
|
03673bcf6c | ||
|
af47859aed | ||
|
e120f1a958 | ||
|
cc92276cb8 | ||
|
fac042fa05 | ||
|
d26b59ec27 | ||
|
c2c15a9a37 | ||
|
e6c98309c6 | ||
|
ffc0cc027a | ||
|
658d0410d0 | ||
|
a1dd1e62fa | ||
|
c7a3c2d29d | ||
|
0a5034517a | ||
|
e8f6150320 |
3
bin/.cherry-ignore
Normal file
3
bin/.cherry-ignore
Normal file
@@ -0,0 +1,3 @@
|
||||
# The first is the change, and the second is the revert of that change.
|
||||
e6967270c75a5b669152127bb7a746d55f4407a6 i965: Fix depth (array slices) computation for 1D_ARRAY render targets.
|
||||
155f98d49fdc2f46c760f8214327b3804ee60079 Revert "i965: Fix depth (array slices) computation for 1D_ARRAY render targets."
|
28
configure.ac
28
configure.ac
@@ -331,6 +331,19 @@ LDFLAGS=$save_LDFLAGS
|
||||
|
||||
AC_SUBST([GC_SECTIONS])
|
||||
|
||||
dnl
|
||||
dnl OpenBSD does not have DT_NEEDED entries for libc by design
|
||||
dnl so when these flags are passed to ld via libtool the checks will fail
|
||||
dnl
|
||||
case "$host_os" in
|
||||
openbsd*)
|
||||
LD_NO_UNDEFINED="" ;;
|
||||
*)
|
||||
LD_NO_UNDEFINED="-Wl,--no-undefined" ;;
|
||||
esac
|
||||
|
||||
AC_SUBST([LD_NO_UNDEFINED])
|
||||
|
||||
dnl
|
||||
dnl compatibility symlinks
|
||||
dnl
|
||||
@@ -1179,6 +1192,13 @@ if test "x$enable_gbm" = xyes; then
|
||||
if test "x$enable_shared_glapi" = xno; then
|
||||
AC_MSG_ERROR([gbm_dri requires --enable-shared-glapi])
|
||||
fi
|
||||
else
|
||||
# Strictly speaking libgbm does not require --enable-dri, although
|
||||
# both of its backends do. Thus one can build libgbm without any
|
||||
# backends if --disable-dri is set.
|
||||
# To avoid unnecessary complexity of checking if at least one backend
|
||||
# is available when building, just mandate --enable-dri.
|
||||
AC_MSG_ERROR([gbm requires --enable-dri])
|
||||
fi
|
||||
fi
|
||||
AM_CONDITIONAL(HAVE_GBM, test "x$enable_gbm" = xyes)
|
||||
@@ -1605,6 +1625,12 @@ if test "x$enable_gallium_llvm" = xyes; then
|
||||
AC_COMPUTE_INT([LLVM_VERSION_MINOR], [LLVM_VERSION_MINOR],
|
||||
[#include "${LLVM_INCLUDEDIR}/llvm/Config/llvm-config.h"])
|
||||
|
||||
dnl In LLVM 3.4.1 patch level was defined in config.h and not
|
||||
dnl llvm-config.h
|
||||
AC_COMPUTE_INT([LLVM_VERSION_PATCH], [LLVM_VERSION_PATCH],
|
||||
[#include "${LLVM_INCLUDEDIR}/llvm/Config/config.h"],
|
||||
LLVM_VERSION_PATCH=0) dnl Default if LLVM_VERSION_PATCH not found
|
||||
|
||||
if test -n "${LLVM_VERSION_MAJOR}"; then
|
||||
LLVM_VERSION_INT="${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MINOR}"
|
||||
else
|
||||
@@ -1627,7 +1653,7 @@ if test "x$enable_gallium_llvm" = xyes; then
|
||||
LLVM_COMPONENTS="${LLVM_COMPONENTS} option"
|
||||
fi
|
||||
fi
|
||||
DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT"
|
||||
DEFINES="${DEFINES} -DHAVE_LLVM=0x0$LLVM_VERSION_INT -DLLVM_VERSION_PATCH=$LLVM_VERSION_PATCH"
|
||||
MESA_LLVM=1
|
||||
|
||||
dnl Check for Clang internal headers
|
||||
|
@@ -67,6 +67,25 @@ TBD.
|
||||
<h2>Changes</h2>
|
||||
|
||||
<ul>
|
||||
<li>Renamed <i>--with-llvm-shared-libs</i> to <i>--enable-llvm-shared-libs</i></li>
|
||||
<p>
|
||||
The option is used to control how mesa is linked against LLVM, and now
|
||||
defaults to enabled (shared linking).
|
||||
</p>
|
||||
|
||||
<li>Split <i>libxatracker.so</i> into a standalone library which can be used
|
||||
with any gallium driver.</li>
|
||||
<p>
|
||||
Previously the library was linked statically against vmware's virtual gpu
|
||||
driver(svga), whereas now it loads a shared pipe_*.so driver. Provide the
|
||||
following options during configure, if you would like support for svga driver
|
||||
<i>--enable-xa --with-gallium-drivers=svga</i>
|
||||
</p>
|
||||
|
||||
<p>
|
||||
Note: The files are installed in $(libdir)/gallium-pipe/ and the interface
|
||||
between them and libxatracker.so is <strong>not</strong> stable.
|
||||
</p>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
|
@@ -135,22 +135,6 @@
|
||||
<arg name="stride2" type="int"/>
|
||||
</request>
|
||||
|
||||
<!-- Create a wayland buffer for the prime fd. Use for regular and planar
|
||||
buffers. Pass 0 for offset and stride for unused planes. -->
|
||||
<request name="create_prime_buffer" since="2">
|
||||
<arg name="id" type="new_id" interface="wl_buffer"/>
|
||||
<arg name="name" type="fd"/>
|
||||
<arg name="width" type="int"/>
|
||||
<arg name="height" type="int"/>
|
||||
<arg name="format" type="uint"/>
|
||||
<arg name="offset0" type="int"/>
|
||||
<arg name="stride0" type="int"/>
|
||||
<arg name="offset1" type="int"/>
|
||||
<arg name="stride1" type="int"/>
|
||||
<arg name="offset2" type="int"/>
|
||||
<arg name="stride2" type="int"/>
|
||||
</request>
|
||||
|
||||
<!-- Notification of the path of the drm device which is used by
|
||||
the server. The client should use this device for creating
|
||||
local buffers. Only buffers created from this device should
|
||||
@@ -177,6 +161,25 @@
|
||||
<event name="capabilities">
|
||||
<arg name="value" type="uint"/>
|
||||
</event>
|
||||
|
||||
<!-- Version 2 additions -->
|
||||
|
||||
<!-- Create a wayland buffer for the prime fd. Use for regular and planar
|
||||
buffers. Pass 0 for offset and stride for unused planes. -->
|
||||
<request name="create_prime_buffer" since="2">
|
||||
<arg name="id" type="new_id" interface="wl_buffer"/>
|
||||
<arg name="name" type="fd"/>
|
||||
<arg name="width" type="int"/>
|
||||
<arg name="height" type="int"/>
|
||||
<arg name="format" type="uint"/>
|
||||
<arg name="offset0" type="int"/>
|
||||
<arg name="stride0" type="int"/>
|
||||
<arg name="offset1" type="int"/>
|
||||
<arg name="stride1" type="int"/>
|
||||
<arg name="offset2" type="int"/>
|
||||
<arg name="stride2" type="int"/>
|
||||
</request>
|
||||
|
||||
</interface>
|
||||
|
||||
</protocol>
|
||||
|
@@ -1000,6 +1000,8 @@ draw_get_shader_param_no_llvm(unsigned shader, enum pipe_shader_cap param)
|
||||
/**
|
||||
* XXX: Results for PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS because there are two
|
||||
* different ways of setting textures, and drivers typically only support one.
|
||||
* Drivers requesting a draw context explicitly without llvm must call
|
||||
* draw_get_shader_param_no_llvm instead.
|
||||
*/
|
||||
int
|
||||
draw_get_shader_param(unsigned shader, enum pipe_shader_cap param)
|
||||
|
@@ -597,7 +597,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
|
||||
|
||||
|
||||
#ifdef HAVE_LLVM
|
||||
if (draw_get_option_use_llvm()) {
|
||||
if (shader->draw->llvm) {
|
||||
shader->gs_output = output_verts->verts;
|
||||
if (max_out_prims > shader->max_out_prims) {
|
||||
unsigned i;
|
||||
@@ -674,7 +674,7 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
|
||||
void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
|
||||
struct draw_context *draw)
|
||||
{
|
||||
boolean use_llvm = draw_get_option_use_llvm();
|
||||
boolean use_llvm = draw->llvm != NULL;
|
||||
if (!use_llvm && shader && shader->machine->Tokens != shader->state.tokens) {
|
||||
tgsi_exec_machine_bind_shader(shader->machine,
|
||||
shader->state.tokens,
|
||||
@@ -686,7 +686,7 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
|
||||
boolean
|
||||
draw_gs_init( struct draw_context *draw )
|
||||
{
|
||||
if (!draw_get_option_use_llvm()) {
|
||||
if (!draw->llvm) {
|
||||
draw->gs.tgsi.machine = tgsi_exec_machine_create();
|
||||
if (!draw->gs.tgsi.machine)
|
||||
return FALSE;
|
||||
@@ -715,7 +715,7 @@ draw_create_geometry_shader(struct draw_context *draw,
|
||||
const struct pipe_shader_state *state)
|
||||
{
|
||||
#ifdef HAVE_LLVM
|
||||
boolean use_llvm = draw_get_option_use_llvm();
|
||||
boolean use_llvm = draw->llvm != NULL;
|
||||
struct llvm_geometry_shader *llvm_gs;
|
||||
#endif
|
||||
struct draw_geometry_shader *gs;
|
||||
@@ -870,7 +870,7 @@ void draw_delete_geometry_shader(struct draw_context *draw,
|
||||
return;
|
||||
}
|
||||
#ifdef HAVE_LLVM
|
||||
if (draw_get_option_use_llvm()) {
|
||||
if (draw->llvm) {
|
||||
struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs);
|
||||
struct draw_gs_llvm_variant_list_item *li;
|
||||
|
||||
|
@@ -47,7 +47,6 @@
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
|
||||
#ifdef HAVE_LLVM
|
||||
struct draw_llvm;
|
||||
struct gallivm_state;
|
||||
#endif
|
||||
|
||||
@@ -69,6 +68,7 @@ struct tgsi_exec_machine;
|
||||
struct tgsi_sampler;
|
||||
struct draw_pt_front_end;
|
||||
struct draw_assembler;
|
||||
struct draw_llvm;
|
||||
|
||||
|
||||
/**
|
||||
@@ -318,9 +318,7 @@ struct draw_context
|
||||
unsigned start_instance;
|
||||
unsigned start_index;
|
||||
|
||||
#ifdef HAVE_LLVM
|
||||
struct draw_llvm *llvm;
|
||||
#endif
|
||||
|
||||
/** Texture sampler and sampler view state.
|
||||
* Note that we have arrays indexed by shader type. At this time
|
||||
|
@@ -149,7 +149,7 @@ draw_vs_init( struct draw_context *draw )
|
||||
{
|
||||
draw->dump_vs = debug_get_option_gallium_dump_vs();
|
||||
|
||||
if (!draw_get_option_use_llvm()) {
|
||||
if (!draw->llvm) {
|
||||
draw->vs.tgsi.machine = tgsi_exec_machine_create();
|
||||
if (!draw->vs.tgsi.machine)
|
||||
return FALSE;
|
||||
@@ -175,7 +175,7 @@ draw_vs_destroy( struct draw_context *draw )
|
||||
if (draw->vs.emit_cache)
|
||||
translate_cache_destroy(draw->vs.emit_cache);
|
||||
|
||||
if (!draw_get_option_use_llvm())
|
||||
if (!draw->llvm)
|
||||
tgsi_exec_machine_destroy(draw->vs.tgsi.machine);
|
||||
}
|
||||
|
||||
|
@@ -63,7 +63,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader,
|
||||
{
|
||||
struct exec_vertex_shader *evs = exec_vertex_shader(shader);
|
||||
|
||||
debug_assert(!draw_get_option_use_llvm());
|
||||
debug_assert(!draw->llvm);
|
||||
/* Specify the vertex program to interpret/execute.
|
||||
* Avoid rebinding when possible.
|
||||
*/
|
||||
@@ -97,7 +97,7 @@ vs_exec_run_linear( struct draw_vertex_shader *shader,
|
||||
unsigned slot;
|
||||
boolean clamp_vertex_color = shader->draw->rasterizer->clamp_vertex_color;
|
||||
|
||||
debug_assert(!draw_get_option_use_llvm());
|
||||
debug_assert(!shader->draw->llvm);
|
||||
tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
|
||||
constants, const_size);
|
||||
|
||||
|
@@ -145,9 +145,6 @@ pipe_loader_sw_release(struct pipe_loader_device **dev)
|
||||
{
|
||||
struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev);
|
||||
|
||||
if (sdev->ws && sdev->ws->destroy)
|
||||
sdev->ws->destroy(sdev->ws);
|
||||
|
||||
if (sdev->lib)
|
||||
util_dl_close(sdev->lib);
|
||||
|
||||
|
@@ -120,7 +120,8 @@ const char *tgsi_property_names[TGSI_PROPERTY_COUNT] =
|
||||
"FS_COORD_PIXEL_CENTER",
|
||||
"FS_COLOR0_WRITES_ALL_CBUFS",
|
||||
"FS_DEPTH_LAYOUT",
|
||||
"VS_PROHIBIT_UCPS"
|
||||
"VS_PROHIBIT_UCPS",
|
||||
"GS_INVOCATIONS",
|
||||
};
|
||||
|
||||
const char *tgsi_type_names[5] =
|
||||
|
@@ -3,6 +3,8 @@ C_SOURCES := \
|
||||
freedreno_lowering.c \
|
||||
freedreno_program.c \
|
||||
freedreno_query.c \
|
||||
freedreno_query_hw.c \
|
||||
freedreno_query_sw.c \
|
||||
freedreno_fence.c \
|
||||
freedreno_resource.c \
|
||||
freedreno_surface.c \
|
||||
@@ -38,6 +40,7 @@ a3xx_SOURCES := \
|
||||
a3xx/fd3_emit.c \
|
||||
a3xx/fd3_gmem.c \
|
||||
a3xx/fd3_program.c \
|
||||
a3xx/fd3_query.c \
|
||||
a3xx/fd3_rasterizer.c \
|
||||
a3xx/fd3_screen.c \
|
||||
a3xx/fd3_texture.c \
|
||||
|
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
@@ -125,7 +125,7 @@ emit_texture(struct fd_ringbuffer *ring, struct fd_context *ctx,
|
||||
{
|
||||
unsigned const_idx = fd2_get_const_idx(ctx, tex, samp_id);
|
||||
static const struct fd2_sampler_stateobj dummy_sampler = {};
|
||||
struct fd2_sampler_stateobj *sampler;
|
||||
const struct fd2_sampler_stateobj *sampler;
|
||||
struct fd2_pipe_sampler_view *view;
|
||||
|
||||
if (emitted & (1 << const_idx))
|
||||
|
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -41,31 +41,11 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
enum a3xx_render_mode {
|
||||
RB_RENDERING_PASS = 0,
|
||||
RB_TILING_PASS = 1,
|
||||
RB_RESOLVE_PASS = 2,
|
||||
};
|
||||
|
||||
enum a3xx_tile_mode {
|
||||
LINEAR = 0,
|
||||
TILE_32X32 = 2,
|
||||
};
|
||||
|
||||
enum a3xx_threadmode {
|
||||
MULTI = 0,
|
||||
SINGLE = 1,
|
||||
};
|
||||
|
||||
enum a3xx_instrbuffermode {
|
||||
BUFFER = 1,
|
||||
};
|
||||
|
||||
enum a3xx_threadsize {
|
||||
TWO_QUADS = 0,
|
||||
FOUR_QUADS = 1,
|
||||
};
|
||||
|
||||
enum a3xx_state_block_id {
|
||||
HLSQ_BLOCK_ID_TP_TEX = 2,
|
||||
HLSQ_BLOCK_ID_TP_MIPMAP = 3,
|
||||
@@ -180,12 +160,6 @@ enum a3xx_color_swap {
|
||||
XYZW = 3,
|
||||
};
|
||||
|
||||
enum a3xx_msaa_samples {
|
||||
MSAA_ONE = 0,
|
||||
MSAA_TWO = 1,
|
||||
MSAA_FOUR = 2,
|
||||
};
|
||||
|
||||
enum a3xx_sp_perfcounter_select {
|
||||
SP_FS_CFLOW_INSTRUCTIONS = 12,
|
||||
SP_FS_FULL_ALU_INSTRUCTIONS = 14,
|
||||
@@ -212,11 +186,6 @@ enum a3xx_rop_code {
|
||||
ROP_SET = 15,
|
||||
};
|
||||
|
||||
enum adreno_rb_copy_control_mode {
|
||||
RB_COPY_RESOLVE = 1,
|
||||
RB_COPY_DEPTH_STENCIL = 5,
|
||||
};
|
||||
|
||||
enum a3xx_tex_filter {
|
||||
A3XX_TEX_NEAREST = 0,
|
||||
A3XX_TEX_LINEAR = 1,
|
||||
@@ -337,6 +306,7 @@ enum a3xx_tex_type {
|
||||
#define REG_A3XX_RBBM_INT_0_STATUS 0x00000064
|
||||
|
||||
#define REG_A3XX_RBBM_PERFCTR_CTL 0x00000080
|
||||
#define A3XX_RBBM_PERFCTR_CTL_ENABLE 0x00000001
|
||||
|
||||
#define REG_A3XX_RBBM_PERFCTR_LOAD_CMD0 0x00000081
|
||||
|
||||
@@ -570,6 +540,10 @@ static inline uint32_t REG_A3XX_CP_PROTECT_REG(uint32_t i0) { return 0x00000460
|
||||
|
||||
#define REG_A3XX_CP_AHB_FAULT 0x0000054d
|
||||
|
||||
#define REG_A3XX_SP_GLOBAL_MEM_SIZE 0x00000e22
|
||||
|
||||
#define REG_A3XX_SP_GLOBAL_MEM_ADDR 0x00000e23
|
||||
|
||||
#define REG_A3XX_GRAS_CL_CLIP_CNTL 0x00002040
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 0x00001000
|
||||
#define A3XX_GRAS_CL_CLIP_CNTL_CLIP_DISABLE 0x00010000
|
||||
@@ -644,8 +618,26 @@ static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val)
|
||||
}
|
||||
|
||||
#define REG_A3XX_GRAS_SU_POINT_MINMAX 0x00002068
|
||||
#define A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK 0x0000ffff
|
||||
#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0
|
||||
static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK;
|
||||
}
|
||||
#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000
|
||||
#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16
|
||||
static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069
|
||||
#define A3XX_GRAS_SU_POINT_SIZE__MASK 0xffffffff
|
||||
#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0
|
||||
static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c
|
||||
#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK 0x00ffffff
|
||||
@@ -992,6 +984,12 @@ static inline uint32_t A3XX_RB_COPY_CONTROL_MODE(enum adreno_rb_copy_control_mod
|
||||
{
|
||||
return ((val) << A3XX_RB_COPY_CONTROL_MODE__SHIFT) & A3XX_RB_COPY_CONTROL_MODE__MASK;
|
||||
}
|
||||
#define A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK 0x00000f00
|
||||
#define A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT 8
|
||||
static inline uint32_t A3XX_RB_COPY_CONTROL_FASTCLEAR(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_RB_COPY_CONTROL_FASTCLEAR__SHIFT) & A3XX_RB_COPY_CONTROL_FASTCLEAR__MASK;
|
||||
}
|
||||
#define A3XX_RB_COPY_CONTROL_GMEM_BASE__MASK 0xffffc000
|
||||
#define A3XX_RB_COPY_CONTROL_GMEM_BASE__SHIFT 14
|
||||
static inline uint32_t A3XX_RB_COPY_CONTROL_GMEM_BASE(uint32_t val)
|
||||
@@ -1034,6 +1032,12 @@ static inline uint32_t A3XX_RB_COPY_DEST_INFO_SWAP(enum a3xx_color_swap val)
|
||||
{
|
||||
return ((val) << A3XX_RB_COPY_DEST_INFO_SWAP__SHIFT) & A3XX_RB_COPY_DEST_INFO_SWAP__MASK;
|
||||
}
|
||||
#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK 0x00000c00
|
||||
#define A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT 10
|
||||
static inline uint32_t A3XX_RB_COPY_DEST_INFO_DITHER_MODE(enum adreno_rb_dither_mode val)
|
||||
{
|
||||
return ((val) << A3XX_RB_COPY_DEST_INFO_DITHER_MODE__SHIFT) & A3XX_RB_COPY_DEST_INFO_DITHER_MODE__MASK;
|
||||
}
|
||||
#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__MASK 0x0003c000
|
||||
#define A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE__SHIFT 14
|
||||
static inline uint32_t A3XX_RB_COPY_DEST_INFO_COMPONENT_ENABLE(uint32_t val)
|
||||
@@ -1202,6 +1206,8 @@ static inline uint32_t A3XX_RB_WINDOW_OFFSET_Y(uint32_t val)
|
||||
}
|
||||
|
||||
#define REG_A3XX_RB_SAMPLE_COUNT_CONTROL 0x00002110
|
||||
#define A3XX_RB_SAMPLE_COUNT_CONTROL_RESET 0x00000001
|
||||
#define A3XX_RB_SAMPLE_COUNT_CONTROL_COPY 0x00000002
|
||||
|
||||
#define REG_A3XX_RB_SAMPLE_COUNT_ADDR 0x00002111
|
||||
|
||||
@@ -1366,10 +1372,36 @@ static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
|
||||
}
|
||||
|
||||
#define REG_A3XX_HLSQ_CL_NDRANGE_0_REG 0x0000220a
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK 0x00000003
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT 0
|
||||
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_WORKDIM__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK 0x00000ffc
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT 2
|
||||
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE0__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK 0x003ff000
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT 12
|
||||
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE1__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK 0xffc00000
|
||||
#define A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT 22
|
||||
static inline uint32_t A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__SHIFT) & A3XX_HLSQ_CL_NDRANGE_0_REG_LOCALSIZE2__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_HLSQ_CL_NDRANGE_1_REG 0x0000220b
|
||||
static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK(uint32_t i0) { return 0x0000220b + 0x2*i0; }
|
||||
|
||||
#define REG_A3XX_HLSQ_CL_NDRANGE_2_REG 0x0000220c
|
||||
static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_SIZE(uint32_t i0) { return 0x0000220b + 0x2*i0; }
|
||||
|
||||
static inline uint32_t REG_A3XX_HLSQ_CL_GLOBAL_WORK_OFFSET(uint32_t i0) { return 0x0000220c + 0x2*i0; }
|
||||
|
||||
#define REG_A3XX_HLSQ_CL_CONTROL_0_REG 0x00002211
|
||||
|
||||
@@ -1377,7 +1409,9 @@ static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
|
||||
|
||||
#define REG_A3XX_HLSQ_CL_KERNEL_CONST_REG 0x00002214
|
||||
|
||||
#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x00002215
|
||||
static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP(uint32_t i0) { return 0x00002215 + 0x1*i0; }
|
||||
|
||||
static inline uint32_t REG_A3XX_HLSQ_CL_KERNEL_GROUP_RATIO(uint32_t i0) { return 0x00002215 + 0x1*i0; }
|
||||
|
||||
#define REG_A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x00002216
|
||||
|
||||
@@ -1624,6 +1658,7 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
|
||||
}
|
||||
#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000
|
||||
#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000
|
||||
#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE 0x00800000
|
||||
#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000
|
||||
#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24
|
||||
static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
|
||||
@@ -1797,6 +1832,7 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
|
||||
}
|
||||
#define A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE 0x00200000
|
||||
#define A3XX_SP_FS_CTRL_REG0_PIXLODENABLE 0x00400000
|
||||
#define A3XX_SP_FS_CTRL_REG0_COMPUTEMODE 0x00800000
|
||||
#define A3XX_SP_FS_CTRL_REG0_LENGTH__MASK 0xff000000
|
||||
#define A3XX_SP_FS_CTRL_REG0_LENGTH__SHIFT 24
|
||||
static inline uint32_t A3XX_SP_FS_CTRL_REG0_LENGTH(uint32_t val)
|
||||
@@ -1976,6 +2012,42 @@ static inline uint32_t A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(uint32_t val)
|
||||
|
||||
#define REG_A3XX_VBIF_OUT_AXI_AOOO 0x0000305f
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_CNT_EN 0x00003070
|
||||
#define A3XX_VBIF_PERF_CNT_EN_CNT0 0x00000001
|
||||
#define A3XX_VBIF_PERF_CNT_EN_CNT1 0x00000002
|
||||
#define A3XX_VBIF_PERF_CNT_EN_PWRCNT0 0x00000004
|
||||
#define A3XX_VBIF_PERF_CNT_EN_PWRCNT1 0x00000008
|
||||
#define A3XX_VBIF_PERF_CNT_EN_PWRCNT2 0x00000010
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_CNT_CLR 0x00003071
|
||||
#define A3XX_VBIF_PERF_CNT_CLR_CNT0 0x00000001
|
||||
#define A3XX_VBIF_PERF_CNT_CLR_CNT1 0x00000002
|
||||
#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT0 0x00000004
|
||||
#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT1 0x00000008
|
||||
#define A3XX_VBIF_PERF_CNT_CLR_PWRCNT2 0x00000010
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_CNT_SEL 0x00003072
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_CNT0_LO 0x00003073
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_CNT0_HI 0x00003074
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_CNT1_LO 0x00003075
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_CNT1_HI 0x00003076
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_PWR_CNT0_LO 0x00003077
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_PWR_CNT0_HI 0x00003078
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_PWR_CNT1_LO 0x00003079
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_PWR_CNT1_HI 0x0000307a
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_PWR_CNT2_LO 0x0000307b
|
||||
|
||||
#define REG_A3XX_VBIF_PERF_PWR_CNT2_HI 0x0000307c
|
||||
|
||||
#define REG_A3XX_VSC_BIN_SIZE 0x00000c01
|
||||
#define A3XX_VSC_BIN_SIZE_WIDTH__MASK 0x0000001f
|
||||
#define A3XX_VSC_BIN_SIZE_WIDTH__SHIFT 0
|
||||
@@ -2249,6 +2321,12 @@ static inline uint32_t A3XX_TEX_SAMP_0_WRAP_R(enum a3xx_tex_clamp val)
|
||||
{
|
||||
return ((val) << A3XX_TEX_SAMP_0_WRAP_R__SHIFT) & A3XX_TEX_SAMP_0_WRAP_R__MASK;
|
||||
}
|
||||
#define A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK 0x00700000
|
||||
#define A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT 20
|
||||
static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val)
|
||||
{
|
||||
return ((val) << A3XX_TEX_SAMP_0_COMPARE_FUNC__SHIFT) & A3XX_TEX_SAMP_0_COMPARE_FUNC__MASK;
|
||||
}
|
||||
#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000
|
||||
|
||||
#define REG_A3XX_TEX_SAMP_1 0x00000001
|
||||
@@ -2267,6 +2345,7 @@ static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val)
|
||||
|
||||
#define REG_A3XX_TEX_CONST_0 0x00000000
|
||||
#define A3XX_TEX_CONST_0_TILED 0x00000001
|
||||
#define A3XX_TEX_CONST_0_SRGB 0x00000004
|
||||
#define A3XX_TEX_CONST_0_SWIZ_X__MASK 0x00000070
|
||||
#define A3XX_TEX_CONST_0_SWIZ_X__SHIFT 4
|
||||
static inline uint32_t A3XX_TEX_CONST_0_SWIZ_X(enum a3xx_tex_swiz val)
|
||||
@@ -2303,6 +2382,7 @@ static inline uint32_t A3XX_TEX_CONST_0_FMT(enum a3xx_tex_fmt val)
|
||||
{
|
||||
return ((val) << A3XX_TEX_CONST_0_FMT__SHIFT) & A3XX_TEX_CONST_0_FMT__MASK;
|
||||
}
|
||||
#define A3XX_TEX_CONST_0_NOCONVERT 0x20000000
|
||||
#define A3XX_TEX_CONST_0_TYPE__MASK 0xc0000000
|
||||
#define A3XX_TEX_CONST_0_TYPE__SHIFT 30
|
||||
static inline uint32_t A3XX_TEX_CONST_0_TYPE(enum a3xx_tex_type val)
|
||||
|
@@ -1074,77 +1074,154 @@ trans_arl(const struct instr_translater *t,
|
||||
add_src_reg(ctx, instr, tmp_src, chan)->flags |= IR3_REG_HALF;
|
||||
}
|
||||
|
||||
/* texture fetch/sample instructions: */
|
||||
static void
|
||||
trans_samp(const struct instr_translater *t,
|
||||
struct fd3_compile_context *ctx,
|
||||
/*
|
||||
* texture fetch/sample instructions:
|
||||
*/
|
||||
|
||||
struct tex_info {
|
||||
int8_t order[4];
|
||||
unsigned src_wrmask, flags;
|
||||
};
|
||||
|
||||
static const struct tex_info *
|
||||
get_tex_info(struct fd3_compile_context *ctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
struct tgsi_src_register *coord = &inst->Src[0].Register;
|
||||
struct tgsi_src_register *samp = &inst->Src[1].Register;
|
||||
unsigned tex = inst->Texture.Texture;
|
||||
int8_t *order;
|
||||
unsigned i, flags = 0, src_wrmask;
|
||||
bool needs_mov = false;
|
||||
static const struct tex_info tex1d = {
|
||||
.order = { 0, -1, -1, -1 }, /* coord.x */
|
||||
.src_wrmask = TGSI_WRITEMASK_XY,
|
||||
.flags = 0,
|
||||
};
|
||||
static const struct tex_info tex1ds = {
|
||||
.order = { 0, -1, 2, -1 }, /* coord.xz */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZ,
|
||||
.flags = IR3_INSTR_S,
|
||||
};
|
||||
static const struct tex_info tex2d = {
|
||||
.order = { 0, 1, -1, -1 }, /* coord.xy */
|
||||
.src_wrmask = TGSI_WRITEMASK_XY,
|
||||
.flags = 0,
|
||||
};
|
||||
static const struct tex_info tex2ds = {
|
||||
.order = { 0, 1, 2, -1 }, /* coord.xyz */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZ,
|
||||
.flags = IR3_INSTR_S,
|
||||
};
|
||||
static const struct tex_info tex3d = {
|
||||
.order = { 0, 1, 2, -1 }, /* coord.xyz */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZ,
|
||||
.flags = IR3_INSTR_3D,
|
||||
};
|
||||
static const struct tex_info tex3ds = {
|
||||
.order = { 0, 1, 2, 3 }, /* coord.xyzw */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZW,
|
||||
.flags = IR3_INSTR_S | IR3_INSTR_3D,
|
||||
};
|
||||
static const struct tex_info txp1d = {
|
||||
.order = { 0, -1, 3, -1 }, /* coord.xw */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZ,
|
||||
.flags = IR3_INSTR_P,
|
||||
};
|
||||
static const struct tex_info txp1ds = {
|
||||
.order = { 0, -1, 2, 3 }, /* coord.xzw */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZW,
|
||||
.flags = IR3_INSTR_P | IR3_INSTR_S,
|
||||
};
|
||||
static const struct tex_info txp2d = {
|
||||
.order = { 0, 1, 3, -1 }, /* coord.xyw */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZ,
|
||||
.flags = IR3_INSTR_P,
|
||||
};
|
||||
static const struct tex_info txp2ds = {
|
||||
.order = { 0, 1, 2, 3 }, /* coord.xyzw */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZW,
|
||||
.flags = IR3_INSTR_P | IR3_INSTR_S,
|
||||
};
|
||||
static const struct tex_info txp3d = {
|
||||
.order = { 0, 1, 2, 3 }, /* coord.xyzw */
|
||||
.src_wrmask = TGSI_WRITEMASK_XYZW,
|
||||
.flags = IR3_INSTR_P | IR3_INSTR_3D,
|
||||
};
|
||||
|
||||
switch (t->arg) {
|
||||
unsigned tex = inst->Texture.Texture;
|
||||
|
||||
switch (inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_TEX:
|
||||
switch (tex) {
|
||||
case TGSI_TEXTURE_1D:
|
||||
return &tex1d;
|
||||
case TGSI_TEXTURE_SHADOW1D:
|
||||
return &tex1ds;
|
||||
case TGSI_TEXTURE_2D:
|
||||
case TGSI_TEXTURE_RECT:
|
||||
order = (int8_t[4]){ 0, 1, -1, -1 };
|
||||
src_wrmask = TGSI_WRITEMASK_XY;
|
||||
break;
|
||||
return &tex2d;
|
||||
case TGSI_TEXTURE_SHADOW2D:
|
||||
case TGSI_TEXTURE_SHADOWRECT:
|
||||
return &tex2ds;
|
||||
case TGSI_TEXTURE_3D:
|
||||
case TGSI_TEXTURE_CUBE:
|
||||
order = (int8_t[4]){ 0, 1, 2, -1 };
|
||||
src_wrmask = TGSI_WRITEMASK_XYZ;
|
||||
flags |= IR3_INSTR_3D;
|
||||
break;
|
||||
return &tex3d;
|
||||
case TGSI_TEXTURE_SHADOWCUBE:
|
||||
return &tex3ds;
|
||||
default:
|
||||
compile_error(ctx, "unknown texture type: %s\n",
|
||||
tgsi_texture_names[tex]);
|
||||
break;
|
||||
return NULL;
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_TXP:
|
||||
switch (tex) {
|
||||
case TGSI_TEXTURE_1D:
|
||||
return &txp1d;
|
||||
case TGSI_TEXTURE_SHADOW1D:
|
||||
return &txp1ds;
|
||||
case TGSI_TEXTURE_2D:
|
||||
case TGSI_TEXTURE_RECT:
|
||||
order = (int8_t[4]){ 0, 1, 3, -1 };
|
||||
src_wrmask = TGSI_WRITEMASK_XYZ;
|
||||
break;
|
||||
return &txp2d;
|
||||
case TGSI_TEXTURE_SHADOW2D:
|
||||
case TGSI_TEXTURE_SHADOWRECT:
|
||||
return &txp2ds;
|
||||
case TGSI_TEXTURE_3D:
|
||||
case TGSI_TEXTURE_CUBE:
|
||||
order = (int8_t[4]){ 0, 1, 2, 3 };
|
||||
src_wrmask = TGSI_WRITEMASK_XYZW;
|
||||
flags |= IR3_INSTR_3D;
|
||||
break;
|
||||
return &txp3d;
|
||||
default:
|
||||
compile_error(ctx, "unknown texture type: %s\n",
|
||||
tgsi_texture_names[tex]);
|
||||
break;
|
||||
}
|
||||
flags |= IR3_INSTR_P;
|
||||
break;
|
||||
default:
|
||||
compile_assert(ctx, 0);
|
||||
break;
|
||||
}
|
||||
compile_assert(ctx, 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct tgsi_src_register *
|
||||
get_tex_coord(struct fd3_compile_context *ctx,
|
||||
struct tgsi_full_instruction *inst,
|
||||
const struct tex_info *tinf)
|
||||
{
|
||||
struct tgsi_src_register *coord = &inst->Src[0].Register;
|
||||
struct ir3_instruction *instr;
|
||||
unsigned tex = inst->Texture.Texture;
|
||||
bool needs_mov = false;
|
||||
unsigned i;
|
||||
|
||||
/* cat5 instruction cannot seem to handle const or relative: */
|
||||
if (is_rel_or_const(coord))
|
||||
needs_mov = true;
|
||||
|
||||
/* 1D textures we fix up w/ 0.0 as 2nd coord: */
|
||||
if ((tex == TGSI_TEXTURE_1D) || (tex == TGSI_TEXTURE_SHADOW1D))
|
||||
needs_mov = true;
|
||||
|
||||
/* The texture sample instructions need to coord in successive
|
||||
* registers/components (ie. src.xy but not src.yx). And TXP
|
||||
* needs the .w component in .z for 2D.. so in some cases we
|
||||
* might need to emit some mov instructions to shuffle things
|
||||
* around:
|
||||
*/
|
||||
for (i = 1; (i < 4) && (order[i] >= 0) && !needs_mov; i++)
|
||||
if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i]))
|
||||
for (i = 1; (i < 4) && (tinf->order[i] >= 0) && !needs_mov; i++)
|
||||
if (src_swiz(coord, i) != (src_swiz(coord, 0) + tinf->order[i]))
|
||||
needs_mov = true;
|
||||
|
||||
if (needs_mov) {
|
||||
@@ -1157,28 +1234,55 @@ trans_samp(const struct instr_translater *t,
|
||||
/* need to move things around: */
|
||||
tmp_src = get_internal_temp(ctx, &tmp_dst);
|
||||
|
||||
for (j = 0; (j < 4) && (order[j] >= 0); j++) {
|
||||
instr = instr_create(ctx, 1, 0);
|
||||
for (j = 0; j < 4; j++) {
|
||||
if (tinf->order[j] < 0)
|
||||
continue;
|
||||
instr = instr_create(ctx, 1, 0); /* mov */
|
||||
instr->cat1.src_type = type_mov;
|
||||
instr->cat1.dst_type = type_mov;
|
||||
add_dst_reg(ctx, instr, &tmp_dst, j);
|
||||
add_src_reg(ctx, instr, coord,
|
||||
src_swiz(coord, order[j]));
|
||||
src_swiz(coord, tinf->order[j]));
|
||||
}
|
||||
|
||||
/* fix up .y coord: */
|
||||
if ((tex == TGSI_TEXTURE_1D) ||
|
||||
(tex == TGSI_TEXTURE_SHADOW1D)) {
|
||||
instr = instr_create(ctx, 1, 0); /* mov */
|
||||
instr->cat1.src_type = type_mov;
|
||||
instr->cat1.dst_type = type_mov;
|
||||
add_dst_reg(ctx, instr, &tmp_dst, 1); /* .y */
|
||||
ir3_reg_create(instr, 0, IR3_REG_IMMED)->fim_val = 0.5;
|
||||
}
|
||||
|
||||
coord = tmp_src;
|
||||
}
|
||||
|
||||
return coord;
|
||||
}
|
||||
|
||||
static void
|
||||
trans_samp(const struct instr_translater *t,
|
||||
struct fd3_compile_context *ctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
struct tgsi_dst_register *dst = &inst->Dst[0].Register;
|
||||
struct tgsi_src_register *coord;
|
||||
struct tgsi_src_register *samp = &inst->Src[1].Register;
|
||||
const struct tex_info *tinf;
|
||||
|
||||
tinf = get_tex_info(ctx, inst);
|
||||
coord = get_tex_coord(ctx, inst, tinf);
|
||||
|
||||
instr = instr_create(ctx, 5, t->opc);
|
||||
instr->cat5.type = get_ftype(ctx);
|
||||
instr->cat5.samp = samp->Index;
|
||||
instr->cat5.tex = samp->Index;
|
||||
instr->flags |= flags;
|
||||
instr->flags |= tinf->flags;
|
||||
|
||||
add_dst_reg_wrmask(ctx, instr, &inst->Dst[0].Register, 0,
|
||||
inst->Dst[0].Register.WriteMask);
|
||||
|
||||
add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, src_wrmask);
|
||||
add_dst_reg_wrmask(ctx, instr, dst, 0, dst->WriteMask);
|
||||
add_src_reg_wrmask(ctx, instr, coord, coord->SwizzleX, tinf->src_wrmask);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1231,15 +1335,19 @@ trans_cmp(const struct instr_translater *t,
|
||||
|
||||
switch (t->tgsi_opc) {
|
||||
case TGSI_OPCODE_SEQ:
|
||||
case TGSI_OPCODE_FSEQ:
|
||||
condition = IR3_COND_EQ;
|
||||
break;
|
||||
case TGSI_OPCODE_SNE:
|
||||
case TGSI_OPCODE_FSNE:
|
||||
condition = IR3_COND_NE;
|
||||
break;
|
||||
case TGSI_OPCODE_SGE:
|
||||
case TGSI_OPCODE_FSGE:
|
||||
condition = IR3_COND_GE;
|
||||
break;
|
||||
case TGSI_OPCODE_SLT:
|
||||
case TGSI_OPCODE_FSLT:
|
||||
condition = IR3_COND_LT;
|
||||
break;
|
||||
case TGSI_OPCODE_SLE:
|
||||
@@ -1269,11 +1377,15 @@ trans_cmp(const struct instr_translater *t,
|
||||
|
||||
switch (t->tgsi_opc) {
|
||||
case TGSI_OPCODE_SEQ:
|
||||
case TGSI_OPCODE_FSEQ:
|
||||
case TGSI_OPCODE_SGE:
|
||||
case TGSI_OPCODE_FSGE:
|
||||
case TGSI_OPCODE_SLE:
|
||||
case TGSI_OPCODE_SNE:
|
||||
case TGSI_OPCODE_FSNE:
|
||||
case TGSI_OPCODE_SGT:
|
||||
case TGSI_OPCODE_SLT:
|
||||
case TGSI_OPCODE_FSLT:
|
||||
/* cov.u16f16 dst, tmp0 */
|
||||
instr = instr_create(ctx, 1, 0);
|
||||
instr->cat1.src_type = get_utype(ctx);
|
||||
@@ -1293,6 +1405,96 @@ trans_cmp(const struct instr_translater *t,
|
||||
put_dst(ctx, inst, dst);
|
||||
}
|
||||
|
||||
/*
|
||||
* USNE(a,b) = (a != b) ? 1 : 0
|
||||
* cmps.u32.ne dst, a, b
|
||||
*
|
||||
* USEQ(a,b) = (a == b) ? 1 : 0
|
||||
* cmps.u32.eq dst, a, b
|
||||
*
|
||||
* ISGE(a,b) = (a > b) ? 1 : 0
|
||||
* cmps.s32.ge dst, a, b
|
||||
*
|
||||
* USGE(a,b) = (a > b) ? 1 : 0
|
||||
* cmps.u32.ge dst, a, b
|
||||
*
|
||||
* ISLT(a,b) = (a < b) ? 1 : 0
|
||||
* cmps.s32.lt dst, a, b
|
||||
*
|
||||
* USLT(a,b) = (a < b) ? 1 : 0
|
||||
* cmps.u32.lt dst, a, b
|
||||
*
|
||||
* UCMP(a,b,c) = (a < 0) ? b : c
|
||||
* cmps.u32.lt tmp0, a, {0}
|
||||
* sel.b16 dst, b, tmp0, c
|
||||
*/
|
||||
static void
|
||||
trans_icmp(const struct instr_translater *t,
|
||||
struct fd3_compile_context *ctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
struct tgsi_dst_register *dst = get_dst(ctx, inst);
|
||||
struct tgsi_src_register constval0;
|
||||
struct tgsi_src_register *a0, *a1, *a2;
|
||||
unsigned condition;
|
||||
|
||||
a0 = &inst->Src[0].Register; /* a */
|
||||
a1 = &inst->Src[1].Register; /* b */
|
||||
|
||||
switch (t->tgsi_opc) {
|
||||
case TGSI_OPCODE_USNE:
|
||||
condition = IR3_COND_NE;
|
||||
break;
|
||||
case TGSI_OPCODE_USEQ:
|
||||
condition = IR3_COND_EQ;
|
||||
break;
|
||||
case TGSI_OPCODE_ISGE:
|
||||
case TGSI_OPCODE_USGE:
|
||||
condition = IR3_COND_GE;
|
||||
break;
|
||||
case TGSI_OPCODE_ISLT:
|
||||
case TGSI_OPCODE_USLT:
|
||||
condition = IR3_COND_LT;
|
||||
break;
|
||||
case TGSI_OPCODE_UCMP:
|
||||
get_immediate(ctx, &constval0, 0);
|
||||
a0 = &inst->Src[0].Register; /* a */
|
||||
a1 = &constval0; /* {0} */
|
||||
condition = IR3_COND_LT;
|
||||
break;
|
||||
|
||||
default:
|
||||
compile_assert(ctx, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_const(a0) && is_const(a1))
|
||||
a0 = get_unconst(ctx, a0);
|
||||
|
||||
if (t->tgsi_opc == TGSI_OPCODE_UCMP) {
|
||||
struct tgsi_dst_register tmp_dst;
|
||||
struct tgsi_src_register *tmp_src;
|
||||
tmp_src = get_internal_temp(ctx, &tmp_dst);
|
||||
/* cmps.u32.lt tmp, a0, a1 */
|
||||
instr = instr_create(ctx, 2, t->opc);
|
||||
instr->cat2.condition = condition;
|
||||
vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
|
||||
|
||||
a1 = &inst->Src[1].Register;
|
||||
a2 = &inst->Src[2].Register;
|
||||
/* sel.{b32,b16} dst, src2, tmp, src1 */
|
||||
instr = instr_create(ctx, 3, OPC_SEL_B32);
|
||||
vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
|
||||
} else {
|
||||
/* cmps.{u32,s32}.<cond> dst, a0, a1 */
|
||||
instr = instr_create(ctx, 2, t->opc);
|
||||
instr->cat2.condition = condition;
|
||||
vectorize(ctx, instr, dst, 2, a0, 0, a1, 0);
|
||||
}
|
||||
put_dst(ctx, inst, dst);
|
||||
}
|
||||
|
||||
/*
|
||||
* Conditional / Flow control
|
||||
*/
|
||||
@@ -1533,7 +1735,7 @@ trans_endif(const struct instr_translater *t,
|
||||
}
|
||||
|
||||
/*
|
||||
* Kill / Kill-if
|
||||
* Kill
|
||||
*/
|
||||
|
||||
static void
|
||||
@@ -1579,6 +1781,76 @@ trans_kill(const struct instr_translater *t,
|
||||
ctx->kill[ctx->kill_count++] = instr;
|
||||
}
|
||||
|
||||
/*
|
||||
* Kill-If
|
||||
*/
|
||||
|
||||
static void
|
||||
trans_killif(const struct instr_translater *t,
|
||||
struct fd3_compile_context *ctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct tgsi_src_register *src = &inst->Src[0].Register;
|
||||
struct ir3_instruction *instr, *immed, *cond = NULL;
|
||||
bool inv = false;
|
||||
|
||||
immed = create_immed(ctx, 0.0);
|
||||
|
||||
/* cmps.f.ne p0.x, cond, {0.0} */
|
||||
instr = instr_create(ctx, 2, OPC_CMPS_F);
|
||||
instr->cat2.condition = IR3_COND_NE;
|
||||
ir3_reg_create(instr, regid(REG_P0, 0), 0);
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = immed;
|
||||
add_src_reg(ctx, instr, src, src->SwizzleX);
|
||||
|
||||
cond = instr;
|
||||
|
||||
/* kill p0.x */
|
||||
instr = instr_create(ctx, 0, OPC_KILL);
|
||||
instr->cat0.inv = inv;
|
||||
ir3_reg_create(instr, 0, 0); /* dummy dst */
|
||||
ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = cond;
|
||||
|
||||
ctx->kill[ctx->kill_count++] = instr;
|
||||
|
||||
}
|
||||
/*
|
||||
* I2F / U2F / F2I / F2U
|
||||
*/
|
||||
|
||||
static void
|
||||
trans_cov(const struct instr_translater *t,
|
||||
struct fd3_compile_context *ctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct ir3_instruction *instr;
|
||||
struct tgsi_dst_register *dst = get_dst(ctx, inst);
|
||||
struct tgsi_src_register *src = &inst->Src[0].Register;
|
||||
|
||||
// cov.f32s32 dst, tmp0 /
|
||||
instr = instr_create(ctx, 1, 0);
|
||||
switch (t->tgsi_opc) {
|
||||
case TGSI_OPCODE_U2F:
|
||||
instr->cat1.src_type = TYPE_U32;
|
||||
instr->cat1.dst_type = TYPE_F32;
|
||||
break;
|
||||
case TGSI_OPCODE_I2F:
|
||||
instr->cat1.src_type = TYPE_S32;
|
||||
instr->cat1.dst_type = TYPE_F32;
|
||||
break;
|
||||
case TGSI_OPCODE_F2U:
|
||||
instr->cat1.src_type = TYPE_F32;
|
||||
instr->cat1.dst_type = TYPE_U32;
|
||||
break;
|
||||
case TGSI_OPCODE_F2I:
|
||||
instr->cat1.src_type = TYPE_F32;
|
||||
instr->cat1.dst_type = TYPE_S32;
|
||||
break;
|
||||
|
||||
}
|
||||
vectorize(ctx, instr, dst, 1, src, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Handlers for TGSI instructions which do have 1:1 mapping to native
|
||||
* instructions:
|
||||
@@ -1616,9 +1888,11 @@ instr_cat2(const struct instr_translater *t,
|
||||
|
||||
switch (t->tgsi_opc) {
|
||||
case TGSI_OPCODE_ABS:
|
||||
case TGSI_OPCODE_IABS:
|
||||
src0_flags = IR3_REG_ABS;
|
||||
break;
|
||||
case TGSI_OPCODE_SUB:
|
||||
case TGSI_OPCODE_INEG:
|
||||
src1_flags = IR3_REG_NEGATE;
|
||||
break;
|
||||
}
|
||||
@@ -1724,6 +1998,22 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
|
||||
INSTR(SUB, instr_cat2, .opc = OPC_ADD_F),
|
||||
INSTR(MIN, instr_cat2, .opc = OPC_MIN_F),
|
||||
INSTR(MAX, instr_cat2, .opc = OPC_MAX_F),
|
||||
INSTR(UADD, instr_cat2, .opc = OPC_ADD_U),
|
||||
INSTR(IMIN, instr_cat2, .opc = OPC_MIN_S),
|
||||
INSTR(UMIN, instr_cat2, .opc = OPC_MIN_U),
|
||||
INSTR(IMAX, instr_cat2, .opc = OPC_MAX_S),
|
||||
INSTR(UMAX, instr_cat2, .opc = OPC_MAX_U),
|
||||
INSTR(AND, instr_cat2, .opc = OPC_AND_B),
|
||||
INSTR(OR, instr_cat2, .opc = OPC_OR_B),
|
||||
INSTR(NOT, instr_cat2, .opc = OPC_NOT_B),
|
||||
INSTR(XOR, instr_cat2, .opc = OPC_XOR_B),
|
||||
INSTR(UMUL, instr_cat2, .opc = OPC_MUL_U),
|
||||
INSTR(SHL, instr_cat2, .opc = OPC_SHL_B),
|
||||
INSTR(USHR, instr_cat2, .opc = OPC_SHR_B),
|
||||
INSTR(ISHR, instr_cat2, .opc = OPC_ASHR_B),
|
||||
INSTR(IABS, instr_cat2, .opc = OPC_ABSNEG_S),
|
||||
INSTR(INEG, instr_cat2, .opc = OPC_ABSNEG_S),
|
||||
INSTR(AND, instr_cat2, .opc = OPC_AND_B),
|
||||
INSTR(MAD, instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
|
||||
INSTR(TRUNC, instr_cat2, .opc = OPC_TRUNC_F),
|
||||
INSTR(CLAMP, trans_clamp),
|
||||
@@ -1741,16 +2031,33 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
|
||||
INSTR(TXP, trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
|
||||
INSTR(SGT, trans_cmp),
|
||||
INSTR(SLT, trans_cmp),
|
||||
INSTR(FSLT, trans_cmp),
|
||||
INSTR(SGE, trans_cmp),
|
||||
INSTR(FSGE, trans_cmp),
|
||||
INSTR(SLE, trans_cmp),
|
||||
INSTR(SNE, trans_cmp),
|
||||
INSTR(FSNE, trans_cmp),
|
||||
INSTR(SEQ, trans_cmp),
|
||||
INSTR(FSEQ, trans_cmp),
|
||||
INSTR(CMP, trans_cmp),
|
||||
INSTR(USNE, trans_icmp, .opc = OPC_CMPS_U),
|
||||
INSTR(USEQ, trans_icmp, .opc = OPC_CMPS_U),
|
||||
INSTR(ISGE, trans_icmp, .opc = OPC_CMPS_S),
|
||||
INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U),
|
||||
INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
|
||||
INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
|
||||
INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U),
|
||||
INSTR(IF, trans_if),
|
||||
INSTR(UIF, trans_if),
|
||||
INSTR(ELSE, trans_else),
|
||||
INSTR(ENDIF, trans_endif),
|
||||
INSTR(END, instr_cat0, .opc = OPC_END),
|
||||
INSTR(KILL, trans_kill, .opc = OPC_KILL),
|
||||
INSTR(KILL_IF, trans_killif, .opc = OPC_KILL),
|
||||
INSTR(I2F, trans_cov),
|
||||
INSTR(U2F, trans_cov),
|
||||
INSTR(F2I, trans_cov),
|
||||
INSTR(F2U, trans_cov),
|
||||
};
|
||||
|
||||
static fd3_semantic
|
||||
@@ -1935,6 +2242,8 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
|
||||
|
||||
DBG("decl in -> r%d", i);
|
||||
|
||||
compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
|
||||
|
||||
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
|
||||
so->inputs[n].compmask = (1 << ncomp) - 1;
|
||||
so->inputs[n].regid = r;
|
||||
@@ -2024,6 +2333,8 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
|
||||
|
||||
ncomp = 4;
|
||||
|
||||
compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
|
||||
|
||||
so->outputs[n].semantic = decl_semantic(&decl->Semantic);
|
||||
so->outputs[n].regid = regid(i, comp);
|
||||
|
||||
@@ -2147,6 +2458,7 @@ compile_instructions(struct fd3_compile_context *ctx)
|
||||
struct tgsi_full_immediate *imm =
|
||||
&ctx->parser.FullToken.FullImmediate;
|
||||
unsigned n = ctx->so->immediates_count++;
|
||||
compile_assert(ctx, n < ARRAY_SIZE(ctx->so->immediates));
|
||||
memcpy(ctx->so->immediates[n].val, imm->u, 16);
|
||||
break;
|
||||
}
|
||||
|
@@ -1324,6 +1324,8 @@ decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
|
||||
|
||||
DBG("decl in -> r%d", i + base); // XXX
|
||||
|
||||
compile_assert(ctx, n < ARRAY_SIZE(so->inputs));
|
||||
|
||||
so->inputs[n].semantic = decl_semantic(&decl->Semantic);
|
||||
so->inputs[n].compmask = (1 << ncomp) - 1;
|
||||
so->inputs[n].ncomp = ncomp;
|
||||
@@ -1410,6 +1412,7 @@ decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
|
||||
|
||||
for (i = decl->Range.First; i <= decl->Range.Last; i++) {
|
||||
unsigned n = so->outputs_count++;
|
||||
compile_assert(ctx, n < ARRAY_SIZE(so->outputs));
|
||||
so->outputs[n].semantic = decl_semantic(&decl->Semantic);
|
||||
so->outputs[n].regid = regid(i + base, comp);
|
||||
}
|
||||
|
@@ -33,6 +33,7 @@
|
||||
#include "fd3_emit.h"
|
||||
#include "fd3_gmem.h"
|
||||
#include "fd3_program.h"
|
||||
#include "fd3_query.h"
|
||||
#include "fd3_rasterizer.h"
|
||||
#include "fd3_texture.h"
|
||||
#include "fd3_zsa.h"
|
||||
@@ -134,5 +135,7 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
|
||||
fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
|
||||
fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
|
||||
|
||||
fd3_query_context_init(pctx);
|
||||
|
||||
return pctx;
|
||||
}
|
||||
|
@@ -406,7 +406,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
|
||||
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));
|
||||
|
||||
for (i = 0, j = -1; j < (int)fp->inputs_count; i++) {
|
||||
for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) {
|
||||
uint32_t reg = 0;
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1);
|
||||
@@ -428,7 +428,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
OUT_RING(ring, reg);
|
||||
}
|
||||
|
||||
for (i = 0, j = -1; j < (int)fp->inputs_count; i++) {
|
||||
for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) {
|
||||
uint32_t reg = 0;
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1);
|
||||
|
@@ -91,7 +91,7 @@ struct fd3_shader_variant {
|
||||
struct {
|
||||
fd3_semantic semantic;
|
||||
uint8_t regid;
|
||||
} outputs[16];
|
||||
} outputs[16 + 2]; /* +POSITION +PSIZE */
|
||||
bool writes_pos, writes_psize;
|
||||
|
||||
/* vertices/inputs: */
|
||||
@@ -104,7 +104,7 @@ struct fd3_shader_variant {
|
||||
/* in theory inloc of fs should match outloc of vs: */
|
||||
uint8_t inloc;
|
||||
uint8_t bary;
|
||||
} inputs[16];
|
||||
} inputs[16 + 2]; /* +POSITION +FACE */
|
||||
|
||||
unsigned total_in; /* sum of inputs (scalar) */
|
||||
|
||||
|
139
src/gallium/drivers/freedreno/a3xx/fd3_query.c
Normal file
139
src/gallium/drivers/freedreno/a3xx/fd3_query.c
Normal file
@@ -0,0 +1,139 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
#include "fd3_query.h"
|
||||
#include "fd3_util.h"
|
||||
|
||||
|
||||
/*
 * One sample snapshot: sixteen 64-bit counter values.  Its size is what
 * occlusion_get_sample() reserves per sample, and count_samples() reads
 * entries 0, 4, 8 and 12 of it.
 */
struct fd_rb_samp_ctrs {
|
||||
uint64_t ctr[16];
|
||||
};
|
||||
|
||||
/*
|
||||
* Occlusion Query:
|
||||
*
|
||||
* OCCLUSION_COUNTER and OCCLUSION_PREDICATE differ only in how they
|
||||
* interpret results
|
||||
*/
|
||||
|
||||
static struct fd_hw_sample *
|
||||
occlusion_get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
struct fd_hw_sample *samp =
|
||||
fd_hw_sample_init(ctx, sizeof(struct fd_rb_samp_ctrs));
|
||||
|
||||
/* Set RB_SAMPLE_COUNT_ADDR to samp->offset plus value of
|
||||
* HW_QUERY_BASE_REG register:
|
||||
*/
|
||||
OUT_PKT3(ring, CP_SET_CONSTANT, 3);
|
||||
OUT_RING(ring, CP_REG(REG_A3XX_RB_SAMPLE_COUNT_ADDR) | 0x80000000);
|
||||
OUT_RING(ring, HW_QUERY_BASE_REG);
|
||||
OUT_RING(ring, samp->offset);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
|
||||
OUT_RING(ring, A3XX_RB_SAMPLE_COUNT_CONTROL_COPY);
|
||||
|
||||
OUT_PKT3(ring, CP_DRAW_INDX, 3);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, DRAW(DI_PT_POINTLIST_A2XX, DI_SRC_SEL_AUTO_INDEX,
|
||||
INDEX_SIZE_IGN, USE_VISIBILITY));
|
||||
OUT_RING(ring, 0); /* NumIndices */
|
||||
|
||||
OUT_PKT3(ring, CP_EVENT_WRITE, 1);
|
||||
OUT_RING(ring, ZPASS_DONE);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_RBBM_PERFCTR_CTL, 1);
|
||||
OUT_RING(ring, A3XX_RBBM_PERFCTR_CTL_ENABLE);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VBIF_PERF_CNT_EN, 1);
|
||||
OUT_RING(ring, A3XX_VBIF_PERF_CNT_EN_CNT0 |
|
||||
A3XX_VBIF_PERF_CNT_EN_CNT1 |
|
||||
A3XX_VBIF_PERF_CNT_EN_PWRCNT0 |
|
||||
A3XX_VBIF_PERF_CNT_EN_PWRCNT1 |
|
||||
A3XX_VBIF_PERF_CNT_EN_PWRCNT2);
|
||||
|
||||
return samp;
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
count_samples(const struct fd_rb_samp_ctrs *start,
|
||||
const struct fd_rb_samp_ctrs *end)
|
||||
{
|
||||
uint64_t n = 0;
|
||||
unsigned i;
|
||||
|
||||
/* not quite sure what all of these are, possibly different
|
||||
* counters for each MRT render target:
|
||||
*/
|
||||
for (i = 0; i < 16; i += 4)
|
||||
n += end->ctr[i] - start->ctr[i];
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
static void
|
||||
occlusion_counter_accumulate_result(struct fd_context *ctx,
|
||||
const void *start, const void *end,
|
||||
union pipe_query_result *result)
|
||||
{
|
||||
uint64_t n = count_samples(start, end);
|
||||
result->u64 += n;
|
||||
}
|
||||
|
||||
static void
|
||||
occlusion_predicate_accumulate_result(struct fd_context *ctx,
|
||||
const void *start, const void *end,
|
||||
union pipe_query_result *result)
|
||||
{
|
||||
uint64_t n = count_samples(start, end);
|
||||
result->b |= (n > 0);
|
||||
}
|
||||
|
||||
/*
 * Sample provider for PIPE_QUERY_OCCLUSION_COUNTER: shares
 * occlusion_get_sample() with the predicate provider and accumulates the
 * result as a 64-bit count.
 */
static const struct fd_hw_sample_provider occlusion_counter = {
|
||||
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
|
||||
.active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? */
|
||||
.get_sample = occlusion_get_sample,
|
||||
.accumulate_result = occlusion_counter_accumulate_result,
|
||||
};
|
||||
|
||||
/*
 * Sample provider for PIPE_QUERY_OCCLUSION_PREDICATE: shares
 * occlusion_get_sample() with the counter provider and accumulates the
 * result as a boolean "any samples counted".
 */
static const struct fd_hw_sample_provider occlusion_predicate = {
|
||||
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
|
||||
.active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? */
|
||||
.get_sample = occlusion_get_sample,
|
||||
.accumulate_result = occlusion_predicate_accumulate_result,
|
||||
};
|
||||
|
||||
void fd3_query_context_init(struct pipe_context *pctx)
|
||||
{
|
||||
fd_hw_query_register_provider(pctx, &occlusion_counter);
|
||||
fd_hw_query_register_provider(pctx, &occlusion_predicate);
|
||||
}
|
36
src/gallium/drivers/freedreno/a3xx/fd3_query.h
Normal file
36
src/gallium/drivers/freedreno/a3xx/fd3_query.h
Normal file
@@ -0,0 +1,36 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#ifndef FD3_QUERY_H_
|
||||
#define FD3_QUERY_H_
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
void fd3_query_context_init(struct pipe_context *pctx);
|
||||
|
||||
#endif /* FD3_QUERY_H_ */
|
@@ -40,6 +40,7 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
|
||||
const struct pipe_rasterizer_state *cso)
|
||||
{
|
||||
struct fd3_rasterizer_stateobj *so;
|
||||
float psize_min, psize_max;
|
||||
|
||||
so = CALLOC_STRUCT(fd3_rasterizer_stateobj);
|
||||
if (!so)
|
||||
@@ -47,19 +48,28 @@ fd3_rasterizer_state_create(struct pipe_context *pctx,
|
||||
|
||||
so->base = *cso;
|
||||
|
||||
if (cso->point_size_per_vertex) {
|
||||
psize_min = util_get_min_point_size(cso);
|
||||
psize_max = 8192;
|
||||
} else {
|
||||
/* Force the point size to be as if the vertex output was disabled. */
|
||||
psize_min = cso->point_size;
|
||||
psize_max = cso->point_size;
|
||||
}
|
||||
|
||||
/*
|
||||
if (cso->line_stipple_enable) {
|
||||
??? TODO line stipple
|
||||
}
|
||||
TODO cso->half_pixel_center
|
||||
TODO cso->point_size
|
||||
TODO psize_min/psize_max
|
||||
if (cso->multisample)
|
||||
TODO
|
||||
*/
|
||||
so->gras_cl_clip_cntl = A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER; /* ??? */
|
||||
so->gras_su_point_minmax = 0xffc00010; /* ??? */
|
||||
so->gras_su_point_size = 0x00000008; /* ??? */
|
||||
so->gras_su_point_minmax =
|
||||
A3XX_GRAS_SU_POINT_MINMAX_MIN(psize_min/2) |
|
||||
A3XX_GRAS_SU_POINT_MINMAX_MAX(psize_max/2);
|
||||
so->gras_su_point_size = A3XX_GRAS_SU_POINT_SIZE(cso->point_size/2);
|
||||
so->gras_su_poly_offset_scale =
|
||||
A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(cso->offset_scale);
|
||||
so->gras_su_poly_offset_offset =
|
||||
|
@@ -30,6 +30,7 @@
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_format.h"
|
||||
|
||||
#include "fd3_texture.h"
|
||||
#include "fd3_util.h"
|
||||
@@ -99,6 +100,9 @@ fd3_sampler_state_create(struct pipe_context *pctx,
|
||||
A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
|
||||
A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
|
||||
|
||||
if (cso->compare_mode)
|
||||
so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
|
||||
|
||||
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
|
||||
so->texsamp1 =
|
||||
A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
|
||||
@@ -158,6 +162,10 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
A3XX_TEX_CONST_0_MIPLVLS(miplevels) |
|
||||
fd3_tex_swiz(cso->format, cso->swizzle_r, cso->swizzle_g,
|
||||
cso->swizzle_b, cso->swizzle_a);
|
||||
|
||||
if (util_format_is_srgb(cso->format))
|
||||
so->texconst0 |= A3XX_TEX_CONST_0_SRGB;
|
||||
|
||||
so->texconst1 =
|
||||
A3XX_TEX_CONST_1_FETCHSIZE(fd3_pipe2fetchsize(cso->format)) |
|
||||
A3XX_TEX_CONST_1_WIDTH(prsc->width0) |
|
||||
|
@@ -235,6 +235,10 @@ fd3_pipe2tex(enum pipe_format format)
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8A8_SRGB:
|
||||
case PIPE_FORMAT_B8G8R8X8_SRGB:
|
||||
case PIPE_FORMAT_R8G8B8A8_SRGB:
|
||||
case PIPE_FORMAT_R8G8B8X8_SRGB:
|
||||
return TFMT_NORM_UINT_8_8_8_8;
|
||||
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
@@ -275,6 +279,12 @@ fd3_pipe2fetchsize(enum pipe_format format)
|
||||
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8A8_SRGB:
|
||||
case PIPE_FORMAT_B8G8R8X8_SRGB:
|
||||
case PIPE_FORMAT_R8G8B8A8_SRGB:
|
||||
case PIPE_FORMAT_R8G8B8X8_SRGB:
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
return TFETCH_4_BYTE;
|
||||
@@ -379,14 +389,14 @@ fd3_tex_swiz(enum pipe_format format, unsigned swizzle_r, unsigned swizzle_g,
|
||||
{
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
uint8_t swiz[] = {
|
||||
unsigned char swiz[4] = {
|
||||
swizzle_r, swizzle_g, swizzle_b, swizzle_a,
|
||||
PIPE_SWIZZLE_ZERO, PIPE_SWIZZLE_ONE,
|
||||
PIPE_SWIZZLE_ONE, PIPE_SWIZZLE_ONE,
|
||||
};
|
||||
}, rswiz[4];
|
||||
|
||||
return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(swiz[desc->swizzle[0]])) |
|
||||
A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(swiz[desc->swizzle[1]])) |
|
||||
A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(swiz[desc->swizzle[2]])) |
|
||||
A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(swiz[desc->swizzle[3]]));
|
||||
util_format_compose_swizzles(desc->swizzle, swiz, rswiz);
|
||||
|
||||
return A3XX_TEX_CONST_0_SWIZ_X(tex_swiz(rswiz[0])) |
|
||||
A3XX_TEX_CONST_0_SWIZ_Y(tex_swiz(rswiz[1])) |
|
||||
A3XX_TEX_CONST_0_SWIZ_Z(tex_swiz(rswiz[2])) |
|
||||
A3XX_TEX_CONST_0_SWIZ_W(tex_swiz(rswiz[3]));
|
||||
}
|
||||
|
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -116,6 +116,39 @@ enum adreno_rb_depth_format {
|
||||
DEPTHX_24_8 = 1,
|
||||
};
|
||||
|
||||
enum adreno_rb_copy_control_mode {
|
||||
RB_COPY_RESOLVE = 1,
|
||||
RB_COPY_CLEAR = 2,
|
||||
RB_COPY_DEPTH_STENCIL = 5,
|
||||
};
|
||||
|
||||
enum a3xx_render_mode {
|
||||
RB_RENDERING_PASS = 0,
|
||||
RB_TILING_PASS = 1,
|
||||
RB_RESOLVE_PASS = 2,
|
||||
RB_COMPUTE_PASS = 3,
|
||||
};
|
||||
|
||||
enum a3xx_msaa_samples {
|
||||
MSAA_ONE = 0,
|
||||
MSAA_TWO = 1,
|
||||
MSAA_FOUR = 2,
|
||||
};
|
||||
|
||||
enum a3xx_threadmode {
|
||||
MULTI = 0,
|
||||
SINGLE = 1,
|
||||
};
|
||||
|
||||
enum a3xx_instrbuffermode {
|
||||
BUFFER = 1,
|
||||
};
|
||||
|
||||
enum a3xx_threadsize {
|
||||
TWO_QUADS = 0,
|
||||
FOUR_QUADS = 1,
|
||||
};
|
||||
|
||||
#define REG_AXXX_CP_RB_BASE 0x000001c0
|
||||
|
||||
#define REG_AXXX_CP_RB_CNTL 0x000001c1
|
||||
|
@@ -10,11 +10,11 @@ git clone https://github.com/freedreno/envytools.git
|
||||
The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32840 bytes, from 2014-01-05 14:44:21)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9009 bytes, from 2014-01-11 16:56:35)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 12362 bytes, from 2014-01-07 14:47:36)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 56545 bytes, from 2014-02-26 16:32:11)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 8344 bytes, from 2013-11-30 14:49:47)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32580 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10186 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-05-16 11:51:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 57831 bytes, from 2014-05-19 21:02:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 26293 bytes, from 2014-05-16 11:51:57)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -164,6 +164,11 @@ enum adreno_pm4_type3_packets {
|
||||
CP_SET_BIN = 76,
|
||||
CP_TEST_TWO_MEMS = 113,
|
||||
CP_WAIT_FOR_ME = 19,
|
||||
CP_SET_DRAW_STATE = 67,
|
||||
CP_DRAW_INDX_OFFSET = 56,
|
||||
CP_DRAW_INDIRECT = 40,
|
||||
CP_DRAW_INDX_INDIRECT = 41,
|
||||
CP_DRAW_AUTO = 36,
|
||||
IN_IB_PREFETCH_END = 23,
|
||||
IN_SUBBLK_PREFETCH = 31,
|
||||
IN_INSTR_PREFETCH = 32,
|
||||
@@ -351,6 +356,93 @@ static inline uint32_t CP_DRAW_INDX_2_2_NUM_INDICES(uint32_t val)
|
||||
return ((val) << CP_DRAW_INDX_2_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_2_2_NUM_INDICES__MASK;
|
||||
}
|
||||
|
||||
#define REG_CP_DRAW_INDX_OFFSET_0 0x00000000
|
||||
#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK 0x0000003f
|
||||
#define CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT 0
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(enum pc_di_primtype val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__SHIFT) & CP_DRAW_INDX_OFFSET_0_PRIM_TYPE__MASK;
|
||||
}
|
||||
#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK 0x000000c0
|
||||
#define CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT 6
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(enum pc_di_src_sel val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__SHIFT) & CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT__MASK;
|
||||
}
|
||||
#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK 0x00000700
|
||||
#define CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT 8
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_0_VIS_CULL(enum pc_di_vis_cull_mode val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_0_VIS_CULL__SHIFT) & CP_DRAW_INDX_OFFSET_0_VIS_CULL__MASK;
|
||||
}
|
||||
#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK 0x00000800
|
||||
#define CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT 11
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_0_INDEX_SIZE(enum pc_di_index_size val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_0_INDEX_SIZE__MASK;
|
||||
}
|
||||
#define CP_DRAW_INDX_OFFSET_0_NOT_EOP 0x00001000
|
||||
#define CP_DRAW_INDX_OFFSET_0_SMALL_INDEX 0x00002000
|
||||
#define CP_DRAW_INDX_OFFSET_0_PRE_DRAW_INITIATOR_ENABLE 0x00004000
|
||||
#define CP_DRAW_INDX_OFFSET_0_NUM_INDICES__MASK 0xffff0000
|
||||
#define CP_DRAW_INDX_OFFSET_0_NUM_INDICES__SHIFT 16
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_0_NUM_INDICES(uint32_t val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_0_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_0_NUM_INDICES__MASK;
|
||||
}
|
||||
|
||||
#define REG_CP_DRAW_INDX_OFFSET_1 0x00000001
|
||||
|
||||
#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002
|
||||
#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK 0xffffffff
|
||||
#define CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT 0
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_2_NUM_INDICES(uint32_t val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_2_NUM_INDICES__SHIFT) & CP_DRAW_INDX_OFFSET_2_NUM_INDICES__MASK;
|
||||
}
|
||||
|
||||
#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002
|
||||
#define CP_DRAW_INDX_OFFSET_2_INDX_BASE__MASK 0xffffffff
|
||||
#define CP_DRAW_INDX_OFFSET_2_INDX_BASE__SHIFT 0
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_2_INDX_BASE(uint32_t val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_2_INDX_BASE__SHIFT) & CP_DRAW_INDX_OFFSET_2_INDX_BASE__MASK;
|
||||
}
|
||||
|
||||
#define REG_CP_DRAW_INDX_OFFSET_2 0x00000002
|
||||
#define CP_DRAW_INDX_OFFSET_2_INDX_SIZE__MASK 0xffffffff
|
||||
#define CP_DRAW_INDX_OFFSET_2_INDX_SIZE__SHIFT 0
|
||||
static inline uint32_t CP_DRAW_INDX_OFFSET_2_INDX_SIZE(uint32_t val)
|
||||
{
|
||||
return ((val) << CP_DRAW_INDX_OFFSET_2_INDX_SIZE__SHIFT) & CP_DRAW_INDX_OFFSET_2_INDX_SIZE__MASK;
|
||||
}
|
||||
|
||||
#define REG_CP_SET_DRAW_STATE_0 0x00000000
|
||||
#define CP_SET_DRAW_STATE_0_COUNT__MASK 0x0000ffff
|
||||
#define CP_SET_DRAW_STATE_0_COUNT__SHIFT 0
|
||||
static inline uint32_t CP_SET_DRAW_STATE_0_COUNT(uint32_t val)
|
||||
{
|
||||
return ((val) << CP_SET_DRAW_STATE_0_COUNT__SHIFT) & CP_SET_DRAW_STATE_0_COUNT__MASK;
|
||||
}
|
||||
#define CP_SET_DRAW_STATE_0_DIRTY 0x00010000
|
||||
#define CP_SET_DRAW_STATE_0_DISABLE 0x00020000
|
||||
#define CP_SET_DRAW_STATE_0_DISABLE_ALL_GROUPS 0x00040000
|
||||
#define CP_SET_DRAW_STATE_0_LOAD_IMMED 0x00080000
|
||||
#define CP_SET_DRAW_STATE_0_GROUP_ID__MASK 0x1f000000
|
||||
#define CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT 24
|
||||
static inline uint32_t CP_SET_DRAW_STATE_0_GROUP_ID(uint32_t val)
|
||||
{
|
||||
return ((val) << CP_SET_DRAW_STATE_0_GROUP_ID__SHIFT) & CP_SET_DRAW_STATE_0_GROUP_ID__MASK;
|
||||
}
|
||||
|
||||
#define REG_CP_SET_DRAW_STATE_1 0x00000001
|
||||
#define CP_SET_DRAW_STATE_1_ADDR__MASK 0xffffffff
|
||||
#define CP_SET_DRAW_STATE_1_ADDR__SHIFT 0
|
||||
static inline uint32_t CP_SET_DRAW_STATE_1_ADDR(uint32_t val)
|
||||
{
|
||||
return ((val) << CP_SET_DRAW_STATE_1_ADDR__SHIFT) & CP_SET_DRAW_STATE_1_ADDR__MASK;
|
||||
}
|
||||
|
||||
#define REG_CP_SET_BIN_0 0x00000000
|
||||
|
||||
#define REG_CP_SET_BIN_1 0x00000001
|
||||
|
@@ -34,6 +34,7 @@
|
||||
#include "freedreno_state.h"
|
||||
#include "freedreno_gmem.h"
|
||||
#include "freedreno_query.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
static struct fd_ringbuffer *next_rb(struct fd_context *ctx)
|
||||
@@ -145,6 +146,7 @@ fd_context_destroy(struct pipe_context *pctx)
|
||||
DBG("");
|
||||
|
||||
fd_prog_fini(pctx);
|
||||
fd_hw_query_fini(pctx);
|
||||
|
||||
util_slab_destroy(&ctx->transfer_pool);
|
||||
|
||||
@@ -221,6 +223,7 @@ fd_context_init(struct fd_context *ctx, struct pipe_screen *pscreen,
|
||||
fd_query_context_init(pctx);
|
||||
fd_texture_init(pctx);
|
||||
fd_state_init(pctx);
|
||||
fd_hw_query_init(pctx);
|
||||
|
||||
ctx->blitter = util_blitter_create(pctx);
|
||||
if (!ctx->blitter)
|
||||
|
@@ -33,6 +33,7 @@
|
||||
#include "pipe/p_context.h"
|
||||
#include "indices/u_primconvert.h"
|
||||
#include "util/u_blitter.h"
|
||||
#include "util/u_double_list.h"
|
||||
#include "util/u_slab.h"
|
||||
#include "util/u_string.h"
|
||||
|
||||
@@ -82,16 +83,80 @@ struct fd_vertex_stateobj {
|
||||
unsigned num_elements;
|
||||
};
|
||||
|
||||
/* Bitmask of stages in rendering that a particular query is
|
||||
* active. Queries will be automatically started/stopped (generating
|
||||
* additional fd_hw_sample_period's) on entrance/exit from stages that
|
||||
* are applicable to the query.
|
||||
*
|
||||
* NOTE: set the stage to NULL at end of IB to ensure no query is still
|
||||
* active. Things aren't going to work out the way you want if a query
|
||||
* is active across IB's (or between tile IB and draw IB)
|
||||
*/
|
||||
enum fd_render_stage {
|
||||
FD_STAGE_NULL = 0x00,
|
||||
FD_STAGE_DRAW = 0x01,
|
||||
FD_STAGE_CLEAR = 0x02,
|
||||
/* TODO before queries which include MEM2GMEM or GMEM2MEM will
|
||||
* work we will need to call fd_hw_query_prepare() from somewhere
|
||||
* appropriate so that queries in the tiling IB get backed with
|
||||
* memory to write results to.
|
||||
*/
|
||||
FD_STAGE_MEM2GMEM = 0x04,
|
||||
FD_STAGE_GMEM2MEM = 0x08,
|
||||
/* used for driver internal draws (ie. util_blitter_blit()): */
|
||||
FD_STAGE_BLIT = 0x10,
|
||||
};
|
||||
|
||||
#define MAX_HW_SAMPLE_PROVIDERS 4
|
||||
struct fd_hw_sample_provider;
|
||||
struct fd_hw_sample;
|
||||
|
||||
struct fd_context {
|
||||
struct pipe_context base;
|
||||
|
||||
struct fd_device *dev;
|
||||
struct fd_screen *screen;
|
||||
|
||||
struct blitter_context *blitter;
|
||||
struct primconvert_context *primconvert;
|
||||
|
||||
/* slab for pipe_transfer allocations: */
|
||||
struct util_slab_mempool transfer_pool;
|
||||
|
||||
/* slabs for fd_hw_sample and fd_hw_sample_period allocations: */
|
||||
struct util_slab_mempool sample_pool;
|
||||
struct util_slab_mempool sample_period_pool;
|
||||
|
||||
/* next sample offset.. incremented for each sample in the batch/
|
||||
* submit, reset to zero on next submit.
|
||||
*/
|
||||
uint32_t next_sample_offset;
|
||||
|
||||
/* sample-providers for hw queries: */
|
||||
const struct fd_hw_sample_provider *sample_providers[MAX_HW_SAMPLE_PROVIDERS];
|
||||
|
||||
/* cached samples (in case multiple queries need to reference
|
||||
* the same sample snapshot)
|
||||
*/
|
||||
struct fd_hw_sample *sample_cache[MAX_HW_SAMPLE_PROVIDERS];
|
||||
|
||||
/* tracking for current stage, to know when to start/stop
|
||||
* any active queries:
|
||||
*/
|
||||
enum fd_render_stage stage;
|
||||
|
||||
/* list of active queries: */
|
||||
struct list_head active_queries;
|
||||
|
||||
/* list of queries that are not active, but were active in the
|
||||
* current submit:
|
||||
*/
|
||||
struct list_head current_queries;
|
||||
|
||||
/* current query result bo and tile stride: */
|
||||
struct fd_bo *query_bo;
|
||||
uint32_t query_tile_stride;
|
||||
|
||||
/* table with PIPE_PRIM_MAX entries mapping PIPE_PRIM_x to
|
||||
* DI_PT_x value to use for draw initiator. There are some
|
||||
* slight differences between generation:
|
||||
|
@@ -36,6 +36,7 @@
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_state.h"
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
|
||||
@@ -70,7 +71,7 @@ fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
idx_bo = fd_resource(idx->buffer)->bo;
|
||||
idx_type = size2indextype(idx->index_size);
|
||||
idx_size = idx->index_size * info->count;
|
||||
idx_offset = idx->offset;
|
||||
idx_offset = idx->offset + (info->start * idx->index_size);
|
||||
src_sel = DI_SRC_SEL_DMA;
|
||||
} else {
|
||||
idx_bo = NULL;
|
||||
@@ -156,6 +157,7 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
/* and any buffers used, need to be resolved: */
|
||||
ctx->resolve |= buffers;
|
||||
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
|
||||
ctx->draw(ctx, info);
|
||||
}
|
||||
|
||||
@@ -188,6 +190,8 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
|
||||
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
|
||||
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
|
||||
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_CLEAR);
|
||||
|
||||
ctx->clear(ctx, buffers, color, depth, stencil);
|
||||
|
||||
ctx->dirty |= FD_DIRTY_ZSA |
|
||||
|
@@ -35,6 +35,7 @@
|
||||
#include "freedreno_gmem.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
/*
|
||||
@@ -273,17 +274,24 @@ render_tiles(struct fd_context *ctx)
|
||||
|
||||
ctx->emit_tile_prep(ctx, tile);
|
||||
|
||||
if (ctx->restore)
|
||||
if (ctx->restore) {
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM);
|
||||
ctx->emit_tile_mem2gmem(ctx, tile);
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
|
||||
}
|
||||
|
||||
ctx->emit_tile_renderprep(ctx, tile);
|
||||
|
||||
fd_hw_query_prepare_tile(ctx, i, ctx->ring);
|
||||
|
||||
/* emit IB to drawcmds: */
|
||||
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
|
||||
fd_reset_wfi(ctx);
|
||||
|
||||
/* emit gmem2mem to transfer tile back to system memory: */
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM);
|
||||
ctx->emit_tile_gmem2mem(ctx, tile);
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -292,6 +300,8 @@ render_sysmem(struct fd_context *ctx)
|
||||
{
|
||||
ctx->emit_sysmem_prep(ctx);
|
||||
|
||||
fd_hw_query_prepare_tile(ctx, 0, ctx->ring);
|
||||
|
||||
/* emit IB to drawcmds: */
|
||||
OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
|
||||
fd_reset_wfi(ctx);
|
||||
@@ -314,6 +324,11 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
|
||||
}
|
||||
}
|
||||
|
||||
/* close out the draw cmds by making sure any active queries are
|
||||
* paused:
|
||||
*/
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
|
||||
|
||||
/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
|
||||
fd_ringmarker_mark(ctx->draw_end);
|
||||
fd_ringmarker_mark(ctx->binning_end);
|
||||
@@ -326,6 +341,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
|
||||
DBG("rendering sysmem (%s/%s)",
|
||||
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
|
||||
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
|
||||
fd_hw_query_prepare(ctx, 1);
|
||||
render_sysmem(ctx);
|
||||
ctx->stats.batch_sysmem++;
|
||||
} else {
|
||||
@@ -334,6 +350,7 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
|
||||
DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
|
||||
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
|
||||
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
|
||||
fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
|
||||
render_tiles(ctx);
|
||||
ctx->stats.batch_gmem++;
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
|
||||
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -27,63 +27,27 @@
|
||||
*/
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "os/os_time.h"
|
||||
|
||||
#include "freedreno_query.h"
|
||||
#include "freedreno_query_sw.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
|
||||
#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
|
||||
#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */
|
||||
#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
|
||||
#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */
|
||||
|
||||
/* Currently just simple cpu query's supported.. probably need
|
||||
* to refactor this a bit when I'm eventually ready to add gpu
|
||||
* queries:
|
||||
/*
|
||||
* Pipe Query interface:
|
||||
*/
|
||||
struct fd_query {
|
||||
int type;
|
||||
/* storage for the collected data */
|
||||
union pipe_query_result data;
|
||||
bool active;
|
||||
uint64_t begin_value, end_value;
|
||||
uint64_t begin_time, end_time;
|
||||
};
|
||||
|
||||
static inline struct fd_query *
|
||||
fd_query(struct pipe_query *pq)
|
||||
{
|
||||
return (struct fd_query *)pq;
|
||||
}
|
||||
|
||||
static struct pipe_query *
|
||||
fd_create_query(struct pipe_context *pctx, unsigned query_type)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_query *q;
|
||||
|
||||
switch (query_type) {
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case FD_QUERY_DRAW_CALLS:
|
||||
case FD_QUERY_BATCH_TOTAL:
|
||||
case FD_QUERY_BATCH_SYSMEM:
|
||||
case FD_QUERY_BATCH_GMEM:
|
||||
case FD_QUERY_BATCH_RESTORE:
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
q = CALLOC_STRUCT(fd_query);
|
||||
q = fd_sw_create_query(ctx, query_type);
|
||||
if (!q)
|
||||
return NULL;
|
||||
|
||||
q->type = query_type;
|
||||
q = fd_hw_create_query(ctx, query_type);
|
||||
|
||||
return (struct pipe_query *) q;
|
||||
}
|
||||
@@ -92,64 +56,21 @@ static void
|
||||
fd_destroy_query(struct pipe_context *pctx, struct pipe_query *pq)
|
||||
{
|
||||
struct fd_query *q = fd_query(pq);
|
||||
free(q);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
read_counter(struct pipe_context *pctx, int type)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
switch (type) {
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
/* for now same thing as _PRIMITIVES_EMITTED */
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
return ctx->stats.prims_emitted;
|
||||
case FD_QUERY_DRAW_CALLS:
|
||||
return ctx->stats.draw_calls;
|
||||
case FD_QUERY_BATCH_TOTAL:
|
||||
return ctx->stats.batch_total;
|
||||
case FD_QUERY_BATCH_SYSMEM:
|
||||
return ctx->stats.batch_sysmem;
|
||||
case FD_QUERY_BATCH_GMEM:
|
||||
return ctx->stats.batch_gmem;
|
||||
case FD_QUERY_BATCH_RESTORE:
|
||||
return ctx->stats.batch_restore;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_rate_query(struct fd_query *q)
|
||||
{
|
||||
switch (q->type) {
|
||||
case FD_QUERY_BATCH_TOTAL:
|
||||
case FD_QUERY_BATCH_SYSMEM:
|
||||
case FD_QUERY_BATCH_GMEM:
|
||||
case FD_QUERY_BATCH_RESTORE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
q->funcs->destroy_query(fd_context(pctx), q);
|
||||
}
|
||||
|
||||
static void
|
||||
fd_begin_query(struct pipe_context *pctx, struct pipe_query *pq)
|
||||
{
|
||||
struct fd_query *q = fd_query(pq);
|
||||
q->active = true;
|
||||
q->begin_value = read_counter(pctx, q->type);
|
||||
if (is_rate_query(q))
|
||||
q->begin_time = os_time_get();
|
||||
q->funcs->begin_query(fd_context(pctx), q);
|
||||
}
|
||||
|
||||
static void
|
||||
fd_end_query(struct pipe_context *pctx, struct pipe_query *pq)
|
||||
{
|
||||
struct fd_query *q = fd_query(pq);
|
||||
q->active = false;
|
||||
q->end_value = read_counter(pctx, q->type);
|
||||
if (is_rate_query(q))
|
||||
q->end_time = os_time_get();
|
||||
q->funcs->end_query(fd_context(pctx), q);
|
||||
}
|
||||
|
||||
static boolean
|
||||
@@ -157,21 +78,7 @@ fd_get_query_result(struct pipe_context *pctx, struct pipe_query *pq,
|
||||
boolean wait, union pipe_query_result *result)
|
||||
{
|
||||
struct fd_query *q = fd_query(pq);
|
||||
|
||||
if (q->active)
|
||||
return false;
|
||||
|
||||
util_query_clear_result(result, q->type);
|
||||
|
||||
result->u64 = q->end_value - q->begin_value;
|
||||
|
||||
if (is_rate_query(q)) {
|
||||
double fps = (result->u64 * 1000000) /
|
||||
(double)(q->end_time - q->begin_time);
|
||||
result->u64 = (uint64_t)fps;
|
||||
}
|
||||
|
||||
return true;
|
||||
return q->funcs->get_query_result(fd_context(pctx), q, wait, result);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
|
||||
* Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@@ -31,6 +31,37 @@
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
struct fd_context;
|
||||
struct fd_query;
|
||||
|
||||
struct fd_query_funcs {
|
||||
void (*destroy_query)(struct fd_context *ctx,
|
||||
struct fd_query *q);
|
||||
void (*begin_query)(struct fd_context *ctx, struct fd_query *q);
|
||||
void (*end_query)(struct fd_context *ctx, struct fd_query *q);
|
||||
boolean (*get_query_result)(struct fd_context *ctx,
|
||||
struct fd_query *q, boolean wait,
|
||||
union pipe_query_result *result);
|
||||
};
|
||||
|
||||
struct fd_query {
|
||||
const struct fd_query_funcs *funcs;
|
||||
bool active;
|
||||
int type;
|
||||
};
|
||||
|
||||
static inline struct fd_query *
|
||||
fd_query(struct pipe_query *pq)
|
||||
{
|
||||
return (struct fd_query *)pq;
|
||||
}
|
||||
|
||||
#define FD_QUERY_DRAW_CALLS (PIPE_QUERY_DRIVER_SPECIFIC + 0)
|
||||
#define FD_QUERY_BATCH_TOTAL (PIPE_QUERY_DRIVER_SPECIFIC + 1) /* total # of batches (submits) */
|
||||
#define FD_QUERY_BATCH_SYSMEM (PIPE_QUERY_DRIVER_SPECIFIC + 2) /* batches using system memory (GMEM bypass) */
|
||||
#define FD_QUERY_BATCH_GMEM (PIPE_QUERY_DRIVER_SPECIFIC + 3) /* batches using GMEM */
|
||||
#define FD_QUERY_BATCH_RESTORE (PIPE_QUERY_DRIVER_SPECIFIC + 4) /* batches requiring GMEM restore */
|
||||
|
||||
void fd_query_screen_init(struct pipe_screen *pscreen);
|
||||
void fd_query_context_init(struct pipe_context *pctx);
|
||||
|
||||
|
465
src/gallium/drivers/freedreno/freedreno_query_hw.c
Normal file
465
src/gallium/drivers/freedreno/freedreno_query_hw.c
Normal file
@@ -0,0 +1,465 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
struct fd_hw_sample_period {
|
||||
struct fd_hw_sample *start, *end;
|
||||
struct list_head list;
|
||||
};
|
||||
|
||||
/* maps query_type to sample provider idx: */
|
||||
static int pidx(unsigned query_type)
|
||||
{
|
||||
switch (query_type) {
|
||||
case PIPE_QUERY_OCCLUSION_COUNTER:
|
||||
return 0;
|
||||
case PIPE_QUERY_OCCLUSION_PREDICATE:
|
||||
return 1;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
static struct fd_hw_sample *
|
||||
get_sample(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
unsigned query_type)
|
||||
{
|
||||
struct fd_hw_sample *samp = NULL;
|
||||
int idx = pidx(query_type);
|
||||
|
||||
if (!ctx->sample_cache[idx]) {
|
||||
ctx->sample_cache[idx] =
|
||||
ctx->sample_providers[idx]->get_sample(ctx, ring);
|
||||
}
|
||||
|
||||
fd_hw_sample_reference(ctx, &samp, ctx->sample_cache[idx]);
|
||||
|
||||
return samp;
|
||||
}
|
||||
|
||||
static void
|
||||
clear_sample_cache(struct fd_context *ctx)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(ctx->sample_cache); i++)
|
||||
fd_hw_sample_reference(ctx, &ctx->sample_cache[i], NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_active(struct fd_hw_query *hq, enum fd_render_stage stage)
|
||||
{
|
||||
return !!(hq->provider->active & stage);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
resume_query(struct fd_context *ctx, struct fd_hw_query *hq,
|
||||
struct fd_ringbuffer *ring)
|
||||
{
|
||||
assert(!hq->period);
|
||||
hq->period = util_slab_alloc(&ctx->sample_period_pool);
|
||||
list_inithead(&hq->period->list);
|
||||
hq->period->start = get_sample(ctx, ring, hq->base.type);
|
||||
/* NOTE: util_slab_alloc() does not zero out the buffer: */
|
||||
hq->period->end = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
pause_query(struct fd_context *ctx, struct fd_hw_query *hq,
|
||||
struct fd_ringbuffer *ring)
|
||||
{
|
||||
assert(hq->period && !hq->period->end);
|
||||
hq->period->end = get_sample(ctx, ring, hq->base.type);
|
||||
list_addtail(&hq->period->list, &hq->current_periods);
|
||||
hq->period = NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
destroy_periods(struct fd_context *ctx, struct list_head *list)
|
||||
{
|
||||
struct fd_hw_sample_period *period, *s;
|
||||
LIST_FOR_EACH_ENTRY_SAFE(period, s, list, list) {
|
||||
fd_hw_sample_reference(ctx, &period->start, NULL);
|
||||
fd_hw_sample_reference(ctx, &period->end, NULL);
|
||||
list_del(&period->list);
|
||||
util_slab_free(&ctx->sample_period_pool, period);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fd_hw_destroy_query(struct fd_context *ctx, struct fd_query *q)
|
||||
{
|
||||
struct fd_hw_query *hq = fd_hw_query(q);
|
||||
|
||||
destroy_periods(ctx, &hq->periods);
|
||||
destroy_periods(ctx, &hq->current_periods);
|
||||
list_del(&hq->list);
|
||||
|
||||
free(hq);
|
||||
}
|
||||
|
||||
static void
|
||||
fd_hw_begin_query(struct fd_context *ctx, struct fd_query *q)
|
||||
{
|
||||
struct fd_hw_query *hq = fd_hw_query(q);
|
||||
if (q->active)
|
||||
return;
|
||||
|
||||
/* begin_query() should clear previous results: */
|
||||
destroy_periods(ctx, &hq->periods);
|
||||
|
||||
if (is_active(hq, ctx->stage))
|
||||
resume_query(ctx, hq, ctx->ring);
|
||||
|
||||
q->active = true;
|
||||
|
||||
/* add to active list: */
|
||||
list_del(&hq->list);
|
||||
list_addtail(&hq->list, &ctx->active_queries);
|
||||
}
|
||||
|
||||
static void
|
||||
fd_hw_end_query(struct fd_context *ctx, struct fd_query *q)
|
||||
{
|
||||
struct fd_hw_query *hq = fd_hw_query(q);
|
||||
if (!q->active)
|
||||
return;
|
||||
if (is_active(hq, ctx->stage))
|
||||
pause_query(ctx, hq, ctx->ring);
|
||||
q->active = false;
|
||||
/* move to current list: */
|
||||
list_del(&hq->list);
|
||||
list_addtail(&hq->list, &ctx->current_queries);
|
||||
}
|
||||
|
||||
/* helper to get ptr to specified sample: */
|
||||
static void * sampptr(struct fd_hw_sample *samp, uint32_t n, void *ptr)
|
||||
{
|
||||
return ((char *)ptr) + (samp->tile_stride * n) + samp->offset;
|
||||
}
|
||||
|
||||
static boolean
|
||||
fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
|
||||
boolean wait, union pipe_query_result *result)
|
||||
{
|
||||
struct fd_hw_query *hq = fd_hw_query(q);
|
||||
const struct fd_hw_sample_provider *p = hq->provider;
|
||||
struct fd_hw_sample_period *period;
|
||||
|
||||
if (q->active)
|
||||
return false;
|
||||
|
||||
/* if the app tries to read back the query result before the
|
||||
* back is submitted, that forces us to flush so that there
|
||||
* are actually results to wait for:
|
||||
*/
|
||||
if (!LIST_IS_EMPTY(&hq->list)) {
|
||||
DBG("reading query result forces flush!");
|
||||
ctx->needs_flush = true;
|
||||
fd_context_render(&ctx->base);
|
||||
}
|
||||
|
||||
util_query_clear_result(result, q->type);
|
||||
|
||||
if (LIST_IS_EMPTY(&hq->periods))
|
||||
return true;
|
||||
|
||||
assert(LIST_IS_EMPTY(&hq->list));
|
||||
assert(LIST_IS_EMPTY(&hq->current_periods));
|
||||
assert(!hq->period);
|
||||
|
||||
if (LIST_IS_EMPTY(&hq->periods))
|
||||
return true;
|
||||
|
||||
/* if !wait, then check the last sample (the one most likely to
|
||||
* not be ready yet) and bail if it is not ready:
|
||||
*/
|
||||
if (!wait) {
|
||||
int ret;
|
||||
|
||||
period = LIST_ENTRY(struct fd_hw_sample_period,
|
||||
hq->periods.prev, list);
|
||||
|
||||
ret = fd_bo_cpu_prep(period->end->bo, ctx->screen->pipe,
|
||||
DRM_FREEDRENO_PREP_READ | DRM_FREEDRENO_PREP_NOSYNC);
|
||||
if (ret)
|
||||
return false;
|
||||
|
||||
fd_bo_cpu_fini(period->end->bo);
|
||||
}
|
||||
|
||||
/* sum the result across all sample periods: */
|
||||
LIST_FOR_EACH_ENTRY(period, &hq->periods, list) {
|
||||
struct fd_hw_sample *start = period->start;
|
||||
struct fd_hw_sample *end = period->end;
|
||||
unsigned i;
|
||||
|
||||
/* start and end samples should be from same batch: */
|
||||
assert(start->bo == end->bo);
|
||||
assert(start->num_tiles == end->num_tiles);
|
||||
|
||||
for (i = 0; i < start->num_tiles; i++) {
|
||||
void *ptr;
|
||||
|
||||
fd_bo_cpu_prep(start->bo, ctx->screen->pipe,
|
||||
DRM_FREEDRENO_PREP_READ);
|
||||
|
||||
ptr = fd_bo_map(start->bo);
|
||||
|
||||
p->accumulate_result(ctx, sampptr(period->start, i, ptr),
|
||||
sampptr(period->end, i, ptr), result);
|
||||
|
||||
fd_bo_cpu_fini(start->bo);
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static const struct fd_query_funcs hw_query_funcs = {
|
||||
.destroy_query = fd_hw_destroy_query,
|
||||
.begin_query = fd_hw_begin_query,
|
||||
.end_query = fd_hw_end_query,
|
||||
.get_query_result = fd_hw_get_query_result,
|
||||
};
|
||||
|
||||
struct fd_query *
|
||||
fd_hw_create_query(struct fd_context *ctx, unsigned query_type)
|
||||
{
|
||||
struct fd_hw_query *hq;
|
||||
struct fd_query *q;
|
||||
int idx = pidx(query_type);
|
||||
|
||||
if ((idx < 0) || !ctx->sample_providers[idx])
|
||||
return NULL;
|
||||
|
||||
hq = CALLOC_STRUCT(fd_hw_query);
|
||||
if (!hq)
|
||||
return NULL;
|
||||
|
||||
hq->provider = ctx->sample_providers[idx];
|
||||
|
||||
list_inithead(&hq->periods);
|
||||
list_inithead(&hq->current_periods);
|
||||
list_inithead(&hq->list);
|
||||
|
||||
q = &hq->base;
|
||||
q->funcs = &hw_query_funcs;
|
||||
q->type = query_type;
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
struct fd_hw_sample *
|
||||
fd_hw_sample_init(struct fd_context *ctx, uint32_t size)
|
||||
{
|
||||
struct fd_hw_sample *samp = util_slab_alloc(&ctx->sample_pool);
|
||||
pipe_reference_init(&samp->reference, 1);
|
||||
samp->size = size;
|
||||
samp->offset = ctx->next_sample_offset;
|
||||
/* NOTE: util_slab_alloc() does not zero out the buffer: */
|
||||
samp->bo = NULL;
|
||||
samp->num_tiles = 0;
|
||||
samp->tile_stride = 0;
|
||||
ctx->next_sample_offset += size;
|
||||
return samp;
|
||||
}
|
||||
|
||||
void
|
||||
__fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp)
|
||||
{
|
||||
if (samp->bo)
|
||||
fd_bo_del(samp->bo);
|
||||
util_slab_free(&ctx->sample_pool, samp);
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_sample(struct fd_hw_sample *samp, struct fd_bo *bo,
|
||||
uint32_t num_tiles, uint32_t tile_stride)
|
||||
{
|
||||
if (samp->bo) {
|
||||
assert(samp->bo == bo);
|
||||
assert(samp->num_tiles == num_tiles);
|
||||
assert(samp->tile_stride == tile_stride);
|
||||
return;
|
||||
}
|
||||
samp->bo = bo;
|
||||
samp->num_tiles = num_tiles;
|
||||
samp->tile_stride = tile_stride;
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_query(struct fd_hw_query *hq, struct fd_bo *bo,
|
||||
uint32_t num_tiles, uint32_t tile_stride)
|
||||
{
|
||||
struct fd_hw_sample_period *period, *s;
|
||||
|
||||
/* prepare all the samples in the query: */
|
||||
LIST_FOR_EACH_ENTRY_SAFE(period, s, &hq->current_periods, list) {
|
||||
prepare_sample(period->start, bo, num_tiles, tile_stride);
|
||||
prepare_sample(period->end, bo, num_tiles, tile_stride);
|
||||
|
||||
/* move from current_periods list to periods list: */
|
||||
list_del(&period->list);
|
||||
list_addtail(&period->list, &hq->periods);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_queries(struct fd_context *ctx, struct fd_bo *bo,
|
||||
uint32_t num_tiles, uint32_t tile_stride,
|
||||
struct list_head *list, bool remove)
|
||||
{
|
||||
struct fd_hw_query *hq, *s;
|
||||
LIST_FOR_EACH_ENTRY_SAFE(hq, s, list, list) {
|
||||
prepare_query(hq, bo, num_tiles, tile_stride);
|
||||
if (remove)
|
||||
list_delinit(&hq->list);
|
||||
}
|
||||
}
|
||||
|
||||
/* called from gmem code once total storage requirements are known (ie.
|
||||
* number of samples times number of tiles)
|
||||
*/
|
||||
void
|
||||
fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles)
|
||||
{
|
||||
uint32_t tile_stride = ctx->next_sample_offset;
|
||||
struct fd_bo *bo;
|
||||
|
||||
if (ctx->query_bo)
|
||||
fd_bo_del(ctx->query_bo);
|
||||
|
||||
if (tile_stride > 0) {
|
||||
bo = fd_bo_new(ctx->dev, tile_stride * num_tiles,
|
||||
DRM_FREEDRENO_GEM_CACHE_WCOMBINE |
|
||||
DRM_FREEDRENO_GEM_TYPE_KMEM);
|
||||
} else {
|
||||
bo = NULL;
|
||||
}
|
||||
|
||||
ctx->query_bo = bo;
|
||||
ctx->query_tile_stride = tile_stride;
|
||||
|
||||
prepare_queries(ctx, bo, num_tiles, tile_stride,
|
||||
&ctx->active_queries, false);
|
||||
prepare_queries(ctx, bo, num_tiles, tile_stride,
|
||||
&ctx->current_queries, true);
|
||||
|
||||
/* reset things for next batch: */
|
||||
ctx->next_sample_offset = 0;
|
||||
}
|
||||
|
||||
void
|
||||
fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
|
||||
struct fd_ringbuffer *ring)
|
||||
{
|
||||
uint32_t tile_stride = ctx->query_tile_stride;
|
||||
uint32_t offset = tile_stride * n;
|
||||
|
||||
/* bail if no queries: */
|
||||
if (tile_stride == 0)
|
||||
return;
|
||||
|
||||
fd_wfi(ctx, ring);
|
||||
OUT_PKT0 (ring, HW_QUERY_BASE_REG, 1);
|
||||
OUT_RELOCW(ring, ctx->query_bo, offset, 0, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fd_hw_query_set_stage(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
enum fd_render_stage stage)
|
||||
{
|
||||
/* special case: internal blits (like mipmap level generation)
|
||||
* go through normal draw path (via util_blitter_blit()).. but
|
||||
* we need to ignore the FD_STAGE_DRAW which will be set, so we
|
||||
* don't enable queries which should be paused during internal
|
||||
* blits:
|
||||
*/
|
||||
if ((ctx->stage == FD_STAGE_BLIT) &&
|
||||
(stage != FD_STAGE_NULL))
|
||||
return;
|
||||
|
||||
if (stage != ctx->stage) {
|
||||
struct fd_hw_query *hq;
|
||||
LIST_FOR_EACH_ENTRY(hq, &ctx->active_queries, list) {
|
||||
bool was_active = is_active(hq, ctx->stage);
|
||||
bool now_active = is_active(hq, stage);
|
||||
|
||||
if (now_active && !was_active)
|
||||
resume_query(ctx, hq, ring);
|
||||
else if (was_active && !now_active)
|
||||
pause_query(ctx, hq, ring);
|
||||
}
|
||||
}
|
||||
clear_sample_cache(ctx);
|
||||
ctx->stage = stage;
|
||||
}
|
||||
|
||||
void
|
||||
fd_hw_query_register_provider(struct pipe_context *pctx,
|
||||
const struct fd_hw_sample_provider *provider)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
int idx = pidx(provider->query_type);
|
||||
|
||||
assert((0 <= idx) && (idx < MAX_HW_SAMPLE_PROVIDERS));
|
||||
assert(!ctx->sample_providers[idx]);
|
||||
|
||||
ctx->sample_providers[idx] = provider;
|
||||
}
|
||||
|
||||
void
|
||||
fd_hw_query_init(struct pipe_context *pctx)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
|
||||
util_slab_create(&ctx->sample_pool, sizeof(struct fd_hw_sample),
|
||||
16, UTIL_SLAB_SINGLETHREADED);
|
||||
util_slab_create(&ctx->sample_period_pool, sizeof(struct fd_hw_sample_period),
|
||||
16, UTIL_SLAB_SINGLETHREADED);
|
||||
list_inithead(&ctx->active_queries);
|
||||
list_inithead(&ctx->current_queries);
|
||||
}
|
||||
|
||||
void
|
||||
fd_hw_query_fini(struct pipe_context *pctx)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
|
||||
util_slab_destroy(&ctx->sample_pool);
|
||||
util_slab_destroy(&ctx->sample_period_pool);
|
||||
}
|
164
src/gallium/drivers/freedreno/freedreno_query_hw.h
Normal file
164
src/gallium/drivers/freedreno/freedreno_query_hw.h
Normal file
@@ -0,0 +1,164 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#ifndef FREEDRENO_QUERY_HW_H_
|
||||
#define FREEDRENO_QUERY_HW_H_
|
||||
|
||||
#include "util/u_double_list.h"
|
||||
|
||||
#include "freedreno_query.h"
|
||||
#include "freedreno_context.h"
|
||||
|
||||
|
||||
/*
|
||||
* HW Queries:
|
||||
*
|
||||
* See: https://github.com/freedreno/freedreno/wiki/Queries#hardware-queries
|
||||
*
|
||||
* Hardware queries will be specific to gpu generation, but they need
|
||||
* some common infrastructure for triggering start/stop samples at
|
||||
* various points (for example, to exclude mem2gmem/gmem2mem or clear)
|
||||
* as well as per tile tracking.
|
||||
*
|
||||
* NOTE: in at least some cases hw writes sample values to memory addr
|
||||
* specified in some register. So we don't really have the option to
|
||||
* just sample the same counter multiple times for multiple different
|
||||
* queries with the same query_type. So we cache per sample provider
|
||||
* the most recent sample since the last draw. This way multiple
|
||||
* sample periods for multiple queries can reference the same sample.
|
||||
*
|
||||
* fd_hw_sample_provider:
|
||||
* - one per query type, registered/implemented by gpu generation
|
||||
* specific code
|
||||
* - can construct fd_hw_samples on demand
|
||||
* - most recent sample (since last draw) cached so multiple
|
||||
* different queries can ref the same sample
|
||||
*
|
||||
* fd_hw_sample:
|
||||
* - abstracts one snapshot of counter value(s) across N tiles
|
||||
* - backing object not allocated until submit time when number
|
||||
* of samples and number of tiles is known
|
||||
*
|
||||
* fd_hw_sample_period:
|
||||
* - consists of start and stop sample
|
||||
* - a query accumulates a list of sample periods
|
||||
* - the query result is the sum of the sample periods
|
||||
*/
|
||||
|
||||
struct fd_hw_sample_provider {
|
||||
unsigned query_type;
|
||||
|
||||
/* stages applicable to the query type: */
|
||||
enum fd_render_stage active;
|
||||
|
||||
/* when a new sample is required, emit appropriate cmdstream
|
||||
* and return a sample object:
|
||||
*/
|
||||
struct fd_hw_sample *(*get_sample)(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring);
|
||||
|
||||
/* accumulate the results from specified sample period: */
|
||||
void (*accumulate_result)(struct fd_context *ctx,
|
||||
const void *start, const void *end,
|
||||
union pipe_query_result *result);
|
||||
};
|
||||
|
||||
struct fd_hw_sample {
|
||||
struct pipe_reference reference; /* keep this first */
|
||||
|
||||
/* offset and size of the sample are know at the time the
|
||||
* sample is constructed.
|
||||
*/
|
||||
uint32_t size;
|
||||
uint32_t offset;
|
||||
|
||||
/* backing object, offset/stride/etc are determined not when
|
||||
* the sample is constructed, but when the batch is submitted.
|
||||
* This way we can defer allocation until total # of requested
|
||||
* samples, and total # of tiles, is known.
|
||||
*/
|
||||
struct fd_bo *bo;
|
||||
uint32_t num_tiles;
|
||||
uint32_t tile_stride;
|
||||
};
|
||||
|
||||
struct fd_hw_sample_period;
|
||||
|
||||
struct fd_hw_query {
|
||||
struct fd_query base;
|
||||
|
||||
const struct fd_hw_sample_provider *provider;
|
||||
|
||||
/* list of fd_hw_sample_period in previous submits: */
|
||||
struct list_head periods;
|
||||
|
||||
/* list of fd_hw_sample_period's in current submit: */
|
||||
struct list_head current_periods;
|
||||
|
||||
/* if active and not paused, the current sample period (not
|
||||
* yet added to current_periods):
|
||||
*/
|
||||
struct fd_hw_sample_period *period;
|
||||
|
||||
struct list_head list; /* list-node in ctx->active_queries */
|
||||
};
|
||||
|
||||
/* Downcast a generic query to the hw query that embeds it (valid
 * because 'base' is the first member of struct fd_hw_query).
 */
static inline struct fd_hw_query *
fd_hw_query(struct fd_query *q)
{
	struct fd_hw_query *hw_query = (struct fd_hw_query *)q;

	return hw_query;
}
|
||||
|
||||
struct fd_query * fd_hw_create_query(struct fd_context *ctx, unsigned query_type);
|
||||
/* helper for sample providers: */
|
||||
struct fd_hw_sample * fd_hw_sample_init(struct fd_context *ctx, uint32_t size);
|
||||
/* don't call directly, use fd_hw_sample_reference() */
|
||||
void __fd_hw_sample_destroy(struct fd_context *ctx, struct fd_hw_sample *samp);
|
||||
void fd_hw_query_prepare(struct fd_context *ctx, uint32_t num_tiles);
|
||||
void fd_hw_query_prepare_tile(struct fd_context *ctx, uint32_t n,
|
||||
struct fd_ringbuffer *ring);
|
||||
void fd_hw_query_set_stage(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring, enum fd_render_stage stage);
|
||||
void fd_hw_query_register_provider(struct pipe_context *pctx,
|
||||
const struct fd_hw_sample_provider *provider);
|
||||
void fd_hw_query_init(struct pipe_context *pctx);
|
||||
void fd_hw_query_fini(struct pipe_context *pctx);
|
||||
|
||||
static inline void
|
||||
fd_hw_sample_reference(struct fd_context *ctx,
|
||||
struct fd_hw_sample **ptr, struct fd_hw_sample *samp)
|
||||
{
|
||||
struct fd_hw_sample *old_samp = *ptr;
|
||||
|
||||
if (pipe_reference(&(*ptr)->reference, &samp->reference))
|
||||
__fd_hw_sample_destroy(ctx, old_samp);
|
||||
if (ptr)
|
||||
*ptr = samp;
|
||||
}
|
||||
|
||||
#endif /* FREEDRENO_QUERY_HW_H_ */
|
165
src/gallium/drivers/freedreno/freedreno_query_sw.c
Normal file
165
src/gallium/drivers/freedreno/freedreno_query_sw.c
Normal file
@@ -0,0 +1,165 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#include "pipe/p_state.h"
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "os/os_time.h"
|
||||
|
||||
#include "freedreno_query_sw.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
/*
|
||||
* SW Queries:
|
||||
*
|
||||
* In the core, we have some support for basic sw counters
|
||||
*/
|
||||
|
||||
/* Free a sw query; the context is not needed for this. */
static void
fd_sw_destroy_query(struct fd_context *ctx, struct fd_query *q)
{
	free(fd_sw_query(q));
}
|
||||
|
||||
static uint64_t
|
||||
read_counter(struct fd_context *ctx, int type)
|
||||
{
|
||||
switch (type) {
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
/* for now same thing as _PRIMITIVES_EMITTED */
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
return ctx->stats.prims_emitted;
|
||||
case FD_QUERY_DRAW_CALLS:
|
||||
return ctx->stats.draw_calls;
|
||||
case FD_QUERY_BATCH_TOTAL:
|
||||
return ctx->stats.batch_total;
|
||||
case FD_QUERY_BATCH_SYSMEM:
|
||||
return ctx->stats.batch_sysmem;
|
||||
case FD_QUERY_BATCH_GMEM:
|
||||
return ctx->stats.batch_gmem;
|
||||
case FD_QUERY_BATCH_RESTORE:
|
||||
return ctx->stats.batch_restore;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_rate_query(struct fd_query *q)
|
||||
{
|
||||
switch (q->type) {
|
||||
case FD_QUERY_BATCH_TOTAL:
|
||||
case FD_QUERY_BATCH_SYSMEM:
|
||||
case FD_QUERY_BATCH_GMEM:
|
||||
case FD_QUERY_BATCH_RESTORE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fd_sw_begin_query(struct fd_context *ctx, struct fd_query *q)
|
||||
{
|
||||
struct fd_sw_query *sq = fd_sw_query(q);
|
||||
q->active = true;
|
||||
sq->begin_value = read_counter(ctx, q->type);
|
||||
if (is_rate_query(q))
|
||||
sq->begin_time = os_time_get();
|
||||
}
|
||||
|
||||
static void
|
||||
fd_sw_end_query(struct fd_context *ctx, struct fd_query *q)
|
||||
{
|
||||
struct fd_sw_query *sq = fd_sw_query(q);
|
||||
q->active = false;
|
||||
sq->end_value = read_counter(ctx, q->type);
|
||||
if (is_rate_query(q))
|
||||
sq->end_time = os_time_get();
|
||||
}
|
||||
|
||||
static boolean
|
||||
fd_sw_get_query_result(struct fd_context *ctx, struct fd_query *q,
|
||||
boolean wait, union pipe_query_result *result)
|
||||
{
|
||||
struct fd_sw_query *sq = fd_sw_query(q);
|
||||
|
||||
if (q->active)
|
||||
return false;
|
||||
|
||||
util_query_clear_result(result, q->type);
|
||||
|
||||
result->u64 = sq->end_value - sq->begin_value;
|
||||
|
||||
if (is_rate_query(q)) {
|
||||
double fps = (result->u64 * 1000000) /
|
||||
(double)(sq->end_time - sq->begin_time);
|
||||
result->u64 = (uint64_t)fps;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static const struct fd_query_funcs sw_query_funcs = {
|
||||
.destroy_query = fd_sw_destroy_query,
|
||||
.begin_query = fd_sw_begin_query,
|
||||
.end_query = fd_sw_end_query,
|
||||
.get_query_result = fd_sw_get_query_result,
|
||||
};
|
||||
|
||||
struct fd_query *
|
||||
fd_sw_create_query(struct fd_context *ctx, unsigned query_type)
|
||||
{
|
||||
struct fd_sw_query *sq;
|
||||
struct fd_query *q;
|
||||
|
||||
switch (query_type) {
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case FD_QUERY_DRAW_CALLS:
|
||||
case FD_QUERY_BATCH_TOTAL:
|
||||
case FD_QUERY_BATCH_SYSMEM:
|
||||
case FD_QUERY_BATCH_GMEM:
|
||||
case FD_QUERY_BATCH_RESTORE:
|
||||
break;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sq = CALLOC_STRUCT(fd_sw_query);
|
||||
if (!sq)
|
||||
return NULL;
|
||||
|
||||
q = &sq->base;
|
||||
q->funcs = &sw_query_funcs;
|
||||
q->type = query_type;
|
||||
|
||||
return q;
|
||||
}
|
55
src/gallium/drivers/freedreno/freedreno_query_sw.h
Normal file
55
src/gallium/drivers/freedreno/freedreno_query_sw.h
Normal file
@@ -0,0 +1,55 @@
|
||||
/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
|
||||
|
||||
/*
|
||||
* Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*
|
||||
* Authors:
|
||||
* Rob Clark <robclark@freedesktop.org>
|
||||
*/
|
||||
|
||||
#ifndef FREEDRENO_QUERY_SW_H_
|
||||
#define FREEDRENO_QUERY_SW_H_
|
||||
|
||||
#include "freedreno_query.h"
|
||||
|
||||
/*
|
||||
* SW Queries:
|
||||
*
|
||||
* In the core, we have some support for basic sw counters
|
||||
*/
|
||||
|
||||
struct fd_sw_query {
|
||||
struct fd_query base;
|
||||
uint64_t begin_value, end_value;
|
||||
uint64_t begin_time, end_time;
|
||||
};
|
||||
|
||||
/* Downcast a generic query to the sw query that embeds it (valid
 * because 'base' is the first member of struct fd_sw_query).
 */
static inline struct fd_sw_query *
fd_sw_query(struct fd_query *q)
{
	struct fd_sw_query *sw_query = (struct fd_sw_query *)q;

	return sw_query;
}
|
||||
|
||||
struct fd_query * fd_sw_create_query(struct fd_context *ctx,
|
||||
unsigned query_type);
|
||||
|
||||
#endif /* FREEDRENO_QUERY_SW_H_ */
|
@@ -36,6 +36,7 @@
|
||||
#include "freedreno_screen.h"
|
||||
#include "freedreno_surface.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
#include <errno.h>
|
||||
@@ -401,7 +402,9 @@ render_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
|
||||
util_blitter_save_fragment_sampler_views(ctx->blitter,
|
||||
ctx->fragtex.num_textures, ctx->fragtex.textures);
|
||||
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_BLIT);
|
||||
util_blitter_blit(ctx->blitter, info);
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@@ -143,6 +143,8 @@ tables for things that differ if the delta is not too much..
|
||||
static int
|
||||
fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
{
|
||||
struct fd_screen *screen = fd_screen(pscreen);
|
||||
|
||||
/* this is probably not totally correct.. but it's a start: */
|
||||
switch (param) {
|
||||
/* Supported features (boolean caps). */
|
||||
@@ -161,8 +163,6 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
|
||||
case PIPE_CAP_SM3:
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP:
|
||||
case PIPE_CAP_PRIMITIVE_RESTART:
|
||||
case PIPE_CAP_CONDITIONAL_RENDER:
|
||||
case PIPE_CAP_TEXTURE_BARRIER:
|
||||
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
|
||||
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
|
||||
@@ -180,6 +180,8 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_SHADER_STENCIL_EXPORT:
|
||||
case PIPE_CAP_TGSI_TEXCOORD:
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
case PIPE_CAP_CONDITIONAL_RENDER:
|
||||
case PIPE_CAP_PRIMITIVE_RESTART:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
|
||||
@@ -229,17 +231,18 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
|
||||
return MAX_MIP_LEVELS;
|
||||
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
|
||||
return 9192;
|
||||
return 0; /* TODO: a3xx+ should support (required in gles3) */
|
||||
|
||||
/* Render targets. */
|
||||
case PIPE_CAP_MAX_RENDER_TARGETS:
|
||||
return 1;
|
||||
|
||||
/* Timer queries. */
|
||||
/* Queries. */
|
||||
case PIPE_CAP_QUERY_TIME_ELAPSED:
|
||||
case PIPE_CAP_OCCLUSION_QUERY:
|
||||
case PIPE_CAP_QUERY_TIMESTAMP:
|
||||
return 0;
|
||||
case PIPE_CAP_OCCLUSION_QUERY:
|
||||
return (screen->gpu_id >= 300) ? 1: 0;
|
||||
|
||||
case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
|
||||
case PIPE_CAP_MIN_TEXEL_OFFSET:
|
||||
@@ -252,7 +255,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_ENDIANNESS:
|
||||
return PIPE_ENDIAN_LITTLE;
|
||||
|
||||
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
|
||||
case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
|
||||
return 64;
|
||||
|
||||
default:
|
||||
@@ -315,7 +318,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
|
||||
return 8; /* XXX */
|
||||
case PIPE_SHADER_CAP_MAX_INPUTS:
|
||||
return 32;
|
||||
return 16;
|
||||
case PIPE_SHADER_CAP_MAX_TEMPS:
|
||||
return 64; /* Max native temporaries. */
|
||||
case PIPE_SHADER_CAP_MAX_ADDRS:
|
||||
|
@@ -223,11 +223,18 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
|
||||
emit_marker(ring, 6);
|
||||
}
|
||||
|
||||
/* CP_SCRATCH_REG4 is used to hold base address for query results: */
|
||||
#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
|
||||
|
||||
static inline void
|
||||
emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
|
||||
{
|
||||
extern unsigned marker_cnt;
|
||||
OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG0 + scratch_idx, 1);
|
||||
unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
|
||||
assert(reg != HW_QUERY_BASE_REG);
|
||||
if (reg == HW_QUERY_BASE_REG)
|
||||
return;
|
||||
OUT_PKT0(ring, reg, 1);
|
||||
OUT_RING(ring, ++marker_cnt);
|
||||
}
|
||||
|
||||
|
@@ -915,6 +915,9 @@ CodeEmitterGK110::emitSET(const CmpInstruction *i)
|
||||
modNegAbsF32_3b(i, 1);
|
||||
}
|
||||
FTZ_(3a);
|
||||
|
||||
if (i->dType == TYPE_F32)
|
||||
code[1] |= 1 << 23;
|
||||
}
|
||||
if (i->sType == TYPE_S32)
|
||||
code[1] |= 1 << 19;
|
||||
|
@@ -37,18 +37,25 @@ namespace nv50_ir {
|
||||
// ah*bl 00
|
||||
//
|
||||
// fffe0001 + fffe0001
|
||||
//
|
||||
// Note that this sort of splitting doesn't work for signed values, so we
|
||||
// compute the sign on those manually and then perform an unsigned multiply.
|
||||
static bool
|
||||
expandIntegerMUL(BuildUtil *bld, Instruction *mul)
|
||||
{
|
||||
const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
|
||||
|
||||
DataType fTy = mul->sType; // full type
|
||||
DataType hTy;
|
||||
DataType fTy; // full type
|
||||
switch (mul->sType) {
|
||||
case TYPE_S32: fTy = TYPE_U32; break;
|
||||
case TYPE_S64: fTy = TYPE_U64; break;
|
||||
default: fTy = mul->sType; break;
|
||||
}
|
||||
|
||||
DataType hTy; // half type
|
||||
switch (fTy) {
|
||||
case TYPE_S32: hTy = TYPE_S16; break;
|
||||
case TYPE_U32: hTy = TYPE_U16; break;
|
||||
case TYPE_U64: hTy = TYPE_U32; break;
|
||||
case TYPE_S64: hTy = TYPE_S32; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -59,15 +66,25 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
|
||||
|
||||
bld->setPosition(mul, true);
|
||||
|
||||
Value *s[2];
|
||||
Value *a[2], *b[2];
|
||||
Value *c[2];
|
||||
Value *t[4];
|
||||
for (int j = 0; j < 4; ++j)
|
||||
t[j] = bld->getSSA(fullSize);
|
||||
|
||||
s[0] = mul->getSrc(0);
|
||||
s[1] = mul->getSrc(1);
|
||||
|
||||
if (isSignedType(mul->sType)) {
|
||||
s[0] = bld->getSSA(fullSize);
|
||||
s[1] = bld->getSSA(fullSize);
|
||||
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
|
||||
bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
|
||||
}
|
||||
|
||||
// split sources into halves
|
||||
i[0] = bld->mkSplit(a, halfSize, mul->getSrc(0));
|
||||
i[1] = bld->mkSplit(b, halfSize, mul->getSrc(1));
|
||||
i[0] = bld->mkSplit(a, halfSize, s[0]);
|
||||
i[1] = bld->mkSplit(b, halfSize, s[1]);
|
||||
|
||||
i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
|
||||
i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
|
||||
@@ -75,23 +92,76 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
|
||||
i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
|
||||
|
||||
if (highResult) {
|
||||
Value *r[3];
|
||||
Value *c[2];
|
||||
Value *r[5];
|
||||
Value *imm = bld->loadImm(NULL, 1 << (halfSize * 8));
|
||||
c[0] = bld->getSSA(1, FILE_FLAGS);
|
||||
c[1] = bld->getSSA(1, FILE_FLAGS);
|
||||
for (int j = 0; j < 3; ++j)
|
||||
for (int j = 0; j < 5; ++j)
|
||||
r[j] = bld->getSSA(fullSize);
|
||||
|
||||
i[8] = bld->mkOp2(OP_SHR, fTy, r[0], t[1], bld->mkImm(halfSize * 8));
|
||||
i[6] = bld->mkOp2(OP_ADD, fTy, r[1], r[0], imm);
|
||||
bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[0]);
|
||||
i[5] = bld->mkOp3(OP_MAD, fTy, mul->getDef(0), a[1], b[1], r[2]);
|
||||
bld->mkMov(r[3], r[0])->setPredicate(CC_NC, c[0]);
|
||||
bld->mkOp2(OP_UNION, TYPE_U32, r[2], r[1], r[3]);
|
||||
i[5] = bld->mkOp3(OP_MAD, fTy, r[4], a[1], b[1], r[2]);
|
||||
|
||||
// set carry defs / sources
|
||||
i[3]->setFlagsDef(1, c[0]);
|
||||
i[4]->setFlagsDef(0, c[1]); // actual result not required, just the carry
|
||||
// actual result required in negative case, but ignored for
|
||||
// unsigned. for some reason the compiler ends up dropping the whole
|
||||
// instruction if the destination is unused but the flags are.
|
||||
if (isSignedType(mul->sType))
|
||||
i[4]->setFlagsDef(1, c[1]);
|
||||
else
|
||||
i[4]->setFlagsDef(0, c[1]);
|
||||
i[6]->setPredicate(CC_C, c[0]);
|
||||
i[5]->setFlagsSrc(3, c[1]);
|
||||
|
||||
if (isSignedType(mul->sType)) {
|
||||
Value *cc[2];
|
||||
Value *rr[7];
|
||||
Value *one = bld->getSSA(fullSize);
|
||||
bld->loadImm(one, 1);
|
||||
for (int j = 0; j < 7; j++)
|
||||
rr[j] = bld->getSSA(fullSize);
|
||||
|
||||
// NOTE: this logic uses predicates because splitting basic blocks is
|
||||
// ~impossible during the SSA phase. The RA relies on a correlation
|
||||
// between edge order and phi node sources.
|
||||
|
||||
// Set the sign of the result based on the inputs
|
||||
bld->mkOp2(OP_XOR, fTy, NULL, mul->getSrc(0), mul->getSrc(1))
|
||||
->setFlagsDef(0, (cc[0] = bld->getSSA(1, FILE_FLAGS)));
|
||||
|
||||
// 1s complement of 64-bit value
|
||||
bld->mkOp1(OP_NOT, fTy, rr[0], r[4])
|
||||
->setPredicate(CC_S, cc[0]);
|
||||
bld->mkOp1(OP_NOT, fTy, rr[1], t[3])
|
||||
->setPredicate(CC_S, cc[0]);
|
||||
|
||||
// add to low 32-bits, keep track of the carry
|
||||
Instruction *n = bld->mkOp2(OP_ADD, fTy, NULL, rr[1], one);
|
||||
n->setPredicate(CC_S, cc[0]);
|
||||
n->setFlagsDef(0, (cc[1] = bld->getSSA(1, FILE_FLAGS)));
|
||||
|
||||
// If there was a carry, add 1 to the upper 32 bits
|
||||
// XXX: These get executed even if they shouldn't be
|
||||
bld->mkOp2(OP_ADD, fTy, rr[2], rr[0], one)
|
||||
->setPredicate(CC_C, cc[1]);
|
||||
bld->mkMov(rr[3], rr[0])
|
||||
->setPredicate(CC_NC, cc[1]);
|
||||
bld->mkOp2(OP_UNION, fTy, rr[4], rr[2], rr[3]);
|
||||
|
||||
// Merge the results from the negative and non-negative paths
|
||||
bld->mkMov(rr[5], rr[4])
|
||||
->setPredicate(CC_S, cc[0]);
|
||||
bld->mkMov(rr[6], r[4])
|
||||
->setPredicate(CC_NS, cc[0]);
|
||||
bld->mkOp2(OP_UNION, mul->sType, mul->getDef(0), rr[5], rr[6]);
|
||||
} else {
|
||||
bld->mkMov(mul->getDef(0), r[4]);
|
||||
}
|
||||
} else {
|
||||
bld->mkMov(mul->getDef(0), t[3]);
|
||||
}
|
||||
@@ -591,6 +661,10 @@ void NV50LoweringPreSSA::loadTexMsInfo(uint32_t off, Value **ms,
|
||||
Value *tmp = new_LValue(func, FILE_GPR);
|
||||
uint8_t b = prog->driver->io.resInfoCBSlot;
|
||||
off += prog->driver->io.suInfoBase;
|
||||
if (prog->getType() > Program::TYPE_VERTEX)
|
||||
off += 16 * 2 * 4;
|
||||
if (prog->getType() > Program::TYPE_GEOMETRY)
|
||||
off += 16 * 2 * 4;
|
||||
*ms_x = bld.mkLoadv(TYPE_U32, bld.mkSymbol(
|
||||
FILE_MEMORY_CONST, b, TYPE_U32, off + 0), NULL);
|
||||
*ms_y = bld.mkLoadv(TYPE_U32, bld.mkSymbol(
|
||||
|
@@ -187,7 +187,8 @@ LoadPropagation::checkSwapSrc01(Instruction *insn)
|
||||
return;
|
||||
}
|
||||
|
||||
if (insn->op == OP_SET)
|
||||
if (insn->op == OP_SET || insn->op == OP_SET_AND ||
|
||||
insn->op == OP_SET_OR || insn->op == OP_SET_XOR)
|
||||
insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
|
||||
else
|
||||
if (insn->op == OP_SLCT)
|
||||
@@ -424,7 +425,17 @@ ConstantFolding::expr(Instruction *i,
|
||||
case TYPE_F32: res.data.f32 = a->data.f32 * b->data.f32; break;
|
||||
case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
|
||||
case TYPE_S32:
|
||||
case TYPE_U32: res.data.u32 = a->data.u32 * b->data.u32; break;
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
|
||||
res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
|
||||
break;
|
||||
}
|
||||
/* fallthrough */
|
||||
case TYPE_U32:
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
|
||||
res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
|
||||
break;
|
||||
}
|
||||
res.data.u32 = a->data.u32 * b->data.u32; break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
@@ -690,12 +701,41 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
{
|
||||
const int t = !s;
|
||||
const operation op = i->op;
|
||||
Instruction *newi = i;
|
||||
|
||||
switch (i->op) {
|
||||
case OP_MUL:
|
||||
if (i->dType == TYPE_F32)
|
||||
tryCollapseChainedMULs(i, s, imm0);
|
||||
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
|
||||
assert(!isFloatType(i->sType));
|
||||
if (imm0.isInteger(1) && i->dType == TYPE_S32) {
|
||||
bld.setPosition(i, false);
|
||||
// Need to set to the sign value, which is a compare.
|
||||
newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
|
||||
TYPE_S32, i->getSrc(t), bld.mkImm(0));
|
||||
delete_Instruction(prog, i);
|
||||
} else if (imm0.isInteger(0) || imm0.isInteger(1)) {
|
||||
// The high bits can't be set in this case (either mul by 0 or
|
||||
// unsigned by 1)
|
||||
i->op = OP_MOV;
|
||||
i->subOp = 0;
|
||||
i->setSrc(0, new_ImmediateValue(prog, 0u));
|
||||
i->src(0).mod = Modifier(0);
|
||||
i->setSrc(1, NULL);
|
||||
} else if (!imm0.isNegative() && imm0.isPow2()) {
|
||||
// Translate into a shift
|
||||
imm0.applyLog2();
|
||||
i->op = OP_SHR;
|
||||
i->subOp = 0;
|
||||
imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
|
||||
i->setSrc(0, i->getSrc(t));
|
||||
i->src(0).mod = i->src(t).mod;
|
||||
i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
|
||||
i->src(1).mod = 0;
|
||||
}
|
||||
} else
|
||||
if (imm0.isInteger(0)) {
|
||||
i->op = OP_MOV;
|
||||
i->setSrc(0, new_ImmediateValue(prog, 0u));
|
||||
@@ -786,7 +826,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
else
|
||||
tA = tB;
|
||||
tB = s ? bld.getSSA() : i->getDef(0);
|
||||
bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
|
||||
newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
|
||||
if (s)
|
||||
bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
|
||||
|
||||
@@ -818,7 +858,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
tA = bld.getSSA();
|
||||
bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
|
||||
tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
|
||||
bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
|
||||
newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
|
||||
if (d < 0)
|
||||
bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
|
||||
|
||||
@@ -896,7 +936,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
default:
|
||||
return;
|
||||
}
|
||||
if (i->op != op)
|
||||
if (newi->op != op)
|
||||
foldCount++;
|
||||
}
|
||||
|
||||
|
@@ -998,7 +998,9 @@ GCRA::doCoalesce(ArrayList& insns, unsigned int mask)
|
||||
case OP_TXQ:
|
||||
case OP_TXD:
|
||||
case OP_TXG:
|
||||
case OP_TXLQ:
|
||||
case OP_TEXCSAA:
|
||||
case OP_TEXPREP:
|
||||
if (!(mask & JOIN_MASK_TEX))
|
||||
break;
|
||||
for (c = 0; insn->srcExists(c) && c != insn->predSrc; ++c)
|
||||
|
@@ -331,6 +331,8 @@ TargetNV50::insnCanLoad(const Instruction *i, int s,
|
||||
return false;
|
||||
if (sf == FILE_IMMEDIATE)
|
||||
return false;
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH && sf == FILE_MEMORY_CONST)
|
||||
return false;
|
||||
ldSize = 2;
|
||||
} else {
|
||||
ldSize = typeSizeof(ld->dType);
|
||||
|
@@ -78,16 +78,16 @@
|
||||
/* 8 user clip planes, at 4 32-bit floats each */
|
||||
#define NV50_CB_AUX_UCP_OFFSET 0x0000
|
||||
#define NV50_CB_AUX_UCP_SIZE (8 * 4 * 4)
|
||||
/* 256 textures, each with ms_x, ms_y u32 pairs */
|
||||
/* 16 textures * 3 shaders, each with ms_x, ms_y u32 pairs */
|
||||
#define NV50_CB_AUX_TEX_MS_OFFSET 0x0080
|
||||
#define NV50_CB_AUX_TEX_MS_SIZE (256 * 2 * 4)
|
||||
#define NV50_CB_AUX_TEX_MS_SIZE (16 * 3 * 2 * 4)
|
||||
/* For each MS level (4), 8 sets of 32-bit integer pairs sample offsets */
|
||||
#define NV50_CB_AUX_MS_OFFSET 0x880
|
||||
#define NV50_CB_AUX_MS_OFFSET 0x200
|
||||
#define NV50_CB_AUX_MS_SIZE (4 * 8 * 4 * 2)
|
||||
/* Sample position pairs for the current output MS level */
|
||||
#define NV50_CB_AUX_SAMPLE_OFFSET 0x980
|
||||
#define NV50_CB_AUX_SAMPLE_OFFSET 0x300
|
||||
#define NV50_CB_AUX_SAMPLE_OFFSET_SIZE (4 * 8 * 2)
|
||||
/* next spot: 0x9c0 */
|
||||
/* next spot: 0x340 */
|
||||
/* 4 32-bit floats for the vertex runout, put at the end */
|
||||
#define NV50_CB_AUX_RUNOUT_OFFSET (NV50_CB_AUX_SIZE - 0x10)
|
||||
|
||||
|
@@ -332,7 +332,7 @@ nv50_render_condition(struct pipe_context *pipe,
|
||||
nv50->cond_cond = condition;
|
||||
nv50->cond_mode = mode;
|
||||
|
||||
PUSH_SPACE(push, 6);
|
||||
PUSH_SPACE(push, 9);
|
||||
|
||||
if (!pq) {
|
||||
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
|
||||
@@ -351,6 +351,10 @@ nv50_render_condition(struct pipe_context *pipe,
|
||||
PUSH_DATAh(push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, NV50_3D_COND_MODE_RES_NON_ZERO);
|
||||
|
||||
BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, q->bo->offset + q->offset);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -397,6 +397,8 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, SUBC_2D(0x0888), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NV04(push, NV50_2D(COND_MODE), 1);
|
||||
PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS);
|
||||
|
||||
BEGIN_NV04(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
|
||||
PUSH_DATA (push, screen->tesla->handle);
|
||||
|
@@ -611,6 +611,7 @@ struct nv50_blitctx
|
||||
uint8_t mode;
|
||||
uint16_t color_mask;
|
||||
uint8_t filter;
|
||||
uint8_t render_condition_enable;
|
||||
enum pipe_texture_target target;
|
||||
struct {
|
||||
struct pipe_framebuffer_state fb;
|
||||
@@ -697,6 +698,12 @@ nv50_blitter_make_fp(struct pipe_context *pipe,
|
||||
tc = ureg_DECL_fs_input(
|
||||
ureg, TGSI_SEMANTIC_GENERIC, 0, TGSI_INTERPOLATE_LINEAR);
|
||||
|
||||
if (ptarg == PIPE_TEXTURE_1D_ARRAY) {
|
||||
/* Adjust coordinates. Depth is in z, but TEX expects it to be in y. */
|
||||
tc = ureg_swizzle(tc, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z,
|
||||
TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z);
|
||||
}
|
||||
|
||||
data = ureg_DECL_temporary(ureg);
|
||||
|
||||
if (tex_s) {
|
||||
@@ -933,7 +940,7 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
|
||||
{
|
||||
struct nouveau_pushbuf *push = blit->nv50->base.pushbuf;
|
||||
|
||||
if (blit->nv50->cond_query) {
|
||||
if (blit->nv50->cond_query && !blit->render_condition_enable) {
|
||||
BEGIN_NV04(push, NV50_3D(COND_MODE), 1);
|
||||
PUSH_DATA (push, NV50_3D_COND_MODE_ALWAYS);
|
||||
}
|
||||
@@ -1071,7 +1078,7 @@ nv50_blitctx_post_blit(struct nv50_blitctx *blit)
|
||||
nv50->samplers[2][0] = blit->saved.sampler[0];
|
||||
nv50->samplers[2][1] = blit->saved.sampler[1];
|
||||
|
||||
if (nv50->cond_query)
|
||||
if (nv50->cond_query && !blit->render_condition_enable)
|
||||
nv50->base.pipe.render_condition(&nv50->base.pipe, nv50->cond_query,
|
||||
nv50->cond_cond, nv50->cond_mode);
|
||||
|
||||
@@ -1105,6 +1112,7 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
|
||||
blit->mode = nv50_blit_select_mode(info);
|
||||
blit->color_mask = nv50_blit_derive_color_mask(info);
|
||||
blit->filter = nv50_blit_get_filter(info);
|
||||
blit->render_condition_enable = info->render_condition_enable;
|
||||
|
||||
nv50_blit_select_fp(blit, info);
|
||||
nv50_blitctx_pre_blit(blit);
|
||||
@@ -1134,6 +1142,12 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
|
||||
y0 *= (float)(1 << nv50_miptree(src)->ms_y);
|
||||
y1 *= (float)(1 << nv50_miptree(src)->ms_y);
|
||||
|
||||
/* XXX: multiply by 6 for cube arrays ? */
|
||||
dz = (float)info->src.box.depth / (float)info->dst.box.depth;
|
||||
z = (float)info->src.box.z;
|
||||
if (nv50_miptree(src)->layout_3d)
|
||||
z += 0.5f * dz;
|
||||
|
||||
if (src->last_level > 0) {
|
||||
/* If there are mip maps, GPU always assumes normalized coordinates. */
|
||||
const unsigned l = info->src.level;
|
||||
@@ -1143,14 +1157,12 @@ nv50_blit_3d(struct nv50_context *nv50, const struct pipe_blit_info *info)
|
||||
x1 /= fh;
|
||||
y0 /= fv;
|
||||
y1 /= fv;
|
||||
if (nv50_miptree(src)->layout_3d) {
|
||||
z /= u_minify(src->depth0, l);
|
||||
dz /= u_minify(src->depth0, l);
|
||||
}
|
||||
}
|
||||
|
||||
/* XXX: multiply by 6 for cube arrays ? */
|
||||
dz = (float)info->src.box.depth / (float)info->dst.box.depth;
|
||||
z = (float)info->src.box.z;
|
||||
if (nv50_miptree(src)->layout_3d)
|
||||
z += 0.5f * dz;
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(VIEWPORT_TRANSFORM_EN), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_3D(VIEW_VOLUME_CLIP_CTRL), 1);
|
||||
@@ -1262,6 +1274,11 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
|
||||
PUSH_DATA (push, 1); /* enable */
|
||||
}
|
||||
|
||||
if (nv50->cond_query && info->render_condition_enable) {
|
||||
BEGIN_NV04(push, NV50_2D(COND_MODE), 1);
|
||||
PUSH_DATA (push, NV50_2D_COND_MODE_RES_NON_ZERO);
|
||||
}
|
||||
|
||||
if (mask != 0xffffffff) {
|
||||
BEGIN_NV04(push, NV50_2D(ROP), 1);
|
||||
PUSH_DATA (push, 0xca); /* DPSDxax */
|
||||
@@ -1384,6 +1401,10 @@ nv50_blit_eng2d(struct nv50_context *nv50, const struct pipe_blit_info *info)
|
||||
BEGIN_NV04(push, NV50_2D(OPERATION), 1);
|
||||
PUSH_DATA (push, NV50_2D_OPERATION_SRCCOPY);
|
||||
}
|
||||
if (nv50->cond_query && info->render_condition_enable) {
|
||||
BEGIN_NV04(push, NV50_2D(COND_MODE), 1);
|
||||
PUSH_DATA (push, NV50_2D_COND_MODE_ALWAYS);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -286,7 +286,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
|
||||
}
|
||||
if (nv50->num_textures[s]) {
|
||||
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
|
||||
PUSH_DATA (push, (NV50_CB_AUX_TEX_MS_OFFSET << (8 - 2)) | NV50_CB_AUX);
|
||||
PUSH_DATA (push, ((NV50_CB_AUX_TEX_MS_OFFSET + 16 * s * 2 * 4) << (8 - 2)) | NV50_CB_AUX);
|
||||
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nv50->num_textures[s] * 2);
|
||||
for (i = 0; i < nv50->num_textures[s]; i++) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nv50->textures[s][i]);
|
||||
|
@@ -585,12 +585,15 @@ nvc0_render_condition(struct pipe_context *pipe,
|
||||
if (wait)
|
||||
nvc0_query_fifo_wait(push, pq);
|
||||
|
||||
PUSH_SPACE(push, 4);
|
||||
PUSH_SPACE(push, 7);
|
||||
PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
|
||||
PUSH_DATAh(push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, cond);
|
||||
BEGIN_NVC0(push, NVC0_2D(COND_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, q->bo->offset + q->offset);
|
||||
PUSH_DATA (push, q->bo->offset + q->offset);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -676,6 +676,8 @@ nvc0_screen_create(struct nouveau_device *dev)
|
||||
PUSH_DATA (push, 0x3f);
|
||||
BEGIN_NVC0(push, SUBC_2D(0x0888), 1);
|
||||
PUSH_DATA (push, 1);
|
||||
BEGIN_NVC0(push, NVC0_2D(COND_MODE), 1);
|
||||
PUSH_DATA (push, NVC0_2D_COND_MODE_ALWAYS);
|
||||
|
||||
BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, screen->fence.bo->offset + 16);
|
||||
|
@@ -503,6 +503,7 @@ struct nvc0_blitctx
|
||||
uint8_t mode;
|
||||
uint16_t color_mask;
|
||||
uint8_t filter;
|
||||
uint8_t render_condition_enable;
|
||||
enum pipe_texture_target target;
|
||||
struct {
|
||||
struct pipe_framebuffer_state fb;
|
||||
@@ -691,7 +692,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
|
||||
|
||||
/* TODO: maybe make this a MACRO (if we need more logic) ? */
|
||||
|
||||
if (blit->nvc0->cond_query)
|
||||
if (blit->nvc0->cond_query && !blit->render_condition_enable)
|
||||
IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
|
||||
|
||||
/* blend state */
|
||||
@@ -833,7 +834,7 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
|
||||
nvc0->textures_dirty[4] |= 3;
|
||||
nvc0->samplers_dirty[4] |= 3;
|
||||
|
||||
if (nvc0->cond_query)
|
||||
if (nvc0->cond_query && !blit->render_condition_enable)
|
||||
nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
|
||||
nvc0->cond_cond, nvc0->cond_mode);
|
||||
|
||||
@@ -868,6 +869,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
|
||||
blit->mode = nv50_blit_select_mode(info);
|
||||
blit->color_mask = nv50_blit_derive_color_mask(info);
|
||||
blit->filter = nv50_blit_get_filter(info);
|
||||
blit->render_condition_enable = info->render_condition_enable;
|
||||
|
||||
nvc0_blit_select_fp(blit, info);
|
||||
nvc0_blitctx_pre_blit(blit);
|
||||
@@ -894,6 +896,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
|
||||
y0 *= (float)(1 << nv50_miptree(src)->ms_y);
|
||||
y1 *= (float)(1 << nv50_miptree(src)->ms_y);
|
||||
|
||||
dz = (float)info->src.box.depth / (float)info->dst.box.depth;
|
||||
z = (float)info->src.box.z;
|
||||
if (nv50_miptree(src)->layout_3d)
|
||||
z += 0.5f * dz;
|
||||
|
||||
if (src->last_level > 0) {
|
||||
/* If there are mip maps, GPU always assumes normalized coordinates. */
|
||||
const unsigned l = info->src.level;
|
||||
@@ -903,13 +910,12 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
|
||||
x1 /= fh;
|
||||
y0 /= fv;
|
||||
y1 /= fv;
|
||||
if (nv50_miptree(src)->layout_3d) {
|
||||
z /= u_minify(src->depth0, l);
|
||||
dz /= u_minify(src->depth0, l);
|
||||
}
|
||||
}
|
||||
|
||||
dz = (float)info->src.box.depth / (float)info->dst.box.depth;
|
||||
z = (float)info->src.box.z;
|
||||
if (nv50_miptree(src)->layout_3d)
|
||||
z += 0.5f * dz;
|
||||
|
||||
IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
|
||||
IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
|
||||
NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
|
||||
@@ -1030,6 +1036,9 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
|
||||
PUSH_DATA (push, 1); /* enable */
|
||||
}
|
||||
|
||||
if (nvc0->cond_query && info->render_condition_enable)
|
||||
IMMED_NVC0(push, NVC0_2D(COND_MODE), NVC0_2D_COND_MODE_RES_NON_ZERO);
|
||||
|
||||
if (mask != 0xffffffff) {
|
||||
IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */
|
||||
IMMED_NVC0(push, NVC0_2D(PATTERN_COLOR_FORMAT),
|
||||
@@ -1154,6 +1163,8 @@ nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
|
||||
IMMED_NVC0(push, NVC0_2D(CLIP_ENABLE), 0);
|
||||
if (mask != 0xffffffff)
|
||||
IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_SRCCOPY);
|
||||
if (nvc0->cond_query && info->render_condition_enable)
|
||||
IMMED_NVC0(push, NVC0_2D(COND_MODE), NVC0_2D_COND_MODE_ALWAYS);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -829,15 +829,6 @@ static INLINE uint32_t S_FIXED(float value, uint32_t frac_bits)
|
||||
}
|
||||
#define ALIGN_DIVUP(x, y) (((x) + (y) - 1) / (y))
|
||||
|
||||
static inline unsigned r600_tex_aniso_filter(unsigned filter)
|
||||
{
|
||||
if (filter <= 1) return 0;
|
||||
if (filter <= 2) return 1;
|
||||
if (filter <= 4) return 2;
|
||||
if (filter <= 8) return 3;
|
||||
/* else */ return 4;
|
||||
}
|
||||
|
||||
/* 12.4 fixed-point */
|
||||
static INLINE unsigned r600_pack_float_12p4(float x)
|
||||
{
|
||||
|
@@ -489,6 +489,15 @@ r600_resource_reference(struct r600_resource **ptr, struct r600_resource *res)
|
||||
(struct pipe_resource *)res);
|
||||
}
|
||||
|
||||
static inline unsigned r600_tex_aniso_filter(unsigned filter)
|
||||
{
|
||||
if (filter <= 1) return 0;
|
||||
if (filter <= 2) return 1;
|
||||
if (filter <= 4) return 2;
|
||||
if (filter <= 8) return 3;
|
||||
/* else */ return 4;
|
||||
}
|
||||
|
||||
#define R600_ERR(fmt, args...) \
|
||||
fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, ##args)
|
||||
|
||||
|
@@ -152,7 +152,7 @@ static void si_update_descriptors(struct si_context *sctx,
|
||||
7 + /* copy */
|
||||
(4 + desc->element_dw_size) * util_bitcount(desc->dirty_mask) + /* update */
|
||||
4; /* pointer update */
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
#if LLVM_SUPPORTS_GEOM_SHADERS
|
||||
if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
|
||||
desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0)
|
||||
desc->atom.num_dw += 4; /* second pointer update */
|
||||
@@ -177,7 +177,7 @@ static void si_emit_shader_pointer(struct si_context *sctx,
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, va >> 32);
|
||||
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
#if LLVM_SUPPORTS_GEOM_SHADERS
|
||||
if (desc->shader_userdata_reg >= R_00B130_SPI_SHADER_USER_DATA_VS_0 &&
|
||||
desc->shader_userdata_reg < R_00B230_SPI_SHADER_USER_DATA_GS_0) {
|
||||
radeon_emit(cs, PKT3(PKT3_SET_SH_REG, 2, 0));
|
||||
|
@@ -224,7 +224,7 @@ static int si_get_param(struct pipe_screen* pscreen, enum pipe_cap param)
|
||||
return 4;
|
||||
|
||||
case PIPE_CAP_GLSL_FEATURE_LEVEL:
|
||||
return HAVE_LLVM >= 0x0305 ? 330 : 140;
|
||||
return (LLVM_SUPPORTS_GEOM_SHADERS) ? 330 : 140;
|
||||
|
||||
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
|
||||
return MIN2(sscreen->b.info.vram_size, 0xFFFFFFFF);
|
||||
@@ -308,7 +308,7 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
||||
case PIPE_SHADER_VERTEX:
|
||||
break;
|
||||
case PIPE_SHADER_GEOMETRY:
|
||||
#if HAVE_LLVM < 0x0305
|
||||
#if !(LLVM_SUPPORTS_GEOM_SHADERS)
|
||||
return 0;
|
||||
#endif
|
||||
break;
|
||||
|
@@ -39,6 +39,10 @@
|
||||
|
||||
#define SI_MAX_DRAW_CS_DWORDS 18
|
||||
|
||||
#define LLVM_SUPPORTS_GEOM_SHADERS \
|
||||
((HAVE_LLVM >= 0x0305) || \
|
||||
(HAVE_LLVM == 0x0304 && LLVM_VERSION_PATCH >= 1))
|
||||
|
||||
struct si_pipe_compute;
|
||||
|
||||
struct si_screen {
|
||||
|
@@ -2173,7 +2173,7 @@ static void *si_create_fs_state(struct pipe_context *ctx,
|
||||
return si_create_shader_state(ctx, state, PIPE_SHADER_FRAGMENT);
|
||||
}
|
||||
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
#if LLVM_SUPPORTS_GEOM_SHADERS
|
||||
|
||||
static void *si_create_gs_state(struct pipe_context *ctx,
|
||||
const struct pipe_shader_state *state)
|
||||
@@ -2203,7 +2203,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
||||
sctx->vs_shader = sel;
|
||||
}
|
||||
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
#if LLVM_SUPPORTS_GEOM_SHADERS
|
||||
|
||||
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
@@ -2271,7 +2271,7 @@ static void si_delete_vs_shader(struct pipe_context *ctx, void *state)
|
||||
si_delete_shader_selector(ctx, sel);
|
||||
}
|
||||
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
#if LLVM_SUPPORTS_GEOM_SHADERS
|
||||
|
||||
static void si_delete_gs_shader(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
@@ -2599,16 +2599,15 @@ static void *si_create_sampler_state(struct pipe_context *ctx,
|
||||
rstate->val[0] = (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) |
|
||||
S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) |
|
||||
S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) |
|
||||
(state->max_anisotropy & 0x7) << 9 | /* XXX */
|
||||
r600_tex_aniso_filter(state->max_anisotropy) << 9 |
|
||||
S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) |
|
||||
S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) |
|
||||
aniso_flag_offset << 16 | /* XXX */
|
||||
S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map));
|
||||
rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) |
|
||||
S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)));
|
||||
rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) |
|
||||
S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter)) |
|
||||
S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter)) |
|
||||
S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter) | aniso_flag_offset) |
|
||||
S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter) | aniso_flag_offset) |
|
||||
S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter)));
|
||||
rstate->val[3] = S_008F3C_BORDER_COLOR_TYPE(border_color_type);
|
||||
|
||||
@@ -2767,7 +2766,7 @@ static void si_bind_vs_sampler_states(struct pipe_context *ctx, unsigned count,
|
||||
si_set_sampler_states(sctx, pm4, count, states,
|
||||
&sctx->samplers[PIPE_SHADER_VERTEX],
|
||||
R_00B130_SPI_SHADER_USER_DATA_VS_0);
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
#if LLVM_SUPPORTS_GEOM_SHADERS
|
||||
si_set_sampler_states(sctx, pm4, count, states,
|
||||
&sctx->samplers[PIPE_SHADER_VERTEX],
|
||||
R_00B330_SPI_SHADER_USER_DATA_ES_0);
|
||||
@@ -2999,7 +2998,7 @@ void si_init_state_functions(struct si_context *sctx)
|
||||
sctx->b.b.bind_fs_state = si_bind_ps_shader;
|
||||
sctx->b.b.delete_vs_state = si_delete_vs_shader;
|
||||
sctx->b.b.delete_fs_state = si_delete_ps_shader;
|
||||
#if HAVE_LLVM >= 0x0305
|
||||
#if LLVM_SUPPORTS_GEOM_SHADERS
|
||||
sctx->b.b.create_gs_state = si_create_gs_state;
|
||||
sctx->b.b.bind_gs_state = si_bind_gs_shader;
|
||||
sctx->b.b.delete_gs_state = si_delete_gs_shader;
|
||||
|
@@ -591,6 +591,9 @@ struct pipe_blit_info
|
||||
|
||||
boolean scissor_enable;
|
||||
struct pipe_scissor_state scissor;
|
||||
|
||||
boolean render_condition_enable; /**< whether to leave current render
|
||||
condition enabled */
|
||||
};
|
||||
|
||||
|
||||
|
@@ -42,8 +42,11 @@ namespace {
|
||||
device::device(clover::platform &platform, pipe_loader_device *ldev) :
|
||||
platform(platform), ldev(ldev) {
|
||||
pipe = pipe_loader_create_screen(ldev, PIPE_SEARCH_DIR);
|
||||
if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE))
|
||||
if (!pipe || !pipe->get_param(pipe, PIPE_CAP_COMPUTE)) {
|
||||
if (pipe)
|
||||
pipe->destroy(pipe);
|
||||
throw error(CL_INVALID_DEVICE);
|
||||
}
|
||||
}
|
||||
|
||||
device::~device() {
|
||||
|
@@ -223,7 +223,7 @@ XA_EXPORT int
|
||||
xa_copy_prepare(struct xa_context *ctx,
|
||||
struct xa_surface *dst, struct xa_surface *src)
|
||||
{
|
||||
if (src == dst || ctx->srf != NULL)
|
||||
if (src == dst)
|
||||
return -XA_ERR_INVAL;
|
||||
|
||||
if (src->tex->format != dst->tex->format) {
|
||||
|
@@ -48,7 +48,7 @@ AM_LDFLAGS = \
|
||||
-module \
|
||||
-no-undefined \
|
||||
-avoid-version \
|
||||
-Wl,--version-script=$(top_srcdir)/src/gallium/targets/egl-static/egl.sym
|
||||
-Wl,--version-script=$(top_srcdir)/src/gallium/targets/egl-static/egl.sym \
|
||||
$(GC_SECTIONS) \
|
||||
$(LD_NO_UNDEFINED)
|
||||
|
||||
|
@@ -62,11 +62,9 @@
|
||||
#include "program/prog_instruction.h"
|
||||
#include <limits>
|
||||
|
||||
#define f(x) join(x)
|
||||
#define join(x) x ## f
|
||||
#define M_PIf f(M_PI)
|
||||
#define M_PI_2f f(M_PI_2)
|
||||
#define M_PI_4f f(M_PI_4)
|
||||
#define M_PIf ((float) M_PI)
|
||||
#define M_PI_2f ((float) M_PI_2)
|
||||
#define M_PI_4f ((float) M_PI_4)
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
|
@@ -1319,6 +1319,13 @@ layout_qualifier_id:
|
||||
if (match_layout_qualifier("location", $1, state) == 0) {
|
||||
$$.flags.q.explicit_location = 1;
|
||||
|
||||
if ($$.flags.q.attribute == 1 &&
|
||||
state->ARB_explicit_attrib_location_warn) {
|
||||
_mesa_glsl_warning(& @1, state,
|
||||
"GL_ARB_explicit_attrib_location layout "
|
||||
"identifier `%s' used", $1);
|
||||
}
|
||||
|
||||
if ($3 >= 0) {
|
||||
$$.location = $3;
|
||||
} else {
|
||||
@@ -1426,10 +1433,6 @@ layout_qualifier_id:
|
||||
_mesa_glsl_error(& @1, state, "unrecognized layout identifier "
|
||||
"`%s'", $1);
|
||||
YYERROR;
|
||||
} else if (state->ARB_explicit_attrib_location_warn) {
|
||||
_mesa_glsl_warning(& @1, state,
|
||||
"GL_ARB_explicit_attrib_location layout "
|
||||
"identifier `%s' used", $1);
|
||||
}
|
||||
}
|
||||
| interface_block_layout_qualifier
|
||||
|
@@ -1092,11 +1092,11 @@ bool
|
||||
populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
|
||||
hash_table *consumer_inputs,
|
||||
hash_table *consumer_interface_inputs,
|
||||
ir_variable *consumer_inputs_with_locations[MAX_VARYING])
|
||||
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX])
|
||||
{
|
||||
memset(consumer_inputs_with_locations,
|
||||
0,
|
||||
sizeof(consumer_inputs_with_locations[0]) * MAX_VARYING);
|
||||
sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_MAX);
|
||||
|
||||
foreach_list(node, ir) {
|
||||
ir_variable *const input_var = ((ir_instruction *) node)->as_variable();
|
||||
@@ -1152,7 +1152,7 @@ get_matching_input(void *mem_ctx,
|
||||
const ir_variable *output_var,
|
||||
hash_table *consumer_inputs,
|
||||
hash_table *consumer_interface_inputs,
|
||||
ir_variable *consumer_inputs_with_locations[MAX_VARYING])
|
||||
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX])
|
||||
{
|
||||
ir_variable *input_var;
|
||||
|
||||
@@ -1277,7 +1277,7 @@ assign_varying_locations(struct gl_context *ctx,
|
||||
= hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
|
||||
hash_table *consumer_interface_inputs
|
||||
= hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
|
||||
ir_variable *consumer_inputs_with_locations[MAX_VARYING] = {
|
||||
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX] = {
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@@ -39,14 +39,14 @@ bool
|
||||
populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
|
||||
hash_table *consumer_inputs,
|
||||
hash_table *consumer_interface_inputs,
|
||||
ir_variable *consumer_inputs_with_locations[MAX_VARYING]);
|
||||
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]);
|
||||
|
||||
ir_variable *
|
||||
get_matching_input(void *mem_ctx,
|
||||
const ir_variable *output_var,
|
||||
hash_table *consumer_inputs,
|
||||
hash_table *consumer_interface_inputs,
|
||||
ir_variable *consumer_inputs_with_locations[MAX_VARYING]);
|
||||
ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]);
|
||||
}
|
||||
|
||||
class link_varyings : public ::testing::Test {
|
||||
@@ -70,7 +70,7 @@ public:
|
||||
hash_table *consumer_interface_inputs;
|
||||
|
||||
const glsl_type *simple_interface;
|
||||
ir_variable *junk[MAX_VARYING];
|
||||
ir_variable *junk[VARYING_SLOT_MAX];
|
||||
};
|
||||
|
||||
link_varyings::link_varyings()
|
||||
@@ -197,9 +197,8 @@ TEST_F(link_varyings, gl_ClipDistance)
|
||||
consumer_interface_inputs,
|
||||
junk));
|
||||
|
||||
EXPECT_EQ((void *) clipdistance,
|
||||
hash_table_find(consumer_inputs, "gl_ClipDistance"));
|
||||
EXPECT_EQ(1u, num_elements(consumer_inputs));
|
||||
EXPECT_EQ(clipdistance, junk[VARYING_SLOT_CLIP_DIST0]);
|
||||
EXPECT_TRUE(is_empty(consumer_inputs));
|
||||
EXPECT_TRUE(is_empty(consumer_interface_inputs));
|
||||
}
|
||||
|
||||
|
@@ -73,11 +73,15 @@ apple_visual_create_pfobj(CGLPixelFormatObj * pfobj, const struct glx_config * m
|
||||
GLint vsref = 0;
|
||||
CGLError error = 0;
|
||||
|
||||
/* Request an OpenGL 3.2 profile if one is available */
|
||||
if(apple_cgl.version_major > 1 || (apple_cgl.version_major == 1 && apple_cgl.version_minor >= 3)) {
|
||||
attr[numattr++] = kCGLPFAOpenGLProfile;
|
||||
attr[numattr++] = kCGLOGLPVersion_3_2_Core;
|
||||
}
|
||||
/* Request an OpenGL 3.2 profile if one is available and supported */
|
||||
attr[numattr++] = kCGLPFAOpenGLProfile;
|
||||
attr[numattr++] = kCGLOGLPVersion_3_2_Core;
|
||||
|
||||
/* Test for kCGLPFAOpenGLProfile support at runtime and roll it out if not supported */
|
||||
attr[numattr] = 0;
|
||||
error = apple_cgl.choose_pixel_format(attr, pfobj, &vsref);
|
||||
if (error == kCGLBadAttribute)
|
||||
numattr -= 2;
|
||||
|
||||
if (offscreen) {
|
||||
apple_glx_diagnostic
|
||||
|
@@ -141,10 +141,10 @@ fake_queryString(__DRIscreen *screen, int attribute, const char **val)
|
||||
}
|
||||
|
||||
static const __DRI2rendererQueryExtension rendererQueryExt = {
|
||||
.base = { __DRI2_RENDERER_QUERY, 1 },
|
||||
{ __DRI2_RENDERER_QUERY, 1 },
|
||||
|
||||
.queryInteger = fake_queryInteger,
|
||||
.queryString = fake_queryString
|
||||
fake_queryInteger,
|
||||
fake_queryString
|
||||
};
|
||||
|
||||
void dri2_query_renderer_string_test::SetUp()
|
||||
|
@@ -113,7 +113,7 @@ __glapi_gentable_set_remaining_noop(struct _glapi_table *disp) {
|
||||
|
||||
struct _glapi_table *
|
||||
_glapi_create_table_from_handle(void *handle, const char *symbol_prefix) {
|
||||
struct _glapi_table *disp = calloc(1, sizeof(struct _glapi_table));
|
||||
struct _glapi_table *disp = calloc(1, _glapi_get_dispatch_table_size() * sizeof(_glapi_proc));
|
||||
char symboln[512];
|
||||
|
||||
if(!disp)
|
||||
|
@@ -86,6 +86,9 @@
|
||||
/** Return offset in bytes of the field within a vertex struct */
|
||||
#define OFFSET(FIELD) ((void *) offsetof(struct vertex, FIELD))
|
||||
|
||||
static void
|
||||
meta_clear(struct gl_context *ctx, GLbitfield buffers, bool glsl);
|
||||
|
||||
static struct blit_shader *
|
||||
choose_blit_shader(GLenum target, struct blit_shader_table *table);
|
||||
|
||||
@@ -201,6 +204,31 @@ _mesa_meta_link_program_with_debug(struct gl_context *ctx, GLuint program)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void
|
||||
_mesa_meta_compile_and_link_program(struct gl_context *ctx,
|
||||
const char *vs_source,
|
||||
const char *fs_source,
|
||||
const char *name,
|
||||
GLuint *program)
|
||||
{
|
||||
GLuint vs = _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER,
|
||||
vs_source);
|
||||
GLuint fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER,
|
||||
fs_source);
|
||||
|
||||
*program = _mesa_CreateProgram();
|
||||
_mesa_AttachShader(*program, fs);
|
||||
_mesa_DeleteShader(fs);
|
||||
_mesa_AttachShader(*program, vs);
|
||||
_mesa_DeleteShader(vs);
|
||||
_mesa_BindAttribLocation(*program, 0, "position");
|
||||
_mesa_BindAttribLocation(*program, 1, "texcoords");
|
||||
_mesa_meta_link_program_with_debug(ctx, *program);
|
||||
_mesa_ObjectLabel(GL_PROGRAM, *program, -1, name);
|
||||
|
||||
_mesa_UseProgram(*program);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate a generic shader to blit from a texture to a framebuffer
|
||||
*
|
||||
@@ -214,12 +242,25 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
|
||||
GLenum target,
|
||||
struct blit_shader_table *table)
|
||||
{
|
||||
const char *vs_source;
|
||||
char *fs_source;
|
||||
GLuint vs, fs;
|
||||
char *vs_source, *fs_source;
|
||||
void *const mem_ctx = ralloc_context(NULL);
|
||||
struct blit_shader *shader = choose_blit_shader(target, table);
|
||||
char *name;
|
||||
const char *vs_input, *vs_output, *fs_input, *vs_preprocess, *fs_preprocess;
|
||||
|
||||
if (ctx->Const.GLSLVersion < 130) {
|
||||
vs_preprocess = "";
|
||||
vs_input = "attribute";
|
||||
vs_output = "varying";
|
||||
fs_preprocess = "#extension GL_EXT_texture_array : enable";
|
||||
fs_input = "varying";
|
||||
} else {
|
||||
vs_preprocess = "#version 130";
|
||||
vs_input = "in";
|
||||
vs_output = "out";
|
||||
fs_preprocess = "#version 130";
|
||||
fs_input = "in";
|
||||
shader->func = "texture";
|
||||
}
|
||||
|
||||
assert(shader != NULL);
|
||||
|
||||
@@ -228,73 +269,36 @@ _mesa_meta_setup_blit_shader(struct gl_context *ctx,
|
||||
return;
|
||||
}
|
||||
|
||||
if (ctx->Const.GLSLVersion < 130) {
|
||||
vs_source =
|
||||
"attribute vec2 position;\n"
|
||||
"attribute vec4 textureCoords;\n"
|
||||
"varying vec4 texCoords;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" texCoords = textureCoords;\n"
|
||||
" gl_Position = vec4(position, 0.0, 1.0);\n"
|
||||
"}\n";
|
||||
vs_source = ralloc_asprintf(mem_ctx,
|
||||
"%s\n"
|
||||
"%s vec2 position;\n"
|
||||
"%s vec4 textureCoords;\n"
|
||||
"%s vec4 texCoords;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" texCoords = textureCoords;\n"
|
||||
" gl_Position = vec4(position, 0.0, 1.0);\n"
|
||||
"}\n",
|
||||
vs_preprocess, vs_input, vs_input, vs_output);
|
||||
|
||||
fs_source = ralloc_asprintf(mem_ctx,
|
||||
"#extension GL_EXT_texture_array : enable\n"
|
||||
"#extension GL_ARB_texture_cube_map_array: enable\n"
|
||||
"uniform %s texSampler;\n"
|
||||
"varying vec4 texCoords;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_FragColor = %s(texSampler, %s);\n"
|
||||
" gl_FragDepth = gl_FragColor.x;\n"
|
||||
"}\n",
|
||||
shader->type,
|
||||
shader->func, shader->texcoords);
|
||||
}
|
||||
else {
|
||||
vs_source = ralloc_asprintf(mem_ctx,
|
||||
"#version 130\n"
|
||||
"in vec2 position;\n"
|
||||
"in vec4 textureCoords;\n"
|
||||
"out vec4 texCoords;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" texCoords = textureCoords;\n"
|
||||
" gl_Position = vec4(position, 0.0, 1.0);\n"
|
||||
"}\n");
|
||||
fs_source = ralloc_asprintf(mem_ctx,
|
||||
"#version 130\n"
|
||||
"#extension GL_ARB_texture_cube_map_array: enable\n"
|
||||
"uniform %s texSampler;\n"
|
||||
"in vec4 texCoords;\n"
|
||||
"out vec4 out_color;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" out_color = texture(texSampler, %s);\n"
|
||||
" gl_FragDepth = out_color.x;\n"
|
||||
"}\n",
|
||||
shader->type,
|
||||
shader->texcoords);
|
||||
}
|
||||
fs_source = ralloc_asprintf(mem_ctx,
|
||||
"%s\n"
|
||||
"#extension GL_ARB_texture_cube_map_array: enable\n"
|
||||
"uniform %s texSampler;\n"
|
||||
"%s vec4 texCoords;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_FragColor = %s(texSampler, %s);\n"
|
||||
" gl_FragDepth = gl_FragColor.x;\n"
|
||||
"}\n",
|
||||
fs_preprocess, shader->type, fs_input,
|
||||
shader->func, shader->texcoords);
|
||||
|
||||
vs = _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, vs_source);
|
||||
fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, fs_source);
|
||||
|
||||
shader->shader_prog = _mesa_CreateProgram();
|
||||
_mesa_AttachShader(shader->shader_prog, fs);
|
||||
_mesa_DeleteShader(fs);
|
||||
_mesa_AttachShader(shader->shader_prog, vs);
|
||||
_mesa_DeleteShader(vs);
|
||||
_mesa_BindAttribLocation(shader->shader_prog, 0, "position");
|
||||
_mesa_BindAttribLocation(shader->shader_prog, 1, "texcoords");
|
||||
_mesa_meta_link_program_with_debug(ctx, shader->shader_prog);
|
||||
name = ralloc_asprintf(mem_ctx, "%s blit", shader->type);
|
||||
_mesa_ObjectLabel(GL_PROGRAM, shader->shader_prog, -1, name);
|
||||
_mesa_meta_compile_and_link_program(ctx, vs_source, fs_source,
|
||||
ralloc_asprintf(mem_ctx, "%s blit",
|
||||
shader->type),
|
||||
&shader->shader_prog);
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
_mesa_UseProgram(shader->shader_prog);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -389,6 +393,24 @@ _mesa_meta_init(struct gl_context *ctx)
|
||||
ctx->Meta = CALLOC_STRUCT(gl_meta_state);
|
||||
}
|
||||
|
||||
static GLenum
|
||||
gl_buffer_index_to_drawbuffers_enum(gl_buffer_index bufindex)
|
||||
{
|
||||
assert(bufindex < BUFFER_COUNT);
|
||||
|
||||
if (bufindex >= BUFFER_COLOR0)
|
||||
return GL_COLOR_ATTACHMENT0 + bufindex - BUFFER_COLOR0;
|
||||
else if (bufindex == BUFFER_FRONT_LEFT)
|
||||
return GL_FRONT_LEFT;
|
||||
else if (bufindex == BUFFER_FRONT_RIGHT)
|
||||
return GL_FRONT_RIGHT;
|
||||
else if (bufindex == BUFFER_BACK_LEFT)
|
||||
return GL_BACK_LEFT;
|
||||
else if (bufindex == BUFFER_BACK_RIGHT)
|
||||
return GL_BACK_RIGHT;
|
||||
|
||||
return GL_NONE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Free context meta-op state.
|
||||
@@ -775,6 +797,23 @@ _mesa_meta_begin(struct gl_context *ctx, GLbitfield state)
|
||||
_mesa_set_framebuffer_srgb(ctx, GL_FALSE);
|
||||
}
|
||||
|
||||
if (state & MESA_META_DRAW_BUFFERS) {
|
||||
int buf, real_color_buffers = 0;
|
||||
memset(save->ColorDrawBuffers, 0, sizeof(save->ColorDrawBuffers));
|
||||
|
||||
for (buf = 0; buf < MAX_DRAW_BUFFERS; buf++) {
|
||||
int buf_index = ctx->DrawBuffer->_ColorDrawBufferIndexes[buf];
|
||||
if (buf_index == -1)
|
||||
continue;
|
||||
|
||||
save->ColorDrawBuffers[buf] =
|
||||
gl_buffer_index_to_drawbuffers_enum(buf_index);
|
||||
|
||||
if (++real_color_buffers >= ctx->DrawBuffer->_NumColorDrawBuffers)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* misc */
|
||||
{
|
||||
save->Lighting = ctx->Light.Enabled;
|
||||
@@ -1173,6 +1212,10 @@ _mesa_meta_end(struct gl_context *ctx)
|
||||
ctx->CurrentRenderbuffer->Name != save->RenderbufferName)
|
||||
_mesa_BindRenderbuffer(GL_RENDERBUFFER, save->RenderbufferName);
|
||||
|
||||
if (state & MESA_META_DRAW_BUFFERS) {
|
||||
_mesa_DrawBuffers(MAX_DRAW_BUFFERS, save->ColorDrawBuffers);
|
||||
}
|
||||
|
||||
ctx->Meta->SaveStackDepth--;
|
||||
|
||||
ctx->API = save->API;
|
||||
@@ -1459,100 +1502,13 @@ _mesa_meta_setup_ff_tnl_for_blit(GLuint *VAO, GLuint *VBO,
|
||||
void
|
||||
_mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
{
|
||||
struct clear_state *clear = &ctx->Meta->Clear;
|
||||
struct vertex verts[4];
|
||||
/* save all state but scissor, pixel pack/unpack */
|
||||
GLbitfield metaSave = (MESA_META_ALL -
|
||||
MESA_META_SCISSOR -
|
||||
MESA_META_PIXEL_STORE -
|
||||
MESA_META_CONDITIONAL_RENDER -
|
||||
MESA_META_FRAMEBUFFER_SRGB);
|
||||
const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
|
||||
meta_clear(ctx, buffers, false);
|
||||
}
|
||||
|
||||
if (buffers & BUFFER_BITS_COLOR) {
|
||||
/* if clearing color buffers, don't save/restore colormask */
|
||||
metaSave -= MESA_META_COLOR_MASK;
|
||||
}
|
||||
|
||||
_mesa_meta_begin(ctx, metaSave);
|
||||
|
||||
_mesa_meta_setup_vertex_objects(&clear->VAO, &clear->VBO, false, 3, 0, 4);
|
||||
|
||||
/* GL_COLOR_BUFFER_BIT */
|
||||
if (buffers & BUFFER_BITS_COLOR) {
|
||||
/* leave colormask, glDrawBuffer state as-is */
|
||||
|
||||
/* Clears never have the color clamped. */
|
||||
if (ctx->Extensions.ARB_color_buffer_float)
|
||||
_mesa_ClampColor(GL_CLAMP_FRAGMENT_COLOR, GL_FALSE);
|
||||
}
|
||||
else {
|
||||
ASSERT(metaSave & MESA_META_COLOR_MASK);
|
||||
_mesa_ColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE);
|
||||
}
|
||||
|
||||
/* GL_DEPTH_BUFFER_BIT */
|
||||
if (buffers & BUFFER_BIT_DEPTH) {
|
||||
_mesa_set_enable(ctx, GL_DEPTH_TEST, GL_TRUE);
|
||||
_mesa_DepthFunc(GL_ALWAYS);
|
||||
_mesa_DepthMask(GL_TRUE);
|
||||
}
|
||||
else {
|
||||
assert(!ctx->Depth.Test);
|
||||
}
|
||||
|
||||
/* GL_STENCIL_BUFFER_BIT */
|
||||
if (buffers & BUFFER_BIT_STENCIL) {
|
||||
_mesa_set_enable(ctx, GL_STENCIL_TEST, GL_TRUE);
|
||||
_mesa_StencilOpSeparate(GL_FRONT_AND_BACK,
|
||||
GL_REPLACE, GL_REPLACE, GL_REPLACE);
|
||||
_mesa_StencilFuncSeparate(GL_FRONT_AND_BACK, GL_ALWAYS,
|
||||
ctx->Stencil.Clear & stencilMax,
|
||||
ctx->Stencil.WriteMask[0]);
|
||||
}
|
||||
else {
|
||||
assert(!ctx->Stencil.Enabled);
|
||||
}
|
||||
|
||||
/* vertex positions/colors */
|
||||
{
|
||||
const GLfloat x0 = (GLfloat) ctx->DrawBuffer->_Xmin;
|
||||
const GLfloat y0 = (GLfloat) ctx->DrawBuffer->_Ymin;
|
||||
const GLfloat x1 = (GLfloat) ctx->DrawBuffer->_Xmax;
|
||||
const GLfloat y1 = (GLfloat) ctx->DrawBuffer->_Ymax;
|
||||
const GLfloat z = invert_z(ctx->Depth.Clear);
|
||||
GLuint i;
|
||||
|
||||
verts[0].x = x0;
|
||||
verts[0].y = y0;
|
||||
verts[0].z = z;
|
||||
verts[1].x = x1;
|
||||
verts[1].y = y0;
|
||||
verts[1].z = z;
|
||||
verts[2].x = x1;
|
||||
verts[2].y = y1;
|
||||
verts[2].z = z;
|
||||
verts[3].x = x0;
|
||||
verts[3].y = y1;
|
||||
verts[3].z = z;
|
||||
|
||||
/* vertex colors */
|
||||
for (i = 0; i < 4; i++) {
|
||||
verts[i].r = ctx->Color.ClearColor.f[0];
|
||||
verts[i].g = ctx->Color.ClearColor.f[1];
|
||||
verts[i].b = ctx->Color.ClearColor.f[2];
|
||||
verts[i].a = ctx->Color.ClearColor.f[3];
|
||||
}
|
||||
|
||||
/* upload new vertex data */
|
||||
_mesa_BufferData(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts,
|
||||
GL_DYNAMIC_DRAW_ARB);
|
||||
}
|
||||
|
||||
/* draw quad */
|
||||
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
|
||||
_mesa_meta_end(ctx);
|
||||
void
|
||||
_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
{
|
||||
meta_clear(ctx, buffers, true);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1699,22 +1655,61 @@ meta_glsl_clear_cleanup(struct clear_state *clear)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a bitfield of BUFFER_BIT_x draw buffers, call glDrawBuffers to
|
||||
* set GL to only draw to those buffers.
|
||||
*
|
||||
* Since the bitfield has no associated order, the assignment of draw buffer
|
||||
* indices to color attachment indices is rather arbitrary.
|
||||
*/
|
||||
static void
|
||||
drawbuffers_from_bitfield(GLbitfield bits)
|
||||
{
|
||||
GLenum enums[MAX_DRAW_BUFFERS];
|
||||
int i = 0;
|
||||
int n;
|
||||
|
||||
/* This function is only legal for color buffer bitfields. */
|
||||
assert((bits & ~BUFFER_BITS_COLOR) == 0);
|
||||
|
||||
/* Make sure we don't overflow any arrays. */
|
||||
assert(_mesa_bitcount(bits) <= MAX_DRAW_BUFFERS);
|
||||
|
||||
enums[0] = GL_NONE;
|
||||
|
||||
if (bits & BUFFER_BIT_FRONT_LEFT)
|
||||
enums[i++] = GL_FRONT_LEFT;
|
||||
|
||||
if (bits & BUFFER_BIT_FRONT_RIGHT)
|
||||
enums[i++] = GL_FRONT_RIGHT;
|
||||
|
||||
if (bits & BUFFER_BIT_BACK_LEFT)
|
||||
enums[i++] = GL_BACK_LEFT;
|
||||
|
||||
if (bits & BUFFER_BIT_BACK_RIGHT)
|
||||
enums[i++] = GL_BACK_RIGHT;
|
||||
|
||||
for (n = 0; n < MAX_COLOR_ATTACHMENTS; n++) {
|
||||
if (bits & (1 << (BUFFER_COLOR0 + n)))
|
||||
enums[i++] = GL_COLOR_ATTACHMENT0 + n;
|
||||
}
|
||||
|
||||
_mesa_DrawBuffers(i, enums);
|
||||
}
|
||||
|
||||
/**
|
||||
* Meta implementation of ctx->Driver.Clear() in terms of polygon rendering.
|
||||
*/
|
||||
void
|
||||
_mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
static void
|
||||
meta_clear(struct gl_context *ctx, GLbitfield buffers, bool glsl)
|
||||
{
|
||||
struct clear_state *clear = &ctx->Meta->Clear;
|
||||
GLbitfield metaSave;
|
||||
const GLuint stencilMax = (1 << ctx->DrawBuffer->Visual.stencilBits) - 1;
|
||||
struct gl_framebuffer *fb = ctx->DrawBuffer;
|
||||
const float x0 = ((float)fb->_Xmin / fb->Width) * 2.0f - 1.0f;
|
||||
const float y0 = ((float)fb->_Ymin / fb->Height) * 2.0f - 1.0f;
|
||||
const float x1 = ((float)fb->_Xmax / fb->Width) * 2.0f - 1.0f;
|
||||
const float y1 = ((float)fb->_Ymax / fb->Height) * 2.0f - 1.0f;
|
||||
const float z = -invert_z(ctx->Depth.Clear);
|
||||
float x0, y0, x1, y1, z;
|
||||
struct vertex verts[4];
|
||||
int i;
|
||||
|
||||
metaSave = (MESA_META_ALPHA_TEST |
|
||||
MESA_META_BLEND |
|
||||
@@ -1729,7 +1724,18 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
MESA_META_MULTISAMPLE |
|
||||
MESA_META_OCCLUSION_QUERY);
|
||||
|
||||
if (!(buffers & BUFFER_BITS_COLOR)) {
|
||||
if (!glsl) {
|
||||
metaSave |= MESA_META_FOG |
|
||||
MESA_META_PIXEL_TRANSFER |
|
||||
MESA_META_TRANSFORM |
|
||||
MESA_META_TEXTURE |
|
||||
MESA_META_CLAMP_VERTEX_COLOR |
|
||||
MESA_META_SELECT_FEEDBACK;
|
||||
}
|
||||
|
||||
if (buffers & BUFFER_BITS_COLOR) {
|
||||
metaSave |= MESA_META_DRAW_BUFFERS;
|
||||
} else {
|
||||
/* We'll use colormask to disable color writes. Otherwise,
|
||||
* respect color mask
|
||||
*/
|
||||
@@ -1738,13 +1744,30 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
|
||||
_mesa_meta_begin(ctx, metaSave);
|
||||
|
||||
meta_glsl_clear_init(ctx, clear);
|
||||
if (glsl) {
|
||||
meta_glsl_clear_init(ctx, clear);
|
||||
|
||||
x0 = ((float) fb->_Xmin / fb->Width) * 2.0f - 1.0f;
|
||||
y0 = ((float) fb->_Ymin / fb->Height) * 2.0f - 1.0f;
|
||||
x1 = ((float) fb->_Xmax / fb->Width) * 2.0f - 1.0f;
|
||||
y1 = ((float) fb->_Ymax / fb->Height) * 2.0f - 1.0f;
|
||||
z = -invert_z(ctx->Depth.Clear);
|
||||
} else {
|
||||
_mesa_meta_setup_vertex_objects(&clear->VAO, &clear->VBO, false, 3, 0, 4);
|
||||
|
||||
x0 = (float) fb->_Xmin;
|
||||
y0 = (float) fb->_Ymin;
|
||||
x1 = (float) fb->_Xmax;
|
||||
y1 = (float) fb->_Ymax;
|
||||
z = invert_z(ctx->Depth.Clear);
|
||||
}
|
||||
|
||||
if (fb->_IntegerColor) {
|
||||
assert(glsl);
|
||||
_mesa_UseProgram(clear->IntegerShaderProg);
|
||||
_mesa_Uniform4iv(clear->IntegerColorLocation, 1,
|
||||
ctx->Color.ClearColor.i);
|
||||
} else {
|
||||
} else if (glsl) {
|
||||
_mesa_UseProgram(clear->ShaderProg);
|
||||
_mesa_Uniform4fv(clear->ColorLocation, 1,
|
||||
ctx->Color.ClearColor.f);
|
||||
@@ -1752,7 +1775,10 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
|
||||
/* GL_COLOR_BUFFER_BIT */
|
||||
if (buffers & BUFFER_BITS_COLOR) {
|
||||
/* leave colormask, glDrawBuffer state as-is */
|
||||
/* Only draw to the buffers we were asked to clear. */
|
||||
drawbuffers_from_bitfield(buffers & BUFFER_BITS_COLOR);
|
||||
|
||||
/* leave colormask state as-is */
|
||||
|
||||
/* Clears never have the color clamped. */
|
||||
if (ctx->Extensions.ARB_color_buffer_float)
|
||||
@@ -1800,6 +1826,15 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
verts[3].y = y1;
|
||||
verts[3].z = z;
|
||||
|
||||
if (!glsl) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
verts[i].r = ctx->Color.ClearColor.f[0];
|
||||
verts[i].g = ctx->Color.ClearColor.f[1];
|
||||
verts[i].b = ctx->Color.ClearColor.f[2];
|
||||
verts[i].a = ctx->Color.ClearColor.f[3];
|
||||
}
|
||||
}
|
||||
|
||||
/* upload new vertex data */
|
||||
_mesa_BufferData(GL_ARRAY_BUFFER_ARB, sizeof(verts), verts,
|
||||
GL_DYNAMIC_DRAW_ARB);
|
||||
@@ -1807,6 +1842,7 @@ _mesa_meta_glsl_Clear(struct gl_context *ctx, GLbitfield buffers)
|
||||
/* draw quad(s) */
|
||||
if (fb->MaxNumLayers > 0) {
|
||||
unsigned layer;
|
||||
assert(glsl);
|
||||
for (layer = 0; layer < fb->MaxNumLayers; layer++) {
|
||||
if (fb->_IntegerColor)
|
||||
_mesa_Uniform1i(clear->IntegerLayerLocation, layer);
|
||||
@@ -2774,7 +2810,7 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims,
|
||||
|
||||
_mesa_unlock_texture(ctx, texObj);
|
||||
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL);
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_DRAW_BUFFERS);
|
||||
|
||||
_mesa_GenFramebuffers(1, &fbo);
|
||||
_mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
|
||||
@@ -2812,13 +2848,13 @@ copytexsubimage_using_blit_framebuffer(struct gl_context *ctx, GLuint dims,
|
||||
* are too strict for CopyTexImage. We know meta will be fine with format
|
||||
* changes.
|
||||
*/
|
||||
_mesa_meta_BlitFramebuffer(ctx, x, y,
|
||||
x + width, y + height,
|
||||
xoffset, yoffset,
|
||||
xoffset + width, yoffset + height,
|
||||
mask, GL_NEAREST);
|
||||
mask = _mesa_meta_BlitFramebuffer(ctx, x, y,
|
||||
x + width, y + height,
|
||||
xoffset, yoffset,
|
||||
xoffset + width, yoffset + height,
|
||||
mask, GL_NEAREST);
|
||||
ctx->Meta->Blit.no_ctsi_fallback = false;
|
||||
success = true;
|
||||
success = mask == 0x0;
|
||||
|
||||
out:
|
||||
_mesa_lock_texture(ctx, texObj);
|
||||
@@ -2996,7 +3032,8 @@ decompress_texture_image(struct gl_context *ctx,
|
||||
break;
|
||||
}
|
||||
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_PIXEL_STORE);
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL & ~(MESA_META_PIXEL_STORE |
|
||||
MESA_META_DRAW_BUFFERS));
|
||||
|
||||
samplerSave = ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
|
||||
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
|
||||
|
@@ -58,6 +58,7 @@
|
||||
#define MESA_META_MULTISAMPLE 0x100000
|
||||
#define MESA_META_FRAMEBUFFER_SRGB 0x200000
|
||||
#define MESA_META_OCCLUSION_QUERY 0x400000
|
||||
#define MESA_META_DRAW_BUFFERS 0x800000
|
||||
/**\}*/
|
||||
|
||||
/**
|
||||
@@ -180,6 +181,9 @@ struct save_state
|
||||
GLboolean TransformFeedbackNeedsResume;
|
||||
|
||||
GLuint DrawBufferName, ReadBufferName, RenderbufferName;
|
||||
|
||||
/** MESA_META_DRAW_BUFFERS */
|
||||
GLenum ColorDrawBuffers[MAX_DRAW_BUFFERS];
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -263,6 +267,13 @@ struct blit_state
|
||||
bool no_ctsi_fallback;
|
||||
};
|
||||
|
||||
struct fb_tex_blit_state
|
||||
{
|
||||
GLint baseLevelSave, maxLevelSave;
|
||||
GLuint sampler, samplerSave;
|
||||
GLuint tempTex;
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* State for glClear()
|
||||
@@ -392,11 +403,39 @@ extern GLboolean
|
||||
_mesa_meta_in_progress(struct gl_context *ctx);
|
||||
|
||||
extern void
|
||||
_mesa_meta_fb_tex_blit_begin(const struct gl_context *ctx,
|
||||
struct fb_tex_blit_state *blit);
|
||||
|
||||
extern void
|
||||
_mesa_meta_fb_tex_blit_end(const struct gl_context *ctx, GLenum target,
|
||||
struct fb_tex_blit_state *blit);
|
||||
|
||||
extern GLboolean
|
||||
_mesa_meta_bind_rb_as_tex_image(struct gl_context *ctx,
|
||||
struct gl_renderbuffer *rb,
|
||||
GLuint *tex,
|
||||
struct gl_texture_object **texObj,
|
||||
GLenum *target);
|
||||
|
||||
GLuint
|
||||
_mesa_meta_setup_sampler(struct gl_context *ctx,
|
||||
const struct gl_texture_object *texObj,
|
||||
GLenum target, GLenum filter, GLuint srcLevel);
|
||||
|
||||
extern GLbitfield
|
||||
_mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||||
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
|
||||
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
|
||||
GLbitfield mask, GLenum filter);
|
||||
|
||||
extern void
|
||||
_mesa_meta_and_swrast_BlitFramebuffer(struct gl_context *ctx,
|
||||
GLint srcX0, GLint srcY0,
|
||||
GLint srcX1, GLint srcY1,
|
||||
GLint dstX0, GLint dstY0,
|
||||
GLint dstX1, GLint dstY1,
|
||||
GLbitfield mask, GLenum filter);
|
||||
|
||||
extern void
|
||||
_mesa_meta_Clear(struct gl_context *ctx, GLbitfield buffers);
|
||||
|
||||
@@ -451,6 +490,13 @@ _mesa_meta_compile_shader_with_debug(struct gl_context *ctx, GLenum target,
|
||||
GLuint
|
||||
_mesa_meta_link_program_with_debug(struct gl_context *ctx, GLuint program);
|
||||
|
||||
void
|
||||
_mesa_meta_compile_and_link_program(struct gl_context *ctx,
|
||||
const char *vs_source,
|
||||
const char *fs_source,
|
||||
const char *name,
|
||||
GLuint *program);
|
||||
|
||||
GLboolean
|
||||
_mesa_meta_alloc_texture(struct temp_texture *tex,
|
||||
GLsizei width, GLsizei height, GLenum intFormat);
|
||||
|
@@ -62,7 +62,6 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
|
||||
{
|
||||
const char *vs_source;
|
||||
char *fs_source;
|
||||
GLuint vs, fs;
|
||||
void *mem_ctx;
|
||||
enum blit_msaa_shader shader_index;
|
||||
bool dst_is_msaa = false;
|
||||
@@ -314,21 +313,10 @@ setup_glsl_msaa_blit_shader(struct gl_context *ctx,
|
||||
sample_resolve);
|
||||
}
|
||||
|
||||
vs = _mesa_meta_compile_shader_with_debug(ctx, GL_VERTEX_SHADER, vs_source);
|
||||
fs = _mesa_meta_compile_shader_with_debug(ctx, GL_FRAGMENT_SHADER, fs_source);
|
||||
_mesa_meta_compile_and_link_program(ctx, vs_source, fs_source, name,
|
||||
&blit->msaa_shaders[shader_index]);
|
||||
|
||||
blit->msaa_shaders[shader_index] = _mesa_CreateProgram();
|
||||
_mesa_AttachShader(blit->msaa_shaders[shader_index], fs);
|
||||
_mesa_DeleteShader(fs);
|
||||
_mesa_AttachShader(blit->msaa_shaders[shader_index], vs);
|
||||
_mesa_DeleteShader(vs);
|
||||
_mesa_BindAttribLocation(blit->msaa_shaders[shader_index], 0, "position");
|
||||
_mesa_BindAttribLocation(blit->msaa_shaders[shader_index], 1, "texcoords");
|
||||
_mesa_meta_link_program_with_debug(ctx, blit->msaa_shaders[shader_index]);
|
||||
_mesa_ObjectLabel(GL_PROGRAM, blit->msaa_shaders[shader_index], -1, name);
|
||||
ralloc_free(mem_ctx);
|
||||
|
||||
_mesa_UseProgram(blit->msaa_shaders[shader_index]);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -340,7 +328,10 @@ setup_glsl_blit_framebuffer(struct gl_context *ctx,
|
||||
/* target = GL_TEXTURE_RECTANGLE is not supported in GLES 3.0 */
|
||||
assert(_mesa_is_desktop_gl(ctx) || target == GL_TEXTURE_2D);
|
||||
|
||||
_mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true, 2, 2, 0);
|
||||
unsigned texcoord_size = 2 + (src_rb->Depth > 1 ? 1 : 0);
|
||||
|
||||
_mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true,
|
||||
2, texcoord_size, 0);
|
||||
|
||||
if (target == GL_TEXTURE_2D_MULTISAMPLE ||
|
||||
target == GL_TEXTURE_2D_MULTISAMPLE_ARRAY) {
|
||||
@@ -368,19 +359,14 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
const struct gl_renderbuffer_attachment *readAtt =
|
||||
&readFb->Attachment[att_index];
|
||||
struct blit_state *blit = &ctx->Meta->Blit;
|
||||
struct fb_tex_blit_state fb_tex_blit;
|
||||
const GLint dstX = MIN2(dstX0, dstX1);
|
||||
const GLint dstY = MIN2(dstY0, dstY1);
|
||||
const GLint dstW = abs(dstX1 - dstX0);
|
||||
const GLint dstH = abs(dstY1 - dstY0);
|
||||
struct gl_texture_object *texObj;
|
||||
GLuint srcLevel;
|
||||
GLint baseLevelSave;
|
||||
GLint maxLevelSave;
|
||||
GLenum target;
|
||||
GLuint sampler, samplerSave =
|
||||
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
|
||||
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
|
||||
GLuint tempTex = 0;
|
||||
struct gl_renderbuffer *rb = readAtt->Renderbuffer;
|
||||
struct temp_texture *meta_temp_texture;
|
||||
|
||||
@@ -392,6 +378,8 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
filter = GL_LINEAR;
|
||||
}
|
||||
|
||||
_mesa_meta_fb_tex_blit_begin(ctx, &fb_tex_blit);
|
||||
|
||||
if (readAtt->Texture &&
|
||||
(readAtt->Texture->Target == GL_TEXTURE_2D ||
|
||||
readAtt->Texture->Target == GL_TEXTURE_RECTANGLE ||
|
||||
@@ -404,38 +392,16 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
texObj = readAtt->Texture;
|
||||
target = texObj->Target;
|
||||
} else if (!readAtt->Texture && ctx->Driver.BindRenderbufferTexImage) {
|
||||
/* Otherwise, we need the driver to be able to bind a renderbuffer as
|
||||
* a texture image.
|
||||
*/
|
||||
struct gl_texture_image *texImage;
|
||||
|
||||
if (rb->NumSamples > 1)
|
||||
target = GL_TEXTURE_2D_MULTISAMPLE;
|
||||
else
|
||||
target = GL_TEXTURE_2D;
|
||||
|
||||
_mesa_GenTextures(1, &tempTex);
|
||||
_mesa_BindTexture(target, tempTex);
|
||||
srcLevel = 0;
|
||||
texObj = _mesa_lookup_texture(ctx, tempTex);
|
||||
texImage = _mesa_get_tex_image(ctx, texObj, target, srcLevel);
|
||||
|
||||
if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, texImage)) {
|
||||
_mesa_DeleteTextures(1, &tempTex);
|
||||
if (!_mesa_meta_bind_rb_as_tex_image(ctx, rb, &fb_tex_blit.tempTex,
|
||||
&texObj, &target))
|
||||
return false;
|
||||
} else {
|
||||
if (ctx->Driver.FinishRenderTexture &&
|
||||
!rb->NeedsFinishRenderTexture) {
|
||||
rb->NeedsFinishRenderTexture = true;
|
||||
ctx->Driver.FinishRenderTexture(ctx, rb);
|
||||
}
|
||||
|
||||
if (_mesa_is_winsys_fbo(readFb)) {
|
||||
GLint temp = srcY0;
|
||||
srcY0 = rb->Height - srcY1;
|
||||
srcY1 = rb->Height - temp;
|
||||
flipY = -flipY;
|
||||
}
|
||||
srcLevel = 0;
|
||||
if (_mesa_is_winsys_fbo(readFb)) {
|
||||
GLint temp = srcY0;
|
||||
srcY0 = rb->Height - srcY1;
|
||||
srcY1 = rb->Height - temp;
|
||||
flipY = -flipY;
|
||||
}
|
||||
} else {
|
||||
GLenum tex_base_format;
|
||||
@@ -476,8 +442,8 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
srcY1 = srcH;
|
||||
}
|
||||
|
||||
baseLevelSave = texObj->BaseLevel;
|
||||
maxLevelSave = texObj->MaxLevel;
|
||||
fb_tex_blit.baseLevelSave = texObj->BaseLevel;
|
||||
fb_tex_blit.maxLevelSave = texObj->MaxLevel;
|
||||
|
||||
if (glsl_version) {
|
||||
setup_glsl_blit_framebuffer(ctx, blit, rb, target);
|
||||
@@ -488,25 +454,14 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
2);
|
||||
}
|
||||
|
||||
_mesa_GenSamplers(1, &sampler);
|
||||
_mesa_BindSampler(ctx->Texture.CurrentUnit, sampler);
|
||||
|
||||
/*
|
||||
printf("Blit from texture!\n");
|
||||
printf(" srcAtt %p dstAtt %p\n", readAtt, drawAtt);
|
||||
printf(" srcTex %p dstText %p\n", texObj, drawAtt->Texture);
|
||||
*/
|
||||
|
||||
/* Prepare src texture state */
|
||||
_mesa_BindTexture(target, texObj->Name);
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, filter);
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, filter);
|
||||
if (target != GL_TEXTURE_RECTANGLE_ARB) {
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel);
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);
|
||||
}
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
fb_tex_blit.sampler = _mesa_meta_setup_sampler(ctx, texObj, target, filter,
|
||||
srcLevel);
|
||||
|
||||
/* Always do our blits with no net sRGB decode or encode.
|
||||
*
|
||||
@@ -527,11 +482,12 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
if (ctx->Extensions.EXT_texture_sRGB_decode) {
|
||||
if (_mesa_get_format_color_encoding(rb->Format) == GL_SRGB &&
|
||||
ctx->DrawBuffer->Visual.sRGBCapable) {
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_SRGB_DECODE_EXT,
|
||||
GL_DECODE_EXT);
|
||||
_mesa_SamplerParameteri(fb_tex_blit.sampler,
|
||||
GL_TEXTURE_SRGB_DECODE_EXT, GL_DECODE_EXT);
|
||||
_mesa_set_framebuffer_srgb(ctx, GL_TRUE);
|
||||
} else {
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_SRGB_DECODE_EXT,
|
||||
_mesa_SamplerParameteri(fb_tex_blit.sampler,
|
||||
GL_TEXTURE_SRGB_DECODE_EXT,
|
||||
GL_SKIP_DECODE_EXT);
|
||||
/* set_framebuffer_srgb was set by _mesa_meta_begin(). */
|
||||
}
|
||||
@@ -580,12 +536,16 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
|
||||
verts[0].tex[0] = s0;
|
||||
verts[0].tex[1] = t0;
|
||||
verts[0].tex[2] = readAtt->Zoffset;
|
||||
verts[1].tex[0] = s1;
|
||||
verts[1].tex[1] = t0;
|
||||
verts[1].tex[2] = readAtt->Zoffset;
|
||||
verts[2].tex[0] = s1;
|
||||
verts[2].tex[1] = t1;
|
||||
verts[2].tex[2] = readAtt->Zoffset;
|
||||
verts[3].tex[0] = s0;
|
||||
verts[3].tex[1] = t1;
|
||||
verts[3].tex[2] = readAtt->Zoffset;
|
||||
|
||||
_mesa_BufferSubData(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
|
||||
}
|
||||
@@ -598,28 +558,100 @@ blitframebuffer_texture(struct gl_context *ctx,
|
||||
_mesa_DepthFunc(GL_ALWAYS);
|
||||
|
||||
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
_mesa_meta_fb_tex_blit_end(ctx, target, &fb_tex_blit);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
_mesa_meta_fb_tex_blit_begin(const struct gl_context *ctx,
|
||||
struct fb_tex_blit_state *blit)
|
||||
{
|
||||
blit->samplerSave =
|
||||
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler ?
|
||||
ctx->Texture.Unit[ctx->Texture.CurrentUnit].Sampler->Name : 0;
|
||||
blit->tempTex = 0;
|
||||
}
|
||||
|
||||
void
|
||||
_mesa_meta_fb_tex_blit_end(const struct gl_context *ctx, GLenum target,
|
||||
struct fb_tex_blit_state *blit)
|
||||
{
|
||||
/* Restore texture object state, the texture binding will
|
||||
* be restored by _mesa_meta_end().
|
||||
*/
|
||||
if (target != GL_TEXTURE_RECTANGLE_ARB) {
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, baseLevelSave);
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, maxLevelSave);
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, blit->baseLevelSave);
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, blit->maxLevelSave);
|
||||
}
|
||||
|
||||
_mesa_BindSampler(ctx->Texture.CurrentUnit, samplerSave);
|
||||
_mesa_DeleteSamplers(1, &sampler);
|
||||
if (tempTex)
|
||||
_mesa_DeleteTextures(1, &tempTex);
|
||||
_mesa_BindSampler(ctx->Texture.CurrentUnit, blit->samplerSave);
|
||||
_mesa_DeleteSamplers(1, &blit->sampler);
|
||||
if (blit->tempTex)
|
||||
_mesa_DeleteTextures(1, &blit->tempTex);
|
||||
}
|
||||
|
||||
GLboolean
|
||||
_mesa_meta_bind_rb_as_tex_image(struct gl_context *ctx,
|
||||
struct gl_renderbuffer *rb,
|
||||
GLuint *tex,
|
||||
struct gl_texture_object **texObj,
|
||||
GLenum *target)
|
||||
{
|
||||
struct gl_texture_image *texImage;
|
||||
|
||||
if (rb->NumSamples > 1)
|
||||
*target = GL_TEXTURE_2D_MULTISAMPLE;
|
||||
else
|
||||
*target = GL_TEXTURE_2D;
|
||||
|
||||
_mesa_GenTextures(1, tex);
|
||||
_mesa_BindTexture(*target, *tex);
|
||||
*texObj = _mesa_lookup_texture(ctx, *tex);
|
||||
texImage = _mesa_get_tex_image(ctx, *texObj, *target, 0);
|
||||
|
||||
if (!ctx->Driver.BindRenderbufferTexImage(ctx, rb, texImage)) {
|
||||
_mesa_DeleteTextures(1, tex);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ctx->Driver.FinishRenderTexture && !rb->NeedsFinishRenderTexture) {
|
||||
rb->NeedsFinishRenderTexture = true;
|
||||
ctx->Driver.FinishRenderTexture(ctx, rb);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
GLuint
|
||||
_mesa_meta_setup_sampler(struct gl_context *ctx,
|
||||
const struct gl_texture_object *texObj,
|
||||
GLenum target, GLenum filter, GLuint srcLevel)
|
||||
{
|
||||
GLuint sampler;
|
||||
|
||||
_mesa_GenSamplers(1, &sampler);
|
||||
_mesa_BindSampler(ctx->Texture.CurrentUnit, sampler);
|
||||
|
||||
/* Prepare src texture state */
|
||||
_mesa_BindTexture(target, texObj->Name);
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_MIN_FILTER, filter);
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_MAG_FILTER, filter);
|
||||
if (target != GL_TEXTURE_RECTANGLE_ARB) {
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_BASE_LEVEL, srcLevel);
|
||||
_mesa_TexParameteri(target, GL_TEXTURE_MAX_LEVEL, srcLevel);
|
||||
}
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
|
||||
_mesa_SamplerParameteri(sampler, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
|
||||
|
||||
return sampler;
|
||||
}
|
||||
|
||||
/**
|
||||
* Meta implementation of ctx->Driver.BlitFramebuffer() in terms
|
||||
* of texture mapping and polygon rendering.
|
||||
*/
|
||||
void
|
||||
GLbitfield
|
||||
_mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||||
GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1,
|
||||
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
|
||||
@@ -644,7 +676,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||||
/* Multisample texture blit support requires texture multisample. */
|
||||
if (ctx->ReadBuffer->Visual.samples > 0 &&
|
||||
!ctx->Extensions.ARB_texture_multisample) {
|
||||
goto fallback;
|
||||
return mask;
|
||||
}
|
||||
|
||||
/* Clip a copy of the blit coordinates. If these differ from the input
|
||||
@@ -653,13 +685,13 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||||
if (!_mesa_clip_blit(ctx, &clip.srcX0, &clip.srcY0, &clip.srcX1, &clip.srcY1,
|
||||
&clip.dstX0, &clip.dstY0, &clip.dstX1, &clip.dstY1)) {
|
||||
/* clipped/scissored everything away */
|
||||
return;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Only scissor affects blit, but we're doing to set a custom scissor if
|
||||
* necessary anyway, so save/clear state.
|
||||
*/
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL);
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_DRAW_BUFFERS);
|
||||
|
||||
/* If the clipping earlier changed the destination rect at all, then
|
||||
* enable the scissor to clip to it.
|
||||
@@ -680,10 +712,6 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||||
filter, dstFlipX, dstFlipY,
|
||||
use_glsl_version, false)) {
|
||||
mask &= ~GL_COLOR_BUFFER_BIT;
|
||||
if (mask == 0x0) {
|
||||
_mesa_meta_end(ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -693,10 +721,6 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||||
filter, dstFlipX, dstFlipY,
|
||||
use_glsl_version, true)) {
|
||||
mask &= ~GL_DEPTH_BUFFER_BIT;
|
||||
if (mask == 0x0) {
|
||||
_mesa_meta_end(ctx);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -706,11 +730,7 @@ _mesa_meta_BlitFramebuffer(struct gl_context *ctx,
|
||||
|
||||
_mesa_meta_end(ctx);
|
||||
|
||||
fallback:
|
||||
if (mask) {
|
||||
_swrast_BlitFramebuffer(ctx, srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1, mask, filter);
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -728,3 +748,24 @@ _mesa_meta_glsl_blit_cleanup(struct blit_state *blit)
|
||||
_mesa_DeleteTextures(1, &blit->depthTex.TexObj);
|
||||
blit->depthTex.TexObj = 0;
|
||||
}
|
||||
|
||||
void
|
||||
_mesa_meta_and_swrast_BlitFramebuffer(struct gl_context *ctx,
|
||||
GLint srcX0, GLint srcY0,
|
||||
GLint srcX1, GLint srcY1,
|
||||
GLint dstX0, GLint dstY0,
|
||||
GLint dstX1, GLint dstY1,
|
||||
GLbitfield mask, GLenum filter)
|
||||
{
|
||||
mask = _mesa_meta_BlitFramebuffer(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
if (mask == 0x0)
|
||||
return;
|
||||
|
||||
_swrast_BlitFramebuffer(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
}
|
||||
|
@@ -182,7 +182,7 @@ _mesa_meta_GenerateMipmap(struct gl_context *ctx, GLenum target,
|
||||
faceTarget = target;
|
||||
}
|
||||
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL);
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL & ~MESA_META_DRAW_BUFFERS);
|
||||
|
||||
/* Choose between glsl version and fixed function version of
|
||||
* GenerateMipmap function.
|
||||
|
@@ -741,10 +741,10 @@ intel_blit_framebuffer(struct gl_context *ctx,
|
||||
return;
|
||||
|
||||
|
||||
_mesa_meta_BlitFramebuffer(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
_mesa_meta_and_swrast_BlitFramebuffer(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -76,6 +76,8 @@ i965_FILES = \
|
||||
brw_lower_texture_gradients.cpp \
|
||||
brw_lower_unnormalized_offset.cpp \
|
||||
brw_meta_updownsample.c \
|
||||
brw_meta_stencil_blit.c \
|
||||
brw_meta_util.c \
|
||||
brw_misc_state.c \
|
||||
brw_object_purgeable.c \
|
||||
brw_performance_monitor.c \
|
||||
|
@@ -31,87 +31,10 @@
|
||||
#include "brw_context.h"
|
||||
#include "brw_blorp_blit_eu.h"
|
||||
#include "brw_state.h"
|
||||
#include "brw_meta_util.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_BLORP
|
||||
|
||||
/**
|
||||
* Helper function for handling mirror image blits.
|
||||
*
|
||||
* If coord0 > coord1, swap them and invert the "mirror" boolean.
|
||||
*/
|
||||
static inline void
|
||||
fixup_mirroring(bool &mirror, GLfloat &coord0, GLfloat &coord1)
|
||||
{
|
||||
if (coord0 > coord1) {
|
||||
mirror = !mirror;
|
||||
GLfloat tmp = coord0;
|
||||
coord0 = coord1;
|
||||
coord1 = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Adjust {src,dst}_x{0,1} to account for clipping and scissoring of
|
||||
* destination coordinates.
|
||||
*
|
||||
* Return true if there is still blitting to do, false if all pixels got
|
||||
* rejected by the clip and/or scissor.
|
||||
*
|
||||
* For clarity, the nomenclature of this function assumes we are clipping and
|
||||
* scissoring the X coordinate; the exact same logic applies for Y
|
||||
* coordinates.
|
||||
*
|
||||
* Note: this function may also be used to account for clipping of source
|
||||
* coordinates, by swapping the roles of src and dst.
|
||||
*/
|
||||
static inline bool
|
||||
clip_or_scissor(bool mirror, GLfloat &src_x0, GLfloat &src_x1, GLfloat &dst_x0,
|
||||
GLfloat &dst_x1, GLfloat fb_xmin, GLfloat fb_xmax)
|
||||
{
|
||||
float scale = (float) (src_x1 - src_x0) / (dst_x1 - dst_x0);
|
||||
/* If we are going to scissor everything away, stop. */
|
||||
if (!(fb_xmin < fb_xmax &&
|
||||
dst_x0 < fb_xmax &&
|
||||
fb_xmin < dst_x1 &&
|
||||
dst_x0 < dst_x1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Clip the destination rectangle, and keep track of how many pixels we
|
||||
* clipped off of the left and right sides of it.
|
||||
*/
|
||||
GLint pixels_clipped_left = 0;
|
||||
GLint pixels_clipped_right = 0;
|
||||
if (dst_x0 < fb_xmin) {
|
||||
pixels_clipped_left = fb_xmin - dst_x0;
|
||||
dst_x0 = fb_xmin;
|
||||
}
|
||||
if (fb_xmax < dst_x1) {
|
||||
pixels_clipped_right = dst_x1 - fb_xmax;
|
||||
dst_x1 = fb_xmax;
|
||||
}
|
||||
|
||||
/* If we are mirrored, then before applying pixels_clipped_{left,right} to
|
||||
* the source coordinates, we need to flip them to account for the
|
||||
* mirroring.
|
||||
*/
|
||||
if (mirror) {
|
||||
GLint tmp = pixels_clipped_left;
|
||||
pixels_clipped_left = pixels_clipped_right;
|
||||
pixels_clipped_right = tmp;
|
||||
}
|
||||
|
||||
/* Adjust the source rectangle to remove the pixels corresponding to those
|
||||
* that were clipped/scissored out of the destination rectangle.
|
||||
*/
|
||||
src_x0 += pixels_clipped_left * scale;
|
||||
src_x1 -= pixels_clipped_right * scale;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static struct intel_mipmap_tree *
|
||||
find_miptree(GLbitfield buffer_bit, struct intel_renderbuffer *irb)
|
||||
{
|
||||
@@ -244,47 +167,12 @@ try_blorp_blit(struct brw_context *brw,
|
||||
const struct gl_framebuffer *read_fb = ctx->ReadBuffer;
|
||||
const struct gl_framebuffer *draw_fb = ctx->DrawBuffer;
|
||||
|
||||
/* Detect if the blit needs to be mirrored */
|
||||
bool mirror_x = false, mirror_y = false;
|
||||
fixup_mirroring(mirror_x, srcX0, srcX1);
|
||||
fixup_mirroring(mirror_x, dstX0, dstX1);
|
||||
fixup_mirroring(mirror_y, srcY0, srcY1);
|
||||
fixup_mirroring(mirror_y, dstY0, dstY1);
|
||||
|
||||
/* If the destination rectangle needs to be clipped or scissored, do so.
|
||||
*/
|
||||
if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
|
||||
draw_fb->_Xmin, draw_fb->_Xmax) &&
|
||||
clip_or_scissor(mirror_y, srcY0, srcY1, dstY0, dstY1,
|
||||
draw_fb->_Ymin, draw_fb->_Ymax))) {
|
||||
/* Everything got clipped/scissored away, so the blit was successful. */
|
||||
bool mirror_x, mirror_y;
|
||||
if (brw_meta_mirror_clip_and_scissor(ctx,
|
||||
&srcX0, &srcY0, &srcX1, &srcY1,
|
||||
&dstX0, &dstY0, &dstX1, &dstY1,
|
||||
&mirror_x, &mirror_y))
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If the source rectangle needs to be clipped or scissored, do so. */
|
||||
if (!(clip_or_scissor(mirror_x, dstX0, dstX1, srcX0, srcX1,
|
||||
0, read_fb->Width) &&
|
||||
clip_or_scissor(mirror_y, dstY0, dstY1, srcY0, srcY1,
|
||||
0, read_fb->Height))) {
|
||||
/* Everything got clipped/scissored away, so the blit was successful. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Account for the fact that in the system framebuffer, the origin is at
|
||||
* the lower left.
|
||||
*/
|
||||
if (_mesa_is_winsys_fbo(read_fb)) {
|
||||
GLint tmp = read_fb->Height - srcY0;
|
||||
srcY0 = read_fb->Height - srcY1;
|
||||
srcY1 = tmp;
|
||||
mirror_y = !mirror_y;
|
||||
}
|
||||
if (_mesa_is_winsys_fbo(draw_fb)) {
|
||||
GLint tmp = draw_fb->Height - dstY0;
|
||||
dstY0 = draw_fb->Height - dstY1;
|
||||
dstY1 = tmp;
|
||||
mirror_y = !mirror_y;
|
||||
}
|
||||
|
||||
/* Find buffers */
|
||||
struct intel_renderbuffer *src_irb;
|
||||
|
@@ -1475,9 +1475,23 @@ GLboolean brwCreateContext(gl_api api,
|
||||
/*======================================================================
|
||||
* brw_misc_state.c
|
||||
*/
|
||||
GLuint brw_get_rb_for_slice(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
unsigned level, unsigned layer, bool flat);
|
||||
|
||||
void brw_meta_updownsample(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *src,
|
||||
struct intel_mipmap_tree *dst);
|
||||
|
||||
void brw_meta_fbo_stencil_blit(struct brw_context *brw,
|
||||
GLfloat srcX0, GLfloat srcY0,
|
||||
GLfloat srcX1, GLfloat srcY1,
|
||||
GLfloat dstX0, GLfloat dstY0,
|
||||
GLfloat dstX1, GLfloat dstY1);
|
||||
|
||||
void brw_meta_stencil_updownsample(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *src,
|
||||
struct intel_mipmap_tree *dst);
|
||||
/*======================================================================
|
||||
* brw_misc_state.c
|
||||
*/
|
||||
|
524
src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
Normal file
524
src/mesa/drivers/dri/i965/brw_meta_stencil_blit.c
Normal file
@@ -0,0 +1,524 @@
|
||||
/*
|
||||
* Copyright © 2014 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @file brw_meta_stencil_blit.c
|
||||
*
|
||||
* Implements upsampling, downsampling and scaling of stencil miptrees. The
|
||||
* logic can be originally found in brw_blorp_blit.c.
|
||||
* Implementation creates a temporary draw framebuffer object and attaches the
|
||||
* destination stencil buffer attachment as color attachment. Source attachment
|
||||
* is in turn treated as a stencil texture and the glsl program used for the
|
||||
* blitting samples it using stencil-indexing.
|
||||
*
|
||||
* Unfortunately as the data port does not support interleaved msaa-surfaces
|
||||
* (stencil is always IMS), the glsl program needs to handle the writing of
|
||||
* individual samples manually. Surface is configured as if it were single
|
||||
* sampled (with adjusted dimensions) and the glsl program extracts the
|
||||
* sample indices from the input coordinates for correct texturing.
|
||||
*
|
||||
* Target surface is also configured as Y-tiled instead of W-tiled in order
|
||||
* to support generations 6-7. Later hardware supports W-tiled as render target
|
||||
* and the logic here could be simplified for those.
|
||||
*/
|
||||
|
||||
#include "brw_context.h"
|
||||
#include "intel_batchbuffer.h"
|
||||
#include "intel_fbo.h"
|
||||
|
||||
#include "main/blit.h"
|
||||
#include "main/buffers.h"
|
||||
#include "main/fbobject.h"
|
||||
#include "main/uniforms.h"
|
||||
#include "main/texparam.h"
|
||||
#include "main/texobj.h"
|
||||
#include "main/viewport.h"
|
||||
#include "main/enable.h"
|
||||
#include "main/blend.h"
|
||||
#include "main/varray.h"
|
||||
#include "main/shaderapi.h"
|
||||
#include "glsl/ralloc.h"
|
||||
|
||||
#include "drivers/common/meta.h"
|
||||
#include "brw_meta_util.h"
|
||||
|
||||
#define FILE_DEBUG_FLAG DEBUG_FBO
|
||||
|
||||
/**
 * Source and destination rectangles of a stencil blit, plus the per-axis
 * mirroring flags.
 */
struct blit_dims {
   /* Source rectangle. */
   int src_x0;
   int src_y0;
   int src_x1;
   int src_y1;

   /* Destination rectangle. */
   int dst_x0;
   int dst_y0;
   int dst_x1;
   int dst_y1;

   /* True when the corresponding axis is flipped between source and
    * destination.
    */
   bool mirror_x;
   bool mirror_y;
};
|
||||
|
||||
/* Pass-through vertex shader: forwards the clip-space "position" attribute
 * and derives "tex_coords" from it by remapping [-1, 1] to [0, 1].
 */
static const char *vs_source =
   "#version 130\n"
   "in vec2 position;\n"
   "out vec2 tex_coords;\n"
   "void main()\n"
   "{\n"
   "   tex_coords = (position + 1.0) / 2.0;\n"
   "   gl_Position = vec4(position, 0.0, 1.0);\n"
   "}\n";
|
||||
|
||||
/* GLSL snippets substituted into the fragment shader template: the sampler
 * declaration and the matching texelFetch() statement.  Entry 0 uses a
 * usampler2D, entry 1 a usampler2DMS.
 */
static const struct sampler_and_fetch {
   const char *sampler;
   const char *fetch;
} samplers[] = {
   [0] = {
      .sampler = "uniform usampler2D texSampler;\n",
      .fetch = "   out_color = texelFetch(texSampler, txl_coords, 0)"
   },
   [1] = {
      .sampler = "#extension GL_ARB_texture_multisample : enable\n"
                 "uniform usampler2DMS texSampler;\n",
      .fetch = "   out_color = texelFetch(texSampler, txl_coords, sample_index)"
   }
};
|
||||
|
||||
/**
 * Fragment shader template.  The two "%s" conversions receive, in order, the
 * sampler declaration and the texel fetch statement.
 *
 * Translating Y-tiled to W-tiled:
 *
 *  X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1
 *  Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
 *
 * main() un-normalizes the interpolated coordinates into the Y-tiled drawing
 * rectangle, converts them to W-tiled coordinates, splits off the sample
 * index, discards fragments outside the bounding rectangle and finally maps
 * the destination texel to the source texel that is fetched.
 */
static const char *fs_tmpl =
   "#version 130\n"
   "%s"
   /* Linear destination -> source mapping used by translate_dst_to_src(). */
   "uniform float src_x_scale;\n"
   "uniform float src_y_scale;\n"
   "uniform float src_x_off;\n"
   "uniform float src_y_off;\n"
   /* Offset and unnormalized size of the drawing rectangle in Y-tiled
    * space, used by get_unorm_target_coords().
    */
   "uniform float dst_x_off;\n"
   "uniform float dst_y_off;\n"
   "uniform float draw_rect_w;\n"
   "uniform float draw_rect_h;\n"
   /* Bounding rectangle in the W-tiled space that is used to skip pixels
    * lying outside.  In some cases the Y-tiled rectangle is larger.
    */
   "uniform int dst_x0;\n"
   "uniform int dst_x1;\n"
   "uniform int dst_y0;\n"
   "uniform int dst_y1;\n"
   "uniform int dst_num_samples;\n"
   "in vec2 tex_coords;\n"
   "ivec2 txl_coords;\n"
   "int sample_index;\n"
   "out uvec4 out_color;\n"
   "\n"
   "void get_unorm_target_coords()\n"
   "{\n"
   "   txl_coords.x = int(tex_coords.x * draw_rect_w + dst_x_off);\n"
   "   txl_coords.y = int(tex_coords.y * draw_rect_h + dst_y_off);\n"
   "}\n"
   "\n"
   "void translate_dst_to_src()\n"
   "{\n"
   "   txl_coords.x = int(float(txl_coords.x) * src_x_scale + src_x_off);\n"
   "   txl_coords.y = int(float(txl_coords.y) * src_y_scale + src_y_off);\n"
   "}\n"
   "\n"
   "void translate_y_to_w_tiling()\n"
   "{\n"
   "   int X = txl_coords.x;\n"
   "   int Y = txl_coords.y;\n"
   "   txl_coords.x = (X & int(0xfff4)) >> 1;\n"
   "   txl_coords.x |= ((Y & int(0x1)) << 2);\n"
   "   txl_coords.x |= (X & int(0x1));\n"
   "   txl_coords.y = (Y & int(0xfffe)) << 1;\n"
   "   txl_coords.y |= ((X & int(0x8)) >> 2);\n"
   "   txl_coords.y |= ((X & int(0x2)) >> 1);\n"
   "}\n"
   "\n"
   "void decode_msaa()\n"
   "{\n"
   "   int X = txl_coords.x;\n"
   "   int Y = txl_coords.y;\n"
   "   switch (dst_num_samples) {\n"
   "   case 0:\n"
   "      sample_index = 0;\n"
   "      break;\n"
   "   case 2:\n"
   "      txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
   "      sample_index = (X & 0x2) >> 1;\n"
   "      break;\n"
   "   case 4:\n"
   "      txl_coords.x = ((X & int(0xfffc)) >> 1) | (X & int(0x1));\n"
   "      txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
   "      sample_index = (Y & 0x2) | ((X & 0x2) >> 1);\n"
   "      break;\n"
   "   case 8:\n"
   "      txl_coords.x = ((X & int(0xfff8)) >> 2) | (X & int(0x1));\n"
   "      txl_coords.y = ((Y & int(0xfffc)) >> 1) | (Y & int(0x1));\n"
   "      sample_index = (X & 0x4) | (Y & 0x2) | ((X & 0x2) >> 1);\n"
   "   }\n"
   "}\n"
   "\n"
   "void discard_outside_bounding_rect()\n"
   "{\n"
   "   int X = txl_coords.x;\n"
   "   int Y = txl_coords.y;\n"
   "   if (X >= dst_x1 || X < dst_x0 || Y >= dst_y1 || Y < dst_y0)\n"
   "      discard;\n"
   "}\n"
   "\n"
   "void main()\n"
   "{\n"
   "   get_unorm_target_coords();\n"
   "   translate_y_to_w_tiling();\n"
   /* Each statement ends with its own newline so that the generated source
    * keeps one statement per line.
    */
   "   decode_msaa();\n"
   "   discard_outside_bounding_rect();\n"
   "   translate_dst_to_src();\n"
   "   %s;\n"
   "}\n";
|
||||
|
||||
/**
|
||||
* Setup uniforms telling the coordinates of the destination rectangle in the
|
||||
* native w-tiled space. These are needed to ignore pixels that lie outside.
|
||||
* The destination is drawn as Y-tiled and in some cases the Y-tiled drawing
|
||||
* rectangle is larger than the original (for example 1x4 w-tiled requires
|
||||
* 16x2 y-tiled).
|
||||
*/
|
||||
static void
|
||||
setup_bounding_rect(GLuint prog, const struct blit_dims *dims)
|
||||
{
|
||||
_mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_x0"), dims->dst_x0);
|
||||
_mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_x1"), dims->dst_x1);
|
||||
_mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_y0"), dims->dst_y0);
|
||||
_mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_y1"), dims->dst_y1);
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup uniforms telling the destination width, height and the offset. These
|
||||
* are needed to unnoormalize the input coordinates and to correctly translate
|
||||
* between destination and source that may have differing offsets.
|
||||
*/
|
||||
static void
|
||||
setup_drawing_rect(GLuint prog, const struct blit_dims *dims)
|
||||
{
|
||||
_mesa_Uniform1f(_mesa_GetUniformLocation(prog, "draw_rect_w"),
|
||||
dims->dst_x1 - dims->dst_x0);
|
||||
_mesa_Uniform1f(_mesa_GetUniformLocation(prog, "draw_rect_h"),
|
||||
dims->dst_y1 - dims->dst_y0);
|
||||
_mesa_Uniform1f(_mesa_GetUniformLocation(prog, "dst_x_off"), dims->dst_x0);
|
||||
_mesa_Uniform1f(_mesa_GetUniformLocation(prog, "dst_y_off"), dims->dst_y0);
|
||||
}
|
||||
|
||||
/**
|
||||
* When not mirroring a coordinate (say, X), we need:
|
||||
* src_x - src_x0 = (dst_x - dst_x0 + 0.5) * scale
|
||||
* Therefore:
|
||||
* src_x = src_x0 + (dst_x - dst_x0 + 0.5) * scale
|
||||
*
|
||||
* The program uses "round toward zero" to convert the transformed floating
|
||||
* point coordinates to integer coordinates, whereas the behaviour we actually
|
||||
* want is "round to nearest", so 0.5 provides the necessary correction.
|
||||
*
|
||||
* When mirroring X we need:
|
||||
* src_x - src_x0 = dst_x1 - dst_x - 0.5
|
||||
* Therefore:
|
||||
* src_x = src_x0 + (dst_x1 -dst_x - 0.5) * scale
|
||||
*/
|
||||
static void
|
||||
setup_coord_coeff(GLuint prog, GLuint multiplier, GLuint offset,
|
||||
int src_0, int src_1, int dst_0, int dst_1, bool mirror)
|
||||
{
|
||||
const float scale = ((float)(src_1 - src_0)) / (dst_1 - dst_0);
|
||||
|
||||
if (mirror) {
|
||||
_mesa_Uniform1f(multiplier, -scale);
|
||||
_mesa_Uniform1f(offset, src_0 + (dst_1 - 0.5) * scale);
|
||||
} else {
|
||||
_mesa_Uniform1f(multiplier, scale);
|
||||
_mesa_Uniform1f(offset, src_0 + (-dst_0 + 0.5) * scale);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Setup uniforms providing relation between source and destination surfaces.
|
||||
* Destination coordinates are in Y-tiling layout while texelFetch() expects
|
||||
* W-tiled coordinates. Once the destination coordinates are re-interpreted by
|
||||
* the program into the original W-tiled layout, the program needs to know the
|
||||
* offset and scaling factors between the destination and source.
|
||||
* Note that these are calculated in the original W-tiled space before the
|
||||
* destination rectangle is adjusted for possible msaa and Y-tiling.
|
||||
*/
|
||||
static void
|
||||
setup_coord_transform(GLuint prog, const struct blit_dims *dims)
|
||||
{
|
||||
setup_coord_coeff(prog,
|
||||
_mesa_GetUniformLocation(prog, "src_x_scale"),
|
||||
_mesa_GetUniformLocation(prog, "src_x_off"),
|
||||
dims->src_x0, dims->src_x1, dims->dst_x0, dims->dst_x1,
|
||||
dims->mirror_x);
|
||||
|
||||
setup_coord_coeff(prog,
|
||||
_mesa_GetUniformLocation(prog, "src_y_scale"),
|
||||
_mesa_GetUniformLocation(prog, "src_y_off"),
|
||||
dims->src_y0, dims->src_y1, dims->dst_y0, dims->dst_y1,
|
||||
dims->mirror_y);
|
||||
}
|
||||
|
||||
static GLuint
|
||||
setup_program(struct gl_context *ctx, bool msaa_tex)
|
||||
{
|
||||
struct blit_state *blit = &ctx->Meta->Blit;
|
||||
static GLuint prog_cache[] = { 0, 0 };
|
||||
const char *fs_source;
|
||||
const struct sampler_and_fetch *sampler = &samplers[msaa_tex];
|
||||
|
||||
_mesa_meta_setup_vertex_objects(&blit->VAO, &blit->VBO, true, 2, 2, 0);
|
||||
|
||||
if (prog_cache[msaa_tex]) {
|
||||
_mesa_UseProgram(prog_cache[msaa_tex]);
|
||||
return prog_cache[msaa_tex];
|
||||
}
|
||||
|
||||
fs_source = ralloc_asprintf(NULL, fs_tmpl, sampler->sampler,
|
||||
sampler->fetch);
|
||||
_mesa_meta_compile_and_link_program(ctx, vs_source, fs_source,
|
||||
"i965 stencil blit",
|
||||
&prog_cache[msaa_tex]);
|
||||
ralloc_free(fs_source);
|
||||
|
||||
return prog_cache[msaa_tex];
|
||||
}
|
||||
|
||||
/**
|
||||
* Samples in stencil buffer are interleaved, and unfortunately the data port
|
||||
* does not support it as render target. Therefore the surface is set up as
|
||||
* single sampled and the program handles the interleaving.
|
||||
* In case of single sampled stencil, the render buffer is adjusted with
|
||||
* twice the base level height in order for the program to be able to write
|
||||
* any mip-level. (Used to set the drawing rectangle for the hw).
|
||||
*/
|
||||
static void
|
||||
adjust_msaa(struct blit_dims *dims, int num_samples)
|
||||
{
|
||||
if (num_samples == 2) {
|
||||
dims->dst_x0 *= 2;
|
||||
dims->dst_x1 *= 2;
|
||||
} else if (num_samples) {
|
||||
const int x_num_samples = num_samples / 2;
|
||||
dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0 * x_num_samples, num_samples);
|
||||
dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0 * 2, 4);
|
||||
dims->dst_x1 = ALIGN(dims->dst_x1 * x_num_samples, num_samples);
|
||||
dims->dst_y1 = ALIGN(dims->dst_y1 * 2, 4);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stencil is mapped as Y-tiled render target and the dimensions need to be
|
||||
* adjusted in order for the Y-tiled rectangle to cover the entire linear
|
||||
* memory space of the original W-tiled rectangle.
|
||||
*/
|
||||
static void
|
||||
adjust_tiling(struct blit_dims *dims, int num_samples)
|
||||
{
|
||||
const unsigned x_align = 8, y_align = num_samples > 2 ? 8 : 4;
|
||||
|
||||
dims->dst_x0 = ROUND_DOWN_TO(dims->dst_x0, x_align) * 2;
|
||||
dims->dst_y0 = ROUND_DOWN_TO(dims->dst_y0, y_align) / 2;
|
||||
dims->dst_x1 = ALIGN(dims->dst_x1, x_align) * 2;
|
||||
dims->dst_y1 = ALIGN(dims->dst_y1, y_align) / 2;
|
||||
}
|
||||
|
||||
/**
|
||||
* When stencil is mapped as Y-tiled render target the mip-level offsets
|
||||
* calculated for the Y-tiling do not always match the offsets in W-tiling.
|
||||
* Therefore the sampling engine cannot be used for individual mip-level
|
||||
* access but the program needs to do it internally. This can be achieved
|
||||
* by shifting the coordinates of the blit rectangle here.
|
||||
*/
|
||||
static void
|
||||
adjust_mip_level(const struct intel_mipmap_tree *mt,
|
||||
unsigned level, unsigned layer, struct blit_dims *dims)
|
||||
{
|
||||
unsigned x_offset;
|
||||
unsigned y_offset;
|
||||
|
||||
intel_miptree_get_image_offset(mt, level, layer, &x_offset, &y_offset);
|
||||
|
||||
dims->dst_x0 += x_offset;
|
||||
dims->dst_y0 += y_offset;
|
||||
dims->dst_x1 += x_offset;
|
||||
dims->dst_y1 += y_offset;
|
||||
}
|
||||
|
||||
static void
|
||||
prepare_vertex_data(void)
|
||||
{
|
||||
static const struct vertex verts[] = {
|
||||
{ .x = -1.0f, .y = -1.0f },
|
||||
{ .x = 1.0f, .y = -1.0f },
|
||||
{ .x = 1.0f, .y = 1.0f },
|
||||
{ .x = -1.0f, .y = 1.0f } };
|
||||
|
||||
_mesa_BufferSubData(GL_ARRAY_BUFFER_ARB, 0, sizeof(verts), verts);
|
||||
}
|
||||
|
||||
static void
|
||||
set_read_rb_tex_image(struct gl_context *ctx, struct fb_tex_blit_state *blit,
|
||||
GLenum *target)
|
||||
{
|
||||
const struct gl_renderbuffer_attachment *att =
|
||||
&ctx->ReadBuffer->Attachment[BUFFER_STENCIL];
|
||||
struct gl_renderbuffer *rb = att->Renderbuffer;
|
||||
struct gl_texture_object *tex_obj;
|
||||
unsigned level = 0;
|
||||
|
||||
/* If the renderbuffer is already backed by an tex image, use it. */
|
||||
if (att->Texture) {
|
||||
tex_obj = att->Texture;
|
||||
*target = tex_obj->Target;
|
||||
level = att->TextureLevel;
|
||||
} else {
|
||||
_mesa_meta_bind_rb_as_tex_image(ctx, rb, &blit->tempTex, &tex_obj,
|
||||
target);
|
||||
}
|
||||
|
||||
blit->baseLevelSave = tex_obj->BaseLevel;
|
||||
blit->maxLevelSave = tex_obj->MaxLevel;
|
||||
blit->sampler = _mesa_meta_setup_sampler(ctx, tex_obj, *target,
|
||||
GL_NEAREST, level);
|
||||
}
|
||||
|
||||
static void
|
||||
brw_meta_stencil_blit(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *dst_mt,
|
||||
unsigned dst_level, unsigned dst_layer,
|
||||
const struct blit_dims *orig_dims)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct blit_dims dims = *orig_dims;
|
||||
struct fb_tex_blit_state blit;
|
||||
GLuint prog, fbo, rbo;
|
||||
GLenum target;
|
||||
|
||||
_mesa_meta_fb_tex_blit_begin(ctx, &blit);
|
||||
|
||||
_mesa_GenFramebuffers(1, &fbo);
|
||||
/* Force the surface to be configured for level zero. */
|
||||
rbo = brw_get_rb_for_slice(brw, dst_mt, 0, dst_layer, true);
|
||||
adjust_msaa(&dims, dst_mt->num_samples);
|
||||
adjust_tiling(&dims, dst_mt->num_samples);
|
||||
|
||||
_mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo);
|
||||
_mesa_FramebufferRenderbuffer(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
|
||||
GL_RENDERBUFFER, rbo);
|
||||
_mesa_DrawBuffer(GL_COLOR_ATTACHMENT0);
|
||||
ctx->DrawBuffer->_Status = GL_FRAMEBUFFER_COMPLETE;
|
||||
|
||||
set_read_rb_tex_image(ctx, &blit, &target);
|
||||
|
||||
_mesa_TexParameteri(target, GL_DEPTH_STENCIL_TEXTURE_MODE,
|
||||
GL_STENCIL_INDEX);
|
||||
|
||||
prog = setup_program(ctx, target != GL_TEXTURE_2D);
|
||||
setup_bounding_rect(prog, orig_dims);
|
||||
setup_drawing_rect(prog, &dims);
|
||||
setup_coord_transform(prog, orig_dims);
|
||||
|
||||
_mesa_Uniform1i(_mesa_GetUniformLocation(prog, "dst_num_samples"),
|
||||
dst_mt->num_samples);
|
||||
|
||||
prepare_vertex_data();
|
||||
_mesa_set_viewport(ctx, 0, dims.dst_x0, dims.dst_y0,
|
||||
dims.dst_x1 - dims.dst_x0, dims.dst_y1 - dims.dst_y0);
|
||||
_mesa_ColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
|
||||
_mesa_set_enable(ctx, GL_DEPTH_TEST, false);
|
||||
|
||||
_mesa_DrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
|
||||
_mesa_meta_fb_tex_blit_end(ctx, target, &blit);
|
||||
_mesa_meta_end(ctx);
|
||||
|
||||
_mesa_DeleteRenderbuffers(1, &rbo);
|
||||
_mesa_DeleteFramebuffers(1, &fbo);
|
||||
}
|
||||
|
||||
void
|
||||
brw_meta_fbo_stencil_blit(struct brw_context *brw,
|
||||
GLfloat src_x0, GLfloat src_y0,
|
||||
GLfloat src_x1, GLfloat src_y1,
|
||||
GLfloat dst_x0, GLfloat dst_y0,
|
||||
GLfloat dst_x1, GLfloat dst_y1)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct gl_renderbuffer *draw_fb =
|
||||
ctx->DrawBuffer->Attachment[BUFFER_STENCIL].Renderbuffer;
|
||||
const struct intel_renderbuffer *dst_irb = intel_renderbuffer(draw_fb);
|
||||
struct intel_mipmap_tree *dst_mt = dst_irb->mt;
|
||||
|
||||
if (!dst_mt)
|
||||
return;
|
||||
|
||||
if (dst_mt->stencil_mt)
|
||||
dst_mt = dst_mt->stencil_mt;
|
||||
|
||||
bool mirror_x, mirror_y;
|
||||
if (brw_meta_mirror_clip_and_scissor(ctx,
|
||||
&src_x0, &src_y0, &src_x1, &src_y1,
|
||||
&dst_x0, &dst_y0, &dst_x1, &dst_y1,
|
||||
&mirror_x, &mirror_y))
|
||||
return;
|
||||
|
||||
struct blit_dims dims = { .src_x0 = src_x0, .src_y0 = src_y0,
|
||||
.src_x1 = src_x1, .src_y1 = src_y1,
|
||||
.dst_x0 = dst_x0, .dst_y0 = dst_y0,
|
||||
.dst_x1 = dst_x1, .dst_y1 = dst_y1,
|
||||
.mirror_x = mirror_x, .mirror_y = mirror_y };
|
||||
adjust_mip_level(dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
|
||||
|
||||
intel_batchbuffer_emit_mi_flush(brw);
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL);
|
||||
brw_meta_stencil_blit(brw,
|
||||
dst_mt, dst_irb->mt_level, dst_irb->mt_layer, &dims);
|
||||
intel_batchbuffer_emit_mi_flush(brw);
|
||||
}
|
||||
|
||||
void
|
||||
brw_meta_stencil_updownsample(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *src,
|
||||
struct intel_mipmap_tree *dst)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct blit_dims dims = {
|
||||
.src_x0 = 0, .src_y0 = 0,
|
||||
.src_x1 = src->logical_width0, .src_y1 = src->logical_height0,
|
||||
.dst_x0 = 0, .dst_y0 = 0,
|
||||
.dst_x1 = dst->logical_width0, .dst_y1 = dst->logical_height0,
|
||||
.mirror_x = 0, .mirror_y = 0 };
|
||||
GLuint fbo, rbo;
|
||||
|
||||
if (dst->stencil_mt)
|
||||
dst = dst->stencil_mt;
|
||||
|
||||
intel_batchbuffer_emit_mi_flush(brw);
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL);
|
||||
|
||||
_mesa_GenFramebuffers(1, &fbo);
|
||||
rbo = brw_get_rb_for_slice(brw, src, 0, 0, false);
|
||||
|
||||
_mesa_BindFramebuffer(GL_READ_FRAMEBUFFER, fbo);
|
||||
_mesa_FramebufferRenderbuffer(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT,
|
||||
GL_RENDERBUFFER, rbo);
|
||||
|
||||
brw_meta_stencil_blit(brw, dst, 0, 0, &dims);
|
||||
intel_batchbuffer_emit_mi_flush(brw);
|
||||
|
||||
_mesa_DeleteRenderbuffers(1, &rbo);
|
||||
_mesa_DeleteFramebuffers(1, &fbo);
|
||||
}
|
@@ -27,6 +27,7 @@
|
||||
|
||||
#include "main/blit.h"
|
||||
#include "main/buffers.h"
|
||||
#include "main/enums.h"
|
||||
#include "main/fbobject.h"
|
||||
|
||||
#include "drivers/common/meta.h"
|
||||
@@ -44,8 +45,10 @@
|
||||
*
|
||||
* Clobbers the current renderbuffer binding (ctx->CurrentRenderbuffer).
|
||||
*/
|
||||
static GLuint
|
||||
brw_get_rb_for_first_slice(struct brw_context *brw, struct intel_mipmap_tree *mt)
|
||||
GLuint
|
||||
brw_get_rb_for_slice(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *mt,
|
||||
unsigned level, unsigned layer, bool flat)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
GLuint rbo;
|
||||
@@ -62,11 +65,27 @@ brw_get_rb_for_first_slice(struct brw_context *brw, struct intel_mipmap_tree *mt
|
||||
irb = intel_renderbuffer(rb);
|
||||
|
||||
rb->Format = mt->format;
|
||||
rb->_BaseFormat = _mesa_base_fbo_format(ctx, mt->format);
|
||||
rb->_BaseFormat = _mesa_get_format_base_format(mt->format);
|
||||
|
||||
rb->NumSamples = mt->num_samples;
|
||||
rb->Width = mt->logical_width0;
|
||||
rb->Height = mt->logical_height0;
|
||||
/* Program takes care of msaa and mip-level access manually for stencil.
|
||||
* The surface is also treated as Y-tiled instead of as W-tiled calling for
|
||||
* twice the width and half the height in dimensions.
|
||||
*/
|
||||
if (flat) {
|
||||
const unsigned halign_stencil = 8;
|
||||
|
||||
rb->NumSamples = 0;
|
||||
rb->Width = ALIGN(mt->total_width, halign_stencil) * 2;
|
||||
rb->Height = (mt->total_height / mt->physical_depth0) / 2;
|
||||
irb->mt_level = 0;
|
||||
} else {
|
||||
rb->NumSamples = mt->num_samples;
|
||||
rb->Width = mt->logical_width0;
|
||||
rb->Height = mt->logical_height0;
|
||||
irb->mt_level = level;
|
||||
}
|
||||
|
||||
irb->mt_layer = layer;
|
||||
|
||||
intel_miptree_reference(&irb->mt, mt);
|
||||
|
||||
@@ -101,8 +120,8 @@ brw_meta_updownsample(struct brw_context *brw,
|
||||
|
||||
_mesa_meta_begin(ctx, MESA_META_ALL);
|
||||
_mesa_GenFramebuffers(2, fbos);
|
||||
src_rbo = brw_get_rb_for_first_slice(brw, src_mt);
|
||||
dst_rbo = brw_get_rb_for_first_slice(brw, dst_mt);
|
||||
src_rbo = brw_get_rb_for_slice(brw, src_mt, 0, 0, false);
|
||||
dst_rbo = brw_get_rb_for_slice(brw, dst_mt, 0, 0, false);
|
||||
src_fbo = fbos[0];
|
||||
dst_fbo = fbos[1];
|
||||
|
||||
|
160
src/mesa/drivers/dri/i965/brw_meta_util.c
Normal file
160
src/mesa/drivers/dri/i965/brw_meta_util.c
Normal file
@@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright © 2014 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "brw_meta_util.h"
|
||||
#include "main/fbobject.h"
|
||||
|
||||
/**
 * Put one axis of a blit rectangle into ascending order.
 *
 * When *coord0 is greater than *coord1 the two values are exchanged and
 * *mirror is toggled; otherwise nothing is modified.
 */
static inline void
fixup_mirroring(bool *mirror, float *coord0, float *coord1)
{
   /* Already ordered (or not comparable): leave everything untouched. */
   if (!(*coord0 > *coord1))
      return;

   const float larger = *coord0;
   *coord0 = *coord1;
   *coord1 = larger;
   *mirror = !*mirror;
}
|
||||
|
||||
/**
|
||||
* Adjust {src,dst}_x{0,1} to account for clipping and scissoring of
|
||||
* destination coordinates.
|
||||
*
|
||||
* Return true if there is still blitting to do, false if all pixels got
|
||||
* rejected by the clip and/or scissor.
|
||||
*
|
||||
* For clarity, the nomenclature of this function assumes we are clipping and
|
||||
* scissoring the X coordinate; the exact same logic applies for Y
|
||||
* coordinates.
|
||||
*
|
||||
* Note: this function may also be used to account for clipping of source
|
||||
* coordinates, by swapping the roles of src and dst.
|
||||
*/
|
||||
static inline bool
|
||||
clip_or_scissor(bool mirror,
|
||||
GLfloat *src_x0, GLfloat *src_x1,
|
||||
GLfloat *dst_x0, GLfloat *dst_x1,
|
||||
GLfloat fb_xmin, GLfloat fb_xmax)
|
||||
{
|
||||
float scale = (float) (*src_x1 - *src_x0) / (*dst_x1 - *dst_x0);
|
||||
/* If we are going to scissor everything away, stop. */
|
||||
if (!(fb_xmin < fb_xmax &&
|
||||
*dst_x0 < fb_xmax &&
|
||||
fb_xmin < *dst_x1 &&
|
||||
*dst_x0 < *dst_x1)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Clip the destination rectangle, and keep track of how many pixels we
|
||||
* clipped off of the left and right sides of it.
|
||||
*/
|
||||
int pixels_clipped_left = 0;
|
||||
int pixels_clipped_right = 0;
|
||||
if (*dst_x0 < fb_xmin) {
|
||||
pixels_clipped_left = fb_xmin - *dst_x0;
|
||||
*dst_x0 = fb_xmin;
|
||||
}
|
||||
if (fb_xmax < *dst_x1) {
|
||||
pixels_clipped_right = *dst_x1 - fb_xmax;
|
||||
*dst_x1 = fb_xmax;
|
||||
}
|
||||
|
||||
/* If we are mirrored, then before applying pixels_clipped_{left,right} to
|
||||
* the source coordinates, we need to flip them to account for the
|
||||
* mirroring.
|
||||
*/
|
||||
if (mirror) {
|
||||
int tmp = pixels_clipped_left;
|
||||
pixels_clipped_left = pixels_clipped_right;
|
||||
pixels_clipped_right = tmp;
|
||||
}
|
||||
|
||||
/* Adjust the source rectangle to remove the pixels corresponding to those
|
||||
* that were clipped/scissored out of the destination rectangle.
|
||||
*/
|
||||
*src_x0 += pixels_clipped_left * scale;
|
||||
*src_x1 -= pixels_clipped_right * scale;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
|
||||
GLfloat *srcX0, GLfloat *srcY0,
|
||||
GLfloat *srcX1, GLfloat *srcY1,
|
||||
GLfloat *dstX0, GLfloat *dstY0,
|
||||
GLfloat *dstX1, GLfloat *dstY1,
|
||||
bool *mirror_x, bool *mirror_y)
|
||||
{
|
||||
const struct gl_framebuffer *read_fb = ctx->ReadBuffer;
|
||||
const struct gl_framebuffer *draw_fb = ctx->DrawBuffer;
|
||||
|
||||
*mirror_x = false;
|
||||
*mirror_y = false;
|
||||
|
||||
/* Detect if the blit needs to be mirrored */
|
||||
fixup_mirroring(mirror_x, srcX0, srcX1);
|
||||
fixup_mirroring(mirror_x, dstX0, dstX1);
|
||||
fixup_mirroring(mirror_y, srcY0, srcY1);
|
||||
fixup_mirroring(mirror_y, dstY0, dstY1);
|
||||
|
||||
/* If the destination rectangle needs to be clipped or scissored, do so. */
|
||||
if (!(clip_or_scissor(*mirror_x, srcX0, srcX1, dstX0, dstX1,
|
||||
draw_fb->_Xmin, draw_fb->_Xmax) &&
|
||||
clip_or_scissor(*mirror_y, srcY0, srcY1, dstY0, dstY1,
|
||||
draw_fb->_Ymin, draw_fb->_Ymax))) {
|
||||
/* Everything got clipped/scissored away, so the blit was successful. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* If the source rectangle needs to be clipped or scissored, do so. */
|
||||
if (!(clip_or_scissor(*mirror_x, dstX0, dstX1, srcX0, srcX1,
|
||||
0, read_fb->Width) &&
|
||||
clip_or_scissor(*mirror_y, dstY0, dstY1, srcY0, srcY1,
|
||||
0, read_fb->Height))) {
|
||||
/* Everything got clipped/scissored away, so the blit was successful. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Account for the fact that in the system framebuffer, the origin is at
|
||||
* the lower left.
|
||||
*/
|
||||
if (_mesa_is_winsys_fbo(read_fb)) {
|
||||
GLint tmp = read_fb->Height - *srcY0;
|
||||
*srcY0 = read_fb->Height - *srcY1;
|
||||
*srcY1 = tmp;
|
||||
*mirror_y = !*mirror_y;
|
||||
}
|
||||
if (_mesa_is_winsys_fbo(draw_fb)) {
|
||||
GLint tmp = draw_fb->Height - *dstY0;
|
||||
*dstY0 = draw_fb->Height - *dstY1;
|
||||
*dstY1 = tmp;
|
||||
*mirror_y = !*mirror_y;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
46
src/mesa/drivers/dri/i965/brw_meta_util.h
Normal file
46
src/mesa/drivers/dri/i965/brw_meta_util.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright © 2014 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef BRW_META_UTIL_H
|
||||
#define BRW_META_UTIL_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "main/mtypes.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
bool
|
||||
brw_meta_mirror_clip_and_scissor(const struct gl_context *ctx,
|
||||
GLfloat *srcX0, GLfloat *srcY0,
|
||||
GLfloat *srcX1, GLfloat *srcY1,
|
||||
GLfloat *dstX0, GLfloat *dstY0,
|
||||
GLfloat *dstX1, GLfloat *dstY1,
|
||||
bool *mirror_x, bool *mirror_y);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BRW_META_UTIL_H */
|
@@ -42,6 +42,13 @@ brw_get_graphics_reset_status(struct gl_context *ctx)
|
||||
*/
|
||||
assert(brw->hw_ctx != NULL);
|
||||
|
||||
/* A reset status other than NO_ERROR was returned last time. I915 returns
|
||||
* nonzero active/pending only if reset has been encountered and completed.
|
||||
* Return NO_ERROR from now on.
|
||||
*/
|
||||
if (brw->reset_count != 0)
|
||||
return GL_NO_ERROR;
|
||||
|
||||
err = drm_intel_get_reset_stats(brw->hw_ctx, &reset_count, &active,
|
||||
&pending);
|
||||
if (err)
|
||||
@@ -50,18 +57,19 @@ brw_get_graphics_reset_status(struct gl_context *ctx)
|
||||
/* A reset was observed while a batch from this context was executing.
|
||||
* Assume that this context was at fault.
|
||||
*/
|
||||
if (active != 0)
|
||||
if (active != 0) {
|
||||
brw->reset_count = reset_count;
|
||||
return GL_GUILTY_CONTEXT_RESET_ARB;
|
||||
}
|
||||
|
||||
/* A reset was observed while a batch from this context was in progress,
|
||||
* but the batch was not executing. In this case, assume that the context
|
||||
* was not at fault.
|
||||
*/
|
||||
if (pending != 0)
|
||||
if (pending != 0) {
|
||||
brw->reset_count = reset_count;
|
||||
return GL_INNOCENT_CONTEXT_RESET_ARB;
|
||||
|
||||
/* FINISHME: Should we report anything if reset_count > brw->reset_count?
|
||||
*/
|
||||
}
|
||||
|
||||
return GL_NO_ERROR;
|
||||
}
|
||||
|
@@ -206,6 +206,12 @@ void gen4_init_vtable_surface_functions(struct brw_context *brw);
|
||||
uint32_t brw_get_surface_tiling_bits(uint32_t tiling);
|
||||
uint32_t brw_get_surface_num_multisamples(unsigned num_samples);
|
||||
|
||||
void brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
|
||||
bool is_render_target,
|
||||
unsigned *width, unsigned *height,
|
||||
unsigned *pitch, uint32_t *tiling,
|
||||
unsigned *format);
|
||||
|
||||
uint32_t brw_format_for_mesa_format(mesa_format mesa_format);
|
||||
|
||||
GLuint translate_tex_target(GLenum target);
|
||||
|
@@ -45,6 +45,7 @@ bool
|
||||
brw_color_buffer_write_enabled(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||
const struct gl_fragment_program *fp = brw->fragment_program;
|
||||
int i;
|
||||
|
||||
|
@@ -101,6 +101,30 @@ brw_get_surface_num_multisamples(unsigned num_samples)
|
||||
return BRW_SURFACE_MULTISAMPLECOUNT_1;
|
||||
}
|
||||
|
||||
void
|
||||
brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
|
||||
bool is_render_target,
|
||||
unsigned *width, unsigned *height,
|
||||
unsigned *pitch, uint32_t *tiling, unsigned *format)
|
||||
{
|
||||
static const unsigned halign_stencil = 8;
|
||||
|
||||
/* In Y-tiling row is twice as wide as in W-tiling, and subsequently
|
||||
* there are half as many rows.
|
||||
* In addition, mip-levels are accessed manually by the program and
|
||||
* therefore the surface is setup to cover all the mip-levels for one slice.
|
||||
* (Hardware is still used to access individual slices).
|
||||
*/
|
||||
*tiling = I915_TILING_Y;
|
||||
*pitch = mt->pitch * 2;
|
||||
*width = ALIGN(mt->total_width, halign_stencil) * 2;
|
||||
*height = (mt->total_height / mt->physical_depth0) / 2;
|
||||
|
||||
if (is_render_target) {
|
||||
*format = BRW_SURFACEFORMAT_R8_UINT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
|
||||
|
@@ -65,7 +65,7 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
|
||||
rb = (struct gl_renderbuffer*) irb;
|
||||
|
||||
if (rb) {
|
||||
depth = MAX2(rb->Depth, 1);
|
||||
depth = MAX2(irb->layer_count, 1);
|
||||
if (rb->TexImage)
|
||||
gl_target = rb->TexImage->TexObject->Target;
|
||||
}
|
||||
@@ -81,19 +81,16 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
|
||||
surftype = BRW_SURFACE_2D;
|
||||
depth *= 6;
|
||||
break;
|
||||
case GL_TEXTURE_3D:
|
||||
assert(mt);
|
||||
depth = MAX2(mt->logical_depth0, 1);
|
||||
/* fallthrough */
|
||||
default:
|
||||
surftype = translate_tex_target(gl_target);
|
||||
break;
|
||||
}
|
||||
|
||||
if (fb->MaxNumLayers > 0 || !irb) {
|
||||
min_array_element = 0;
|
||||
} else if (irb->mt->num_samples > 1) {
|
||||
/* Convert physical layer to logical layer. */
|
||||
min_array_element = irb->mt_layer / irb->mt->num_samples;
|
||||
} else {
|
||||
min_array_element = irb->mt_layer;
|
||||
}
|
||||
min_array_element = irb ? irb->mt_layer : 0;
|
||||
|
||||
lod = irb ? irb->mt_level - irb->mt->first_level : 0;
|
||||
|
||||
|
@@ -454,9 +454,11 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
|
||||
mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
|
||||
uint32_t surftype;
|
||||
bool is_array = false;
|
||||
int depth = MAX2(rb->Depth, 1);
|
||||
int min_array_element;
|
||||
int depth = MAX2(irb->layer_count, 1);
|
||||
const uint8_t mocs = GEN7_MOCS_L3;
|
||||
|
||||
int min_array_element = irb->mt_layer / MAX2(mt->num_samples, 1);
|
||||
|
||||
GLenum gl_target = rb->TexImage ?
|
||||
rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
|
||||
|
||||
@@ -486,20 +488,15 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
|
||||
is_array = true;
|
||||
depth *= 6;
|
||||
break;
|
||||
case GL_TEXTURE_3D:
|
||||
depth = MAX2(irb->mt->logical_depth0, 1);
|
||||
/* fallthrough */
|
||||
default:
|
||||
surftype = translate_tex_target(gl_target);
|
||||
is_array = _mesa_tex_target_is_array(gl_target);
|
||||
break;
|
||||
}
|
||||
|
||||
if (layered) {
|
||||
min_array_element = 0;
|
||||
} else if (irb->mt->num_samples > 1) {
|
||||
min_array_element = irb->mt_layer / irb->mt->num_samples;
|
||||
} else {
|
||||
min_array_element = irb->mt_layer;
|
||||
}
|
||||
|
||||
surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT |
|
||||
format << BRW_SURFACE_FORMAT_SHIFT |
|
||||
(irb->mt->array_spacing_lod0 ? GEN7_SURFACE_ARYSPC_LOD0
|
||||
|
@@ -215,7 +215,7 @@ gen8_upload_ps_blend(struct brw_context *brw)
|
||||
/* _NEW_BUFFERS */
|
||||
struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[0];
|
||||
|
||||
/* _NEW_BUFFERS | _NEW_COLOR */
|
||||
/* BRW_NEW_FRAGMENT_PROGRAM | _NEW_BUFFERS | _NEW_COLOR */
|
||||
if (brw_color_buffer_write_enabled(brw))
|
||||
dw1 |= GEN8_PS_BLEND_HAS_WRITEABLE_RT;
|
||||
|
||||
@@ -290,7 +290,7 @@ gen8_upload_ps_blend(struct brw_context *brw)
|
||||
const struct brw_tracked_state gen8_ps_blend = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_BUFFERS | _NEW_COLOR | _NEW_MULTISAMPLE,
|
||||
.brw = BRW_NEW_CONTEXT,
|
||||
.brw = BRW_NEW_CONTEXT | BRW_NEW_FRAGMENT_PROGRAM,
|
||||
.cache = 0,
|
||||
},
|
||||
.emit = gen8_upload_ps_blend
|
||||
|
@@ -75,7 +75,7 @@ emit_depth_packets(struct brw_context *brw,
|
||||
OUT_BATCH(((width - 1) << 4) | ((height - 1) << 18) | lod);
|
||||
OUT_BATCH(((depth - 1) << 21) | (min_array_element << 10) | BDW_MOCS_WB);
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(depth_mt ? depth_mt->qpitch >> 2 : 0);
|
||||
OUT_BATCH(((depth - 1) << 21) | (depth_mt ? depth_mt->qpitch >> 2 : 0));
|
||||
ADVANCE_BATCH();
|
||||
|
||||
if (!hiz) {
|
||||
@@ -168,7 +168,7 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
|
||||
rb = (struct gl_renderbuffer *) irb;
|
||||
|
||||
if (rb) {
|
||||
depth = MAX2(rb->Depth, 1);
|
||||
depth = MAX2(irb->layer_count, 1);
|
||||
if (rb->TexImage)
|
||||
gl_target = rb->TexImage->TexObject->Target;
|
||||
}
|
||||
@@ -184,19 +184,16 @@ gen8_emit_depth_stencil_hiz(struct brw_context *brw,
|
||||
surftype = BRW_SURFACE_2D;
|
||||
depth *= 6;
|
||||
break;
|
||||
case GL_TEXTURE_3D:
|
||||
assert(mt);
|
||||
depth = MAX2(mt->logical_depth0, 1);
|
||||
/* fallthrough */
|
||||
default:
|
||||
surftype = translate_tex_target(gl_target);
|
||||
break;
|
||||
}
|
||||
|
||||
if (fb->MaxNumLayers > 0 || !irb) {
|
||||
min_array_element = 0;
|
||||
} else if (irb->mt->num_samples > 1) {
|
||||
/* Convert physical to logical layer. */
|
||||
min_array_element = irb->mt_layer / irb->mt->num_samples;
|
||||
} else {
|
||||
min_array_element = irb->mt_layer;
|
||||
}
|
||||
min_array_element = irb ? irb->mt_layer : 0;
|
||||
|
||||
lod = irb ? irb->mt_level - irb->mt->first_level : 0;
|
||||
|
||||
|
@@ -73,16 +73,17 @@ gen8_fs_generator::generate_fb_write(fs_inst *ir)
|
||||
|
||||
if (ir->target > 0 && c->key.replicate_alpha) {
|
||||
/* Set "Source0 Alpha Present to RenderTarget" bit in the header. */
|
||||
OR(vec1(retype(brw_message_reg(ir->base_mrf), BRW_REGISTER_TYPE_UD)),
|
||||
vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
|
||||
brw_imm_ud(1 << 11));
|
||||
gen8_instruction *inst =
|
||||
OR(get_element_ud(brw_message_reg(ir->base_mrf), 0),
|
||||
vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)),
|
||||
brw_imm_ud(1 << 11));
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
}
|
||||
|
||||
if (ir->target > 0) {
|
||||
/* Set the render target index for choosing BLEND_STATE. */
|
||||
MOV(retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, ir->base_mrf, 2),
|
||||
BRW_REGISTER_TYPE_UD),
|
||||
brw_imm_ud(ir->target));
|
||||
MOV_RAW(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, ir->base_mrf, 2),
|
||||
brw_imm_ud(ir->target));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -422,6 +422,7 @@ gen8_generator::IF(unsigned predicate)
|
||||
{
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_IF);
|
||||
gen8_set_dst(brw, inst, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
|
||||
gen8_set_src0(brw, inst, brw_imm_d(0));
|
||||
gen8_set_exec_size(inst, default_state.exec_size);
|
||||
gen8_set_pred_control(inst, predicate);
|
||||
gen8_set_mask_control(inst, BRW_MASK_ENABLE);
|
||||
@@ -435,6 +436,7 @@ gen8_generator::ELSE()
|
||||
{
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_ELSE);
|
||||
gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
gen8_set_src0(brw, inst, brw_imm_d(0));
|
||||
gen8_set_mask_control(inst, BRW_MASK_ENABLE);
|
||||
push_if_stack(inst);
|
||||
return inst;
|
||||
@@ -456,6 +458,7 @@ gen8_generator::ENDIF()
|
||||
|
||||
gen8_instruction *endif_inst = next_inst(BRW_OPCODE_ENDIF);
|
||||
gen8_set_mask_control(endif_inst, BRW_MASK_ENABLE);
|
||||
gen8_set_src0(brw, endif_inst, brw_imm_d(0));
|
||||
patch_IF_ELSE(if_inst, else_inst, endif_inst);
|
||||
|
||||
return endif_inst;
|
||||
@@ -577,8 +580,7 @@ gen8_generator::BREAK()
|
||||
{
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_BREAK);
|
||||
gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
gen8_set_src0(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
gen8_set_src1(brw, inst, brw_imm_d(0));
|
||||
gen8_set_src0(brw, inst, brw_imm_d(0));
|
||||
gen8_set_exec_size(inst, default_state.exec_size);
|
||||
return inst;
|
||||
}
|
||||
@@ -588,8 +590,7 @@ gen8_generator::CONTINUE()
|
||||
{
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_CONTINUE);
|
||||
gen8_set_dst(brw, inst, brw_ip_reg());
|
||||
gen8_set_src0(brw, inst, brw_ip_reg());
|
||||
gen8_set_src1(brw, inst, brw_imm_d(0));
|
||||
gen8_set_src0(brw, inst, brw_imm_d(0));
|
||||
gen8_set_exec_size(inst, default_state.exec_size);
|
||||
return inst;
|
||||
}
|
||||
@@ -601,8 +602,7 @@ gen8_generator::WHILE()
|
||||
gen8_instruction *while_inst = next_inst(BRW_OPCODE_WHILE);
|
||||
|
||||
gen8_set_dst(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
gen8_set_src0(brw, while_inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
gen8_set_src1(brw, while_inst, brw_imm_ud(0));
|
||||
gen8_set_src0(brw, while_inst, brw_imm_d(0));
|
||||
gen8_set_jip(while_inst, 16 * (do_inst - while_inst));
|
||||
gen8_set_exec_size(while_inst, default_state.exec_size);
|
||||
|
||||
@@ -614,7 +614,7 @@ gen8_generator::HALT()
|
||||
{
|
||||
gen8_instruction *inst = next_inst(BRW_OPCODE_HALT);
|
||||
gen8_set_dst(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
gen8_set_src0(brw, inst, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
gen8_set_src0(brw, inst, brw_imm_d(0));
|
||||
gen8_set_exec_size(inst, default_state.exec_size);
|
||||
gen8_set_mask_control(inst, BRW_MASK_DISABLE);
|
||||
return inst;
|
||||
|
@@ -134,17 +134,20 @@ gen8_update_texture_surface(struct gl_context *ctx,
|
||||
struct intel_mipmap_tree *mt = intelObj->mt;
|
||||
struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
|
||||
struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
|
||||
mesa_format format = intelObj->_Format;
|
||||
|
||||
if (tObj->Target == GL_TEXTURE_BUFFER) {
|
||||
brw_update_buffer_texture_surface(ctx, unit, surf_offset);
|
||||
return;
|
||||
}
|
||||
|
||||
if (tObj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL)
|
||||
if (tObj->StencilSampling && firstImage->_BaseFormat == GL_DEPTH_STENCIL) {
|
||||
mt = mt->stencil_mt;
|
||||
format = MESA_FORMAT_S_UINT8;
|
||||
}
|
||||
|
||||
unsigned tiling_mode, pitch;
|
||||
if (mt->format == MESA_FORMAT_S_UINT8) {
|
||||
if (format == MESA_FORMAT_S_UINT8) {
|
||||
tiling_mode = GEN8_SURFACE_TILING_W;
|
||||
pitch = 2 * mt->pitch;
|
||||
} else {
|
||||
@@ -152,9 +155,14 @@ gen8_update_texture_surface(struct gl_context *ctx,
|
||||
pitch = mt->pitch;
|
||||
}
|
||||
|
||||
uint32_t tex_format = translate_tex_format(brw,
|
||||
mt->format,
|
||||
sampler->sRGBDecode);
|
||||
/* If this is a view with restricted NumLayers, then our effective depth
|
||||
* is not just the miptree depth.
|
||||
*/
|
||||
uint32_t effective_depth =
|
||||
(tObj->Immutable && tObj->Target != GL_TEXTURE_3D) ? tObj->NumLayers
|
||||
: mt->logical_depth0;
|
||||
|
||||
uint32_t tex_format = translate_tex_format(brw, format, sampler->sRGBDecode);
|
||||
|
||||
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
|
||||
13 * 4, 64, surf_offset);
|
||||
@@ -178,11 +186,15 @@ gen8_update_texture_surface(struct gl_context *ctx,
|
||||
surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
|
||||
SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
|
||||
|
||||
surf[3] = SET_FIELD(mt->logical_depth0 - 1, BRW_SURFACE_DEPTH) | (pitch - 1);
|
||||
surf[3] = SET_FIELD(effective_depth - 1, BRW_SURFACE_DEPTH) | (pitch - 1);
|
||||
|
||||
surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
|
||||
surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) |
|
||||
SET_FIELD(tObj->MinLayer, GEN7_SURFACE_MIN_ARRAY_ELEMENT) |
|
||||
SET_FIELD(effective_depth - 1,
|
||||
GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT);
|
||||
|
||||
surf[5] = SET_FIELD(tObj->BaseLevel - mt->first_level, GEN7_SURFACE_MIN_LOD) |
|
||||
surf[5] = SET_FIELD(tObj->MinLevel + tObj->BaseLevel - mt->first_level,
|
||||
GEN7_SURFACE_MIN_LOD) |
|
||||
(intelObj->_MaxLevel - tObj->BaseLevel); /* mip count */
|
||||
|
||||
surf[6] = 0;
|
||||
@@ -272,12 +284,16 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
|
||||
struct intel_mipmap_tree *mt = irb->mt;
|
||||
unsigned width = mt->logical_width0;
|
||||
unsigned height = mt->logical_height0;
|
||||
unsigned pitch = mt->pitch;
|
||||
uint32_t tiling = mt->tiling;
|
||||
uint32_t format = 0;
|
||||
uint32_t surf_type;
|
||||
bool is_array = false;
|
||||
int depth = MAX2(rb->Depth, 1);
|
||||
int min_array_element;
|
||||
|
||||
int depth = MAX2(irb->layer_count, 1);
|
||||
const int min_array_element = (mt->format == MESA_FORMAT_S_UINT8) ?
|
||||
irb->mt_layer : (irb->mt_layer / MAX2(mt->num_samples, 1));
|
||||
GLenum gl_target =
|
||||
rb->TexImage ? rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
|
||||
|
||||
@@ -286,9 +302,6 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
|
||||
|
||||
intel_miptree_used_for_rendering(mt);
|
||||
|
||||
/* Render targets can't use IMS layout. */
|
||||
assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
|
||||
|
||||
switch (gl_target) {
|
||||
case GL_TEXTURE_CUBE_MAP_ARRAY:
|
||||
case GL_TEXTURE_CUBE_MAP:
|
||||
@@ -296,27 +309,31 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
|
||||
is_array = true;
|
||||
depth *= 6;
|
||||
break;
|
||||
case GL_TEXTURE_3D:
|
||||
depth = MAX2(irb->mt->logical_depth0, 1);
|
||||
/* fallthrough */
|
||||
default:
|
||||
surf_type = translate_tex_target(gl_target);
|
||||
is_array = _mesa_tex_target_is_array(gl_target);
|
||||
break;
|
||||
}
|
||||
|
||||
if (layered) {
|
||||
min_array_element = 0;
|
||||
} else if (mt->num_samples > 1) {
|
||||
min_array_element = irb->mt_layer / mt->num_samples;
|
||||
} else {
|
||||
min_array_element = irb->mt_layer;
|
||||
}
|
||||
|
||||
/* _NEW_BUFFERS */
|
||||
mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
|
||||
assert(brw_render_target_supported(brw, rb));
|
||||
format = brw->render_target_format[rb_format];
|
||||
if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
|
||||
_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
|
||||
__FUNCTION__, _mesa_get_format_name(rb_format));
|
||||
/* Render targets can't use IMS layout. Stencil in turn gets configured as
|
||||
* single sampled and indexed manually by the program.
|
||||
*/
|
||||
if (mt->format == MESA_FORMAT_S_UINT8) {
|
||||
brw_configure_w_tiled(mt, true, &width, &height, &pitch,
|
||||
&tiling, &format);
|
||||
} else {
|
||||
assert(mt->msaa_layout != INTEL_MSAA_LAYOUT_IMS);
|
||||
assert(brw_render_target_supported(brw, rb));
|
||||
mesa_format rb_format = _mesa_get_render_format(ctx,
|
||||
intel_rb_format(irb));
|
||||
format = brw->render_target_format[rb_format];
|
||||
if (unlikely(!brw->format_supported_as_render_target[rb_format]))
|
||||
_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
|
||||
__FUNCTION__, _mesa_get_format_name(rb_format));
|
||||
}
|
||||
|
||||
uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 13 * 4, 64,
|
||||
@@ -327,20 +344,22 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
|
||||
(format << BRW_SURFACE_FORMAT_SHIFT) |
|
||||
vertical_alignment(mt) |
|
||||
horizontal_alignment(mt) |
|
||||
surface_tiling_mode(mt->tiling);
|
||||
surface_tiling_mode(tiling);
|
||||
|
||||
surf[1] = SET_FIELD(BDW_MOCS_WT, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
|
||||
|
||||
surf[2] = SET_FIELD(mt->logical_width0 - 1, GEN7_SURFACE_WIDTH) |
|
||||
SET_FIELD(mt->logical_height0 - 1, GEN7_SURFACE_HEIGHT);
|
||||
surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
|
||||
SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
|
||||
|
||||
surf[3] = (depth - 1) << BRW_SURFACE_DEPTH_SHIFT |
|
||||
(mt->pitch - 1); /* Surface Pitch */
|
||||
(pitch - 1); /* Surface Pitch */
|
||||
|
||||
surf[4] = gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout) |
|
||||
min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
|
||||
surf[4] = min_array_element << GEN7_SURFACE_MIN_ARRAY_ELEMENT_SHIFT |
|
||||
(depth - 1) << GEN7_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT;
|
||||
|
||||
if (mt->format != MESA_FORMAT_S_UINT8)
|
||||
surf[4] |= gen7_surface_msaa_bits(mt->num_samples, mt->msaa_layout);
|
||||
|
||||
surf[5] = irb->mt_level - irb->mt->first_level;
|
||||
|
||||
surf[6] = 0; /* Nothing of relevance. */
|
||||
|
@@ -65,6 +65,7 @@ intelDrawBuffer(struct gl_context * ctx, GLenum mode)
|
||||
* (including the fake front) before we start rendering again.
|
||||
*/
|
||||
dri2InvalidateDrawable(brw->driContext->driDrawablePriv);
|
||||
intel_prepare_render(brw);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,6 +81,7 @@ intelReadBuffer(struct gl_context * ctx, GLenum mode)
|
||||
* (including the fake front) before we start reading again.
|
||||
*/
|
||||
dri2InvalidateDrawable(brw->driContext->driReadablePriv);
|
||||
intel_prepare_render(brw);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -254,7 +254,8 @@ intelInitExtensions(struct gl_context *ctx)
|
||||
|
||||
ctx->Extensions.EXT_framebuffer_multisample = true;
|
||||
ctx->Extensions.EXT_transform_feedback = true;
|
||||
ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
|
||||
if (brw->gen < 8)
|
||||
ctx->Extensions.EXT_framebuffer_multisample_blit_scaled = true;
|
||||
ctx->Extensions.ARB_blend_func_extended = !driQueryOptionb(&brw->optionCache, "disable_blend_func_extended");
|
||||
ctx->Extensions.ARB_draw_buffers_blend = true;
|
||||
ctx->Extensions.ARB_ES3_compatibility = true;
|
||||
@@ -284,6 +285,7 @@ intelInitExtensions(struct gl_context *ctx)
|
||||
|
||||
if (brw->gen >= 7) {
|
||||
ctx->Extensions.ARB_conservative_depth = true;
|
||||
ctx->Extensions.ARB_texture_view = true;
|
||||
ctx->Extensions.AMD_vertex_shader_layer = true;
|
||||
if (can_do_pipelined_register_writes(brw)) {
|
||||
ctx->Extensions.ARB_transform_feedback2 = true;
|
||||
@@ -302,10 +304,6 @@ intelInitExtensions(struct gl_context *ctx)
|
||||
ctx->Extensions.ARB_compute_shader = true;
|
||||
}
|
||||
|
||||
if (brw->gen == 7) {
|
||||
ctx->Extensions.ARB_texture_view = true;
|
||||
}
|
||||
|
||||
if (brw->gen >= 8) {
|
||||
ctx->Extensions.ARB_stencil_texturing = true;
|
||||
}
|
||||
|
@@ -865,6 +865,8 @@ intel_blit_framebuffer(struct gl_context *ctx,
|
||||
GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1,
|
||||
GLbitfield mask, GLenum filter)
|
||||
{
|
||||
struct brw_context *brw = brw_context(ctx);
|
||||
|
||||
/* Page 679 of OpenGL 4.4 spec says:
|
||||
* "Added BlitFramebuffer to commands affected by conditional rendering in
|
||||
* section 10.10 (Bug 9562)."
|
||||
@@ -872,13 +874,22 @@ intel_blit_framebuffer(struct gl_context *ctx,
|
||||
if (!_mesa_check_conditional_render(ctx))
|
||||
return;
|
||||
|
||||
mask = brw_blorp_framebuffer(brw_context(ctx),
|
||||
mask = brw_blorp_framebuffer(brw,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
if (mask == 0x0)
|
||||
return;
|
||||
|
||||
if (brw->gen >= 8 && (mask & GL_STENCIL_BUFFER_BIT)) {
|
||||
brw_meta_fbo_stencil_blit(brw_context(ctx),
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1);
|
||||
mask &= ~GL_STENCIL_BUFFER_BIT;
|
||||
if (mask == 0x0)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Try using the BLT engine. */
|
||||
mask = intel_blit_framebuffer_with_blitter(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
@@ -887,11 +898,17 @@ intel_blit_framebuffer(struct gl_context *ctx,
|
||||
if (mask == 0x0)
|
||||
return;
|
||||
|
||||
mask = _mesa_meta_BlitFramebuffer(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
if (mask == 0x0)
|
||||
return;
|
||||
|
||||
_mesa_meta_BlitFramebuffer(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
_swrast_BlitFramebuffer(ctx,
|
||||
srcX0, srcY0, srcX1, srcY1,
|
||||
dstX0, dstY0, dstX1, dstY1,
|
||||
mask, filter);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@@ -1655,7 +1655,7 @@ intel_miptree_updownsample(struct brw_context *brw,
|
||||
struct intel_mipmap_tree *src,
|
||||
struct intel_mipmap_tree *dst)
|
||||
{
|
||||
if (brw->gen < 8 || src->format == MESA_FORMAT_S_UINT8) {
|
||||
if (brw->gen < 8) {
|
||||
brw_blorp_blit_miptrees(brw,
|
||||
src, 0 /* level */, 0 /* layer */,
|
||||
dst, 0 /* level */, 0 /* layer */,
|
||||
@@ -1664,11 +1664,18 @@ intel_miptree_updownsample(struct brw_context *brw,
|
||||
0, 0,
|
||||
dst->logical_width0, dst->logical_height0,
|
||||
GL_NEAREST, false, false /*mirror x, y*/);
|
||||
} else if (src->format == MESA_FORMAT_S_UINT8) {
|
||||
brw_meta_stencil_updownsample(brw, src, dst);
|
||||
} else {
|
||||
brw_meta_updownsample(brw, src, dst);
|
||||
}
|
||||
|
||||
if (src->stencil_mt) {
|
||||
if (brw->gen >= 8) {
|
||||
brw_meta_stencil_updownsample(brw, src->stencil_mt, dst);
|
||||
return;
|
||||
}
|
||||
|
||||
brw_blorp_blit_miptrees(brw,
|
||||
src->stencil_mt, 0 /* level */, 0 /* layer */,
|
||||
dst->stencil_mt, 0 /* level */, 0 /* layer */,
|
||||
|
@@ -234,6 +234,7 @@ intel_set_texture_image_bo(struct gl_context *ctx,
|
||||
0, width, height, pitch);
|
||||
if (intel_image->mt == NULL)
|
||||
return;
|
||||
intel_image->mt->target = target;
|
||||
intel_image->mt->total_width = width;
|
||||
intel_image->mt->total_height = height;
|
||||
intel_image->mt->level[0].slice[0].x_offset = tile_x;
|
||||
|
@@ -155,5 +155,5 @@ nouveau_driver_functions_init(struct dd_function_table *functions)
|
||||
functions->DrawPixels = _mesa_meta_DrawPixels;
|
||||
functions->CopyPixels = _mesa_meta_CopyPixels;
|
||||
functions->Bitmap = _mesa_meta_Bitmap;
|
||||
functions->BlitFramebuffer = _mesa_meta_BlitFramebuffer;
|
||||
functions->BlitFramebuffer = _mesa_meta_and_swrast_BlitFramebuffer;
|
||||
}
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user