Compare commits
126 Commits
mesa-10.3.1
...
mesa-10.3.3
Author | SHA1 | Date | |
---|---|---|---|
|
1a9cc5f50d | ||
|
3be619f4b4 | ||
|
d5700dc276 | ||
|
d5ada3364f | ||
|
36b7043611 | ||
|
894ac63c34 | ||
|
d26258166c | ||
|
e8c7affa66 | ||
|
62b2c8aca0 | ||
|
e71a41852b | ||
|
136ab97b46 | ||
|
4956788a5f | ||
|
bcf414c1a8 | ||
|
d45d00cf38 | ||
|
4ed4dec642 | ||
|
a3d5e59563 | ||
|
69ac2043cf | ||
|
eaff221c9c | ||
|
a95a93b557 | ||
|
b0d6ba5970 | ||
|
b379e36e64 | ||
|
d6aab6b0c9 | ||
|
5b76a32132 | ||
|
708ee6f188 | ||
|
971ae04fe6 | ||
|
64373f072c | ||
|
9ea0efd1e2 | ||
|
bc96be5662 | ||
|
323380b7ed | ||
|
832cb958ff | ||
|
8a61e39531 | ||
|
4b30efcf99 | ||
|
bce1e7e1c9 | ||
|
bce2d42ddb | ||
|
7e81f4a7e7 | ||
|
63b8a08c45 | ||
|
4a97401abf | ||
|
dd79de214d | ||
|
1acdeab8a4 | ||
|
64e4ac780e | ||
|
5bc1397bda | ||
|
3b1a59259b | ||
|
74a92b1f34 | ||
|
af52b00b19 | ||
|
556d74b810 | ||
|
502c295025 | ||
|
c4f58245d0 | ||
|
eb496ff68d | ||
|
e8820c85b4 | ||
|
da71ef1893 | ||
|
0a003d1dbc | ||
|
3b755280af | ||
|
476c8c5028 | ||
|
6d5a3daca9 | ||
|
4d20bc6e8d | ||
|
f1fd768b98 | ||
|
d37c083778 | ||
|
b46151f2ca | ||
|
419acd3068 | ||
|
64ce1bf8f6 | ||
|
e67e5c6582 | ||
|
d654082d14 | ||
|
1a7fb8f04a | ||
|
fb10a43b84 | ||
|
e1c2a8f2cb | ||
|
0b339336b5 | ||
|
00c3ef169f | ||
|
f61b2185db | ||
|
637ddce9cc | ||
|
737c900506 | ||
|
336b75faca | ||
|
7f6c0f4de4 | ||
|
1a755fcc3a | ||
|
2a90f0fb85 | ||
|
3a64feedb8 | ||
|
544a368626 | ||
|
e10a243abf | ||
|
a3f6e58d6d | ||
|
14f6eb92f8 | ||
|
c3ee102f8e | ||
|
1c160747d0 | ||
|
c912acad17 | ||
|
81bd498908 | ||
|
6244af1343 | ||
|
5af1301751 | ||
|
3f545b96e5 | ||
|
69c1aa728d | ||
|
1d1bc7f7c2 | ||
|
34b62bd12e | ||
|
695a4b2b4e | ||
|
0e9bb8efe4 | ||
|
1a36639b06 | ||
|
06d5717692 | ||
|
852bb5dd62 | ||
|
b2c855b7f9 | ||
|
91f9cbc996 | ||
|
852da37330 | ||
|
ea955ffd4d | ||
|
9995edb700 | ||
|
8dfb9773c4 | ||
|
e90f0daaaa | ||
|
7fded6b548 | ||
|
5e5b48b10e | ||
|
d1794194f6 | ||
|
9599470642 | ||
|
3b6a4758fa | ||
|
e0aaa9591b | ||
|
724f71ef39 | ||
|
6fa07d1d48 | ||
|
8f6f6a28fa | ||
|
10d8287074 | ||
|
c759d1b6bf | ||
|
b37c1d4642 | ||
|
fb20a5aa98 | ||
|
4f33ded115 | ||
|
13a4fd2430 | ||
|
5e6ee119c0 | ||
|
85d7eb730a | ||
|
64c2bdc334 | ||
|
125cd86cd4 | ||
|
e3e68a36db | ||
|
745a0bfd62 | ||
|
ada5fd6e85 | ||
|
ecd2d078ac | ||
|
08f7e3591d | ||
|
fa98c74692 |
@@ -2057,7 +2057,12 @@ if test "x$MESA_LLVM" != x0; then
|
||||
dnl already added all of these objects to LLVM_LIBS.
|
||||
fi
|
||||
else
|
||||
AC_MSG_WARN([Building mesa with staticly linked LLVM may cause compilation issues])
|
||||
AC_MSG_WARN([Building mesa with statically linked LLVM may cause compilation issues])
|
||||
dnl We need to link to llvm system libs when using static libs
|
||||
dnl However, only llvm 3.5+ provides --system-libs
|
||||
if test $LLVM_VERSION_MAJOR -eq 3 -a $LLVM_VERSION_MINOR -ge 5; then
|
||||
LLVM_LIBS="$LLVM_LIBS `$LLVM_CONFIG --system-libs`"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
|
@@ -30,7 +30,9 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBA
|
||||
155afcbad17be8bb80282c761b957d5cc716c14a1fa16c4f5ee04e76df729c6d MesaLib-10.3.1.tar.gz
|
||||
b081d077d717e5d56f2d59677490856052c41573e50378ff86d6c72456714add MesaLib-10.3.1.tar.bz2
|
||||
07a14febfed06412d519e091a62d24513fee6745f1a6f8a8f1956bfe04b77d15 MesaLib-10.3.1.zip
|
||||
</pre>
|
||||
|
||||
<h2>New features</h2>
|
||||
|
115
docs/relnotes/10.3.2.html
Normal file
115
docs/relnotes/10.3.2.html
Normal file
@@ -0,0 +1,115 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 10.3.2 Release Notes / October 24, 2014</h1>
|
||||
|
||||
<p>
|
||||
Mesa 10.3.2 is a bug fix release which fixes bugs found since the 10.3 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 10.3.2 implements the OpenGL 3.3 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 3.3. OpenGL
|
||||
3.3 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
e65f8e691f06f111c1aeb3a376b13c9cc88cb162bee2709e0e7e6b0e6628ca75 MesaLib-10.3.2.tar.gz
|
||||
e9849bcb9aa9acd98a753d6d46d2e7d7238d3367036e11357a60efd16de8bea3 MesaLib-10.3.2.tar.bz2
|
||||
427dc0d670d38e713ebff2675665ec2fe4ff7d04ce227bd54de946999fc1d234 MesaLib-10.3.2.zip
|
||||
</pre>
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=54372">Bug 54372</a> - GLX_INTEL_swap_event crashes driver when swapping window buffers</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81680">Bug 81680</a> - [r600g] Firefox crashes with hardware acceleration turned on</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84140">Bug 84140</a> - mplayer crashes playing some files using vdpau output</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84662">Bug 84662</a> - Long pauses with Unreal demo Elemental on R9270X since : Always flush the HDP cache before submitting a CS to the GPU</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85267">Bug 85267</a> - vlc crashes with vdpau (Radeon 3850HD) [r600]</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Brian Paul (3):</p>
|
||||
<ul>
|
||||
<li>mesa: fix spurious wglGetProcAddress / GL_INVALID_OPERATION error</li>
|
||||
<li>st/wgl: add WINAPI qualifiers on wgl function typedefs</li>
|
||||
<li>glsl: fix several use-after-free bugs</li>
|
||||
</ul>
|
||||
|
||||
<p>Daniel Manjarres (1):</p>
|
||||
<ul>
|
||||
<li>glx: Fix glxUseXFont for glxWindow and glxPixmaps</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (1):</p>
|
||||
<ul>
|
||||
<li>mesa: fix GetTexImage for 1D array depth textures</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (2):</p>
|
||||
<ul>
|
||||
<li>docs: Add sha256 sums for the 10.3.1 release</li>
|
||||
<li>Update VERSION to 10.3.2</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (4):</p>
|
||||
<ul>
|
||||
<li>gm107/ir: add dnz emission for fmul</li>
|
||||
<li>gk110/ir: add dnz flag emission for fmul/fmad</li>
|
||||
<li>nouveau: 3d textures are unsupported, limit 3d levels to 1</li>
|
||||
<li>st/gbm: fix order of arguments passed to is_format_supported</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (3):</p>
|
||||
<ul>
|
||||
<li>i965: Add a BRW_MOCS_PTE #define.</li>
|
||||
<li>i965: Use BDW_MOCS_PTE for renderbuffers.</li>
|
||||
<li>i965: Fix register write checks.</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (2):</p>
|
||||
<ul>
|
||||
<li>st/mesa: use pipe_sampler_view_release for releasing sampler views</li>
|
||||
<li>glsl_to_tgsi: fix the value of gl_FrontFacing with native integers</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (4):</p>
|
||||
<ul>
|
||||
<li>radeonsi: Clear sampler view flags when binding a buffer</li>
|
||||
<li>r600g,radeonsi: Always use GTT again for PIPE_USAGE_STREAM buffers</li>
|
||||
<li>winsys/radeon: Use separate caching buffer manager for each set of flags</li>
|
||||
<li>r600g: Drop references to destroyed blend state</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
207
docs/relnotes/10.3.3.html
Normal file
207
docs/relnotes/10.3.3.html
Normal file
@@ -0,0 +1,207 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 10.3.3 Release Notes / November 8, 2014</h1>
|
||||
|
||||
<p>
|
||||
Mesa 10.3.3 is a bug fix release which fixes bugs found since the 10.3.2 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 10.3.3 implements the OpenGL 3.3 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 3.3. OpenGL
|
||||
3.3 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=70410">Bug 70410</a> - egl-static/Makefile: linking fails with llvm &gt;= 3.4</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82921">Bug 82921</a> - layout(location=0) emits error &gt;= MAX_UNIFORM_LOCATIONS due to integer underflow</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83574">Bug 83574</a> - [llvmpipe] [softpipe] piglit arb_explicit_uniform_location-use-of-unused-loc regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85454">Bug 85454</a> - Unigine Sanctuary with Wine crashes on Mesa Git</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85918">Bug 85918</a> - Mesa: MSVC 2010/2012 Compile error</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Anuj Phogat (2):</p>
|
||||
<ul>
|
||||
<li>glsl: Fix crash due to negative array index</li>
|
||||
<li>glsl: Use signed array index in update_max_array_access()</li>
|
||||
</ul>
|
||||
|
||||
<p>Brian Paul (1):</p>
|
||||
<ul>
|
||||
<li>mesa: fix UNCLAMPED_FLOAT_TO_UBYTE() macro for MSVC</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (2):</p>
|
||||
<ul>
|
||||
<li>docs: Add sha256 sums for the 10.3.2 release</li>
|
||||
<li>Update version to 10.3.3</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (27):</p>
|
||||
<ul>
|
||||
<li>freedreno/ir3: fix FSLT/etc handling to return 0/-1 instead of 0/1.0</li>
|
||||
<li>freedreno/ir3: INEG operates on src0, not src1</li>
|
||||
<li>freedreno/ir3: add UARL support</li>
|
||||
<li>freedreno/ir3: negate result of USLT/etc</li>
|
||||
<li>freedreno/ir3: use unsigned comparison for UIF</li>
|
||||
<li>freedreno/ir3: add TXL support</li>
|
||||
<li>freedreno/ir3: fix UCMP handling</li>
|
||||
<li>freedreno/ir3: implement UMUL correctly</li>
|
||||
<li>freedreno: add default .dir-locals.el for emacs settings</li>
|
||||
<li>freedreno/ir3: make texture instruction construction more dynamic</li>
|
||||
<li>freedreno/ir3: fix TXB/TXL to actually pull the bias/lod argument</li>
|
||||
<li>freedreno/ir3: add TXQ support</li>
|
||||
<li>freedreno/ir3: add TXB2 support</li>
|
||||
<li>freedreno: dual-source render targets are not supported</li>
|
||||
<li>freedreno: instanced drawing/compute not yet supported</li>
|
||||
<li>freedreno/ir3: avoid fan-in sources referring to same instruction</li>
|
||||
<li>freedreno/ir3: add IDIV/UDIV support</li>
|
||||
<li>freedreno/ir3: add UMOD support, based on UDIV</li>
|
||||
<li>freedreno/ir3: add MOD support</li>
|
||||
<li>freedreno/ir3: add ISSG support</li>
|
||||
<li>freedreno/ir3: add UMAD support</li>
|
||||
<li>freedreno/ir3: make TXQ return integers, not floats</li>
|
||||
<li>freedreno/ir3: shadow comes before array</li>
|
||||
<li>freedreno/ir3: add texture offset support</li>
|
||||
<li>freedreno/ir3: add TXD support and expose ARB_shader_texture_lod</li>
|
||||
<li>freedreno/ir3: add TXF support</li>
|
||||
<li>freedreno: positions come out as integers, not half-integers</li>
|
||||
</ul>
|
||||
|
||||
<p>Jan Vesely (1):</p>
|
||||
<ul>
|
||||
<li>configure: include llvm systemlibs when using static llvm</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (5):</p>
|
||||
<ul>
|
||||
<li>r600g: fix polygon mode for points and lines and point/line fill modes</li>
|
||||
<li>radeonsi: fix polygon mode for points and lines and point/line fill modes</li>
|
||||
<li>radeonsi: fix incorrect index buffer max size for lowered 8-bit indices</li>
|
||||
<li>Revert "st/mesa: set MaxUnrollIterations = 255"</li>
|
||||
<li>r300g: remove enabled/disabled hyperz and AA compression messages</li>
|
||||
</ul>
|
||||
|
||||
<p>Mauro Rossi (1):</p>
|
||||
<ul>
|
||||
<li>gallium/nouveau: fully build the driver under android</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (1):</p>
|
||||
<ul>
|
||||
<li>radeon/llvm: Dynamically allocate branch/loop stack arrays</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (62):</p>
|
||||
<ul>
|
||||
<li>freedreno/ir3: detect scheduler fail</li>
|
||||
<li>freedreno/ir3: add TXB</li>
|
||||
<li>freedreno/ir3: add DDX/DDY</li>
|
||||
<li>freedreno/ir3: bit of debug</li>
|
||||
<li>freedreno/ir3: fix error in bail logic</li>
|
||||
<li>freedreno/ir3: fix constlen with relative addressing</li>
|
||||
<li>freedreno/ir3: add no-copy-propagate fallback step</li>
|
||||
<li>freedreno: don't overflow cmdstream buffer so much</li>
|
||||
<li>freedreno/ir3: fix potential segfault in RA</li>
|
||||
<li>freedreno: update generated headers</li>
|
||||
<li>freedreno/a3xx: enable hw primitive-restart</li>
|
||||
<li>freedreno/a3xx: handle rendering to layer != 0</li>
|
||||
<li>freedreno: update generated headers</li>
|
||||
<li>freedreno/a3xx: format fixes</li>
|
||||
<li>util/u_format: add _is_alpha()</li>
|
||||
<li>freedreno/a3xx: alpha render-target shenanigans</li>
|
||||
<li>freedreno/ir3: catch incorrect usage of tmp-dst</li>
|
||||
<li>freedreno/ir3: add missing put_dst</li>
|
||||
<li>freedreno: "fix" problems with excessive flushes</li>
|
||||
<li>freedreno: update generated headers</li>
|
||||
<li>freedreno/a3xx: 3d/array textures</li>
|
||||
<li>freedreno: add DRM_CONF_SHARE_FD</li>
|
||||
<li>freedreno/a3xx: more texture array fixes</li>
|
||||
<li>freedreno/a3xx: initial texture border-color</li>
|
||||
<li>freedreno: fix compiler warning</li>
|
||||
<li>freedreno: don't advertise mirror-clamp support</li>
|
||||
<li>freedreno: update generated headers</li>
|
||||
<li>freedreno: we have more than 0 viewports!</li>
|
||||
<li>freedreno: turn missing caps into compile warnings</li>
|
||||
<li>freedreno/a3xx: add LOD_BIAS</li>
|
||||
<li>freedreno/a3xx: add flat interpolation mode</li>
|
||||
<li>freedreno/a3xx: add 32bit integer vtx formats</li>
|
||||
<li>freedreno/a3xx: fix border color order</li>
|
||||
<li>freedreno: move bind_sampler_states to per-generation</li>
|
||||
<li>freedreno: add texcoord clamp support to lowering</li>
|
||||
<li>freedreno/a3xx: add support to emulate GL_CLAMP</li>
|
||||
<li>freedreno/a3xx: re-emit shaders on variant change</li>
|
||||
<li>freedreno/lowering: fix token calculation for lowering</li>
|
||||
<li>freedreno: destroy transfer pool after blitter</li>
|
||||
<li>freedreno: max-texture-lod-bias should be 15.0f</li>
|
||||
<li>freedreno: update generated headers</li>
|
||||
<li>freedreno/a3xx: handle large shader program sizes</li>
|
||||
<li>freedreno/a3xx: emit all immediates in one shot</li>
|
||||
<li>freedreno/ir3: fix lockups with lame FRAG shaders</li>
|
||||
<li>freedreno/a3xx: handle VS only outputting BCOLOR</li>
|
||||
<li>freedreno: query fixes</li>
|
||||
<li>freedreno/a3xx: refactor vertex state emit</li>
|
||||
<li>freedreno/a3xx: refactor/optimize emit</li>
|
||||
<li>freedreno/ir3: optimize shader key comparision</li>
|
||||
<li>freedreno: inline fd_draw_emit()</li>
|
||||
<li>freedreno: fix layer_stride</li>
|
||||
<li>freedreno: update generated headers</li>
|
||||
<li>freedreno/ir3: large const support</li>
|
||||
<li>freedreno/a3xx: more layer/level fixes</li>
|
||||
<li>freedreno/ir3: comment + better fxn name</li>
|
||||
<li>freedreno/ir3: fix potential gpu lockup with kill</li>
|
||||
<li>freedreno/a3xx: disable early-z when we have kill's</li>
|
||||
<li>freedreno/ir3: add debug flag to disable cp</li>
|
||||
<li>freedreno: clear vs scissor</li>
|
||||
<li>freedreno: mark scissor state dirty when enable bit changes</li>
|
||||
<li>freedreno/a3xx: fix viewport state during clear</li>
|
||||
<li>freedreno/a3xx: fix depth/stencil restore format</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (2):</p>
|
||||
<ul>
|
||||
<li>glsl: fix uniform location count used for glsl types</li>
|
||||
<li>mesa: check that uniform exists in glUniform* functions</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -465,7 +465,7 @@ dd_configuration(enum drm_conf conf)
|
||||
#endif
|
||||
#if defined(GALLIUM_FREEDRENO)
|
||||
if ((strcmp(driver_name, "kgsl") == 0) || (strcmp(driver_name, "msm") == 0))
|
||||
return NULL;
|
||||
return configuration_query(conf);
|
||||
else
|
||||
#endif
|
||||
return NULL;
|
||||
|
@@ -91,6 +91,23 @@ util_format_is_luminance(enum pipe_format format)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
boolean
|
||||
util_format_is_alpha(enum pipe_format format)
|
||||
{
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
|
||||
if ((desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
|
||||
desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) &&
|
||||
desc->swizzle[0] == UTIL_FORMAT_SWIZZLE_0 &&
|
||||
desc->swizzle[1] == UTIL_FORMAT_SWIZZLE_0 &&
|
||||
desc->swizzle[2] == UTIL_FORMAT_SWIZZLE_0 &&
|
||||
desc->swizzle[3] == UTIL_FORMAT_SWIZZLE_X) {
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
boolean
|
||||
util_format_is_pure_integer(enum pipe_format format)
|
||||
{
|
||||
|
@@ -661,6 +661,8 @@ util_format_has_alpha(enum pipe_format format);
|
||||
boolean
|
||||
util_format_is_luminance(enum pipe_format format);
|
||||
|
||||
boolean
|
||||
util_format_is_alpha(enum pipe_format format);
|
||||
|
||||
boolean
|
||||
util_format_is_luminance_alpha(enum pipe_format format);
|
||||
|
8
src/gallium/drivers/freedreno/.dir-locals.el
Normal file
8
src/gallium/drivers/freedreno/.dir-locals.el
Normal file
@@ -0,0 +1,8 @@
|
||||
((nil
|
||||
(indent-tabs-mode . true)
|
||||
(tab-width . 4)
|
||||
(c-basic-offset . 4)
|
||||
(c-file-style . "k&r")
|
||||
(fill-column . 78)
|
||||
)
|
||||
)
|
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9859 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-07-19 17:20:53)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 58020 bytes, from 2014-07-19 17:21:17)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 36670 bytes, from 2014-07-19 17:18:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10347 bytes, from 2014-10-01 18:55:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14960 bytes, from 2014-07-27 17:22:13)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 60533 bytes, from 2014-10-15 18:32:43)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 41068 bytes, from 2014-08-01 12:22:48)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -1243,13 +1243,13 @@ static inline uint32_t A2XX_CLEAR_COLOR_ALPHA(uint32_t val)
|
||||
#define A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT 0
|
||||
static inline uint32_t A2XX_PA_SU_POINT_SIZE_HEIGHT(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK;
|
||||
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_HEIGHT__SHIFT) & A2XX_PA_SU_POINT_SIZE_HEIGHT__MASK;
|
||||
}
|
||||
#define A2XX_PA_SU_POINT_SIZE_WIDTH__MASK 0xffff0000
|
||||
#define A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT 16
|
||||
static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK;
|
||||
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_SIZE_WIDTH__SHIFT) & A2XX_PA_SU_POINT_SIZE_WIDTH__MASK;
|
||||
}
|
||||
|
||||
#define REG_A2XX_PA_SU_POINT_MINMAX 0x00002281
|
||||
@@ -1257,13 +1257,13 @@ static inline uint32_t A2XX_PA_SU_POINT_SIZE_WIDTH(float val)
|
||||
#define A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT 0
|
||||
static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MIN(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK;
|
||||
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MIN__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MIN__MASK;
|
||||
}
|
||||
#define A2XX_PA_SU_POINT_MINMAX_MAX__MASK 0xffff0000
|
||||
#define A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT 16
|
||||
static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK;
|
||||
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_POINT_MINMAX_MAX__SHIFT) & A2XX_PA_SU_POINT_MINMAX_MAX__MASK;
|
||||
}
|
||||
|
||||
#define REG_A2XX_PA_SU_LINE_CNTL 0x00002282
|
||||
@@ -1271,7 +1271,7 @@ static inline uint32_t A2XX_PA_SU_POINT_MINMAX_MAX(float val)
|
||||
#define A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT 0
|
||||
static inline uint32_t A2XX_PA_SU_LINE_CNTL_WIDTH(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK;
|
||||
return ((((uint32_t)(val * 16.0))) << A2XX_PA_SU_LINE_CNTL_WIDTH__SHIFT) & A2XX_PA_SU_LINE_CNTL_WIDTH__MASK;
|
||||
}
|
||||
|
||||
#define REG_A2XX_PA_SC_LINE_STIPPLE 0x00002283
|
||||
|
@@ -30,7 +30,6 @@
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "util/u_pack_color.h"
|
||||
|
||||
#include "freedreno_state.h"
|
||||
#include "freedreno_resource.h"
|
||||
@@ -57,8 +56,8 @@ emit_cacheflush(struct fd_ringbuffer *ring)
|
||||
static void
|
||||
emit_vertexbufs(struct fd_context *ctx)
|
||||
{
|
||||
struct fd_vertex_stateobj *vtx = ctx->vtx;
|
||||
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
|
||||
struct fd_vertex_stateobj *vtx = ctx->vtx.vtx;
|
||||
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf;
|
||||
struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS];
|
||||
unsigned i;
|
||||
|
||||
@@ -118,14 +117,6 @@ fd2_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
|
||||
}
|
||||
|
||||
|
||||
static uint32_t
|
||||
pack_rgba(enum pipe_format format, const float *rgba)
|
||||
{
|
||||
union util_color uc;
|
||||
util_pack_color(rgba, format, &uc);
|
||||
return uc.ui[0];
|
||||
}
|
||||
|
||||
static void
|
||||
fd2_clear(struct fd_context *ctx, unsigned buffers,
|
||||
const union pipe_color_union *color, double depth, unsigned stencil)
|
||||
|
@@ -317,10 +317,10 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
|
||||
OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
|
||||
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
|
||||
emit_mem2gmem_surf(ctx, bin_w * bin_h, pfb->zsbuf);
|
||||
|
||||
if (ctx->restore & FD_BUFFER_COLOR)
|
||||
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
|
||||
emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0]);
|
||||
|
||||
/* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
|
||||
|
@@ -174,7 +174,7 @@ patch_vtx_fetches(struct fd_context *ctx, struct fd2_shader_stateobj *so,
|
||||
struct ir2_instruction *instr = so->vfetch_instrs[i];
|
||||
struct pipe_vertex_element *elem = &vtx->pipe[i];
|
||||
struct pipe_vertex_buffer *vb =
|
||||
&ctx->vertexbuf.vb[elem->vertex_buffer_index];
|
||||
&ctx->vtx.vertexbuf.vb[elem->vertex_buffer_index];
|
||||
enum pipe_format format = elem->src_format;
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
@@ -258,7 +258,7 @@ fd2_program_validate(struct fd_context *ctx)
|
||||
|
||||
/* if necessary, fix up vertex fetch instructions: */
|
||||
if (ctx->dirty & (FD_DIRTY_VTXSTATE | FD_DIRTY_PROG))
|
||||
patch_vtx_fetches(ctx, prog->vp, ctx->vtx);
|
||||
patch_vtx_fetches(ctx, prog->vp, ctx->vtx.vtx);
|
||||
|
||||
/* if necessary, fix up texture fetch instructions: */
|
||||
if (ctx->dirty & (FD_DIRTY_TEXSTATE | FD_DIRTY_PROG)) {
|
||||
|
@@ -101,6 +101,25 @@ fd2_sampler_state_create(struct pipe_context *pctx,
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
fd2_sampler_states_bind(struct pipe_context *pctx,
|
||||
unsigned shader, unsigned start,
|
||||
unsigned nr, void **hwcso)
|
||||
{
|
||||
if (shader == PIPE_SHADER_FRAGMENT) {
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
|
||||
/* on a2xx, since there is a flat address space for textures/samplers,
|
||||
* a change in # of fragment textures/samplers will trigger patching and
|
||||
* re-emitting the vertex shader:
|
||||
*/
|
||||
if (nr != ctx->fragtex.num_samplers)
|
||||
ctx->dirty |= FD_DIRTY_TEXSTATE;
|
||||
}
|
||||
|
||||
fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
|
||||
}
|
||||
|
||||
static struct pipe_sampler_view *
|
||||
fd2_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
const struct pipe_sampler_view *cso)
|
||||
@@ -154,5 +173,6 @@ void
|
||||
fd2_texture_init(struct pipe_context *pctx)
|
||||
{
|
||||
pctx->create_sampler_state = fd2_sampler_state_create;
|
||||
pctx->bind_sampler_states = fd2_sampler_states_bind;
|
||||
pctx->create_sampler_view = fd2_sampler_view_create;
|
||||
}
|
||||
|
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9859 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-07-19 17:20:53)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 58020 bytes, from 2014-07-19 17:21:17)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 36670 bytes, from 2014-07-19 17:18:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10347 bytes, from 2014-10-01 18:55:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14960 bytes, from 2014-07-27 17:22:13)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 60533 bytes, from 2014-10-15 18:32:43)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 41068 bytes, from 2014-08-01 12:22:48)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -86,6 +86,14 @@ enum a3xx_vtx_fmt {
|
||||
VFMT_NORM_USHORT_16_16 = 29,
|
||||
VFMT_NORM_USHORT_16_16_16 = 30,
|
||||
VFMT_NORM_USHORT_16_16_16_16 = 31,
|
||||
VFMT_UINT_32 = 32,
|
||||
VFMT_UINT_32_32 = 33,
|
||||
VFMT_UINT_32_32_32 = 34,
|
||||
VFMT_UINT_32_32_32_32 = 35,
|
||||
VFMT_INT_32 = 36,
|
||||
VFMT_INT_32_32 = 37,
|
||||
VFMT_INT_32_32_32 = 38,
|
||||
VFMT_INT_32_32_32_32 = 39,
|
||||
VFMT_UBYTE_8 = 40,
|
||||
VFMT_UBYTE_8_8 = 41,
|
||||
VFMT_UBYTE_8_8_8 = 42,
|
||||
@@ -112,6 +120,7 @@ enum a3xx_tex_fmt {
|
||||
TFMT_NORM_USHORT_565 = 4,
|
||||
TFMT_NORM_USHORT_5551 = 6,
|
||||
TFMT_NORM_USHORT_4444 = 7,
|
||||
TFMT_NORM_USHORT_Z16 = 9,
|
||||
TFMT_NORM_UINT_X8Z24 = 10,
|
||||
TFMT_NORM_UINT_NV12_UV_TILED = 17,
|
||||
TFMT_NORM_UINT_NV12_Y_TILED = 19,
|
||||
@@ -149,6 +158,7 @@ enum a3xx_color_fmt {
|
||||
RB_R8G8B8A8_UNORM = 8,
|
||||
RB_Z16_UNORM = 12,
|
||||
RB_A8_UNORM = 20,
|
||||
RB_R8_UNORM = 21,
|
||||
RB_R16G16B16A16_FLOAT = 27,
|
||||
RB_R32G32B32A32_FLOAT = 51,
|
||||
};
|
||||
@@ -194,6 +204,11 @@ enum a3xx_rb_blend_opcode {
|
||||
BLEND_MAX_DST_SRC = 4,
|
||||
};
|
||||
|
||||
enum a3xx_intp_mode {
|
||||
SMOOTH = 0,
|
||||
FLAT = 1,
|
||||
};
|
||||
|
||||
enum a3xx_tex_filter {
|
||||
A3XX_TEX_NEAREST = 0,
|
||||
A3XX_TEX_LINEAR = 1,
|
||||
@@ -632,13 +647,13 @@ static inline uint32_t A3XX_GRAS_CL_VPORT_ZSCALE(float val)
|
||||
#define A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT 0
|
||||
static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MIN(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK;
|
||||
return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MIN__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MIN__MASK;
|
||||
}
|
||||
#define A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK 0xffff0000
|
||||
#define A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT 16
|
||||
static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK;
|
||||
return ((((uint32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_MINMAX_MAX__SHIFT) & A3XX_GRAS_SU_POINT_MINMAX_MAX__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_GRAS_SU_POINT_SIZE 0x00002069
|
||||
@@ -646,7 +661,7 @@ static inline uint32_t A3XX_GRAS_SU_POINT_MINMAX_MAX(float val)
|
||||
#define A3XX_GRAS_SU_POINT_SIZE__SHIFT 0
|
||||
static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 8.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK;
|
||||
return ((((int32_t)(val * 16.0))) << A3XX_GRAS_SU_POINT_SIZE__SHIFT) & A3XX_GRAS_SU_POINT_SIZE__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x0000206c
|
||||
@@ -654,7 +669,7 @@ static inline uint32_t A3XX_GRAS_SU_POINT_SIZE(float val)
|
||||
#define A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT 0
|
||||
static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 28.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
|
||||
return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x0000206d
|
||||
@@ -662,7 +677,7 @@ static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_SCALE_VAL(float val)
|
||||
#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT 0
|
||||
static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 28.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
|
||||
return ((((int32_t)(val * 16384.0))) << A3XX_GRAS_SU_POLY_OFFSET_OFFSET__SHIFT) & A3XX_GRAS_SU_POLY_OFFSET_OFFSET__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_GRAS_SU_MODE_CONTROL 0x00002070
|
||||
@@ -673,7 +688,7 @@ static inline uint32_t A3XX_GRAS_SU_POLY_OFFSET_OFFSET(float val)
|
||||
#define A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT 3
|
||||
static inline uint32_t A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 4.0))) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK;
|
||||
return ((((int32_t)(val * 4.0))) << A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__SHIFT) & A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH__MASK;
|
||||
}
|
||||
#define A3XX_GRAS_SU_MODE_CONTROL_POLY_OFFSET 0x00000800
|
||||
|
||||
@@ -1265,6 +1280,7 @@ static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_
|
||||
{
|
||||
return ((val) << A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__SHIFT) & A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE__MASK;
|
||||
}
|
||||
#define A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART 0x00100000
|
||||
#define A3XX_PC_PRIM_VTX_CNTL_PROVOKING_VTX_LAST 0x02000000
|
||||
#define A3XX_PC_PRIM_VTX_CNTL_PSIZE 0x04000000
|
||||
|
||||
@@ -1281,7 +1297,12 @@ static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CONSTSWITCHMODE 0x08000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27
|
||||
static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK;
|
||||
}
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_LAZYUPDATEDISABLE 0x10000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE 0x20000000
|
||||
#define A3XX_HLSQ_CONTROL_0_REG_TPFULLUPDATE 0x40000000
|
||||
@@ -1537,6 +1558,7 @@ static inline uint32_t A3XX_VFD_DECODE_INSTR_REGID(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_VFD_DECODE_INSTR_REGID__SHIFT) & A3XX_VFD_DECODE_INSTR_REGID__MASK;
|
||||
}
|
||||
#define A3XX_VFD_DECODE_INSTR_INT 0x00100000
|
||||
#define A3XX_VFD_DECODE_INSTR_SWAP__MASK 0x00c00000
|
||||
#define A3XX_VFD_DECODE_INSTR_SWAP__SHIFT 22
|
||||
static inline uint32_t A3XX_VFD_DECODE_INSTR_SWAP(enum a3xx_color_swap val)
|
||||
@@ -1604,6 +1626,102 @@ static inline uint32_t A3XX_VPC_PACK_NUMNONPOSVSVAR(uint32_t val)
|
||||
static inline uint32_t REG_A3XX_VPC_VARYING_INTERP(uint32_t i0) { return 0x00002282 + 0x1*i0; }
|
||||
|
||||
static inline uint32_t REG_A3XX_VPC_VARYING_INTERP_MODE(uint32_t i0) { return 0x00002282 + 0x1*i0; }
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C0__MASK 0x00000003
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT 0
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C0(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C0__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C0__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C1__MASK 0x0000000c
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT 2
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C1(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C1__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C1__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C2__MASK 0x00000030
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT 4
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C2(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C2__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C2__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C3__MASK 0x000000c0
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT 6
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C3(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C3__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C3__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C4__MASK 0x00000300
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT 8
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C4(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C4__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C4__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C5__MASK 0x00000c00
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT 10
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C5(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C5__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C5__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C6__MASK 0x00003000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT 12
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C6(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C6__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C6__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C7__MASK 0x0000c000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT 14
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C7(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C7__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C7__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C8__MASK 0x00030000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT 16
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C8(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C8__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C8__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C9__MASK 0x000c0000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT 18
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_C9(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_C9__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_C9__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CA__MASK 0x00300000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT 20
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CA(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CA__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CA__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CB__MASK 0x00c00000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT 22
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CB(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CB__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CB__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CC__MASK 0x03000000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT 24
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CC(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CC__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CC__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CD__MASK 0x0c000000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT 26
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CD(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CD__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CD__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CE__MASK 0x30000000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT 28
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CE(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CE__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CE__MASK;
|
||||
}
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CF__MASK 0xc0000000
|
||||
#define A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT 30
|
||||
static inline uint32_t A3XX_VPC_VARYING_INTERP_MODE_CF(enum a3xx_intp_mode val)
|
||||
{
|
||||
return ((val) << A3XX_VPC_VARYING_INTERP_MODE_CF__SHIFT) & A3XX_VPC_VARYING_INTERP_MODE_CF__MASK;
|
||||
}
|
||||
|
||||
static inline uint32_t REG_A3XX_VPC_VARYING_PS_REPL(uint32_t i0) { return 0x00002286 + 0x1*i0; }
|
||||
|
||||
@@ -1696,7 +1814,7 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT__MASK;
|
||||
}
|
||||
#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x3f000000
|
||||
#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__MASK 0x7f000000
|
||||
#define A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING__SHIFT 24
|
||||
static inline uint32_t A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
|
||||
{
|
||||
@@ -2347,17 +2465,23 @@ static inline uint32_t A3XX_TEX_SAMP_0_COMPARE_FUNC(enum adreno_compare_func val
|
||||
#define A3XX_TEX_SAMP_0_UNNORM_COORDS 0x80000000
|
||||
|
||||
#define REG_A3XX_TEX_SAMP_1 0x00000001
|
||||
#define A3XX_TEX_SAMP_1_LOD_BIAS__MASK 0x000007ff
|
||||
#define A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT 0
|
||||
static inline uint32_t A3XX_TEX_SAMP_1_LOD_BIAS(float val)
|
||||
{
|
||||
return ((((int32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_LOD_BIAS__SHIFT) & A3XX_TEX_SAMP_1_LOD_BIAS__MASK;
|
||||
}
|
||||
#define A3XX_TEX_SAMP_1_MAX_LOD__MASK 0x003ff000
|
||||
#define A3XX_TEX_SAMP_1_MAX_LOD__SHIFT 12
|
||||
static inline uint32_t A3XX_TEX_SAMP_1_MAX_LOD(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 12.0))) << A3XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A3XX_TEX_SAMP_1_MAX_LOD__MASK;
|
||||
return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MAX_LOD__SHIFT) & A3XX_TEX_SAMP_1_MAX_LOD__MASK;
|
||||
}
|
||||
#define A3XX_TEX_SAMP_1_MIN_LOD__MASK 0xffc00000
|
||||
#define A3XX_TEX_SAMP_1_MIN_LOD__SHIFT 22
|
||||
static inline uint32_t A3XX_TEX_SAMP_1_MIN_LOD(float val)
|
||||
{
|
||||
return ((((uint32_t)(val * 12.0))) << A3XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A3XX_TEX_SAMP_1_MIN_LOD__MASK;
|
||||
return ((((uint32_t)(val * 64.0))) << A3XX_TEX_SAMP_1_MIN_LOD__SHIFT) & A3XX_TEX_SAMP_1_MIN_LOD__MASK;
|
||||
}
|
||||
|
||||
#define REG_A3XX_TEX_CONST_0 0x00000000
|
||||
@@ -2448,6 +2572,24 @@ static inline uint32_t A3XX_TEX_CONST_2_SWAP(enum a3xx_color_swap val)
|
||||
}
|
||||
|
||||
#define REG_A3XX_TEX_CONST_3 0x00000003
|
||||
#define A3XX_TEX_CONST_3_LAYERSZ1__MASK 0x0000000f
|
||||
#define A3XX_TEX_CONST_3_LAYERSZ1__SHIFT 0
|
||||
static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ1(uint32_t val)
|
||||
{
|
||||
return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ1__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ1__MASK;
|
||||
}
|
||||
#define A3XX_TEX_CONST_3_DEPTH__MASK 0x0ffe0000
|
||||
#define A3XX_TEX_CONST_3_DEPTH__SHIFT 17
|
||||
static inline uint32_t A3XX_TEX_CONST_3_DEPTH(uint32_t val)
|
||||
{
|
||||
return ((val) << A3XX_TEX_CONST_3_DEPTH__SHIFT) & A3XX_TEX_CONST_3_DEPTH__MASK;
|
||||
}
|
||||
#define A3XX_TEX_CONST_3_LAYERSZ2__MASK 0xf0000000
|
||||
#define A3XX_TEX_CONST_3_LAYERSZ2__SHIFT 28
|
||||
static inline uint32_t A3XX_TEX_CONST_3_LAYERSZ2(uint32_t val)
|
||||
{
|
||||
return ((val >> 12) << A3XX_TEX_CONST_3_LAYERSZ2__SHIFT) & A3XX_TEX_CONST_3_LAYERSZ2__MASK;
|
||||
}
|
||||
|
||||
|
||||
#endif /* A3XX_XML */
|
||||
|
@@ -49,6 +49,9 @@ fd3_context_destroy(struct pipe_context *pctx)
|
||||
fd_bo_del(fd3_ctx->fs_pvt_mem);
|
||||
fd_bo_del(fd3_ctx->vsc_size_mem);
|
||||
|
||||
pctx->delete_vertex_elements_state(pctx, fd3_ctx->solid_vbuf_state.vtx);
|
||||
pctx->delete_vertex_elements_state(pctx, fd3_ctx->blit_vbuf_state.vtx);
|
||||
|
||||
pipe_resource_reference(&fd3_ctx->solid_vbuf, NULL);
|
||||
pipe_resource_reference(&fd3_ctx->blit_texcoord_vbuf, NULL);
|
||||
|
||||
@@ -135,7 +138,38 @@ fd3_context_create(struct pipe_screen *pscreen, void *priv)
|
||||
fd3_ctx->solid_vbuf = create_solid_vertexbuf(pctx);
|
||||
fd3_ctx->blit_texcoord_vbuf = create_blit_texcoord_vertexbuf(pctx);
|
||||
|
||||
/* setup solid_vbuf_state: */
|
||||
fd3_ctx->solid_vbuf_state.vtx = pctx->create_vertex_elements_state(
|
||||
pctx, 1, (struct pipe_vertex_element[]){{
|
||||
.vertex_buffer_index = 0,
|
||||
.src_offset = 0,
|
||||
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
|
||||
}});
|
||||
fd3_ctx->solid_vbuf_state.vertexbuf.count = 1;
|
||||
fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].stride = 12;
|
||||
fd3_ctx->solid_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->solid_vbuf;
|
||||
|
||||
/* setup blit_vbuf_state: */
|
||||
fd3_ctx->blit_vbuf_state.vtx = pctx->create_vertex_elements_state(
|
||||
pctx, 2, (struct pipe_vertex_element[]){{
|
||||
.vertex_buffer_index = 0,
|
||||
.src_offset = 0,
|
||||
.src_format = PIPE_FORMAT_R32G32_FLOAT,
|
||||
}, {
|
||||
.vertex_buffer_index = 1,
|
||||
.src_offset = 0,
|
||||
.src_format = PIPE_FORMAT_R32G32B32_FLOAT,
|
||||
}});
|
||||
fd3_ctx->blit_vbuf_state.vertexbuf.count = 2;
|
||||
fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].stride = 8;
|
||||
fd3_ctx->blit_vbuf_state.vertexbuf.vb[0].buffer = fd3_ctx->blit_texcoord_vbuf;
|
||||
fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].stride = 12;
|
||||
fd3_ctx->blit_vbuf_state.vertexbuf.vb[1].buffer = fd3_ctx->solid_vbuf;
|
||||
|
||||
fd3_query_context_init(pctx);
|
||||
|
||||
fd3_ctx->border_color_uploader = u_upload_create(pctx, 4096,
|
||||
2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, 0);
|
||||
|
||||
return pctx;
|
||||
}
|
||||
|
@@ -29,10 +29,15 @@
|
||||
#ifndef FD3_CONTEXT_H_
|
||||
#define FD3_CONTEXT_H_
|
||||
|
||||
#include "util/u_upload_mgr.h"
|
||||
|
||||
#include "freedreno_drmif.h"
|
||||
|
||||
#include "freedreno_context.h"
|
||||
|
||||
#include "ir3_shader.h"
|
||||
|
||||
|
||||
struct fd3_context {
|
||||
struct fd_context base;
|
||||
|
||||
@@ -56,6 +61,55 @@ struct fd3_context {
|
||||
/* vertex buf used for mem->gmem tex coords:
|
||||
*/
|
||||
struct pipe_resource *blit_texcoord_vbuf;
|
||||
|
||||
/* vertex state for solid_vbuf:
|
||||
* - solid_vbuf / 12 / R32G32B32_FLOAT
|
||||
*/
|
||||
struct fd_vertex_state solid_vbuf_state;
|
||||
|
||||
/* vertex state for blit_prog:
|
||||
* - blit_texcoord_vbuf / 8 / R32G32_FLOAT
|
||||
* - solid_vbuf / 12 / R32G32B32_FLOAT
|
||||
*/
|
||||
struct fd_vertex_state blit_vbuf_state;
|
||||
|
||||
|
||||
/*
|
||||
* Border color layout *appears* to be as arrays of 0x40 byte
|
||||
* elements, with frag shader elements starting at (16 x 0x40).
|
||||
* But at some point I should probably experiment more with
|
||||
* samplers in vertex shaders to be sure. Unclear about why
|
||||
* there is this offset when there are separate VS and FS base
|
||||
* addr regs.
|
||||
*
|
||||
* The first 8 bytes of each entry are the requested border
|
||||
* color in fp16. Unclear about the rest.. could be used for
|
||||
* other formats, or could simply be for aligning the pitch
|
||||
* to 32 pixels.
|
||||
*/
|
||||
#define BORDERCOLOR_SIZE 0x40
|
||||
|
||||
struct u_upload_mgr *border_color_uploader;
|
||||
struct pipe_resource *border_color_buf;
|
||||
|
||||
/* if *any* of bits are set in {v,f}saturate_{s,t,r} */
|
||||
bool vsaturate, fsaturate;
|
||||
|
||||
/* bitmask of sampler which needs coords clamped for vertex
|
||||
* shader:
|
||||
*/
|
||||
unsigned vsaturate_s, vsaturate_t, vsaturate_r;
|
||||
|
||||
/* bitmask of sampler which needs coords clamped for frag
|
||||
* shader:
|
||||
*/
|
||||
unsigned fsaturate_s, fsaturate_t, fsaturate_r;
|
||||
|
||||
/* some state changes require a different shader variant. Keep
|
||||
* track of this so we know when we need to re-emit shader state
|
||||
* due to variant change. See fixup_shader_state()
|
||||
*/
|
||||
struct ir3_shader_key last_key;
|
||||
};
|
||||
|
||||
static INLINE struct fd3_context *
|
||||
|
@@ -30,6 +30,7 @@
|
||||
#include "util/u_string.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "util/u_format.h"
|
||||
|
||||
#include "freedreno_state.h"
|
||||
#include "freedreno_resource.h"
|
||||
@@ -43,39 +44,15 @@
|
||||
|
||||
|
||||
static void
|
||||
emit_vertexbufs(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_key key)
|
||||
draw_impl(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct fd3_emit *emit)
|
||||
{
|
||||
struct fd_vertex_stateobj *vtx = ctx->vtx;
|
||||
struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vertexbuf;
|
||||
struct fd3_vertex_buf bufs[PIPE_MAX_ATTRIBS];
|
||||
unsigned i;
|
||||
const struct pipe_draw_info *info = emit->info;
|
||||
|
||||
if (!vtx->num_elements)
|
||||
return;
|
||||
fd3_emit_state(ctx, ring, emit);
|
||||
|
||||
for (i = 0; i < vtx->num_elements; i++) {
|
||||
struct pipe_vertex_element *elem = &vtx->pipe[i];
|
||||
struct pipe_vertex_buffer *vb =
|
||||
&vertexbuf->vb[elem->vertex_buffer_index];
|
||||
bufs[i].offset = vb->buffer_offset + elem->src_offset;
|
||||
bufs[i].stride = vb->stride;
|
||||
bufs[i].prsc = vb->buffer;
|
||||
bufs[i].format = elem->src_format;
|
||||
}
|
||||
|
||||
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->prog.vp, key),
|
||||
bufs, vtx->num_elements);
|
||||
}
|
||||
|
||||
static void
|
||||
draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
|
||||
struct fd_ringbuffer *ring, unsigned dirty, struct ir3_shader_key key)
|
||||
{
|
||||
fd3_emit_state(ctx, ring, &ctx->prog, dirty, key);
|
||||
|
||||
if (dirty & FD_DIRTY_VTXBUF)
|
||||
emit_vertexbufs(ctx, ring, key);
|
||||
if (emit->dirty & (FD_DIRTY_VTXBUF | FD_DIRTY_VTXSTATE))
|
||||
fd3_emit_vertex_bufs(ring, emit);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
|
||||
OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */
|
||||
@@ -91,27 +68,103 @@ draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info,
|
||||
info->restart_index : 0xffffffff);
|
||||
|
||||
fd_draw_emit(ctx, ring,
|
||||
key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
|
||||
emit->key.binning_pass ? IGNORE_VISIBILITY : USE_VISIBILITY,
|
||||
info);
|
||||
}
|
||||
|
||||
/* fixup dirty shader state in case some "unrelated" (from the state-
|
||||
* tracker's perspective) state change causes us to switch to a
|
||||
* different variant.
|
||||
*/
|
||||
static void
|
||||
fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
|
||||
{
|
||||
struct fd3_context *fd3_ctx = fd3_context(ctx);
|
||||
struct ir3_shader_key *last_key = &fd3_ctx->last_key;
|
||||
|
||||
if (!ir3_shader_key_equal(last_key, key)) {
|
||||
ctx->dirty |= FD_DIRTY_PROG;
|
||||
|
||||
if (last_key->has_per_samp || key->has_per_samp) {
|
||||
if ((last_key->vsaturate_s != key->vsaturate_s) ||
|
||||
(last_key->vsaturate_t != key->vsaturate_t) ||
|
||||
(last_key->vsaturate_r != key->vsaturate_r))
|
||||
ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
|
||||
|
||||
if ((last_key->fsaturate_s != key->fsaturate_s) ||
|
||||
(last_key->fsaturate_t != key->fsaturate_t) ||
|
||||
(last_key->fsaturate_r != key->fsaturate_r))
|
||||
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
|
||||
}
|
||||
|
||||
if (last_key->color_two_side != key->color_two_side)
|
||||
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
|
||||
|
||||
if (last_key->half_precision != key->half_precision)
|
||||
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
|
||||
|
||||
if (last_key->alpha != key->alpha)
|
||||
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
|
||||
|
||||
fd3_ctx->last_key = *key;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
|
||||
{
|
||||
unsigned dirty = ctx->dirty;
|
||||
struct ir3_shader_key key = {
|
||||
struct fd3_context *fd3_ctx = fd3_context(ctx);
|
||||
struct fd3_emit emit = {
|
||||
.vtx = &ctx->vtx,
|
||||
.prog = &ctx->prog,
|
||||
.info = info,
|
||||
.key = {
|
||||
/* do binning pass first: */
|
||||
.binning_pass = true,
|
||||
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
|
||||
.alpha = util_format_is_alpha(pipe_surface_format(ctx->framebuffer.cbufs[0])),
|
||||
// TODO set .half_precision based on render target format,
|
||||
// ie. float16 and smaller use half, float32 use full..
|
||||
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
|
||||
.has_per_samp = fd3_ctx->fsaturate || fd3_ctx->vsaturate,
|
||||
.vsaturate_s = fd3_ctx->vsaturate_s,
|
||||
.vsaturate_t = fd3_ctx->vsaturate_t,
|
||||
.vsaturate_r = fd3_ctx->vsaturate_r,
|
||||
.fsaturate_s = fd3_ctx->fsaturate_s,
|
||||
.fsaturate_t = fd3_ctx->fsaturate_t,
|
||||
.fsaturate_r = fd3_ctx->fsaturate_r,
|
||||
},
|
||||
.rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
|
||||
};
|
||||
draw_impl(ctx, info, ctx->binning_ring,
|
||||
dirty & ~(FD_DIRTY_BLEND), key);
|
||||
unsigned dirty;
|
||||
|
||||
fixup_shader_state(ctx, &emit.key);
|
||||
|
||||
dirty = ctx->dirty;
|
||||
emit.dirty = dirty & ~(FD_DIRTY_BLEND);
|
||||
draw_impl(ctx, ctx->binning_ring, &emit);
|
||||
|
||||
/* and now regular (non-binning) pass: */
|
||||
key.binning_pass = false;
|
||||
draw_impl(ctx, info, ctx->ring, dirty, key);
|
||||
emit.key.binning_pass = false;
|
||||
emit.dirty = dirty;
|
||||
emit.vp = NULL; /* we changed key so need to refetch vp */
|
||||
draw_impl(ctx, ctx->ring, &emit);
|
||||
}
|
||||
|
||||
/* clear operations ignore viewport state, so we need to reset it
|
||||
* based on framebuffer state:
|
||||
*/
|
||||
static void
|
||||
reset_viewport(struct fd_ringbuffer *ring, struct pipe_framebuffer_state *pfb)
|
||||
{
|
||||
float half_width = pfb->width * 0.5f;
|
||||
float half_height = pfb->height * 0.5f;
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 4);
|
||||
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XOFFSET(half_width - 0.5));
|
||||
OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(half_width));
|
||||
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YOFFSET(half_height - 0.5));
|
||||
OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(-half_height));
|
||||
}
|
||||
|
||||
/* binning pass cmds for a clear:
|
||||
@@ -127,19 +180,19 @@ fd3_clear_binning(struct fd_context *ctx, unsigned dirty)
|
||||
{
|
||||
struct fd3_context *fd3_ctx = fd3_context(ctx);
|
||||
struct fd_ringbuffer *ring = ctx->binning_ring;
|
||||
struct ir3_shader_key key = {
|
||||
struct fd3_emit emit = {
|
||||
.vtx = &fd3_ctx->solid_vbuf_state,
|
||||
.prog = &ctx->solid_prog,
|
||||
.key = {
|
||||
.binning_pass = true,
|
||||
.half_precision = true,
|
||||
},
|
||||
.dirty = dirty,
|
||||
};
|
||||
|
||||
fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);
|
||||
|
||||
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
|
||||
(struct fd3_vertex_buf[]) {{
|
||||
.prsc = fd3_ctx->solid_vbuf,
|
||||
.stride = 12,
|
||||
.format = PIPE_FORMAT_R32G32B32_FLOAT,
|
||||
}}, 1);
|
||||
fd3_emit_state(ctx, ring, &emit);
|
||||
fd3_emit_vertex_bufs(ring, &emit);
|
||||
reset_viewport(ring, &ctx->framebuffer);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
|
||||
OUT_RING(ring, A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(0) |
|
||||
@@ -168,17 +221,23 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
|
||||
struct fd_ringbuffer *ring = ctx->ring;
|
||||
unsigned dirty = ctx->dirty;
|
||||
unsigned ce, i;
|
||||
struct ir3_shader_key key = {
|
||||
struct fd3_emit emit = {
|
||||
.vtx = &fd3_ctx->solid_vbuf_state,
|
||||
.prog = &ctx->solid_prog,
|
||||
.key = {
|
||||
.half_precision = true,
|
||||
},
|
||||
};
|
||||
|
||||
dirty &= FD_DIRTY_VIEWPORT | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
|
||||
dirty &= FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR;
|
||||
dirty |= FD_DIRTY_PROG;
|
||||
emit.dirty = dirty;
|
||||
|
||||
fd3_clear_binning(ctx, dirty);
|
||||
|
||||
/* emit generic state now: */
|
||||
fd3_emit_state(ctx, ring, &ctx->solid_prog, dirty, key);
|
||||
fd3_emit_state(ctx, ring, &emit);
|
||||
reset_viewport(ring, &ctx->framebuffer);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_RB_BLEND_ALPHA, 1);
|
||||
OUT_RING(ring, A3XX_RB_BLEND_ALPHA_UINT(0xff) |
|
||||
@@ -269,12 +328,7 @@ fd3_clear(struct fd_context *ctx, unsigned buffers,
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
|
||||
OUT_RING(ring, A3XX_GRAS_SU_MODE_CONTROL_LINEHALFWIDTH(0));
|
||||
|
||||
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
|
||||
(struct fd3_vertex_buf[]) {{
|
||||
.prsc = fd3_ctx->solid_vbuf,
|
||||
.stride = 12,
|
||||
.format = PIPE_FORMAT_R32G32B32_FLOAT,
|
||||
}}, 1);
|
||||
fd3_emit_vertex_bufs(ring, &emit);
|
||||
|
||||
fd3_emit_constant(ring, SB_FRAG_SHADER, 0, 0, 4, color->ui, NULL);
|
||||
|
||||
|
@@ -92,14 +92,13 @@ emit_constants(struct fd_ringbuffer *ring,
|
||||
uint32_t enabled_mask = constbuf->enabled_mask;
|
||||
uint32_t first_immediate;
|
||||
uint32_t base = 0;
|
||||
unsigned i;
|
||||
|
||||
// XXX TODO only emit dirty consts.. but we need to keep track if
|
||||
// they are clobbered by a clear, gmem2mem, or mem2gmem..
|
||||
constbuf->dirty_mask = enabled_mask;
|
||||
|
||||
/* in particular, with binning shader and a unneeded consts no
|
||||
* longer referenced, we could end up w/ constlen that is smaller
|
||||
/* in particular, with binning shader we may end up with unused
|
||||
* consts, ie. we could end up w/ constlen that is smaller
|
||||
* than first_immediate. In that case truncate the user consts
|
||||
* early to avoid HLSQ lockup caused by writing too many consts
|
||||
*/
|
||||
@@ -137,12 +136,21 @@ emit_constants(struct fd_ringbuffer *ring,
|
||||
|
||||
/* emit shader immediates: */
|
||||
if (shader) {
|
||||
for (i = 0; i < shader->immediates_count; i++) {
|
||||
base = 4 * (shader->first_immediate + i);
|
||||
if (base >= (4 * shader->constlen))
|
||||
break;
|
||||
int size = shader->immediates_count;
|
||||
base = shader->first_immediate;
|
||||
|
||||
/* truncate size to avoid writing constants that shader
|
||||
* does not use:
|
||||
*/
|
||||
size = MIN2(size + base, shader->constlen) - base;
|
||||
|
||||
/* convert out of vec4: */
|
||||
base *= 4;
|
||||
size *= 4;
|
||||
|
||||
if (size > 0) {
|
||||
fd3_emit_constant(ring, sb, base,
|
||||
0, 4, shader->immediates[i].val, NULL);
|
||||
0, size, shader->immediates[0].val, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -152,9 +160,8 @@ emit_constants(struct fd_ringbuffer *ring,
|
||||
#define BASETABLE_SZ A3XX_MAX_MIP_LEVELS
|
||||
|
||||
static void
|
||||
emit_textures(struct fd_ringbuffer *ring,
|
||||
enum adreno_state_block sb,
|
||||
struct fd_texture_stateobj *tex)
|
||||
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
enum adreno_state_block sb, struct fd_texture_stateobj *tex)
|
||||
{
|
||||
static const unsigned tex_off[] = {
|
||||
[SB_VERT_TEX] = VERT_TEX_OFF,
|
||||
@@ -164,7 +171,18 @@ emit_textures(struct fd_ringbuffer *ring,
|
||||
[SB_VERT_TEX] = SB_VERT_MIPADDR,
|
||||
[SB_FRAG_TEX] = SB_FRAG_MIPADDR,
|
||||
};
|
||||
unsigned i, j;
|
||||
static const uint32_t bcolor_reg[] = {
|
||||
[SB_VERT_TEX] = REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
|
||||
[SB_FRAG_TEX] = REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
|
||||
};
|
||||
struct fd3_context *fd3_ctx = fd3_context(ctx);
|
||||
unsigned i, j, off;
|
||||
void *ptr;
|
||||
|
||||
u_upload_alloc(fd3_ctx->border_color_uploader,
|
||||
0, 2 * PIPE_MAX_SAMPLERS * BORDERCOLOR_SIZE, &off,
|
||||
&fd3_ctx->border_color_buf,
|
||||
&ptr);
|
||||
|
||||
if (tex->num_samplers > 0) {
|
||||
/* output sampler state: */
|
||||
@@ -180,6 +198,15 @@ emit_textures(struct fd_ringbuffer *ring,
|
||||
const struct fd3_sampler_stateobj *sampler = tex->samplers[i] ?
|
||||
fd3_sampler_stateobj(tex->samplers[i]) :
|
||||
&dummy_sampler;
|
||||
uint16_t *bcolor = (uint16_t *)((uint8_t *)ptr +
|
||||
(BORDERCOLOR_SIZE * tex_off[sb]) +
|
||||
(BORDERCOLOR_SIZE * i));
|
||||
|
||||
bcolor[0] = util_float_to_half(sampler->base.border_color.f[2]);
|
||||
bcolor[1] = util_float_to_half(sampler->base.border_color.f[1]);
|
||||
bcolor[2] = util_float_to_half(sampler->base.border_color.f[0]);
|
||||
bcolor[3] = util_float_to_half(sampler->base.border_color.f[3]);
|
||||
|
||||
OUT_RING(ring, sampler->texsamp0);
|
||||
OUT_RING(ring, sampler->texsamp1);
|
||||
}
|
||||
@@ -237,18 +264,31 @@ emit_textures(struct fd_ringbuffer *ring,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OUT_PKT0(ring, bcolor_reg[sb], 1);
|
||||
OUT_RELOC(ring, fd_resource(fd3_ctx->border_color_buf)->bo, off, 0, 0);
|
||||
|
||||
u_upload_unmap(fd3_ctx->border_color_uploader);
|
||||
}
|
||||
|
||||
/* emit texture state for mem->gmem restore operation.. eventually it would
|
||||
* be good to get rid of this and use normal CSO/etc state for more of these
|
||||
* special cases, but for now the compiler is not sufficient..
|
||||
*
|
||||
* Also, for using normal state, not quite sure how to handle the special
|
||||
* case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil.
|
||||
*/
|
||||
void
|
||||
fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf)
|
||||
{
|
||||
struct fd_resource *rsc = fd_resource(psurf->texture);
|
||||
unsigned lvl = psurf->u.tex.level;
|
||||
struct fd_resource_slice *slice = &rsc->slices[lvl];
|
||||
uint32_t layer_offset = slice->size0 * psurf->u.tex.first_layer;
|
||||
enum pipe_format format = fd3_gmem_restore_format(psurf->format);
|
||||
|
||||
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
|
||||
|
||||
/* output sampler state: */
|
||||
OUT_PKT3(ring, CP_LOAD_STATE, 4);
|
||||
OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
|
||||
@@ -272,14 +312,14 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
|
||||
CP_LOAD_STATE_0_NUM_UNIT(1));
|
||||
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
|
||||
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
|
||||
OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(psurf->format)) |
|
||||
OUT_RING(ring, A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
|
||||
A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
|
||||
fd3_tex_swiz(format, PIPE_SWIZZLE_RED, PIPE_SWIZZLE_GREEN,
|
||||
PIPE_SWIZZLE_BLUE, PIPE_SWIZZLE_ALPHA));
|
||||
OUT_RING(ring, A3XX_TEX_CONST_1_FETCHSIZE(TFETCH_DISABLE) |
|
||||
A3XX_TEX_CONST_1_WIDTH(psurf->width) |
|
||||
A3XX_TEX_CONST_1_HEIGHT(psurf->height));
|
||||
OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(rsc->slices[0].pitch * rsc->cpp) |
|
||||
OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(slice->pitch * rsc->cpp) |
|
||||
A3XX_TEX_CONST_2_INDX(0));
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
@@ -291,18 +331,21 @@ fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring, struct pipe_surface *psurf
|
||||
CP_LOAD_STATE_0_NUM_UNIT(1));
|
||||
OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
|
||||
CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
|
||||
OUT_RELOC(ring, rsc->bo, 0, 0, 0);
|
||||
OUT_RELOC(ring, rsc->bo, layer_offset, 0, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_variant *vp,
|
||||
struct fd3_vertex_buf *vbufs, uint32_t n)
|
||||
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
|
||||
{
|
||||
uint32_t i, j, last = 0;
|
||||
uint32_t total_in = 0;
|
||||
const struct fd_vertex_state *vtx = emit->vtx;
|
||||
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
|
||||
unsigned n = MIN2(vtx->vtx->num_elements, vp->inputs_count);
|
||||
|
||||
n = MIN2(n, vp->inputs_count);
|
||||
/* hw doesn't like to be configured for zero vbo's, it seems: */
|
||||
if (vtx->vtx->num_elements == 0)
|
||||
return;
|
||||
|
||||
for (i = 0; i < n; i++)
|
||||
if (vp->inputs[i].compmask)
|
||||
@@ -310,22 +353,25 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
|
||||
|
||||
for (i = 0, j = 0; i <= last; i++) {
|
||||
if (vp->inputs[i].compmask) {
|
||||
struct pipe_resource *prsc = vbufs[i].prsc;
|
||||
struct fd_resource *rsc = fd_resource(prsc);
|
||||
enum pipe_format pfmt = vbufs[i].format;
|
||||
struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
|
||||
const struct pipe_vertex_buffer *vb =
|
||||
&vtx->vertexbuf.vb[elem->vertex_buffer_index];
|
||||
struct fd_resource *rsc = fd_resource(vb->buffer);
|
||||
enum pipe_format pfmt = elem->src_format;
|
||||
enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
|
||||
bool switchnext = (i != last);
|
||||
bool isint = util_format_is_pure_integer(pfmt);
|
||||
uint32_t fs = util_format_get_blocksize(pfmt);
|
||||
|
||||
debug_assert(fmt != ~0);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
|
||||
OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
|
||||
A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vbufs[i].stride) |
|
||||
A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
|
||||
COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
|
||||
A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
|
||||
A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
|
||||
OUT_RELOC(ring, rsc->bo, vbufs[i].offset, 0, 0);
|
||||
OUT_RELOC(ring, rsc->bo, vb->buffer_offset + elem->src_offset, 0, 0);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
|
||||
OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
|
||||
@@ -335,6 +381,7 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
|
||||
A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
|
||||
A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
|
||||
A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
|
||||
COND(isint, A3XX_VFD_DECODE_INSTR_INT) |
|
||||
COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));
|
||||
|
||||
total_in += vp->inputs[i].ncomp;
|
||||
@@ -354,14 +401,11 @@ fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
|
||||
|
||||
void
|
||||
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct fd_program_stateobj *prog, uint32_t dirty,
|
||||
struct ir3_shader_key key)
|
||||
struct fd3_emit *emit)
|
||||
{
|
||||
struct ir3_shader_variant *vp;
|
||||
struct ir3_shader_variant *fp;
|
||||
|
||||
fp = fd3_shader_variant(prog->fp, key);
|
||||
vp = fd3_shader_variant(prog->vp, key);
|
||||
struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
|
||||
struct ir3_shader_variant *fp = fd3_emit_get_fp(emit);
|
||||
uint32_t dirty = emit->dirty;
|
||||
|
||||
emit_marker(ring, 5);
|
||||
|
||||
@@ -372,7 +416,7 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
|
||||
}
|
||||
|
||||
if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !key.binning_pass) {
|
||||
if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_PROG)) && !emit->key.binning_pass) {
|
||||
uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control;
|
||||
|
||||
val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
|
||||
@@ -414,6 +458,9 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
|
||||
val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
|
||||
}
|
||||
if (fp->has_kill) {
|
||||
val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
|
||||
}
|
||||
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
|
||||
OUT_RING(ring, val);
|
||||
}
|
||||
@@ -444,17 +491,27 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
OUT_RING(ring, val);
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
|
||||
/* NOTE: since primitive_restart is not actually part of any
|
||||
* state object, we need to make sure that we always emit
|
||||
* PRIM_VTX_CNTL.. either that or be more clever and detect
|
||||
* when it changes.
|
||||
*/
|
||||
if (emit->info) {
|
||||
const struct pipe_draw_info *info = emit->info;
|
||||
uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)
|
||||
->pc_prim_vtx_cntl;
|
||||
|
||||
if (!key.binning_pass) {
|
||||
if (!emit->key.binning_pass) {
|
||||
uint32_t stride_in_vpc = align(fp->total_in, 4) / 4;
|
||||
if (stride_in_vpc > 0)
|
||||
stride_in_vpc = MAX2(stride_in_vpc, 2);
|
||||
val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc);
|
||||
}
|
||||
|
||||
if (info->indexed && info->primitive_restart) {
|
||||
val |= A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
|
||||
}
|
||||
|
||||
val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
|
||||
@@ -487,9 +544,8 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
|
||||
}
|
||||
|
||||
if (dirty & FD_DIRTY_PROG) {
|
||||
fd3_program_emit(ring, prog, key);
|
||||
}
|
||||
if (dirty & FD_DIRTY_PROG)
|
||||
fd3_program_emit(ring, emit);
|
||||
|
||||
/* TODO we should not need this or fd_wfi() before emit_constants():
|
||||
*/
|
||||
@@ -498,15 +554,15 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
|
||||
if ((dirty & (FD_DIRTY_PROG | FD_DIRTY_CONSTBUF)) &&
|
||||
/* evil hack to deal sanely with clear path: */
|
||||
(prog == &ctx->prog)) {
|
||||
(emit->prog == &ctx->prog)) {
|
||||
fd_wfi(ctx, ring);
|
||||
emit_constants(ring, SB_VERT_SHADER,
|
||||
&ctx->constbuf[PIPE_SHADER_VERTEX],
|
||||
(prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
|
||||
if (!key.binning_pass) {
|
||||
(emit->prog->dirty & FD_SHADER_DIRTY_VP) ? vp : NULL);
|
||||
if (!emit->key.binning_pass) {
|
||||
emit_constants(ring, SB_FRAG_SHADER,
|
||||
&ctx->constbuf[PIPE_SHADER_FRAGMENT],
|
||||
(prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
|
||||
(emit->prog->dirty & FD_SHADER_DIRTY_FP) ? fp : NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -541,14 +597,14 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
|
||||
if (dirty & FD_DIRTY_VERTTEX) {
|
||||
if (vp->has_samp)
|
||||
emit_textures(ring, SB_VERT_TEX, &ctx->verttex);
|
||||
emit_textures(ctx, ring, SB_VERT_TEX, &ctx->verttex);
|
||||
else
|
||||
dirty &= ~FD_DIRTY_VERTTEX;
|
||||
}
|
||||
|
||||
if (dirty & FD_DIRTY_FRAGTEX) {
|
||||
if (fp->has_samp)
|
||||
emit_textures(ring, SB_FRAG_TEX, &ctx->fragtex);
|
||||
emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->fragtex);
|
||||
else
|
||||
dirty &= ~FD_DIRTY_FRAGTEX;
|
||||
}
|
||||
|
@@ -33,6 +33,7 @@
|
||||
|
||||
#include "freedreno_context.h"
|
||||
#include "fd3_util.h"
|
||||
#include "fd3_program.h"
|
||||
#include "ir3_shader.h"
|
||||
|
||||
struct fd_ringbuffer;
|
||||
@@ -46,21 +47,44 @@ void fd3_emit_constant(struct fd_ringbuffer *ring,
|
||||
void fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
|
||||
struct pipe_surface *psurf);
|
||||
|
||||
/* NOTE: this just exists because we don't have proper vertex/vertexbuf
|
||||
* state objs for clear, and mem2gmem/gmem2mem operations..
|
||||
*/
|
||||
struct fd3_vertex_buf {
|
||||
unsigned offset, stride;
|
||||
struct pipe_resource *prsc;
|
||||
enum pipe_format format;
|
||||
/* grouped together emit-state for prog/vertex/state emit: */
|
||||
struct fd3_emit {
|
||||
const struct fd_vertex_state *vtx;
|
||||
const struct fd_program_stateobj *prog;
|
||||
const struct pipe_draw_info *info;
|
||||
struct ir3_shader_key key;
|
||||
uint32_t dirty;
|
||||
bool rasterflat;
|
||||
|
||||
/* cached to avoid repeated lookups of same variants: */
|
||||
struct ir3_shader_variant *vp, *fp;
|
||||
};
|
||||
|
||||
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring,
|
||||
struct ir3_shader_variant *vp,
|
||||
struct fd3_vertex_buf *vbufs, uint32_t n);
|
||||
/* Return the vp shader variant for this emit state.  On first use the
 * variant is looked up with ir3_shader_variant() using emit->prog->vp and
 * emit->key, and the result is cached in emit->vp so later calls skip the
 * lookup.
 */
static inline struct ir3_shader_variant *
|
||||
fd3_emit_get_vp(struct fd3_emit *emit)
|
||||
{
|
||||
if (!emit->vp) {
|
||||
struct fd3_shader_stateobj *so = emit->prog->vp;
|
||||
emit->vp = ir3_shader_variant(so->shader, emit->key);
|
||||
}
|
||||
return emit->vp;
|
||||
}
|
||||
|
||||
/* Return the fp shader variant for this emit state.  On first use the
 * variant is looked up with ir3_shader_variant() using emit->prog->fp and
 * emit->key, and the result is cached in emit->fp so later calls skip the
 * lookup.
 */
static inline struct ir3_shader_variant *
|
||||
fd3_emit_get_fp(struct fd3_emit *emit)
|
||||
{
|
||||
if (!emit->fp) {
|
||||
struct fd3_shader_stateobj *so = emit->prog->fp;
|
||||
emit->fp = ir3_shader_variant(so->shader, emit->key);
|
||||
}
|
||||
return emit->fp;
|
||||
}
|
||||
|
||||
void fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit);
|
||||
|
||||
void fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
struct fd_program_stateobj *prog, uint32_t dirty,
|
||||
struct ir3_shader_key key);
|
||||
struct fd3_emit *emit);
|
||||
|
||||
void fd3_emit_restore(struct fd_context *ctx);
|
||||
|
||||
#endif /* FD3_EMIT_H */
|
||||
|
@@ -69,6 +69,7 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
|
||||
struct fd_resource_slice *slice = NULL;
|
||||
uint32_t stride = 0;
|
||||
uint32_t base = 0;
|
||||
uint32_t layer_offset = 0;
|
||||
|
||||
if ((i < nr_bufs) && bufs[i]) {
|
||||
struct pipe_surface *psurf = bufs[i];
|
||||
@@ -78,6 +79,10 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
|
||||
format = fd3_pipe2color(psurf->format);
|
||||
swap = fd3_pipe2swap(psurf->format);
|
||||
|
||||
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
|
||||
|
||||
layer_offset = slice->size0 * psurf->u.tex.first_layer;
|
||||
|
||||
if (bin_w) {
|
||||
stride = bin_w * rsc->cpp;
|
||||
|
||||
@@ -97,7 +102,8 @@ emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
|
||||
if (bin_w || (i >= nr_bufs)) {
|
||||
OUT_RING(ring, A3XX_RB_MRT_BUF_BASE_COLOR_BUF_BASE(base));
|
||||
} else {
|
||||
OUT_RELOCW(ring, rsc->bo, slice->offset, 0, -1);
|
||||
OUT_RELOCW(ring, rsc->bo,
|
||||
slice->offset + layer_offset, 0, -1);
|
||||
}
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_IMAGE_OUTPUT_REG(i), 1);
|
||||
@@ -152,6 +158,11 @@ emit_binning_workaround(struct fd_context *ctx)
|
||||
struct fd3_context *fd3_ctx = fd3_context(ctx);
|
||||
struct fd_gmem_stateobj *gmem = &ctx->gmem;
|
||||
struct fd_ringbuffer *ring = ctx->ring;
|
||||
struct fd3_emit emit = {
|
||||
.vtx = &fd3_ctx->solid_vbuf_state,
|
||||
.prog = &ctx->solid_prog,
|
||||
.key = key,
|
||||
};
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_RB_MODE_CONTROL, 2);
|
||||
OUT_RING(ring, A3XX_RB_MODE_CONTROL_RENDER_MODE(RB_RESOLVE_PASS) |
|
||||
@@ -177,13 +188,8 @@ emit_binning_workaround(struct fd_context *ctx)
|
||||
A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
|
||||
A3XX_GRAS_SC_CONTROL_RASTER_MODE(1));
|
||||
|
||||
fd3_program_emit(ring, &ctx->solid_prog, key);
|
||||
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
|
||||
(struct fd3_vertex_buf[]) {{
|
||||
.prsc = fd3_ctx->solid_vbuf,
|
||||
.stride = 12,
|
||||
.format = PIPE_FORMAT_R32G32B32_FLOAT,
|
||||
}}, 1);
|
||||
fd3_program_emit(ring, &emit);
|
||||
fd3_emit_vertex_bufs(ring, &emit);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 4);
|
||||
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
|
||||
@@ -303,12 +309,16 @@ emit_gmem2mem_surf(struct fd_context *ctx,
|
||||
struct fd_ringbuffer *ring = ctx->ring;
|
||||
struct fd_resource *rsc = fd_resource(psurf->texture);
|
||||
struct fd_resource_slice *slice = &rsc->slices[psurf->u.tex.level];
|
||||
uint32_t layer_offset = slice->size0 * psurf->u.tex.first_layer;
|
||||
|
||||
debug_assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_RB_COPY_CONTROL, 4);
|
||||
OUT_RING(ring, A3XX_RB_COPY_CONTROL_MSAA_RESOLVE(MSAA_ONE) |
|
||||
A3XX_RB_COPY_CONTROL_MODE(mode) |
|
||||
A3XX_RB_COPY_CONTROL_GMEM_BASE(base));
|
||||
OUT_RELOCW(ring, rsc->bo, slice->offset, 0, -1); /* RB_COPY_DEST_BASE */
|
||||
|
||||
OUT_RELOCW(ring, rsc->bo, slice->offset + layer_offset, 0, -1); /* RB_COPY_DEST_BASE */
|
||||
OUT_RING(ring, A3XX_RB_COPY_DEST_PITCH_PITCH(slice->pitch * rsc->cpp));
|
||||
OUT_RING(ring, A3XX_RB_COPY_DEST_INFO_TILE(LINEAR) |
|
||||
A3XX_RB_COPY_DEST_INFO_FORMAT(fd3_pipe2color(psurf->format)) |
|
||||
@@ -326,6 +336,11 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
|
||||
struct fd3_context *fd3_ctx = fd3_context(ctx);
|
||||
struct fd_ringbuffer *ring = ctx->ring;
|
||||
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
|
||||
struct fd3_emit emit = {
|
||||
.vtx = &fd3_ctx->solid_vbuf_state,
|
||||
.prog = &ctx->solid_prog,
|
||||
.key = key,
|
||||
};
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
|
||||
OUT_RING(ring, A3XX_RB_DEPTH_CONTROL_ZFUNC(FUNC_NEVER));
|
||||
@@ -398,13 +413,8 @@ fd3_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
|
||||
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
|
||||
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
|
||||
|
||||
fd3_program_emit(ring, &ctx->solid_prog, key);
|
||||
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->solid_prog.vp, key),
|
||||
(struct fd3_vertex_buf[]) {{
|
||||
.prsc = fd3_ctx->solid_vbuf,
|
||||
.stride = 12,
|
||||
.format = PIPE_FORMAT_R32G32B32_FLOAT,
|
||||
}}, 1);
|
||||
fd3_program_emit(ring, &emit);
|
||||
fd3_emit_vertex_bufs(ring, &emit);
|
||||
|
||||
if (ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
|
||||
uint32_t base = depth_base(ctx);
|
||||
@@ -448,6 +458,11 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
|
||||
struct fd_gmem_stateobj *gmem = &ctx->gmem;
|
||||
struct fd_ringbuffer *ring = ctx->ring;
|
||||
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
|
||||
struct fd3_emit emit = {
|
||||
.vtx = &fd3_ctx->blit_vbuf_state,
|
||||
.prog = &ctx->blit_prog,
|
||||
.key = key,
|
||||
};
|
||||
float x0, y0, x1, y1;
|
||||
unsigned bin_w = tile->bin_w;
|
||||
unsigned bin_h = tile->bin_h;
|
||||
@@ -542,17 +557,8 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
|
||||
OUT_RING(ring, 0); /* VFD_INSTANCEID_OFFSET */
|
||||
OUT_RING(ring, 0); /* VFD_INDEX_OFFSET */
|
||||
|
||||
fd3_program_emit(ring, &ctx->blit_prog, key);
|
||||
fd3_emit_vertex_bufs(ring, fd3_shader_variant(ctx->blit_prog.vp, key),
|
||||
(struct fd3_vertex_buf[]) {{
|
||||
.prsc = fd3_ctx->blit_texcoord_vbuf,
|
||||
.stride = 8,
|
||||
.format = PIPE_FORMAT_R32G32_FLOAT,
|
||||
}, {
|
||||
.prsc = fd3_ctx->solid_vbuf,
|
||||
.stride = 12,
|
||||
.format = PIPE_FORMAT_R32G32B32_FLOAT,
|
||||
}}, 2);
|
||||
fd3_program_emit(ring, &emit);
|
||||
fd3_emit_vertex_bufs(ring, &emit);
|
||||
|
||||
/* for gmem pitch/base calculations, we need to use the non-
|
||||
* truncated tile sizes:
|
||||
@@ -560,10 +566,10 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
|
||||
bin_w = gmem->bin_w;
|
||||
bin_h = gmem->bin_h;
|
||||
|
||||
if (ctx->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
|
||||
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
|
||||
emit_mem2gmem_surf(ctx, depth_base(ctx), pfb->zsbuf, bin_w);
|
||||
|
||||
if (ctx->restore & FD_BUFFER_COLOR)
|
||||
if (fd_gmem_needs_restore(ctx, tile, FD_BUFFER_COLOR))
|
||||
emit_mem2gmem_surf(ctx, 0, pfb->cbufs[0], bin_w);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
|
||||
@@ -603,8 +609,11 @@ fd3_emit_sysmem_prep(struct fd_context *ctx)
|
||||
struct fd_ringbuffer *ring = ctx->ring;
|
||||
uint32_t pitch = 0;
|
||||
|
||||
if (pfb->cbufs[0])
|
||||
pitch = fd_resource(pfb->cbufs[0]->texture)->slices[0].pitch;
|
||||
if (pfb->cbufs[0]) {
|
||||
struct pipe_surface *psurf = pfb->cbufs[0];
|
||||
unsigned lvl = psurf->u.tex.level;
|
||||
pitch = fd_resource(psurf->texture)->slices[lvl].pitch;
|
||||
}
|
||||
|
||||
fd3_emit_restore(ctx);
|
||||
|
||||
|
@@ -140,13 +140,21 @@ find_output(const struct ir3_shader_variant *so, ir3_semantic semantic)
|
||||
* in the vertex shader.. but the fragment shader doesn't know this
|
||||
* so it will always have both IN.COLOR[n] and IN.BCOLOR[n]. So
|
||||
* at link time if there is no matching OUT.BCOLOR[n], we must map
|
||||
* OUT.COLOR[n] to IN.BCOLOR[n].
|
||||
* OUT.COLOR[n] to IN.BCOLOR[n]. And vice versa if there is only
|
||||
* an OUT.BCOLOR[n] but no matching OUT.COLOR[n]
|
||||
*/
|
||||
if (sem2name(semantic) == TGSI_SEMANTIC_BCOLOR) {
|
||||
unsigned idx = sem2idx(semantic);
|
||||
return find_output(so, ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx));
|
||||
semantic = ir3_semantic_name(TGSI_SEMANTIC_COLOR, idx);
|
||||
} else if (sem2name(semantic) == TGSI_SEMANTIC_COLOR) {
|
||||
unsigned idx = sem2idx(semantic);
|
||||
semantic = ir3_semantic_name(TGSI_SEMANTIC_BCOLOR, idx);
|
||||
}
|
||||
|
||||
for (j = 0; j < so->outputs_count; j++)
|
||||
if (so->outputs[j].semantic == semantic)
|
||||
return j;
|
||||
|
||||
debug_assert(0);
|
||||
|
||||
return 0;
|
||||
@@ -172,27 +180,72 @@ find_output_regid(const struct ir3_shader_variant *so, ir3_semantic semantic)
|
||||
}
|
||||
|
||||
void
|
||||
fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
struct fd_program_stateobj *prog, struct ir3_shader_key key)
|
||||
fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit)
|
||||
{
|
||||
const struct ir3_shader_variant *vp, *fp;
|
||||
const struct ir3_info *vsi, *fsi;
|
||||
enum a3xx_instrbuffermode fpbuffer, vpbuffer;
|
||||
uint32_t fpbuffersz, vpbuffersz, fsoff;
|
||||
uint32_t pos_regid, posz_regid, psize_regid, color_regid;
|
||||
int constmode;
|
||||
int i, j, k;
|
||||
|
||||
vp = fd3_shader_variant(prog->vp, key);
|
||||
vp = fd3_emit_get_vp(emit);
|
||||
|
||||
if (key.binning_pass) {
|
||||
if (emit->key.binning_pass) {
|
||||
/* use dummy stateobj to simplify binning vs non-binning: */
|
||||
static const struct ir3_shader_variant binning_fp = {};
|
||||
fp = &binning_fp;
|
||||
} else {
|
||||
fp = fd3_shader_variant(prog->fp, key);
|
||||
fp = fd3_emit_get_fp(emit);
|
||||
}
|
||||
|
||||
vsi = &vp->info;
|
||||
fsi = &fp->info;
|
||||
|
||||
fpbuffer = BUFFER;
|
||||
vpbuffer = BUFFER;
|
||||
fpbuffersz = fp->instrlen;
|
||||
vpbuffersz = vp->instrlen;
|
||||
|
||||
/*
|
||||
* Decide whether to use BUFFER or CACHE mode for VS and FS. It
|
||||
* appears like 256 is the hard limit, but when the combined size
|
||||
* exceeds 128 then blob will try to keep FS in BUFFER mode and
|
||||
* switch to CACHE for VS until VS is too large. The blob seems
|
||||
* to switch FS out of BUFFER mode at slightly under 128. But
|
||||
* a bit fuzzy on the decision tree, so use slightly conservative
|
||||
* limits.
|
||||
*
|
||||
* TODO check if these thresholds for BUFFER vs CACHE mode are the
|
||||
* same for all a3xx or whether we need to consider the gpuid
|
||||
*/
|
||||
|
||||
if ((fpbuffersz + vpbuffersz) > 128) {
|
||||
if (fpbuffersz < 112) {
|
||||
/* FP:BUFFER VP:CACHE */
|
||||
vpbuffer = CACHE;
|
||||
vpbuffersz = 256 - fpbuffersz;
|
||||
} else if (vpbuffersz < 112) {
|
||||
/* FP:CACHE VP:BUFFER */
|
||||
fpbuffer = CACHE;
|
||||
fpbuffersz = 256 - vpbuffersz;
|
||||
} else {
|
||||
/* FP:CACHE VP:CACHE */
|
||||
vpbuffer = fpbuffer = CACHE;
|
||||
vpbuffersz = fpbuffersz = 192;
|
||||
}
|
||||
}
|
||||
|
||||
if (fpbuffer == BUFFER) {
|
||||
fsoff = 128 - fpbuffersz;
|
||||
} else {
|
||||
fsoff = 256 - fpbuffersz;
|
||||
}
|
||||
|
||||
/* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */
|
||||
constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0;
|
||||
|
||||
pos_regid = find_output_regid(vp,
|
||||
ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0));
|
||||
posz_regid = find_output_regid(fp,
|
||||
@@ -208,6 +261,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6);
|
||||
OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) |
|
||||
A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) |
|
||||
/* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe
|
||||
* flush some caches? I think we only need to set those
|
||||
* bits if we have updated const or shader..
|
||||
@@ -221,14 +275,14 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
|
||||
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
|
||||
A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) |
|
||||
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vp->instrlen));
|
||||
A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz));
|
||||
OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) |
|
||||
A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) |
|
||||
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fp->instrlen));
|
||||
A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz));
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1);
|
||||
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(0) |
|
||||
COND(key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
|
||||
OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) |
|
||||
COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) |
|
||||
A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) |
|
||||
A3XX_SP_SP_CTRL_REG_L0MODE(0));
|
||||
|
||||
@@ -237,18 +291,18 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3);
|
||||
OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) |
|
||||
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
|
||||
A3XX_SP_VS_CTRL_REG0_CACHEINVALID |
|
||||
A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) |
|
||||
COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
|
||||
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
|
||||
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
|
||||
A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
|
||||
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
|
||||
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
|
||||
COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
|
||||
A3XX_SP_VS_CTRL_REG0_LENGTH(vp->instrlen));
|
||||
A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
|
||||
OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
|
||||
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
|
||||
A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vsi->max_const, 0)));
|
||||
A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0)));
|
||||
OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) |
|
||||
A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) |
|
||||
A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4));
|
||||
@@ -301,7 +355,7 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
|
||||
OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */
|
||||
|
||||
if (key.binning_pass) {
|
||||
if (emit->key.binning_pass) {
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
@@ -309,35 +363,37 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
|
||||
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER));
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1);
|
||||
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
|
||||
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
|
||||
} else {
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1);
|
||||
OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen));
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2);
|
||||
OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) |
|
||||
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER) |
|
||||
A3XX_SP_FS_CTRL_REG0_CACHEINVALID |
|
||||
A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) |
|
||||
COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
|
||||
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
|
||||
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
|
||||
A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
|
||||
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
|
||||
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
|
||||
COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
|
||||
A3XX_SP_FS_CTRL_REG0_LENGTH(fp->instrlen));
|
||||
A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz));
|
||||
OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) |
|
||||
A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) |
|
||||
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fsi->max_const, 0)) |
|
||||
A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) |
|
||||
A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63));
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2);
|
||||
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) |
|
||||
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0));
|
||||
OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(
|
||||
MAX2(128, vp->constlen)) |
|
||||
A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff));
|
||||
OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */
|
||||
}
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
|
||||
OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
|
||||
OUT_RING(ring, 0x00000000); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1);
|
||||
if (fp->writes_pos) {
|
||||
OUT_RING(ring, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE |
|
||||
@@ -353,13 +409,37 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
|
||||
OUT_RING(ring, A3XX_SP_FS_MRT_REG_REGID(0));
|
||||
|
||||
if (key.binning_pass) {
|
||||
if (emit->key.binning_pass) {
|
||||
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
|
||||
OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) |
|
||||
A3XX_VPC_ATTR_LMSIZE(1) |
|
||||
COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE));
|
||||
OUT_RING(ring, 0x00000000);
|
||||
} else {
|
||||
uint32_t vinterp[4] = {0}, flatshade[2] = {0};
|
||||
|
||||
/* figure out VARYING_INTERP / FLAT_SHAD register values: */
|
||||
for (j = -1; (j = next_varying(fp, j)) < (int)fp->inputs_count; ) {
|
||||
uint32_t interp = fp->inputs[j].interpolate;
|
||||
if ((interp == TGSI_INTERPOLATE_CONSTANT) ||
|
||||
((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) {
|
||||
/* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG
|
||||
* instead.. rather than -8 everywhere else..
|
||||
*/
|
||||
uint32_t loc = fp->inputs[j].inloc - 8;
|
||||
|
||||
/* currently assuming varyings aligned to 4 (not
|
||||
* packed):
|
||||
*/
|
||||
debug_assert((loc % 4) == 0);
|
||||
|
||||
for (i = 0; i < 4; i++, loc++) {
|
||||
vinterp[loc / 16] |= FLAT << ((loc % 16) * 2);
|
||||
flatshade[loc / 32] |= 1 << (loc % 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2);
|
||||
OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) |
|
||||
A3XX_VPC_ATTR_THRDASSIGN(1) |
|
||||
@@ -369,29 +449,35 @@ fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in));
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4);
|
||||
OUT_RING(ring, fp->shader->vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
|
||||
OUT_RING(ring, fp->shader->vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
|
||||
OUT_RING(ring, fp->shader->vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
|
||||
OUT_RING(ring, fp->shader->vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
|
||||
OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */
|
||||
OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */
|
||||
OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */
|
||||
OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4);
|
||||
OUT_RING(ring, fp->shader->vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */
|
||||
OUT_RING(ring, fp->shader->vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */
|
||||
OUT_RING(ring, fp->shader->vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */
|
||||
OUT_RING(ring, fp->shader->vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2);
|
||||
OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */
|
||||
OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */
|
||||
}
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
|
||||
OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
|
||||
A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252));
|
||||
|
||||
emit_shader(ring, vp);
|
||||
if (vpbuffer == BUFFER)
|
||||
emit_shader(ring, vp);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
|
||||
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
|
||||
|
||||
if (!key.binning_pass) {
|
||||
emit_shader(ring, fp);
|
||||
if (!emit->key.binning_pass) {
|
||||
if (fpbuffer == BUFFER)
|
||||
emit_shader(ring, fp);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1);
|
||||
OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */
|
||||
|
@@ -37,8 +37,9 @@ struct fd3_shader_stateobj {
|
||||
struct ir3_shader *shader;
|
||||
};
|
||||
|
||||
void fd3_program_emit(struct fd_ringbuffer *ring,
|
||||
struct fd_program_stateobj *prog, struct ir3_shader_key key);
|
||||
struct fd3_emit;
|
||||
|
||||
void fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit);
|
||||
|
||||
void fd3_prog_init(struct pipe_context *pctx);
|
||||
|
||||
|
@@ -119,14 +119,14 @@ occlusion_predicate_accumulate_result(struct fd_context *ctx,
|
||||
|
||||
static const struct fd_hw_sample_provider occlusion_counter = {
|
||||
.query_type = PIPE_QUERY_OCCLUSION_COUNTER,
|
||||
.active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? */
|
||||
.active = FD_STAGE_DRAW,
|
||||
.get_sample = occlusion_get_sample,
|
||||
.accumulate_result = occlusion_counter_accumulate_result,
|
||||
};
|
||||
|
||||
static const struct fd_hw_sample_provider occlusion_predicate = {
|
||||
.query_type = PIPE_QUERY_OCCLUSION_PREDICATE,
|
||||
.active = FD_STAGE_DRAW, /* | FD_STAGE_CLEAR ??? */
|
||||
.active = FD_STAGE_DRAW,
|
||||
.get_sample = occlusion_get_sample,
|
||||
.accumulate_result = occlusion_predicate_accumulate_result,
|
||||
};
|
||||
|
@@ -36,28 +36,31 @@
|
||||
#include "fd3_util.h"
|
||||
|
||||
static enum a3xx_tex_clamp
|
||||
tex_clamp(unsigned wrap)
|
||||
tex_clamp(unsigned wrap, bool clamp_to_edge)
|
||||
{
|
||||
/* hardware probably supports more, but we can't coax all the
|
||||
* wrap/clamp modes out of the GLESv2 blob driver.
|
||||
*
|
||||
* TODO once we have basics working, go back and just try
|
||||
* different values and see what happens
|
||||
*/
|
||||
/* Hardware does not support _CLAMP, but we emulate it: */
|
||||
if (wrap == PIPE_TEX_WRAP_CLAMP) {
|
||||
wrap = (clamp_to_edge) ?
|
||||
PIPE_TEX_WRAP_CLAMP_TO_EDGE : PIPE_TEX_WRAP_CLAMP_TO_BORDER;
|
||||
}
|
||||
|
||||
switch (wrap) {
|
||||
case PIPE_TEX_WRAP_REPEAT:
|
||||
return A3XX_TEX_REPEAT;
|
||||
case PIPE_TEX_WRAP_CLAMP:
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
|
||||
return A3XX_TEX_CLAMP_TO_EDGE;
|
||||
case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
|
||||
return A3XX_TEX_CLAMP_TO_BORDER;
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP:
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
|
||||
/* only works for PoT.. need to emulate otherwise! */
|
||||
return A3XX_TEX_MIRROR_CLAMP;
|
||||
case PIPE_TEX_WRAP_MIRROR_REPEAT:
|
||||
return A3XX_TEX_MIRROR_REPEAT;
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP:
|
||||
case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
|
||||
/* these two we could perhaps emulate, but we currently
|
||||
* just don't advertise PIPE_CAP_TEXTURE_MIRROR_CLAMP
|
||||
*/
|
||||
default:
|
||||
DBG("invalid wrap: %u", wrap);
|
||||
return 0;
|
||||
@@ -84,6 +87,7 @@ fd3_sampler_state_create(struct pipe_context *pctx,
|
||||
{
|
||||
struct fd3_sampler_stateobj *so = CALLOC_STRUCT(fd3_sampler_stateobj);
|
||||
bool miplinear = false;
|
||||
bool clamp_to_edge;
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
@@ -93,20 +97,36 @@ fd3_sampler_state_create(struct pipe_context *pctx,
|
||||
|
||||
so->base = *cso;
|
||||
|
||||
/*
|
||||
* For nearest filtering, _CLAMP means _CLAMP_TO_EDGE; for linear
|
||||
* filtering, _CLAMP means _CLAMP_TO_BORDER while additionally
|
||||
* clamping the texture coordinates to [0.0, 1.0].
|
||||
*
|
||||
* The clamping will be taken care of in the shaders. There are two
|
||||
* filters here, but only the minification one gets a say.
|
||||
*/
|
||||
clamp_to_edge = (cso->min_img_filter == PIPE_TEX_FILTER_NEAREST);
|
||||
if (!clamp_to_edge) {
|
||||
so->saturate_s = (cso->wrap_s == PIPE_TEX_WRAP_CLAMP);
|
||||
so->saturate_t = (cso->wrap_t == PIPE_TEX_WRAP_CLAMP);
|
||||
so->saturate_r = (cso->wrap_r == PIPE_TEX_WRAP_CLAMP);
|
||||
}
|
||||
|
||||
so->texsamp0 =
|
||||
COND(!cso->normalized_coords, A3XX_TEX_SAMP_0_UNNORM_COORDS) |
|
||||
COND(miplinear, A3XX_TEX_SAMP_0_MIPFILTER_LINEAR) |
|
||||
A3XX_TEX_SAMP_0_XY_MAG(tex_filter(cso->mag_img_filter)) |
|
||||
A3XX_TEX_SAMP_0_XY_MIN(tex_filter(cso->min_img_filter)) |
|
||||
A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s)) |
|
||||
A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t)) |
|
||||
A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r));
|
||||
A3XX_TEX_SAMP_0_WRAP_S(tex_clamp(cso->wrap_s, clamp_to_edge)) |
|
||||
A3XX_TEX_SAMP_0_WRAP_T(tex_clamp(cso->wrap_t, clamp_to_edge)) |
|
||||
A3XX_TEX_SAMP_0_WRAP_R(tex_clamp(cso->wrap_r, clamp_to_edge));
|
||||
|
||||
if (cso->compare_mode)
|
||||
so->texsamp0 |= A3XX_TEX_SAMP_0_COMPARE_FUNC(cso->compare_func); /* maps 1:1 */
|
||||
|
||||
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
|
||||
so->texsamp1 =
|
||||
A3XX_TEX_SAMP_1_LOD_BIAS(cso->lod_bias) |
|
||||
A3XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
|
||||
A3XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
|
||||
} else {
|
||||
@@ -116,6 +136,50 @@ fd3_sampler_state_create(struct pipe_context *pctx,
|
||||
return so;
|
||||
}
|
||||
|
||||
static void
|
||||
fd3_sampler_states_bind(struct pipe_context *pctx,
|
||||
unsigned shader, unsigned start,
|
||||
unsigned nr, void **hwcso)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd3_context *fd3_ctx = fd3_context(ctx);
|
||||
uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (hwcso[i]) {
|
||||
struct fd3_sampler_stateobj *sampler =
|
||||
fd3_sampler_stateobj(hwcso[i]);
|
||||
if (sampler->saturate_s)
|
||||
saturate_s |= (1 << i);
|
||||
if (sampler->saturate_t)
|
||||
saturate_t |= (1 << i);
|
||||
if (sampler->saturate_r)
|
||||
saturate_r |= (1 << i);
|
||||
}
|
||||
}
|
||||
|
||||
fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
|
||||
|
||||
if (shader == PIPE_SHADER_FRAGMENT) {
|
||||
fd3_ctx->fsaturate =
|
||||
(saturate_s != 0) ||
|
||||
(saturate_t != 0) ||
|
||||
(saturate_r != 0);
|
||||
fd3_ctx->fsaturate_s = saturate_s;
|
||||
fd3_ctx->fsaturate_t = saturate_t;
|
||||
fd3_ctx->fsaturate_r = saturate_r;
|
||||
} else if (shader == PIPE_SHADER_VERTEX) {
|
||||
fd3_ctx->vsaturate =
|
||||
(saturate_s != 0) ||
|
||||
(saturate_t != 0) ||
|
||||
(saturate_r != 0);
|
||||
fd3_ctx->vsaturate_s = saturate_s;
|
||||
fd3_ctx->vsaturate_t = saturate_t;
|
||||
fd3_ctx->vsaturate_r = saturate_r;
|
||||
}
|
||||
}
|
||||
|
||||
static enum a3xx_tex_type
|
||||
tex_type(unsigned target)
|
||||
{
|
||||
@@ -175,7 +239,24 @@ fd3_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
/* when emitted, A3XX_TEX_CONST_2_INDX() must be OR'd in: */
|
||||
so->texconst2 =
|
||||
A3XX_TEX_CONST_2_PITCH(rsc->slices[lvl].pitch * rsc->cpp);
|
||||
so->texconst3 = 0x00000000; /* ??? */
|
||||
switch (prsc->target) {
|
||||
case PIPE_TEXTURE_1D_ARRAY:
|
||||
case PIPE_TEXTURE_2D_ARRAY:
|
||||
so->texconst3 =
|
||||
A3XX_TEX_CONST_3_DEPTH(prsc->array_size - 1) |
|
||||
A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[0].size0) |
|
||||
A3XX_TEX_CONST_3_LAYERSZ2(rsc->slices[0].size0);
|
||||
break;
|
||||
case PIPE_TEXTURE_3D:
|
||||
so->texconst3 =
|
||||
A3XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
|
||||
A3XX_TEX_CONST_3_LAYERSZ1(rsc->slices[0].size0) |
|
||||
A3XX_TEX_CONST_3_LAYERSZ2(rsc->slices[0].size0);
|
||||
break;
|
||||
default:
|
||||
so->texconst3 = 0x00000000;
|
||||
break;
|
||||
}
|
||||
|
||||
return &so->base;
|
||||
}
|
||||
@@ -184,5 +265,6 @@ void
|
||||
fd3_texture_init(struct pipe_context *pctx)
|
||||
{
|
||||
pctx->create_sampler_state = fd3_sampler_state_create;
|
||||
pctx->bind_sampler_states = fd3_sampler_states_bind;
|
||||
pctx->create_sampler_view = fd3_sampler_view_create;
|
||||
}
|
||||
|
@@ -40,6 +40,7 @@
|
||||
struct fd3_sampler_stateobj {
|
||||
struct pipe_sampler_state base;
|
||||
uint32_t texsamp0, texsamp1;
|
||||
bool saturate_s, saturate_t, saturate_r;
|
||||
};
|
||||
|
||||
static INLINE struct fd3_sampler_stateobj *
|
||||
|
@@ -134,6 +134,14 @@ fd3_pipe2vtx(enum pipe_format format)
|
||||
case PIPE_FORMAT_R16G16_SNORM:
|
||||
return VFMT_NORM_SHORT_16_16;
|
||||
|
||||
case PIPE_FORMAT_R32_UINT:
|
||||
case PIPE_FORMAT_R32_USCALED:
|
||||
return VFMT_UINT_32;
|
||||
|
||||
case PIPE_FORMAT_R32_SINT:
|
||||
case PIPE_FORMAT_R32_SSCALED:
|
||||
return VFMT_INT_32;
|
||||
|
||||
case PIPE_FORMAT_R10G10B10A2_UNORM:
|
||||
return VFMT_NORM_UINT_10_10_10_2;
|
||||
|
||||
@@ -196,6 +204,14 @@ fd3_pipe2vtx(enum pipe_format format)
|
||||
case PIPE_FORMAT_R16G16B16A16_FLOAT:
|
||||
return VFMT_FLOAT_16_16_16_16;
|
||||
|
||||
case PIPE_FORMAT_R32G32_UINT:
|
||||
case PIPE_FORMAT_R32G32_USCALED:
|
||||
return VFMT_UINT_32_32;
|
||||
|
||||
case PIPE_FORMAT_R32G32_SINT:
|
||||
case PIPE_FORMAT_R32G32_SSCALED:
|
||||
return VFMT_INT_32_32;
|
||||
|
||||
/* 96-bit buffers. */
|
||||
case PIPE_FORMAT_R32G32B32_FLOAT:
|
||||
return VFMT_FLOAT_32_32_32;
|
||||
@@ -203,6 +219,14 @@ fd3_pipe2vtx(enum pipe_format format)
|
||||
case PIPE_FORMAT_R32G32B32_FIXED:
|
||||
return VFMT_FIXED_32_32_32;
|
||||
|
||||
case PIPE_FORMAT_R32G32B32_UINT:
|
||||
case PIPE_FORMAT_R32G32B32_USCALED:
|
||||
return VFMT_UINT_32_32_32;
|
||||
|
||||
case PIPE_FORMAT_R32G32B32_SINT:
|
||||
case PIPE_FORMAT_R32G32B32_SSCALED:
|
||||
return VFMT_INT_32_32_32;
|
||||
|
||||
/* 128-bit buffers. */
|
||||
case PIPE_FORMAT_R32G32B32A32_FLOAT:
|
||||
return VFMT_FLOAT_32_32_32_32;
|
||||
@@ -210,26 +234,20 @@ fd3_pipe2vtx(enum pipe_format format)
|
||||
case PIPE_FORMAT_R32G32B32A32_FIXED:
|
||||
return VFMT_FIXED_32_32_32_32;
|
||||
|
||||
/* TODO probably need gles3 blob drivers to find the 32bit int formats:
|
||||
case PIPE_FORMAT_R32G32B32A32_UINT:
|
||||
case PIPE_FORMAT_R32G32B32A32_USCALED:
|
||||
return VFMT_UINT_32_32_32_32;
|
||||
|
||||
case PIPE_FORMAT_R32G32B32A32_SINT:
|
||||
case PIPE_FORMAT_R32G32B32A32_SSCALED:
|
||||
return VFMT_INT_32_32_32_32;
|
||||
|
||||
/* TODO normalized 32bit int formats do not appear to be supported
|
||||
* natively.. will require either shader variant or VFD_DECODE
|
||||
* gymnastics like the blob driver does..
|
||||
case PIPE_FORMAT_R32G32B32A32_SNORM:
|
||||
case PIPE_FORMAT_R32G32B32A32_UNORM:
|
||||
case PIPE_FORMAT_R32G32B32A32_SINT:
|
||||
case PIPE_FORMAT_R32G32B32A32_UINT:
|
||||
|
||||
case PIPE_FORMAT_R32_UINT:
|
||||
case PIPE_FORMAT_R32_SINT:
|
||||
case PIPE_FORMAT_A32_UINT:
|
||||
case PIPE_FORMAT_A32_SINT:
|
||||
case PIPE_FORMAT_L32_UINT:
|
||||
case PIPE_FORMAT_L32_SINT:
|
||||
case PIPE_FORMAT_I32_UINT:
|
||||
case PIPE_FORMAT_I32_SINT:
|
||||
|
||||
case PIPE_FORMAT_R32G32_SINT:
|
||||
case PIPE_FORMAT_R32G32_UINT:
|
||||
case PIPE_FORMAT_L32A32_UINT:
|
||||
case PIPE_FORMAT_L32A32_SINT:
|
||||
*/
|
||||
*/
|
||||
|
||||
default:
|
||||
return ~0;
|
||||
@@ -246,6 +264,9 @@ fd3_pipe2tex(enum pipe_format format)
|
||||
case PIPE_FORMAT_I8_UNORM:
|
||||
return TFMT_NORM_UINT_8;
|
||||
|
||||
case PIPE_FORMAT_R8G8_UNORM:
|
||||
return TFMT_NORM_UINT_8_8;
|
||||
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
@@ -257,13 +278,11 @@ fd3_pipe2tex(enum pipe_format format)
|
||||
return TFMT_NORM_UINT_8_8_8_8;
|
||||
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
return TFMT_NORM_UINT_X8Z24;
|
||||
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
return TFMT_NORM_UINT_8_8_8_8;
|
||||
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
return TFMT_NORM_UINT_8_8;
|
||||
return TFMT_NORM_USHORT_Z16;
|
||||
|
||||
case PIPE_FORMAT_R16G16B16A16_FLOAT:
|
||||
case PIPE_FORMAT_R16G16B16X16_FLOAT:
|
||||
@@ -331,6 +350,8 @@ fd3_pipe2color(enum pipe_format format)
|
||||
|
||||
case PIPE_FORMAT_R8_UNORM:
|
||||
case PIPE_FORMAT_L8_UNORM:
|
||||
return RB_R8_UNORM;
|
||||
|
||||
case PIPE_FORMAT_A8_UNORM:
|
||||
return RB_A8_UNORM;
|
||||
|
||||
@@ -360,8 +381,9 @@ fd3_gmem_restore_format(enum pipe_format format)
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_Z24X8_UNORM:
|
||||
case PIPE_FORMAT_Z24_UNORM_S8_UINT:
|
||||
return PIPE_FORMAT_R8G8B8A8_UNORM;
|
||||
case PIPE_FORMAT_Z16_UNORM:
|
||||
return PIPE_FORMAT_B8G8R8A8_UNORM;
|
||||
return PIPE_FORMAT_R8G8_UNORM;
|
||||
default:
|
||||
return format;
|
||||
}
|
||||
|
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9859 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-07-19 17:20:53)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 58020 bytes, from 2014-07-19 17:21:17)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 36670 bytes, from 2014-07-19 17:18:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10347 bytes, from 2014-10-01 18:55:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14960 bytes, from 2014-07-27 17:22:13)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 60533 bytes, from 2014-10-15 18:32:43)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 41068 bytes, from 2014-08-01 12:22:48)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -132,6 +132,7 @@ enum a3xx_threadmode {
|
||||
};
|
||||
|
||||
enum a3xx_instrbuffermode {
|
||||
CACHE = 0,
|
||||
BUFFER = 1,
|
||||
};
|
||||
|
||||
|
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 364 bytes, from 2013-11-30 14:47:15)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2013-03-31 16:51:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 9859 bytes, from 2014-06-02 15:21:30)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14477 bytes, from 2014-07-19 17:20:53)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 58020 bytes, from 2014-07-19 17:21:17)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 36670 bytes, from 2014-07-19 17:18:34)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10347 bytes, from 2014-10-01 18:55:57)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14960 bytes, from 2014-07-27 17:22:13)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 60533 bytes, from 2014-10-15 18:32:43)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 41068 bytes, from 2014-08-01 12:22:48)
|
||||
|
||||
Copyright (C) 2013-2014 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -163,12 +163,16 @@ enum adreno_pm4_type3_packets {
|
||||
CP_INDIRECT_BUFFER_PFE = 63,
|
||||
CP_SET_BIN = 76,
|
||||
CP_TEST_TWO_MEMS = 113,
|
||||
CP_REG_WR_NO_CTXT = 120,
|
||||
CP_RECORD_PFP_TIMESTAMP = 17,
|
||||
CP_WAIT_FOR_ME = 19,
|
||||
CP_SET_DRAW_STATE = 67,
|
||||
CP_DRAW_INDX_OFFSET = 56,
|
||||
CP_DRAW_INDIRECT = 40,
|
||||
CP_DRAW_INDX_INDIRECT = 41,
|
||||
CP_DRAW_AUTO = 36,
|
||||
CP_UNKNOWN_1A = 26,
|
||||
CP_WIDE_REG_WRITE = 116,
|
||||
IN_IB_PREFETCH_END = 23,
|
||||
IN_SUBBLK_PREFETCH = 31,
|
||||
IN_INSTR_PREFETCH = 32,
|
||||
|
@@ -100,7 +100,7 @@ fd_context_render(struct pipe_context *pctx)
|
||||
if (!ctx->needs_flush)
|
||||
return;
|
||||
|
||||
fd_gmem_render_tiles(pctx);
|
||||
fd_gmem_render_tiles(ctx);
|
||||
|
||||
DBG("%p/%p/%p", ctx->ring->start, ctx->ring->cur, ctx->ring->end);
|
||||
|
||||
@@ -111,7 +111,7 @@ fd_context_render(struct pipe_context *pctx)
|
||||
fd_context_next_rb(pctx);
|
||||
|
||||
ctx->needs_flush = false;
|
||||
ctx->cleared = ctx->restore = ctx->resolve = 0;
|
||||
ctx->cleared = ctx->partial_cleared = ctx->restore = ctx->resolve = 0;
|
||||
ctx->gmem_reason = 0;
|
||||
ctx->num_draws = 0;
|
||||
|
||||
@@ -148,8 +148,6 @@ fd_context_destroy(struct pipe_context *pctx)
|
||||
fd_prog_fini(pctx);
|
||||
fd_hw_query_fini(pctx);
|
||||
|
||||
util_slab_destroy(&ctx->transfer_pool);
|
||||
|
||||
util_dynarray_fini(&ctx->draw_patches);
|
||||
|
||||
if (ctx->blitter)
|
||||
@@ -158,6 +156,8 @@ fd_context_destroy(struct pipe_context *pctx)
|
||||
if (ctx->primconvert)
|
||||
util_primconvert_destroy(ctx->primconvert);
|
||||
|
||||
util_slab_destroy(&ctx->transfer_pool);
|
||||
|
||||
fd_ringmarker_del(ctx->draw_start);
|
||||
fd_ringmarker_del(ctx->draw_end);
|
||||
fd_ringmarker_del(ctx->binning_start);
|
||||
|
@@ -83,6 +83,15 @@ struct fd_vertex_stateobj {
|
||||
unsigned num_elements;
|
||||
};
|
||||
|
||||
/* group together the vertex and vertexbuf state.. for ease of passing
|
||||
* around, and because various internal operations (gmem<->mem, etc)
|
||||
* need their own vertex state:
|
||||
*/
|
||||
struct fd_vertex_state {
|
||||
struct fd_vertex_stateobj *vtx;
|
||||
struct fd_vertexbuf_stateobj vertexbuf;
|
||||
};
|
||||
|
||||
/* Bitmask of stages in rendering that a particular query is
|
||||
* active. Queries will be automatically started/stopped (generating
|
||||
* additional fd_hw_sample_period's) on entrance/exit from stages that
|
||||
@@ -174,6 +183,10 @@ struct fd_context {
|
||||
* there was a glClear() that invalidated the entire previous buffer
|
||||
* contents. Keep track of which buffer(s) are cleared, or needs
|
||||
* restore. Masks of PIPE_CLEAR_*
|
||||
*
|
||||
* The 'cleared' bits will be set for buffers which are *entirely*
|
||||
* cleared, and 'partial_cleared' bits will be set if you must
|
||||
* check cleared_scissor.
|
||||
*/
|
||||
enum {
|
||||
/* align bitmask values w/ PIPE_CLEAR_*.. since that is convenient.. */
|
||||
@@ -181,7 +194,7 @@ struct fd_context {
|
||||
FD_BUFFER_DEPTH = PIPE_CLEAR_DEPTH,
|
||||
FD_BUFFER_STENCIL = PIPE_CLEAR_STENCIL,
|
||||
FD_BUFFER_ALL = FD_BUFFER_COLOR | FD_BUFFER_DEPTH | FD_BUFFER_STENCIL,
|
||||
} cleared, restore, resolve;
|
||||
} cleared, partial_cleared, restore, resolve;
|
||||
|
||||
bool needs_flush;
|
||||
|
||||
@@ -222,6 +235,14 @@ struct fd_context {
|
||||
struct fd_ringbuffer *rings[8];
|
||||
unsigned rings_idx;
|
||||
|
||||
/* NOTE: currently using a single ringbuffer for both draw and
|
||||
* tiling commands, so we need to make sure to leave enough
|
||||
* room at the end to append the tiling commands when we flush.
|
||||
* 0x7000 dwords should be a couple times more than we ever need
|
||||
* so should be a nice conservative threshold.
|
||||
*/
|
||||
#define FD_TILING_COMMANDS_DWORDS 0x7000
|
||||
|
||||
/* normal draw/clear cmds: */
|
||||
struct fd_ringbuffer *ring;
|
||||
struct fd_ringmarker *draw_start, *draw_end;
|
||||
@@ -260,6 +281,14 @@ struct fd_context {
|
||||
*/
|
||||
struct pipe_scissor_state max_scissor;
|
||||
|
||||
/* Track the cleared scissor for color/depth/stencil, so we know
|
||||
* which, if any, tiles need to be restored (mem2gmem). Only valid
|
||||
* if the corresponding bit in ctx->partial_cleared is set.
|
||||
*/
|
||||
struct {
|
||||
struct pipe_scissor_state color, depth, stencil;
|
||||
} cleared_scissor;
|
||||
|
||||
/* Current gmem/tiling configuration.. gets updated on render_tiles()
|
||||
* if out of date with current maximal-scissor/cpp:
|
||||
*/
|
||||
@@ -297,7 +326,7 @@ struct fd_context {
|
||||
|
||||
struct fd_program_stateobj prog;
|
||||
|
||||
struct fd_vertex_stateobj *vtx;
|
||||
struct fd_vertex_state vtx;
|
||||
|
||||
struct pipe_blend_color blend_color;
|
||||
struct pipe_stencil_ref stencil_ref;
|
||||
@@ -306,7 +335,6 @@ struct fd_context {
|
||||
struct pipe_poly_stipple stipple;
|
||||
struct pipe_viewport_state viewport;
|
||||
struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
|
||||
struct fd_vertexbuf_stateobj vertexbuf;
|
||||
struct pipe_index_buffer indexbuf;
|
||||
|
||||
/* GMEM/tile handling fxns: */
|
||||
|
@@ -40,51 +40,6 @@
|
||||
#include "freedreno_util.h"
|
||||
|
||||
|
||||
static enum pc_di_index_size
|
||||
size2indextype(unsigned index_size)
|
||||
{
|
||||
switch (index_size) {
|
||||
case 1: return INDEX_SIZE_8_BIT;
|
||||
case 2: return INDEX_SIZE_16_BIT;
|
||||
case 4: return INDEX_SIZE_32_BIT;
|
||||
}
|
||||
DBG("unsupported index size: %d", index_size);
|
||||
assert(0);
|
||||
return INDEX_SIZE_IGN;
|
||||
}
|
||||
|
||||
/* this is same for a2xx/a3xx, so split into helper: */
|
||||
void
|
||||
fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
enum pc_di_vis_cull_mode vismode,
|
||||
const struct pipe_draw_info *info)
|
||||
{
|
||||
struct pipe_index_buffer *idx = &ctx->indexbuf;
|
||||
struct fd_bo *idx_bo = NULL;
|
||||
enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
|
||||
enum pc_di_src_sel src_sel;
|
||||
uint32_t idx_size, idx_offset;
|
||||
|
||||
if (info->indexed) {
|
||||
assert(!idx->user_buffer);
|
||||
|
||||
idx_bo = fd_resource(idx->buffer)->bo;
|
||||
idx_type = size2indextype(idx->index_size);
|
||||
idx_size = idx->index_size * info->count;
|
||||
idx_offset = idx->offset + (info->start * idx->index_size);
|
||||
src_sel = DI_SRC_SEL_DMA;
|
||||
} else {
|
||||
idx_bo = NULL;
|
||||
idx_type = INDEX_SIZE_IGN;
|
||||
idx_size = 0;
|
||||
idx_offset = 0;
|
||||
src_sel = DI_SRC_SEL_AUTO_INDEX;
|
||||
}
|
||||
|
||||
fd_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
|
||||
info->count, idx_type, idx_size, idx_offset, idx_bo);
|
||||
}
|
||||
|
||||
static void
|
||||
fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
{
|
||||
@@ -152,13 +107,30 @@ fd_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
ctx->stats.prims_emitted +=
|
||||
u_reduced_prims_for_vertices(info->mode, info->count);
|
||||
|
||||
/* any buffers that haven't been cleared, we need to restore: */
|
||||
/* any buffers that haven't been cleared yet, we need to restore: */
|
||||
ctx->restore |= buffers & (FD_BUFFER_ALL & ~ctx->cleared);
|
||||
/* and any buffers used, need to be resolved: */
|
||||
ctx->resolve |= buffers;
|
||||
|
||||
DBG("%x num_draws=%u (%s/%s)", buffers, ctx->num_draws,
|
||||
util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
|
||||
util_format_short_name(pipe_surface_format(pfb->zsbuf)));
|
||||
|
||||
fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_DRAW);
|
||||
ctx->draw(ctx, info);
|
||||
|
||||
/* if an app (or, well, piglit test) does many thousands of draws
|
||||
* without flush (or anything which implicitly flushes, like
|
||||
* changing render targets), we can exceed the ringbuffer size.
|
||||
* Since we don't currently have a sane way to wrap around, and
|
||||
* we use the same buffer for both draw and tiling commands, for
|
||||
* now we need to do this hack and trigger flush if we are running
|
||||
* low on remaining space for cmds:
|
||||
*/
|
||||
if (((ctx->ring->cur - ctx->ring->start) >
|
||||
(ctx->ring->size/4 - FD_TILING_COMMANDS_DWORDS)) ||
|
||||
(fd_mesa_debug & FD_DBG_FLUSH))
|
||||
fd_context_render(pctx);
|
||||
}
|
||||
|
||||
/* TODO figure out how to make better use of existing state mechanism
|
||||
@@ -173,8 +145,30 @@ fd_clear(struct pipe_context *pctx, unsigned buffers,
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
|
||||
struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
|
||||
unsigned cleared_buffers;
|
||||
|
||||
ctx->cleared |= buffers;
|
||||
/* for bookkeeping about which buffers have been cleared (and thus
|
||||
* can fully or partially skip mem2gmem) we need to ignore buffers
|
||||
* that have already had a draw, in case apps do silly things like
|
||||
* clear after draw (ie. if you only clear the color buffer, but
|
||||
* something like alpha-test causes side effects from the draw in
|
||||
* the depth buffer, etc)
|
||||
*/
|
||||
cleared_buffers = buffers & (FD_BUFFER_ALL & ~ctx->restore);
|
||||
|
||||
/* do we have full-screen scissor? */
|
||||
if (!memcmp(scissor, &ctx->disabled_scissor, sizeof(*scissor))) {
|
||||
ctx->cleared |= cleared_buffers;
|
||||
} else {
|
||||
ctx->partial_cleared |= cleared_buffers;
|
||||
if (cleared_buffers & PIPE_CLEAR_COLOR)
|
||||
ctx->cleared_scissor.color = *scissor;
|
||||
if (cleared_buffers & PIPE_CLEAR_DEPTH)
|
||||
ctx->cleared_scissor.depth = *scissor;
|
||||
if (cleared_buffers & PIPE_CLEAR_STENCIL)
|
||||
ctx->cleared_scissor.stencil = *scissor;
|
||||
}
|
||||
ctx->resolve |= buffers;
|
||||
ctx->needs_flush = true;
|
||||
|
||||
|
@@ -33,15 +33,12 @@
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_screen.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
struct fd_ringbuffer;
|
||||
|
||||
void fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
enum pc_di_vis_cull_mode vismode,
|
||||
const struct pipe_draw_info *info);
|
||||
|
||||
void fd_draw_init(struct pipe_context *pctx);
|
||||
|
||||
static inline void
|
||||
@@ -98,4 +95,50 @@ fd_draw(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
fd_reset_wfi(ctx);
|
||||
}
|
||||
|
||||
|
||||
static inline enum pc_di_index_size
|
||||
size2indextype(unsigned index_size)
|
||||
{
|
||||
switch (index_size) {
|
||||
case 1: return INDEX_SIZE_8_BIT;
|
||||
case 2: return INDEX_SIZE_16_BIT;
|
||||
case 4: return INDEX_SIZE_32_BIT;
|
||||
}
|
||||
DBG("unsupported index size: %d", index_size);
|
||||
assert(0);
|
||||
return INDEX_SIZE_IGN;
|
||||
}
|
||||
|
||||
/* this is same for a2xx/a3xx, so split into helper: */
|
||||
static inline void
|
||||
fd_draw_emit(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
enum pc_di_vis_cull_mode vismode,
|
||||
const struct pipe_draw_info *info)
|
||||
{
|
||||
struct pipe_index_buffer *idx = &ctx->indexbuf;
|
||||
struct fd_bo *idx_bo = NULL;
|
||||
enum pc_di_index_size idx_type = INDEX_SIZE_IGN;
|
||||
enum pc_di_src_sel src_sel;
|
||||
uint32_t idx_size, idx_offset;
|
||||
|
||||
if (info->indexed) {
|
||||
assert(!idx->user_buffer);
|
||||
|
||||
idx_bo = fd_resource(idx->buffer)->bo;
|
||||
idx_type = size2indextype(idx->index_size);
|
||||
idx_size = idx->index_size * info->count;
|
||||
idx_offset = idx->offset + (info->start * idx->index_size);
|
||||
src_sel = DI_SRC_SEL_DMA;
|
||||
} else {
|
||||
idx_bo = NULL;
|
||||
idx_type = INDEX_SIZE_IGN;
|
||||
idx_size = 0;
|
||||
idx_offset = 0;
|
||||
src_sel = DI_SRC_SEL_AUTO_INDEX;
|
||||
}
|
||||
|
||||
fd_draw(ctx, ring, ctx->primtypes[info->mode], vismode, src_sel,
|
||||
info->count, idx_type, idx_size, idx_offset, idx_bo);
|
||||
}
|
||||
|
||||
#endif /* FREEDRENO_DRAW_H_ */
|
||||
|
@@ -314,9 +314,8 @@ render_sysmem(struct fd_context *ctx)
|
||||
}
|
||||
|
||||
void
|
||||
fd_gmem_render_tiles(struct pipe_context *pctx)
|
||||
fd_gmem_render_tiles(struct fd_context *ctx)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
|
||||
uint32_t timestamp = 0;
|
||||
bool sysmem = false;
|
||||
@@ -381,28 +380,50 @@ fd_gmem_render_tiles(struct pipe_context *pctx)
|
||||
ctx->max_scissor.minx = ctx->max_scissor.miny = ~0;
|
||||
ctx->max_scissor.maxx = ctx->max_scissor.maxy = 0;
|
||||
|
||||
/* Note that because the per-tile setup and mem2gmem/gmem2mem are emitted
|
||||
* after the draw/clear calls, but executed before, we need to preemptively
|
||||
* flag some state as dirty before the first draw/clear call.
|
||||
*
|
||||
* TODO maybe we need to mark all state as dirty to not worry about state
|
||||
* being clobbered by other contexts?
|
||||
*/
|
||||
ctx->dirty |= FD_DIRTY_ZSA |
|
||||
FD_DIRTY_RASTERIZER |
|
||||
FD_DIRTY_FRAMEBUFFER |
|
||||
FD_DIRTY_SAMPLE_MASK |
|
||||
FD_DIRTY_VIEWPORT |
|
||||
FD_DIRTY_CONSTBUF |
|
||||
FD_DIRTY_PROG |
|
||||
FD_DIRTY_SCISSOR |
|
||||
/* probably only needed if we need to mem2gmem on the next
|
||||
* draw.. but not sure if there is a good way to know?
|
||||
*/
|
||||
FD_DIRTY_VERTTEX |
|
||||
FD_DIRTY_FRAGTEX |
|
||||
FD_DIRTY_BLEND;
|
||||
|
||||
if (fd_mesa_debug & FD_DBG_DGMEM)
|
||||
ctx->dirty = 0xffffffff;
|
||||
ctx->dirty = ~0;
|
||||
}
|
||||
|
||||
/* tile needs restore if it isn't completely contained within the
|
||||
* cleared scissor:
|
||||
*/
|
||||
static bool
|
||||
skip_restore(struct pipe_scissor_state *scissor, struct fd_tile *tile)
|
||||
{
|
||||
unsigned minx = tile->xoff;
|
||||
unsigned maxx = tile->xoff + tile->bin_w;
|
||||
unsigned miny = tile->yoff;
|
||||
unsigned maxy = tile->yoff + tile->bin_h;
|
||||
return (minx >= scissor->minx) && (maxx <= scissor->maxx) &&
|
||||
(miny >= scissor->miny) && (maxy <= scissor->maxy);
|
||||
}
|
||||
|
||||
/* When deciding whether a tile needs mem2gmem, we need to take into
|
||||
* account the scissor rect(s) that were cleared. To simplify we only
|
||||
* consider the last scissor rect for each buffer, since the common
|
||||
* case would be a single clear.
|
||||
*/
|
||||
bool
|
||||
fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile,
|
||||
uint32_t buffers)
|
||||
{
|
||||
if (!(ctx->restore & buffers))
|
||||
return false;
|
||||
|
||||
/* if buffers partially cleared, then slow-path to figure out
|
||||
* if this particular tile needs restoring:
|
||||
*/
|
||||
if ((buffers & FD_BUFFER_COLOR) &&
|
||||
(ctx->partial_cleared & FD_BUFFER_COLOR) &&
|
||||
skip_restore(&ctx->cleared_scissor.color, tile))
|
||||
return false;
|
||||
if ((buffers & FD_BUFFER_DEPTH) &&
|
||||
(ctx->partial_cleared & FD_BUFFER_DEPTH) &&
|
||||
skip_restore(&ctx->cleared_scissor.depth, tile))
|
||||
return false;
|
||||
if ((buffers & FD_BUFFER_STENCIL) &&
|
||||
(ctx->partial_cleared & FD_BUFFER_STENCIL) &&
|
||||
skip_restore(&ctx->cleared_scissor.stencil, tile))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@@ -55,6 +55,11 @@ struct fd_gmem_stateobj {
|
||||
bool has_zs; /* gmem config using depth/stencil? */
|
||||
};
|
||||
|
||||
void fd_gmem_render_tiles(struct pipe_context *pctx);
|
||||
struct fd_context;
|
||||
|
||||
void fd_gmem_render_tiles(struct fd_context *ctx);
|
||||
|
||||
bool fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile,
|
||||
uint32_t buffers);
|
||||
|
||||
#endif /* FREEDRENO_GMEM_H_ */
|
||||
|
@@ -52,6 +52,7 @@ struct fd_lowering_context {
|
||||
#define B 1
|
||||
struct tgsi_full_src_register imm;
|
||||
int emitted_decls;
|
||||
unsigned saturate;
|
||||
};
|
||||
|
||||
static inline struct fd_lowering_context *
|
||||
@@ -130,12 +131,14 @@ aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
|
||||
static void
|
||||
create_mov(struct tgsi_transform_context *tctx,
|
||||
const struct tgsi_full_dst_register *dst,
|
||||
const struct tgsi_full_src_register *src, unsigned mask)
|
||||
const struct tgsi_full_src_register *src,
|
||||
unsigned mask, unsigned saturate)
|
||||
{
|
||||
struct tgsi_full_instruction new_inst;
|
||||
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
|
||||
new_inst.Instruction.Saturate = saturate;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], dst, mask);
|
||||
new_inst.Instruction.NumSrcRegs = 1;
|
||||
@@ -143,6 +146,25 @@ create_mov(struct tgsi_transform_context *tctx,
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
}
|
||||
|
||||
/* to help calculate # of tgsi tokens for a lowering.. we assume
|
||||
* the worst case, ie. removed instructions don't have ADDR[] or
|
||||
* anything which increases the # of tokens per src/dst and the
|
||||
* inserted instructions do.
|
||||
*
|
||||
* OINST() - old instruction
|
||||
* 1 : instruction itself
|
||||
* 1 : dst
|
||||
* 1 * nargs : srcN
|
||||
*
|
||||
* NINST() - new instruction
|
||||
* 1 : instruction itself
|
||||
* 2 : dst
|
||||
* 2 * nargs : srcN
|
||||
*/
|
||||
|
||||
#define OINST(nargs) (1 + 1 + 1 * (nargs))
|
||||
#define NINST(nargs) (1 + 2 + 2 * (nargs))
|
||||
|
||||
/*
|
||||
* Lowering Translators:
|
||||
*/
|
||||
@@ -169,7 +191,8 @@ create_mov(struct tgsi_transform_context *tctx,
|
||||
* MOV dst.w, src1.w
|
||||
* MOV dst.x, imm{1.0}
|
||||
*/
|
||||
#define DST_GROW (19 - 4)
|
||||
#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
|
||||
NINST(1) + NINST(1) - OINST(2))
|
||||
#define DST_TMP 2
|
||||
static void
|
||||
transform_dst(struct tgsi_transform_context *tctx,
|
||||
@@ -182,12 +205,12 @@ transform_dst(struct tgsi_transform_context *tctx,
|
||||
struct tgsi_full_instruction new_inst;
|
||||
|
||||
if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
|
||||
create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ);
|
||||
create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
|
||||
src0 = &ctx->tmp[A].src;
|
||||
}
|
||||
|
||||
if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
|
||||
create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW);
|
||||
create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
|
||||
src1 = &ctx->tmp[B].src;
|
||||
}
|
||||
|
||||
@@ -249,7 +272,7 @@ transform_dst(struct tgsi_transform_context *tctx,
|
||||
* SUB dst.xyz, tmpA.xyz, tmpB.xyz
|
||||
* MOV dst.w, imm{1.0}
|
||||
*/
|
||||
#define XPD_GROW (15 - 4)
|
||||
#define XPD_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(1) - OINST(2))
|
||||
#define XPD_TMP 2
|
||||
static void
|
||||
transform_xpd(struct tgsi_transform_context *tctx,
|
||||
@@ -320,7 +343,7 @@ transform_xpd(struct tgsi_transform_context *tctx,
|
||||
* SIN dst.y, src.x
|
||||
* MOV dst.zw, imm{0.0, 1.0}
|
||||
*/
|
||||
#define SCS_GROW (12 - 3)
|
||||
#define SCS_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) - OINST(1))
|
||||
#define SCS_TMP 1
|
||||
static void
|
||||
transform_scs(struct tgsi_transform_context *tctx,
|
||||
@@ -332,7 +355,7 @@ transform_scs(struct tgsi_transform_context *tctx,
|
||||
struct tgsi_full_instruction new_inst;
|
||||
|
||||
if (aliases(dst, TGSI_WRITEMASK_X, src, TGSI_WRITEMASK_X)) {
|
||||
create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X);
|
||||
create_mov(tctx, &ctx->tmp[A].dst, src, TGSI_WRITEMASK_X, 0);
|
||||
src = &ctx->tmp[A].src;
|
||||
}
|
||||
|
||||
@@ -382,7 +405,7 @@ transform_scs(struct tgsi_transform_context *tctx,
|
||||
* MUL tmpB, tmpB, src2
|
||||
* ADD dst, tmpA, tmpB
|
||||
*/
|
||||
#define LRP_GROW (16 - 4)
|
||||
#define LRP_GROW (NINST(2) + NINST(2) + NINST(2) + NINST(2) - OINST(3))
|
||||
#define LRP_TMP 2
|
||||
static void
|
||||
transform_lrp(struct tgsi_transform_context *tctx,
|
||||
@@ -448,7 +471,7 @@ transform_lrp(struct tgsi_transform_context *tctx,
|
||||
* FLR tmpA, src
|
||||
* SUB dst, src, tmpA
|
||||
*/
|
||||
#define FRC_GROW (7 - 3)
|
||||
#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
|
||||
#define FRC_TMP 1
|
||||
static void
|
||||
transform_frc(struct tgsi_transform_context *tctx,
|
||||
@@ -492,7 +515,7 @@ transform_frc(struct tgsi_transform_context *tctx,
|
||||
* MUL tmpA.x, src1.x, tmpA.x
|
||||
* EX2 dst, tmpA.x
|
||||
*/
|
||||
#define POW_GROW (10 - 4)
|
||||
#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
|
||||
#define POW_TMP 1
|
||||
static void
|
||||
transform_pow(struct tgsi_transform_context *tctx,
|
||||
@@ -551,7 +574,8 @@ transform_pow(struct tgsi_transform_context *tctx,
|
||||
* MOV dst.yz, tmpA.xy
|
||||
* MOV dst.xw, imm{1.0}
|
||||
*/
|
||||
#define LIT_GROW (30 - 3)
|
||||
#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
|
||||
NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
|
||||
#define LIT_TMP 1
|
||||
static void
|
||||
transform_lit(struct tgsi_transform_context *tctx,
|
||||
@@ -661,7 +685,8 @@ transform_lit(struct tgsi_transform_context *tctx,
|
||||
* MOV dst.z, tmpA.y
|
||||
* MOV dst.w, imm{1.0}
|
||||
*/
|
||||
#define EXP_GROW (19 - 3)
|
||||
#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
|
||||
NINST(1)+ NINST(1) - OINST(1))
|
||||
#define EXP_TMP 1
|
||||
static void
|
||||
transform_exp(struct tgsi_transform_context *tctx,
|
||||
@@ -755,7 +780,8 @@ transform_exp(struct tgsi_transform_context *tctx,
|
||||
* MOV dst.xz, tmpA.yx
|
||||
* MOV dst.w, imm{1.0}
|
||||
*/
|
||||
#define LOG_GROW (25 - 3)
|
||||
#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
|
||||
NINST(2) + NINST(1) + NINST(1) - OINST(1))
|
||||
#define LOG_TMP 1
|
||||
static void
|
||||
transform_log(struct tgsi_transform_context *tctx,
|
||||
@@ -879,11 +905,11 @@ transform_log(struct tgsi_transform_context *tctx,
|
||||
* }
|
||||
* ; fixup last instruction to replicate into dst
|
||||
*/
|
||||
#define DP4_GROW (19 - 4)
|
||||
#define DP3_GROW (14 - 4)
|
||||
#define DPH_GROW (18 - 4)
|
||||
#define DP2_GROW ( 9 - 4)
|
||||
#define DP2A_GROW (13 - 4)
|
||||
#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
|
||||
#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
|
||||
#define DPH_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(2) - OINST(2))
|
||||
#define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
|
||||
#define DP2A_GROW (NINST(2) + NINST(3) + NINST(2) - OINST(3))
|
||||
#define DOTP_TMP 1
|
||||
static void
|
||||
transform_dotp(struct tgsi_transform_context *tctx,
|
||||
@@ -981,6 +1007,138 @@ transform_dotp(struct tgsi_transform_context *tctx,
|
||||
}
|
||||
}
|
||||
|
||||
/* Inserts a MOV_SAT for the needed components of tex coord. Note that
|
||||
* in the case of TXP, the clamping must happen *after* projection, so
|
||||
* we need to lower TXP to TEX.
|
||||
*
|
||||
* MOV tmpA, src0
|
||||
* if (opc == TXP) {
|
||||
* ; do perspective division manually before clamping:
|
||||
* RCP tmpB.x, tmpA.w
|
||||
* MUL tmpA.<pmask>, tmpA, tmpB.xxxx
|
||||
* opc = TEX;
|
||||
* }
|
||||
* MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
|
||||
* <opc> dst, tmpA, ...
|
||||
*/
|
||||
#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
|
||||
#define SAMP_TMP 2
|
||||
static int
|
||||
transform_samp(struct tgsi_transform_context *tctx,
|
||||
struct tgsi_full_instruction *inst)
|
||||
{
|
||||
struct fd_lowering_context *ctx = fd_lowering_context(tctx);
|
||||
struct tgsi_full_src_register *coord = &inst->Src[0];
|
||||
struct tgsi_full_src_register *samp;
|
||||
struct tgsi_full_instruction new_inst;
|
||||
/* mask is clamped coords, pmask is all coords (for projection): */
|
||||
unsigned mask = 0, pmask = 0, smask;
|
||||
unsigned opcode = inst->Instruction.Opcode;
|
||||
|
||||
if (opcode == TGSI_OPCODE_TXB2) {
|
||||
samp = &inst->Src[2];
|
||||
} else {
|
||||
samp = &inst->Src[1];
|
||||
}
|
||||
|
||||
/* convert sampler # to bitmask to test: */
|
||||
smask = 1 << samp->Register.Index;
|
||||
|
||||
/* check if we actually need to lower this one: */
|
||||
if (!(ctx->saturate & smask))
|
||||
return -1;
|
||||
|
||||
/* figure out which coordinates need saturating:
|
||||
* - RECT textures should not get saturated
|
||||
* - array index coords should not get saturated
|
||||
*/
|
||||
switch (inst->Texture.Texture) {
|
||||
case TGSI_TEXTURE_3D:
|
||||
case TGSI_TEXTURE_CUBE:
|
||||
case TGSI_TEXTURE_CUBE_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOWCUBE:
|
||||
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
|
||||
if (ctx->config->saturate_r & smask)
|
||||
mask |= TGSI_WRITEMASK_Z;
|
||||
pmask |= TGSI_WRITEMASK_Z;
|
||||
/* fallthrough */
|
||||
|
||||
case TGSI_TEXTURE_2D:
|
||||
case TGSI_TEXTURE_2D_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOW2D:
|
||||
case TGSI_TEXTURE_SHADOW2D_ARRAY:
|
||||
case TGSI_TEXTURE_2D_MSAA:
|
||||
case TGSI_TEXTURE_2D_ARRAY_MSAA:
|
||||
if (ctx->config->saturate_t & smask)
|
||||
mask |= TGSI_WRITEMASK_Y;
|
||||
pmask |= TGSI_WRITEMASK_Y;
|
||||
/* fallthrough */
|
||||
|
||||
case TGSI_TEXTURE_1D:
|
||||
case TGSI_TEXTURE_1D_ARRAY:
|
||||
case TGSI_TEXTURE_SHADOW1D:
|
||||
case TGSI_TEXTURE_SHADOW1D_ARRAY:
|
||||
if (ctx->config->saturate_s & smask)
|
||||
mask |= TGSI_WRITEMASK_X;
|
||||
pmask |= TGSI_WRITEMASK_X;
|
||||
break;
|
||||
|
||||
/* TODO: I think we should ignore these?
|
||||
case TGSI_TEXTURE_RECT:
|
||||
case TGSI_TEXTURE_SHADOWRECT:
|
||||
*/
|
||||
}
|
||||
|
||||
/* sanity check.. driver could be asking to saturate a non-
|
||||
* existent coordinate component:
|
||||
*/
|
||||
if (!mask)
|
||||
return -1;
|
||||
|
||||
/* MOV tmpA, src0 */
|
||||
create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
|
||||
|
||||
/* This is a bit sad.. we need to clamp *after* the coords
|
||||
* are projected, which means lowering TXP to TEX and doing
|
||||
* the projection ourself. But since I haven't figured out
|
||||
* how to make the lowering code deliver an electric shock
|
||||
* to anyone using GL_CLAMP, we must do this instead:
|
||||
*/
|
||||
if (opcode == TGSI_OPCODE_TXP) {
|
||||
/* RCP tmpB.x tmpA.w */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
|
||||
new_inst.Instruction.NumSrcRegs = 1;
|
||||
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W,_,_,_));
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
/* MUL tmpA.mask, tmpA, tmpB.xxxx */
|
||||
new_inst = tgsi_default_full_instruction();
|
||||
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
|
||||
new_inst.Instruction.NumDstRegs = 1;
|
||||
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
|
||||
new_inst.Instruction.NumSrcRegs = 2;
|
||||
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X,Y,Z,W));
|
||||
reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X,X,X,X));
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
opcode = TGSI_OPCODE_TEX;
|
||||
}
|
||||
|
||||
/* MOV_SAT tmpA.<mask>, tmpA */
|
||||
create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask,
|
||||
TGSI_SAT_ZERO_ONE);
|
||||
|
||||
/* modify the texture samp instruction to take fixed up coord: */
|
||||
new_inst = *inst;
|
||||
new_inst.Instruction.Opcode = opcode;
|
||||
new_inst.Src[0] = ctx->tmp[A].src;
|
||||
tctx->emit_instruction(tctx, &new_inst);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Two-sided color emulation:
|
||||
* For each COLOR input, create a corresponding BCOLOR input, plus
|
||||
@@ -990,7 +1148,7 @@ transform_dotp(struct tgsi_transform_context *tctx,
|
||||
2 + /* FACE */ \
|
||||
((n) * 2) + /* IN[] BCOLOR[n] */ \
|
||||
((n) * 1) + /* TEMP[] */ \
|
||||
((n) * 5) /* CMP instr */ \
|
||||
((n) * NINST(3)) /* CMP instr */ \
|
||||
)
|
||||
|
||||
static void
|
||||
@@ -1234,6 +1392,14 @@ transform_instr(struct tgsi_transform_context *tctx,
|
||||
goto skip;
|
||||
transform_dotp(tctx, inst);
|
||||
break;
|
||||
case TGSI_OPCODE_TEX:
|
||||
case TGSI_OPCODE_TXP:
|
||||
case TGSI_OPCODE_TXB:
|
||||
case TGSI_OPCODE_TXB2:
|
||||
case TGSI_OPCODE_TXL:
|
||||
if (transform_samp(tctx, inst))
|
||||
goto skip;
|
||||
break;
|
||||
default:
|
||||
skip:
|
||||
tctx->emit_instruction(tctx, inst);
|
||||
@@ -1254,6 +1420,9 @@ fd_transform_lowering(const struct fd_lowering_config *config,
|
||||
struct tgsi_token *newtoks;
|
||||
int newlen, numtmp;
|
||||
|
||||
/* sanity check in case limit is ever increased: */
|
||||
assert((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
|
||||
|
||||
memset(&ctx, 0, sizeof(ctx));
|
||||
ctx.base.transform_instruction = transform_instr;
|
||||
ctx.info = info;
|
||||
@@ -1277,6 +1446,8 @@ fd_transform_lowering(const struct fd_lowering_config *config,
|
||||
}
|
||||
}
|
||||
|
||||
ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
|
||||
|
||||
#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
|
||||
/* if there are no instructions to lower, then we are done: */
|
||||
if (!(OPCS(DST) ||
|
||||
@@ -1293,7 +1464,8 @@ fd_transform_lowering(const struct fd_lowering_config *config,
|
||||
OPCS(DPH) ||
|
||||
OPCS(DP2) ||
|
||||
OPCS(DP2A) ||
|
||||
ctx.two_side_colors))
|
||||
ctx.two_side_colors ||
|
||||
ctx.saturate))
|
||||
return NULL;
|
||||
|
||||
#if 0 /* debug */
|
||||
@@ -1359,6 +1531,15 @@ fd_transform_lowering(const struct fd_lowering_config *config,
|
||||
newlen += DP2A_GROW * OPCS(DP2A);
|
||||
numtmp = MAX2(numtmp, DOTP_TMP);
|
||||
}
|
||||
if (ctx.saturate) {
|
||||
int n = info->opcode_count[TGSI_OPCODE_TEX] +
|
||||
info->opcode_count[TGSI_OPCODE_TXP] +
|
||||
info->opcode_count[TGSI_OPCODE_TXB] +
|
||||
info->opcode_count[TGSI_OPCODE_TXB2] +
|
||||
info->opcode_count[TGSI_OPCODE_TXL];
|
||||
newlen += SAMP_GROW * n;
|
||||
numtmp = MAX2(numtmp, SAMP_TMP);
|
||||
}
|
||||
|
||||
/* specifically don't include two_side_colors temps in the count: */
|
||||
ctx.numtmp = numtmp;
|
||||
|
@@ -69,6 +69,16 @@ struct fd_lowering_config {
|
||||
unsigned lower_DPH : 1;
|
||||
unsigned lower_DP2 : 1;
|
||||
unsigned lower_DP2A : 1;
|
||||
|
||||
/* To emulate certain texture wrap modes, this can be used
|
||||
* to saturate the specified tex coord to [0.0, 1.0]. The
|
||||
* bits are according to sampler #, ie. if, for example:
|
||||
*
|
||||
* (conf->saturate_s & (1 << n))
|
||||
*
|
||||
* is true, then the s coord for sampler n is saturated.
|
||||
*/
|
||||
unsigned saturate_s, saturate_t, saturate_r;
|
||||
};
|
||||
|
||||
const struct tgsi_token * fd_transform_lowering(
|
||||
|
@@ -67,7 +67,7 @@ static const char *solid_vp =
|
||||
static const char *blit_fp =
|
||||
"FRAG \n"
|
||||
"PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1 \n"
|
||||
"DCL IN[0], TEXCOORD \n"
|
||||
"DCL IN[0], TEXCOORD[0], PERSPECTIVE \n"
|
||||
"DCL OUT[0], COLOR \n"
|
||||
"DCL SAMP[0] \n"
|
||||
" 0: TEX OUT[0], IN[0], SAMP[0], 2D \n"
|
||||
@@ -77,7 +77,7 @@ static const char *blit_vp =
|
||||
"VERT \n"
|
||||
"DCL IN[0] \n"
|
||||
"DCL IN[1] \n"
|
||||
"DCL OUT[0], TEXCOORD \n"
|
||||
"DCL OUT[0], TEXCOORD[0] \n"
|
||||
"DCL OUT[1], POSITION \n"
|
||||
" 0: MOV OUT[0], IN[0] \n"
|
||||
" 0: MOV OUT[1], IN[1] \n"
|
||||
|
@@ -183,12 +183,16 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
|
||||
return false;
|
||||
|
||||
/* if the app tries to read back the query result before the
|
||||
* back is submitted, that forces us to flush so that there
|
||||
* batch is submitted, that forces us to flush so that there
|
||||
* are actually results to wait for:
|
||||
*/
|
||||
if (!LIST_IS_EMPTY(&hq->list)) {
|
||||
/* if app didn't actually trigger any cmdstream, then
|
||||
* we have nothing to do:
|
||||
*/
|
||||
if (!ctx->needs_flush)
|
||||
return true;
|
||||
DBG("reading query result forces flush!");
|
||||
ctx->needs_flush = true;
|
||||
fd_context_render(&ctx->base);
|
||||
}
|
||||
|
||||
@@ -201,9 +205,6 @@ fd_hw_get_query_result(struct fd_context *ctx, struct fd_query *q,
|
||||
assert(LIST_IS_EMPTY(&hq->current_periods));
|
||||
assert(!hq->period);
|
||||
|
||||
if (LIST_IS_EMPTY(&hq->periods))
|
||||
return true;
|
||||
|
||||
/* if !wait, then check the last sample (the one most likely to
|
||||
* not be ready yet) and bail if it is not ready:
|
||||
*/
|
||||
|
@@ -116,7 +116,7 @@ fd_resource_transfer_map(struct pipe_context *pctx,
|
||||
ptrans->usage = usage;
|
||||
ptrans->box = *box;
|
||||
ptrans->stride = slice->pitch * rsc->cpp;
|
||||
ptrans->layer_stride = ptrans->stride;
|
||||
ptrans->layer_stride = slice->size0;
|
||||
|
||||
if (usage & PIPE_TRANSFER_READ)
|
||||
op |= DRM_FREEDRENO_PREP_READ;
|
||||
@@ -199,9 +199,8 @@ setup_slices(struct fd_resource *rsc)
|
||||
|
||||
for (level = 0; level <= prsc->last_level; level++) {
|
||||
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
|
||||
uint32_t aligned_width = align(width, 32);
|
||||
|
||||
slice->pitch = aligned_width;
|
||||
slice->pitch = align(width, 32);
|
||||
slice->offset = size;
|
||||
slice->size0 = slice->pitch * height * rsc->cpp;
|
||||
|
||||
@@ -215,6 +214,35 @@ setup_slices(struct fd_resource *rsc)
|
||||
return size;
|
||||
}
|
||||
|
||||
/* 2d array and 3d textures seem to want their layers aligned to
|
||||
* page boundaries
|
||||
*/
|
||||
static uint32_t
|
||||
setup_slices_array(struct fd_resource *rsc)
|
||||
{
|
||||
struct pipe_resource *prsc = &rsc->base.b;
|
||||
uint32_t level, size = 0;
|
||||
uint32_t width = prsc->width0;
|
||||
uint32_t height = prsc->height0;
|
||||
uint32_t depth = prsc->depth0;
|
||||
|
||||
for (level = 0; level <= prsc->last_level; level++) {
|
||||
struct fd_resource_slice *slice = fd_resource_slice(rsc, level);
|
||||
|
||||
slice->pitch = align(width, 32);
|
||||
slice->offset = size;
|
||||
slice->size0 = align(slice->pitch * height * rsc->cpp, 4096);
|
||||
|
||||
size += slice->size0 * depth * prsc->array_size;
|
||||
|
||||
width = u_minify(width, 1);
|
||||
height = u_minify(height, 1);
|
||||
depth = u_minify(depth, 1);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new texture object, using the given template info.
|
||||
*/
|
||||
@@ -246,7 +274,16 @@ fd_resource_create(struct pipe_screen *pscreen,
|
||||
|
||||
assert(rsc->cpp);
|
||||
|
||||
size = setup_slices(rsc);
|
||||
switch (tmpl->target) {
|
||||
case PIPE_TEXTURE_3D:
|
||||
case PIPE_TEXTURE_1D_ARRAY:
|
||||
case PIPE_TEXTURE_2D_ARRAY:
|
||||
size = setup_slices_array(rsc);
|
||||
break;
|
||||
default:
|
||||
size = setup_slices(rsc);
|
||||
break;
|
||||
}
|
||||
|
||||
realloc_bo(rsc, size);
|
||||
if (!rsc->bo)
|
||||
@@ -410,8 +447,8 @@ fd_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
|
||||
static void
|
||||
fd_blitter_pipe_begin(struct fd_context *ctx)
|
||||
{
|
||||
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vertexbuf.vb);
|
||||
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx);
|
||||
util_blitter_save_vertex_buffer_slot(ctx->blitter, ctx->vtx.vertexbuf.vb);
|
||||
util_blitter_save_vertex_elements(ctx->blitter, ctx->vtx.vtx);
|
||||
util_blitter_save_vertex_shader(ctx->blitter, ctx->prog.vp);
|
||||
util_blitter_save_rasterizer(ctx->blitter, ctx->rasterizer);
|
||||
util_blitter_save_viewport(ctx->blitter, &ctx->viewport);
|
||||
|
@@ -60,7 +60,7 @@ static const struct debug_named_value debug_options[] = {
|
||||
{"msgs", FD_DBG_MSGS, "Print debug messages"},
|
||||
{"disasm", FD_DBG_DISASM, "Dump TGSI and adreno shader disassembly"},
|
||||
{"dclear", FD_DBG_DCLEAR, "Mark all state dirty after clear"},
|
||||
{"dgmem", FD_DBG_DGMEM, "Mark all state dirty after GMEM tile pass"},
|
||||
{"flush", FD_DBG_FLUSH, "Force flush after every draw"},
|
||||
{"dscis", FD_DBG_DSCIS, "Disable scissor optimization"},
|
||||
{"direct", FD_DBG_DIRECT, "Force inline (SS_DIRECT) state loads"},
|
||||
{"dbypass", FD_DBG_DBYPASS,"Disable GMEM bypass"},
|
||||
@@ -70,6 +70,7 @@ static const struct debug_named_value debug_options[] = {
|
||||
{"optmsgs", FD_DBG_OPTMSGS,"Enable optimizater debug messages"},
|
||||
{"optdump", FD_DBG_OPTDUMP,"Dump shader DAG to .dot files"},
|
||||
{"glsl130", FD_DBG_GLSL130,"Temporary flag to enable GLSL 130 on a3xx+"},
|
||||
{"nocp", FD_DBG_NOCP, "Disable copy-propagation"},
|
||||
DEBUG_NAMED_VALUE_END
|
||||
};
|
||||
|
||||
@@ -156,23 +157,18 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_ANISOTROPIC_FILTER:
|
||||
case PIPE_CAP_POINT_SPRITE:
|
||||
case PIPE_CAP_TEXTURE_SHADOW_MAP:
|
||||
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
|
||||
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
|
||||
case PIPE_CAP_TEXTURE_SWIZZLE:
|
||||
case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
|
||||
case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
|
||||
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP:
|
||||
case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
|
||||
case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
|
||||
case PIPE_CAP_TGSI_INSTANCEID:
|
||||
case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
|
||||
case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
|
||||
case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
case PIPE_CAP_START_INSTANCE:
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
case PIPE_CAP_USER_CONSTANT_BUFFERS:
|
||||
case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
|
||||
return 1;
|
||||
@@ -181,12 +177,23 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_TGSI_TEXCOORD:
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
case PIPE_CAP_CONDITIONAL_RENDER:
|
||||
case PIPE_CAP_PRIMITIVE_RESTART:
|
||||
case PIPE_CAP_TEXTURE_MULTISAMPLE:
|
||||
case PIPE_CAP_TEXTURE_BARRIER:
|
||||
case PIPE_CAP_SM3:
|
||||
case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
|
||||
case PIPE_CAP_CUBE_MAP_ARRAY:
|
||||
case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
|
||||
case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
|
||||
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
|
||||
case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
|
||||
case PIPE_CAP_TGSI_INSTANCEID:
|
||||
case PIPE_CAP_START_INSTANCE:
|
||||
case PIPE_CAP_COMPUTE:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_SM3:
|
||||
case PIPE_CAP_PRIMITIVE_RESTART:
|
||||
return (screen->gpu_id >= 300) ? 1 : 0;
|
||||
|
||||
case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
|
||||
return 256;
|
||||
|
||||
@@ -199,7 +206,7 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_DEPTH_CLIP_DISABLE:
|
||||
case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
|
||||
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
|
||||
case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
|
||||
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
|
||||
case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
|
||||
case PIPE_CAP_VERTEX_COLOR_CLAMPED:
|
||||
@@ -220,6 +227,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_CONDITIONAL_RENDER_INVERTED:
|
||||
return 0;
|
||||
|
||||
case PIPE_CAP_MAX_VIEWPORTS:
|
||||
return 1;
|
||||
|
||||
/* Stream output. */
|
||||
case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
|
||||
case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
|
||||
@@ -235,11 +245,13 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
|
||||
/* Texturing. */
|
||||
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
|
||||
return MAX_MIP_LEVELS;
|
||||
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
|
||||
return 11;
|
||||
|
||||
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
|
||||
return 0; /* TODO: a3xx+ should support (required in gles3) */
|
||||
return (screen->gpu_id >= 300) ? 256 : 0;
|
||||
|
||||
/* Render targets. */
|
||||
case PIPE_CAP_MAX_RENDER_TARGETS:
|
||||
@@ -277,11 +289,9 @@ fd_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
return 10;
|
||||
case PIPE_CAP_UMA:
|
||||
return 1;
|
||||
|
||||
default:
|
||||
DBG("unknown param %d", param);
|
||||
return 0;
|
||||
}
|
||||
debug_printf("unknown param %d\n", param);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static float
|
||||
@@ -296,16 +306,15 @@ fd_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
|
||||
case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
|
||||
return 16.0f;
|
||||
case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
|
||||
return 16.0f;
|
||||
return 15.0f;
|
||||
case PIPE_CAPF_GUARD_BAND_LEFT:
|
||||
case PIPE_CAPF_GUARD_BAND_TOP:
|
||||
case PIPE_CAPF_GUARD_BAND_RIGHT:
|
||||
case PIPE_CAPF_GUARD_BAND_BOTTOM:
|
||||
return 0.0f;
|
||||
default:
|
||||
DBG("unknown paramf %d", param);
|
||||
return 0;
|
||||
}
|
||||
debug_printf("unknown paramf %d\n", param);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -342,7 +351,11 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
case PIPE_SHADER_CAP_MAX_TEMPS:
|
||||
return 64; /* Max native temporaries. */
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFER_SIZE:
|
||||
return ((screen->gpu_id >= 300) ? 1024 : 64) * sizeof(float[4]);
|
||||
/* NOTE: the limit for a3xx actually seems to be 512, but
|
||||
* split between VS and FS. Use lower limit of 256 to
|
||||
* avoid getting into impossible situations:
|
||||
*/
|
||||
return ((screen->gpu_id >= 300) ? 256 : 64) * sizeof(float[4]);
|
||||
case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_PREDS:
|
||||
@@ -355,6 +368,7 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_SUBROUTINES:
|
||||
case PIPE_SHADER_CAP_DOUBLES:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 1;
|
||||
@@ -368,10 +382,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
default:
|
||||
DBG("unknown shader param %d", param);
|
||||
return 0;
|
||||
}
|
||||
debug_printf("unknown shader param %d\n", param);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -388,6 +400,9 @@ fd_screen_bo_get_handle(struct pipe_screen *pscreen,
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
|
||||
whandle->handle = fd_bo_handle(bo);
|
||||
return TRUE;
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
|
||||
whandle->handle = fd_bo_dmabuf(bo);
|
||||
return TRUE;
|
||||
} else {
|
||||
return FALSE;
|
||||
}
|
||||
@@ -401,12 +416,17 @@ fd_screen_bo_from_handle(struct pipe_screen *pscreen,
|
||||
struct fd_screen *screen = fd_screen(pscreen);
|
||||
struct fd_bo *bo;
|
||||
|
||||
if (whandle->type != DRM_API_HANDLE_TYPE_SHARED) {
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
|
||||
bo = fd_bo_from_name(screen->dev, whandle->handle);
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_KMS) {
|
||||
bo = fd_bo_from_handle(screen->dev, whandle->handle, 0);
|
||||
} else if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
|
||||
bo = fd_bo_from_dmabuf(screen->dev, whandle->handle);
|
||||
} else {
|
||||
DBG("Attempt to import unsupported handle type %d", whandle->type);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bo = fd_bo_from_name(screen->dev, whandle->handle);
|
||||
if (!bo) {
|
||||
DBG("ref name 0x%08x failed", whandle->handle);
|
||||
return NULL;
|
||||
|
@@ -177,7 +177,7 @@ fd_set_vertex_buffers(struct pipe_context *pctx,
|
||||
const struct pipe_vertex_buffer *vb)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct fd_vertexbuf_stateobj *so = &ctx->vertexbuf;
|
||||
struct fd_vertexbuf_stateobj *so = &ctx->vtx.vertexbuf;
|
||||
int i;
|
||||
|
||||
/* on a2xx, pitch is encoded in the vtx fetch instruction, so
|
||||
@@ -237,8 +237,18 @@ static void
|
||||
fd_rasterizer_state_bind(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
struct pipe_scissor_state *old_scissor = fd_context_get_scissor(ctx);
|
||||
|
||||
ctx->rasterizer = hwcso;
|
||||
ctx->dirty |= FD_DIRTY_RASTERIZER;
|
||||
|
||||
/* if scissor enable bit changed we need to mark scissor
|
||||
* state as dirty as well:
|
||||
* NOTE: we can do a shallow compare, since we only care
|
||||
* if it changed to/from &ctx->disable_scissor
|
||||
*/
|
||||
if (old_scissor != fd_context_get_scissor(ctx))
|
||||
ctx->dirty |= FD_DIRTY_SCISSOR;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -286,7 +296,7 @@ static void
|
||||
fd_vertex_state_bind(struct pipe_context *pctx, void *hwcso)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
ctx->vtx = hwcso;
|
||||
ctx->vtx.vtx = hwcso;
|
||||
ctx->dirty |= FD_DIRTY_VTXSTATE;
|
||||
}
|
||||
|
||||
|
@@ -49,7 +49,7 @@ fd_sampler_view_destroy(struct pipe_context *pctx,
|
||||
FREE(view);
|
||||
}
|
||||
|
||||
static void bind_sampler_states(struct fd_texture_stateobj *prog,
|
||||
static void bind_sampler_states(struct fd_texture_stateobj *tex,
|
||||
unsigned nr, void **hwcso)
|
||||
{
|
||||
unsigned i;
|
||||
@@ -58,19 +58,19 @@ static void bind_sampler_states(struct fd_texture_stateobj *prog,
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (hwcso[i])
|
||||
new_nr = i + 1;
|
||||
prog->samplers[i] = hwcso[i];
|
||||
prog->dirty_samplers |= (1 << i);
|
||||
tex->samplers[i] = hwcso[i];
|
||||
tex->dirty_samplers |= (1 << i);
|
||||
}
|
||||
|
||||
for (; i < prog->num_samplers; i++) {
|
||||
prog->samplers[i] = NULL;
|
||||
prog->dirty_samplers |= (1 << i);
|
||||
for (; i < tex->num_samplers; i++) {
|
||||
tex->samplers[i] = NULL;
|
||||
tex->dirty_samplers |= (1 << i);
|
||||
}
|
||||
|
||||
prog->num_samplers = new_nr;
|
||||
tex->num_samplers = new_nr;
|
||||
}
|
||||
|
||||
static void set_sampler_views(struct fd_texture_stateobj *prog,
|
||||
static void set_sampler_views(struct fd_texture_stateobj *tex,
|
||||
unsigned nr, struct pipe_sampler_view **views)
|
||||
{
|
||||
unsigned i;
|
||||
@@ -79,19 +79,19 @@ static void set_sampler_views(struct fd_texture_stateobj *prog,
|
||||
for (i = 0; i < nr; i++) {
|
||||
if (views[i])
|
||||
new_nr = i + 1;
|
||||
pipe_sampler_view_reference(&prog->textures[i], views[i]);
|
||||
prog->dirty_samplers |= (1 << i);
|
||||
pipe_sampler_view_reference(&tex->textures[i], views[i]);
|
||||
tex->dirty_samplers |= (1 << i);
|
||||
}
|
||||
|
||||
for (; i < prog->num_textures; i++) {
|
||||
pipe_sampler_view_reference(&prog->textures[i], NULL);
|
||||
prog->dirty_samplers |= (1 << i);
|
||||
for (; i < tex->num_textures; i++) {
|
||||
pipe_sampler_view_reference(&tex->textures[i], NULL);
|
||||
tex->dirty_samplers |= (1 << i);
|
||||
}
|
||||
|
||||
prog->num_textures = new_nr;
|
||||
tex->num_textures = new_nr;
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
fd_sampler_states_bind(struct pipe_context *pctx,
|
||||
unsigned shader, unsigned start,
|
||||
unsigned nr, void **hwcso)
|
||||
@@ -101,13 +101,6 @@ fd_sampler_states_bind(struct pipe_context *pctx,
|
||||
assert(start == 0);
|
||||
|
||||
if (shader == PIPE_SHADER_FRAGMENT) {
|
||||
/* on a2xx, since there is a flat address space for textures/samplers,
|
||||
* a change in # of fragment textures/samplers will trigger patching and
|
||||
* re-emitting the vertex shader:
|
||||
*/
|
||||
if (nr != ctx->fragtex.num_samplers)
|
||||
ctx->dirty |= FD_DIRTY_TEXSTATE;
|
||||
|
||||
bind_sampler_states(&ctx->fragtex, nr, hwcso);
|
||||
ctx->dirty |= FD_DIRTY_FRAGTEX;
|
||||
}
|
||||
@@ -169,6 +162,5 @@ fd_texture_init(struct pipe_context *pctx)
|
||||
|
||||
pctx->sampler_view_destroy = fd_sampler_view_destroy;
|
||||
|
||||
pctx->bind_sampler_states = fd_sampler_states_bind;
|
||||
pctx->set_sampler_views = fd_set_sampler_views;
|
||||
}
|
||||
|
@@ -31,6 +31,10 @@
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
void fd_sampler_states_bind(struct pipe_context *pctx,
|
||||
unsigned shader, unsigned start,
|
||||
unsigned nr, void **hwcso);
|
||||
|
||||
void fd_texture_init(struct pipe_context *pctx);
|
||||
|
||||
#endif /* FREEDRENO_TEXTURE_H_ */
|
||||
|
@@ -38,6 +38,7 @@
|
||||
#include "util/u_math.h"
|
||||
#include "util/u_half.h"
|
||||
#include "util/u_dynarray.h"
|
||||
#include "util/u_pack_color.h"
|
||||
|
||||
#include "adreno_common.xml.h"
|
||||
#include "adreno_pm4.xml.h"
|
||||
@@ -55,7 +56,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
|
||||
#define FD_DBG_MSGS 0x0001
|
||||
#define FD_DBG_DISASM 0x0002
|
||||
#define FD_DBG_DCLEAR 0x0004
|
||||
#define FD_DBG_DGMEM 0x0008
|
||||
#define FD_DBG_FLUSH 0x0008
|
||||
#define FD_DBG_DSCIS 0x0010
|
||||
#define FD_DBG_DIRECT 0x0020
|
||||
#define FD_DBG_DBYPASS 0x0040
|
||||
@@ -65,6 +66,7 @@ enum adreno_stencil_op fd_stencil_op(unsigned op);
|
||||
#define FD_DBG_OPTMSGS 0x0400
|
||||
#define FD_DBG_OPTDUMP 0x0800
|
||||
#define FD_DBG_GLSL130 0x1000
|
||||
#define FD_DBG_NOCP 0x2000
|
||||
|
||||
extern int fd_mesa_debug;
|
||||
extern bool fd_binning_enabled;
|
||||
@@ -208,6 +210,10 @@ static inline void
|
||||
OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
|
||||
struct fd_ringmarker *end)
|
||||
{
|
||||
uint32_t dwords = fd_ringmarker_dwords(start, end);
|
||||
|
||||
assert(dwords > 0);
|
||||
|
||||
/* for debug after a lock up, write a unique counter value
|
||||
* to scratch6 for each IB, to make it easier to match up
|
||||
* register dumps to cmdstream. The combination of IB and
|
||||
@@ -218,7 +224,7 @@ OUT_IB(struct fd_ringbuffer *ring, struct fd_ringmarker *start,
|
||||
|
||||
OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
|
||||
fd_ringbuffer_emit_reloc_ring(ring, start, end);
|
||||
OUT_RING(ring, fd_ringmarker_dwords(start, end));
|
||||
OUT_RING(ring, dwords);
|
||||
|
||||
emit_marker(ring, 6);
|
||||
}
|
||||
@@ -238,4 +244,24 @@ emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
|
||||
OUT_RING(ring, ++marker_cnt);
|
||||
}
|
||||
|
||||
/* Helper to read a numeric value from an environment variable.  Mostly
 * kept around because it is helpful to brute-force figure out unknown
 * formats, etc, which the blob driver does not support.
 *
 * The string is parsed with base auto-detection (decimal, 0x-prefixed
 * hex, or 0-prefixed octal).  Returns 0 if the variable is not set.
 */
static inline uint32_t env2u(const char *envvar)
{
	const char *str = getenv(envvar);

	if (!str)
		return 0;

	/* strtoul rather than strtol: where long is 32 bits, strtol would
	 * saturate anything above 0x7fffffff to LONG_MAX instead of handing
	 * back the full 32-bit value:
	 */
	return (uint32_t)strtoul(str, NULL, 0);
}
|
||||
|
||||
static inline uint32_t
|
||||
pack_rgba(enum pipe_format format, const float *rgba)
|
||||
{
|
||||
union util_color uc;
|
||||
util_pack_color(rgba, format, &uc);
|
||||
return uc.ui[0];
|
||||
}
|
||||
|
||||
#endif /* FREEDRENO_UTIL_H_ */
|
||||
|
@@ -81,6 +81,8 @@ void ir3_destroy(struct ir3 *shader)
|
||||
shader->chunk = chunk->next;
|
||||
free(chunk);
|
||||
}
|
||||
free(shader->instrs);
|
||||
free(shader->baryfs);
|
||||
free(shader);
|
||||
}
|
||||
|
||||
@@ -104,7 +106,7 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
|
||||
val.iim_val = reg->iim_val;
|
||||
} else {
|
||||
int8_t components = util_last_bit(reg->wrmask);
|
||||
int8_t max = (reg->num + repeat + components - 1) >> 2;
|
||||
int16_t max = (reg->num + repeat + components - 1) >> 2;
|
||||
|
||||
val.comp = reg->num & 0x3;
|
||||
val.num = reg->num >> 2;
|
||||
@@ -596,6 +598,15 @@ static void insert_instr(struct ir3 *shader,
|
||||
shader->instrs_sz * sizeof(shader->instrs[0]));
|
||||
}
|
||||
shader->instrs[shader->instrs_count++] = instr;
|
||||
|
||||
if (is_input(instr)) {
|
||||
if (shader->baryfs_count == shader->baryfs_sz) {
|
||||
shader->baryfs_sz = MAX2(2 * shader->baryfs_sz, 16);
|
||||
shader->baryfs = realloc(shader->baryfs,
|
||||
shader->baryfs_sz * sizeof(shader->baryfs[0]));
|
||||
}
|
||||
shader->baryfs[shader->baryfs_count++] = instr;
|
||||
}
|
||||
}
|
||||
|
||||
struct ir3_block * ir3_block_create(struct ir3 *shader,
|
||||
|
@@ -47,7 +47,7 @@ struct ir3_info {
|
||||
*/
|
||||
int8_t max_reg; /* highest GPR # used by shader */
|
||||
int8_t max_half_reg;
|
||||
int8_t max_const;
|
||||
int16_t max_const;
|
||||
};
|
||||
|
||||
struct ir3_register {
|
||||
@@ -97,6 +97,8 @@ struct ir3_register {
|
||||
int wrmask;
|
||||
};
|
||||
|
||||
#define IR3_INSTR_SRCS 10
|
||||
|
||||
struct ir3_instruction {
|
||||
struct ir3_block *block;
|
||||
int category;
|
||||
@@ -156,7 +158,7 @@ struct ir3_instruction {
|
||||
} flags;
|
||||
int repeat;
|
||||
unsigned regs_count;
|
||||
struct ir3_register *regs[5];
|
||||
struct ir3_register *regs[1 + IR3_INSTR_SRCS];
|
||||
union {
|
||||
struct {
|
||||
char inv;
|
||||
@@ -208,7 +210,11 @@ struct ir3_instruction {
|
||||
* result of moving a const to a reg would have a low cost, so
|
||||
* it could make sense to duplicate the instruction at various
|
||||
* points where the result is needed to reduce register footprint.
|
||||
*
|
||||
* DEPTH_UNUSED used to mark unused instructions after depth
|
||||
* calculation pass.
|
||||
*/
|
||||
#define DEPTH_UNUSED ~0
|
||||
unsigned depth;
|
||||
};
|
||||
struct ir3_instruction *next;
|
||||
@@ -222,6 +228,8 @@ struct ir3_heap_chunk;
|
||||
struct ir3 {
|
||||
unsigned instrs_count, instrs_sz;
|
||||
struct ir3_instruction **instrs;
|
||||
unsigned baryfs_count, baryfs_sz;
|
||||
struct ir3_instruction **baryfs;
|
||||
unsigned heap_idx;
|
||||
struct ir3_heap_chunk *chunk;
|
||||
};
|
||||
@@ -270,6 +278,10 @@ static inline void ir3_clear_mark(struct ir3 *shader)
|
||||
/* TODO would be nice to drop the instruction array.. for
|
||||
* new compiler, _clear_mark() is all we use it for, and
|
||||
* we could probably manage a linked list instead..
|
||||
*
|
||||
* Also, we'll probably want to mark instructions within
|
||||
* a block, so tracking the list of instrs globally is
|
||||
* unlikely to be what we want.
|
||||
*/
|
||||
unsigned i;
|
||||
for (i = 0; i < shader->instrs_count; i++) {
|
||||
@@ -406,12 +418,12 @@ void ir3_block_depth(struct ir3_block *block);
|
||||
void ir3_block_cp(struct ir3_block *block);
|
||||
|
||||
/* scheduling: */
|
||||
void ir3_block_sched(struct ir3_block *block);
|
||||
int ir3_block_sched(struct ir3_block *block);
|
||||
|
||||
/* register assignment: */
|
||||
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
|
||||
bool half_precision, bool frag_coord, bool frag_face,
|
||||
bool *has_samp);
|
||||
bool *has_samp, int *max_bary);
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
# define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
@@ -444,7 +456,7 @@ static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
|
||||
{
|
||||
unsigned idx = regmask_idx(reg);
|
||||
unsigned i;
|
||||
for (i = 0; i < 4; i++, idx++)
|
||||
for (i = 0; i < IR3_INSTR_SRCS; i++, idx++)
|
||||
if (reg->wrmask & (1 << i))
|
||||
(*regmask)[idx / 8] |= 1 << (idx % 8);
|
||||
}
|
||||
@@ -457,7 +469,7 @@ static inline void regmask_set_if_not(regmask_t *a,
|
||||
{
|
||||
unsigned idx = regmask_idx(reg);
|
||||
unsigned i;
|
||||
for (i = 0; i < 4; i++, idx++)
|
||||
for (i = 0; i < IR3_INSTR_SRCS; i++, idx++)
|
||||
if (reg->wrmask & (1 << i))
|
||||
if (!((*b)[idx / 8] & (1 << (idx % 8))))
|
||||
(*a)[idx / 8] |= 1 << (idx % 8);
|
||||
@@ -468,7 +480,7 @@ static inline unsigned regmask_get(regmask_t *regmask,
|
||||
{
|
||||
unsigned idx = regmask_idx(reg);
|
||||
unsigned i;
|
||||
for (i = 0; i < 4; i++, idx++)
|
||||
for (i = 0; i < IR3_INSTR_SRCS; i++, idx++)
|
||||
if (reg->wrmask & (1 << i))
|
||||
if ((*regmask)[idx / 8] & (1 << (idx % 8)))
|
||||
return true;
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -34,7 +34,7 @@
|
||||
|
||||
int ir3_compile_shader(struct ir3_shader_variant *so,
|
||||
const struct tgsi_token *tokens,
|
||||
struct ir3_shader_key key);
|
||||
struct ir3_shader_key key, bool cp);
|
||||
int ir3_compile_shader_old(struct ir3_shader_variant *so,
|
||||
const struct tgsi_token *tokens,
|
||||
struct ir3_shader_key key);
|
||||
|
@@ -125,7 +125,7 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
|
||||
{
|
||||
unsigned ret, base = 0;
|
||||
struct tgsi_shader_info *info = &ctx->info;
|
||||
const struct fd_lowering_config lconfig = {
|
||||
struct fd_lowering_config lconfig = {
|
||||
.color_two_side = so->key.color_two_side,
|
||||
.lower_DST = true,
|
||||
.lower_XPD = true,
|
||||
@@ -143,6 +143,20 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
|
||||
.lower_DP2A = true,
|
||||
};
|
||||
|
||||
switch (so->type) {
|
||||
case SHADER_FRAGMENT:
|
||||
case SHADER_COMPUTE:
|
||||
lconfig.saturate_s = so->key.fsaturate_s;
|
||||
lconfig.saturate_t = so->key.fsaturate_t;
|
||||
lconfig.saturate_r = so->key.fsaturate_r;
|
||||
break;
|
||||
case SHADER_VERTEX:
|
||||
lconfig.saturate_s = so->key.vsaturate_s;
|
||||
lconfig.saturate_t = so->key.vsaturate_t;
|
||||
lconfig.saturate_r = so->key.vsaturate_r;
|
||||
break;
|
||||
}
|
||||
|
||||
ctx->tokens = fd_transform_lowering(&lconfig, tokens, &ctx->info);
|
||||
ctx->free_tokens = !!ctx->tokens;
|
||||
if (!ctx->tokens) {
|
||||
|
@@ -70,7 +70,7 @@ static void walk_children(struct ir3_instruction *instr, bool keep)
|
||||
static struct ir3_instruction *
|
||||
instr_cp_fanin(struct ir3_instruction *instr)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned i, j;
|
||||
|
||||
/* we need to handle fanin specially, to detect cases
|
||||
* when we need to keep a mov
|
||||
@@ -92,7 +92,15 @@ instr_cp_fanin(struct ir3_instruction *instr)
|
||||
if (is_meta(cand) && (cand->opc == OPC_META_FO))
|
||||
cand = instr_cp(src->instr, true);
|
||||
|
||||
src->instr = cand;
|
||||
/* we can't have 2 registers referring to the same instruction, so
|
||||
* go through and check if any already refer to the candidate
|
||||
* instruction. if so, don't do the propagation.
|
||||
*/
|
||||
for (j = 1; j < instr->regs_count; j++)
|
||||
if (instr->regs[j]->instr == cand)
|
||||
break;
|
||||
if (j == instr->regs_count)
|
||||
src->instr = cand;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -150,10 +150,22 @@ void ir3_block_depth(struct ir3_block *block)
|
||||
if (block->outputs[i])
|
||||
ir3_instr_depth(block->outputs[i]);
|
||||
|
||||
/* at this point, any unvisited input is unused: */
|
||||
/* mark un-used instructions: */
|
||||
for (i = 0; i < block->shader->instrs_count; i++) {
|
||||
struct ir3_instruction *instr = block->shader->instrs[i];
|
||||
|
||||
/* just consider instructions within this block: */
|
||||
if (instr->block != block)
|
||||
continue;
|
||||
|
||||
if (!ir3_instr_check_mark(instr))
|
||||
instr->depth = DEPTH_UNUSED;
|
||||
}
|
||||
|
||||
/* cleanup unused inputs: */
|
||||
for (i = 0; i < block->ninputs; i++) {
|
||||
struct ir3_instruction *in = block->inputs[i];
|
||||
if (in && !ir3_instr_check_mark(in))
|
||||
if (in && (in->depth == DEPTH_UNUSED))
|
||||
block->inputs[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
@@ -58,6 +58,7 @@ struct ir3_ra_ctx {
|
||||
bool frag_face;
|
||||
bool has_samp;
|
||||
int cnt;
|
||||
int max_bary;
|
||||
bool error;
|
||||
};
|
||||
|
||||
@@ -253,7 +254,9 @@ static int alloc_block(struct ir3_ra_ctx *ctx,
|
||||
(instr->regs_count == 1)) {
|
||||
unsigned i, base = instr->regs[0]->num & ~0x3;
|
||||
for (i = 0; i < 4; i++) {
|
||||
struct ir3_instruction *in = ctx->block->inputs[base + i];
|
||||
struct ir3_instruction *in = NULL;
|
||||
if ((base + i) < ctx->block->ninputs)
|
||||
in = ctx->block->inputs[base + i];
|
||||
if (in)
|
||||
compute_clobbers(ctx, in->next, in, &liveregs);
|
||||
}
|
||||
@@ -471,7 +474,9 @@ static void ra_assign_dst_shader_input(struct ir3_visitor *v,
|
||||
|
||||
/* trigger assignment of all our companion input components: */
|
||||
for (i = 0; i < 4; i++) {
|
||||
struct ir3_instruction *in = instr->block->inputs[i+base];
|
||||
struct ir3_instruction *in = NULL;
|
||||
if ((base + i) < instr->block->ninputs)
|
||||
in = instr->block->inputs[base + i];
|
||||
if (in && is_meta(in) && (in->opc == OPC_META_INPUT))
|
||||
ra_assign(a->ctx, in, a->num + off + i);
|
||||
}
|
||||
@@ -610,6 +615,12 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||
if (is_meta(n))
|
||||
continue;
|
||||
|
||||
if (is_input(n)) {
|
||||
struct ir3_register *inloc = n->regs[1];
|
||||
assert(inloc->flags & IR3_REG_IMMED);
|
||||
ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val);
|
||||
}
|
||||
|
||||
for (i = 1; i < n->regs_count; i++) {
|
||||
reg = n->regs[i];
|
||||
|
||||
@@ -771,7 +782,7 @@ static int block_ra(struct ir3_ra_ctx *ctx, struct ir3_block *block)
|
||||
|
||||
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
|
||||
bool half_precision, bool frag_coord, bool frag_face,
|
||||
bool *has_samp)
|
||||
bool *has_samp, int *max_bary)
|
||||
{
|
||||
struct ir3_ra_ctx ctx = {
|
||||
.block = block,
|
||||
@@ -779,12 +790,14 @@ int ir3_block_ra(struct ir3_block *block, enum shader_t type,
|
||||
.half_precision = half_precision,
|
||||
.frag_coord = frag_coord,
|
||||
.frag_face = frag_face,
|
||||
.max_bary = -1,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ir3_clear_mark(block->shader);
|
||||
ret = block_ra(&ctx, block);
|
||||
*has_samp = ctx.has_samp;
|
||||
*max_bary = ctx.max_bary;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@@ -64,6 +64,7 @@ struct ir3_sched_ctx {
|
||||
struct ir3_instruction *addr; /* current a0.x user, if any */
|
||||
struct ir3_instruction *pred; /* current p0.x user, if any */
|
||||
unsigned cnt;
|
||||
bool error;
|
||||
};
|
||||
|
||||
static struct ir3_instruction *
|
||||
@@ -161,7 +162,8 @@ static void schedule(struct ir3_sched_ctx *ctx,
|
||||
* Delay-slot calculation. Follows fanin/fanout.
|
||||
*/
|
||||
|
||||
static unsigned delay_calc2(struct ir3_sched_ctx *ctx,
|
||||
/* calculate delay for specified src: */
|
||||
static unsigned delay_calc_srcn(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *assigner,
|
||||
struct ir3_instruction *consumer, unsigned srcn)
|
||||
{
|
||||
@@ -172,7 +174,7 @@ static unsigned delay_calc2(struct ir3_sched_ctx *ctx,
|
||||
for (i = 1; i < assigner->regs_count; i++) {
|
||||
struct ir3_register *reg = assigner->regs[i];
|
||||
if (reg->flags & IR3_REG_SSA) {
|
||||
unsigned d = delay_calc2(ctx, reg->instr,
|
||||
unsigned d = delay_calc_srcn(ctx, reg->instr,
|
||||
consumer, srcn);
|
||||
delay = MAX2(delay, d);
|
||||
}
|
||||
@@ -185,6 +187,7 @@ static unsigned delay_calc2(struct ir3_sched_ctx *ctx,
|
||||
return delay;
|
||||
}
|
||||
|
||||
/* calculate delay for instruction (maximum of delay for all srcs): */
|
||||
static unsigned delay_calc(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr)
|
||||
{
|
||||
@@ -193,7 +196,7 @@ static unsigned delay_calc(struct ir3_sched_ctx *ctx,
|
||||
for (i = 1; i < instr->regs_count; i++) {
|
||||
struct ir3_register *reg = instr->regs[i];
|
||||
if (reg->flags & IR3_REG_SSA) {
|
||||
unsigned d = delay_calc2(ctx, reg->instr,
|
||||
unsigned d = delay_calc_srcn(ctx, reg->instr,
|
||||
instr, i - 1);
|
||||
delay = MAX2(delay, d);
|
||||
}
|
||||
@@ -239,6 +242,32 @@ static int trysched(struct ir3_sched_ctx *ctx,
|
||||
if (delay)
|
||||
return delay;
|
||||
|
||||
/* if the instruction is a kill, we need to ensure *every*
|
||||
* bary.f is scheduled. The hw seems unhappy if the thread
|
||||
* gets killed before the end-input (ei) flag is hit.
|
||||
*
|
||||
* We could do this by adding each bary.f instruction as
|
||||
* virtual ssa src for the kill instruction. But we have
|
||||
* fixed length instr->regs[].
|
||||
*
|
||||
* TODO this wouldn't be quite right if we had multiple
|
||||
* basic blocks, if any block was conditional. We'd need
|
||||
* to schedule the bary.f's outside of any block which
|
||||
* was conditional that contained a kill.. I think..
|
||||
*/
|
||||
if (is_kill(instr)) {
|
||||
struct ir3 *ir = instr->block->shader;
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < ir->baryfs_count; i++) {
|
||||
if (ir->baryfs[i]->depth == DEPTH_UNUSED)
|
||||
continue;
|
||||
delay = trysched(ctx, ir->baryfs[i]);
|
||||
if (delay)
|
||||
return delay;
|
||||
}
|
||||
}
|
||||
|
||||
/* if this is a write to address/predicate register, and that
|
||||
* register is currently in use, we need to defer until it is
|
||||
* free:
|
||||
@@ -308,7 +337,8 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
|
||||
struct ir3_instruction *instr = block->head;
|
||||
bool addr_in_use = false;
|
||||
bool pred_in_use = false;
|
||||
unsigned cnt = ~0;
|
||||
bool all_delayed = true;
|
||||
unsigned cnt = ~0, attempted = 0;
|
||||
|
||||
while (instr) {
|
||||
struct ir3_instruction *next = instr->next;
|
||||
@@ -317,6 +347,10 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
|
||||
|
||||
if (addr || pred) {
|
||||
int ret = trysched(ctx, instr);
|
||||
|
||||
if (ret != DELAYED)
|
||||
all_delayed = false;
|
||||
|
||||
if (ret == SCHEDULED)
|
||||
cnt = 0;
|
||||
else if (ret > 0)
|
||||
@@ -325,6 +359,8 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
|
||||
addr_in_use = true;
|
||||
if (pred)
|
||||
pred_in_use = true;
|
||||
|
||||
attempted++;
|
||||
}
|
||||
|
||||
instr = next;
|
||||
@@ -336,6 +372,12 @@ static int block_sched_undelayed(struct ir3_sched_ctx *ctx,
|
||||
if (!pred_in_use)
|
||||
ctx->pred = NULL;
|
||||
|
||||
/* detect if we've gotten ourselves into an impossible situation
|
||||
* and bail if needed
|
||||
*/
|
||||
if (all_delayed && (attempted > 0))
|
||||
ctx->error = true;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
@@ -356,7 +398,7 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
||||
}
|
||||
}
|
||||
|
||||
while ((instr = block->head)) {
|
||||
while ((instr = block->head) && !ctx->error) {
|
||||
/* NOTE: always grab next *before* trysched(), in case the
|
||||
* instruction is actually scheduled (and therefore moved
|
||||
* from depth list into scheduled list)
|
||||
@@ -393,9 +435,12 @@ static void block_sched(struct ir3_sched_ctx *ctx, struct ir3_block *block)
|
||||
block->head = reverse(ctx->scheduled);
|
||||
}
|
||||
|
||||
void ir3_block_sched(struct ir3_block *block)
|
||||
int ir3_block_sched(struct ir3_block *block)
|
||||
{
|
||||
struct ir3_sched_ctx ctx = {0};
|
||||
ir3_clear_mark(block->shader);
|
||||
block_sched(&ctx, block);
|
||||
if (ctx.error)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
@@ -68,7 +68,11 @@ assemble_variant(struct ir3_shader_variant *v)
|
||||
free(bin);
|
||||
|
||||
v->instrlen = v->info.sizedwords / 8;
|
||||
v->constlen = v->info.max_const + 1;
|
||||
/* NOTE: if relative addressing is used, we set constlen in
|
||||
* the compiler (to worst-case value) since we don't know in
|
||||
* the assembler what the max addr reg value can be:
|
||||
*/
|
||||
v->constlen = MAX2(v->constlen, v->info.max_const + 1);
|
||||
}
|
||||
|
||||
/* for vertex shader, the inputs are loaded into registers before the shader
|
||||
@@ -81,16 +85,27 @@ fixup_vp_regfootprint(struct ir3_shader_variant *v)
|
||||
unsigned i;
|
||||
for (i = 0; i < v->inputs_count; i++) {
|
||||
if (v->inputs[i].compmask) {
|
||||
uint32_t regid = (v->inputs[i].regid + 3) >> 2;
|
||||
int32_t regid = (v->inputs[i].regid + 3) >> 2;
|
||||
v->info.max_reg = MAX2(v->info.max_reg, regid);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < v->outputs_count; i++) {
|
||||
uint32_t regid = (v->outputs[i].regid + 3) >> 2;
|
||||
int32_t regid = (v->outputs[i].regid + 3) >> 2;
|
||||
v->info.max_reg = MAX2(v->info.max_reg, regid);
|
||||
}
|
||||
}
|
||||
|
||||
/* reset before attempting to compile again.. */
|
||||
static void reset_variant(struct ir3_shader_variant *v, const char *msg)
|
||||
{
|
||||
debug_error(msg);
|
||||
v->inputs_count = 0;
|
||||
v->outputs_count = 0;
|
||||
v->total_in = 0;
|
||||
v->has_samp = false;
|
||||
v->immediates_count = 0;
|
||||
}
|
||||
|
||||
static struct ir3_shader_variant *
|
||||
create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
|
||||
{
|
||||
@@ -112,15 +127,12 @@ create_variant(struct ir3_shader *shader, struct ir3_shader_key key)
|
||||
}
|
||||
|
||||
if (!(fd_mesa_debug & FD_DBG_NOOPT)) {
|
||||
ret = ir3_compile_shader(v, tokens, key);
|
||||
ret = ir3_compile_shader(v, tokens, key, true);
|
||||
if (ret) {
|
||||
debug_error("new compiler failed, trying fallback!");
|
||||
|
||||
v->inputs_count = 0;
|
||||
v->outputs_count = 0;
|
||||
v->total_in = 0;
|
||||
v->has_samp = false;
|
||||
v->immediates_count = 0;
|
||||
reset_variant(v, "new compiler failed, trying without copy propagation!");
|
||||
ret = ir3_compile_shader(v, tokens, key, false);
|
||||
if (ret)
|
||||
reset_variant(v, "new compiler failed, trying fallback!");
|
||||
}
|
||||
} else {
|
||||
ret = -1; /* force fallback to old compiler */
|
||||
@@ -165,16 +177,30 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
|
||||
* so normalize the key to avoid constructing multiple identical
|
||||
* variants:
|
||||
*/
|
||||
if (shader->type == SHADER_FRAGMENT) {
|
||||
switch (shader->type) {
|
||||
case SHADER_FRAGMENT:
|
||||
case SHADER_COMPUTE:
|
||||
key.binning_pass = false;
|
||||
}
|
||||
if (shader->type == SHADER_VERTEX) {
|
||||
if (key.has_per_samp) {
|
||||
key.vsaturate_s = 0;
|
||||
key.vsaturate_t = 0;
|
||||
key.vsaturate_r = 0;
|
||||
}
|
||||
break;
|
||||
case SHADER_VERTEX:
|
||||
key.color_two_side = false;
|
||||
key.half_precision = false;
|
||||
key.alpha = false;
|
||||
if (key.has_per_samp) {
|
||||
key.fsaturate_s = 0;
|
||||
key.fsaturate_t = 0;
|
||||
key.fsaturate_r = 0;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
for (v = shader->variants; v; v = v->next)
|
||||
if (!memcmp(&key, &v->key, sizeof(key)))
|
||||
if (ir3_shader_key_equal(&key, &v->key))
|
||||
return v;
|
||||
|
||||
/* compile new variant if it doesn't exist already: */
|
||||
|
@@ -54,14 +54,54 @@ static inline uint16_t sem2idx(ir3_semantic sem)
|
||||
* in hw (two sided color), binning-pass vertex shader, etc.
|
||||
*/
|
||||
struct ir3_shader_key {
|
||||
/* vertex shader variant parameters: */
|
||||
unsigned binning_pass : 1;
|
||||
union {
|
||||
struct {
|
||||
/* do we need to check {v,f}saturate_{s,t,r}? */
|
||||
unsigned has_per_samp : 1;
|
||||
|
||||
/*
|
||||
* Vertex shader variant parameters:
|
||||
*/
|
||||
unsigned binning_pass : 1;
|
||||
|
||||
/*
|
||||
* Fragment shader variant parameters:
|
||||
*/
|
||||
unsigned color_two_side : 1;
|
||||
unsigned half_precision : 1;
|
||||
/* For rendering to alpha, we need a bit of special handling
|
||||
* since the hw always takes gl_FragColor starting from x
|
||||
* component, rather than figuring out to take the w component.
|
||||
* We could be more clever and generate variants for other
|
||||
* render target formats (ie. luminance formats are xxx1), but
|
||||
* let's start with this and see how it goes:
|
||||
*/
|
||||
unsigned alpha : 1;
|
||||
};
|
||||
uint32_t global;
|
||||
};
|
||||
|
||||
/* bitmask of sampler which needs coords clamped for vertex
|
||||
* shader:
|
||||
*/
|
||||
uint16_t vsaturate_s, vsaturate_t, vsaturate_r;
|
||||
|
||||
/* bitmask of sampler which needs coords clamped for frag
|
||||
* shader:
|
||||
*/
|
||||
uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
|
||||
|
||||
/* fragment shader variant parameters: */
|
||||
unsigned color_two_side : 1;
|
||||
unsigned half_precision : 1;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
|
||||
{
|
||||
/* slow-path if we need to check {v,f}saturate_{s,t,r} */
|
||||
if (a->has_per_samp || b->has_per_samp)
|
||||
return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0;
|
||||
return a->global == b->global;
|
||||
}
|
||||
|
||||
struct ir3_shader_variant {
|
||||
struct fd_bo *bo;
|
||||
|
||||
@@ -110,9 +150,20 @@ struct ir3_shader_variant {
|
||||
uint8_t regid;
|
||||
uint8_t compmask;
|
||||
uint8_t ncomp;
|
||||
/* in theory inloc of fs should match outloc of vs: */
|
||||
/* In theory inloc of fs should match outloc of vs. Or
|
||||
* rather the outloc of the vs is 8 plus the offset passed
|
||||
* to bary.f. Presumably that +8 is to account for
|
||||
* gl_Position/gl_PointSize?
|
||||
*
|
||||
* NOTE inloc is currently aligned to 4 (we don't try
|
||||
* to pack varyings). Changing this would likely break
|
||||
* assumptions in few places (like setting up of flat
|
||||
* shading in fd3_program) so be sure to check all the
|
||||
* spots where inloc is used.
|
||||
*/
|
||||
uint8_t inloc;
|
||||
uint8_t bary;
|
||||
uint8_t interpolate;
|
||||
} inputs[16 + 2]; /* +POSITION +FACE */
|
||||
|
||||
unsigned total_in; /* sum of inputs (scalar) */
|
||||
@@ -120,6 +171,9 @@ struct ir3_shader_variant {
|
||||
/* do we have one or more texture sample instructions: */
|
||||
bool has_samp;
|
||||
|
||||
/* do we have kill instructions: */
|
||||
bool has_kill;
|
||||
|
||||
/* const reg # of first immediate, ie. 1 == c1
|
||||
* (not regid, because TGSI thinks in terms of vec4 registers,
|
||||
* not scalar registers)
|
||||
@@ -147,9 +201,9 @@ struct ir3_shader {
|
||||
struct ir3_shader_variant *variants;
|
||||
|
||||
/* so far, only used for blit_prog shader.. values for
|
||||
* VPC_VARYING_INTERP[i].MODE and VPC_VARYING_PS_REPL[i].MODE
|
||||
* VPC_VARYING_PS_REPL[i].MODE
|
||||
*/
|
||||
uint32_t vinterp[4], vpsrepl[4];
|
||||
uint32_t vpsrepl[4];
|
||||
};
|
||||
|
||||
|
||||
|
@@ -32,7 +32,7 @@ LOCAL_SRC_FILES := \
|
||||
$(C_SOURCES) \
|
||||
$(NV30_C_SOURCES) \
|
||||
$(NV50_CODEGEN_SOURCES) \
|
||||
$(NV50_C_SOURES) \
|
||||
$(NV50_C_SOURCES) \
|
||||
$(NVC0_CODEGEN_SOURCES) \
|
||||
$(NVC0_C_SOURCES)
|
||||
|
||||
|
@@ -140,6 +140,7 @@ private:
|
||||
code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
|
||||
|
||||
#define FTZ_(b) if (i->ftz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
|
||||
#define DNZ_(b) if (i->dnz) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
|
||||
|
||||
#define SAT_(b) if (i->saturate) code[(0x##b) / 32] |= 1 << ((0x##b) % 32)
|
||||
|
||||
@@ -464,6 +465,7 @@ CodeEmitterGK110::emitFMAD(const Instruction *i)
|
||||
SAT_(35);
|
||||
RND_(36, F);
|
||||
FTZ_(38);
|
||||
DNZ_(39);
|
||||
|
||||
bool neg1 = (i->src(0).mod ^ i->src(1).mod).neg();
|
||||
|
||||
@@ -487,6 +489,7 @@ CodeEmitterGK110::emitFMUL(const Instruction *i)
|
||||
emitForm_L(i, 0x200, 0x2, Modifier(0));
|
||||
|
||||
FTZ_(38);
|
||||
DNZ_(39);
|
||||
SAT_(3a);
|
||||
if (neg)
|
||||
code[1] ^= 1 << 22;
|
||||
@@ -499,6 +502,7 @@ CodeEmitterGK110::emitFMUL(const Instruction *i)
|
||||
|
||||
RND_(2a, F);
|
||||
FTZ_(2f);
|
||||
DNZ_(30);
|
||||
SAT_(35);
|
||||
|
||||
if (code[0] & 0x1) {
|
||||
|
@@ -432,7 +432,7 @@ CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
|
||||
void
|
||||
CodeEmitterGM107::emitFMZ(int pos, int len)
|
||||
{
|
||||
emitField(pos, len, /*XXX: insn->dnz << 1 | */ insn->ftz);
|
||||
emitField(pos, len, insn->dnz << 1 | insn->ftz);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -546,9 +546,9 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
|
||||
S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
|
||||
S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
|
||||
S_028814_FACE(!state->front_ccw) |
|
||||
S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
|
||||
S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
|
||||
S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
|
||||
S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
|
||||
state->fill_back != PIPE_POLYGON_MODE_FILL) |
|
||||
S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
|
||||
|
@@ -535,9 +535,9 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
|
||||
S_028814_CULL_FRONT(state->cull_face & PIPE_FACE_FRONT ? 1 : 0) |
|
||||
S_028814_CULL_BACK(state->cull_face & PIPE_FACE_BACK ? 1 : 0) |
|
||||
S_028814_FACE(!state->front_ccw) |
|
||||
S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
|
||||
S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
|
||||
S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
|
||||
S_028814_POLY_MODE(state->fill_front != PIPE_POLYGON_MODE_FILL ||
|
||||
state->fill_back != PIPE_POLYGON_MODE_FILL) |
|
||||
S_028814_POLYMODE_FRONT_PTYPE(r600_translate_fill(state->fill_front)) |
|
||||
|
@@ -158,8 +158,10 @@ static void r600_bind_blend_state(struct pipe_context *ctx, void *state)
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_blend_state *blend = (struct r600_blend_state *)state;
|
||||
|
||||
if (blend == NULL)
|
||||
if (blend == NULL) {
|
||||
r600_set_cso_state_with_cb(&rctx->blend_state, NULL, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
r600_bind_blend_state_internal(rctx, blend, rctx->force_blend_disable);
|
||||
}
|
||||
@@ -447,8 +449,13 @@ static void r600_delete_sampler_state(struct pipe_context *ctx, void *state)
|
||||
|
||||
static void r600_delete_blend_state(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
struct r600_blend_state *blend = (struct r600_blend_state*)state;
|
||||
|
||||
if (rctx->blend_state.cso == state) {
|
||||
ctx->bind_blend_state(ctx, NULL);
|
||||
}
|
||||
|
||||
r600_release_command_buffer(&blend->buffer);
|
||||
r600_release_command_buffer(&blend->buffer_no_blend);
|
||||
FREE(blend);
|
||||
|
@@ -110,11 +110,13 @@ bool r600_init_resource(struct r600_common_screen *rscreen,
|
||||
enum radeon_bo_flag flags = 0;
|
||||
|
||||
switch (res->b.b.usage) {
|
||||
case PIPE_USAGE_STREAM:
|
||||
flags = RADEON_FLAG_GTT_WC;
|
||||
/* fall through */
|
||||
case PIPE_USAGE_STAGING:
|
||||
/* Transfers are likely to occur more often with these resources. */
|
||||
res->domains = RADEON_DOMAIN_GTT;
|
||||
break;
|
||||
case PIPE_USAGE_STREAM:
|
||||
case PIPE_USAGE_DYNAMIC:
|
||||
/* Older kernels didn't always flush the HDP cache before
|
||||
* CS execution
|
||||
|
@@ -33,10 +33,10 @@
|
||||
|
||||
#define RADEON_LLVM_MAX_INPUTS 32 * 4
|
||||
#define RADEON_LLVM_MAX_OUTPUTS 32 * 4
|
||||
#define RADEON_LLVM_MAX_BRANCH_DEPTH 16
|
||||
#define RADEON_LLVM_MAX_LOOP_DEPTH 16
|
||||
#define RADEON_LLVM_MAX_ARRAYS 16
|
||||
|
||||
#define RADEON_LLVM_INITIAL_CF_DEPTH 4
|
||||
|
||||
#define RADEON_LLVM_MAX_SYSTEM_VALUES 4
|
||||
|
||||
struct radeon_llvm_branch {
|
||||
@@ -122,11 +122,13 @@ struct radeon_llvm_context {
|
||||
|
||||
/*=== Private Members ===*/
|
||||
|
||||
struct radeon_llvm_branch branch[RADEON_LLVM_MAX_BRANCH_DEPTH];
|
||||
struct radeon_llvm_loop loop[RADEON_LLVM_MAX_LOOP_DEPTH];
|
||||
struct radeon_llvm_branch *branch;
|
||||
struct radeon_llvm_loop *loop;
|
||||
|
||||
unsigned branch_depth;
|
||||
unsigned branch_depth_max;
|
||||
unsigned loop_depth;
|
||||
unsigned loop_depth_max;
|
||||
|
||||
struct tgsi_declaration_range arrays[RADEON_LLVM_MAX_ARRAYS];
|
||||
unsigned num_arrays;
|
||||
|
@@ -445,7 +445,19 @@ static void bgnloop_emit(
|
||||
endloop_block, "LOOP");
|
||||
LLVMBuildBr(gallivm->builder, loop_block);
|
||||
LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
|
||||
ctx->loop_depth++;
|
||||
|
||||
if (++ctx->loop_depth > ctx->loop_depth_max) {
|
||||
unsigned new_max = ctx->loop_depth_max << 1;
|
||||
|
||||
if (!new_max)
|
||||
new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
|
||||
|
||||
ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
|
||||
sizeof(ctx->loop[0]),
|
||||
new_max * sizeof(ctx->loop[0]));
|
||||
ctx->loop_depth_max = new_max;
|
||||
}
|
||||
|
||||
ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
|
||||
ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
|
||||
}
|
||||
@@ -576,7 +588,18 @@ static void if_cond_emit(
|
||||
LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
|
||||
LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
|
||||
|
||||
ctx->branch_depth++;
|
||||
if (++ctx->branch_depth > ctx->branch_depth_max) {
|
||||
unsigned new_max = ctx->branch_depth_max << 1;
|
||||
|
||||
if (!new_max)
|
||||
new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
|
||||
|
||||
ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
|
||||
sizeof(ctx->branch[0]),
|
||||
new_max * sizeof(ctx->branch[0]));
|
||||
ctx->branch_depth_max = new_max;
|
||||
}
|
||||
|
||||
ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
|
||||
ctx->branch[ctx->branch_depth - 1].if_block = if_block;
|
||||
ctx->branch[ctx->branch_depth - 1].else_block = else_block;
|
||||
@@ -1439,4 +1462,10 @@ void radeon_llvm_dispose(struct radeon_llvm_context * ctx)
|
||||
LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
|
||||
FREE(ctx->temps);
|
||||
ctx->temps = NULL;
|
||||
FREE(ctx->loop);
|
||||
ctx->loop = NULL;
|
||||
ctx->loop_depth_max = 0;
|
||||
FREE(ctx->branch);
|
||||
ctx->branch = NULL;
|
||||
ctx->branch_depth_max = 0;
|
||||
}
|
||||
|
@@ -411,6 +411,11 @@ static void si_set_sampler_views(struct pipe_context *ctx,
|
||||
si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
|
||||
NULL, NULL);
|
||||
}
|
||||
} else {
|
||||
samplers->depth_texture_mask &= ~(1 << slot);
|
||||
samplers->compressed_colortex_mask &= ~(1 << slot);
|
||||
si_set_sampler_view(sctx, shader, SI_FMASK_TEX_OFFSET + slot,
|
||||
NULL, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -599,9 +599,9 @@ static void *si_create_rs_state(struct pipe_context *ctx,
|
||||
S_028814_CULL_FRONT(state->rasterizer_discard || (state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) |
|
||||
S_028814_CULL_BACK(state->rasterizer_discard || (state->cull_face & PIPE_FACE_BACK) ? 1 : 0) |
|
||||
S_028814_FACE(!state->front_ccw) |
|
||||
S_028814_POLY_OFFSET_FRONT_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_BACK_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_tri) |
|
||||
S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) |
|
||||
S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) |
|
||||
S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) |
|
||||
S_028814_POLY_MODE(polygon_dual_mode) |
|
||||
S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) |
|
||||
S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back));
|
||||
|
@@ -781,7 +781,7 @@ static void si_state_draw(struct si_context *sctx,
|
||||
|
||||
if (info->indexed) {
|
||||
uint32_t max_size = (ib->buffer->width0 - ib->offset) /
|
||||
sctx->index_buffer.index_size;
|
||||
ib->index_size;
|
||||
uint64_t va = r600_resource(ib->buffer)->gpu_address + ib->offset;
|
||||
|
||||
si_pm4_add_bo(pm4, (struct r600_resource *)ib->buffer, RADEON_USAGE_READ,
|
||||
|
@@ -85,7 +85,7 @@ gbm_gallium_drm_is_format_supported(struct gbm_device *gbm,
|
||||
if (pf == PIPE_FORMAT_NONE)
|
||||
return 0;
|
||||
|
||||
if (!gdrm->screen->is_format_supported(gdrm->screen, PIPE_TEXTURE_2D, pf, 0,
|
||||
if (!gdrm->screen->is_format_supported(gdrm->screen, pf, PIPE_TEXTURE_2D, 0,
|
||||
gbm_usage_to_gallium(usage)))
|
||||
return 0;
|
||||
|
||||
|
@@ -50,8 +50,8 @@
|
||||
HGLRC WINAPI
|
||||
wglCreateContextAttribsARB(HDC hDC, HGLRC hShareContext, const int *attribList)
|
||||
{
|
||||
typedef HGLRC (*wglCreateContext_t)(HDC hdc);
|
||||
typedef BOOL (*wglDeleteContext_t)(HGLRC hglrc);
|
||||
typedef HGLRC (WINAPI *wglCreateContext_t)(HDC hdc);
|
||||
typedef BOOL (WINAPI *wglDeleteContext_t)(HGLRC hglrc);
|
||||
HGLRC context;
|
||||
static HMODULE opengl_lib = 0;
|
||||
static wglCreateContext_t wglCreateContext_func = 0;
|
||||
|
@@ -17,5 +17,28 @@ create_screen(int fd)
|
||||
return screen;
|
||||
}
|
||||
|
||||
static const struct drm_conf_ret throttle_ret = {
|
||||
.type = DRM_CONF_INT,
|
||||
.val.val_int = 2,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
{
|
||||
switch (conf) {
|
||||
case DRM_CONF_THROTTLE:
|
||||
return &throttle_ret;
|
||||
case DRM_CONF_SHARE_FD:
|
||||
return &share_fd_ret;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PUBLIC
|
||||
DRM_DRIVER_DESCRIPTOR("msm", "freedreno", create_screen, NULL)
|
||||
DRM_DRIVER_DESCRIPTOR("msm", "freedreno", create_screen, drm_configuration)
|
||||
|
@@ -811,17 +811,12 @@ radeon_winsys_bo_create(struct radeon_winsys *rws,
|
||||
desc.flags = flags;
|
||||
|
||||
/* Assign a buffer manager. */
|
||||
assert(flags < RADEON_NUM_CACHE_MANAGERS);
|
||||
if (use_reusable_pool) {
|
||||
if (domain == RADEON_DOMAIN_VRAM) {
|
||||
if (flags & RADEON_FLAG_GTT_WC)
|
||||
provider = ws->cman_vram_gtt_wc;
|
||||
else
|
||||
provider = ws->cman_vram;
|
||||
} else if (flags & RADEON_FLAG_GTT_WC) {
|
||||
provider = ws->cman_gtt_wc;
|
||||
} else {
|
||||
provider = ws->cman_gtt;
|
||||
}
|
||||
if (domain == RADEON_DOMAIN_VRAM)
|
||||
provider = ws->cman_vram[flags];
|
||||
else
|
||||
provider = ws->cman_gtt[flags];
|
||||
} else {
|
||||
provider = ws->kman;
|
||||
}
|
||||
|
@@ -97,13 +97,11 @@ static boolean radeon_set_fd_access(struct radeon_drm_cs *applier,
|
||||
if (enable) {
|
||||
if (value) {
|
||||
*owner = applier;
|
||||
printf("radeon: Acquired access to %s.\n", request_name);
|
||||
pipe_mutex_unlock(*mutex);
|
||||
return TRUE;
|
||||
}
|
||||
} else {
|
||||
*owner = NULL;
|
||||
printf("radeon: Released access to %s.\n", request_name);
|
||||
}
|
||||
|
||||
pipe_mutex_unlock(*mutex);
|
||||
@@ -441,6 +439,7 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
|
||||
static void radeon_winsys_destroy(struct radeon_winsys *rws)
|
||||
{
|
||||
struct radeon_drm_winsys *ws = (struct radeon_drm_winsys*)rws;
|
||||
int i;
|
||||
|
||||
if (ws->thread) {
|
||||
ws->kill_thread = 1;
|
||||
@@ -453,10 +452,10 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
|
||||
pipe_mutex_destroy(ws->cmask_owner_mutex);
|
||||
pipe_mutex_destroy(ws->cs_stack_lock);
|
||||
|
||||
ws->cman_vram->destroy(ws->cman_vram);
|
||||
ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc);
|
||||
ws->cman_gtt->destroy(ws->cman_gtt);
|
||||
ws->cman_gtt_wc->destroy(ws->cman_gtt_wc);
|
||||
for (i = 0; i < RADEON_NUM_CACHE_MANAGERS; i++) {
|
||||
ws->cman_gtt[i]->destroy(ws->cman_gtt[i]);
|
||||
ws->cman_vram[i]->destroy(ws->cman_vram[i]);
|
||||
}
|
||||
ws->kman->destroy(ws->kman);
|
||||
if (ws->gen >= DRV_R600) {
|
||||
radeon_surface_manager_free(ws->surf_man);
|
||||
@@ -643,6 +642,7 @@ PUBLIC struct radeon_winsys *
|
||||
radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
||||
{
|
||||
struct radeon_drm_winsys *ws;
|
||||
int i;
|
||||
|
||||
pipe_mutex_lock(fd_tab_mutex);
|
||||
if (!fd_tab) {
|
||||
@@ -671,18 +671,16 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
||||
ws->kman = radeon_bomgr_create(ws);
|
||||
if (!ws->kman)
|
||||
goto fail;
|
||||
ws->cman_vram = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
|
||||
if (!ws->cman_vram)
|
||||
goto fail;
|
||||
ws->cman_vram_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
|
||||
if (!ws->cman_vram_gtt_wc)
|
||||
goto fail;
|
||||
ws->cman_gtt = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
|
||||
if (!ws->cman_gtt)
|
||||
goto fail;
|
||||
ws->cman_gtt_wc = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
|
||||
if (!ws->cman_gtt_wc)
|
||||
goto fail;
|
||||
|
||||
for (i = 0; i < RADEON_NUM_CACHE_MANAGERS; i++) {
|
||||
ws->cman_vram[i] = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
|
||||
if (!ws->cman_vram[i])
|
||||
goto fail;
|
||||
|
||||
ws->cman_gtt[i] = pb_cache_manager_create(ws->kman, 1000000, 2.0f, 0);
|
||||
if (!ws->cman_gtt[i])
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (ws->gen >= DRV_R600) {
|
||||
ws->surf_man = radeon_surface_manager_new(fd);
|
||||
@@ -737,14 +735,12 @@ radeon_drm_winsys_create(int fd, radeon_screen_create_t screen_create)
|
||||
|
||||
fail:
|
||||
pipe_mutex_unlock(fd_tab_mutex);
|
||||
if (ws->cman_gtt)
|
||||
ws->cman_gtt->destroy(ws->cman_gtt);
|
||||
if (ws->cman_gtt_wc)
|
||||
ws->cman_gtt_wc->destroy(ws->cman_gtt_wc);
|
||||
if (ws->cman_vram)
|
||||
ws->cman_vram->destroy(ws->cman_vram);
|
||||
if (ws->cman_vram_gtt_wc)
|
||||
ws->cman_vram_gtt_wc->destroy(ws->cman_vram_gtt_wc);
|
||||
for (i = 0; i < RADEON_NUM_CACHE_MANAGERS; i++) {
|
||||
if (ws->cman_gtt[i])
|
||||
ws->cman_gtt[i]->destroy(ws->cman_gtt[i]);
|
||||
if (ws->cman_vram[i])
|
||||
ws->cman_vram[i]->destroy(ws->cman_vram[i]);
|
||||
}
|
||||
if (ws->kman)
|
||||
ws->kman->destroy(ws->kman);
|
||||
if (ws->surf_man)
|
||||
|
@@ -41,6 +41,8 @@ enum radeon_generation {
|
||||
DRV_SI
|
||||
};
|
||||
|
||||
#define RADEON_NUM_CACHE_MANAGERS 8
|
||||
|
||||
struct radeon_drm_winsys {
|
||||
struct radeon_winsys base;
|
||||
struct pipe_reference reference;
|
||||
@@ -58,10 +60,8 @@ struct radeon_drm_winsys {
|
||||
uint32_t accel_working2;
|
||||
|
||||
struct pb_manager *kman;
|
||||
struct pb_manager *cman_vram;
|
||||
struct pb_manager *cman_vram_gtt_wc;
|
||||
struct pb_manager *cman_gtt;
|
||||
struct pb_manager *cman_gtt_wc;
|
||||
struct pb_manager *cman_vram[RADEON_NUM_CACHE_MANAGERS];
|
||||
struct pb_manager *cman_gtt[RADEON_NUM_CACHE_MANAGERS];
|
||||
struct radeon_surface_manager *surf_man;
|
||||
|
||||
uint32_t num_cpus; /* Number of CPUs. */
|
||||
|
@@ -49,12 +49,12 @@ ast_array_specifier::print(void) const
|
||||
* loc and state to report the error.
|
||||
*/
|
||||
static void
|
||||
update_max_array_access(ir_rvalue *ir, unsigned idx, YYLTYPE *loc,
|
||||
update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE *loc,
|
||||
struct _mesa_glsl_parse_state *state)
|
||||
{
|
||||
if (ir_dereference_variable *deref_var = ir->as_dereference_variable()) {
|
||||
ir_variable *var = deref_var->var;
|
||||
if (idx > var->data.max_array_access) {
|
||||
if (idx > (int)var->data.max_array_access) {
|
||||
var->data.max_array_access = idx;
|
||||
|
||||
/* Check whether this access will, as a side effect, implicitly cause
|
||||
@@ -88,7 +88,7 @@ update_max_array_access(ir_rvalue *ir, unsigned idx, YYLTYPE *loc,
|
||||
unsigned field_index =
|
||||
deref_record->record->type->field_index(deref_record->field);
|
||||
assert(field_index < interface_type->length);
|
||||
if (idx > deref_var->var->max_ifc_array_access[field_index]) {
|
||||
if (idx > (int)deref_var->var->max_ifc_array_access[field_index]) {
|
||||
deref_var->var->max_ifc_array_access[field_index] = idx;
|
||||
|
||||
/* Check whether this access will, as a side effect, implicitly
|
||||
|
@@ -3760,7 +3760,7 @@ ast_declarator_list::hir(exec_list *instructions,
|
||||
earlier->data.how_declared == ir_var_declared_in_block) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"`%s' has already been redeclared using "
|
||||
"gl_PerVertex", var->name);
|
||||
"gl_PerVertex", earlier->name);
|
||||
}
|
||||
earlier->data.how_declared = ir_var_declared_normally;
|
||||
}
|
||||
@@ -5674,17 +5674,21 @@ ast_interface_block::hir(exec_list *instructions,
|
||||
|
||||
var->data.stream = this->layout.stream;
|
||||
|
||||
/* Examine var name here since var may get deleted in the next call */
|
||||
bool var_is_gl_id = is_gl_identifier(var->name);
|
||||
|
||||
if (redeclaring_per_vertex) {
|
||||
ir_variable *earlier =
|
||||
get_variable_being_redeclared(var, loc, state,
|
||||
true /* allow_all_redeclarations */);
|
||||
if (!is_gl_identifier(var->name) || earlier == NULL) {
|
||||
if (!var_is_gl_id || earlier == NULL) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"redeclaration of gl_PerVertex can only "
|
||||
"include built-in variables");
|
||||
} else if (earlier->data.how_declared == ir_var_declared_normally) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"`%s' has already been redeclared", var->name);
|
||||
"`%s' has already been redeclared",
|
||||
earlier->name);
|
||||
} else {
|
||||
earlier->data.how_declared = ir_var_declared_in_block;
|
||||
earlier->reinit_interface_type(block_type);
|
||||
|
@@ -678,12 +678,17 @@ glsl_type::component_slots() const
|
||||
unsigned
|
||||
glsl_type::uniform_locations() const
|
||||
{
|
||||
if (this->is_matrix())
|
||||
return 1;
|
||||
|
||||
unsigned size = 0;
|
||||
|
||||
switch (this->base_type) {
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_FLOAT:
|
||||
case GLSL_TYPE_BOOL:
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
case GLSL_TYPE_IMAGE:
|
||||
return 1;
|
||||
|
||||
case GLSL_TYPE_STRUCT:
|
||||
case GLSL_TYPE_INTERFACE:
|
||||
for (unsigned i = 0; i < this->length; i++)
|
||||
@@ -692,13 +697,8 @@ glsl_type::uniform_locations() const
|
||||
case GLSL_TYPE_ARRAY:
|
||||
return this->length * this->fields.array->uniform_locations();
|
||||
default:
|
||||
break;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* The location count for many types match with component_slots() result,
|
||||
* all expections should be handled above.
|
||||
*/
|
||||
return component_slots();
|
||||
}
|
||||
|
||||
bool
|
||||
|
@@ -279,6 +279,9 @@ struct glsl_type {
|
||||
/**
|
||||
* Calculate the number of unique values from glGetUniformLocation for the
|
||||
* elements of the type.
|
||||
*
|
||||
* This is used to allocate slots in the UniformRemapTable, the amount of
|
||||
* locations may not match with actual used storage space by the driver.
|
||||
*/
|
||||
unsigned uniform_locations() const;
|
||||
|
||||
|
@@ -295,7 +295,7 @@ ir_array_splitting_visitor::split_deref(ir_dereference **deref)
|
||||
ir_constant *constant = deref_array->array_index->as_constant();
|
||||
assert(constant);
|
||||
|
||||
if (constant->value.i[0] < (int)entry->size) {
|
||||
if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) {
|
||||
*deref = new(entry->mem_ctx)
|
||||
ir_dereference_variable(entry->components[constant->value.i[0]]);
|
||||
} else {
|
||||
|
@@ -221,6 +221,7 @@ DRI_glXUseXFont(struct glx_context *CC, Font font, int first, int count, int lis
|
||||
XGCValues values;
|
||||
unsigned long valuemask;
|
||||
XFontStruct *fs;
|
||||
__GLXDRIdrawable *glxdraw;
|
||||
|
||||
GLint swapbytes, lsbfirst, rowlength;
|
||||
GLint skiprows, skippixels, alignment;
|
||||
@@ -233,6 +234,10 @@ DRI_glXUseXFont(struct glx_context *CC, Font font, int first, int count, int lis
|
||||
dpy = CC->currentDpy;
|
||||
win = CC->currentDrawable;
|
||||
|
||||
glxdraw = GetGLXDRIDrawable(CC->currentDpy, CC->currentDrawable);
|
||||
if (glxdraw)
|
||||
win = glxdraw->xDrawable;
|
||||
|
||||
fs = XQueryFont(dpy, font);
|
||||
if (!fs) {
|
||||
__glXSetError(CC, GL_INVALID_VALUE);
|
||||
|
@@ -2312,9 +2312,13 @@ enum brw_wm_barycentric_interp_mode {
|
||||
#define HSW_MOCS_WB_LLC_WB_ELLC (2 << 1)
|
||||
#define HSW_MOCS_UC_LLC_WB_ELLC (3 << 1)
|
||||
|
||||
/* Broadwell: write-back or write-through; always use all the caches. */
|
||||
#define BDW_MOCS_WB 0x78
|
||||
#define BDW_MOCS_WT 0x58
|
||||
/* Broadwell: these defines always use all available caches (L3, LLC, eLLC),
|
||||
* and let you force write-back (WB) or write-through (WT) caching, or leave
|
||||
* it up to the page table entry (PTE) specified by the kernel.
|
||||
*/
|
||||
#define BDW_MOCS_WB 0x78
|
||||
#define BDW_MOCS_WT 0x58
|
||||
#define BDW_MOCS_PTE 0x18
|
||||
|
||||
#include "intel_chipset.h"
|
||||
|
||||
|
@@ -377,7 +377,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
|
||||
horizontal_alignment(mt) |
|
||||
surface_tiling_mode(tiling);
|
||||
|
||||
surf[1] = SET_FIELD(BDW_MOCS_WT, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
|
||||
surf[1] = SET_FIELD(BDW_MOCS_PTE, GEN8_SURFACE_MOCS) | mt->qpitch >> 2;
|
||||
|
||||
surf[2] = SET_FIELD(width - 1, GEN7_SURFACE_WIDTH) |
|
||||
SET_FIELD(height - 1, GEN7_SURFACE_HEIGHT);
|
||||
|
@@ -87,6 +87,7 @@ can_do_pipelined_register_writes(struct brw_context *brw)
|
||||
|
||||
/* Check whether the value got written. */
|
||||
drm_intel_bo_map(brw->batch.workaround_bo, false);
|
||||
data = brw->batch.workaround_bo->virtual;
|
||||
bool success = data[offset] == expected_value;
|
||||
drm_intel_bo_unmap(brw->batch.workaround_bo);
|
||||
|
||||
@@ -145,6 +146,7 @@ can_write_oacontrol(struct brw_context *brw)
|
||||
|
||||
/* Check whether the value got written. */
|
||||
drm_intel_bo_map(brw->batch.workaround_bo, false);
|
||||
data = brw->batch.workaround_bo->virtual;
|
||||
bool success = data[offset] == expected_value;
|
||||
drm_intel_bo_unmap(brw->batch.workaround_bo);
|
||||
|
||||
|
@@ -189,6 +189,9 @@ nouveau_context_init(struct gl_context *ctx, gl_api api,
|
||||
ctx->Extensions.NV_texture_env_combine4 = true;
|
||||
ctx->Const.MaxColorAttachments = 1;
|
||||
|
||||
/* This effectively disables 3D textures */
|
||||
ctx->Const.Max3DTextureLevels = 1;
|
||||
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
|
@@ -896,7 +896,21 @@ _mesa_generic_nop(void)
|
||||
|
||||
|
||||
/**
|
||||
* Allocate and initialize a new dispatch table.
|
||||
* Special no-op glFlush, see below.
|
||||
*/
|
||||
#if defined(_WIN32)
|
||||
static void GLAPIENTRY
|
||||
nop_glFlush(void)
|
||||
{
|
||||
/* don't record an error like we do in _mesa_generic_nop() */
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
/**
|
||||
* Allocate and initialize a new dispatch table. All the dispatch
|
||||
* function pointers will point at the _mesa_generic_nop() function
|
||||
* which raises GL_INVALID_OPERATION.
|
||||
*/
|
||||
struct _glapi_table *
|
||||
_mesa_alloc_dispatch_table()
|
||||
@@ -916,6 +930,26 @@ _mesa_alloc_dispatch_table()
|
||||
for (i = 0; i < numEntries; i++) {
|
||||
entry[i] = (_glapi_proc) _mesa_generic_nop;
|
||||
}
|
||||
|
||||
#if defined(_WIN32)
|
||||
/* This is a special case for Windows in the event that
|
||||
* wglGetProcAddress is called between glBegin/End().
|
||||
*
|
||||
* The MS opengl32.dll library apparently calls glFlush from
|
||||
* wglGetProcAddress(). If we're inside glBegin/End(), glFlush
|
||||
* will dispatch to _mesa_generic_nop() and we'll generate a
|
||||
* GL_INVALID_OPERATION error.
|
||||
*
|
||||
* The specific case which hits this is piglit's primitive-restart
|
||||
* test which calls glPrimitiveRestartNV() inside glBegin/End. The
|
||||
* first time we call glPrimitiveRestartNV() Piglit's API dispatch
|
||||
* code will try to resolve the function by calling wglGetProcAddress.
|
||||
* This raises GL_INVALID_OPERATION and an assert(glGetError()==0)
|
||||
* will fail causing the test to fail. By suppressing the error, the
|
||||
* assertion passes and the test continues.
|
||||
*/
|
||||
SET_Flush(table, nop_glFlush);
|
||||
#endif
|
||||
}
|
||||
return table;
|
||||
}
|
||||
|
@@ -144,10 +144,10 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256];
|
||||
/* This function/macro is sensitive to precision. Test very carefully
|
||||
* if you change it!
|
||||
*/
|
||||
#define UNCLAMPED_FLOAT_TO_UBYTE(UB, F) \
|
||||
#define UNCLAMPED_FLOAT_TO_UBYTE(UB, FLT) \
|
||||
do { \
|
||||
fi_type __tmp; \
|
||||
__tmp.f = (F); \
|
||||
__tmp.f = (FLT); \
|
||||
if (__tmp.i < 0) \
|
||||
UB = (GLubyte) 0; \
|
||||
else if (__tmp.i >= IEEE_ONE) \
|
||||
@@ -157,10 +157,10 @@ extern GLfloat _mesa_ubyte_to_float_color_tab[256];
|
||||
UB = (GLubyte) __tmp.i; \
|
||||
} \
|
||||
} while (0)
|
||||
#define CLAMPED_FLOAT_TO_UBYTE(UB, F) \
|
||||
#define CLAMPED_FLOAT_TO_UBYTE(UB, FLT) \
|
||||
do { \
|
||||
fi_type __tmp; \
|
||||
__tmp.f = (F) * (255.0F/256.0F) + 32768.0F; \
|
||||
__tmp.f = (FLT) * (255.0F/256.0F) + 32768.0F; \
|
||||
UB = (GLubyte) __tmp.i; \
|
||||
} while (0)
|
||||
#else
|
||||
|
@@ -78,8 +78,8 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions,
|
||||
struct gl_texture_image *texImage)
|
||||
{
|
||||
const GLint width = texImage->Width;
|
||||
const GLint height = texImage->Height;
|
||||
const GLint depth = texImage->Depth;
|
||||
GLint height = texImage->Height;
|
||||
GLint depth = texImage->Depth;
|
||||
GLint img, row;
|
||||
GLfloat *depthRow = malloc(width * sizeof(GLfloat));
|
||||
|
||||
@@ -88,6 +88,11 @@ get_tex_depth(struct gl_context *ctx, GLuint dimensions,
|
||||
return;
|
||||
}
|
||||
|
||||
if (texImage->TexObject->Target == GL_TEXTURE_1D_ARRAY) {
|
||||
depth = height;
|
||||
height = 1;
|
||||
}
|
||||
|
||||
for (img = 0; img < depth; img++) {
|
||||
GLubyte *srcMap;
|
||||
GLint srcRowStride;
|
||||
|
@@ -226,6 +226,13 @@ validate_uniform_parameters(struct gl_context *ctx,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Check that the given location is in bounds of uniform remap table. */
|
||||
if (location >= (GLint) shProg->NumUniformRemapTable) {
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
|
||||
caller, location);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Page 82 (page 96 of the PDF) of the OpenGL 2.1 spec says:
|
||||
*
|
||||
* "If any of the following conditions occur, an INVALID_OPERATION
|
||||
@@ -239,19 +246,12 @@ validate_uniform_parameters(struct gl_context *ctx,
|
||||
* - if count is greater than one, and the uniform declared in the
|
||||
* shader is not an array variable,
|
||||
*/
|
||||
if (location < -1) {
|
||||
if (location < -1 || !shProg->UniformRemapTable[location]) {
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
|
||||
caller, location);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Check that the given location is in bounds of uniform remap table. */
|
||||
if (location >= (GLint) shProg->NumUniformRemapTable) {
|
||||
_mesa_error(ctx, GL_INVALID_OPERATION, "%s(location=%d)",
|
||||
caller, location);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* If the driver storage pointer in remap table is -1, we ignore silently.
|
||||
*
|
||||
* GL_ARB_explicit_uniform_location spec says:
|
||||
|
@@ -96,6 +96,7 @@ st_bind_surface(struct gl_context *ctx, GLenum target,
|
||||
struct gl_texture_image *texImage,
|
||||
struct pipe_surface *ps)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
struct st_texture_object *stObj;
|
||||
struct st_texture_image *stImage;
|
||||
GLenum internalFormat;
|
||||
@@ -124,7 +125,7 @@ st_bind_surface(struct gl_context *ctx, GLenum target,
|
||||
|
||||
/* FIXME create a non-default sampler view from the pipe_surface? */
|
||||
pipe_resource_reference(&stObj->pt, ps->texture);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
pipe_resource_reference(&stImage->pt, stObj->pt);
|
||||
|
||||
stObj->width0 = ps->width;
|
||||
|
@@ -152,10 +152,11 @@ static void
|
||||
st_DeleteTextureObject(struct gl_context *ctx,
|
||||
struct gl_texture_object *texObj)
|
||||
{
|
||||
struct st_context *st = st_context(ctx);
|
||||
struct st_texture_object *stObj = st_texture_object(texObj);
|
||||
|
||||
pipe_resource_reference(&stObj->pt, NULL);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
st_texture_free_sampler_views(stObj);
|
||||
_mesa_delete_texture_object(ctx, texObj);
|
||||
}
|
||||
@@ -512,7 +513,7 @@ st_AllocTextureImageBuffer(struct gl_context *ctx,
|
||||
/* The parent texture object does not have space for this image */
|
||||
|
||||
pipe_resource_reference(&stObj->pt, NULL);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
|
||||
if (!guess_and_alloc_texture(st, stObj, stImage)) {
|
||||
/* Probably out of memory.
|
||||
@@ -1564,13 +1565,13 @@ st_finalize_texture(struct gl_context *ctx,
|
||||
|
||||
if (!st_obj) {
|
||||
pipe_resource_reference(&stObj->pt, NULL);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
return GL_TRUE;
|
||||
}
|
||||
|
||||
if (st_obj->buffer != stObj->pt) {
|
||||
pipe_resource_reference(&stObj->pt, st_obj->buffer);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
stObj->width0 = stObj->pt->width0 / _mesa_get_format_bytes(tObj->_BufferObjectFormat);
|
||||
stObj->height0 = 1;
|
||||
stObj->depth0 = 1;
|
||||
@@ -1591,7 +1592,7 @@ st_finalize_texture(struct gl_context *ctx,
|
||||
firstImage->pt != stObj->pt &&
|
||||
(!stObj->pt || firstImage->pt->last_level >= stObj->pt->last_level)) {
|
||||
pipe_resource_reference(&stObj->pt, firstImage->pt);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
}
|
||||
|
||||
/* If this texture comes from a window system, there is nothing else to do. */
|
||||
@@ -1639,7 +1640,7 @@ st_finalize_texture(struct gl_context *ctx,
|
||||
* gallium texture now. We'll make a new one below.
|
||||
*/
|
||||
pipe_resource_reference(&stObj->pt, NULL);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
st->dirty.st |= ST_NEW_FRAMEBUFFER;
|
||||
}
|
||||
}
|
||||
|
@@ -237,8 +237,7 @@ void st_init_limits(struct pipe_screen *screen,
|
||||
|
||||
if (options->EmitNoLoops)
|
||||
options->MaxUnrollIterations = MIN2(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INSTRUCTIONS), 65536);
|
||||
else
|
||||
options->MaxUnrollIterations = 255; /* SM3 limit */
|
||||
|
||||
options->LowerClipDistance = true;
|
||||
}
|
||||
|
||||
|
@@ -124,7 +124,7 @@ st_generate_mipmap(struct gl_context *ctx, GLenum target,
|
||||
|
||||
/* release the old tex (will likely be freed too) */
|
||||
pipe_resource_reference(&oldTex, NULL);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
}
|
||||
else {
|
||||
/* Make sure that the base texture image data is present in the
|
||||
|
@@ -4818,15 +4818,19 @@ emit_wpos(struct st_context *st,
|
||||
* saturating the value to [0,1] does the job.
|
||||
*/
|
||||
static void
|
||||
emit_face_var(struct st_translate *t)
|
||||
emit_face_var(struct gl_context *ctx, struct st_translate *t)
|
||||
{
|
||||
struct ureg_program *ureg = t->ureg;
|
||||
struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
|
||||
struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
|
||||
|
||||
/* MOV_SAT face_temp, input[face] */
|
||||
face_temp = ureg_saturate(face_temp);
|
||||
ureg_MOV(ureg, face_temp, face_input);
|
||||
if (ctx->Const.NativeIntegers) {
|
||||
ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
|
||||
}
|
||||
else {
|
||||
/* MOV_SAT face_temp, input[face] */
|
||||
ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
|
||||
}
|
||||
|
||||
/* Use face_temp as face input from here on: */
|
||||
t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
|
||||
@@ -4946,7 +4950,7 @@ st_translate_program(
|
||||
}
|
||||
|
||||
if (proginfo->InputsRead & VARYING_BIT_FACE)
|
||||
emit_face_var(t);
|
||||
emit_face_var(ctx, t);
|
||||
|
||||
/*
|
||||
* Declare output attributes.
|
||||
|
@@ -507,12 +507,14 @@ st_texture_release_sampler_view(struct st_context *st,
|
||||
}
|
||||
|
||||
void
|
||||
st_texture_release_all_sampler_views(struct st_texture_object *stObj)
|
||||
st_texture_release_all_sampler_views(struct st_context *st,
|
||||
struct st_texture_object *stObj)
|
||||
{
|
||||
GLuint i;
|
||||
|
||||
/* XXX This should use sampler_views[i]->pipe, not st->pipe */
|
||||
for (i = 0; i < stObj->num_sampler_views; ++i)
|
||||
pipe_sampler_view_reference(&stObj->sampler_views[i], NULL);
|
||||
pipe_sampler_view_release(st->pipe, &stObj->sampler_views[i]);
|
||||
}
|
||||
|
||||
|
||||
|
@@ -255,7 +255,8 @@ st_texture_release_sampler_view(struct st_context *st,
|
||||
struct st_texture_object *stObj);
|
||||
|
||||
extern void
|
||||
st_texture_release_all_sampler_views(struct st_texture_object *stObj);
|
||||
st_texture_release_all_sampler_views(struct st_context *st,
|
||||
struct st_texture_object *stObj);
|
||||
|
||||
void
|
||||
st_texture_free_sampler_views(struct st_texture_object *stObj);
|
||||
|
@@ -139,7 +139,7 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
|
||||
texFormat);
|
||||
|
||||
pipe_resource_reference(&stObj->pt, res);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
pipe_resource_reference(&stImage->pt, res);
|
||||
|
||||
u_sampler_view_default_template(&templ, res, res->format);
|
||||
@@ -172,7 +172,7 @@ st_vdpau_unmap_surface(struct gl_context *ctx, GLenum target, GLenum access,
|
||||
struct st_texture_image *stImage = st_texture_image(texImage);
|
||||
|
||||
pipe_resource_reference(&stObj->pt, NULL);
|
||||
st_texture_release_all_sampler_views(stObj);
|
||||
st_texture_release_all_sampler_views(st, stObj);
|
||||
pipe_resource_reference(&stImage->pt, NULL);
|
||||
|
||||
_mesa_dirty_texobj(ctx, texObj);
|
||||
|
Reference in New Issue
Block a user