Compare commits
155 Commits
mesa-20.0.
...
mesa-11.1.
Author | SHA1 | Date | |
---|---|---|---|
|
5a616125ac | ||
|
a8b2698494 | ||
|
7753691f1a | ||
|
ce914d941d | ||
|
300f807649 | ||
|
61a275b789 | ||
|
0f3892ed9d | ||
|
3d942ee4e5 | ||
|
efdf841238 | ||
|
5913a8c9ec | ||
|
3c9e76fc24 | ||
|
67b1e7b947 | ||
|
0586c5844f | ||
|
7d226ee279 | ||
|
36ff210d0e | ||
|
017f4755fd | ||
|
61cb4db868 | ||
|
34785fb7b9 | ||
|
22d6bf5078 | ||
|
9908d19699 | ||
|
a500109aad | ||
|
0e78a67709 | ||
|
4bb16d712a | ||
|
950e9886d0 | ||
|
dff89432d8 | ||
|
499d409a20 | ||
|
a16f5195ef | ||
|
f65b790089 | ||
|
aa19234943 | ||
|
66f216d8ce | ||
|
4d34038ae5 | ||
|
781a68555d | ||
|
e0b11bcc87 | ||
|
359679cb33 | ||
|
fcf6091521 | ||
|
aa5082b135 | ||
|
1df00e17d3 | ||
|
ad3df9d168 | ||
|
e4cf550501 | ||
|
ecf2885d7f | ||
|
fc59ca4064 | ||
|
396fbdc721 | ||
|
50ac2100df | ||
|
08cf0f8529 | ||
|
ba51596b1d | ||
|
3d13bb8851 | ||
|
3bf2c6b96a | ||
|
5ab1bb4bec | ||
|
c5ca18ec2f | ||
|
f6cca7a0c9 | ||
|
ae649bf1ad | ||
|
132303cfe4 | ||
|
9df2431194 | ||
|
dd409e2a41 | ||
|
38c770ec29 | ||
|
d8450616d9 | ||
|
c9fe9e4b42 | ||
|
693e938321 | ||
|
7a0661839b | ||
|
4c234d183b | ||
|
b37189523e | ||
|
20db46c227 | ||
|
b2a5efb56f | ||
|
38c645b60a | ||
|
2dff4c6fa7 | ||
|
d81ddb3ed8 | ||
|
c25c1dbf51 | ||
|
bed982c4b7 | ||
|
dcaf3989d1 | ||
|
996a4958da | ||
|
0cf5a8159f | ||
|
6cc9a53d84 | ||
|
0a51e77fa1 | ||
|
ca6d0a3dbe | ||
|
4ae9142f8b | ||
|
aff9f8a6f7 | ||
|
b0b163c82a | ||
|
f70574c835 | ||
|
26dff8a7bb | ||
|
ea21336d15 | ||
|
7f6e9c5f59 | ||
|
0828391a34 | ||
|
75b6f14ab8 | ||
|
69df6ac272 | ||
|
0f53b2010c | ||
|
67be605b96 | ||
|
be20f1d7c1 | ||
|
15344c978b | ||
|
f1bb27acc5 | ||
|
dd37db0c80 | ||
|
8e3fbb90a9 | ||
|
79f3aaca4f | ||
|
f9a2bd212a | ||
|
aefd6769e8 | ||
|
82a363b851 | ||
|
b3183c81c4 | ||
|
f5e508649d | ||
|
31546c0e8f | ||
|
6b149bedc3 | ||
|
7a4ba7bfad | ||
|
ef6769f18f | ||
|
a71db1c46e | ||
|
88cd21fefb | ||
|
97d4954f3f | ||
|
3d525c8650 | ||
|
9b9fff6830 | ||
|
3d09bede30 | ||
|
aad5c7d1ca | ||
|
323161333c | ||
|
80febef0ad | ||
|
cf70584907 | ||
|
96e1bf8791 | ||
|
34521c2840 | ||
|
6a71090002 | ||
|
88fd679706 | ||
|
bb7a1ee11f | ||
|
7a41162b45 | ||
|
5e853a4f01 | ||
|
2e073938d0 | ||
|
30e1c390b3 | ||
|
72e51e5dfa | ||
|
902378d6c8 | ||
|
f6f127b597 | ||
|
a2f2329cdd | ||
|
642b66291c | ||
|
06c3ed8d21 | ||
|
cfbb08168a | ||
|
43b0b8a9a3 | ||
|
85b6f905e1 | ||
|
6a6326dcd4 | ||
|
17a64701cb | ||
|
cb4f6e2a30 | ||
|
8c564f0376 | ||
|
3d2bf5a5f5 | ||
|
d1b7a1f5af | ||
|
9c2a7cfbbf | ||
|
089fa07dee | ||
|
7ebc8c36a0 | ||
|
79468fac69 | ||
|
756e323f2c | ||
|
507732ea3d | ||
|
01909c1f29 | ||
|
76b155c9cd | ||
|
4a5c29d877 | ||
|
bf3f0b9e9b | ||
|
e097324fee | ||
|
aa607c69af | ||
|
72470a9c37 | ||
|
de299e1e2e | ||
|
ded66b1451 | ||
|
b7b4104a7f | ||
|
236fb067a5 | ||
|
2d9093fdf0 | ||
|
d757c04215 | ||
|
f9339359d5 |
@@ -767,6 +767,11 @@ linux*)
|
||||
dri3_default=no
|
||||
;;
|
||||
esac
|
||||
|
||||
if test "x$enable_dri" = xno; then
|
||||
dri3_default=no
|
||||
fi
|
||||
|
||||
AC_ARG_ENABLE([dri3],
|
||||
[AS_HELP_STRING([--enable-dri3],
|
||||
[enable DRI3 @<:@default=auto@:>@])],
|
||||
@@ -2173,7 +2178,9 @@ if test -n "$with_gallium_drivers"; then
|
||||
gallium_require_drm_loader
|
||||
|
||||
PKG_CHECK_MODULES([SIMPENROSE], [simpenrose],
|
||||
[USE_VC4_SIMULATOR=yes], [USE_VC4_SIMULATOR=no])
|
||||
[USE_VC4_SIMULATOR=yes;
|
||||
DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"],
|
||||
[USE_VC4_SIMULATOR=no])
|
||||
;;
|
||||
xvirgl)
|
||||
HAVE_GALLIUM_VIRGL=yes
|
||||
|
@@ -238,6 +238,12 @@ for details.
|
||||
</ul>
|
||||
|
||||
|
||||
<h3>VA-API state tracker environment variables</h3>
|
||||
<ul>
|
||||
<li>VAAPI_MPEG4_ENABLED - enable MPEG4 for VA-API, disabled by default.
|
||||
</ul>
|
||||
|
||||
|
||||
<p>
|
||||
Other Gallium drivers have their own environment variables. These may change
|
||||
frequently so the source code should be consulted for details.
|
||||
|
@@ -14,7 +14,7 @@
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.1.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 11.1.0 Release Notes / 15 December 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.1.0 is a new development release.
|
||||
@@ -51,14 +51,20 @@ Note: some of the new features are only available with certain drivers.
|
||||
<li>GL_ARB_arrays_of_arrays on i965</li>
|
||||
<li>GL_ARB_blend_func_extended on freedreno (a3xx)</li>
|
||||
<li>GL_ARB_clear_texture on nv50, nvc0</li>
|
||||
<li>GL_ARB_clip_control on freedreno/a4xx</li>
|
||||
<li>GL_ARB_copy_image on nv50, nvc0, radeonsi</li>
|
||||
<li>GL_ARB_depth_clamp on freedreno/a4xx</li>
|
||||
<li>GL_ARB_fragment_layer_viewport on i965 (gen6+)</li>
|
||||
<li>GL_ARB_gpu_shader_fp64 on r600 for Cypress/Cayman/Aruba chips</li>
|
||||
<li>GL_ARB_gpu_shader5 on r600 for Evergreen and later chips</li>
|
||||
<li>GL_ARB_seamless_cubemap_per_texture on freedreno/a4xx</li>
|
||||
<li>GL_ARB_shader_clock on i965 (gen7+)</li>
|
||||
<li>GL_ARB_shader_stencil_export on i965 (gen9+)</li>
|
||||
<li>GL_ARB_shader_storage_buffer_object on i965</li>
|
||||
<li>GL_ARB_shader_texture_image_samples on i965, nv50, nvc0, r600, radeonsi</li>
|
||||
<li>GL_ARB_texture_barrier / GL_NV_texture_barrier on i965</li>
|
||||
<li>GL_ARB_texture_buffer_range on freedreno/a3xx</li>
|
||||
<li>GL_ARB_texture_compression_bptc on freedreno/a4xx</li>
|
||||
<li>GL_ARB_texture_query_lod on softpipe</li>
|
||||
<li>GL_ARB_texture_view on radeonsi and r600 (for Evergreen and newer)</li>
|
||||
<li>GL_ARB_vertex_type_2_10_10_10_rev on freedreno (a3xx, a4xx)</li>
|
||||
@@ -78,11 +84,196 @@ Note: some of the new features are only available with certain drivers.
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
TBD.
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=28130">Bug 28130</a> - vbo: premature flushing breaks GL_LINE_LOOP</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=49779">Bug 49779</a> - Extra line segments in GL_LINE_LOOP</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71789">Bug 71789</a> - [r300g] Visuals not found in (default) depth = 24</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=79783">Bug 79783</a> - Distorted output in obs-studio where other vendors "work"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80821">Bug 80821</a> - When LIBGL_ALWAYS_SOFTWARE is set, KHR_create_context is not supported</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=81174">Bug 81174</a> - Gallium: GL_LINE_LOOP broken with more than 512 points</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=83508">Bug 83508</a> - [UBO] Assertion for array of blocks</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw@entry=0x7fffd4097a08, fb=fb@entry=0x7fffd40fa900, buffers=buffers@entry=2, partial_clear=partial_clear@entry=false)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86469">Bug 86469</a> - Unreal Engine demo doesn't run</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89014">Bug 89014</a> - PIPE_QUERY_GPU_FINISHED is not acting as expected on SI</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90175">Bug 90175</a> - [hsw bisected][PATCH] atomic counters doesn't work for a binding point different to zero</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90348">Bug 90348</a> - Spilling failure of b96 merged value</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90631">Bug 90631</a> - Compilation failure for fragment shader with many branches on Sandy Bridge</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is > 32k</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91044">Bug 91044</a> - piglit spec/egl_khr_create_context/valid debug flag gles* fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regresion) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91718">Bug 91718</a> - piglit.spec.arb_shader_image_load_store.invalid causes intermittent GPU HANG</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91780">Bug 91780</a> - Rendering issues with geometry shader</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91785">Bug 91785</a> - make check DispatchSanity_test.GLES31 regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image & DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91898">Bug 91898</a> - src/util/mesa-sha1.c:250:25: fatal error: openssl/sha.h: No such file or directory</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91927">Bug 91927</a> - [SKL] [regression] piglit compressed textures tests fail with kernel upgrade</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91930">Bug 91930</a> - Program with GtkGLArea widget does not redraw</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91985">Bug 91985</a> - [regression, bisected] FTBFS with commit f9caabe8f1: R600_UCP_CONST_BUFFER is undefined</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91993">Bug 91993</a> - Graphical glitch in Astromenace (open-source game).</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92033">Bug 92033</a> - [SNB,regression,dEQP,bisected] functional.shaders.random tests regressed</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92052">Bug 92052</a> - nir/nir_builder.h:79: error: expected primary-expression before ‘.’ token</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92054">Bug 92054</a> - make check gbm-symbols-check regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92066">Bug 92066</a> - [ILK,G45,regression] New assertion on BRW_MAX_MRF breaks ilk and g45</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92122">Bug 92122</a> - [bisected, cts] Regression with Assault Android Cactus</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92124">Bug 92124</a> - shader_query.cpp:841:34: error: ‘strndup’ was not declared in this scope</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92183">Bug 92183</a> - linker.cpp:3187:46: error: ‘strtok_r’ was not declared in this scope</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92193">Bug 92193</a> - [SKL] ES2-CTS.gtf.GL2ExtensionTests.compressed_astc_texture.compressed_astc_texture fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92214">Bug 92214</a> - Flightgear crashes during splashboot with R600 driver, LLVM 3.7.0 and mesa 11.0.2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92221">Bug 92221</a> - Unintended code changes in _mesa_base_tex_format commit</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92265">Bug 92265</a> - Black windows in weston after update mesa to 11.0.2-1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92363">Bug 92363</a> - [BSW/BDW] ogles1conform Gets test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92437">Bug 92437</a> - osmesa: Expose GL entry points for Windows build, via .def file</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92438">Bug 92438</a> - Segfault in pushbuf_kref when running the android emulator (qemu) on nv50</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92476">Bug 92476</a> - [cts] ES2-CTS.gtf.GL2ExtensionTests.egl_image.egl_image fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92588">Bug 92588</a> - [HSW,BDW,BSW,SKL-Y][GLES 3.1 CTS] ES31-CTS.arrays_of_arrays.InteractionFunctionCalls2 - assert</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92621">Bug 92621</a> - [G965 ILK G45] Regression: 24 piglit regressions in glsl-1.10</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92623">Bug 92623</a> - Differences in prog_data ignored when caching fragment programs (causes hangs)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92634">Bug 92634</a> - gallium's vl_mpeg12_decoder does not work with st/va</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92639">Bug 92639</a> - [Regression bisected] Ogles1conform mustpass.c fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92641">Bug 92641</a> - [SKL BSW] [Regression] Ogles1conform userclip.c fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92645">Bug 92645</a> - kodi vdpau interop fails since mesa,meta: move gl_texture_object::TargetIndex initializations</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92705">Bug 92705</a> - [clover] fail to build with llvm-svn/clang-svn 3.8</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92709">Bug 92709</a> - "LLVM triggered Diagnostic Handler: unsupported call to function ldexpf in main" when starting race in stuntrally</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92738">Bug 92738</a> - Randon R7 240 doesn't work on 16KiB page size platform</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92744">Bug 92744</a> - [g965 Regression bisected] Performance regression and piglit assertions due to liveness analysis</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92770">Bug 92770</a> - [SNB, regression, dEQP] deqp-gles3.functional.shaders.discard.dynamic_loop_texture</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92824">Bug 92824</a> - [regression, bisected] `make check` dispatch-sanity broken by GL_EXT_buffer_storage</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92859">Bug 92859</a> - [regression, bisected] validate_intrinsic_instr: Assertion triggered</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92860">Bug 92860</a> - [radeonsi][bisected] st/mesa: implement ARB_copy_image - Corruption in ARK Survival Evolved</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92900">Bug 92900</a> - [regression bisected] About 700 piglit regressions is what could go wrong</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92985">Bug 92985</a> - Mac OS X build error "ar: no archive members specified"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93015">Bug 93015</a> - Tonga Elemental segfault + VM faults since radeon: implement r600_query_hw_get_result via function pointers</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93048">Bug 93048</a> - [CTS regression] mesa af2723 breaks GL Conformance for debug extension</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93063">Bug 93063</a> - drm_helper.h:227:1: error: static declaration of ‘pipe_virgl_create_screen’ follows non-static declaration</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93091">Bug 93091</a> - [opencl] segfault when running any opencl programs (like clinfo)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93126">Bug 93126</a> - wrongly claim supporting GL_EXT_texture_rg</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93180">Bug 93180</a> - [regression] arb_separate_shader_objects.active sampler conflict fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93235">Bug 93235</a> - [regression] dispatch sanity broken by GetPointerv</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
TBD.
|
||||
<li>MPEG4 decoding has been disabled by default in the VAAPI driver</li>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -37,12 +37,12 @@ libpipe_loader_static_la_SOURCES += \
|
||||
libpipe_loader_dynamic_la_SOURCES += \
|
||||
$(DRM_SOURCES)
|
||||
|
||||
endif
|
||||
|
||||
libpipe_loader_static_la_LIBADD = \
|
||||
$(top_builddir)/src/loader/libloader.la
|
||||
|
||||
libpipe_loader_dynamic_la_LIBADD = \
|
||||
$(top_builddir)/src/loader/libloader.la
|
||||
|
||||
endif
|
||||
|
||||
EXTRA_DIST = SConscript
|
||||
|
@@ -94,6 +94,18 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
|
||||
.create_screen = pipe_i915_create_screen,
|
||||
.configuration = configuration_query,
|
||||
},
|
||||
#ifdef USE_VC4_SIMULATOR
|
||||
/* VC4 simulator and ILO (i965) are mutually exclusive (error at
|
||||
* configure). As the latter is unconditionally added, keep this one above
|
||||
* it.
|
||||
*/
|
||||
{
|
||||
.name = "i965",
|
||||
.driver_name = "vc4",
|
||||
.create_screen = pipe_vc4_create_screen,
|
||||
.configuration = configuration_query,
|
||||
},
|
||||
#endif
|
||||
{
|
||||
.name = "i965",
|
||||
.driver_name = "i915",
|
||||
@@ -154,14 +166,6 @@ static const struct drm_driver_descriptor driver_descriptors[] = {
|
||||
.create_screen = pipe_vc4_create_screen,
|
||||
.configuration = configuration_query,
|
||||
},
|
||||
#ifdef USE_VC4_SIMULATOR
|
||||
{
|
||||
.name = "i965",
|
||||
.driver_name = "vc4",
|
||||
.create_screen = pipe_vc4_create_screen,
|
||||
.configuration = configuration_query,
|
||||
},
|
||||
#endif
|
||||
};
|
||||
#endif
|
||||
|
||||
|
@@ -33,9 +33,10 @@
|
||||
#include "sw/kms-dri/kms_dri_sw_winsys.h"
|
||||
#include "sw/null/null_sw_winsys.h"
|
||||
#include "sw/wrapper/wrapper_sw_winsys.h"
|
||||
#include "target-helpers/inline_sw_helper.h"
|
||||
#include "target-helpers/sw_helper_public.h"
|
||||
#include "state_tracker/drisw_api.h"
|
||||
#include "state_tracker/sw_driver.h"
|
||||
#include "state_tracker/sw_winsys.h"
|
||||
|
||||
struct pipe_loader_sw_device {
|
||||
struct pipe_loader_device base;
|
||||
@@ -136,7 +137,7 @@ pipe_loader_sw_probe_dri(struct pipe_loader_device **devs, struct drisw_loader_f
|
||||
if (!pipe_loader_sw_probe_init_common(sdev))
|
||||
goto fail;
|
||||
|
||||
for (i = 0; sdev->dd->winsys; i++) {
|
||||
for (i = 0; sdev->dd->winsys[i].name; i++) {
|
||||
if (strcmp(sdev->dd->winsys[i].name, "dri") == 0) {
|
||||
sdev->ws = sdev->dd->winsys[i].create_winsys(drisw_lf);
|
||||
break;
|
||||
@@ -168,7 +169,7 @@ pipe_loader_sw_probe_kms(struct pipe_loader_device **devs, int fd)
|
||||
if (!pipe_loader_sw_probe_init_common(sdev))
|
||||
goto fail;
|
||||
|
||||
for (i = 0; sdev->dd->winsys; i++) {
|
||||
for (i = 0; sdev->dd->winsys[i].name; i++) {
|
||||
if (strcmp(sdev->dd->winsys[i].name, "kms_dri") == 0) {
|
||||
sdev->ws = sdev->dd->winsys[i].create_winsys(fd);
|
||||
break;
|
||||
@@ -199,7 +200,7 @@ pipe_loader_sw_probe_null(struct pipe_loader_device **devs)
|
||||
if (!pipe_loader_sw_probe_init_common(sdev))
|
||||
goto fail;
|
||||
|
||||
for (i = 0; sdev->dd->winsys; i++) {
|
||||
for (i = 0; sdev->dd->winsys[i].name; i++) {
|
||||
if (strcmp(sdev->dd->winsys[i].name, "null") == 0) {
|
||||
sdev->ws = sdev->dd->winsys[i].create_winsys();
|
||||
break;
|
||||
@@ -222,7 +223,7 @@ pipe_loader_sw_probe(struct pipe_loader_device **devs, int ndev)
|
||||
{
|
||||
int i = 1;
|
||||
|
||||
if (i < ndev) {
|
||||
if (i <= ndev) {
|
||||
if (!pipe_loader_sw_probe_null(devs)) {
|
||||
i--;
|
||||
}
|
||||
@@ -244,7 +245,7 @@ pipe_loader_sw_probe_wrapped(struct pipe_loader_device **dev,
|
||||
if (!pipe_loader_sw_probe_init_common(sdev))
|
||||
goto fail;
|
||||
|
||||
for (i = 0; sdev->dd->winsys; i++) {
|
||||
for (i = 0; sdev->dd->winsys[i].name; i++) {
|
||||
if (strcmp(sdev->dd->winsys[i].name, "wrapped") == 0) {
|
||||
sdev->ws = sdev->dd->winsys[i].create_winsys(screen);
|
||||
break;
|
||||
|
@@ -223,7 +223,7 @@ pipe_freedreno_create_screen(int fd)
|
||||
#include "virgl/drm/virgl_drm_public.h"
|
||||
#include "virgl/virgl_public.h"
|
||||
|
||||
static struct pipe_screen *
|
||||
struct pipe_screen *
|
||||
pipe_virgl_create_screen(int fd)
|
||||
{
|
||||
struct virgl_winsys *vws;
|
||||
|
73
src/gallium/auxiliary/target-helpers/sw_helper.h
Normal file
73
src/gallium/auxiliary/target-helpers/sw_helper.h
Normal file
@@ -0,0 +1,73 @@
|
||||
|
||||
#ifndef SW_HELPER_H
|
||||
#define SW_HELPER_H
|
||||
|
||||
#include "pipe/p_compiler.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "target-helpers/sw_helper_public.h"
|
||||
#include "state_tracker/sw_winsys.h"
|
||||
|
||||
|
||||
/* Helper function to choose and instantiate one of the software rasterizers:
|
||||
* llvmpipe, softpipe.
|
||||
*/
|
||||
|
||||
#ifdef GALLIUM_SOFTPIPE
|
||||
#include "softpipe/sp_public.h"
|
||||
#endif
|
||||
|
||||
#ifdef GALLIUM_LLVMPIPE
|
||||
#include "llvmpipe/lp_public.h"
|
||||
#endif
|
||||
|
||||
#ifdef GALLIUM_VIRGL
|
||||
#include "virgl/virgl_public.h"
|
||||
#include "virgl/vtest/virgl_vtest_public.h"
|
||||
#endif
|
||||
|
||||
static inline struct pipe_screen *
|
||||
sw_screen_create_named(struct sw_winsys *winsys, const char *driver)
|
||||
{
|
||||
struct pipe_screen *screen = NULL;
|
||||
|
||||
#if defined(GALLIUM_LLVMPIPE)
|
||||
if (screen == NULL && strcmp(driver, "llvmpipe") == 0)
|
||||
screen = llvmpipe_create_screen(winsys);
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_VIRGL)
|
||||
if (screen == NULL && strcmp(driver, "virpipe") == 0) {
|
||||
struct virgl_winsys *vws;
|
||||
vws = virgl_vtest_winsys_wrap(winsys);
|
||||
screen = virgl_create_screen(vws);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(GALLIUM_SOFTPIPE)
|
||||
if (screen == NULL)
|
||||
screen = softpipe_create_screen(winsys);
|
||||
#endif
|
||||
|
||||
return screen;
|
||||
}
|
||||
|
||||
|
||||
struct pipe_screen *
|
||||
sw_screen_create(struct sw_winsys *winsys)
|
||||
{
|
||||
const char *default_driver;
|
||||
const char *driver;
|
||||
|
||||
#if defined(GALLIUM_LLVMPIPE)
|
||||
default_driver = "llvmpipe";
|
||||
#elif defined(GALLIUM_SOFTPIPE)
|
||||
default_driver = "softpipe";
|
||||
#else
|
||||
default_driver = "";
|
||||
#endif
|
||||
|
||||
driver = debug_get_option("GALLIUM_DRIVER", default_driver);
|
||||
return sw_screen_create_named(winsys, driver);
|
||||
}
|
||||
|
||||
#endif
|
10
src/gallium/auxiliary/target-helpers/sw_helper_public.h
Normal file
10
src/gallium/auxiliary/target-helpers/sw_helper_public.h
Normal file
@@ -0,0 +1,10 @@
|
||||
#ifndef _SW_HELPER_PUBLIC_H
|
||||
#define _SW_HELPER_PUBLIC_H
|
||||
|
||||
struct pipe_screen;
|
||||
struct sw_winsys;
|
||||
|
||||
struct pipe_screen *
|
||||
sw_screen_create(struct sw_winsys *winsys);
|
||||
|
||||
#endif /* _SW_HELPER_PUBLIC_H */
|
@@ -115,7 +115,7 @@ vl_video_buffer_formats(struct pipe_screen *screen, enum pipe_format format)
|
||||
return const_resource_formats_VUYA;
|
||||
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
return const_resource_formats_VUYX;
|
||||
return const_resource_formats_YUVX;
|
||||
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
return const_resource_formats_VUYX;
|
||||
|
@@ -392,7 +392,7 @@ vl_dri2_screen_create(Display *display, int screen)
|
||||
goto free_connect;
|
||||
|
||||
if (drmGetMagic(fd, &magic))
|
||||
goto free_connect;
|
||||
goto close_fd;
|
||||
|
||||
authenticate_cookie = xcb_dri2_authenticate_unchecked(scrn->conn,
|
||||
get_xcb_screen(s, screen)->root,
|
||||
@@ -402,7 +402,7 @@ vl_dri2_screen_create(Display *display, int screen)
|
||||
if (authenticate == NULL || !authenticate->authenticated)
|
||||
goto free_authenticate;
|
||||
|
||||
if (pipe_loader_drm_probe_fd(&scrn->base.dev, dup(fd)))
|
||||
if (pipe_loader_drm_probe_fd(&scrn->base.dev, fd))
|
||||
scrn->base.pscreen = pipe_loader_create_screen(scrn->base.dev);
|
||||
|
||||
if (!scrn->base.pscreen)
|
||||
@@ -428,8 +428,11 @@ vl_dri2_screen_create(Display *display, int screen)
|
||||
release_pipe:
|
||||
if (scrn->base.dev)
|
||||
pipe_loader_release(&scrn->base.dev, 1);
|
||||
fd = -1;
|
||||
free_authenticate:
|
||||
free(authenticate);
|
||||
close_fd:
|
||||
close(fd);
|
||||
free_connect:
|
||||
free(connect);
|
||||
free_query:
|
||||
|
@@ -41,12 +41,16 @@ struct vl_screen *
|
||||
vl_drm_screen_create(int fd)
|
||||
{
|
||||
struct vl_screen *vscreen;
|
||||
int new_fd = -1;
|
||||
|
||||
vscreen = CALLOC_STRUCT(vl_screen);
|
||||
if (!vscreen)
|
||||
return NULL;
|
||||
|
||||
if (pipe_loader_drm_probe_fd(&vscreen->dev, dup(fd)))
|
||||
if (fd < 0 || (new_fd = dup(fd)) < 0)
|
||||
goto error;
|
||||
|
||||
if (pipe_loader_drm_probe_fd(&vscreen->dev, new_fd))
|
||||
vscreen->pscreen = pipe_loader_create_screen(vscreen->dev);
|
||||
|
||||
if (!vscreen->pscreen)
|
||||
@@ -63,6 +67,8 @@ vl_drm_screen_create(int fd)
|
||||
error:
|
||||
if (vscreen->dev)
|
||||
pipe_loader_release(&vscreen->dev, 1);
|
||||
else
|
||||
close(new_fd);
|
||||
|
||||
FREE(vscreen);
|
||||
return NULL;
|
||||
|
@@ -627,7 +627,7 @@ static inline uint32_t A4XX_RB_FS_OUTPUT_ENABLE_BLEND(uint32_t val)
|
||||
{
|
||||
return ((val) << A4XX_RB_FS_OUTPUT_ENABLE_BLEND__SHIFT) & A4XX_RB_FS_OUTPUT_ENABLE_BLEND__MASK;
|
||||
}
|
||||
#define A4XX_RB_FS_OUTPUT_FAST_CLEAR 0x00000100
|
||||
#define A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND 0x00000100
|
||||
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__MASK 0xffff0000
|
||||
#define A4XX_RB_FS_OUTPUT_SAMPLE_MASK__SHIFT 16
|
||||
static inline uint32_t A4XX_RB_FS_OUTPUT_SAMPLE_MASK(uint32_t val)
|
||||
|
@@ -137,7 +137,8 @@ fd4_blend_state_create(struct pipe_context *pctx,
|
||||
so->rb_mrt[i].buf_info |= A4XX_RB_MRT_BUF_INFO_DITHER_MODE(DITHER_ALWAYS);
|
||||
}
|
||||
|
||||
so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend);
|
||||
so->rb_fs_output = A4XX_RB_FS_OUTPUT_ENABLE_BLEND(mrt_blend) |
|
||||
COND(cso->independent_blend_enable, A4XX_RB_FS_OUTPUT_INDEPENDENT_BLEND);
|
||||
|
||||
return so;
|
||||
}
|
||||
|
@@ -194,7 +194,7 @@ emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
if (view->base.texture) {
|
||||
struct fd_resource *rsc = fd_resource(view->base.texture);
|
||||
uint32_t offset = fd_resource_offset(rsc, start, 0);
|
||||
OUT_RELOC(ring, rsc->bo, offset, view->textconst4, 0);
|
||||
OUT_RELOC(ring, rsc->bo, offset, view->texconst4, 0);
|
||||
} else {
|
||||
OUT_RING(ring, 0x00000000);
|
||||
}
|
||||
@@ -497,11 +497,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
OUT_RINGP(ring, val, &fd4_context(ctx)->rbrc_patches);
|
||||
}
|
||||
|
||||
if (dirty & FD_DIRTY_ZSA) {
|
||||
if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_FRAMEBUFFER)) {
|
||||
struct fd4_zsa_stateobj *zsa = fd4_zsa_stateobj(ctx->zsa);
|
||||
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
|
||||
uint32_t rb_alpha_control = zsa->rb_alpha_control;
|
||||
|
||||
if (util_format_is_pure_integer(pipe_surface_format(pfb->cbufs[0])))
|
||||
rb_alpha_control &= ~A4XX_RB_ALPHA_CONTROL_ALPHA_TEST;
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_RB_ALPHA_CONTROL, 1);
|
||||
OUT_RING(ring, zsa->rb_alpha_control);
|
||||
OUT_RING(ring, rb_alpha_control);
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_RB_STENCIL_CONTROL, 2);
|
||||
OUT_RING(ring, zsa->rb_stencil_control);
|
||||
@@ -628,10 +633,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
for (i = 0; i < A4XX_MAX_RENDER_TARGETS; i++) {
|
||||
enum pipe_format format = pipe_surface_format(
|
||||
ctx->framebuffer.cbufs[i]);
|
||||
bool is_int = util_format_is_pure_integer(format);
|
||||
bool has_alpha = util_format_has_alpha(format);
|
||||
uint32_t control = blend->rb_mrt[i].control;
|
||||
uint32_t blend_control = blend->rb_mrt[i].blend_control_alpha;
|
||||
|
||||
if (is_int) {
|
||||
control &= A4XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;
|
||||
control |= A4XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
|
||||
}
|
||||
|
||||
if (has_alpha) {
|
||||
blend_control |= blend->rb_mrt[i].blend_control_rgb;
|
||||
} else {
|
||||
@@ -651,19 +662,48 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
A4XX_RB_FS_OUTPUT_SAMPLE_MASK(0xffff));
|
||||
}
|
||||
|
||||
if (dirty & FD_DIRTY_BLEND_COLOR) {
|
||||
if (dirty & (FD_DIRTY_BLEND_COLOR | FD_DIRTY_FRAMEBUFFER)) {
|
||||
struct pipe_blend_color *bcolor = &ctx->blend_color;
|
||||
struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
|
||||
float factor = 65535.0;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pfb->nr_cbufs; i++) {
|
||||
enum pipe_format format = pipe_surface_format(pfb->cbufs[i]);
|
||||
const struct util_format_description *desc =
|
||||
util_format_description(format);
|
||||
int j;
|
||||
|
||||
if (desc->is_mixed)
|
||||
continue;
|
||||
|
||||
j = util_format_get_first_non_void_channel(format);
|
||||
if (j == -1)
|
||||
continue;
|
||||
|
||||
if (desc->channel[j].size > 8 || !desc->channel[j].normalized ||
|
||||
desc->channel[j].pure_integer)
|
||||
continue;
|
||||
|
||||
/* Just use the first unorm8/snorm8 render buffer. Can't keep
|
||||
* everyone happy.
|
||||
*/
|
||||
if (desc->channel[j].type == UTIL_FORMAT_TYPE_SIGNED)
|
||||
factor = 32767.0;
|
||||
break;
|
||||
}
|
||||
|
||||
OUT_PKT0(ring, REG_A4XX_RB_BLEND_RED, 8);
|
||||
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * 65535.0) |
|
||||
OUT_RING(ring, A4XX_RB_BLEND_RED_UINT(bcolor->color[0] * factor) |
|
||||
A4XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_RED_F32(bcolor->color[0]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * 65535.0) |
|
||||
OUT_RING(ring, A4XX_RB_BLEND_GREEN_UINT(bcolor->color[1] * factor) |
|
||||
A4XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_GREEN_F32(bcolor->color[1]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * 65535.0) |
|
||||
OUT_RING(ring, A4XX_RB_BLEND_BLUE_UINT(bcolor->color[2] * factor) |
|
||||
A4XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_BLUE_F32(bcolor->color[2]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * 65535.0) |
|
||||
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_UINT(bcolor->color[3] * factor) |
|
||||
A4XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
|
||||
OUT_RING(ring, A4XX_RB_BLEND_ALPHA_F32(bcolor->color[3]));
|
||||
}
|
||||
|
@@ -214,6 +214,7 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
struct fd_resource *rsc = fd_resource(prsc);
|
||||
unsigned lvl = fd_sampler_first_level(cso);
|
||||
unsigned miplevels = fd_sampler_last_level(cso) - lvl;
|
||||
uint32_t sz2 = 0;
|
||||
|
||||
if (!so)
|
||||
return NULL;
|
||||
@@ -259,7 +260,10 @@ fd4_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
|
||||
case PIPE_TEXTURE_3D:
|
||||
so->texconst3 =
|
||||
A4XX_TEX_CONST_3_DEPTH(u_minify(prsc->depth0, lvl)) |
|
||||
A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[0].size0);
|
||||
A4XX_TEX_CONST_3_LAYERSZ(rsc->slices[lvl].size0);
|
||||
while (lvl < cso->u.tex.last_level && sz2 != rsc->slices[lvl+1].size0)
|
||||
sz2 = rsc->slices[++lvl].size0;
|
||||
so->texconst4 = A4XX_TEX_CONST_4_LAYERSZ(sz2);
|
||||
break;
|
||||
default:
|
||||
so->texconst3 = 0x00000000;
|
||||
|
@@ -51,7 +51,7 @@ fd4_sampler_stateobj(struct pipe_sampler_state *samp)
|
||||
|
||||
struct fd4_pipe_sampler_view {
|
||||
struct pipe_sampler_view base;
|
||||
uint32_t texconst0, texconst1, texconst2, texconst3, textconst4;
|
||||
uint32_t texconst0, texconst1, texconst2, texconst3, texconst4;
|
||||
};
|
||||
|
||||
static inline struct fd4_pipe_sampler_view *
|
||||
|
@@ -551,7 +551,7 @@ fd_resource_create(struct pipe_screen *pscreen,
|
||||
struct fd_resource *rsc = CALLOC_STRUCT(fd_resource);
|
||||
struct pipe_resource *prsc = &rsc->base.b;
|
||||
enum pipe_format format = tmpl->format;
|
||||
uint32_t size;
|
||||
uint32_t size, alignment;
|
||||
|
||||
DBG("target=%d, format=%s, %ux%ux%u, array_size=%u, last_level=%u, "
|
||||
"nr_samples=%u, usage=%u, bind=%x, flags=%x",
|
||||
@@ -583,6 +583,7 @@ fd_resource_create(struct pipe_screen *pscreen,
|
||||
|
||||
assert(rsc->cpp);
|
||||
|
||||
alignment = slice_alignment(pscreen, tmpl);
|
||||
if (is_a4xx(fd_screen(pscreen))) {
|
||||
switch (tmpl->target) {
|
||||
case PIPE_TEXTURE_3D:
|
||||
@@ -590,11 +591,12 @@ fd_resource_create(struct pipe_screen *pscreen,
|
||||
break;
|
||||
default:
|
||||
rsc->layer_first = true;
|
||||
alignment = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
size = setup_slices(rsc, slice_alignment(pscreen, tmpl), format);
|
||||
size = setup_slices(rsc, alignment, format);
|
||||
|
||||
if (rsc->layer_first) {
|
||||
rsc->layer_size = align(size, 4096);
|
||||
|
@@ -291,7 +291,7 @@ void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b)
|
||||
|
||||
if (b->prev)
|
||||
b->prev->next = b;
|
||||
if (a->prev)
|
||||
if (a->next)
|
||||
a->next->prev = a;
|
||||
}
|
||||
|
||||
|
@@ -575,8 +575,8 @@ CodeEmitterGK110::emitIMUL(const Instruction *i)
|
||||
if (isLIMM(i->src(1), TYPE_S32)) {
|
||||
emitForm_L(i, 0x280, 2, Modifier(0));
|
||||
|
||||
assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH);
|
||||
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
code[1] |= 1 << 24;
|
||||
if (i->sType == TYPE_S32)
|
||||
code[1] |= 3 << 25;
|
||||
} else {
|
||||
@@ -695,14 +695,9 @@ CodeEmitterGK110::emitIMAD(const Instruction *i)
|
||||
if (i->sType == TYPE_S32)
|
||||
code[1] |= (1 << 19) | (1 << 24);
|
||||
|
||||
if (code[0] & 0x1) {
|
||||
assert(!i->subOp);
|
||||
SAT_(39);
|
||||
} else {
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
code[1] |= 1 << 25;
|
||||
SAT_(35);
|
||||
}
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
code[1] |= 1 << 25;
|
||||
SAT_(35);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -2893,6 +2893,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
|
||||
}
|
||||
setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
|
||||
|
||||
// If the loop never breaks (e.g. only has RET's inside), then there
|
||||
// will be no way to get to the break bb. However BGNLOOP will have
|
||||
// already made a PREBREAK to it, so it must be in the CFG.
|
||||
if (getBB()->cfg.incidentCount() == 0)
|
||||
loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_BRK:
|
||||
|
@@ -202,7 +202,8 @@ NV50LegalizePostRA::visit(Function *fn)
|
||||
Program *prog = fn->getProgram();
|
||||
|
||||
r63 = new_LValue(fn, FILE_GPR);
|
||||
if (prog->maxGPR < 63)
|
||||
// GPR units on nv50 are in half-regs
|
||||
if (prog->maxGPR < 126)
|
||||
r63->reg.data.id = 63;
|
||||
else
|
||||
r63->reg.data.id = 127;
|
||||
@@ -832,7 +833,7 @@ NV50LoweringPreSSA::handleTXB(TexInstruction *i)
|
||||
}
|
||||
Value *flags = bld.getScratch(1, FILE_FLAGS);
|
||||
bld.setPosition(cond, true);
|
||||
bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0));
|
||||
bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0))->flagsDef = 0;
|
||||
|
||||
Instruction *tex[4];
|
||||
for (l = 0; l < 4; ++l) {
|
||||
|
@@ -686,7 +686,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
|
||||
i->tex.s = 0x1f;
|
||||
i->setIndirectR(hnd);
|
||||
i->setIndirectS(NULL);
|
||||
} else if (i->tex.r == i->tex.s) {
|
||||
} else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
|
||||
i->tex.r += prog->driver->io.texBindBase / 4;
|
||||
i->tex.s = 0; // only a single cX[] value possible here
|
||||
} else {
|
||||
|
@@ -858,6 +858,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
i->src(0).mod = i->src(t).mod;
|
||||
i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
|
||||
i->src(1).mod = 0;
|
||||
} else
|
||||
if (i->postFactor && i->sType == TYPE_F32) {
|
||||
/* Can't emit a postfactor with an immediate, have to fold it in */
|
||||
i->setSrc(s, new_ImmediateValue(
|
||||
prog, imm0.reg.data.f32 * exp2f(i->postFactor)));
|
||||
i->postFactor = 0;
|
||||
}
|
||||
break;
|
||||
case OP_MAD:
|
||||
@@ -2654,7 +2660,7 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
|
||||
break;
|
||||
|
||||
def = i->getSrc(1)->getInsn();
|
||||
if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
|
||||
if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
|
||||
vtmp = i->getSrc(1);
|
||||
i->setSrc(1, def->getSrc(0));
|
||||
|
||||
@@ -2956,6 +2962,16 @@ DeadCodeElim::visit(BasicBlock *bb)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Each load can go into up to 4 destinations, any of which might potentially
|
||||
// be dead (i.e. a hole). These can always be split into 2 loads, independent
|
||||
// of where the holes are. We find the first contiguous region, put it into
|
||||
// the first load, and then put the second contiguous region into the second
|
||||
// load. There can be at most 2 contiguous regions.
|
||||
//
|
||||
// Note that there are some restrictions, for example it's not possible to do
|
||||
// a 64-bit load that's not 64-bit aligned, so such a load has to be split
|
||||
// up. Also hardware doesn't support 96-bit loads, so those also have to be
|
||||
// split into a 64-bit and 32-bit load.
|
||||
void
|
||||
DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
||||
{
|
||||
@@ -2976,6 +2992,8 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
||||
addr1 = ld1->getSrc(0)->reg.data.offset;
|
||||
n1 = n2 = 0;
|
||||
size1 = size2 = 0;
|
||||
|
||||
// Compute address/width for first load
|
||||
for (d = 0; ld1->defExists(d); ++d) {
|
||||
if (mask & (1 << d)) {
|
||||
if (size1 && (addr1 & 0x7))
|
||||
@@ -2989,16 +3007,34 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Scale back the size of the first load until it can be loaded. This
|
||||
// typically happens for TYPE_B96 loads.
|
||||
while (n1 &&
|
||||
!prog->getTarget()->isAccessSupported(ld1->getSrc(0)->reg.file,
|
||||
typeOfSize(size1))) {
|
||||
size1 -= def1[--n1]->reg.size;
|
||||
d--;
|
||||
}
|
||||
|
||||
// Compute address/width for second load
|
||||
for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
|
||||
if (mask & (1 << d)) {
|
||||
assert(!size2 || !(addr2 & 0x7));
|
||||
def2[n2] = ld1->getDef(d);
|
||||
size2 += def2[n2++]->reg.size;
|
||||
} else {
|
||||
} else if (!n2) {
|
||||
assert(!n2);
|
||||
addr2 += ld1->getDef(d)->reg.size;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure that we've processed all the values
|
||||
for (; ld1->defExists(d); ++d)
|
||||
assert(!(mask & (1 << d)));
|
||||
|
||||
updateLdStOffset(ld1, addr1, func);
|
||||
ld1->setType(typeOfSize(size1));
|
||||
for (d = 0; d < 4; ++d)
|
||||
|
@@ -1573,10 +1573,28 @@ SpillCodeInserter::spill(Instruction *defi, Value *slot, LValue *lval)
|
||||
|
||||
Instruction *st;
|
||||
if (slot->reg.file == FILE_MEMORY_LOCAL) {
|
||||
st = new_Instruction(func, OP_STORE, ty);
|
||||
st->setSrc(0, slot);
|
||||
st->setSrc(1, lval);
|
||||
lval->noSpill = 1;
|
||||
if (ty != TYPE_B96) {
|
||||
st = new_Instruction(func, OP_STORE, ty);
|
||||
st->setSrc(0, slot);
|
||||
st->setSrc(1, lval);
|
||||
} else {
|
||||
st = new_Instruction(func, OP_SPLIT, ty);
|
||||
st->setSrc(0, lval);
|
||||
for (int d = 0; d < lval->reg.size / 4; ++d)
|
||||
st->setDef(d, new_LValue(func, FILE_GPR));
|
||||
|
||||
for (int d = lval->reg.size / 4 - 1; d >= 0; --d) {
|
||||
Value *tmp = cloneShallow(func, slot);
|
||||
tmp->reg.size = 4;
|
||||
tmp->reg.data.offset += 4 * d;
|
||||
|
||||
Instruction *s = new_Instruction(func, OP_STORE, TYPE_U32);
|
||||
s->setSrc(0, tmp);
|
||||
s->setSrc(1, st->getDef(d));
|
||||
defi->bb->insertAfter(defi, s);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
st = new_Instruction(func, OP_CVT, ty);
|
||||
st->setDef(0, slot);
|
||||
@@ -1596,7 +1614,27 @@ SpillCodeInserter::unspill(Instruction *usei, LValue *lval, Value *slot)
|
||||
Instruction *ld;
|
||||
if (slot->reg.file == FILE_MEMORY_LOCAL) {
|
||||
lval->noSpill = 1;
|
||||
ld = new_Instruction(func, OP_LOAD, ty);
|
||||
if (ty != TYPE_B96) {
|
||||
ld = new_Instruction(func, OP_LOAD, ty);
|
||||
} else {
|
||||
ld = new_Instruction(func, OP_MERGE, ty);
|
||||
for (int d = 0; d < lval->reg.size / 4; ++d) {
|
||||
Value *tmp = cloneShallow(func, slot);
|
||||
LValue *val;
|
||||
tmp->reg.size = 4;
|
||||
tmp->reg.data.offset += 4 * d;
|
||||
|
||||
Instruction *l = new_Instruction(func, OP_LOAD, TYPE_U32);
|
||||
l->setDef(0, (val = new_LValue(func, FILE_GPR)));
|
||||
l->setSrc(0, tmp);
|
||||
usei->bb->insertBefore(usei, l);
|
||||
ld->setSrc(d, val);
|
||||
val->noSpill = 1;
|
||||
}
|
||||
ld->setDef(0, lval);
|
||||
usei->bb->insertBefore(usei, ld);
|
||||
return lval;
|
||||
}
|
||||
} else {
|
||||
ld = new_Instruction(func, OP_CVT, ty);
|
||||
}
|
||||
|
@@ -454,7 +454,7 @@ TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (s >= 3)
|
||||
if (s >= opInfo[insn->op].srcNr || s >= 3)
|
||||
return false;
|
||||
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
|
||||
}
|
||||
|
@@ -439,7 +439,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (s >= 3)
|
||||
if (s >= opInfo[insn->op].srcNr || s >= 3)
|
||||
return false;
|
||||
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
|
||||
}
|
||||
|
@@ -657,8 +657,8 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
|
||||
if (buffer->base.flags & (PIPE_RESOURCE_FLAG_MAP_PERSISTENT |
|
||||
PIPE_RESOURCE_FLAG_MAP_COHERENT)) {
|
||||
buffer->domain = NOUVEAU_BO_GART;
|
||||
} else if (buffer->base.bind &
|
||||
(screen->vidmem_bindings & screen->sysmem_bindings)) {
|
||||
} else if (buffer->base.bind == 0 || (buffer->base.bind &
|
||||
(screen->vidmem_bindings & screen->sysmem_bindings))) {
|
||||
switch (buffer->base.usage) {
|
||||
case PIPE_USAGE_DEFAULT:
|
||||
case PIPE_USAGE_IMMUTABLE:
|
||||
@@ -685,6 +685,10 @@ nouveau_buffer_create(struct pipe_screen *pscreen,
|
||||
if (buffer->base.bind & screen->sysmem_bindings)
|
||||
buffer->domain = NOUVEAU_BO_GART;
|
||||
}
|
||||
/* There can be very special situations where we want non-gpu-mapped
|
||||
* buffers, but never through this interface.
|
||||
*/
|
||||
assert(buffer->domain);
|
||||
ret = nouveau_buffer_allocate(screen, buffer, buffer->domain);
|
||||
|
||||
if (ret == false)
|
||||
|
@@ -168,9 +168,10 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
int ref)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(&ctx->pipe);
|
||||
unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
|
||||
unsigned s, i;
|
||||
|
||||
if (res->bind & PIPE_BIND_RENDER_TARGET) {
|
||||
if (bind & PIPE_BIND_RENDER_TARGET) {
|
||||
assert(nv50->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS);
|
||||
for (i = 0; i < nv50->framebuffer.nr_cbufs; ++i) {
|
||||
if (nv50->framebuffer.cbufs[i] &&
|
||||
@@ -182,7 +183,7 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
|
||||
if (bind & PIPE_BIND_DEPTH_STENCIL) {
|
||||
if (nv50->framebuffer.zsbuf &&
|
||||
nv50->framebuffer.zsbuf->texture == res) {
|
||||
nv50->dirty |= NV50_NEW_FRAMEBUFFER;
|
||||
@@ -192,11 +193,11 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
|
||||
PIPE_BIND_INDEX_BUFFER |
|
||||
PIPE_BIND_CONSTANT_BUFFER |
|
||||
PIPE_BIND_STREAM_OUTPUT |
|
||||
PIPE_BIND_SAMPLER_VIEW)) {
|
||||
if (bind & (PIPE_BIND_VERTEX_BUFFER |
|
||||
PIPE_BIND_INDEX_BUFFER |
|
||||
PIPE_BIND_CONSTANT_BUFFER |
|
||||
PIPE_BIND_STREAM_OUTPUT |
|
||||
PIPE_BIND_SAMPLER_VIEW)) {
|
||||
|
||||
assert(nv50->num_vtxbufs <= PIPE_MAX_ATTRIBS);
|
||||
for (i = 0; i < nv50->num_vtxbufs; ++i) {
|
||||
|
@@ -180,9 +180,10 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
int ref)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
|
||||
unsigned bind = res->bind ? res->bind : PIPE_BIND_VERTEX_BUFFER;
|
||||
unsigned s, i;
|
||||
|
||||
if (res->bind & PIPE_BIND_RENDER_TARGET) {
|
||||
if (bind & PIPE_BIND_RENDER_TARGET) {
|
||||
for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
|
||||
if (nvc0->framebuffer.cbufs[i] &&
|
||||
nvc0->framebuffer.cbufs[i]->texture == res) {
|
||||
@@ -193,7 +194,7 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
}
|
||||
}
|
||||
}
|
||||
if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
|
||||
if (bind & PIPE_BIND_DEPTH_STENCIL) {
|
||||
if (nvc0->framebuffer.zsbuf &&
|
||||
nvc0->framebuffer.zsbuf->texture == res) {
|
||||
nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
|
||||
@@ -203,12 +204,12 @@ nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
if (res->bind & (PIPE_BIND_VERTEX_BUFFER |
|
||||
PIPE_BIND_INDEX_BUFFER |
|
||||
PIPE_BIND_CONSTANT_BUFFER |
|
||||
PIPE_BIND_STREAM_OUTPUT |
|
||||
PIPE_BIND_COMMAND_ARGS_BUFFER |
|
||||
PIPE_BIND_SAMPLER_VIEW)) {
|
||||
if (bind & (PIPE_BIND_VERTEX_BUFFER |
|
||||
PIPE_BIND_INDEX_BUFFER |
|
||||
PIPE_BIND_CONSTANT_BUFFER |
|
||||
PIPE_BIND_STREAM_OUTPUT |
|
||||
PIPE_BIND_COMMAND_ARGS_BUFFER |
|
||||
PIPE_BIND_SAMPLER_VIEW)) {
|
||||
for (i = 0; i < nvc0->num_vtxbufs; ++i) {
|
||||
if (nvc0->vtxbuf[i].buffer == res) {
|
||||
nvc0->dirty |= NVC0_NEW_ARRAYS;
|
||||
|
@@ -59,7 +59,7 @@
|
||||
|
||||
/* the number of CS dwords for flushing and drawing */
|
||||
#define R600_MAX_FLUSH_CS_DWORDS 16
|
||||
#define R600_MAX_DRAW_CS_DWORDS 47
|
||||
#define R600_MAX_DRAW_CS_DWORDS 52
|
||||
#define R600_TRACE_CS_DWORDS 7
|
||||
|
||||
#define R600_MAX_USER_CONST_BUFFERS 13
|
||||
|
@@ -598,6 +598,106 @@ static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* execute a single slot ALU calculation */
|
||||
static int single_alu_op2(struct r600_shader_ctx *ctx, int op,
|
||||
int dst_sel, int dst_chan,
|
||||
int src0_sel, unsigned src0_chan_val,
|
||||
int src1_sel, unsigned src1_chan_val)
|
||||
{
|
||||
struct r600_bytecode_alu alu;
|
||||
int r, i;
|
||||
|
||||
if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = op;
|
||||
alu.src[0].sel = src0_sel;
|
||||
if (src0_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[0].value = src0_chan_val;
|
||||
else
|
||||
alu.src[0].chan = src0_chan_val;
|
||||
alu.src[1].sel = src1_sel;
|
||||
if (src1_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[1].value = src1_chan_val;
|
||||
else
|
||||
alu.src[1].chan = src1_chan_val;
|
||||
alu.dst.sel = dst_sel;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = i == dst_chan;
|
||||
alu.last = (i == 3);
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = op;
|
||||
alu.src[0].sel = src0_sel;
|
||||
if (src0_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[0].value = src0_chan_val;
|
||||
else
|
||||
alu.src[0].chan = src0_chan_val;
|
||||
alu.src[1].sel = src1_sel;
|
||||
if (src1_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[1].value = src1_chan_val;
|
||||
else
|
||||
alu.src[1].chan = src1_chan_val;
|
||||
alu.dst.sel = dst_sel;
|
||||
alu.dst.chan = dst_chan;
|
||||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* execute a single slot ALU calculation */
|
||||
static int single_alu_op3(struct r600_shader_ctx *ctx, int op,
|
||||
int dst_sel, int dst_chan,
|
||||
int src0_sel, unsigned src0_chan_val,
|
||||
int src1_sel, unsigned src1_chan_val,
|
||||
int src2_sel, unsigned src2_chan_val)
|
||||
{
|
||||
struct r600_bytecode_alu alu;
|
||||
int r;
|
||||
|
||||
/* validate this for other ops */
|
||||
assert(op == ALU_OP3_MULADD_UINT24);
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = op;
|
||||
alu.src[0].sel = src0_sel;
|
||||
if (src0_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[0].value = src0_chan_val;
|
||||
else
|
||||
alu.src[0].chan = src0_chan_val;
|
||||
alu.src[1].sel = src1_sel;
|
||||
if (src1_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[1].value = src1_chan_val;
|
||||
else
|
||||
alu.src[1].chan = src1_chan_val;
|
||||
alu.src[2].sel = src2_sel;
|
||||
if (src2_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[2].value = src2_chan_val;
|
||||
else
|
||||
alu.src[2].chan = src2_chan_val;
|
||||
alu.dst.sel = dst_sel;
|
||||
alu.dst.chan = dst_chan;
|
||||
alu.is_op3 = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
|
||||
{
|
||||
return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
|
||||
}
|
||||
|
||||
static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
|
||||
{
|
||||
int i;
|
||||
@@ -1129,6 +1229,7 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
unsigned vtx_id = src->Dimension.Index;
|
||||
int offset_reg = vtx_id / 3;
|
||||
int offset_chan = vtx_id % 3;
|
||||
int t2 = 0;
|
||||
|
||||
/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
|
||||
* R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
|
||||
@@ -1136,13 +1237,24 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
if (offset_reg == 0 && offset_chan == 2)
|
||||
offset_chan = 3;
|
||||
|
||||
if (src->Dimension.Indirect || src->Register.Indirect)
|
||||
t2 = r600_get_temp(ctx);
|
||||
|
||||
if (src->Dimension.Indirect) {
|
||||
int treg[3];
|
||||
int t2;
|
||||
struct r600_bytecode_alu alu;
|
||||
int r, i;
|
||||
|
||||
/* you have got to be shitting me -
|
||||
unsigned addr_reg;
|
||||
addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index);
|
||||
if (src->DimIndirect.Index > 0) {
|
||||
r = single_alu_op2(ctx, ALU_OP1_MOV,
|
||||
ctx->bc->ar_reg, 0,
|
||||
addr_reg, 0,
|
||||
0, 0);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
/*
|
||||
we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
|
||||
at least this is what fglrx seems to do. */
|
||||
for (i = 0; i < 3; i++) {
|
||||
@@ -1150,7 +1262,6 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
}
|
||||
r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
|
||||
|
||||
t2 = r600_get_temp(ctx);
|
||||
for (i = 0; i < 3; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
@@ -1175,8 +1286,33 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
if (r)
|
||||
return r;
|
||||
offset_reg = t2;
|
||||
offset_chan = 0;
|
||||
}
|
||||
|
||||
if (src->Register.Indirect) {
|
||||
int addr_reg;
|
||||
unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID];
|
||||
|
||||
addr_reg = get_address_file_reg(ctx, src->Indirect.Index);
|
||||
|
||||
/* pull the value from index_reg */
|
||||
r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
|
||||
t2, 1,
|
||||
addr_reg, 0,
|
||||
V_SQ_ALU_SRC_LITERAL, first);
|
||||
if (r)
|
||||
return r;
|
||||
r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
|
||||
t2, 0,
|
||||
t2, 1,
|
||||
V_SQ_ALU_SRC_LITERAL, 4,
|
||||
offset_reg, offset_chan);
|
||||
if (r)
|
||||
return r;
|
||||
offset_reg = t2;
|
||||
offset_chan = 0;
|
||||
index = src->Register.Index - first;
|
||||
}
|
||||
|
||||
memset(&vtx, 0, sizeof(vtx));
|
||||
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
|
||||
@@ -1222,6 +1358,7 @@ static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
|
||||
|
||||
fetch_gs_input(ctx, src, treg);
|
||||
ctx->src[i].sel = treg;
|
||||
ctx->src[i].rel = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@@ -1498,7 +1635,7 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
|
||||
*last_exp_pos = NULL, *last_exp_param = NULL;
|
||||
int i, j, next_clip_pos = 61, next_param = 0;
|
||||
int ring;
|
||||
|
||||
bool only_ring_0 = true;
|
||||
cshader = calloc(1, sizeof(struct r600_pipe_shader));
|
||||
if (!cshader)
|
||||
return 0;
|
||||
@@ -1570,6 +1707,8 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
|
||||
for (i = 0; i < so->num_outputs; i++) {
|
||||
if (so->output[i].stream == ring) {
|
||||
enabled = true;
|
||||
if (ring > 0)
|
||||
only_ring_0 = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1604,7 +1743,7 @@ static int generate_gs_copy_shader(struct r600_context *rctx,
|
||||
cf_jump = ctx.bc->cf_last;
|
||||
|
||||
if (enabled)
|
||||
emit_streamout(&ctx, so, ring, &cshader->shader.ring_item_sizes[ring]);
|
||||
emit_streamout(&ctx, so, only_ring_0 ? -1 : ring, &cshader->shader.ring_item_sizes[ring]);
|
||||
cshader->shader.ring_item_sizes[ring] = ocnt * 16;
|
||||
}
|
||||
|
||||
@@ -2206,6 +2345,11 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
if (ctx.type == TGSI_PROCESSOR_GEOMETRY) {
|
||||
struct r600_bytecode_alu alu;
|
||||
int r;
|
||||
|
||||
/* GS thread with no output workaround - emit a cut at start of GS */
|
||||
if (ctx.bc->chip_class == R600)
|
||||
r600_bytecode_add_cfinst(ctx.bc, CF_OP_CUT_VERTEX);
|
||||
|
||||
for (j = 0; j < 4; j++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
@@ -7180,7 +7324,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
|
||||
struct r600_bytecode_alu alu;
|
||||
int r;
|
||||
int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
|
||||
unsigned reg = inst->Dst[0].Register.Index > 0 ? ctx->bc->index_reg[inst->Dst[0].Register.Index - 1] : ctx->bc->ar_reg;
|
||||
unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);
|
||||
|
||||
assert(inst->Dst[0].Register.Index < 3);
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
|
@@ -2213,10 +2213,11 @@ void r600_init_atom_start_cs(struct r600_context *rctx)
|
||||
num_temp_gprs = 4;
|
||||
num_gs_gprs = 0;
|
||||
num_es_gprs = 0;
|
||||
num_ps_threads = 136;
|
||||
num_vs_threads = 48;
|
||||
num_gs_threads = 4;
|
||||
num_es_threads = 4;
|
||||
/* use limits 40 VS and at least 16 ES/GS */
|
||||
num_ps_threads = 120;
|
||||
num_vs_threads = 40;
|
||||
num_gs_threads = 16;
|
||||
num_es_threads = 16;
|
||||
num_ps_stack_entries = 40;
|
||||
num_vs_stack_entries = 40;
|
||||
num_gs_stack_entries = 32;
|
||||
@@ -2675,6 +2676,9 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
||||
S_02881C_USE_VTX_VIEWPORT_INDX(rshader->vs_out_viewport);
|
||||
}
|
||||
|
||||
#define RV610_GSVS_ALIGN 32
|
||||
#define R600_GSVS_ALIGN 16
|
||||
|
||||
void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *shader)
|
||||
{
|
||||
struct r600_context *rctx = (struct r600_context *)ctx;
|
||||
@@ -2684,6 +2688,23 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
||||
unsigned gsvs_itemsize =
|
||||
(cp_shader->ring_item_sizes[0] * shader->selector->gs_max_out_vertices) >> 2;
|
||||
|
||||
/* some r600s needs gsvs itemsize aligned to cacheline size
|
||||
this was fixed in rs780 and above. */
|
||||
switch (rctx->b.family) {
|
||||
case CHIP_RV610:
|
||||
gsvs_itemsize = align(gsvs_itemsize, RV610_GSVS_ALIGN);
|
||||
break;
|
||||
case CHIP_R600:
|
||||
case CHIP_RV630:
|
||||
case CHIP_RV670:
|
||||
case CHIP_RV620:
|
||||
case CHIP_RV635:
|
||||
gsvs_itemsize = align(gsvs_itemsize, R600_GSVS_ALIGN);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
r600_init_command_buffer(cb, 64);
|
||||
|
||||
/* VGT_GS_MODE is written by r600_emit_shader_stages */
|
||||
|
@@ -1770,6 +1770,24 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
||||
(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
|
||||
}
|
||||
|
||||
/* SMX returns CONTEXT_DONE too early workaround */
|
||||
if (rctx->b.family == CHIP_R600 ||
|
||||
rctx->b.family == CHIP_RV610 ||
|
||||
rctx->b.family == CHIP_RV630 ||
|
||||
rctx->b.family == CHIP_RV635) {
|
||||
/* if we have gs shader or streamout
|
||||
we need to do a wait idle after every draw */
|
||||
if (rctx->gs_shader || rctx->b.streamout.streamout_enabled) {
|
||||
radeon_set_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
|
||||
}
|
||||
}
|
||||
|
||||
/* ES ring rolling over at EOP - workaround */
|
||||
if (rctx->b.chip_class == R600) {
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
|
||||
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT);
|
||||
}
|
||||
|
||||
if (rctx->screen->b.trace_bo) {
|
||||
r600_trace_emit(rctx);
|
||||
}
|
||||
|
@@ -130,6 +130,7 @@
|
||||
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
|
||||
#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
|
||||
#define EVENT_TYPE_VGT_FLUSH 0x24
|
||||
#define EVENT_TYPE_SQ_NON_EVENT 0x26
|
||||
#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
|
@@ -136,8 +136,12 @@ static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags)
|
||||
void r600_preflush_suspend_features(struct r600_common_context *ctx)
|
||||
{
|
||||
/* suspend queries */
|
||||
if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
|
||||
if (ctx->num_cs_dw_nontimer_queries_suspend) {
|
||||
/* Since non-timer queries are suspended during blits,
|
||||
* we have to guard against double-suspends. */
|
||||
r600_suspend_nontimer_queries(ctx);
|
||||
ctx->nontimer_queries_suspended_by_flush = true;
|
||||
}
|
||||
if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
|
||||
r600_suspend_timer_queries(ctx);
|
||||
|
||||
@@ -158,8 +162,10 @@ void r600_postflush_resume_features(struct r600_common_context *ctx)
|
||||
/* resume queries */
|
||||
if (!LIST_IS_EMPTY(&ctx->active_timer_queries))
|
||||
r600_resume_timer_queries(ctx);
|
||||
if (!LIST_IS_EMPTY(&ctx->active_nontimer_queries))
|
||||
if (ctx->nontimer_queries_suspended_by_flush) {
|
||||
ctx->nontimer_queries_suspended_by_flush = false;
|
||||
r600_resume_nontimer_queries(ctx);
|
||||
}
|
||||
}
|
||||
|
||||
static void r600_flush_from_st(struct pipe_context *ctx,
|
||||
@@ -233,8 +239,8 @@ bool r600_common_context_init(struct r600_common_context *rctx,
|
||||
rctx->family = rscreen->family;
|
||||
rctx->chip_class = rscreen->chip_class;
|
||||
|
||||
if (rscreen->family == CHIP_HAWAII)
|
||||
rctx->max_db = 16;
|
||||
if (rscreen->chip_class >= CIK)
|
||||
rctx->max_db = MAX2(8, rscreen->info.r600_num_backends);
|
||||
else if (rscreen->chip_class >= EVERGREEN)
|
||||
rctx->max_db = 8;
|
||||
else
|
||||
@@ -550,10 +556,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
||||
case CHIP_TONGA: return "tonga";
|
||||
case CHIP_ICELAND: return "iceland";
|
||||
case CHIP_CARRIZO: return "carrizo";
|
||||
case CHIP_FIJI: return "fiji";
|
||||
#if HAVE_LLVM <= 0x0307
|
||||
case CHIP_FIJI: return "tonga";
|
||||
case CHIP_STONEY: return "carrizo";
|
||||
#else
|
||||
case CHIP_FIJI: return "fiji";
|
||||
case CHIP_STONEY: return "stoney";
|
||||
#endif
|
||||
default: return "";
|
||||
|
@@ -392,6 +392,7 @@ struct r600_common_context {
|
||||
struct list_head active_nontimer_queries;
|
||||
struct list_head active_timer_queries;
|
||||
unsigned num_cs_dw_nontimer_queries_suspend;
|
||||
bool nontimer_queries_suspended_by_flush;
|
||||
unsigned num_cs_dw_timer_queries_suspend;
|
||||
/* Additional hardware info. */
|
||||
unsigned backend_mask;
|
||||
|
@@ -489,6 +489,10 @@ static void vi_texture_alloc_dcc_separate(struct r600_common_screen *rscreen,
|
||||
if (rscreen->debug_flags & DBG_NO_DCC)
|
||||
return;
|
||||
|
||||
/* TODO: DCC is broken on Stoney */
|
||||
if (rscreen->family == CHIP_STONEY)
|
||||
return;
|
||||
|
||||
rtex->dcc_buffer = (struct r600_resource *)
|
||||
r600_aligned_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT, rtex->surface.dcc_size, rtex->surface.dcc_alignment);
|
||||
|
@@ -1539,7 +1539,7 @@ void radeon_llvm_context_init(struct radeon_llvm_context * ctx)
|
||||
bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
|
||||
bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
|
||||
bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.exp2.f32";
|
||||
bld_base->op_actions[TGSI_OPCODE_EX2].intr_name = "llvm.AMDIL.exp.";
|
||||
bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
|
||||
bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
|
||||
bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
|
||||
|
@@ -958,6 +958,8 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
|
||||
dec->msg->body.decode.db_pitch = dec->base.width;
|
||||
|
||||
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
|
||||
if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
|
||||
dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2;
|
||||
|
||||
switch (u_reduce_video_profile(picture->profile)) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
|
||||
|
@@ -394,7 +394,10 @@ struct ruvd_msg {
|
||||
uint32_t dt_chroma_top_offset;
|
||||
uint32_t dt_chroma_bottom_offset;
|
||||
uint32_t dt_surf_tile_config;
|
||||
uint32_t dt_reserved[3];
|
||||
uint32_t dt_uv_surf_tile_config;
|
||||
// re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney
|
||||
uint32_t dt_wa_chroma_top_offset;
|
||||
uint32_t dt_wa_chroma_bottom_offset;
|
||||
|
||||
uint32_t reserved[16];
|
||||
|
||||
|
@@ -389,6 +389,11 @@ struct pipe_video_codec *rvce_create_encoder(struct pipe_context *context,
|
||||
struct radeon_surf *tmp_surf;
|
||||
unsigned cpb_size;
|
||||
|
||||
if (rscreen->info.family == CHIP_STONEY) {
|
||||
RVID_ERR("Stoney VCE is not supported!\n");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (!rscreen->info.vce_fw_version) {
|
||||
RVID_ERR("Kernel doesn't supports VCE!\n");
|
||||
return NULL;
|
||||
|
@@ -34,11 +34,6 @@
|
||||
|
||||
#define MAX_GLOBAL_BUFFERS 20
|
||||
|
||||
/* XXX: Even though we don't pass the scratch buffer via user sgprs any more
|
||||
* LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
|
||||
* with older mesa. */
|
||||
#define NUM_USER_SGPRS 4
|
||||
|
||||
struct si_compute {
|
||||
struct si_context *ctx;
|
||||
|
||||
@@ -238,7 +233,6 @@ static void si_launch_grid(
|
||||
uint64_t kernel_args_va;
|
||||
uint64_t scratch_buffer_va = 0;
|
||||
uint64_t shader_va;
|
||||
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
|
||||
unsigned i;
|
||||
struct si_shader *shader = &program->shader;
|
||||
unsigned lds_blocks;
|
||||
@@ -366,20 +360,7 @@ static void si_launch_grid(
|
||||
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, shader_va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
|
||||
/* We always use at least 3 VGPRS, these come from
|
||||
* TIDIG_COMP_CNT.
|
||||
* XXX: The compiler should account for this.
|
||||
*/
|
||||
S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4)
|
||||
/* We always use at least 4 + arg_user_sgpr_count. The 4 extra
|
||||
* sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN
|
||||
* XXX: The compiler should account for this.
|
||||
*/
|
||||
| S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
|
||||
shader->num_sgprs)) - 1) / 8)
|
||||
| S_00B028_FLOAT_MODE(shader->float_mode))
|
||||
;
|
||||
si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->rsrc1);
|
||||
|
||||
lds_blocks = shader->lds_size;
|
||||
/* XXX: We are over allocating LDS. For SI, the shader reports LDS in
|
||||
@@ -395,17 +376,10 @@ static void si_launch_grid(
|
||||
|
||||
assert(lds_blocks <= 0xFF);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
|
||||
S_00B84C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0)
|
||||
| S_00B84C_USER_SGPR(arg_user_sgpr_count)
|
||||
| S_00B84C_TGID_X_EN(1)
|
||||
| S_00B84C_TGID_Y_EN(1)
|
||||
| S_00B84C_TGID_Z_EN(1)
|
||||
| S_00B84C_TG_SIZE_EN(1)
|
||||
| S_00B84C_TIDIG_COMP_CNT(2)
|
||||
| S_00B84C_LDS_SIZE(lds_blocks)
|
||||
| S_00B84C_EXCP_EN(0))
|
||||
;
|
||||
shader->rsrc2 &= C_00B84C_LDS_SIZE;
|
||||
shader->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->rsrc2);
|
||||
si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
|
||||
|
@@ -632,7 +632,7 @@ void si_check_vm_faults(struct si_context *sctx)
|
||||
/* Use conservative timeout 800ms, after which we won't wait any
|
||||
* longer and assume the GPU is hung.
|
||||
*/
|
||||
screen->fence_finish(screen, sctx->last_gfx_fence, 800*1000*1000);
|
||||
sctx->b.ws->fence_wait(sctx->b.ws, sctx->last_gfx_fence, 800*1000*1000);
|
||||
|
||||
if (!si_vm_fault_occured(sctx, &addr))
|
||||
return;
|
||||
|
@@ -594,6 +594,14 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
|
||||
lp_build_const_int32(gallivm, swizzle));
|
||||
|
||||
value = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
|
||||
if (type == TGSI_TYPE_DOUBLE) {
|
||||
LLVMValueRef value2;
|
||||
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
|
||||
lp_build_const_int32(gallivm, swizzle + 1));
|
||||
value2 = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
|
||||
return radeon_llvm_emit_fetch_double(bld_base, value, value2);
|
||||
}
|
||||
|
||||
return LLVMBuildBitCast(gallivm->builder, value,
|
||||
tgsi2llvmtype(bld_base, type), "");
|
||||
}
|
||||
@@ -733,6 +741,7 @@ static LLVMValueRef fetch_input_gs(
|
||||
unsigned semantic_name = info->input_semantic_name[reg->Register.Index];
|
||||
unsigned semantic_index = info->input_semantic_index[reg->Register.Index];
|
||||
unsigned param;
|
||||
LLVMValueRef value;
|
||||
|
||||
if (swizzle != ~0 && semantic_name == TGSI_SEMANTIC_PRIMID)
|
||||
return get_primitive_id(bld_base, swizzle);
|
||||
@@ -774,11 +783,22 @@ static LLVMValueRef fetch_input_gs(
|
||||
args[7] = uint->zero; /* SLC */
|
||||
args[8] = uint->zero; /* TFE */
|
||||
|
||||
value = lp_build_intrinsic(gallivm->builder,
|
||||
"llvm.SI.buffer.load.dword.i32.i32",
|
||||
i32, args, 9,
|
||||
LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
|
||||
if (type == TGSI_TYPE_DOUBLE) {
|
||||
LLVMValueRef value2;
|
||||
args[2] = lp_build_const_int32(gallivm, (param * 4 + swizzle + 1) * 256);
|
||||
value2 = lp_build_intrinsic(gallivm->builder,
|
||||
"llvm.SI.buffer.load.dword.i32.i32",
|
||||
i32, args, 9,
|
||||
LLVMReadOnlyAttribute | LLVMNoUnwindAttribute);
|
||||
return radeon_llvm_emit_fetch_double(bld_base,
|
||||
value, value2);
|
||||
}
|
||||
return LLVMBuildBitCast(gallivm->builder,
|
||||
lp_build_intrinsic(gallivm->builder,
|
||||
"llvm.SI.buffer.load.dword.i32.i32",
|
||||
i32, args, 9,
|
||||
LLVMReadOnlyAttribute | LLVMNoUnwindAttribute),
|
||||
value,
|
||||
tgsi2llvmtype(bld_base, type), "");
|
||||
}
|
||||
|
||||
@@ -3745,12 +3765,14 @@ void si_shader_binary_read_config(const struct si_screen *sscreen,
|
||||
shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
|
||||
shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
|
||||
shader->float_mode = G_00B028_FLOAT_MODE(value);
|
||||
shader->rsrc1 = value;
|
||||
break;
|
||||
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
|
||||
shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
|
||||
break;
|
||||
case R_00B84C_COMPUTE_PGM_RSRC2:
|
||||
shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
|
||||
shader->rsrc2 = value;
|
||||
break;
|
||||
case R_0286CC_SPI_PS_INPUT_ENA:
|
||||
shader->spi_ps_input_ena = value;
|
||||
|
@@ -290,8 +290,8 @@ struct si_shader {
|
||||
bool is_gs_copy_shader;
|
||||
bool dx10_clamp_mode; /* convert NaNs to 0 */
|
||||
|
||||
unsigned ls_rsrc1;
|
||||
unsigned ls_rsrc2;
|
||||
unsigned rsrc1;
|
||||
unsigned rsrc2;
|
||||
};
|
||||
|
||||
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
|
||||
|
@@ -163,7 +163,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
|
||||
|
||||
lds_size = output_patch0_offset + output_patch_size * *num_patches;
|
||||
ls_rsrc2 = ls->current->ls_rsrc2;
|
||||
ls_rsrc2 = ls->current->rsrc2;
|
||||
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
assert(lds_size <= 65536);
|
||||
@@ -178,7 +178,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
|
||||
radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
|
||||
radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
|
||||
radeon_emit(cs, ls->current->ls_rsrc1);
|
||||
radeon_emit(cs, ls->current->rsrc1);
|
||||
radeon_emit(cs, ls_rsrc2);
|
||||
|
||||
/* Compute userdata SGPRs. */
|
||||
|
@@ -121,11 +121,11 @@ static void si_shader_ls(struct si_shader *shader)
|
||||
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
|
||||
|
||||
shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
|
||||
shader->rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
|
||||
S_00B528_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt) |
|
||||
S_00B528_DX10_CLAMP(shader->dx10_clamp_mode);
|
||||
shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
|
||||
shader->rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
|
||||
}
|
||||
|
||||
|
@@ -6,8 +6,4 @@ TARGET_LIB_DEPS += \
|
||||
$(top_builddir)/src/gallium/winsys/vc4/drm/libvc4drm.la \
|
||||
$(top_builddir)/src/gallium/drivers/vc4/libvc4.la
|
||||
|
||||
if USE_VC4_SIMULATOR
|
||||
TARGET_CPPFLAGS += -DUSE_VC4_SIMULATOR
|
||||
endif
|
||||
|
||||
endif
|
||||
|
@@ -23,7 +23,6 @@ include Makefile.sources
|
||||
include $(top_srcdir)/src/gallium/Automake.inc
|
||||
|
||||
if USE_VC4_SIMULATOR
|
||||
SIM_CFLAGS = -DUSE_VC4_SIMULATOR=1
|
||||
SIM_LDFLAGS = -lsimpenrose
|
||||
endif
|
||||
|
||||
|
@@ -21,6 +21,7 @@ C_SOURCES := \
|
||||
vc4_job.c \
|
||||
vc4_nir_lower_blend.c \
|
||||
vc4_nir_lower_io.c \
|
||||
vc4_nir_lower_txf_ms.c \
|
||||
vc4_opt_algebraic.c \
|
||||
vc4_opt_constant_folding.c \
|
||||
vc4_opt_copy_propagation.c \
|
||||
|
@@ -121,6 +121,11 @@ enum vc4_packet {
|
||||
#define VC4_PACKET_TILE_COORDINATES_SIZE 3
|
||||
#define VC4_PACKET_GEM_HANDLES_SIZE 9
|
||||
|
||||
/* Number of multisamples supported. */
|
||||
#define VC4_MAX_SAMPLES 4
|
||||
/* Size of a full resolution color or Z tile buffer load/store. */
|
||||
#define VC4_TILE_BUFFER_SIZE (64 * 64 * 4)
|
||||
|
||||
#define VC4_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low))
|
||||
/* Using the GNU statement expression extension */
|
||||
#define VC4_SET_FIELD(value, field) \
|
||||
@@ -151,6 +156,16 @@ enum vc4_packet {
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0)
|
||||
|
||||
/** @{
|
||||
*
|
||||
* low bits of VC4_PACKET_STORE_FULL_RES_TILE_BUFFER and
|
||||
* VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER.
|
||||
*/
|
||||
#define VC4_LOADSTORE_FULL_RES_EOF (1 << 3)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL (1 << 2)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_ZS (1 << 1)
|
||||
#define VC4_LOADSTORE_FULL_RES_DISABLE_COLOR (1 << 0)
|
||||
|
||||
/** @{
|
||||
*
|
||||
* byte 2 of VC4_PACKET_STORE_TILE_BUFFER_GENERAL and
|
||||
|
@@ -36,9 +36,11 @@
|
||||
|
||||
struct vc4_rcl_setup {
|
||||
struct drm_gem_cma_object *color_read;
|
||||
struct drm_gem_cma_object *color_ms_write;
|
||||
struct drm_gem_cma_object *color_write;
|
||||
struct drm_gem_cma_object *zs_read;
|
||||
struct drm_gem_cma_object *zs_write;
|
||||
struct drm_gem_cma_object *msaa_color_write;
|
||||
struct drm_gem_cma_object *msaa_zs_write;
|
||||
|
||||
struct drm_gem_cma_object *rcl;
|
||||
u32 next_offset;
|
||||
@@ -62,7 +64,6 @@ static inline void rcl_u32(struct vc4_rcl_setup *setup, u32 val)
|
||||
setup->next_offset += 4;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Emits a no-op STORE_TILE_BUFFER_GENERAL.
|
||||
*
|
||||
@@ -81,6 +82,22 @@ static void vc4_store_before_load(struct vc4_rcl_setup *setup)
|
||||
rcl_u32(setup, 0); /* no address, since we're in None mode */
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculates the physical address of the start of a tile in a RCL surface.
|
||||
*
|
||||
* Unlike the other load/store packets,
|
||||
* VC4_PACKET_LOAD/STORE_FULL_RES_TILE_BUFFER don't look at the tile
|
||||
* coordinates packet, and instead just store to the address given.
|
||||
*/
|
||||
static uint32_t vc4_full_res_offset(struct vc4_exec_info *exec,
|
||||
struct drm_gem_cma_object *bo,
|
||||
struct drm_vc4_submit_rcl_surface *surf,
|
||||
uint8_t x, uint8_t y)
|
||||
{
|
||||
return bo->paddr + surf->offset + VC4_TILE_BUFFER_SIZE *
|
||||
(DIV_ROUND_UP(exec->args->width, 32) * y + x);
|
||||
}
|
||||
|
||||
/*
|
||||
* Emits a PACKET_TILE_COORDINATES if one isn't already pending.
|
||||
*
|
||||
@@ -108,22 +125,41 @@ static void emit_tile(struct vc4_exec_info *exec,
|
||||
* may be outstanding at a time.
|
||||
*/
|
||||
if (setup->color_read) {
|
||||
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
|
||||
rcl_u16(setup, args->color_read.bits);
|
||||
rcl_u32(setup,
|
||||
setup->color_read->paddr + args->color_read.offset);
|
||||
if (args->color_read.flags &
|
||||
VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
|
||||
rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
|
||||
rcl_u32(setup,
|
||||
vc4_full_res_offset(exec, setup->color_read,
|
||||
&args->color_read, x, y) |
|
||||
VC4_LOADSTORE_FULL_RES_DISABLE_ZS);
|
||||
} else {
|
||||
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
|
||||
rcl_u16(setup, args->color_read.bits);
|
||||
rcl_u32(setup, setup->color_read->paddr +
|
||||
args->color_read.offset);
|
||||
}
|
||||
}
|
||||
|
||||
if (setup->zs_read) {
|
||||
if (setup->color_read) {
|
||||
/* Exec previous load. */
|
||||
vc4_tile_coordinates(setup, x, y);
|
||||
vc4_store_before_load(setup);
|
||||
}
|
||||
if (args->zs_read.flags &
|
||||
VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
|
||||
rcl_u8(setup, VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER);
|
||||
rcl_u32(setup,
|
||||
vc4_full_res_offset(exec, setup->zs_read,
|
||||
&args->zs_read, x, y) |
|
||||
VC4_LOADSTORE_FULL_RES_DISABLE_COLOR);
|
||||
} else {
|
||||
if (setup->color_read) {
|
||||
/* Exec previous load. */
|
||||
vc4_tile_coordinates(setup, x, y);
|
||||
vc4_store_before_load(setup);
|
||||
}
|
||||
|
||||
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
|
||||
rcl_u16(setup, args->zs_read.bits);
|
||||
rcl_u32(setup, setup->zs_read->paddr + args->zs_read.offset);
|
||||
rcl_u8(setup, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
|
||||
rcl_u16(setup, args->zs_read.bits);
|
||||
rcl_u32(setup, setup->zs_read->paddr +
|
||||
args->zs_read.offset);
|
||||
}
|
||||
}
|
||||
|
||||
/* Clipping depends on tile coordinates having been
|
||||
@@ -144,20 +180,60 @@ static void emit_tile(struct vc4_exec_info *exec,
|
||||
(y * exec->bin_tiles_x + x) * 32));
|
||||
}
|
||||
|
||||
if (setup->msaa_color_write) {
|
||||
bool last_tile_write = (!setup->msaa_zs_write &&
|
||||
!setup->zs_write &&
|
||||
!setup->color_write);
|
||||
uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_ZS;
|
||||
|
||||
if (!last_tile_write)
|
||||
bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
|
||||
else if (last)
|
||||
bits |= VC4_LOADSTORE_FULL_RES_EOF;
|
||||
rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
|
||||
rcl_u32(setup,
|
||||
vc4_full_res_offset(exec, setup->msaa_color_write,
|
||||
&args->msaa_color_write, x, y) |
|
||||
bits);
|
||||
}
|
||||
|
||||
if (setup->msaa_zs_write) {
|
||||
bool last_tile_write = (!setup->zs_write &&
|
||||
!setup->color_write);
|
||||
uint32_t bits = VC4_LOADSTORE_FULL_RES_DISABLE_COLOR;
|
||||
|
||||
if (setup->msaa_color_write)
|
||||
vc4_tile_coordinates(setup, x, y);
|
||||
if (!last_tile_write)
|
||||
bits |= VC4_LOADSTORE_FULL_RES_DISABLE_CLEAR_ALL;
|
||||
else if (last)
|
||||
bits |= VC4_LOADSTORE_FULL_RES_EOF;
|
||||
rcl_u8(setup, VC4_PACKET_STORE_FULL_RES_TILE_BUFFER);
|
||||
rcl_u32(setup,
|
||||
vc4_full_res_offset(exec, setup->msaa_zs_write,
|
||||
&args->msaa_zs_write, x, y) |
|
||||
bits);
|
||||
}
|
||||
|
||||
if (setup->zs_write) {
|
||||
bool last_tile_write = !setup->color_write;
|
||||
|
||||
if (setup->msaa_color_write || setup->msaa_zs_write)
|
||||
vc4_tile_coordinates(setup, x, y);
|
||||
|
||||
rcl_u8(setup, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
|
||||
rcl_u16(setup, args->zs_write.bits |
|
||||
(setup->color_ms_write ?
|
||||
VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR : 0));
|
||||
(last_tile_write ?
|
||||
0 : VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR));
|
||||
rcl_u32(setup,
|
||||
(setup->zs_write->paddr + args->zs_write.offset) |
|
||||
((last && !setup->color_ms_write) ?
|
||||
((last && last_tile_write) ?
|
||||
VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
|
||||
}
|
||||
|
||||
if (setup->color_ms_write) {
|
||||
if (setup->zs_write) {
|
||||
/* Reset after previous store */
|
||||
if (setup->color_write) {
|
||||
if (setup->msaa_color_write || setup->msaa_zs_write ||
|
||||
setup->zs_write) {
|
||||
vc4_tile_coordinates(setup, x, y);
|
||||
}
|
||||
|
||||
@@ -192,14 +268,26 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
|
||||
}
|
||||
|
||||
if (setup->color_read) {
|
||||
loop_body_size += (VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE);
|
||||
if (args->color_read.flags &
|
||||
VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
|
||||
loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
|
||||
} else {
|
||||
loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
|
||||
}
|
||||
}
|
||||
if (setup->zs_read) {
|
||||
if (setup->color_read) {
|
||||
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
|
||||
loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
|
||||
if (args->zs_read.flags &
|
||||
VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
|
||||
loop_body_size += VC4_PACKET_LOAD_FULL_RES_TILE_BUFFER_SIZE;
|
||||
} else {
|
||||
if (setup->color_read &&
|
||||
!(args->color_read.flags &
|
||||
VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES)) {
|
||||
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
|
||||
loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
|
||||
}
|
||||
loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
|
||||
}
|
||||
loop_body_size += VC4_PACKET_LOAD_TILE_BUFFER_GENERAL_SIZE;
|
||||
}
|
||||
|
||||
if (has_bin) {
|
||||
@@ -207,13 +295,23 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
|
||||
loop_body_size += VC4_PACKET_BRANCH_TO_SUB_LIST_SIZE;
|
||||
}
|
||||
|
||||
if (setup->msaa_color_write)
|
||||
loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
|
||||
if (setup->msaa_zs_write)
|
||||
loop_body_size += VC4_PACKET_STORE_FULL_RES_TILE_BUFFER_SIZE;
|
||||
|
||||
if (setup->zs_write)
|
||||
loop_body_size += VC4_PACKET_STORE_TILE_BUFFER_GENERAL_SIZE;
|
||||
if (setup->color_ms_write) {
|
||||
if (setup->zs_write)
|
||||
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE;
|
||||
if (setup->color_write)
|
||||
loop_body_size += VC4_PACKET_STORE_MS_TILE_BUFFER_SIZE;
|
||||
}
|
||||
|
||||
/* We need a VC4_PACKET_TILE_COORDINATES in between each store. */
|
||||
loop_body_size += VC4_PACKET_TILE_COORDINATES_SIZE *
|
||||
((setup->msaa_color_write != NULL) +
|
||||
(setup->msaa_zs_write != NULL) +
|
||||
(setup->color_write != NULL) +
|
||||
(setup->zs_write != NULL) - 1);
|
||||
|
||||
size += xtiles * ytiles * loop_body_size;
|
||||
|
||||
setup->rcl = drm_gem_cma_create(dev, size);
|
||||
@@ -224,13 +322,12 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
|
||||
|
||||
rcl_u8(setup, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
|
||||
rcl_u32(setup,
|
||||
(setup->color_ms_write ?
|
||||
(setup->color_ms_write->paddr +
|
||||
args->color_ms_write.offset) :
|
||||
(setup->color_write ? (setup->color_write->paddr +
|
||||
args->color_write.offset) :
|
||||
0));
|
||||
rcl_u16(setup, args->width);
|
||||
rcl_u16(setup, args->height);
|
||||
rcl_u16(setup, args->color_ms_write.bits);
|
||||
rcl_u16(setup, args->color_write.bits);
|
||||
|
||||
/* The tile buffer gets cleared when the previous tile is stored. If
|
||||
* the clear values changed between frames, then the tile buffer has
|
||||
@@ -255,6 +352,7 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
|
||||
for (x = min_x_tile; x <= max_x_tile; x++) {
|
||||
bool first = (x == min_x_tile && y == min_y_tile);
|
||||
bool last = (x == max_x_tile && y == max_y_tile);
|
||||
|
||||
emit_tile(exec, setup, x, y, first, last);
|
||||
}
|
||||
}
|
||||
@@ -266,6 +364,56 @@ static int vc4_create_rcl_bo(struct drm_device *dev, struct vc4_exec_info *exec,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vc4_full_res_bounds_check(struct vc4_exec_info *exec,
|
||||
struct drm_gem_cma_object *obj,
|
||||
struct drm_vc4_submit_rcl_surface *surf)
|
||||
{
|
||||
struct drm_vc4_submit_cl *args = exec->args;
|
||||
u32 render_tiles_stride = DIV_ROUND_UP(exec->args->width, 32);
|
||||
|
||||
if (surf->offset > obj->base.size) {
|
||||
DRM_ERROR("surface offset %d > BO size %zd\n",
|
||||
surf->offset, obj->base.size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((obj->base.size - surf->offset) / VC4_TILE_BUFFER_SIZE <
|
||||
render_tiles_stride * args->max_y_tile + args->max_x_tile) {
|
||||
DRM_ERROR("MSAA tile %d, %d out of bounds "
|
||||
"(bo size %zd, offset %d).\n",
|
||||
args->max_x_tile, args->max_y_tile,
|
||||
obj->base.size,
|
||||
surf->offset);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int vc4_rcl_msaa_surface_setup(struct vc4_exec_info *exec,
|
||||
struct drm_gem_cma_object **obj,
|
||||
struct drm_vc4_submit_rcl_surface *surf)
|
||||
{
|
||||
if (surf->flags != 0 || surf->bits != 0) {
|
||||
DRM_ERROR("MSAA surface had nonzero flags/bits\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (surf->hindex == ~0)
|
||||
return 0;
|
||||
|
||||
*obj = vc4_use_bo(exec, surf->hindex);
|
||||
if (!*obj)
|
||||
return -EINVAL;
|
||||
|
||||
if (surf->offset & 0xf) {
|
||||
DRM_ERROR("MSAA write must be 16b aligned.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return vc4_full_res_bounds_check(exec, *obj, surf);
|
||||
}
|
||||
|
||||
static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
|
||||
struct drm_gem_cma_object **obj,
|
||||
struct drm_vc4_submit_rcl_surface *surf)
|
||||
@@ -277,9 +425,10 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
|
||||
uint8_t format = VC4_GET_FIELD(surf->bits,
|
||||
VC4_LOADSTORE_TILE_BUFFER_FORMAT);
|
||||
int cpp;
|
||||
int ret;
|
||||
|
||||
if (surf->pad != 0) {
|
||||
DRM_ERROR("Padding unset\n");
|
||||
if (surf->flags & ~VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
|
||||
DRM_ERROR("Extra flags set\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -290,6 +439,25 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
|
||||
if (!*obj)
|
||||
return -EINVAL;
|
||||
|
||||
if (surf->flags & VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES) {
|
||||
if (surf == &exec->args->zs_write) {
|
||||
DRM_ERROR("general zs write may not be a full-res.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (surf->bits != 0) {
|
||||
DRM_ERROR("load/store general bits set with "
|
||||
"full res load/store.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = vc4_full_res_bounds_check(exec, *obj, surf);
|
||||
if (!ret)
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (surf->bits & ~(VC4_LOADSTORE_TILE_BUFFER_TILING_MASK |
|
||||
VC4_LOADSTORE_TILE_BUFFER_BUFFER_MASK |
|
||||
VC4_LOADSTORE_TILE_BUFFER_FORMAT_MASK)) {
|
||||
@@ -341,9 +509,10 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
|
||||
}
|
||||
|
||||
static int
|
||||
vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
|
||||
struct drm_gem_cma_object **obj,
|
||||
struct drm_vc4_submit_rcl_surface *surf)
|
||||
vc4_rcl_render_config_surface_setup(struct vc4_exec_info *exec,
|
||||
struct vc4_rcl_setup *setup,
|
||||
struct drm_gem_cma_object **obj,
|
||||
struct drm_vc4_submit_rcl_surface *surf)
|
||||
{
|
||||
uint8_t tiling = VC4_GET_FIELD(surf->bits,
|
||||
VC4_RENDER_CONFIG_MEMORY_FORMAT);
|
||||
@@ -351,13 +520,15 @@ vc4_rcl_ms_surface_setup(struct vc4_exec_info *exec,
|
||||
VC4_RENDER_CONFIG_FORMAT);
|
||||
int cpp;
|
||||
|
||||
if (surf->pad != 0) {
|
||||
DRM_ERROR("Padding unset\n");
|
||||
if (surf->flags != 0) {
|
||||
DRM_ERROR("No flags supported on render config.\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (surf->bits & ~(VC4_RENDER_CONFIG_MEMORY_FORMAT_MASK |
|
||||
VC4_RENDER_CONFIG_FORMAT_MASK)) {
|
||||
VC4_RENDER_CONFIG_FORMAT_MASK |
|
||||
VC4_RENDER_CONFIG_MS_MODE_4X |
|
||||
VC4_RENDER_CONFIG_DECIMATE_MODE_4X)) {
|
||||
DRM_ERROR("Unknown bits in render config: 0x%04x\n",
|
||||
surf->bits);
|
||||
return -EINVAL;
|
||||
@@ -414,18 +585,20 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
|
||||
if (has_bin &&
|
||||
(args->max_x_tile > exec->bin_tiles_x ||
|
||||
args->max_y_tile > exec->bin_tiles_y)) {
|
||||
DRM_ERROR("Render tiles (%d,%d) outside of bin config (%d,%d)\n",
|
||||
DRM_ERROR("Render tiles (%d,%d) outside of bin config "
|
||||
"(%d,%d)\n",
|
||||
args->max_x_tile, args->max_y_tile,
|
||||
exec->bin_tiles_x, exec->bin_tiles_y);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
|
||||
ret = vc4_rcl_render_config_surface_setup(exec, &setup,
|
||||
&setup.color_write,
|
||||
&args->color_write);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = vc4_rcl_ms_surface_setup(exec, &setup.color_ms_write,
|
||||
&args->color_ms_write);
|
||||
ret = vc4_rcl_surface_setup(exec, &setup.color_read, &args->color_read);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -437,10 +610,21 @@ int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_color_write,
|
||||
&args->msaa_color_write);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = vc4_rcl_msaa_surface_setup(exec, &setup.msaa_zs_write,
|
||||
&args->msaa_zs_write);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/* We shouldn't even have the job submitted to us if there's no
|
||||
* surface to write out.
|
||||
*/
|
||||
if (!setup.color_ms_write && !setup.zs_write) {
|
||||
if (!setup.color_write && !setup.zs_write &&
|
||||
!setup.msaa_color_write && !setup.msaa_zs_write) {
|
||||
DRM_ERROR("RCL requires color or Z/S write\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@@ -47,7 +47,6 @@
|
||||
void *validated, \
|
||||
void *untrusted
|
||||
|
||||
|
||||
/** Return the width in pixels of a 64-byte microtile. */
|
||||
static uint32_t
|
||||
utile_width(int cpp)
|
||||
@@ -191,7 +190,7 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
|
||||
|
||||
if (size + offset < size ||
|
||||
size + offset > fbo->base.size) {
|
||||
DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %d)\n",
|
||||
DRM_ERROR("Overflow in %dx%d (%dx%d) fbo size (%d + %d > %zd)\n",
|
||||
width, height,
|
||||
aligned_width, aligned_height,
|
||||
size, offset, fbo->base.size);
|
||||
@@ -201,7 +200,6 @@ vc4_check_tex_size(struct vc4_exec_info *exec, struct drm_gem_cma_object *fbo,
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
validate_flush(VALIDATE_ARGS)
|
||||
{
|
||||
@@ -270,7 +268,7 @@ validate_indexed_prim_list(VALIDATE_ARGS)
|
||||
|
||||
if (offset > ib->base.size ||
|
||||
(ib->base.size - offset) / index_size < length) {
|
||||
DRM_ERROR("IB access overflow (%d + %d*%d > %d)\n",
|
||||
DRM_ERROR("IB access overflow (%d + %d*%d > %zd)\n",
|
||||
offset, length, index_size, ib->base.size);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -361,9 +359,8 @@ validate_tile_binning_config(VALIDATE_ARGS)
|
||||
}
|
||||
|
||||
if (flags & (VC4_BIN_CONFIG_DB_NON_MS |
|
||||
VC4_BIN_CONFIG_TILE_BUFFER_64BIT |
|
||||
VC4_BIN_CONFIG_MS_MODE_4X)) {
|
||||
DRM_ERROR("unsupported bining config flags 0x%02x\n", flags);
|
||||
VC4_BIN_CONFIG_TILE_BUFFER_64BIT)) {
|
||||
DRM_ERROR("unsupported binning config flags 0x%02x\n", flags);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@@ -424,8 +421,8 @@ validate_gem_handles(VALIDATE_ARGS)
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define VC4_DEFINE_PACKET(packet, name, func) \
|
||||
[packet] = { packet ## _SIZE, name, func }
|
||||
#define VC4_DEFINE_PACKET(packet, func) \
|
||||
[packet] = { packet ## _SIZE, #packet, func }
|
||||
|
||||
static const struct cmd_info {
|
||||
uint16_t len;
|
||||
@@ -433,42 +430,42 @@ static const struct cmd_info {
|
||||
int (*func)(struct vc4_exec_info *exec, void *validated,
|
||||
void *untrusted);
|
||||
} cmd_info[] = {
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_HALT, "halt", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_NOP, "nop", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, "flush", validate_flush),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, "flush all state", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING, "start tile binning", validate_start_tile_binning),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE, "increment semaphore", validate_increment_semaphore),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_HALT, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_NOP, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_FLUSH, validate_flush),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_FLUSH_ALL, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_START_TILE_BINNING,
|
||||
validate_start_tile_binning),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_INCREMENT_SEMAPHORE,
|
||||
validate_increment_semaphore),
|
||||
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE, "Indexed Primitive List", validate_indexed_prim_list),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GL_INDEXED_PRIMITIVE,
|
||||
validate_indexed_prim_list),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE,
|
||||
validate_gl_array_primitive),
|
||||
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GL_ARRAY_PRIMITIVE, "Vertex Array Primitives", validate_gl_array_primitive),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, NULL),
|
||||
|
||||
/* This is only used by clipped primitives (packets 48 and 49), which
|
||||
* we don't support parsing yet.
|
||||
*/
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_PRIMITIVE_LIST_FORMAT, "primitive list format", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, validate_gl_shader_state),
|
||||
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GL_SHADER_STATE, "GL Shader State", validate_gl_shader_state),
|
||||
/* We don't support validating NV shader states. */
|
||||
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, "configuration bits", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, "flat shade flags", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, "point size", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, "line width", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, "RHT X boundary", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, "Depth Offset", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, "Clip Window", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, "Viewport Offset", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, "Clipper XY Scaling", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CONFIGURATION_BITS, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_FLAT_SHADE_FLAGS, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_POINT_SIZE, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_LINE_WIDTH, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_RHT_X_BOUNDARY, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_DEPTH_OFFSET, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CLIP_WINDOW, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_VIEWPORT_OFFSET, NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_XY_SCALING, NULL),
|
||||
/* Note: The docs say this was also 105, but it was 106 in the
|
||||
* initial userland code drop.
|
||||
*/
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, "Clipper Z Scale and Offset", NULL),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_CLIPPER_Z_SCALING, NULL),
|
||||
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG, "tile binning configuration", validate_tile_binning_config),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_TILE_BINNING_MODE_CONFIG,
|
||||
validate_tile_binning_config),
|
||||
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, "GEM handles", validate_gem_handles),
|
||||
VC4_DEFINE_PACKET(VC4_PACKET_GEM_HANDLES, validate_gem_handles),
|
||||
};
|
||||
|
||||
int
|
||||
@@ -500,11 +497,6 @@ vc4_validate_bin_cl(struct drm_device *dev,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#if 0
|
||||
DRM_INFO("0x%08x: packet %d (%s) size %d processing...\n",
|
||||
src_offset, cmd, info->name, info->len);
|
||||
#endif
|
||||
|
||||
if (src_offset + info->len > len) {
|
||||
DRM_ERROR("0x%08x: packet %d (%s) length 0x%08x "
|
||||
"exceeds bounds (0x%08x)\n",
|
||||
@@ -519,8 +511,7 @@ vc4_validate_bin_cl(struct drm_device *dev,
|
||||
if (info->func && info->func(exec,
|
||||
dst_pkt + 1,
|
||||
src_pkt + 1)) {
|
||||
DRM_ERROR("0x%08x: packet %d (%s) failed to "
|
||||
"validate\n",
|
||||
DRM_ERROR("0x%08x: packet %d (%s) failed to validate\n",
|
||||
src_offset, cmd, info->name);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -588,12 +579,14 @@ reloc_tex(struct vc4_exec_info *exec,
|
||||
|
||||
if (sample->is_direct) {
|
||||
uint32_t remaining_size = tex->base.size - p0;
|
||||
|
||||
if (p0 > tex->base.size - 4) {
|
||||
DRM_ERROR("UBO offset greater than UBO size\n");
|
||||
goto fail;
|
||||
}
|
||||
if (p1 > remaining_size - 4) {
|
||||
DRM_ERROR("UBO clamp would allow reads outside of UBO\n");
|
||||
DRM_ERROR("UBO clamp would allow reads "
|
||||
"outside of UBO\n");
|
||||
goto fail;
|
||||
}
|
||||
*validated_p0 = tex->paddr + p0;
|
||||
@@ -866,7 +859,7 @@ validate_gl_shader_rec(struct drm_device *dev,
|
||||
|
||||
if (vbo->base.size < offset ||
|
||||
vbo->base.size - offset < attr_size) {
|
||||
DRM_ERROR("BO offset overflow (%d + %d > %d)\n",
|
||||
DRM_ERROR("BO offset overflow (%d + %d > %zd)\n",
|
||||
offset, attr_size, vbo->base.size);
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -875,7 +868,8 @@ validate_gl_shader_rec(struct drm_device *dev,
|
||||
max_index = ((vbo->base.size - offset - attr_size) /
|
||||
stride);
|
||||
if (state->max_index > max_index) {
|
||||
DRM_ERROR("primitives use index %d out of supplied %d\n",
|
||||
DRM_ERROR("primitives use index %d out of "
|
||||
"supplied %d\n",
|
||||
state->max_index, max_index);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
@@ -24,24 +24,16 @@
|
||||
/**
|
||||
* DOC: Shader validator for VC4.
|
||||
*
|
||||
* The VC4 has no IOMMU between it and system memory. So, a user with access
|
||||
* to execute shaders could escalate privilege by overwriting system memory
|
||||
* (using the VPM write address register in the general-purpose DMA mode) or
|
||||
* reading system memory it shouldn't (reading it as a texture, or uniform
|
||||
* data, or vertex data).
|
||||
* The VC4 has no IOMMU between it and system memory, so a user with
|
||||
* access to execute shaders could escalate privilege by overwriting
|
||||
* system memory (using the VPM write address register in the
|
||||
* general-purpose DMA mode) or reading system memory it shouldn't
|
||||
* (reading it as a texture, or uniform data, or vertex data).
|
||||
*
|
||||
* This walks over a shader starting from some offset within a BO, ensuring
|
||||
* that its accesses are appropriately bounded, and recording how many texture
|
||||
* accesses are made and where so that we can do relocations for them in the
|
||||
* This walks over a shader BO, ensuring that its accesses are
|
||||
* appropriately bounded, and recording how many texture accesses are
|
||||
* made and where so that we can do relocations for them in the
|
||||
* uniform stream.
|
||||
*
|
||||
* The kernel API has shaders stored in user-mapped BOs. The BOs will be
|
||||
* forcibly unmapped from the process before validation, and any cache of
|
||||
* validated state will be flushed if the mapping is faulted back in.
|
||||
*
|
||||
* Storing the shaders in BOs means that the validation process will be slow
|
||||
* due to uncached reads, but since shaders are long-lived and shader BOs are
|
||||
* never actually modified, this shouldn't be a problem.
|
||||
*/
|
||||
|
||||
#include "vc4_drv.h"
|
||||
@@ -71,7 +63,6 @@ waddr_to_live_reg_index(uint32_t waddr, bool is_b)
|
||||
else
|
||||
return waddr;
|
||||
} else if (waddr <= QPU_W_ACC3) {
|
||||
|
||||
return 64 + waddr - QPU_W_ACC0;
|
||||
} else {
|
||||
return ~0;
|
||||
@@ -86,15 +77,14 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
|
||||
uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);
|
||||
uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);
|
||||
|
||||
if (add_a == QPU_MUX_A) {
|
||||
if (add_a == QPU_MUX_A)
|
||||
return raddr_a;
|
||||
} else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM) {
|
||||
else if (add_a == QPU_MUX_B && sig != QPU_SIG_SMALL_IMM)
|
||||
return 32 + raddr_b;
|
||||
} else if (add_a <= QPU_MUX_R3) {
|
||||
else if (add_a <= QPU_MUX_R3)
|
||||
return 64 + add_a;
|
||||
} else {
|
||||
else
|
||||
return ~0;
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -112,9 +102,9 @@ is_tmu_write(uint32_t waddr)
|
||||
}
|
||||
|
||||
static bool
|
||||
record_validated_texture_sample(struct vc4_validated_shader_info *validated_shader,
|
||||
struct vc4_shader_validation_state *validation_state,
|
||||
int tmu)
|
||||
record_texture_sample(struct vc4_validated_shader_info *validated_shader,
|
||||
struct vc4_shader_validation_state *validation_state,
|
||||
int tmu)
|
||||
{
|
||||
uint32_t s = validated_shader->num_texture_samples;
|
||||
int i;
|
||||
@@ -227,8 +217,8 @@ check_tmu_write(uint64_t inst,
|
||||
validated_shader->uniforms_size += 4;
|
||||
|
||||
if (submit) {
|
||||
if (!record_validated_texture_sample(validated_shader,
|
||||
validation_state, tmu)) {
|
||||
if (!record_texture_sample(validated_shader,
|
||||
validation_state, tmu)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -239,10 +229,10 @@ check_tmu_write(uint64_t inst,
|
||||
}
|
||||
|
||||
static bool
|
||||
check_register_write(uint64_t inst,
|
||||
struct vc4_validated_shader_info *validated_shader,
|
||||
struct vc4_shader_validation_state *validation_state,
|
||||
bool is_mul)
|
||||
check_reg_write(uint64_t inst,
|
||||
struct vc4_validated_shader_info *validated_shader,
|
||||
struct vc4_shader_validation_state *validation_state,
|
||||
bool is_mul)
|
||||
{
|
||||
uint32_t waddr = (is_mul ?
|
||||
QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
|
||||
@@ -298,7 +288,7 @@ check_register_write(uint64_t inst,
|
||||
return true;
|
||||
|
||||
case QPU_W_TLB_STENCIL_SETUP:
|
||||
return true;
|
||||
return true;
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -361,7 +351,7 @@ track_live_clamps(uint64_t inst,
|
||||
}
|
||||
|
||||
validation_state->live_max_clamp_regs[lri_add] = true;
|
||||
} if (op_add == QPU_A_MIN) {
|
||||
} else if (op_add == QPU_A_MIN) {
|
||||
/* Track live clamps of a value clamped to a minimum of 0 and
|
||||
* a maximum of some uniform's offset.
|
||||
*/
|
||||
@@ -393,8 +383,10 @@ check_instruction_writes(uint64_t inst,
|
||||
return false;
|
||||
}
|
||||
|
||||
ok = (check_register_write(inst, validated_shader, validation_state, false) &&
|
||||
check_register_write(inst, validated_shader, validation_state, true));
|
||||
ok = (check_reg_write(inst, validated_shader, validation_state,
|
||||
false) &&
|
||||
check_reg_write(inst, validated_shader, validation_state,
|
||||
true));
|
||||
|
||||
track_live_clamps(inst, validated_shader, validation_state);
|
||||
|
||||
@@ -442,7 +434,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||
shader = shader_obj->vaddr;
|
||||
max_ip = shader_obj->base.size / sizeof(uint64_t);
|
||||
|
||||
validated_shader = kcalloc(sizeof(*validated_shader), 1, GFP_KERNEL);
|
||||
validated_shader = kcalloc(1, sizeof(*validated_shader), GFP_KERNEL);
|
||||
if (!validated_shader)
|
||||
return NULL;
|
||||
|
||||
@@ -498,7 +490,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||
|
||||
if (ip == max_ip) {
|
||||
DRM_ERROR("shader failed to terminate before "
|
||||
"shader BO end at %d\n",
|
||||
"shader BO end at %zd\n",
|
||||
shader_obj->base.size);
|
||||
goto fail;
|
||||
}
|
||||
@@ -514,6 +506,9 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||
return validated_shader;
|
||||
|
||||
fail:
|
||||
kfree(validated_shader);
|
||||
if (validated_shader) {
|
||||
kfree(validated_shader->texture_samples);
|
||||
kfree(validated_shader);
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
@@ -41,24 +41,53 @@ vc4_get_blit_surface(struct pipe_context *pctx,
|
||||
return pctx->create_surface(pctx, prsc, &tmpl);
|
||||
}
|
||||
|
||||
static bool
|
||||
is_tile_unaligned(unsigned size, unsigned tile_size)
|
||||
{
|
||||
return size & (tile_size - 1);
|
||||
}
|
||||
|
||||
static bool
|
||||
vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
|
||||
{
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
bool old_msaa = vc4->msaa;
|
||||
int old_tile_width = vc4->tile_width;
|
||||
int old_tile_height = vc4->tile_height;
|
||||
bool msaa = (info->src.resource->nr_samples ||
|
||||
info->dst.resource->nr_samples);
|
||||
int tile_width = msaa ? 32 : 64;
|
||||
int tile_height = msaa ? 32 : 64;
|
||||
|
||||
if (util_format_is_depth_or_stencil(info->dst.resource->format))
|
||||
return false;
|
||||
|
||||
if (info->scissor_enable)
|
||||
return false;
|
||||
|
||||
if ((info->mask & PIPE_MASK_RGBA) == 0)
|
||||
return false;
|
||||
|
||||
if (info->dst.box.x != 0 || info->dst.box.y != 0 ||
|
||||
info->src.box.x != 0 || info->src.box.y != 0 ||
|
||||
if (info->dst.box.x != info->src.box.x ||
|
||||
info->dst.box.y != info->src.box.y ||
|
||||
info->dst.box.width != info->src.box.width ||
|
||||
info->dst.box.height != info->src.box.height) {
|
||||
return false;
|
||||
}
|
||||
|
||||
int dst_surface_width = u_minify(info->dst.resource->width0,
|
||||
info->dst.level);
|
||||
int dst_surface_height = u_minify(info->dst.resource->height0,
|
||||
info->dst.level);
|
||||
if (is_tile_unaligned(info->dst.box.x, tile_width) ||
|
||||
is_tile_unaligned(info->dst.box.y, tile_height) ||
|
||||
(is_tile_unaligned(info->dst.box.width, tile_width) &&
|
||||
info->dst.box.x + info->dst.box.width != dst_surface_width) ||
|
||||
(is_tile_unaligned(info->dst.box.height, tile_height) &&
|
||||
info->dst.box.y + info->dst.box.height != dst_surface_height)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info->dst.resource->format != info->src.resource->format)
|
||||
return false;
|
||||
|
||||
@@ -70,18 +99,32 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
|
||||
vc4_get_blit_surface(pctx, info->src.resource, info->src.level);
|
||||
|
||||
pipe_surface_reference(&vc4->color_read, src_surf);
|
||||
pipe_surface_reference(&vc4->color_write, dst_surf);
|
||||
pipe_surface_reference(&vc4->color_write,
|
||||
dst_surf->texture->nr_samples ? NULL : dst_surf);
|
||||
pipe_surface_reference(&vc4->msaa_color_write,
|
||||
dst_surf->texture->nr_samples ? dst_surf : NULL);
|
||||
pipe_surface_reference(&vc4->zs_read, NULL);
|
||||
pipe_surface_reference(&vc4->zs_write, NULL);
|
||||
vc4->draw_min_x = 0;
|
||||
vc4->draw_min_y = 0;
|
||||
vc4->draw_max_x = dst_surf->width;
|
||||
vc4->draw_max_y = dst_surf->height;
|
||||
pipe_surface_reference(&vc4->msaa_zs_write, NULL);
|
||||
|
||||
vc4->draw_min_x = info->dst.box.x;
|
||||
vc4->draw_min_y = info->dst.box.y;
|
||||
vc4->draw_max_x = info->dst.box.x + info->dst.box.width;
|
||||
vc4->draw_max_y = info->dst.box.y + info->dst.box.height;
|
||||
vc4->draw_width = dst_surf->width;
|
||||
vc4->draw_height = dst_surf->height;
|
||||
|
||||
vc4->tile_width = tile_width;
|
||||
vc4->tile_height = tile_height;
|
||||
vc4->msaa = msaa;
|
||||
vc4->needs_flush = true;
|
||||
|
||||
vc4_job_submit(vc4);
|
||||
|
||||
vc4->msaa = old_msaa;
|
||||
vc4->tile_width = old_tile_width;
|
||||
vc4->tile_height = old_tile_height;
|
||||
|
||||
pipe_surface_reference(&dst_surf, NULL);
|
||||
pipe_surface_reference(&src_surf, NULL);
|
||||
|
||||
@@ -131,14 +174,6 @@ vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
|
||||
{
|
||||
struct pipe_blit_info info = *blit_info;
|
||||
|
||||
if (info.src.resource->nr_samples > 1 &&
|
||||
info.dst.resource->nr_samples <= 1 &&
|
||||
!util_format_is_depth_or_stencil(info.src.resource->format) &&
|
||||
!util_format_is_pure_integer(info.src.resource->format)) {
|
||||
fprintf(stderr, "color resolve unimplemented\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (vc4_tile_blit(pctx, blit_info))
|
||||
return;
|
||||
|
||||
|
@@ -67,8 +67,16 @@ vc4_flush(struct pipe_context *pctx)
|
||||
cl_u8(&bcl, VC4_PACKET_FLUSH);
|
||||
cl_end(&vc4->bcl, bcl);
|
||||
|
||||
vc4->msaa = false;
|
||||
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
|
||||
pipe_surface_reference(&vc4->color_write, cbuf);
|
||||
pipe_surface_reference(&vc4->color_write,
|
||||
cbuf->texture->nr_samples ? NULL : cbuf);
|
||||
pipe_surface_reference(&vc4->msaa_color_write,
|
||||
cbuf->texture->nr_samples ? cbuf : NULL);
|
||||
|
||||
if (cbuf->texture->nr_samples)
|
||||
vc4->msaa = true;
|
||||
|
||||
if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) {
|
||||
pipe_surface_reference(&vc4->color_read, cbuf);
|
||||
} else {
|
||||
@@ -78,11 +86,21 @@ vc4_flush(struct pipe_context *pctx)
|
||||
} else {
|
||||
pipe_surface_reference(&vc4->color_write, NULL);
|
||||
pipe_surface_reference(&vc4->color_read, NULL);
|
||||
pipe_surface_reference(&vc4->msaa_color_write, NULL);
|
||||
}
|
||||
|
||||
if (vc4->framebuffer.zsbuf &&
|
||||
(vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
|
||||
pipe_surface_reference(&vc4->zs_write, zsbuf);
|
||||
pipe_surface_reference(&vc4->zs_write,
|
||||
zsbuf->texture->nr_samples ?
|
||||
NULL : zsbuf);
|
||||
pipe_surface_reference(&vc4->msaa_zs_write,
|
||||
zsbuf->texture->nr_samples ?
|
||||
zsbuf : NULL);
|
||||
|
||||
if (zsbuf->texture->nr_samples)
|
||||
vc4->msaa = true;
|
||||
|
||||
if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
|
||||
pipe_surface_reference(&vc4->zs_read, zsbuf);
|
||||
} else {
|
||||
@@ -91,6 +109,7 @@ vc4_flush(struct pipe_context *pctx)
|
||||
} else {
|
||||
pipe_surface_reference(&vc4->zs_write, NULL);
|
||||
pipe_surface_reference(&vc4->zs_read, NULL);
|
||||
pipe_surface_reference(&vc4->msaa_zs_write, NULL);
|
||||
}
|
||||
|
||||
vc4_job_submit(vc4);
|
||||
@@ -245,6 +264,8 @@ vc4_context_create(struct pipe_screen *pscreen, void *priv, unsigned flags)
|
||||
|
||||
vc4_debug |= saved_shaderdb_flag;
|
||||
|
||||
vc4->sample_mask = (1 << VC4_MAX_SAMPLES) - 1;
|
||||
|
||||
return &vc4->base;
|
||||
|
||||
fail:
|
||||
|
@@ -206,6 +206,8 @@ struct vc4_context {
|
||||
struct pipe_surface *color_write;
|
||||
struct pipe_surface *zs_read;
|
||||
struct pipe_surface *zs_write;
|
||||
struct pipe_surface *msaa_color_write;
|
||||
struct pipe_surface *msaa_zs_write;
|
||||
/** @} */
|
||||
/** @{
|
||||
* Bounding box of the scissor across all queued drawing.
|
||||
@@ -224,6 +226,15 @@ struct vc4_context {
|
||||
uint32_t draw_width;
|
||||
uint32_t draw_height;
|
||||
/** @} */
|
||||
/** @{ Tile information, depending on MSAA and float color buffer. */
|
||||
uint32_t draw_tiles_x; /** @< Number of tiles wide for framebuffer. */
|
||||
uint32_t draw_tiles_y; /** @< Number of tiles high for framebuffer. */
|
||||
|
||||
uint32_t tile_width; /** @< Width of a tile. */
|
||||
uint32_t tile_height; /** @< Height of a tile. */
|
||||
/** Whether the current rendering is in a 4X MSAA tile buffer. */
|
||||
bool msaa;
|
||||
/** @} */
|
||||
|
||||
struct util_slab_mempool transfer_pool;
|
||||
struct blitter_context *blitter;
|
||||
|
@@ -68,21 +68,17 @@ vc4_start_draw(struct vc4_context *vc4)
|
||||
|
||||
vc4_get_draw_cl_space(vc4);
|
||||
|
||||
uint32_t width = vc4->framebuffer.width;
|
||||
uint32_t height = vc4->framebuffer.height;
|
||||
uint32_t tilew = align(width, 64) / 64;
|
||||
uint32_t tileh = align(height, 64) / 64;
|
||||
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
|
||||
|
||||
// Tile state data is 48 bytes per tile, I think it can be thrown away
|
||||
// as soon as binning is finished.
|
||||
cl_u8(&bcl, VC4_PACKET_TILE_BINNING_MODE_CONFIG);
|
||||
cl_u32(&bcl, 0); /* tile alloc addr, filled by kernel */
|
||||
cl_u32(&bcl, 0); /* tile alloc size, filled by kernel */
|
||||
cl_u32(&bcl, 0); /* tile state addr, filled by kernel */
|
||||
cl_u8(&bcl, tilew);
|
||||
cl_u8(&bcl, tileh);
|
||||
cl_u8(&bcl, 0); /* flags, filled by kernel. */
|
||||
cl_u8(&bcl, vc4->draw_tiles_x);
|
||||
cl_u8(&bcl, vc4->draw_tiles_y);
|
||||
/* Other flags are filled by kernel. */
|
||||
cl_u8(&bcl, vc4->msaa ? VC4_BIN_CONFIG_MS_MODE_4X : 0);
|
||||
|
||||
/* START_TILE_BINNING resets the statechange counters in the hardware,
|
||||
* which are what is used when a primitive is binned to a tile to
|
||||
@@ -102,8 +98,8 @@ vc4_start_draw(struct vc4_context *vc4)
|
||||
|
||||
vc4->needs_flush = true;
|
||||
vc4->draw_calls_queued++;
|
||||
vc4->draw_width = width;
|
||||
vc4->draw_height = height;
|
||||
vc4->draw_width = vc4->framebuffer.width;
|
||||
vc4->draw_height = vc4->framebuffer.height;
|
||||
|
||||
cl_end(&vc4->bcl, bcl);
|
||||
}
|
||||
|
@@ -44,10 +44,13 @@ struct drm_vc4_submit_rcl_surface {
|
||||
uint32_t hindex; /* Handle index, or ~0 if not present. */
|
||||
uint32_t offset; /* Offset to start of buffer. */
|
||||
/*
|
||||
* Bits for either render config (color_ms_write) or load/store packet.
|
||||
* Bits for either render config (color_write) or load/store packet.
|
||||
* Bits should all be 0 for MSAA load/stores.
|
||||
*/
|
||||
uint16_t bits;
|
||||
uint16_t pad;
|
||||
|
||||
#define VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES (1 << 0)
|
||||
uint16_t flags;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -126,9 +129,11 @@ struct drm_vc4_submit_cl {
|
||||
uint8_t max_x_tile;
|
||||
uint8_t max_y_tile;
|
||||
struct drm_vc4_submit_rcl_surface color_read;
|
||||
struct drm_vc4_submit_rcl_surface color_ms_write;
|
||||
struct drm_vc4_submit_rcl_surface color_write;
|
||||
struct drm_vc4_submit_rcl_surface zs_read;
|
||||
struct drm_vc4_submit_rcl_surface zs_write;
|
||||
struct drm_vc4_submit_rcl_surface msaa_color_write;
|
||||
struct drm_vc4_submit_rcl_surface msaa_zs_write;
|
||||
uint32_t clear_color[2];
|
||||
uint32_t clear_z;
|
||||
uint8_t clear_s;
|
||||
|
@@ -29,17 +29,35 @@ vc4_emit_state(struct pipe_context *pctx)
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
|
||||
struct vc4_cl_out *bcl = cl_start(&vc4->bcl);
|
||||
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT)) {
|
||||
if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT |
|
||||
VC4_DIRTY_RASTERIZER)) {
|
||||
float *vpscale = vc4->viewport.scale;
|
||||
float *vptranslate = vc4->viewport.translate;
|
||||
float vp_minx = -fabsf(vpscale[0]) + vptranslate[0];
|
||||
float vp_maxx = fabsf(vpscale[0]) + vptranslate[0];
|
||||
float vp_miny = -fabsf(vpscale[1]) + vptranslate[1];
|
||||
float vp_maxy = fabsf(vpscale[1]) + vptranslate[1];
|
||||
uint32_t minx = MAX2(vc4->scissor.minx, vp_minx);
|
||||
uint32_t miny = MAX2(vc4->scissor.miny, vp_miny);
|
||||
uint32_t maxx = MIN2(vc4->scissor.maxx, vp_maxx);
|
||||
uint32_t maxy = MIN2(vc4->scissor.maxy, vp_maxy);
|
||||
|
||||
/* Clip to the scissor if it's enabled, but still clip to the
|
||||
* drawable regardless since that controls where the binner
|
||||
* tries to put things.
|
||||
*
|
||||
* Additionally, always clip the rendering to the viewport,
|
||||
* since the hardware does guardband clipping, meaning
|
||||
* primitives would rasterize outside of the view volume.
|
||||
*/
|
||||
uint32_t minx, miny, maxx, maxy;
|
||||
if (!vc4->rasterizer->base.scissor) {
|
||||
minx = MAX2(vp_minx, 0);
|
||||
miny = MAX2(vp_miny, 0);
|
||||
maxx = MIN2(vp_maxx, vc4->draw_width);
|
||||
maxy = MIN2(vp_maxy, vc4->draw_height);
|
||||
} else {
|
||||
minx = MAX2(vp_minx, vc4->scissor.minx);
|
||||
miny = MAX2(vp_miny, vc4->scissor.miny);
|
||||
maxx = MIN2(vp_maxx, vc4->scissor.maxx);
|
||||
maxy = MIN2(vp_maxy, vc4->scissor.maxy);
|
||||
}
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_CLIP_WINDOW);
|
||||
cl_u16(&bcl, minx);
|
||||
@@ -54,6 +72,20 @@ vc4_emit_state(struct pipe_context *pctx)
|
||||
}
|
||||
|
||||
if (vc4->dirty & (VC4_DIRTY_RASTERIZER | VC4_DIRTY_ZSA)) {
|
||||
uint8_t ez_enable_mask_out = ~0;
|
||||
|
||||
/* HW-2905: If the RCL ends up doing a full-res load when
|
||||
* multisampling, then early Z tracking may end up with values
|
||||
* from the previous tile due to a HW bug. Disable it to
|
||||
* avoid that.
|
||||
*
|
||||
* We should be able to skip this when the Z is cleared, but I
|
||||
* was seeing bad rendering on glxgears -samples 4 even in
|
||||
* that case.
|
||||
*/
|
||||
if (vc4->msaa)
|
||||
ez_enable_mask_out &= ~VC4_CONFIG_BITS_EARLY_Z;
|
||||
|
||||
cl_u8(&bcl, VC4_PACKET_CONFIGURATION_BITS);
|
||||
cl_u8(&bcl,
|
||||
vc4->rasterizer->config_bits[0] |
|
||||
@@ -62,8 +94,8 @@ vc4_emit_state(struct pipe_context *pctx)
|
||||
vc4->rasterizer->config_bits[1] |
|
||||
vc4->zsa->config_bits[1]);
|
||||
cl_u8(&bcl,
|
||||
vc4->rasterizer->config_bits[2] |
|
||||
vc4->zsa->config_bits[2]);
|
||||
(vc4->rasterizer->config_bits[2] |
|
||||
vc4->zsa->config_bits[2]) & ez_enable_mask_out);
|
||||
}
|
||||
|
||||
if (vc4->dirty & VC4_DIRTY_RASTERIZER) {
|
||||
|
@@ -89,31 +89,37 @@ vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
|
||||
submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
|
||||
submit_surf->offset = surf->offset;
|
||||
|
||||
if (is_depth) {
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
|
||||
VC4_LOADSTORE_TILE_BUFFER_BUFFER);
|
||||
if (psurf->texture->nr_samples == 0) {
|
||||
if (is_depth) {
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
|
||||
VC4_LOADSTORE_TILE_BUFFER_BUFFER);
|
||||
|
||||
} else {
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
|
||||
VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
|
||||
VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
|
||||
VC4_LOADSTORE_TILE_BUFFER_BGR565 :
|
||||
VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
|
||||
VC4_LOADSTORE_TILE_BUFFER_FORMAT);
|
||||
}
|
||||
submit_surf->bits |=
|
||||
VC4_SET_FIELD(surf->tiling,
|
||||
VC4_LOADSTORE_TILE_BUFFER_TILING);
|
||||
} else {
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
|
||||
VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
|
||||
VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
|
||||
VC4_LOADSTORE_TILE_BUFFER_BGR565 :
|
||||
VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
|
||||
VC4_LOADSTORE_TILE_BUFFER_FORMAT);
|
||||
assert(!is_write);
|
||||
submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
|
||||
}
|
||||
submit_surf->bits |=
|
||||
VC4_SET_FIELD(surf->tiling, VC4_LOADSTORE_TILE_BUFFER_TILING);
|
||||
|
||||
if (is_write)
|
||||
rsc->writes++;
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4,
|
||||
struct drm_vc4_submit_rcl_surface *submit_surf,
|
||||
struct pipe_surface *psurf)
|
||||
vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
|
||||
struct drm_vc4_submit_rcl_surface *submit_surf,
|
||||
struct pipe_surface *psurf)
|
||||
{
|
||||
struct vc4_surface *surf = vc4_surface(psurf);
|
||||
|
||||
@@ -126,16 +132,38 @@ vc4_submit_setup_ms_rcl_surface(struct vc4_context *vc4,
|
||||
submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
|
||||
submit_surf->offset = surf->offset;
|
||||
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
|
||||
VC4_RENDER_CONFIG_FORMAT_BGR565 :
|
||||
VC4_RENDER_CONFIG_FORMAT_RGBA8888,
|
||||
VC4_RENDER_CONFIG_FORMAT) |
|
||||
VC4_SET_FIELD(surf->tiling, VC4_RENDER_CONFIG_MEMORY_FORMAT);
|
||||
if (psurf->texture->nr_samples == 0) {
|
||||
submit_surf->bits =
|
||||
VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
|
||||
VC4_RENDER_CONFIG_FORMAT_BGR565 :
|
||||
VC4_RENDER_CONFIG_FORMAT_RGBA8888,
|
||||
VC4_RENDER_CONFIG_FORMAT) |
|
||||
VC4_SET_FIELD(surf->tiling,
|
||||
VC4_RENDER_CONFIG_MEMORY_FORMAT);
|
||||
}
|
||||
|
||||
rsc->writes++;
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_submit_setup_rcl_msaa_surface(struct vc4_context *vc4,
|
||||
struct drm_vc4_submit_rcl_surface *submit_surf,
|
||||
struct pipe_surface *psurf)
|
||||
{
|
||||
struct vc4_surface *surf = vc4_surface(psurf);
|
||||
|
||||
if (!surf) {
|
||||
submit_surf->hindex = ~0;
|
||||
return;
|
||||
}
|
||||
|
||||
struct vc4_resource *rsc = vc4_resource(psurf->texture);
|
||||
submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
|
||||
submit_surf->offset = surf->offset;
|
||||
submit_surf->bits = 0;
|
||||
rsc->writes++;
|
||||
}
|
||||
|
||||
/**
|
||||
* Submits the job to the kernel and then reinitializes it.
|
||||
*/
|
||||
@@ -150,18 +178,35 @@ vc4_job_submit(struct vc4_context *vc4)
|
||||
struct drm_vc4_submit_cl submit;
|
||||
memset(&submit, 0, sizeof(submit));
|
||||
|
||||
cl_ensure_space(&vc4->bo_handles, 4 * sizeof(uint32_t));
|
||||
cl_ensure_space(&vc4->bo_pointers, 4 * sizeof(struct vc4_bo *));
|
||||
cl_ensure_space(&vc4->bo_handles, 6 * sizeof(uint32_t));
|
||||
cl_ensure_space(&vc4->bo_pointers, 6 * sizeof(struct vc4_bo *));
|
||||
|
||||
vc4_submit_setup_rcl_surface(vc4, &submit.color_read,
|
||||
vc4->color_read, false, false);
|
||||
vc4_submit_setup_ms_rcl_surface(vc4, &submit.color_ms_write,
|
||||
vc4->color_write);
|
||||
vc4_submit_setup_rcl_render_config_surface(vc4, &submit.color_write,
|
||||
vc4->color_write);
|
||||
vc4_submit_setup_rcl_surface(vc4, &submit.zs_read,
|
||||
vc4->zs_read, true, false);
|
||||
vc4_submit_setup_rcl_surface(vc4, &submit.zs_write,
|
||||
vc4->zs_write, true, true);
|
||||
|
||||
vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_color_write,
|
||||
vc4->msaa_color_write);
|
||||
vc4_submit_setup_rcl_msaa_surface(vc4, &submit.msaa_zs_write,
|
||||
vc4->msaa_zs_write);
|
||||
|
||||
if (vc4->msaa) {
|
||||
/* This bit controls how many pixels the general
|
||||
* (i.e. subsampled) loads/stores are iterating over
|
||||
* (multisample loads replicate out to the other samples).
|
||||
*/
|
||||
submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
|
||||
/* Controls whether color_write's
|
||||
* VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
|
||||
*/
|
||||
submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
|
||||
}
|
||||
|
||||
submit.bo_handles = (uintptr_t)vc4->bo_handles.base;
|
||||
submit.bo_handle_count = cl_offset(&vc4->bo_handles) / 4;
|
||||
submit.bin_cl = (uintptr_t)vc4->bcl.base;
|
||||
@@ -173,10 +218,10 @@ vc4_job_submit(struct vc4_context *vc4)
|
||||
submit.uniforms_size = cl_offset(&vc4->uniforms);
|
||||
|
||||
assert(vc4->draw_min_x != ~0 && vc4->draw_min_y != ~0);
|
||||
submit.min_x_tile = vc4->draw_min_x / 64;
|
||||
submit.min_y_tile = vc4->draw_min_y / 64;
|
||||
submit.max_x_tile = (vc4->draw_max_x - 1) / 64;
|
||||
submit.max_y_tile = (vc4->draw_max_y - 1) / 64;
|
||||
submit.min_x_tile = vc4->draw_min_x / vc4->tile_width;
|
||||
submit.min_y_tile = vc4->draw_min_y / vc4->tile_height;
|
||||
submit.max_x_tile = (vc4->draw_max_x - 1) / vc4->tile_width;
|
||||
submit.max_y_tile = (vc4->draw_max_y - 1) / vc4->tile_height;
|
||||
submit.width = vc4->draw_width;
|
||||
submit.height = vc4->draw_height;
|
||||
if (vc4->cleared) {
|
||||
|
@@ -29,6 +29,10 @@
|
||||
* from the tile buffer after having waited for the scoreboard (which is
|
||||
* handled by vc4_qpu_emit.c), then do math using your output color and that
|
||||
* destination value, and update the output color appropriately.
|
||||
*
|
||||
* Once this pass is done, the color write will either have one component (for
|
||||
* single sample) with packed argb8888, or 4 components with the per-sample
|
||||
* argb8888 result.
|
||||
*/
|
||||
|
||||
/**
|
||||
@@ -40,15 +44,23 @@
|
||||
#include "glsl/nir/nir_builder.h"
|
||||
#include "vc4_context.h"
|
||||
|
||||
static bool
|
||||
blend_depends_on_dst_color(struct vc4_compile *c)
|
||||
{
|
||||
return (c->fs_key->blend.blend_enable ||
|
||||
c->fs_key->blend.colormask != 0xf ||
|
||||
c->fs_key->logicop_func != PIPE_LOGICOP_COPY);
|
||||
}
|
||||
|
||||
/** Emits a load of the previous fragment color from the tile buffer. */
|
||||
static nir_ssa_def *
|
||||
vc4_nir_get_dst_color(nir_builder *b)
|
||||
vc4_nir_get_dst_color(nir_builder *b, int sample)
|
||||
{
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(b->shader,
|
||||
nir_intrinsic_load_input);
|
||||
load->num_components = 1;
|
||||
load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT;
|
||||
load->const_index[0] = VC4_NIR_TLB_COLOR_READ_INPUT + sample;
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
return &load->dest.ssa;
|
||||
@@ -496,23 +508,26 @@ vc4_nir_swizzle_and_pack(struct vc4_compile *c, nir_builder *b,
|
||||
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
|
||||
nir_intrinsic_instr *intr)
|
||||
static nir_ssa_def *
|
||||
vc4_nir_blend_pipeline(struct vc4_compile *c, nir_builder *b, nir_ssa_def *src,
|
||||
int sample)
|
||||
{
|
||||
enum pipe_format color_format = c->fs_key->color_format;
|
||||
const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
|
||||
bool srgb = util_format_is_srgb(color_format);
|
||||
|
||||
/* Pull out the float src/dst color components. */
|
||||
nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b);
|
||||
nir_ssa_def *packed_dst_color = vc4_nir_get_dst_color(b, sample);
|
||||
nir_ssa_def *dst_vec4 = nir_unpack_unorm_4x8(b, packed_dst_color);
|
||||
nir_ssa_def *src_color[4], *unpacked_dst_color[4];
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
src_color[i] = nir_swizzle(b, intr->src[0].ssa, &i, 1, false);
|
||||
unpacked_dst_color[i] = nir_swizzle(b, dst_vec4, &i, 1, false);
|
||||
src_color[i] = nir_channel(b, src, i);
|
||||
unpacked_dst_color[i] = nir_channel(b, dst_vec4, i);
|
||||
}
|
||||
|
||||
if (c->fs_key->sample_alpha_to_one && c->fs_key->msaa)
|
||||
src_color[3] = nir_imm_float(b, 1.0);
|
||||
|
||||
vc4_nir_emit_alpha_test_discard(c, b, src_color[3]);
|
||||
|
||||
nir_ssa_def *packed_color;
|
||||
@@ -560,16 +575,100 @@ vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
|
||||
colormask &= ~(0xff << (i * 8));
|
||||
}
|
||||
}
|
||||
packed_color = nir_ior(b,
|
||||
nir_iand(b, packed_color,
|
||||
nir_imm_int(b, colormask)),
|
||||
nir_iand(b, packed_dst_color,
|
||||
nir_imm_int(b, ~colormask)));
|
||||
|
||||
/* Turn the old vec4 output into a store of the packed color. */
|
||||
nir_instr_rewrite_src(&intr->instr, &intr->src[0],
|
||||
nir_src_for_ssa(packed_color));
|
||||
return nir_ior(b,
|
||||
nir_iand(b, packed_color,
|
||||
nir_imm_int(b, colormask)),
|
||||
nir_iand(b, packed_dst_color,
|
||||
nir_imm_int(b, ~colormask)));
|
||||
}
|
||||
|
||||
static int
|
||||
vc4_nir_next_output_driver_location(nir_shader *s)
|
||||
{
|
||||
int maxloc = -1;
|
||||
|
||||
nir_foreach_variable(var, &s->outputs)
|
||||
maxloc = MAX2(maxloc, (int)var->data.driver_location);
|
||||
|
||||
return maxloc + 1;
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_nir_store_sample_mask(struct vc4_compile *c, nir_builder *b,
|
||||
nir_ssa_def *val)
|
||||
{
|
||||
nir_variable *sample_mask = nir_variable_create(c->s, nir_var_shader_out,
|
||||
glsl_uint_type(),
|
||||
"sample_mask");
|
||||
sample_mask->data.driver_location =
|
||||
vc4_nir_next_output_driver_location(c->s);
|
||||
sample_mask->data.location = FRAG_RESULT_SAMPLE_MASK;
|
||||
|
||||
nir_intrinsic_instr *intr =
|
||||
nir_intrinsic_instr_create(c->s, nir_intrinsic_store_output);
|
||||
intr->num_components = 1;
|
||||
intr->const_index[0] = sample_mask->data.driver_location;
|
||||
|
||||
intr->src[0] = nir_src_for_ssa(val);
|
||||
nir_builder_instr_insert(b, &intr->instr);
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_nir_lower_blend_instr(struct vc4_compile *c, nir_builder *b,
|
||||
nir_intrinsic_instr *intr)
|
||||
{
|
||||
nir_ssa_def *frag_color = intr->src[0].ssa;
|
||||
|
||||
if (c->fs_key->sample_coverage) {
|
||||
nir_intrinsic_instr *load =
|
||||
nir_intrinsic_instr_create(b->shader,
|
||||
nir_intrinsic_load_sample_mask_in);
|
||||
load->num_components = 1;
|
||||
nir_ssa_dest_init(&load->instr, &load->dest, 1, NULL);
|
||||
nir_builder_instr_insert(b, &load->instr);
|
||||
|
||||
nir_ssa_def *bitmask = &load->dest.ssa;
|
||||
|
||||
vc4_nir_store_sample_mask(c, b, bitmask);
|
||||
} else if (c->fs_key->sample_alpha_to_coverage) {
|
||||
nir_ssa_def *a = nir_channel(b, frag_color, 3);
|
||||
|
||||
/* XXX: We should do a nice dither based on the fragment
|
||||
* coordinate, instead.
|
||||
*/
|
||||
nir_ssa_def *num_samples = nir_imm_float(b, VC4_MAX_SAMPLES);
|
||||
nir_ssa_def *num_bits = nir_f2i(b, nir_fmul(b, a, num_samples));
|
||||
nir_ssa_def *bitmask = nir_isub(b,
|
||||
nir_ishl(b,
|
||||
nir_imm_int(b, 1),
|
||||
num_bits),
|
||||
nir_imm_int(b, 1));
|
||||
vc4_nir_store_sample_mask(c, b, bitmask);
|
||||
}
|
||||
|
||||
/* The TLB color read returns each sample in turn, so if our blending
|
||||
* depends on the destination color, we're going to have to run the
|
||||
* blending function separately for each destination sample value, and
|
||||
* then output the per-sample color using TLB_COLOR_MS.
|
||||
*/
|
||||
nir_ssa_def *blend_output;
|
||||
if (c->fs_key->msaa && blend_depends_on_dst_color(c)) {
|
||||
c->msaa_per_sample_output = true;
|
||||
|
||||
nir_ssa_def *samples[4];
|
||||
for (int i = 0; i < VC4_MAX_SAMPLES; i++)
|
||||
samples[i] = vc4_nir_blend_pipeline(c, b, frag_color, i);
|
||||
blend_output = nir_vec4(b,
|
||||
samples[0], samples[1],
|
||||
samples[2], samples[3]);
|
||||
} else {
|
||||
blend_output = vc4_nir_blend_pipeline(c, b, frag_color, 0);
|
||||
}
|
||||
|
||||
nir_instr_rewrite_src(&intr->instr, &intr->src[0],
|
||||
nir_src_for_ssa(blend_output));
|
||||
intr->num_components = blend_output->num_components;
|
||||
}
|
||||
|
||||
static bool
|
||||
@@ -577,7 +676,7 @@ vc4_nir_lower_blend_block(nir_block *block, void *state)
|
||||
{
|
||||
struct vc4_compile *c = state;
|
||||
|
||||
nir_foreach_instr(block, instr) {
|
||||
nir_foreach_instr_safe(block, instr) {
|
||||
if (instr->type != nir_instr_type_intrinsic)
|
||||
continue;
|
||||
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
|
||||
|
@@ -84,7 +84,7 @@ vc4_nir_unpack_16u(nir_builder *b, nir_ssa_def *src, unsigned chan)
|
||||
static nir_ssa_def *
|
||||
vc4_nir_unpack_8f(nir_builder *b, nir_ssa_def *src, unsigned chan)
|
||||
{
|
||||
return nir_swizzle(b, nir_unpack_unorm_4x8(b, src), &chan, 1, false);
|
||||
return nir_channel(b, nir_unpack_unorm_4x8(b, src), chan);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
@@ -226,7 +226,9 @@ vc4_nir_lower_fs_input(struct vc4_compile *c, nir_builder *b,
|
||||
{
|
||||
b->cursor = nir_before_instr(&intr->instr);
|
||||
|
||||
if (intr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
if (intr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT &&
|
||||
intr->const_index[0] < (VC4_NIR_TLB_COLOR_READ_INPUT +
|
||||
VC4_MAX_SAMPLES)) {
|
||||
/* This doesn't need any lowering. */
|
||||
return;
|
||||
}
|
||||
@@ -309,7 +311,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
|
||||
/* Color output is lowered by vc4_nir_lower_blend(). */
|
||||
if (c->stage == QSTAGE_FRAG &&
|
||||
(output_var->data.location == FRAG_RESULT_COLOR ||
|
||||
output_var->data.location == FRAG_RESULT_DATA0)) {
|
||||
output_var->data.location == FRAG_RESULT_DATA0 ||
|
||||
output_var->data.location == FRAG_RESULT_SAMPLE_MASK)) {
|
||||
intr->const_index[0] *= 4;
|
||||
return;
|
||||
}
|
||||
@@ -326,9 +329,8 @@ vc4_nir_lower_output(struct vc4_compile *c, nir_builder *b,
|
||||
intr_comp->const_index[0] = intr->const_index[0] * 4 + i;
|
||||
|
||||
assert(intr->src[0].is_ssa);
|
||||
intr_comp->src[0] = nir_src_for_ssa(nir_swizzle(b,
|
||||
intr->src[0].ssa,
|
||||
&i, 1, false));
|
||||
intr_comp->src[0] =
|
||||
nir_src_for_ssa(nir_channel(b, intr->src[0].ssa, i));
|
||||
nir_builder_instr_insert(b, &intr_comp->instr);
|
||||
}
|
||||
|
||||
|
172
src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
Normal file
172
src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c
Normal file
@@ -0,0 +1,172 @@
|
||||
/*
|
||||
* Copyright © 2015 Broadcom
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "vc4_qir.h"
|
||||
#include "kernel/vc4_packet.h"
|
||||
#include "tgsi/tgsi_info.h"
|
||||
#include "glsl/nir/nir_builder.h"
|
||||
|
||||
/** @file vc4_nir_lower_txf_ms.c
|
||||
* Walks the NIR generated by TGSI-to-NIR to lower its nir_texop_txf_ms
|
||||
* coordinates to do the math necessary and use a plain nir_texop_txf instead.
|
||||
*
|
||||
* MSAA textures are laid out as 32x32-aligned blocks of RGBA8888 or Z24S8.
|
||||
* We can't load them through the normal sampler path because of the lack of
|
||||
* linear support in the hardware. So, we treat MSAA textures as a giant UBO
|
||||
* and do the math in the shader.
|
||||
*/
|
||||
|
||||
static void
|
||||
vc4_nir_lower_txf_ms_instr(struct vc4_compile *c, nir_builder *b,
|
||||
nir_tex_instr *txf_ms)
|
||||
{
|
||||
if (txf_ms->op != nir_texop_txf_ms)
|
||||
return;
|
||||
|
||||
b->cursor = nir_before_instr(&txf_ms->instr);
|
||||
|
||||
nir_tex_instr *txf = nir_tex_instr_create(c->s, 1);
|
||||
txf->op = nir_texop_txf;
|
||||
txf->sampler = txf_ms->sampler;
|
||||
txf->sampler_index = txf_ms->sampler_index;
|
||||
txf->coord_components = txf_ms->coord_components;
|
||||
txf->is_shadow = txf_ms->is_shadow;
|
||||
txf->is_new_style_shadow = txf_ms->is_new_style_shadow;
|
||||
|
||||
nir_ssa_def *coord = NULL, *sample_index = NULL;
|
||||
for (int i = 0; i < txf_ms->num_srcs; i++) {
|
||||
assert(txf_ms->src[i].src.is_ssa);
|
||||
|
||||
switch (txf_ms->src[i].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
coord = txf_ms->src[i].src.ssa;
|
||||
break;
|
||||
case nir_tex_src_ms_index:
|
||||
sample_index = txf_ms->src[i].src.ssa;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unknown txf_ms src\n");
|
||||
}
|
||||
}
|
||||
assert(coord);
|
||||
assert(sample_index);
|
||||
|
||||
nir_ssa_def *x = nir_channel(b, coord, 0);
|
||||
nir_ssa_def *y = nir_channel(b, coord, 1);
|
||||
|
||||
uint32_t tile_w = 32;
|
||||
uint32_t tile_h = 32;
|
||||
uint32_t tile_w_shift = 5;
|
||||
uint32_t tile_h_shift = 5;
|
||||
uint32_t tile_size = (tile_h * tile_w *
|
||||
VC4_MAX_SAMPLES * sizeof(uint32_t));
|
||||
unsigned unit = txf_ms->sampler_index;
|
||||
uint32_t w = align(c->key->tex[unit].msaa_width, tile_w);
|
||||
uint32_t w_tiles = w / tile_w;
|
||||
|
||||
nir_ssa_def *x_tile = nir_ushr(b, x, nir_imm_int(b, tile_w_shift));
|
||||
nir_ssa_def *y_tile = nir_ushr(b, y, nir_imm_int(b, tile_h_shift));
|
||||
nir_ssa_def *tile_addr = nir_iadd(b,
|
||||
nir_imul(b, x_tile,
|
||||
nir_imm_int(b, tile_size)),
|
||||
nir_imul(b, y_tile,
|
||||
nir_imm_int(b, (w_tiles *
|
||||
tile_size))));
|
||||
nir_ssa_def *x_subspan = nir_iand(b, x,
|
||||
nir_imm_int(b, (tile_w - 1) & ~1));
|
||||
nir_ssa_def *y_subspan = nir_iand(b, y,
|
||||
nir_imm_int(b, (tile_h - 1) & ~1));
|
||||
nir_ssa_def *subspan_addr = nir_iadd(b,
|
||||
nir_imul(b, x_subspan,
|
||||
nir_imm_int(b, 2 * VC4_MAX_SAMPLES * sizeof(uint32_t))),
|
||||
nir_imul(b, y_subspan,
|
||||
nir_imm_int(b,
|
||||
tile_w *
|
||||
VC4_MAX_SAMPLES *
|
||||
sizeof(uint32_t))));
|
||||
|
||||
nir_ssa_def *pixel_addr = nir_ior(b,
|
||||
nir_iand(b,
|
||||
nir_ishl(b, x,
|
||||
nir_imm_int(b, 2)),
|
||||
nir_imm_int(b, (1 << 2))),
|
||||
nir_iand(b,
|
||||
nir_ishl(b, y,
|
||||
nir_imm_int(b, 3)),
|
||||
nir_imm_int(b, (1 << 3))));
|
||||
|
||||
nir_ssa_def *sample_addr = nir_ishl(b, sample_index, nir_imm_int(b, 4));
|
||||
|
||||
nir_ssa_def *addr = nir_iadd(b,
|
||||
nir_ior(b, sample_addr, pixel_addr),
|
||||
nir_iadd(b, subspan_addr, tile_addr));
|
||||
|
||||
txf->src[0].src_type = nir_tex_src_coord;
|
||||
txf->src[0].src = nir_src_for_ssa(nir_vec2(b, addr, nir_imm_int(b, 0)));
|
||||
nir_ssa_dest_init(&txf->instr, &txf->dest, 4, NULL);
|
||||
nir_builder_instr_insert(b, &txf->instr);
|
||||
nir_ssa_def_rewrite_uses(&txf_ms->dest.ssa,
|
||||
nir_src_for_ssa(&txf->dest.ssa));
|
||||
nir_instr_remove(&txf_ms->instr);
|
||||
}
|
||||
|
||||
static bool
|
||||
vc4_nir_lower_txf_ms_block(nir_block *block, void *arg)
|
||||
{
|
||||
struct vc4_compile *c = arg;
|
||||
nir_function_impl *impl =
|
||||
nir_cf_node_get_function(&block->cf_node);
|
||||
|
||||
nir_builder b;
|
||||
nir_builder_init(&b, impl);
|
||||
|
||||
nir_foreach_instr_safe(block, instr) {
|
||||
if (instr->type == nir_instr_type_tex) {
|
||||
vc4_nir_lower_txf_ms_instr(c, &b,
|
||||
nir_instr_as_tex(instr));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
vc4_nir_lower_txf_ms_impl(struct vc4_compile *c, nir_function_impl *impl)
|
||||
{
|
||||
nir_foreach_block(impl, vc4_nir_lower_txf_ms_block, c);
|
||||
|
||||
nir_metadata_preserve(impl,
|
||||
nir_metadata_block_index |
|
||||
nir_metadata_dominance);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
vc4_nir_lower_txf_ms(struct vc4_compile *c)
|
||||
{
|
||||
nir_foreach_overload(c->s, overload) {
|
||||
if (overload->impl)
|
||||
vc4_nir_lower_txf_ms_impl(c, overload->impl);
|
||||
}
|
||||
}
|
@@ -94,7 +94,12 @@ static void
|
||||
replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg)
|
||||
{
|
||||
dump_from(c, inst);
|
||||
inst->op = QOP_MOV;
|
||||
if (qir_is_mul(inst))
|
||||
inst->op = QOP_MMOV;
|
||||
else if (qir_is_float_input(inst))
|
||||
inst->op = QOP_FMOV;
|
||||
else
|
||||
inst->op = QOP_MOV;
|
||||
inst->src[0] = arg;
|
||||
inst->src[1] = c->undef;
|
||||
dump_to(c, inst);
|
||||
@@ -181,6 +186,7 @@ qir_opt_algebraic(struct vc4_compile *c)
|
||||
case QOP_SUB:
|
||||
if (is_zero(c, inst->src[1])) {
|
||||
replace_with_mov(c, inst, inst->src[0]);
|
||||
progress = true;
|
||||
}
|
||||
break;
|
||||
|
||||
|
@@ -294,6 +294,76 @@ ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
|
||||
qir_uniform_ui(c, 24)));
|
||||
}
|
||||
|
||||
static struct qreg
|
||||
ntq_scale_depth_texture(struct vc4_compile *c, struct qreg src)
|
||||
{
|
||||
struct qreg depthf = qir_ITOF(c, qir_SHR(c, src,
|
||||
qir_uniform_ui(c, 8)));
|
||||
return qir_FMUL(c, depthf, qir_uniform_f(c, 1.0f/0xffffff));
|
||||
}
|
||||
|
||||
/**
|
||||
* Emits a lowered TXF_MS from an MSAA texture.
|
||||
*
|
||||
* The addressing math has been lowered in NIR, and now we just need to read
|
||||
* it like a UBO.
|
||||
*/
|
||||
static void
|
||||
ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
|
||||
{
|
||||
uint32_t tile_width = 32;
|
||||
uint32_t tile_height = 32;
|
||||
uint32_t tile_size = (tile_height * tile_width *
|
||||
VC4_MAX_SAMPLES * sizeof(uint32_t));
|
||||
|
||||
unsigned unit = instr->sampler_index;
|
||||
uint32_t w = align(c->key->tex[unit].msaa_width, tile_width);
|
||||
uint32_t w_tiles = w / tile_width;
|
||||
uint32_t h = align(c->key->tex[unit].msaa_height, tile_height);
|
||||
uint32_t h_tiles = h / tile_height;
|
||||
uint32_t size = w_tiles * h_tiles * tile_size;
|
||||
|
||||
struct qreg addr;
|
||||
assert(instr->num_srcs == 1);
|
||||
assert(instr->src[0].src_type == nir_tex_src_coord);
|
||||
addr = ntq_get_src(c, instr->src[0].src, 0);
|
||||
|
||||
/* Perform the clamping required by kernel validation. */
|
||||
addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
|
||||
addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
|
||||
|
||||
qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
|
||||
|
||||
struct qreg tex = qir_TEX_RESULT(c);
|
||||
c->num_texture_samples++;
|
||||
|
||||
struct qreg texture_output[4];
|
||||
enum pipe_format format = c->key->tex[unit].format;
|
||||
if (util_format_is_depth_or_stencil(format)) {
|
||||
struct qreg scaled = ntq_scale_depth_texture(c, tex);
|
||||
for (int i = 0; i < 4; i++)
|
||||
texture_output[i] = scaled;
|
||||
} else {
|
||||
struct qreg tex_result_unpacked[4];
|
||||
for (int i = 0; i < 4; i++)
|
||||
tex_result_unpacked[i] = qir_UNPACK_8_F(c, tex, i);
|
||||
|
||||
const uint8_t *format_swiz =
|
||||
vc4_get_format_swizzle(c->key->tex[unit].format);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
texture_output[i] =
|
||||
get_swizzled_channel(c, tex_result_unpacked,
|
||||
format_swiz[i]);
|
||||
}
|
||||
}
|
||||
|
||||
struct qreg *dest = ntq_get_dest(c, &instr->dest);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
dest[i] = get_swizzled_channel(c, texture_output,
|
||||
c->key->tex[unit].swizzle[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
|
||||
{
|
||||
@@ -301,6 +371,11 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
|
||||
bool is_txb = false, is_txl = false, has_proj = false;
|
||||
unsigned unit = instr->sampler_index;
|
||||
|
||||
if (instr->op == nir_texop_txf) {
|
||||
ntq_emit_txf(c, instr);
|
||||
return;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < instr->num_srcs; i++) {
|
||||
switch (instr->src[i].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
@@ -396,11 +471,7 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
|
||||
|
||||
struct qreg unpacked[4];
|
||||
if (util_format_is_depth_or_stencil(format)) {
|
||||
struct qreg depthf = qir_ITOF(c, qir_SHR(c, tex,
|
||||
qir_uniform_ui(c, 8)));
|
||||
struct qreg normalized = qir_FMUL(c, depthf,
|
||||
qir_uniform_f(c, 1.0f/0xffffff));
|
||||
|
||||
struct qreg normalized = ntq_scale_depth_texture(c, tex);
|
||||
struct qreg depth_output;
|
||||
|
||||
struct qreg one = qir_uniform_f(c, 1.0f);
|
||||
@@ -1109,6 +1180,10 @@ emit_frag_end(struct vc4_compile *c)
|
||||
}
|
||||
}
|
||||
|
||||
if (c->output_sample_mask_index != -1) {
|
||||
qir_MS_MASK(c, c->outputs[c->output_sample_mask_index]);
|
||||
}
|
||||
|
||||
if (c->fs_key->depth_enabled) {
|
||||
struct qreg z;
|
||||
if (c->output_position_index != -1) {
|
||||
@@ -1120,7 +1195,12 @@ emit_frag_end(struct vc4_compile *c)
|
||||
qir_TLB_Z_WRITE(c, z);
|
||||
}
|
||||
|
||||
qir_TLB_COLOR_WRITE(c, color);
|
||||
if (!c->msaa_per_sample_output) {
|
||||
qir_TLB_COLOR_WRITE(c, color);
|
||||
} else {
|
||||
for (int i = 0; i < VC4_MAX_SAMPLES; i++)
|
||||
qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1171,7 +1251,7 @@ emit_point_size_write(struct vc4_compile *c)
|
||||
struct qreg point_size;
|
||||
|
||||
if (c->output_point_size_index != -1)
|
||||
point_size = c->outputs[c->output_point_size_index + 3];
|
||||
point_size = c->outputs[c->output_point_size_index];
|
||||
else
|
||||
point_size = qir_uniform_f(c, 1.0);
|
||||
|
||||
@@ -1359,6 +1439,9 @@ ntq_setup_outputs(struct vc4_compile *c)
|
||||
case FRAG_RESULT_DEPTH:
|
||||
c->output_position_index = loc;
|
||||
break;
|
||||
case FRAG_RESULT_SAMPLE_MASK:
|
||||
c->output_sample_mask_index = loc;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
switch (var->data.location) {
|
||||
@@ -1462,20 +1545,48 @@ ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
|
||||
instr->const_index[0]);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_sample_mask_in:
|
||||
*dest = qir_uniform(c, QUNIFORM_SAMPLE_MASK, 0);
|
||||
break;
|
||||
|
||||
case nir_intrinsic_load_input:
|
||||
assert(instr->num_components == 1);
|
||||
if (instr->const_index[0] == VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
*dest = qir_TLB_COLOR_READ(c);
|
||||
if (instr->const_index[0] >= VC4_NIR_TLB_COLOR_READ_INPUT) {
|
||||
/* Reads of the per-sample color need to be done in
|
||||
* order.
|
||||
*/
|
||||
int sample_index = (instr->const_index[0] -
|
||||
VC4_NIR_TLB_COLOR_READ_INPUT);
|
||||
for (int i = 0; i <= sample_index; i++) {
|
||||
if (c->color_reads[i].file == QFILE_NULL) {
|
||||
c->color_reads[i] =
|
||||
qir_TLB_COLOR_READ(c);
|
||||
}
|
||||
}
|
||||
*dest = c->color_reads[sample_index];
|
||||
} else {
|
||||
*dest = c->inputs[instr->const_index[0]];
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_store_output:
|
||||
assert(instr->num_components == 1);
|
||||
c->outputs[instr->const_index[0]] =
|
||||
qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
|
||||
c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
|
||||
/* MSAA color outputs are the only case where we have an
|
||||
* output that's not lowered to being a store of a single 32
|
||||
* bit value.
|
||||
*/
|
||||
if (c->stage == QSTAGE_FRAG && instr->num_components == 4) {
|
||||
assert(instr->const_index[0] == c->output_color_index);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
c->sample_colors[i] =
|
||||
qir_MOV(c, ntq_get_src(c, instr->src[0],
|
||||
i));
|
||||
}
|
||||
} else {
|
||||
assert(instr->num_components == 1);
|
||||
c->outputs[instr->const_index[0]] =
|
||||
qir_MOV(c, ntq_get_src(c, instr->src[0], 0));
|
||||
c->num_outputs = MAX2(c->num_outputs, instr->const_index[0] + 1);
|
||||
}
|
||||
break;
|
||||
|
||||
case nir_intrinsic_discard:
|
||||
@@ -1672,6 +1783,7 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
|
||||
nir_lower_clip_vs(c->s, c->key->ucp_enables);
|
||||
|
||||
vc4_nir_lower_io(c);
|
||||
vc4_nir_lower_txf_ms(c);
|
||||
nir_lower_idiv(c->s);
|
||||
nir_lower_load_const_to_scalar(c->s);
|
||||
|
||||
@@ -1907,12 +2019,19 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
|
||||
struct pipe_sampler_state *sampler_state =
|
||||
texstate->samplers[i];
|
||||
|
||||
if (sampler) {
|
||||
key->tex[i].format = sampler->format;
|
||||
key->tex[i].swizzle[0] = sampler->swizzle_r;
|
||||
key->tex[i].swizzle[1] = sampler->swizzle_g;
|
||||
key->tex[i].swizzle[2] = sampler->swizzle_b;
|
||||
key->tex[i].swizzle[3] = sampler->swizzle_a;
|
||||
if (!sampler)
|
||||
continue;
|
||||
|
||||
key->tex[i].format = sampler->format;
|
||||
key->tex[i].swizzle[0] = sampler->swizzle_r;
|
||||
key->tex[i].swizzle[1] = sampler->swizzle_g;
|
||||
key->tex[i].swizzle[2] = sampler->swizzle_b;
|
||||
key->tex[i].swizzle[3] = sampler->swizzle_a;
|
||||
|
||||
if (sampler->texture->nr_samples) {
|
||||
key->tex[i].msaa_width = sampler->texture->width0;
|
||||
key->tex[i].msaa_height = sampler->texture->height0;
|
||||
} else if (sampler){
|
||||
key->tex[i].compare_mode = sampler_state->compare_mode;
|
||||
key->tex[i].compare_func = sampler_state->compare_func;
|
||||
key->tex[i].wrap_s = sampler_state->wrap_s;
|
||||
@@ -1952,6 +2071,11 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
|
||||
} else {
|
||||
key->logicop_func = PIPE_LOGICOP_COPY;
|
||||
}
|
||||
key->msaa = vc4->rasterizer->base.multisample;
|
||||
key->sample_coverage = (vc4->rasterizer->base.multisample &&
|
||||
vc4->sample_mask != (1 << VC4_MAX_SAMPLES) - 1);
|
||||
key->sample_alpha_to_coverage = vc4->blend->alpha_to_coverage;
|
||||
key->sample_alpha_to_one = vc4->blend->alpha_to_one;
|
||||
if (vc4->framebuffer.cbufs[0])
|
||||
key->color_format = vc4->framebuffer.cbufs[0]->format;
|
||||
|
||||
|
@@ -86,7 +86,9 @@ static const struct qir_op_info qir_op_info[] = {
|
||||
[QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
|
||||
[QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
|
||||
[QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
|
||||
[QOP_TLB_COLOR_WRITE_MS] = { "tlb_color_ms", 0, 1, true },
|
||||
[QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 },
|
||||
[QOP_MS_MASK] = { "ms_mask", 0, 1, true },
|
||||
[QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
|
||||
|
||||
[QOP_FRAG_X] = { "frag_x", 1, 0 },
|
||||
@@ -399,6 +401,7 @@ qir_compile_init(void)
|
||||
c->output_position_index = -1;
|
||||
c->output_color_index = -1;
|
||||
c->output_point_size_index = -1;
|
||||
c->output_sample_mask_index = -1;
|
||||
|
||||
c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
@@ -420,13 +423,19 @@ qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst)
|
||||
struct qreg
|
||||
qir_follow_movs(struct vc4_compile *c, struct qreg reg)
|
||||
{
|
||||
int pack = reg.pack;
|
||||
|
||||
while (reg.file == QFILE_TEMP &&
|
||||
c->defs[reg.index] &&
|
||||
c->defs[reg.index]->op == QOP_MOV &&
|
||||
!c->defs[reg.index]->dst.pack) {
|
||||
(c->defs[reg.index]->op == QOP_MOV ||
|
||||
c->defs[reg.index]->op == QOP_FMOV ||
|
||||
c->defs[reg.index]->op == QOP_MMOV)&&
|
||||
!c->defs[reg.index]->dst.pack &&
|
||||
!c->defs[reg.index]->src[0].pack) {
|
||||
reg = c->defs[reg.index]->src[0];
|
||||
}
|
||||
|
||||
reg.pack = pack;
|
||||
return reg;
|
||||
}
|
||||
|
||||
|
@@ -38,6 +38,7 @@
|
||||
|
||||
#include "vc4_screen.h"
|
||||
#include "vc4_qpu_defines.h"
|
||||
#include "kernel/vc4_packet.h"
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
struct nir_builder;
|
||||
@@ -121,7 +122,9 @@ enum qop {
|
||||
QOP_TLB_STENCIL_SETUP,
|
||||
QOP_TLB_Z_WRITE,
|
||||
QOP_TLB_COLOR_WRITE,
|
||||
QOP_TLB_COLOR_WRITE_MS,
|
||||
QOP_TLB_COLOR_READ,
|
||||
QOP_MS_MASK,
|
||||
QOP_VARY_ADD_C,
|
||||
|
||||
QOP_FRAG_X,
|
||||
@@ -230,6 +233,8 @@ enum quniform_contents {
|
||||
/** A reference to a texture config parameter 2 cubemap stride uniform */
|
||||
QUNIFORM_TEXTURE_CONFIG_P2,
|
||||
|
||||
QUNIFORM_TEXTURE_MSAA_ADDR,
|
||||
|
||||
QUNIFORM_UBO_ADDR,
|
||||
|
||||
QUNIFORM_TEXRECT_SCALE_X,
|
||||
@@ -247,6 +252,7 @@ enum quniform_contents {
|
||||
QUNIFORM_STENCIL,
|
||||
|
||||
QUNIFORM_ALPHA_REF,
|
||||
QUNIFORM_SAMPLE_MASK,
|
||||
};
|
||||
|
||||
struct vc4_varying_slot {
|
||||
@@ -283,11 +289,18 @@ struct vc4_key {
|
||||
struct vc4_uncompiled_shader *shader_state;
|
||||
struct {
|
||||
enum pipe_format format;
|
||||
unsigned compare_mode:1;
|
||||
unsigned compare_func:3;
|
||||
unsigned wrap_s:3;
|
||||
unsigned wrap_t:3;
|
||||
uint8_t swizzle[4];
|
||||
union {
|
||||
struct {
|
||||
unsigned compare_mode:1;
|
||||
unsigned compare_func:3;
|
||||
unsigned wrap_s:3;
|
||||
unsigned wrap_t:3;
|
||||
};
|
||||
struct {
|
||||
uint16_t msaa_width, msaa_height;
|
||||
};
|
||||
};
|
||||
} tex[VC4_MAX_TEXTURE_SAMPLERS];
|
||||
uint8_t ucp_enables;
|
||||
};
|
||||
@@ -304,6 +317,10 @@ struct vc4_fs_key {
|
||||
bool alpha_test;
|
||||
bool point_coord_upper_left;
|
||||
bool light_twoside;
|
||||
bool msaa;
|
||||
bool sample_coverage;
|
||||
bool sample_alpha_to_coverage;
|
||||
bool sample_alpha_to_one;
|
||||
uint8_t alpha_test_func;
|
||||
uint8_t logicop_func;
|
||||
uint32_t point_sprite_mask;
|
||||
@@ -348,6 +365,9 @@ struct vc4_compile {
|
||||
*/
|
||||
struct qreg *inputs;
|
||||
struct qreg *outputs;
|
||||
bool msaa_per_sample_output;
|
||||
struct qreg color_reads[VC4_MAX_SAMPLES];
|
||||
struct qreg sample_colors[VC4_MAX_SAMPLES];
|
||||
uint32_t inputs_array_size;
|
||||
uint32_t outputs_array_size;
|
||||
uint32_t uniforms_array_size;
|
||||
@@ -396,6 +416,7 @@ struct vc4_compile {
|
||||
uint32_t output_position_index;
|
||||
uint32_t output_color_index;
|
||||
uint32_t output_point_size_index;
|
||||
uint32_t output_sample_mask_index;
|
||||
|
||||
struct qreg undef;
|
||||
enum qstage stage;
|
||||
@@ -418,6 +439,8 @@ struct vc4_compile {
|
||||
*/
|
||||
#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000
|
||||
|
||||
#define VC4_NIR_MS_MASK_OUTPUT 2000000000
|
||||
|
||||
/* Special offset for nir_load_uniform values to get a QUNIFORM_*
|
||||
* state-dependent value.
|
||||
*/
|
||||
@@ -476,6 +499,7 @@ nir_ssa_def *vc4_nir_get_state_uniform(struct nir_builder *b,
|
||||
enum quniform_contents contents);
|
||||
nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,
|
||||
nir_ssa_def **srcs, int swiz);
|
||||
void vc4_nir_lower_txf_ms(struct vc4_compile *c);
|
||||
void qir_lower_uniforms(struct vc4_compile *c);
|
||||
|
||||
void qpu_schedule_instructions(struct vc4_compile *c);
|
||||
@@ -616,9 +640,11 @@ QIR_ALU0(FRAG_REV_FLAG)
|
||||
QIR_ALU0(TEX_RESULT)
|
||||
QIR_ALU0(TLB_COLOR_READ)
|
||||
QIR_NODST_1(TLB_COLOR_WRITE)
|
||||
QIR_NODST_1(TLB_COLOR_WRITE_MS)
|
||||
QIR_NODST_1(TLB_Z_WRITE)
|
||||
QIR_NODST_1(TLB_DISCARD_SETUP)
|
||||
QIR_NODST_1(TLB_STENCIL_SETUP)
|
||||
QIR_NODST_1(MS_MASK)
|
||||
|
||||
static inline struct qreg
|
||||
qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
|
||||
|
@@ -116,6 +116,17 @@ qpu_tlbc()
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline struct qpu_reg
|
||||
qpu_tlbc_ms()
|
||||
{
|
||||
struct qpu_reg r = {
|
||||
QPU_MUX_A,
|
||||
QPU_W_TLB_COLOR_MS,
|
||||
};
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline struct qpu_reg qpu_r0(void) { return qpu_rn(0); }
|
||||
static inline struct qpu_reg qpu_r1(void) { return qpu_rn(1); }
|
||||
static inline struct qpu_reg qpu_r2(void) { return qpu_rn(2); }
|
||||
|
@@ -387,6 +387,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
|
||||
qpu_rb(QPU_R_MS_REV_FLAGS)));
|
||||
break;
|
||||
|
||||
case QOP_MS_MASK:
|
||||
src[1] = qpu_ra(QPU_R_MS_REV_FLAGS);
|
||||
fixup_raddr_conflict(c, dst, &src[0], &src[1],
|
||||
qinst, &unpack);
|
||||
queue(c, qpu_a_AND(qpu_ra(QPU_W_MS_FLAGS),
|
||||
src[0], src[1]) | unpack);
|
||||
break;
|
||||
|
||||
case QOP_FRAG_Z:
|
||||
case QOP_FRAG_W:
|
||||
/* QOP_FRAG_Z/W don't emit instructions, just allocate
|
||||
@@ -430,6 +438,13 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
|
||||
}
|
||||
break;
|
||||
|
||||
case QOP_TLB_COLOR_WRITE_MS:
|
||||
queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
|
||||
if (discard) {
|
||||
set_last_cond_add(c, QPU_COND_ZS);
|
||||
}
|
||||
break;
|
||||
|
||||
case QOP_VARY_ADD_C:
|
||||
queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
|
||||
break;
|
||||
|
@@ -295,6 +295,10 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
|
||||
add_write_dep(state, &state->last_tlb, n);
|
||||
break;
|
||||
|
||||
case QPU_W_MS_FLAGS:
|
||||
add_write_dep(state, &state->last_tlb, n);
|
||||
break;
|
||||
|
||||
case QPU_W_NOP:
|
||||
break;
|
||||
|
||||
|
@@ -22,6 +22,7 @@
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "util/u_blit.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_inlines.h"
|
||||
@@ -72,11 +73,18 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx,
|
||||
{
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
struct vc4_transfer *trans = vc4_transfer(ptrans);
|
||||
struct pipe_resource *prsc = ptrans->resource;
|
||||
struct vc4_resource *rsc = vc4_resource(prsc);
|
||||
struct vc4_resource_slice *slice = &rsc->slices[ptrans->level];
|
||||
|
||||
if (trans->map) {
|
||||
struct vc4_resource *rsc;
|
||||
struct vc4_resource_slice *slice;
|
||||
if (trans->ss_resource) {
|
||||
rsc = vc4_resource(trans->ss_resource);
|
||||
slice = &rsc->slices[0];
|
||||
} else {
|
||||
rsc = vc4_resource(ptrans->resource);
|
||||
slice = &rsc->slices[ptrans->level];
|
||||
}
|
||||
|
||||
if (ptrans->usage & PIPE_TRANSFER_WRITE) {
|
||||
vc4_store_tiled_image(rsc->bo->map + slice->offset +
|
||||
ptrans->box.z * rsc->cube_map_stride,
|
||||
@@ -88,10 +96,52 @@ vc4_resource_transfer_unmap(struct pipe_context *pctx,
|
||||
free(trans->map);
|
||||
}
|
||||
|
||||
if (trans->ss_resource && (ptrans->usage & PIPE_TRANSFER_WRITE)) {
|
||||
struct pipe_blit_info blit;
|
||||
memset(&blit, 0, sizeof(blit));
|
||||
|
||||
blit.src.resource = trans->ss_resource;
|
||||
blit.src.format = trans->ss_resource->format;
|
||||
blit.src.box.width = trans->ss_box.width;
|
||||
blit.src.box.height = trans->ss_box.height;
|
||||
blit.src.box.depth = 1;
|
||||
|
||||
blit.dst.resource = ptrans->resource;
|
||||
blit.dst.format = ptrans->resource->format;
|
||||
blit.dst.level = ptrans->level;
|
||||
blit.dst.box = trans->ss_box;
|
||||
|
||||
blit.mask = util_format_get_mask(ptrans->resource->format);
|
||||
blit.filter = PIPE_TEX_FILTER_NEAREST;
|
||||
|
||||
pctx->blit(pctx, &blit);
|
||||
vc4_flush(pctx);
|
||||
|
||||
pipe_resource_reference(&trans->ss_resource, NULL);
|
||||
}
|
||||
|
||||
pipe_resource_reference(&ptrans->resource, NULL);
|
||||
util_slab_free(&vc4->transfer_pool, ptrans);
|
||||
}
|
||||
|
||||
static struct pipe_resource *
|
||||
vc4_get_temp_resource(struct pipe_context *pctx,
|
||||
struct pipe_resource *prsc,
|
||||
const struct pipe_box *box)
|
||||
{
|
||||
struct pipe_resource temp_setup;
|
||||
|
||||
memset(&temp_setup, 0, sizeof(temp_setup));
|
||||
temp_setup.target = prsc->target;
|
||||
temp_setup.format = prsc->format;
|
||||
temp_setup.width0 = box->width;
|
||||
temp_setup.height0 = box->height;
|
||||
temp_setup.depth0 = 1;
|
||||
temp_setup.array_size = 1;
|
||||
|
||||
return pctx->screen->resource_create(pctx->screen, &temp_setup);
|
||||
}
|
||||
|
||||
static void *
|
||||
vc4_resource_transfer_map(struct pipe_context *pctx,
|
||||
struct pipe_resource *prsc,
|
||||
@@ -101,7 +151,6 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
|
||||
{
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
struct vc4_resource *rsc = vc4_resource(prsc);
|
||||
struct vc4_resource_slice *slice = &rsc->slices[level];
|
||||
struct vc4_transfer *trans;
|
||||
struct pipe_transfer *ptrans;
|
||||
enum pipe_format format = prsc->format;
|
||||
@@ -155,6 +204,50 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
|
||||
ptrans->usage = usage;
|
||||
ptrans->box = *box;
|
||||
|
||||
/* If the resource is multisampled, we need to resolve to single
|
||||
* sample. This seems like it should be handled at a higher layer.
|
||||
*/
|
||||
if (prsc->nr_samples) {
|
||||
trans->ss_resource = vc4_get_temp_resource(pctx, prsc, box);
|
||||
if (!trans->ss_resource)
|
||||
goto fail;
|
||||
assert(!trans->ss_resource->nr_samples);
|
||||
|
||||
/* The ptrans->box gets modified for tile alignment, so save
|
||||
* the original box for unmap time.
|
||||
*/
|
||||
trans->ss_box = *box;
|
||||
|
||||
if (usage & PIPE_TRANSFER_READ) {
|
||||
struct pipe_blit_info blit;
|
||||
memset(&blit, 0, sizeof(blit));
|
||||
|
||||
blit.src.resource = ptrans->resource;
|
||||
blit.src.format = ptrans->resource->format;
|
||||
blit.src.level = ptrans->level;
|
||||
blit.src.box = trans->ss_box;
|
||||
|
||||
blit.dst.resource = trans->ss_resource;
|
||||
blit.dst.format = trans->ss_resource->format;
|
||||
blit.dst.box.width = trans->ss_box.width;
|
||||
blit.dst.box.height = trans->ss_box.height;
|
||||
blit.dst.box.depth = 1;
|
||||
|
||||
blit.mask = util_format_get_mask(prsc->format);
|
||||
blit.filter = PIPE_TEX_FILTER_NEAREST;
|
||||
|
||||
pctx->blit(pctx, &blit);
|
||||
vc4_flush(pctx);
|
||||
}
|
||||
|
||||
/* The rest of the mapping process should use our temporary. */
|
||||
prsc = trans->ss_resource;
|
||||
rsc = vc4_resource(prsc);
|
||||
ptrans->box.x = 0;
|
||||
ptrans->box.y = 0;
|
||||
ptrans->box.z = 0;
|
||||
}
|
||||
|
||||
/* Note that the current kernel implementation is synchronous, so no
|
||||
* need to do syncing stuff here yet.
|
||||
*/
|
||||
@@ -170,6 +263,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
|
||||
|
||||
*pptrans = ptrans;
|
||||
|
||||
struct vc4_resource_slice *slice = &rsc->slices[level];
|
||||
if (rsc->tiled) {
|
||||
uint32_t utile_w = vc4_utile_width(rsc->cpp);
|
||||
uint32_t utile_h = vc4_utile_height(rsc->cpp);
|
||||
@@ -203,7 +297,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
|
||||
ptrans->box.height != orig_height) {
|
||||
vc4_load_tiled_image(trans->map, ptrans->stride,
|
||||
buf + slice->offset +
|
||||
box->z * rsc->cube_map_stride,
|
||||
ptrans->box.z * rsc->cube_map_stride,
|
||||
slice->stride,
|
||||
slice->tiling, rsc->cpp,
|
||||
&ptrans->box);
|
||||
@@ -216,9 +310,9 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
|
||||
ptrans->layer_stride = ptrans->stride;
|
||||
|
||||
return buf + slice->offset +
|
||||
box->y / util_format_get_blockheight(format) * ptrans->stride +
|
||||
box->x / util_format_get_blockwidth(format) * rsc->cpp +
|
||||
box->z * rsc->cube_map_stride;
|
||||
ptrans->box.y / util_format_get_blockheight(format) * ptrans->stride +
|
||||
ptrans->box.x / util_format_get_blockwidth(format) * rsc->cpp +
|
||||
ptrans->box.z * rsc->cube_map_stride;
|
||||
}
|
||||
|
||||
|
||||
@@ -283,7 +377,13 @@ vc4_setup_slices(struct vc4_resource *rsc)
|
||||
|
||||
if (!rsc->tiled) {
|
||||
slice->tiling = VC4_TILING_FORMAT_LINEAR;
|
||||
level_width = align(level_width, utile_w);
|
||||
if (prsc->nr_samples) {
|
||||
/* MSAA (4x) surfaces are stored as raw tile buffer contents. */
|
||||
level_width = align(level_width, 32);
|
||||
level_height = align(level_height, 32);
|
||||
} else {
|
||||
level_width = align(level_width, utile_w);
|
||||
}
|
||||
} else {
|
||||
if (vc4_size_is_lt(level_width, level_height,
|
||||
rsc->cpp)) {
|
||||
@@ -300,7 +400,8 @@ vc4_setup_slices(struct vc4_resource *rsc)
|
||||
}
|
||||
|
||||
slice->offset = offset;
|
||||
slice->stride = level_width * rsc->cpp;
|
||||
slice->stride = (level_width * rsc->cpp *
|
||||
MAX2(prsc->nr_samples, 1));
|
||||
slice->size = level_height * slice->stride;
|
||||
|
||||
offset += slice->size;
|
||||
@@ -357,7 +458,10 @@ vc4_resource_setup(struct pipe_screen *pscreen,
|
||||
prsc->screen = pscreen;
|
||||
|
||||
rsc->base.vtbl = &vc4_resource_vtbl;
|
||||
rsc->cpp = util_format_get_blocksize(tmpl->format);
|
||||
if (prsc->nr_samples == 0)
|
||||
rsc->cpp = util_format_get_blocksize(tmpl->format);
|
||||
else
|
||||
rsc->cpp = sizeof(uint32_t);
|
||||
|
||||
assert(rsc->cpp);
|
||||
|
||||
@@ -371,8 +475,12 @@ get_resource_texture_format(struct pipe_resource *prsc)
|
||||
uint8_t format = vc4_get_tex_format(prsc->format);
|
||||
|
||||
if (!rsc->tiled) {
|
||||
assert(format == VC4_TEXTURE_TYPE_RGBA8888);
|
||||
return VC4_TEXTURE_TYPE_RGBA32R;
|
||||
if (prsc->nr_samples) {
|
||||
return ~0;
|
||||
} else {
|
||||
assert(format == VC4_TEXTURE_TYPE_RGBA8888);
|
||||
return VC4_TEXTURE_TYPE_RGBA32R;
|
||||
}
|
||||
}
|
||||
|
||||
return format;
|
||||
@@ -389,6 +497,7 @@ vc4_resource_create(struct pipe_screen *pscreen,
|
||||
* communicate metadata about tiling currently.
|
||||
*/
|
||||
if (tmpl->target == PIPE_BUFFER ||
|
||||
tmpl->nr_samples ||
|
||||
(tmpl->bind & (PIPE_BIND_SCANOUT |
|
||||
PIPE_BIND_LINEAR |
|
||||
PIPE_BIND_SHARED |
|
||||
@@ -492,13 +601,9 @@ vc4_surface_destroy(struct pipe_context *pctx, struct pipe_surface *psurf)
|
||||
FREE(psurf);
|
||||
}
|
||||
|
||||
/** Debug routine to dump the contents of an 8888 surface to the console */
|
||||
void
|
||||
vc4_dump_surface(struct pipe_surface *psurf)
|
||||
static void
|
||||
vc4_dump_surface_non_msaa(struct pipe_surface *psurf)
|
||||
{
|
||||
if (!psurf)
|
||||
return;
|
||||
|
||||
struct pipe_resource *prsc = psurf->texture;
|
||||
struct vc4_resource *rsc = vc4_resource(prsc);
|
||||
uint32_t *map = vc4_bo_map(rsc->bo);
|
||||
@@ -592,6 +697,147 @@ vc4_dump_surface(struct pipe_surface *psurf)
|
||||
}
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
vc4_surface_msaa_get_sample(struct pipe_surface *psurf,
|
||||
uint32_t x, uint32_t y, uint32_t sample)
|
||||
{
|
||||
struct pipe_resource *prsc = psurf->texture;
|
||||
struct vc4_resource *rsc = vc4_resource(prsc);
|
||||
uint32_t tile_w = 32, tile_h = 32;
|
||||
uint32_t tiles_w = DIV_ROUND_UP(psurf->width, 32);
|
||||
|
||||
uint32_t tile_x = x / tile_w;
|
||||
uint32_t tile_y = y / tile_h;
|
||||
uint32_t *tile = (vc4_bo_map(rsc->bo) +
|
||||
VC4_TILE_BUFFER_SIZE * (tile_y * tiles_w + tile_x));
|
||||
uint32_t subtile_x = x % tile_w;
|
||||
uint32_t subtile_y = y % tile_h;
|
||||
|
||||
uint32_t quad_samples = VC4_MAX_SAMPLES * 4;
|
||||
uint32_t tile_stride = quad_samples * tile_w / 2;
|
||||
|
||||
return *((uint32_t *)tile +
|
||||
(subtile_y >> 1) * tile_stride +
|
||||
(subtile_x >> 1) * quad_samples +
|
||||
((subtile_y & 1) << 1) +
|
||||
(subtile_x & 1) +
|
||||
sample);
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_dump_surface_msaa_char(struct pipe_surface *psurf,
|
||||
uint32_t start_x, uint32_t start_y,
|
||||
uint32_t w, uint32_t h)
|
||||
{
|
||||
bool all_same_color = true;
|
||||
uint32_t all_pix = 0;
|
||||
|
||||
for (int y = start_y; y < start_y + h; y++) {
|
||||
for (int x = start_x; x < start_x + w; x++) {
|
||||
for (int s = 0; s < VC4_MAX_SAMPLES; s++) {
|
||||
uint32_t pix = vc4_surface_msaa_get_sample(psurf,
|
||||
x, y,
|
||||
s);
|
||||
if (x == start_x && y == start_y)
|
||||
all_pix = pix;
|
||||
else if (all_pix != pix)
|
||||
all_same_color = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (all_same_color) {
|
||||
static const struct {
|
||||
uint32_t val;
|
||||
const char *c;
|
||||
} named_colors[] = {
|
||||
{ 0xff000000, "█" },
|
||||
{ 0x00000000, "█" },
|
||||
{ 0xffff0000, "r" },
|
||||
{ 0xff00ff00, "g" },
|
||||
{ 0xff0000ff, "b" },
|
||||
{ 0xffffffff, "w" },
|
||||
};
|
||||
int i;
|
||||
for (i = 0; i < ARRAY_SIZE(named_colors); i++) {
|
||||
if (named_colors[i].val == all_pix) {
|
||||
fprintf(stderr, "%s",
|
||||
named_colors[i].c);
|
||||
return;
|
||||
}
|
||||
}
|
||||
fprintf(stderr, "x");
|
||||
} else {
|
||||
fprintf(stderr, ".");
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_dump_surface_msaa(struct pipe_surface *psurf)
|
||||
{
|
||||
uint32_t tile_w = 32, tile_h = 32;
|
||||
uint32_t tiles_w = DIV_ROUND_UP(psurf->width, tile_w);
|
||||
uint32_t tiles_h = DIV_ROUND_UP(psurf->height, tile_h);
|
||||
uint32_t char_w = 140, char_h = 60;
|
||||
uint32_t char_w_per_tile = char_w / tiles_w - 1;
|
||||
uint32_t char_h_per_tile = char_h / tiles_h - 1;
|
||||
uint32_t found_colors[10];
|
||||
uint32_t num_found_colors = 0;
|
||||
|
||||
fprintf(stderr, "Surface: %dx%d (%dx MSAA)\n",
|
||||
psurf->width, psurf->height, psurf->texture->nr_samples);
|
||||
|
||||
for (int x = 0; x < (char_w_per_tile + 1) * tiles_w; x++)
|
||||
fprintf(stderr, "-");
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
for (int ty = 0; ty < psurf->height; ty += tile_h) {
|
||||
for (int y = 0; y < char_h_per_tile; y++) {
|
||||
|
||||
for (int tx = 0; tx < psurf->width; tx += tile_w) {
|
||||
for (int x = 0; x < char_w_per_tile; x++) {
|
||||
uint32_t bx1 = (x * tile_w /
|
||||
char_w_per_tile);
|
||||
uint32_t bx2 = ((x + 1) * tile_w /
|
||||
char_w_per_tile);
|
||||
uint32_t by1 = (y * tile_h /
|
||||
char_h_per_tile);
|
||||
uint32_t by2 = ((y + 1) * tile_h /
|
||||
char_h_per_tile);
|
||||
|
||||
vc4_dump_surface_msaa_char(psurf,
|
||||
tx + bx1,
|
||||
ty + by1,
|
||||
bx2 - bx1,
|
||||
by2 - by1);
|
||||
}
|
||||
fprintf(stderr, "|");
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
for (int x = 0; x < (char_w_per_tile + 1) * tiles_w; x++)
|
||||
fprintf(stderr, "-");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
|
||||
for (int i = 0; i < num_found_colors; i++) {
|
||||
fprintf(stderr, "color %d: 0x%08x\n", i, found_colors[i]);
|
||||
}
|
||||
}
|
||||
|
||||
/** Debug routine to dump the contents of an 8888 surface to the console */
|
||||
void
|
||||
vc4_dump_surface(struct pipe_surface *psurf)
|
||||
{
|
||||
if (!psurf)
|
||||
return;
|
||||
|
||||
if (psurf->texture->nr_samples)
|
||||
vc4_dump_surface_msaa(psurf);
|
||||
else
|
||||
vc4_dump_surface_non_msaa(psurf);
|
||||
}
|
||||
|
||||
static void
|
||||
vc4_flush_resource(struct pipe_context *pctx, struct pipe_resource *resource)
|
||||
{
|
||||
|
@@ -32,6 +32,9 @@
|
||||
struct vc4_transfer {
|
||||
struct pipe_transfer base;
|
||||
void *map;
|
||||
|
||||
struct pipe_resource *ss_resource;
|
||||
struct pipe_box ss_box;
|
||||
};
|
||||
|
||||
struct vc4_resource_slice {
|
||||
|
@@ -95,6 +95,7 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_BLEND_EQUATION_SEPARATE:
|
||||
case PIPE_CAP_TWO_SIDED_STENCIL:
|
||||
case PIPE_CAP_USER_INDEX_BUFFERS:
|
||||
case PIPE_CAP_TEXTURE_MULTISAMPLE:
|
||||
return 1;
|
||||
|
||||
/* lying for GL 2.0 */
|
||||
@@ -140,7 +141,6 @@ vc4_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
|
||||
case PIPE_CAP_CONDITIONAL_RENDER:
|
||||
case PIPE_CAP_PRIMITIVE_RESTART:
|
||||
case PIPE_CAP_TEXTURE_MULTISAMPLE:
|
||||
case PIPE_CAP_TEXTURE_BARRIER:
|
||||
case PIPE_CAP_SM3:
|
||||
case PIPE_CAP_INDEP_BLEND_ENABLE:
|
||||
@@ -358,7 +358,6 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
|
||||
unsigned retval = 0;
|
||||
|
||||
if ((target >= PIPE_MAX_TEXTURE_TYPES) ||
|
||||
(sample_count > 1) ||
|
||||
!util_format_is_supported(format, usage)) {
|
||||
return FALSE;
|
||||
}
|
||||
@@ -417,11 +416,13 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
|
||||
}
|
||||
|
||||
if ((usage & PIPE_BIND_RENDER_TARGET) &&
|
||||
(sample_count == 0 || sample_count == VC4_MAX_SAMPLES) &&
|
||||
vc4_rt_format_supported(format)) {
|
||||
retval |= PIPE_BIND_RENDER_TARGET;
|
||||
}
|
||||
|
||||
if ((usage & PIPE_BIND_SAMPLER_VIEW) &&
|
||||
(sample_count == 0 || sample_count == VC4_MAX_SAMPLES) &&
|
||||
(vc4_tex_format_supported(format))) {
|
||||
retval |= PIPE_BIND_SAMPLER_VIEW;
|
||||
}
|
||||
|
@@ -65,7 +65,7 @@ struct drm_device {
|
||||
};
|
||||
|
||||
struct drm_gem_object {
|
||||
uint32_t size;
|
||||
size_t size;
|
||||
struct drm_device *dev;
|
||||
};
|
||||
|
||||
|
@@ -79,7 +79,7 @@ static void
|
||||
vc4_set_sample_mask(struct pipe_context *pctx, unsigned sample_mask)
|
||||
{
|
||||
struct vc4_context *vc4 = vc4_context(pctx);
|
||||
vc4->sample_mask = (uint16_t)sample_mask;
|
||||
vc4->sample_mask = sample_mask & ((1 << VC4_MAX_SAMPLES) - 1);
|
||||
vc4->dirty |= VC4_DIRTY_SAMPLE_MASK;
|
||||
}
|
||||
|
||||
@@ -121,6 +121,9 @@ vc4_create_rasterizer_state(struct pipe_context *pctx,
|
||||
so->offset_factor = float_to_187_half(cso->offset_scale);
|
||||
}
|
||||
|
||||
if (cso->multisample)
|
||||
so->config_bits[0] |= VC4_CONFIG_BITS_RASTERIZER_OVERSAMPLE_4X;
|
||||
|
||||
return so;
|
||||
}
|
||||
|
||||
@@ -457,6 +460,22 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
|
||||
rsc->cpp);
|
||||
}
|
||||
|
||||
vc4->msaa = false;
|
||||
if (cso->cbufs[0])
|
||||
vc4->msaa = cso->cbufs[0]->texture->nr_samples != 0;
|
||||
else if (cso->zsbuf)
|
||||
vc4->msaa = cso->zsbuf->texture->nr_samples != 0;
|
||||
|
||||
if (vc4->msaa) {
|
||||
vc4->tile_width = 32;
|
||||
vc4->tile_height = 32;
|
||||
} else {
|
||||
vc4->tile_width = 64;
|
||||
vc4->tile_height = 64;
|
||||
}
|
||||
vc4->draw_tiles_x = DIV_ROUND_UP(cso->width, vc4->tile_width);
|
||||
vc4->draw_tiles_y = DIV_ROUND_UP(cso->height, vc4->tile_height);
|
||||
|
||||
vc4->dirty |= VC4_DIRTY_FRAMEBUFFER;
|
||||
}
|
||||
|
||||
|
@@ -71,6 +71,18 @@ write_texture_p2(struct vc4_context *vc4,
|
||||
VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
|
||||
}
|
||||
|
||||
static void
|
||||
write_texture_msaa_addr(struct vc4_context *vc4,
|
||||
struct vc4_cl_out **uniforms,
|
||||
struct vc4_texture_stateobj *texstate,
|
||||
uint32_t unit)
|
||||
{
|
||||
struct pipe_sampler_view *texture = texstate->textures[unit];
|
||||
struct vc4_resource *rsc = vc4_resource(texture->texture);
|
||||
|
||||
cl_aligned_reloc(vc4, &vc4->uniforms, uniforms, rsc->bo, 0);
|
||||
}
|
||||
|
||||
|
||||
#define SWIZ(x,y,z,w) { \
|
||||
UTIL_FORMAT_SWIZZLE_##x, \
|
||||
@@ -244,6 +256,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
|
||||
cl_aligned_reloc(vc4, &vc4->uniforms, &uniforms, ubo, 0);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXTURE_MSAA_ADDR:
|
||||
write_texture_msaa_addr(vc4, &uniforms,
|
||||
texstate, uinfo->data[i]);
|
||||
break;
|
||||
|
||||
case QUNIFORM_TEXTURE_BORDER_COLOR:
|
||||
write_texture_border_color(vc4, &uniforms,
|
||||
texstate, uinfo->data[i]);
|
||||
@@ -303,6 +320,10 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
|
||||
cl_aligned_f(&uniforms,
|
||||
vc4->zsa->base.alpha.ref_value);
|
||||
break;
|
||||
|
||||
case QUNIFORM_SAMPLE_MASK:
|
||||
cl_aligned_u32(&uniforms, vc4->sample_mask);
|
||||
break;
|
||||
}
|
||||
#if 0
|
||||
uint32_t written_val = *((uint32_t *)uniforms - 1);
|
||||
@@ -345,6 +366,7 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
|
||||
case QUNIFORM_TEXTURE_CONFIG_P1:
|
||||
case QUNIFORM_TEXTURE_CONFIG_P2:
|
||||
case QUNIFORM_TEXTURE_BORDER_COLOR:
|
||||
case QUNIFORM_TEXTURE_MSAA_ADDR:
|
||||
case QUNIFORM_TEXRECT_SCALE_X:
|
||||
case QUNIFORM_TEXRECT_SCALE_Y:
|
||||
dirty |= VC4_DIRTY_TEXSTATE;
|
||||
@@ -363,6 +385,10 @@ vc4_set_shader_uniform_dirty_flags(struct vc4_compiled_shader *shader)
|
||||
case QUNIFORM_ALPHA_REF:
|
||||
dirty |= VC4_DIRTY_ZSA;
|
||||
break;
|
||||
|
||||
case QUNIFORM_SAMPLE_MASK:
|
||||
dirty |= VC4_DIRTY_SAMPLE_MASK;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -32,7 +32,8 @@ platform::platform() : adaptor_range(evals(), devs) {
|
||||
|
||||
for (pipe_loader_device *ldev : ldevs) {
|
||||
try {
|
||||
devs.push_back(create<device>(*this, ldev));
|
||||
if (ldev)
|
||||
devs.push_back(create<device>(*this, ldev));
|
||||
} catch (error &) {
|
||||
pipe_loader_release(&ldev, 1);
|
||||
}
|
||||
|
@@ -1446,6 +1446,7 @@ dri2_init_screen(__DRIscreen * sPriv)
|
||||
struct pipe_screen *pscreen = NULL;
|
||||
const struct drm_conf_ret *throttle_ret;
|
||||
const struct drm_conf_ret *dmabuf_ret;
|
||||
int fd = -1;
|
||||
|
||||
screen = CALLOC_STRUCT(dri_screen);
|
||||
if (!screen)
|
||||
@@ -1457,7 +1458,10 @@ dri2_init_screen(__DRIscreen * sPriv)
|
||||
|
||||
sPriv->driverPrivate = (void *)screen;
|
||||
|
||||
if (pipe_loader_drm_probe_fd(&screen->dev, dup(screen->fd)))
|
||||
if (screen->fd < 0 || (fd = dup(screen->fd)) < 0)
|
||||
goto fail;
|
||||
|
||||
if (pipe_loader_drm_probe_fd(&screen->dev, fd))
|
||||
pscreen = pipe_loader_create_screen(screen->dev);
|
||||
|
||||
if (!pscreen)
|
||||
@@ -1502,6 +1506,8 @@ fail:
|
||||
dri_destroy_screen_helper(screen);
|
||||
if (screen->dev)
|
||||
pipe_loader_release(&screen->dev, 1);
|
||||
else
|
||||
close(fd);
|
||||
FREE(screen);
|
||||
return NULL;
|
||||
}
|
||||
@@ -1519,6 +1525,7 @@ dri_kms_init_screen(__DRIscreen * sPriv)
|
||||
struct dri_screen *screen;
|
||||
struct pipe_screen *pscreen = NULL;
|
||||
uint64_t cap;
|
||||
int fd = -1;
|
||||
|
||||
screen = CALLOC_STRUCT(dri_screen);
|
||||
if (!screen)
|
||||
@@ -1529,7 +1536,10 @@ dri_kms_init_screen(__DRIscreen * sPriv)
|
||||
|
||||
sPriv->driverPrivate = (void *)screen;
|
||||
|
||||
if (pipe_loader_sw_probe_kms(&screen->dev, dup(screen->fd)))
|
||||
if (screen->fd < 0 || (fd = dup(screen->fd)) < 0)
|
||||
goto fail;
|
||||
|
||||
if (pipe_loader_sw_probe_kms(&screen->dev, fd))
|
||||
pscreen = pipe_loader_create_screen(screen->dev);
|
||||
|
||||
if (!pscreen)
|
||||
@@ -1557,6 +1567,8 @@ fail:
|
||||
dri_destroy_screen_helper(screen);
|
||||
if (screen->dev)
|
||||
pipe_loader_release(&screen->dev, 1);
|
||||
else
|
||||
close(fd);
|
||||
FREE(screen);
|
||||
#endif // GALLIUM_SOFTPIPE
|
||||
return NULL;
|
||||
|
@@ -28,10 +28,14 @@
|
||||
|
||||
#include "pipe/p_screen.h"
|
||||
|
||||
#include "util/u_video.h"
|
||||
|
||||
#include "vl/vl_winsys.h"
|
||||
|
||||
#include "va_private.h"
|
||||
|
||||
DEBUG_GET_ONCE_BOOL_OPTION(mpeg4, "VAAPI_MPEG4_ENABLED", false)
|
||||
|
||||
VAStatus
|
||||
vlVaQueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, int *num_profiles)
|
||||
{
|
||||
@@ -45,12 +49,16 @@ vlVaQueryConfigProfiles(VADriverContextP ctx, VAProfile *profile_list, int *num_
|
||||
*num_profiles = 0;
|
||||
|
||||
pscreen = VL_VA_PSCREEN(ctx);
|
||||
for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= PIPE_VIDEO_PROFILE_HEVC_MAIN_444; ++p)
|
||||
for (p = PIPE_VIDEO_PROFILE_MPEG2_SIMPLE; p <= PIPE_VIDEO_PROFILE_HEVC_MAIN_444; ++p) {
|
||||
if (u_reduce_video_profile(p) == PIPE_VIDEO_FORMAT_MPEG4 && !debug_get_option_mpeg4())
|
||||
continue;
|
||||
|
||||
if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, PIPE_VIDEO_CAP_SUPPORTED)) {
|
||||
vap = PipeToProfile(p);
|
||||
if (vap != VAProfileNone)
|
||||
profile_list[(*num_profiles)++] = vap;
|
||||
}
|
||||
}
|
||||
|
||||
/* Support postprocessing through vl_compositor */
|
||||
profile_list[(*num_profiles)++] = VAProfileNone;
|
||||
|
@@ -152,11 +152,15 @@ xa_tracker_create(int drm_fd)
|
||||
struct xa_tracker *xa = calloc(1, sizeof(struct xa_tracker));
|
||||
enum xa_surface_type stype;
|
||||
unsigned int num_formats;
|
||||
int fd = -1;
|
||||
|
||||
if (!xa)
|
||||
return NULL;
|
||||
|
||||
if (pipe_loader_drm_probe_fd(&xa->dev, dup(drm_fd)))
|
||||
if (drm_fd < 0 || (fd = dup(drm_fd)) < 0)
|
||||
goto out_no_fd;
|
||||
|
||||
if (pipe_loader_drm_probe_fd(&xa->dev, fd))
|
||||
xa->screen = pipe_loader_create_screen(xa->dev);
|
||||
|
||||
if (!xa->screen)
|
||||
@@ -208,6 +212,9 @@ xa_tracker_create(int drm_fd)
|
||||
out_no_screen:
|
||||
if (xa->dev)
|
||||
pipe_loader_release(&xa->dev, 1);
|
||||
fd = -1;
|
||||
out_no_fd:
|
||||
close(fd);
|
||||
free(xa);
|
||||
return NULL;
|
||||
}
|
||||
|
@@ -31,6 +31,7 @@
|
||||
#include "pipe/p_state.h"
|
||||
|
||||
#include "target-helpers/drm_helper.h"
|
||||
#include "target-helpers/sw_helper.h"
|
||||
#include "state_tracker/drm_driver.h"
|
||||
|
||||
#include "d3dadapter/d3dadapter9.h"
|
||||
|
@@ -1,4 +1,5 @@
|
||||
#include "target-helpers/drm_helper.h"
|
||||
#include "target-helpers/sw_helper.h"
|
||||
|
||||
#include "dri_screen.h"
|
||||
|
||||
|
@@ -1 +1,2 @@
|
||||
#include "target-helpers/drm_helper.h"
|
||||
#include "target-helpers/sw_helper.h"
|
||||
|
@@ -20,7 +20,7 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
|
||||
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
|
||||
$(top_builddir)/src/util/libmesautil.la \
|
||||
$(ELF_LIB) \
|
||||
-ldl \
|
||||
$(DLOPEN_LIBS) \
|
||||
-lclangCodeGen \
|
||||
-lclangFrontendTool \
|
||||
-lclangFrontend \
|
||||
|
@@ -1 +1,2 @@
|
||||
#include "target-helpers/drm_helper.h"
|
||||
#include "target-helpers/sw_helper.h"
|
||||
|
@@ -1 +1,2 @@
|
||||
#include "target-helpers/drm_helper.h"
|
||||
#include "target-helpers/sw_helper.h"
|
||||
|
@@ -1 +1,2 @@
|
||||
#include "target-helpers/drm_helper.h"
|
||||
#include "target-helpers/sw_helper.h"
|
||||
|
@@ -1 +1,2 @@
|
||||
#include "target-helpers/drm_helper.h"
|
||||
#include "target-helpers/sw_helper.h"
|
||||
|
@@ -1737,7 +1737,7 @@ ast_function_expression::handle_method(exec_list *instructions,
|
||||
result = new(ctx) ir_constant(op->type->array_size());
|
||||
}
|
||||
} else if (op->type->is_vector()) {
|
||||
if (state->ARB_shading_language_420pack_enable) {
|
||||
if (state->has_420pack()) {
|
||||
/* .length() returns int. */
|
||||
result = new(ctx) ir_constant((int) op->type->vector_elements);
|
||||
} else {
|
||||
@@ -1746,7 +1746,7 @@ ast_function_expression::handle_method(exec_list *instructions,
|
||||
goto fail;
|
||||
}
|
||||
} else if (op->type->is_matrix()) {
|
||||
if (state->ARB_shading_language_420pack_enable) {
|
||||
if (state->has_420pack()) {
|
||||
/* .length() returns int. */
|
||||
result = new(ctx) ir_constant((int) op->type->matrix_columns);
|
||||
} else {
|
||||
@@ -2075,7 +2075,7 @@ ast_aggregate_initializer::hir(exec_list *instructions,
|
||||
}
|
||||
const glsl_type *const constructor_type = this->constructor_type;
|
||||
|
||||
if (!state->ARB_shading_language_420pack_enable) {
|
||||
if (!state->has_420pack()) {
|
||||
_mesa_glsl_error(&loc, state, "C-style initialization requires the "
|
||||
"GL_ARB_shading_language_420pack extension");
|
||||
return ir_rvalue::error_value(ctx);
|
||||
|
@@ -2649,7 +2649,9 @@ apply_explicit_binding(struct _mesa_glsl_parse_state *state,
|
||||
|
||||
return;
|
||||
}
|
||||
} else if (state->is_version(420, 310) && base_type->is_image()) {
|
||||
} else if ((state->is_version(420, 310) ||
|
||||
state->ARB_shading_language_420pack_enable) &&
|
||||
base_type->is_image()) {
|
||||
assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
|
||||
if (max_index >= ctx->Const.MaxImageUnits) {
|
||||
_mesa_glsl_error(loc, state, "Image binding %d exceeds the "
|
||||
@@ -3736,7 +3738,7 @@ process_initializer(ir_variable *var, ast_declaration *decl,
|
||||
* expressions. Const-qualified global variables must still be
|
||||
* initialized with constant expressions.
|
||||
*/
|
||||
if (!state->ARB_shading_language_420pack_enable
|
||||
if (!state->has_420pack()
|
||||
|| state->current_function == NULL) {
|
||||
_mesa_glsl_error(& initializer_loc, state,
|
||||
"initializer of %s variable `%s' must be a "
|
||||
@@ -5365,7 +5367,7 @@ ast_jump_statement::hir(exec_list *instructions,
|
||||
if (state->current_function->return_type != ret_type) {
|
||||
YYLTYPE loc = this->get_location();
|
||||
|
||||
if (state->ARB_shading_language_420pack_enable) {
|
||||
if (state->has_420pack()) {
|
||||
if (!apply_implicit_conversion(state->current_function->return_type,
|
||||
ret, state)) {
|
||||
_mesa_glsl_error(& loc, state,
|
||||
|
@@ -948,7 +948,7 @@ parameter_qualifier:
|
||||
if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out))
|
||||
_mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier");
|
||||
|
||||
if (!state->has_420pack() && $2.flags.q.constant)
|
||||
if (!state->has_420pack_or_es31() && $2.flags.q.constant)
|
||||
_mesa_glsl_error(&@1, state, "in/out/inout must come after const "
|
||||
"or precise");
|
||||
|
||||
@@ -960,7 +960,7 @@ parameter_qualifier:
|
||||
if ($2.precision != ast_precision_none)
|
||||
_mesa_glsl_error(&@1, state, "duplicate precision qualifier");
|
||||
|
||||
if (!(state->has_420pack() || state->is_version(420, 310)) &&
|
||||
if (!state->has_420pack_or_es31() &&
|
||||
$2.flags.i != 0)
|
||||
_mesa_glsl_error(&@1, state, "precision qualifiers must come last");
|
||||
|
||||
@@ -1482,7 +1482,7 @@ layout_qualifier_id:
|
||||
$$.index = $3;
|
||||
}
|
||||
|
||||
if ((state->has_420pack() ||
|
||||
if ((state->has_420pack_or_es31() ||
|
||||
state->has_atomic_counters() ||
|
||||
state->has_shader_storage_buffer_objects()) &&
|
||||
match_layout_qualifier("binding", $1, state) == 0) {
|
||||
@@ -1714,7 +1714,7 @@ type_qualifier:
|
||||
if ($2.flags.q.invariant)
|
||||
_mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier");
|
||||
|
||||
if (!state->has_420pack() && $2.flags.q.precise)
|
||||
if (!state->has_420pack_or_es31() && $2.flags.q.precise)
|
||||
_mesa_glsl_error(&@1, state,
|
||||
"\"invariant\" must come after \"precise\"");
|
||||
|
||||
@@ -1747,7 +1747,7 @@ type_qualifier:
|
||||
if ($2.has_interpolation())
|
||||
_mesa_glsl_error(&@1, state, "duplicate interpolation qualifier");
|
||||
|
||||
if (!state->has_420pack() &&
|
||||
if (!state->has_420pack_or_es31() &&
|
||||
($2.flags.q.precise || $2.flags.q.invariant)) {
|
||||
_mesa_glsl_error(&@1, state, "interpolation qualifiers must come "
|
||||
"after \"precise\" or \"invariant\"");
|
||||
@@ -1767,7 +1767,7 @@ type_qualifier:
|
||||
* precise qualifiers since these are useful in ARB_separate_shader_objects.
|
||||
* There is no clear spec guidance on this either.
|
||||
*/
|
||||
if (!state->has_420pack() && $2.has_layout())
|
||||
if (!state->has_420pack_or_es31() && $2.has_layout())
|
||||
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
|
||||
|
||||
$$ = $1;
|
||||
@@ -1785,7 +1785,7 @@ type_qualifier:
|
||||
"duplicate auxiliary storage qualifier (centroid or sample)");
|
||||
}
|
||||
|
||||
if (!state->has_420pack() &&
|
||||
if (!state->has_420pack_or_es31() &&
|
||||
($2.flags.q.precise || $2.flags.q.invariant ||
|
||||
$2.has_interpolation() || $2.has_layout())) {
|
||||
_mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come "
|
||||
@@ -1803,7 +1803,7 @@ type_qualifier:
|
||||
if ($2.has_storage())
|
||||
_mesa_glsl_error(&@1, state, "duplicate storage qualifier");
|
||||
|
||||
if (!state->has_420pack() &&
|
||||
if (!state->has_420pack_or_es31() &&
|
||||
($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() ||
|
||||
$2.has_layout() || $2.has_auxiliary_storage())) {
|
||||
_mesa_glsl_error(&@1, state, "storage qualifiers must come after "
|
||||
@@ -1819,7 +1819,7 @@ type_qualifier:
|
||||
if ($2.precision != ast_precision_none)
|
||||
_mesa_glsl_error(&@1, state, "duplicate precision qualifier");
|
||||
|
||||
if (!(state->has_420pack() || state->is_version(420, 310)) &&
|
||||
if (!(state->has_420pack_or_es31()) &&
|
||||
$2.flags.i != 0)
|
||||
_mesa_glsl_error(&@1, state, "precision qualifiers must come last");
|
||||
|
||||
@@ -2575,7 +2575,7 @@ interface_block:
|
||||
{
|
||||
ast_interface_block *block = (ast_interface_block *) $2;
|
||||
|
||||
if (!state->has_420pack() && block->layout.has_layout() &&
|
||||
if (!state->has_420pack_or_es31() && block->layout.has_layout() &&
|
||||
!block->layout.is_default_qualifier) {
|
||||
_mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers");
|
||||
YYERROR;
|
||||
|
@@ -477,7 +477,7 @@ _mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state,
|
||||
struct gl_context *ctx = state->ctx;
|
||||
|
||||
/* Report the error via GL_ARB_debug_output. */
|
||||
_mesa_shader_debug(ctx, type, &msg_id, msg, strlen(msg));
|
||||
_mesa_shader_debug(ctx, type, &msg_id, msg);
|
||||
|
||||
ralloc_strcat(&state->info_log, "\n");
|
||||
}
|
||||
|
@@ -255,6 +255,11 @@ struct _mesa_glsl_parse_state {
|
||||
return ARB_shading_language_420pack_enable || is_version(420, 0);
|
||||
}
|
||||
|
||||
bool has_420pack_or_es31() const
|
||||
{
|
||||
return ARB_shading_language_420pack_enable || is_version(420, 310);
|
||||
}
|
||||
|
||||
bool has_compute_shader() const
|
||||
{
|
||||
return ARB_compute_shader_enable || is_version(430, 310);
|
||||
|
@@ -57,8 +57,7 @@ _mesa_ast_field_selection_to_hir(const ast_expression *expr,
|
||||
expr->primary_expression.identifier);
|
||||
}
|
||||
} else if (op->type->is_vector() ||
|
||||
(state->ARB_shading_language_420pack_enable &&
|
||||
op->type->is_scalar())) {
|
||||
(state->has_420pack() && op->type->is_scalar())) {
|
||||
ir_swizzle *swiz = ir_swizzle::create(op,
|
||||
expr->primary_expression.identifier,
|
||||
op->type->vector_elements);
|
||||
|
@@ -1669,6 +1669,7 @@ ir_variable::ir_variable(const struct glsl_type *type, const char *name,
|
||||
this->data.pixel_center_integer = false;
|
||||
this->data.depth_layout = ir_depth_layout_none;
|
||||
this->data.used = false;
|
||||
this->data.always_active_io = false;
|
||||
this->data.read_only = false;
|
||||
this->data.centroid = false;
|
||||
this->data.sample = false;
|
||||
|
@@ -658,6 +658,13 @@ public:
|
||||
*/
|
||||
unsigned assigned:1;
|
||||
|
||||
/**
|
||||
* When separate shader programs are enabled, only input/outputs between
|
||||
* the stages of a multi-stage separate program can be safely removed
|
||||
* from the shader interface. Other input/outputs must remains active.
|
||||
*/
|
||||
unsigned always_active_io:1;
|
||||
|
||||
/**
|
||||
* Enum indicating how the variable was declared. See
|
||||
* ir_var_declaration_type.
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user