Compare commits
38 Commits
mesa-17.3.
...
mesa-17.3.
Author | SHA1 | Date | |
---|---|---|---|
|
8d55da9f57 | ||
|
c4b070d25c | ||
|
30abe7dfae | ||
|
5ac9d91ee3 | ||
|
4eae5b39ee | ||
|
4e84aaa906 | ||
|
bcd4f26b41 | ||
|
447afbf31b | ||
|
09f6bd5ef2 | ||
|
a49b70d2ec | ||
|
f1050f0435 | ||
|
14e528b2db | ||
|
c846d72523 | ||
|
56993f4b8a | ||
|
9b2c27a39e | ||
|
6954eb1a2a | ||
|
b79e15b086 | ||
|
77cba992c3 | ||
|
88fd81d3a3 | ||
|
f768744970 | ||
|
1e908f5035 | ||
|
9777d08e57 | ||
|
aa4b1e71cb | ||
|
3dc6072e3d | ||
|
a34ad6f363 | ||
|
4bbc0f366a | ||
|
86b35a9901 | ||
|
f34c7ba4e1 | ||
|
ebb7ccb306 | ||
|
e4d964670a | ||
|
bb8431aa3e | ||
|
a05879c982 | ||
|
3e639156b8 | ||
|
e7904e1275 | ||
|
210bbf948e | ||
|
9ffe450dab | ||
|
2859a8f298 | ||
|
0904becf94 |
@@ -793,6 +793,7 @@ AC_CHECK_HEADER([xlocale.h], [DEFINES="$DEFINES -DHAVE_XLOCALE_H"])
|
||||
AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
|
||||
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
|
||||
AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
|
||||
AC_CHECK_FUNC([memfd_create], [DEFINES="$DEFINES -DHAVE_MEMFD_CREATE"])
|
||||
|
||||
AC_MSG_CHECKING([whether strtod has locale support])
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[
|
||||
|
@@ -32,11 +32,11 @@ It's the fastest software rasterizer for Mesa.
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
<p>An x86 or amd64 processor; 64-bit mode recommended.</p>
|
||||
<p>For x86 or amd64 processors, 64-bit mode is recommended.</p>
|
||||
<p>
|
||||
Support for SSE2 is strongly encouraged. Support for SSE3 and SSE4.1 will
|
||||
yield the most efficient code. The fewer features the CPU has the more
|
||||
likely is that you run into underperforming, buggy, or incomplete code.
|
||||
likely it is that you will run into underperforming, buggy, or incomplete code.
|
||||
</p>
|
||||
<p>
|
||||
See /proc/cpuinfo to know what your CPU supports.
|
||||
@@ -50,6 +50,12 @@ It's the fastest software rasterizer for Mesa.
|
||||
<pre>
|
||||
aptitude install llvm-dev
|
||||
</pre>
|
||||
<p>
|
||||
If you want development snapshot builds of LLVM for Debian and derived
|
||||
distributions like Ubuntu, you can use the APT repository at <a
|
||||
href="https://apt.llvm.org/" title="Debian Development packages for LLVM"
|
||||
>apt.llvm.org</a>, which are maintained by Debian's LLVM maintainer.
|
||||
</p>
|
||||
<p>
|
||||
For a RPM-based distribution do:
|
||||
</p>
|
||||
@@ -228,8 +234,8 @@ build/linux-???-debug/gallium/drivers/llvmpipe:
|
||||
</ul>
|
||||
|
||||
<p>
|
||||
Some of this tests can output results and benchmarks to a tab-separated-file
|
||||
for posterior analysis, e.g.:
|
||||
Some of these tests can output results and benchmarks to a tab-separated file
|
||||
for later analysis, e.g.:
|
||||
</p>
|
||||
<pre>
|
||||
build/linux-x86_64-debug/gallium/drivers/llvmpipe/lp_test_blend -o blend.tsv
|
||||
@@ -240,8 +246,8 @@ for posterior analysis, e.g.:
|
||||
|
||||
<ul>
|
||||
<li>
|
||||
When looking to this code by the first time start in lp_state_fs.c, and
|
||||
then skim through the lp_bld_* functions called in there, and the comments
|
||||
When looking at this code for the first time, start in lp_state_fs.c, and
|
||||
then skim through the lp_bld_* functions called there, and the comments
|
||||
at the top of the lp_bld_*.c functions.
|
||||
</li>
|
||||
<li>
|
||||
|
@@ -14,7 +14,7 @@
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 17.3.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 17.3.0 Release Notes / December 8. 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 17.3.0 is a new development release.
|
||||
@@ -58,14 +58,187 @@ Note: some of the new features are only available with certain drivers.
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
TBD
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97532">Bug 97532</a> - Regression: GLB 2.7 & Glmark-2 GLES versions segfault due to linker precision error (259fc505) on dead variable</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100438">Bug 100438</a> - glsl/ir.cpp:1376: ir_dereference_variable::ir_dereference_variable(ir_variable*): Assertion `var != NULL' failed.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100613">Bug 100613</a> - Regression in Mesa 17 on s390x (zSystems)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101334">Bug 101334</a> - AMD SI cards: Some vulkan apps freeze the system</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101378">Bug 101378</a> - interpolateAtSample check for input parameter is too strict</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101655">Bug 101655</a> - Explicit sync support for android</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101691">Bug 101691</a> - gfx corruption on windowed 3d-apps running on dGPU</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101709">Bug 101709</a> - [llvmpipe] piglit gl-1.0-scissor-offscreen regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101766">Bug 101766</a> - Assertion `!"invalid type"' failed when constant expression involves literal of different type</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101832">Bug 101832</a> - [PATCH][regression][bisect] Xorg fails to start after f50aa21456d82c8cb6fbaa565835f1acc1720a5d</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101851">Bug 101851</a> - [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101867">Bug 101867</a> - Launch options window renders black in Feral Games in current Mesa trunk</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101876">Bug 101876</a> - SIGSEGV when launching Steam</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101910">Bug 101910</a> - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101925">Bug 101925</a> - playstore/webview crash</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101941">Bug 101941</a> - Getting different output depending on attribute declaration order</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101961">Bug 101961</a> - Serious Sam Fusion hangs system completely</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101981">Bug 101981</a> - Commit ddc32537d6db69198e88ef0dfe19770bf9daa536 breaks rendering in multiple applications</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101982">Bug 101982</a> - Weston crashes when running an OpenGL program on i965</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101983">Bug 101983</a> - [G33] ES2-CTS.functional.shaders.struct.uniform.sampler_nested* regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101989">Bug 101989</a> - ES3-CTS.functional.state_query.integers.viewport_getinteger regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102006">Bug 102006</a> - gstreamer vaapih264enc segfault</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102014">Bug 102014</a> - Mesa git build broken by commit bc7f41e11d325280db12e7b9444501357bc13922</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102015">Bug 102015</a> - [Regression,bisected]: Segfaults with various programs</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102024">Bug 102024</a> - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102038">Bug 102038</a> - assertion failure in update_framebuffer_size</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102050">Bug 102050</a> - commit b4f639d02a causes build breakage on Android 32bit builds</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102052">Bug 102052</a> - No package 'expat' found</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102062">Bug 102062</a> - Segfault at eglCreateContext in android-x86</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102125">Bug 102125</a> - [softpipe] piglit arb_texture_view-targets regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102148">Bug 102148</a> - Crash when running qopenglwidget example on mesa llvmpipe win32</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102177">Bug 102177</a> - [SKL] ES31-CTS.core.sepshaderobjs.StateInteraction fails sporadically</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102201">Bug 102201</a> - [regression, SI] GPU crash in Unigine Valley</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102241">Bug 102241</a> - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102274">Bug 102274</a> - assertion failure in ir_validate.cpp:240</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102308">Bug 102308</a> - segfault in glCompressedTextureSubImage3D</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102358">Bug 102358</a> - WarThunder freezes at start, with activated vsync (vblank_mode=2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102377">Bug 102377</a> - PIPE_*_4BYTE_ALIGNED_ONLY caps crashing</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102429">Bug 102429</a> - [regression, SI] Performance decrease in Unigine Valley & Heaven</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102435">Bug 102435</a> - [skl,kbl] [drm] GPU HANG: ecode 9:0:0x86df7cf9, in csgo_linux64 [4947], reason: Hang on rcs, action: reset</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102454">Bug 102454</a> - glibc 2.26 doesn't provide anymore xlocale.h</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102461">Bug 102461</a> - [llvmpipe] piglit glean fragprog1 XPD test 1 regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102467">Bug 102467</a> - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102496">Bug 102496</a> - Frontbuffer rendering corruption on mesa master</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102502">Bug 102502</a> - [bisected] Kodi crashes since commit 707d2e8b - gallium: fold u_trim_pipe_prim call from st/mesa to drivers</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102530">Bug 102530</a> - [bisected] Kodi crashes when launching a stream - commit bd2662bf</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102552">Bug 102552</a> - Null dereference due to not checking return value of util_format_description</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102565">Bug 102565</a> - u_debug_stack.c:114: undefined reference to `_Ux86_64_getcontext'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102573">Bug 102573</a> - fails to build on armel</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102665">Bug 102665</a> - test_glsl_to_tgsi_lifetime.cpp:53:67: error: ‘>>’ should be ‘> >’ within a nested template argument list</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102677">Bug 102677</a> - [OpenGL CTS] KHR-GL45.CommonBugs.CommonBug_PerVertexValidation fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102680">Bug 102680</a> - [OpenGL CTS] KHR-GL45.shader_ballot_tests.ShaderBallotBitmasks fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102685">Bug 102685</a> - piglit.spec.glsl-1_50.compiler.vs-redeclares-pervertex-out-before-global-redeclaration</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102774">Bug 102774</a> - [BDW] [Bisected] Absolute constant buffers break VAAPI in mpv</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102809">Bug 102809</a> - Rust shadows(?) flash random colours</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102844">Bug 102844</a> - memory leak with glDeleteProgram for shader program type GL_COMPUTE_SHADER</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102847">Bug 102847</a> - swr fail to build with llvm-5.0.0</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102852">Bug 102852</a> - Scons: Support the new Scons 3.0.0</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102904">Bug 102904</a> - piglit and gl45 cts linker tests regressed</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102924">Bug 102924</a> - mesa (git version) images too dark</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102940">Bug 102940</a> - Regression: Vulkan KMS rendering crashes since 17.2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102955">Bug 102955</a> - HyperZ related rendering issue in ARK: Survival Evolved</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102999">Bug 102999</a> - [BISECTED,REGRESSION] Failing Android EGL dEQP with RGBA configs</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103002">Bug 103002</a> - string_buffer_test.cpp:43: error: ISO C++ forbids initialization of member ‘str1’</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103085">Bug 103085</a> - [ivb byt hsw] piglit.spec.arb_indirect_parameters.tf-count-arrays</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103098">Bug 103098</a> - [OpenGL CTS] KHR-GL45.enhanced_layouts.varying_structure_locations fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103101">Bug 103101</a> - [SKL][bisected] DiRT Rally GPU hang</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103115">Bug 103115</a> - [BSW BXT GLK] dEQP-VK.spirv_assembly.instruction.compute.sconvert.int32_to_int64</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103128">Bug 103128</a> - [softpipe] piglit fs-ldexp regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103142">Bug 103142</a> - R600g+sb: optimizer apparently stuck in an endless loop</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103214">Bug 103214</a> - GLES CTS functional.state_query.indexed.atomic_counter regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103227">Bug 103227</a> - [G965 G45 ILK] ES2-CTS.gtf.GL2ExtensionTests.texture_float.texture_float regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103247">Bug 103247</a> - Performance regression: car chase, manhattan</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103253">Bug 103253</a> - blob.h:138:1: error: unknown type name 'ssize_t'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103265">Bug 103265</a> - [llvmpipe] piglit depth-tex-compare regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103323">Bug 103323</a> - Possible unintended error message in file pixel.c line 286</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103388">Bug 103388</a> - Linking libcltgsi.la (llvm/codegen/libclllvm_la-common.lo) fails with "error: no match for 'operator-'" with GCC-7, Mesa from Git and current LLVM revisions</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103393">Bug 103393</a> - glDispatchComputeGroupSizeARB : gl_GlobalInvocationID.x != gl_WorkGroupID.x * gl_LocalGroupSizeARB.x + gl_LocalInvocationID.x</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103412">Bug 103412</a> - gallium/wgl: Another fix to context creation without prior SetPixelFormat()</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103519">Bug 103519</a> - wayland egl apps crash on start with mesa 17.2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103529">Bug 103529</a> - [GM45] GPU hang with mpv fullscreen (bisected)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103537">Bug 103537</a> - i965: Shadow of Mordor broken since commit 379b24a40d3d34ffdaaeb1b328f50e28ecb01468 on Haswell</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103544">Bug 103544</a> - Graphical glitches r600 in game this war of mine linux native</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103616">Bug 103616</a> - Increased difference from reference image in shaders</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103628">Bug 103628</a> - [BXT, GLK, BSW] KHR-GL46.shader_ballot_tests.ShaderBallotBitmasks</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103759">Bug 103759</a> - plasma desktop corrupted rendering</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103787">Bug 103787</a> - [BDW,BSW] gpu hang on spec.arb_pipeline_statistics_query.arb_pipeline_statistics_query-comp</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=103909">Bug 103909</a> - anv_allocator.c:113:1: error: static declaration of ‘memfd_create’ follows non-static declaration</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<ul>
|
||||
TBD
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
|
@@ -227,19 +227,28 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
|
||||
val = ((ir_swizzle *)val)->val;
|
||||
}
|
||||
|
||||
while (val->ir_type == ir_type_dereference_array) {
|
||||
val = ((ir_dereference_array *)val)->array;
|
||||
for (;;) {
|
||||
if (val->ir_type == ir_type_dereference_array) {
|
||||
val = ((ir_dereference_array *)val)->array;
|
||||
} else if (val->ir_type == ir_type_dereference_record &&
|
||||
!state->es_shader) {
|
||||
val = ((ir_dereference_record *)val)->record;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
|
||||
if (!val->as_dereference_variable() ||
|
||||
val->variable_referenced()->data.mode != ir_var_shader_in) {
|
||||
ir_variable *var = NULL;
|
||||
if (const ir_dereference_variable *deref_var = val->as_dereference_variable())
|
||||
var = deref_var->variable_referenced();
|
||||
|
||||
if (!var || var->data.mode != ir_var_shader_in) {
|
||||
_mesa_glsl_error(&loc, state,
|
||||
"parameter `%s` must be a shader input",
|
||||
formal->name);
|
||||
return false;
|
||||
}
|
||||
|
||||
val->variable_referenced()->data.must_be_shader_input = 1;
|
||||
var->data.must_be_shader_input = 1;
|
||||
}
|
||||
|
||||
/* Verify that 'out' and 'inout' actual parameters are lvalues. */
|
||||
@@ -667,8 +676,13 @@ generate_array_index(void *mem_ctx, exec_list *instructions,
|
||||
ir_variable *sub_var = NULL;
|
||||
*function_name = array->primary_expression.identifier;
|
||||
|
||||
match_subroutine_by_name(*function_name, actual_parameters,
|
||||
state, &sub_var);
|
||||
if (!match_subroutine_by_name(*function_name, actual_parameters,
|
||||
state, &sub_var)) {
|
||||
_mesa_glsl_error(&loc, state, "Unknown subroutine `%s'",
|
||||
*function_name);
|
||||
*function_name = NULL; /* indicate error condition to caller */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ir_rvalue *outer_array_idx = idx->hir(instructions, state);
|
||||
return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx);
|
||||
|
@@ -1863,6 +1863,49 @@ set_shader_inout_layout(struct gl_shader *shader,
|
||||
shader->bound_image = state->bound_image_specified;
|
||||
}
|
||||
|
||||
/* src can be NULL if only the symbols found in the exec_list should be
|
||||
* copied
|
||||
*/
|
||||
void
|
||||
_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
|
||||
struct glsl_symbol_table *src,
|
||||
struct glsl_symbol_table *dest)
|
||||
{
|
||||
foreach_in_list (ir_instruction, ir, shader_ir) {
|
||||
switch (ir->ir_type) {
|
||||
case ir_type_function:
|
||||
dest->add_function((ir_function *) ir);
|
||||
break;
|
||||
case ir_type_variable: {
|
||||
ir_variable *const var = (ir_variable *) ir;
|
||||
|
||||
if (var->data.mode != ir_var_temporary)
|
||||
dest->add_variable(var);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (src != NULL) {
|
||||
/* Explicitly copy the gl_PerVertex interface definitions because these
|
||||
* are needed to check they are the same during the interstage link.
|
||||
* They can’t necessarily be found via the exec_list because the members
|
||||
* might not be referenced. The GL spec still requires that they match
|
||||
* in that case.
|
||||
*/
|
||||
const glsl_type *iface =
|
||||
src->get_interface("gl_PerVertex", ir_var_shader_in);
|
||||
if (iface)
|
||||
dest->add_interface(iface->name, iface, ir_var_shader_in);
|
||||
|
||||
iface = src->get_interface("gl_PerVertex", ir_var_shader_out);
|
||||
if (iface)
|
||||
dest->add_interface(iface->name, iface, ir_var_shader_out);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
static void
|
||||
@@ -1936,6 +1979,7 @@ do_late_parsing_checks(struct _mesa_glsl_parse_state *state)
|
||||
|
||||
static void
|
||||
opt_shader_and_create_symbol_table(struct gl_context *ctx,
|
||||
struct glsl_symbol_table *source_symbols,
|
||||
struct gl_shader *shader)
|
||||
{
|
||||
assert(shader->CompileStatus != compile_failure &&
|
||||
@@ -1993,22 +2037,8 @@ opt_shader_and_create_symbol_table(struct gl_context *ctx,
|
||||
* We don't have to worry about types or interface-types here because those
|
||||
* are fly-weights that are looked up by glsl_type.
|
||||
*/
|
||||
foreach_in_list (ir_instruction, ir, shader->ir) {
|
||||
switch (ir->ir_type) {
|
||||
case ir_type_function:
|
||||
shader->symbols->add_function((ir_function *) ir);
|
||||
break;
|
||||
case ir_type_variable: {
|
||||
ir_variable *const var = (ir_variable *) ir;
|
||||
|
||||
if (var->data.mode != ir_var_temporary)
|
||||
shader->symbols->add_variable(var);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
_mesa_glsl_copy_symbols_from_table(shader->ir, source_symbols,
|
||||
shader->symbols);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -2045,7 +2075,9 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
|
||||
return;
|
||||
|
||||
if (shader->CompileStatus == compiled_no_opts) {
|
||||
opt_shader_and_create_symbol_table(ctx, shader);
|
||||
opt_shader_and_create_symbol_table(ctx,
|
||||
NULL, /* source_symbols */
|
||||
shader);
|
||||
shader->CompileStatus = compile_success;
|
||||
return;
|
||||
}
|
||||
@@ -2106,7 +2138,7 @@ _mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader,
|
||||
lower_subroutine(shader->ir, state);
|
||||
|
||||
if (!ctx->Cache || force_recompile)
|
||||
opt_shader_and_create_symbol_table(ctx, shader);
|
||||
opt_shader_and_create_symbol_table(ctx, state->symbols, shader);
|
||||
else {
|
||||
reparent_ir(shader->ir, shader->ir);
|
||||
shader->CompileStatus = compiled_no_opts;
|
||||
|
@@ -948,6 +948,11 @@ extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log,
|
||||
extern void _mesa_destroy_shader_compiler(void);
|
||||
extern void _mesa_destroy_shader_compiler_caches(void);
|
||||
|
||||
extern void
|
||||
_mesa_glsl_copy_symbols_from_table(struct exec_list *shader_ir,
|
||||
struct glsl_symbol_table *src,
|
||||
struct glsl_symbol_table *dest);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@@ -364,6 +364,35 @@ validate_interstage_inout_blocks(struct gl_shader_program *prog,
|
||||
consumer->Stage != MESA_SHADER_FRAGMENT) ||
|
||||
consumer->Stage == MESA_SHADER_GEOMETRY;
|
||||
|
||||
/* Check that block re-declarations of gl_PerVertex are compatible
|
||||
* across shaders: From OpenGL Shading Language 4.5, section
|
||||
* "7.1 Built-In Language Variables", page 130 of the PDF:
|
||||
*
|
||||
* "If multiple shaders using members of a built-in block belonging
|
||||
* to the same interface are linked together in the same program,
|
||||
* they must all redeclare the built-in block in the same way, as
|
||||
* described in section 4.3.9 “Interface Blocks” for interface-block
|
||||
* matching, or a link-time error will result."
|
||||
*
|
||||
* This is done explicitly outside of iterating the member variable
|
||||
* declarations because it is possible that the variables are not used and
|
||||
* so they would have been optimised out.
|
||||
*/
|
||||
const glsl_type *consumer_iface =
|
||||
consumer->symbols->get_interface("gl_PerVertex",
|
||||
ir_var_shader_in);
|
||||
|
||||
const glsl_type *producer_iface =
|
||||
producer->symbols->get_interface("gl_PerVertex",
|
||||
ir_var_shader_out);
|
||||
|
||||
if (producer_iface && consumer_iface &&
|
||||
interstage_member_mismatch(prog, consumer_iface, producer_iface)) {
|
||||
linker_error(prog, "Incompatible or missing gl_PerVertex re-declaration "
|
||||
"in consecutive shaders");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Add output interfaces from the producer to the symbol table. */
|
||||
foreach_in_list(ir_instruction, node, producer->ir) {
|
||||
ir_variable *var = node->as_variable();
|
||||
|
@@ -1261,21 +1261,11 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
|
||||
* Populates a shaders symbol table with all global declarations
|
||||
*/
|
||||
static void
|
||||
populate_symbol_table(gl_linked_shader *sh)
|
||||
populate_symbol_table(gl_linked_shader *sh, glsl_symbol_table *symbols)
|
||||
{
|
||||
sh->symbols = new(sh) glsl_symbol_table;
|
||||
|
||||
foreach_in_list(ir_instruction, inst, sh->ir) {
|
||||
ir_variable *var;
|
||||
ir_function *func;
|
||||
|
||||
if ((func = inst->as_function()) != NULL) {
|
||||
sh->symbols->add_function(func);
|
||||
} else if ((var = inst->as_variable()) != NULL) {
|
||||
if (var->data.mode != ir_var_temporary)
|
||||
sh->symbols->add_variable(var);
|
||||
}
|
||||
}
|
||||
_mesa_glsl_copy_symbols_from_table(sh->ir, symbols, sh->symbols);
|
||||
}
|
||||
|
||||
|
||||
@@ -2293,7 +2283,7 @@ link_intrastage_shaders(void *mem_ctx,
|
||||
|
||||
link_bindless_layout_qualifiers(prog, shader_list, num_shaders);
|
||||
|
||||
populate_symbol_table(linked);
|
||||
populate_symbol_table(linked, shader_list[0]->symbols);
|
||||
|
||||
/* The pointer to the main function in the final linked shader (i.e., the
|
||||
* copy of the original shader that contained the main function).
|
||||
|
@@ -115,6 +115,7 @@ public:
|
||||
void run(exec_list *instructions);
|
||||
|
||||
virtual ir_visitor_status visit_leave(ir_assignment *);
|
||||
virtual ir_visitor_status visit_leave(ir_expression *);
|
||||
virtual void handle_rvalue(ir_rvalue **rvalue);
|
||||
};
|
||||
|
||||
@@ -238,6 +239,23 @@ flatten_named_interface_blocks_declarations::visit_leave(ir_assignment *ir)
|
||||
return rvalue_visit(ir);
|
||||
}
|
||||
|
||||
ir_visitor_status
|
||||
flatten_named_interface_blocks_declarations::visit_leave(ir_expression *ir)
|
||||
{
|
||||
ir_visitor_status status = rvalue_visit(ir);
|
||||
|
||||
if (ir->operation == ir_unop_interpolate_at_centroid ||
|
||||
ir->operation == ir_binop_interpolate_at_offset ||
|
||||
ir->operation == ir_binop_interpolate_at_sample) {
|
||||
const ir_rvalue *val = ir->operands[0];
|
||||
|
||||
/* This disables varying packing for this input. */
|
||||
val->variable_referenced()->data.must_be_shader_input = 1;
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void
|
||||
flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue)
|
||||
{
|
||||
|
@@ -128,7 +128,36 @@ ir_vec_index_to_cond_assign_visitor::convert_vector_extract_to_cond_assign(ir_rv
|
||||
{
|
||||
ir_expression *const expr = ir->as_expression();
|
||||
|
||||
if (expr == NULL || expr->operation != ir_binop_vector_extract)
|
||||
if (expr == NULL)
|
||||
return ir;
|
||||
|
||||
if (expr->operation == ir_unop_interpolate_at_centroid ||
|
||||
expr->operation == ir_binop_interpolate_at_offset ||
|
||||
expr->operation == ir_binop_interpolate_at_sample) {
|
||||
/* Lower interpolateAtXxx(some_vec[idx], ...) to
|
||||
* interpolateAtXxx(some_vec, ...)[idx] before lowering to conditional
|
||||
* assignments, to maintain the rule that the interpolant is an l-value
|
||||
* referring to a (part of a) shader input.
|
||||
*
|
||||
* This is required when idx is dynamic (otherwise it gets lowered to
|
||||
* a swizzle).
|
||||
*/
|
||||
ir_expression *const interpolant = expr->operands[0]->as_expression();
|
||||
if (!interpolant || interpolant->operation != ir_binop_vector_extract)
|
||||
return ir;
|
||||
|
||||
ir_rvalue *vec_input = interpolant->operands[0];
|
||||
ir_expression *const vec_interpolate =
|
||||
new(base_ir) ir_expression(expr->operation, vec_input->type,
|
||||
vec_input, expr->operands[1]);
|
||||
|
||||
return convert_vec_index_to_cond_assign(ralloc_parent(ir),
|
||||
vec_interpolate,
|
||||
interpolant->operands[1],
|
||||
ir->type);
|
||||
}
|
||||
|
||||
if (expr->operation != ir_binop_vector_extract)
|
||||
return ir;
|
||||
|
||||
return convert_vec_index_to_cond_assign(ralloc_parent(ir),
|
||||
|
@@ -847,7 +847,7 @@ dd_copy_draw_state(struct dd_draw_state *dst, struct dd_draw_state *src)
|
||||
}
|
||||
|
||||
dst->num_so_targets = src->num_so_targets;
|
||||
for (i = 0; i < ARRAY_SIZE(src->so_targets); i++)
|
||||
for (i = 0; i < src->num_so_targets; i++)
|
||||
pipe_so_target_reference(&dst->so_targets[i], src->so_targets[i]);
|
||||
memcpy(dst->so_offsets, src->so_offsets, sizeof(src->so_offsets));
|
||||
|
||||
|
@@ -71,9 +71,12 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
|
||||
} else if (cfop->flags & CF_CLAUSE) {
|
||||
/* CF_TEX/VTX (CF_ALU already handled above) */
|
||||
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
|
||||
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode) |
|
||||
bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
|
||||
S_SQ_CF_WORD1_BARRIER(1) |
|
||||
S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
|
||||
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
|
||||
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
id++;
|
||||
} else if (cfop->flags & CF_EXP) {
|
||||
/* EXPORT instructions */
|
||||
bc->bytecode[id++] = S_SQ_CF_ALLOC_EXPORT_WORD0_RW_GPR(cf->output.gpr) |
|
||||
@@ -111,12 +114,14 @@ int eg_bytecode_cf_build(struct r600_bytecode *bc, struct r600_bytecode_cf *cf)
|
||||
} else {
|
||||
/* other instructions */
|
||||
bc->bytecode[id++] = S_SQ_CF_WORD0_ADDR(cf->cf_addr >> 1);
|
||||
bc->bytecode[id++] = S_SQ_CF_WORD1_CF_INST(opcode)|
|
||||
bc->bytecode[id] = S_SQ_CF_WORD1_CF_INST(opcode) |
|
||||
S_SQ_CF_WORD1_BARRIER(1) |
|
||||
S_SQ_CF_WORD1_COND(cf->cond) |
|
||||
S_SQ_CF_WORD1_POP_COUNT(cf->pop_count) |
|
||||
S_SQ_CF_WORD1_COUNT(cf->count) |
|
||||
S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
S_SQ_CF_WORD1_COUNT(cf->count);
|
||||
if (bc->chip_class == EVERGREEN) /* no EOP on cayman */
|
||||
bc->bytecode[id] |= S_SQ_CF_ALLOC_EXPORT_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
id++;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
@@ -746,8 +746,9 @@ void evergreen_emit_cs_shader(struct r600_context *rctx,
|
||||
radeon_compute_set_context_reg_seq(cs, R_0288D0_SQ_PGM_START_LS, 3);
|
||||
radeon_emit(cs, va >> 8); /* R_0288D0_SQ_PGM_START_LS */
|
||||
radeon_emit(cs, /* R_0288D4_SQ_PGM_RESOURCES_LS */
|
||||
S_0288D4_NUM_GPRS(ngpr)
|
||||
| S_0288D4_STACK_SIZE(nstack));
|
||||
S_0288D4_NUM_GPRS(ngpr) |
|
||||
S_0288D4_DX10_CLAMP(1) |
|
||||
S_0288D4_STACK_SIZE(nstack));
|
||||
radeon_emit(cs, 0); /* R_0288D8_SQ_PGM_RESOURCES_LS_2 */
|
||||
|
||||
radeon_emit(cs, PKT3C(PKT3_NOP, 0, 0));
|
||||
|
@@ -3232,6 +3232,7 @@ void evergreen_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader
|
||||
r600_store_value(cb, /* R_028844_SQ_PGM_RESOURCES_PS */
|
||||
S_028844_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_028844_PRIME_CACHE_ON_DRAW(1) |
|
||||
S_028844_DX10_CLAMP(1) |
|
||||
S_028844_STACK_SIZE(rshader->bc.nstack));
|
||||
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
|
||||
|
||||
@@ -3252,6 +3253,7 @@ void evergreen_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader
|
||||
|
||||
r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
|
||||
S_028890_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_028890_DX10_CLAMP(1) |
|
||||
S_028890_STACK_SIZE(rshader->bc.nstack));
|
||||
r600_store_context_reg(cb, R_02888C_SQ_PGM_START_ES,
|
||||
shader->bo->gpu_address >> 8);
|
||||
@@ -3314,6 +3316,7 @@ void evergreen_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader
|
||||
|
||||
r600_store_context_reg(cb, R_028878_SQ_PGM_RESOURCES_GS,
|
||||
S_028878_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_028878_DX10_CLAMP(1) |
|
||||
S_028878_STACK_SIZE(rshader->bc.nstack));
|
||||
r600_store_context_reg(cb, R_028874_SQ_PGM_START_GS,
|
||||
shader->bo->gpu_address >> 8);
|
||||
@@ -3354,6 +3357,7 @@ void evergreen_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader
|
||||
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
|
||||
r600_store_context_reg(cb, R_028860_SQ_PGM_RESOURCES_VS,
|
||||
S_028860_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_028860_DX10_CLAMP(1) |
|
||||
S_028860_STACK_SIZE(rshader->bc.nstack));
|
||||
if (rshader->vs_position_window_space) {
|
||||
r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
|
||||
@@ -3388,6 +3392,7 @@ void evergreen_update_hs_state(struct pipe_context *ctx, struct r600_pipe_shader
|
||||
r600_init_command_buffer(cb, 32);
|
||||
r600_store_context_reg(cb, R_0288BC_SQ_PGM_RESOURCES_HS,
|
||||
S_0288BC_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_0288BC_DX10_CLAMP(1) |
|
||||
S_0288BC_STACK_SIZE(rshader->bc.nstack));
|
||||
r600_store_context_reg(cb, R_0288B8_SQ_PGM_START_HS,
|
||||
shader->bo->gpu_address >> 8);
|
||||
@@ -3401,6 +3406,7 @@ void evergreen_update_ls_state(struct pipe_context *ctx, struct r600_pipe_shader
|
||||
r600_init_command_buffer(cb, 32);
|
||||
r600_store_context_reg(cb, R_0288D4_SQ_PGM_RESOURCES_LS,
|
||||
S_0288D4_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_0288D4_DX10_CLAMP(1) |
|
||||
S_0288D4_STACK_SIZE(rshader->bc.nstack));
|
||||
r600_store_context_reg(cb, R_0288D0_SQ_PGM_START_LS,
|
||||
shader->bo->gpu_address >> 8);
|
||||
|
@@ -1625,7 +1625,8 @@ static void r600_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_byt
|
||||
*bytecode++ = S_SQ_CF_WORD0_ADDR(cf->addr >> 1);
|
||||
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R600, cf->op)) |
|
||||
S_SQ_CF_WORD1_BARRIER(1) |
|
||||
S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1);
|
||||
S_SQ_CF_WORD1_COUNT((cf->ndw / 4) - 1)|
|
||||
S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
}
|
||||
|
||||
/* common for r600/r700 - eg in eg_asm.c */
|
||||
@@ -2088,6 +2089,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
|
||||
bc->bytecode[id + 1], cfop->name);
|
||||
fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
|
||||
fprintf(stderr, "\n");
|
||||
if (cf->end_of_program)
|
||||
fprintf(stderr, "EOP ");
|
||||
} else if (cfop->flags & CF_EXP) {
|
||||
int o = 0;
|
||||
const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
|
||||
|
@@ -3660,7 +3660,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
last = r600_isa_cf(ctx.bc->cf_last->op);
|
||||
|
||||
/* alu clause instructions don't have EOP bit, so add NOP */
|
||||
if (!last || last->flags & CF_ALU || ctx.bc->cf_last->op == CF_OP_LOOP_END || ctx.bc->cf_last->op == CF_OP_CALL_FS || ctx.bc->cf_last->op == CF_OP_POP || ctx.bc->cf_last->op == CF_OP_GDS)
|
||||
if (!last || last->flags & CF_ALU)
|
||||
r600_bytecode_add_cfinst(ctx.bc, CF_OP_NOP);
|
||||
|
||||
ctx.bc->cf_last->end_of_program = 1;
|
||||
@@ -8853,8 +8853,9 @@ static const struct r600_shader_tgsi_instruction r600_shader_tgsi_instruction[]
|
||||
[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
|
||||
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
|
||||
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
|
||||
/* MIN_DX10 returns non-nan result if one src is NaN, MIN returns NaN */
|
||||
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
|
||||
[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
|
||||
@@ -9051,8 +9052,8 @@ static const struct r600_shader_tgsi_instruction eg_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
|
||||
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
|
||||
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
|
||||
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
|
||||
[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
|
||||
@@ -9274,8 +9275,8 @@ static const struct r600_shader_tgsi_instruction cm_shader_tgsi_instruction[] =
|
||||
[TGSI_OPCODE_DP3] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[TGSI_OPCODE_DP4] = { ALU_OP2_DOT4_IEEE, tgsi_dp},
|
||||
[TGSI_OPCODE_DST] = { ALU_OP0_NOP, tgsi_opdst},
|
||||
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN, tgsi_op2},
|
||||
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX, tgsi_op2},
|
||||
[TGSI_OPCODE_MIN] = { ALU_OP2_MIN_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_MAX] = { ALU_OP2_MAX_DX10, tgsi_op2},
|
||||
[TGSI_OPCODE_SLT] = { ALU_OP2_SETGT, tgsi_op2_swap},
|
||||
[TGSI_OPCODE_SGE] = { ALU_OP2_SETGE, tgsi_op2},
|
||||
[TGSI_OPCODE_MAD] = { ALU_OP3_MULADD_IEEE, tgsi_op3},
|
||||
|
@@ -2548,6 +2548,12 @@ void r600_update_ps_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
||||
r600_store_context_reg_seq(cb, R_028850_SQ_PGM_RESOURCES_PS, 2);
|
||||
r600_store_value(cb, /* R_028850_SQ_PGM_RESOURCES_PS*/
|
||||
S_028850_NUM_GPRS(rshader->bc.ngpr) |
|
||||
/*
|
||||
* docs are misleading about the dx10_clamp bit. This only affects
|
||||
* instructions using CLAMP dst modifier, in which case they will
|
||||
* return 0 with this set for a NaN (otherwise NaN).
|
||||
*/
|
||||
S_028850_DX10_CLAMP(1) |
|
||||
S_028850_STACK_SIZE(rshader->bc.nstack) |
|
||||
S_028850_UNCACHED_FIRST_INST(ufi));
|
||||
r600_store_value(cb, exports_ps); /* R_028854_SQ_PGM_EXPORTS_PS */
|
||||
@@ -2597,6 +2603,7 @@ void r600_update_vs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
||||
S_0286C4_VS_EXPORT_COUNT(nparams - 1));
|
||||
r600_store_context_reg(cb, R_028868_SQ_PGM_RESOURCES_VS,
|
||||
S_028868_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_028868_DX10_CLAMP(1) |
|
||||
S_028868_STACK_SIZE(rshader->bc.nstack));
|
||||
if (rshader->vs_position_window_space) {
|
||||
r600_store_context_reg(cb, R_028818_PA_CL_VTE_CNTL,
|
||||
@@ -2681,6 +2688,7 @@ void r600_update_gs_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
||||
|
||||
r600_store_context_reg(cb, R_02887C_SQ_PGM_RESOURCES_GS,
|
||||
S_02887C_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_02887C_DX10_CLAMP(1) |
|
||||
S_02887C_STACK_SIZE(rshader->bc.nstack));
|
||||
r600_store_context_reg(cb, R_02886C_SQ_PGM_START_GS, 0);
|
||||
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
|
||||
@@ -2695,6 +2703,7 @@ void r600_update_es_state(struct pipe_context *ctx, struct r600_pipe_shader *sha
|
||||
|
||||
r600_store_context_reg(cb, R_028890_SQ_PGM_RESOURCES_ES,
|
||||
S_028890_NUM_GPRS(rshader->bc.ngpr) |
|
||||
S_028890_DX10_CLAMP(1) |
|
||||
S_028890_STACK_SIZE(rshader->bc.nstack));
|
||||
r600_store_context_reg(cb, R_028880_SQ_PGM_START_ES, 0);
|
||||
/* After that, the NOP relocation packet must be emitted (shader->bo, RADEON_USAGE_READ). */
|
||||
|
@@ -30,7 +30,8 @@ void r700_bytecode_cf_vtx_build(uint32_t *bytecode, const struct r600_bytecode_c
|
||||
*bytecode++ = S_SQ_CF_WORD1_CF_INST(r600_isa_cf_opcode(ISA_CC_R700, cf->op)) |
|
||||
S_SQ_CF_WORD1_BARRIER(1) |
|
||||
S_SQ_CF_WORD1_COUNT(count) |
|
||||
S_SQ_CF_WORD1_COUNT_3(count >> 3);
|
||||
S_SQ_CF_WORD1_COUNT_3(count >> 3)|
|
||||
S_SQ_CF_WORD1_END_OF_PROGRAM(cf->end_of_program);
|
||||
}
|
||||
|
||||
int r700_bytecode_alu_build(struct r600_bytecode *bc, struct r600_bytecode_alu *alu, unsigned id)
|
||||
|
@@ -933,6 +933,11 @@ void bc_finalizer::cf_peephole() {
|
||||
cf_node *c = static_cast<cf_node*>(*I);
|
||||
|
||||
if (c->jump_after_target) {
|
||||
if (c->jump_target->next == NULL) {
|
||||
c->jump_target->insert_after(sh.create_cf(CF_OP_NOP));
|
||||
if (last_cf == c->jump_target)
|
||||
last_cf = static_cast<cf_node*>(c->jump_target->next);
|
||||
}
|
||||
c->jump_target = static_cast<cf_node*>(c->jump_target->next);
|
||||
c->jump_after_target = false;
|
||||
}
|
||||
|
@@ -753,7 +753,9 @@ bool expr_handler::fold_alu_op2(alu_node& n) {
|
||||
n.bc.src[0].abs == n.bc.src[1].abs) {
|
||||
switch (n.bc.op) {
|
||||
case ALU_OP2_MIN: // (MIN x, x) => (MOV x)
|
||||
case ALU_OP2_MIN_DX10:
|
||||
case ALU_OP2_MAX:
|
||||
case ALU_OP2_MAX_DX10:
|
||||
convert_to_mov(n, v0, n.bc.src[0].neg, n.bc.src[0].abs);
|
||||
return fold_alu_op1(n);
|
||||
case ALU_OP2_ADD: // (ADD x, x) => (MUL x, 2)
|
||||
|
@@ -2506,8 +2506,11 @@ void vi_dcc_clear_level(struct r600_common_context *rctx,
|
||||
assert(rtex->resource.b.b.nr_samples <= 1);
|
||||
clear_size = rtex->surface.dcc_size;
|
||||
} else {
|
||||
unsigned num_layers = util_max_layer(&rtex->resource.b.b, level) + 1;
|
||||
|
||||
dcc_offset += rtex->surface.u.legacy.level[level].dcc_offset;
|
||||
clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size;
|
||||
clear_size = rtex->surface.u.legacy.level[level].dcc_fast_clear_size *
|
||||
num_layers;
|
||||
}
|
||||
|
||||
rctx->clear_buffer(&rctx->b, dcc_buffer, dcc_offset, clear_size,
|
||||
|
@@ -5902,11 +5902,13 @@ static void si_get_vs_prolog_key(const struct tgsi_shader_info *info,
|
||||
key->vs_prolog.num_input_sgprs = num_input_sgprs;
|
||||
key->vs_prolog.last_input = MAX2(1, info->num_inputs) - 1;
|
||||
key->vs_prolog.as_ls = shader_out->key.as_ls;
|
||||
key->vs_prolog.as_es = shader_out->key.as_es;
|
||||
|
||||
if (shader_out->selector->type == PIPE_SHADER_TESS_CTRL) {
|
||||
key->vs_prolog.as_ls = 1;
|
||||
key->vs_prolog.num_merged_next_stage_vgprs = 2;
|
||||
} else if (shader_out->selector->type == PIPE_SHADER_GEOMETRY) {
|
||||
key->vs_prolog.as_es = 1;
|
||||
key->vs_prolog.num_merged_next_stage_vgprs = 5;
|
||||
}
|
||||
|
||||
@@ -6787,6 +6789,8 @@ si_get_shader_part(struct si_screen *sscreen,
|
||||
|
||||
switch (type) {
|
||||
case PIPE_SHADER_VERTEX:
|
||||
shader.key.as_ls = key->vs_prolog.as_ls;
|
||||
shader.key.as_es = key->vs_prolog.as_es;
|
||||
break;
|
||||
case PIPE_SHADER_TESS_CTRL:
|
||||
assert(!prolog);
|
||||
@@ -6829,10 +6833,15 @@ out:
|
||||
static LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
|
||||
{
|
||||
LLVMValueRef ptr[2], list;
|
||||
bool is_merged_shader =
|
||||
ctx->screen->b.chip_class >= GFX9 &&
|
||||
(ctx->type == PIPE_SHADER_TESS_CTRL ||
|
||||
ctx->type == PIPE_SHADER_GEOMETRY ||
|
||||
ctx->shader->key.as_ls || ctx->shader->key.as_es);
|
||||
|
||||
/* Get the pointer to rw buffers. */
|
||||
ptr[0] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS);
|
||||
ptr[1] = LLVMGetParam(ctx->main_fn, SI_SGPR_RW_BUFFERS_HI);
|
||||
ptr[0] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
|
||||
ptr[1] = LLVMGetParam(ctx->main_fn, (is_merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS_HI);
|
||||
list = lp_build_gather_values(&ctx->gallivm, ptr, 2);
|
||||
list = LLVMBuildBitCast(ctx->ac.builder, list, ctx->i64, "");
|
||||
list = LLVMBuildIntToPtr(ctx->ac.builder, list,
|
||||
|
@@ -154,9 +154,6 @@ struct nir_shader;
|
||||
|
||||
/* SGPR user data indices */
|
||||
enum {
|
||||
/* GFX9 merged shaders have RW_BUFFERS among the first 8 system SGPRs,
|
||||
* and these two are used for other purposes.
|
||||
*/
|
||||
SI_SGPR_RW_BUFFERS, /* rings (& stream-out, VS only) */
|
||||
SI_SGPR_RW_BUFFERS_HI,
|
||||
SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES,
|
||||
@@ -459,6 +456,7 @@ union si_shader_part_key {
|
||||
unsigned num_merged_next_stage_vgprs:3;
|
||||
unsigned last_input:4;
|
||||
unsigned as_ls:1;
|
||||
unsigned as_es:1;
|
||||
/* Prologs for monolithic shaders shouldn't set EXEC. */
|
||||
unsigned is_monolithic:1;
|
||||
} vs_prolog;
|
||||
|
@@ -4624,7 +4624,7 @@ static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
|
||||
/* Return if DCC is enabled. The texture should be set up with it
|
||||
* already.
|
||||
*/
|
||||
if (md->size_metadata >= 11 * 4 &&
|
||||
if (md->size_metadata >= 10 * 4 && /* at least 2(header) + 8(desc) dwords */
|
||||
md->metadata[0] != 0 &&
|
||||
md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
|
||||
G_008F28_COMPRESSION_EN(desc[6])) {
|
||||
|
@@ -132,6 +132,25 @@ DrvCreateLayerContext(HDC hdc, INT iLayerPlane)
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return the stw pixel format that most closely matches the pixel format
|
||||
* on HDC.
|
||||
* Used to get a pixel format when SetPixelFormat() hasn't been called before.
|
||||
*/
|
||||
static int
|
||||
get_matching_pixel_format(HDC hdc)
|
||||
{
|
||||
int iPixelFormat = GetPixelFormat(hdc);
|
||||
PIXELFORMATDESCRIPTOR pfd;
|
||||
|
||||
if (!iPixelFormat)
|
||||
return 0;
|
||||
if (!DescribePixelFormat(hdc, iPixelFormat, sizeof(pfd), &pfd))
|
||||
return 0;
|
||||
return stw_pixelformat_choose(hdc, &pfd);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Called via DrvCreateContext(), DrvCreateLayerContext() and
|
||||
* wglCreateContextAttribsARB() to actually create a rendering context.
|
||||
@@ -174,7 +193,7 @@ stw_create_context_attribs(HDC hdc, INT iLayerPlane, DHGLRC hShareContext,
|
||||
* but not all do, and the opengl32 runtime seems to use a default
|
||||
* pixel format in some cases, so use that.
|
||||
*/
|
||||
iPixelFormat = GetPixelFormat(hdc);
|
||||
iPixelFormat = get_matching_pixel_format(hdc);
|
||||
if (!iPixelFormat)
|
||||
return 0;
|
||||
}
|
||||
@@ -458,7 +477,7 @@ stw_make_current(HDC hDrawDC, HDC hReadDC, DHGLRC dhglrc)
|
||||
* pixel format in some cases, so we must create a framebuffer for
|
||||
* those here.
|
||||
*/
|
||||
int iPixelFormat = GetPixelFormat(hDrawDC);
|
||||
int iPixelFormat = get_matching_pixel_format(hDrawDC);
|
||||
if (iPixelFormat)
|
||||
fb = stw_framebuffer_create( hDrawDC, iPixelFormat );
|
||||
if (!fb)
|
||||
|
@@ -2344,7 +2344,7 @@ blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev,
|
||||
*/
|
||||
blorp_surf_convert_to_single_slice(isl_dev, info);
|
||||
|
||||
if (width || height) {
|
||||
if (width && height) {
|
||||
#ifndef NDEBUG
|
||||
uint32_t right_edge_px = info->tile_x_sa + *x + *width;
|
||||
uint32_t bottom_edge_px = info->tile_y_sa + *y + *height;
|
||||
@@ -2357,7 +2357,7 @@ blorp_surf_convert_to_uncompressed(const struct isl_device *isl_dev,
|
||||
*height = DIV_ROUND_UP(*height, fmtl->bh);
|
||||
}
|
||||
|
||||
if (x || y) {
|
||||
if (x && y) {
|
||||
assert(*x % fmtl->bw == 0);
|
||||
assert(*y % fmtl->bh == 0);
|
||||
*x /= fmtl->bw;
|
||||
|
@@ -88,8 +88,14 @@ opt_saturate_propagation_local(fs_visitor *v, bblock_t *block)
|
||||
scan_inst->src[0].negate = !scan_inst->src[0].negate;
|
||||
inst->src[0].negate = false;
|
||||
} else if (scan_inst->opcode == BRW_OPCODE_MAD) {
|
||||
scan_inst->src[0].negate = !scan_inst->src[0].negate;
|
||||
scan_inst->src[1].negate = !scan_inst->src[1].negate;
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (scan_inst->src[i].file == IMM) {
|
||||
brw_negate_immediate(scan_inst->src[i].type,
|
||||
&scan_inst->src[i].as_brw_reg());
|
||||
} else {
|
||||
scan_inst->src[i].negate = !scan_inst->src[i].negate;
|
||||
}
|
||||
}
|
||||
inst->src[0].negate = false;
|
||||
} else if (scan_inst->opcode == BRW_OPCODE_ADD) {
|
||||
if (scan_inst->src[1].file == IMM) {
|
||||
|
@@ -131,11 +131,13 @@ futex_wait(uint32_t *addr, int32_t value)
|
||||
return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0);
|
||||
}
|
||||
|
||||
#ifndef HAVE_MEMFD_CREATE
|
||||
static inline int
|
||||
memfd_create(const char *name, unsigned int flags)
|
||||
{
|
||||
return syscall(SYS_memfd_create, name, flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline uint32_t
|
||||
ilog2_round_up(uint32_t value)
|
||||
|
@@ -27,11 +27,13 @@
|
||||
|
||||
#include "anv_private.h"
|
||||
|
||||
#ifndef HAVE_MEMFD_CREATE
|
||||
static inline int
|
||||
memfd_create(const char *name, unsigned int flags)
|
||||
{
|
||||
return syscall(SYS_memfd_create, name, flags);
|
||||
}
|
||||
#endif
|
||||
|
||||
uint32_t
|
||||
anv_gem_create(struct anv_device *device, uint64_t size)
|
||||
|
@@ -58,12 +58,20 @@ class PrintGlTable(gl_XML.gl_print_base):
|
||||
print '#endif'
|
||||
print ''
|
||||
print ''
|
||||
print '#ifdef __cplusplus'
|
||||
print 'extern "C" {'
|
||||
print '#endif'
|
||||
print ''
|
||||
print 'struct _glapi_table'
|
||||
print '{'
|
||||
return
|
||||
|
||||
def printRealFooter(self):
|
||||
print '};'
|
||||
print ''
|
||||
print '#ifdef __cplusplus'
|
||||
print '}'
|
||||
print '#endif'
|
||||
return
|
||||
|
||||
|
||||
|
@@ -1177,8 +1177,8 @@ err:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int
|
||||
brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
|
||||
static void
|
||||
brw_bo_make_external(struct brw_bo *bo)
|
||||
{
|
||||
struct brw_bufmgr *bufmgr = bo->bufmgr;
|
||||
|
||||
@@ -1190,6 +1190,14 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
|
||||
}
|
||||
mtx_unlock(&bufmgr->lock);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
|
||||
{
|
||||
struct brw_bufmgr *bufmgr = bo->bufmgr;
|
||||
|
||||
brw_bo_make_external(bo);
|
||||
|
||||
if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
|
||||
DRM_CLOEXEC, prime_fd) != 0)
|
||||
@@ -1200,6 +1208,14 @@ brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint32_t
|
||||
brw_bo_export_gem_handle(struct brw_bo *bo)
|
||||
{
|
||||
brw_bo_make_external(bo);
|
||||
|
||||
return bo->gem_handle;
|
||||
}
|
||||
|
||||
int
|
||||
brw_bo_flink(struct brw_bo *bo, uint32_t *name)
|
||||
{
|
||||
@@ -1213,11 +1229,8 @@ brw_bo_flink(struct brw_bo *bo, uint32_t *name)
|
||||
if (drmIoctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
|
||||
return -errno;
|
||||
|
||||
brw_bo_make_external(bo);
|
||||
mtx_lock(&bufmgr->lock);
|
||||
if (!bo->external) {
|
||||
_mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
|
||||
bo->external = true;
|
||||
}
|
||||
if (!bo->global_name) {
|
||||
bo->global_name = flink.name;
|
||||
_mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
|
||||
|
@@ -337,6 +337,8 @@ int brw_bo_gem_export_to_prime(struct brw_bo *bo, int *prime_fd);
|
||||
struct brw_bo *brw_bo_gem_create_from_prime(struct brw_bufmgr *bufmgr,
|
||||
int prime_fd);
|
||||
|
||||
uint32_t brw_bo_export_gem_handle(struct brw_bo *bo);
|
||||
|
||||
int brw_reg_read(struct brw_bufmgr *bufmgr, uint32_t offset,
|
||||
uint64_t *result);
|
||||
|
||||
|
@@ -442,23 +442,26 @@ struct brw_reloc_list {
|
||||
int reloc_array_size;
|
||||
};
|
||||
|
||||
struct brw_growing_bo {
|
||||
struct brw_bo *bo;
|
||||
uint32_t *map;
|
||||
uint32_t *cpu_map;
|
||||
};
|
||||
|
||||
struct intel_batchbuffer {
|
||||
/** Current batchbuffer being queued up. */
|
||||
struct brw_bo *bo;
|
||||
/** Last BO submitted to the hardware. Used for glFinish(). */
|
||||
struct brw_bo *last_bo;
|
||||
struct brw_growing_bo batch;
|
||||
/** Current statebuffer being queued up. */
|
||||
struct brw_bo *state_bo;
|
||||
struct brw_growing_bo state;
|
||||
|
||||
/** Last batchbuffer submitted to the hardware. Used for glFinish(). */
|
||||
struct brw_bo *last_bo;
|
||||
|
||||
#ifdef DEBUG
|
||||
uint16_t emit, total;
|
||||
#endif
|
||||
uint16_t reserved_space;
|
||||
uint32_t *map_next;
|
||||
uint32_t *map;
|
||||
uint32_t *batch_cpu_map;
|
||||
uint32_t *state_cpu_map;
|
||||
uint32_t *state_map;
|
||||
uint32_t state_used;
|
||||
|
||||
enum brw_gpu_ring ring;
|
||||
|
@@ -293,6 +293,17 @@ brw_is_color_fast_clear_compatible(struct brw_context *brw,
brw->mesa_to_isl_render_format[mt->format])
return false;

/* Gen9 doesn't support fast clear on single-sampled SRGB buffers. When
* GL_FRAMEBUFFER_SRGB is enabled any color renderbuffers will be
* resolved in intel_update_state. In that case it's pointless to do a
* fast clear because it's very likely to be immediately resolved.
*/
if (devinfo->gen >= 9 &&
mt->surf.samples == 1 &&
ctx->Color.sRGBEnabled &&
_mesa_get_srgb_format_linear(mt->format) != mt->format)
return false;

const mesa_format format = _mesa_get_render_format(ctx, mt->format);
if (_mesa_is_format_integer_color(format)) {
if (devinfo->gen >= 8) {

@@ -65,15 +65,15 @@ upload_pipelined_state_pointers(struct brw_context *brw)

BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
OUT_RELOC(brw->batch.state_bo, 0, brw->vs.base.state_offset);
OUT_RELOC(brw->batch.state.bo, 0, brw->vs.base.state_offset);
if (brw->ff_gs.prog_active)
OUT_RELOC(brw->batch.state_bo, 0, brw->ff_gs.state_offset | 1);
OUT_RELOC(brw->batch.state.bo, 0, brw->ff_gs.state_offset | 1);
else
OUT_BATCH(0);
OUT_RELOC(brw->batch.state_bo, 0, brw->clip.state_offset | 1);
OUT_RELOC(brw->batch.state_bo, 0, brw->sf.state_offset);
OUT_RELOC(brw->batch.state_bo, 0, brw->wm.base.state_offset);
OUT_RELOC(brw->batch.state_bo, 0, brw->cc.state_offset);
OUT_RELOC(brw->batch.state.bo, 0, brw->clip.state_offset | 1);
OUT_RELOC(brw->batch.state.bo, 0, brw->sf.state_offset);
OUT_RELOC(brw->batch.state.bo, 0, brw->wm.base.state_offset);
OUT_RELOC(brw->batch.state.bo, 0, brw->cc.state_offset);
ADVANCE_BATCH();

brw->ctx.NewDriverState |= BRW_NEW_PSP;

@@ -629,9 +629,9 @@ brw_upload_state_base_address(struct brw_context *brw)
OUT_BATCH(0);
OUT_BATCH(mocs_wb << 16);
/* Surface state base address: */
OUT_RELOC64(brw->batch.state_bo, 0, mocs_wb << 4 | 1);
OUT_RELOC64(brw->batch.state.bo, 0, mocs_wb << 4 | 1);
/* Dynamic state base address: */
OUT_RELOC64(brw->batch.state_bo, 0, mocs_wb << 4 | 1);
OUT_RELOC64(brw->batch.state.bo, 0, mocs_wb << 4 | 1);
/* Indirect object base address: MEDIA_OBJECT data */
OUT_BATCH(mocs_wb << 4 | 1);
OUT_BATCH(0);

@@ -641,7 +641,7 @@ brw_upload_state_base_address(struct brw_context *brw)
/* General state buffer size */
OUT_BATCH(0xfffff001);
/* Dynamic state buffer size */
OUT_BATCH(ALIGN(brw->batch.state_bo->size, 4096) | 1);
OUT_BATCH(ALIGN(MAX_STATE_SIZE, 4096) | 1);
/* Indirect object upper bound */
OUT_BATCH(0xfffff001);
/* Instruction access upper bound */

@@ -664,7 +664,7 @@ brw_upload_state_base_address(struct brw_context *brw)
* BINDING_TABLE_STATE
* SURFACE_STATE
*/
OUT_RELOC(brw->batch.state_bo, 0, 1);
OUT_RELOC(brw->batch.state.bo, 0, 1);
/* Dynamic state base address:
* SAMPLER_STATE
* SAMPLER_BORDER_COLOR_STATE

@@ -675,7 +675,7 @@ brw_upload_state_base_address(struct brw_context *brw)
* Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
* Disable is clear, which we rely on)
*/
OUT_RELOC(brw->batch.state_bo, 0, 1);
OUT_RELOC(brw->batch.state.bo, 0, 1);

OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */

@@ -696,7 +696,7 @@ brw_upload_state_base_address(struct brw_context *brw)
BEGIN_BATCH(8);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
OUT_BATCH(1); /* General state base address */
OUT_RELOC(brw->batch.state_bo, 0, 1); /* Surface state base address */
OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_RELOC(brw->cache.bo, 0, 1); /* Instruction base address */
OUT_BATCH(0xfffff001); /* General state upper bound */

@@ -707,7 +707,7 @@ brw_upload_state_base_address(struct brw_context *brw)
BEGIN_BATCH(6);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
OUT_BATCH(1); /* General state base address */
OUT_RELOC(brw->batch.state_bo, 0, 1); /* Surface state base address */
OUT_RELOC(brw->batch.state.bo, 0, 1); /* Surface state base address */
OUT_BATCH(1); /* Indirect object base address */
OUT_BATCH(1); /* General state upper bound */
OUT_BATCH(1); /* Indirect object upper bound */

@@ -146,7 +146,7 @@ brw_fence_insert_locked(struct brw_context *brw, struct brw_fence *fence)
assert(!fence->batch_bo);
assert(!fence->signalled);

fence->batch_bo = brw->batch.bo;
fence->batch_bo = brw->batch.batch.bo;
brw_bo_reference(fence->batch_bo);

if (intel_batchbuffer_flush(brw) < 0) {

@@ -28,7 +28,7 @@ dynamic_state_address(struct blorp_batch *batch, uint32_t offset)
struct brw_context *brw = batch->driver_batch;

return (struct blorp_address) {
.buffer = brw->batch.state_bo,
.buffer = brw->batch.state.bo,
.offset = offset,
};
}

@@ -60,7 +60,7 @@ blorp_emit_reloc(struct blorp_batch *batch,
uint32_t offset;

if (GEN_GEN < 6 && brw_ptr_in_state_buffer(&brw->batch, location)) {
offset = (char *)location - (char *)brw->batch.state_map;
offset = (char *)location - (char *)brw->batch.state.map;
return brw_state_reloc(&brw->batch, offset,
address.buffer, address.offset + delta,
address.reloc_flags);

@@ -68,7 +68,7 @@ blorp_emit_reloc(struct blorp_batch *batch,

assert(!brw_ptr_in_state_buffer(&brw->batch, location));

offset = (char *)location - (char *)brw->batch.map;
offset = (char *)location - (char *)brw->batch.batch.map;
return brw_batch_reloc(&brw->batch, offset,
address.buffer, address.offset + delta,
address.reloc_flags);

@@ -86,7 +86,7 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
brw_state_reloc(&brw->batch, ss_offset, bo, address.offset + delta,
address.reloc_flags);

void *reloc_ptr = (void *)brw->batch.state_map + ss_offset;
void *reloc_ptr = (void *)brw->batch.state.map + ss_offset;
#if GEN_GEN >= 8
*(uint64_t *)reloc_ptr = reloc_val;
#else

@@ -150,7 +150,7 @@ blorp_alloc_vertex_buffer(struct blorp_batch *batch, uint32_t size,
void *data = brw_state_batch(brw, size, 64, &offset);

*addr = (struct blorp_address) {
.buffer = brw->batch.state_bo,
.buffer = brw->batch.state.bo,
.offset = offset,

#if GEN_GEN == 10

@@ -89,7 +89,7 @@ __gen_combine_address(struct brw_context *brw, void *location,
return address.offset + delta;
} else {
if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) {
offset = (char *) location - (char *) brw->batch.state_map;
offset = (char *) location - (char *) brw->batch.state.map;
return brw_state_reloc(batch, offset, address.bo,
address.offset + delta,
address.reloc_flags);

@@ -97,7 +97,7 @@ __gen_combine_address(struct brw_context *brw, void *location,

assert(!brw_ptr_in_state_buffer(batch, location));

offset = (char *) location - (char *) brw->batch.map;
offset = (char *) location - (char *) brw->batch.batch.map;
return brw_batch_reloc(batch, offset, address.bo,
address.offset + delta,
address.reloc_flags);

@@ -1279,7 +1279,7 @@ genX(upload_clip_state)(struct brw_context *brw)
clip.GuardbandClipTestEnable = true;

clip.ClipperViewportStatePointer =
ro_bo(brw->batch.state_bo, brw->clip.vp_offset);
ro_bo(brw->batch.state.bo, brw->clip.vp_offset);

clip.ScreenSpaceViewportXMin = -1;
clip.ScreenSpaceViewportXMax = 1;

@@ -1496,7 +1496,7 @@ genX(upload_sf)(struct brw_context *brw)
* domain.
*/
sf.SetupViewportStateOffset =
ro_bo(brw->batch.state_bo, brw->sf.vp_offset);
ro_bo(brw->batch.state.bo, brw->sf.vp_offset);

sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;

@@ -1789,7 +1789,7 @@ genX(upload_wm)(struct brw_context *brw)

if (stage_state->sampler_count)
wm.SamplerStatePointer =
ro_bo(brw->batch.state_bo, stage_state->sampler_offset);
ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
#if GEN_GEN == 5
if (wm_prog_data->prog_offset_2)
wm.GRFRegisterCount2 = wm_prog_data->reg_blocks_2;

@@ -2082,7 +2082,7 @@ genX(upload_vs_state)(struct brw_context *brw)

vs.StatisticsEnable = false;
vs.SamplerStatePointer =
ro_bo(brw->batch.state_bo, stage_state->sampler_offset);
ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
#endif

#if GEN_GEN == 5

@@ -3331,7 +3331,7 @@ genX(upload_color_calc_state)(struct brw_context *brw)
cc.StatisticsEnable = brw->stats_wm;

cc.CCViewportStatePointer =
ro_bo(brw->batch.state_bo, brw->cc.vp_offset);
ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
#else
/* _NEW_COLOR */
cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];

@@ -4332,7 +4332,7 @@ genX(upload_raster)(struct brw_context *brw)
raster.CullMode = CULLMODE_NONE;
}

point->SmoothFlag = raster.SmoothPointEnable;
raster.SmoothPointEnable = point->SmoothFlag;

raster.DXMultisampleRasterizationEnable =
_mesa_is_multisample_enabled(ctx);

@@ -5083,7 +5083,7 @@ genX(update_sampler_state)(struct brw_context *brw,
}
#if GEN_GEN < 6
samp_st.BorderColorPointer =
ro_bo(brw->batch.state_bo, border_color_offset);
ro_bo(brw->batch.state.bo, border_color_offset);
#else
samp_st.BorderColorPointer = border_color_offset;
#endif

@@ -52,15 +52,6 @@
#define BATCH_SZ (20 * 1024)
#define STATE_SZ (16 * 1024)

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
* Address, which means that we can't put binding tables beyond 64kB. This
* effectively limits the maximum statebuffer size to 64kB.
*/
#define MAX_STATE_SIZE (64 * 1024)

static void
intel_batchbuffer_reset(struct brw_context *brw);

@@ -93,11 +84,11 @@ intel_batchbuffer_init(struct brw_context *brw)
const struct gen_device_info *devinfo = &screen->devinfo;

if (!devinfo->has_llc) {
batch->batch_cpu_map = malloc(BATCH_SZ);
batch->map = batch->batch_cpu_map;
batch->map_next = batch->map;
batch->state_cpu_map = malloc(STATE_SZ);
batch->state_map = batch->state_cpu_map;
batch->batch.cpu_map = malloc(BATCH_SZ);
batch->batch.map = batch->batch.cpu_map;
batch->map_next = batch->batch.map;
batch->state.cpu_map = malloc(STATE_SZ);
batch->state.map = batch->state.cpu_map;
}

init_reloc_list(&batch->batch_relocs, 250);

@@ -180,20 +171,21 @@ intel_batchbuffer_reset(struct brw_context *brw)
brw_bo_unreference(batch->last_bo);
batch->last_bo = NULL;
}
batch->last_bo = batch->bo;
batch->last_bo = batch->batch.bo;

batch->bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096);
if (!batch->batch_cpu_map) {
batch->map = brw_bo_map(brw, batch->bo, MAP_READ | MAP_WRITE);
batch->batch.bo = brw_bo_alloc(bufmgr, "batchbuffer", BATCH_SZ, 4096);
if (!batch->batch.cpu_map) {
batch->batch.map =
brw_bo_map(brw, batch->batch.bo, MAP_READ | MAP_WRITE);
}
batch->map_next = batch->map;
batch->map_next = batch->batch.map;

batch->state_bo = brw_bo_alloc(bufmgr, "statebuffer", STATE_SZ, 4096);
batch->state_bo->kflags =
batch->state.bo = brw_bo_alloc(bufmgr, "statebuffer", STATE_SZ, 4096);
batch->state.bo->kflags =
can_do_exec_capture(screen) ? EXEC_OBJECT_CAPTURE : 0;
if (!batch->state_cpu_map) {
batch->state_map =
brw_bo_map(brw, batch->state_bo, MAP_READ | MAP_WRITE);
if (!batch->state.cpu_map) {
batch->state.map =
brw_bo_map(brw, batch->state.bo, MAP_READ | MAP_WRITE);
}

/* Avoid making 0 a valid state offset - otherwise the decoder will try

@@ -201,8 +193,8 @@ intel_batchbuffer_reset(struct brw_context *brw)
*/
batch->state_used = 1;

add_exec_bo(batch, batch->bo);
assert(batch->bo->index == 0);
add_exec_bo(batch, batch->batch.bo);
assert(batch->batch.bo->index == 0);

batch->needs_sol_reset = false;
batch->state_base_address_emitted = false;

@@ -251,8 +243,8 @@ intel_batchbuffer_reset_to_saved(struct brw_context *brw)
void
intel_batchbuffer_free(struct intel_batchbuffer *batch)
{
free(batch->batch_cpu_map);
free(batch->state_cpu_map);
free(batch->batch.cpu_map);
free(batch->state.cpu_map);

for (int i = 0; i < batch->exec_count; i++) {
brw_bo_unreference(batch->exec_bos[i]);

@@ -263,8 +255,8 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
free(batch->validation_list);

brw_bo_unreference(batch->last_bo);
brw_bo_unreference(batch->bo);
brw_bo_unreference(batch->state_bo);
brw_bo_unreference(batch->batch.bo);
brw_bo_unreference(batch->state.bo);
if (batch->state_batch_sizes)
_mesa_hash_table_destroy(batch->state_batch_sizes, NULL);
}

@@ -302,7 +294,8 @@ grow_buffer(struct brw_context *brw,
uint32_t *old_map = *map_ptr;
struct brw_bo *old_bo = *bo_ptr;

struct brw_bo *new_bo = brw_bo_alloc(bufmgr, old_bo->name, new_size, 4096);
struct brw_bo *new_bo =
brw_bo_alloc(bufmgr, old_bo->name, new_size, old_bo->align);
uint32_t *new_map;

perf_debug("Growing %s - ran out of space\n", old_bo->name);

@@ -321,9 +314,12 @@ grow_buffer(struct brw_context *brw,
* This guarantees that our relocations continue to work: values we've
* already written into the buffer, values we're going to write into the
* buffer, and the validation/relocation lists all will match.
*
* Also preserve kflags for EXEC_OBJECT_CAPTURE.
*/
new_bo->gtt_offset = old_bo->gtt_offset;
new_bo->index = old_bo->index;
new_bo->kflags = old_bo->kflags;

/* Batch/state buffers are per-context, and if we've run out of space,
* we must have actually used them before, so...they will be in the list.

@@ -370,17 +366,16 @@ intel_batchbuffer_require_space(struct brw_context *brw, GLuint sz,
}

const unsigned batch_used = USED_BATCH(*batch) * 4;
if (batch_used + sz >= BATCH_SZ) {
if (!batch->no_wrap) {
intel_batchbuffer_flush(brw);
} else {
const unsigned new_size =
MIN2(batch->bo->size + batch->bo->size / 2, MAX_BATCH_SIZE);
grow_buffer(brw, &batch->bo, &batch->map, &batch->batch_cpu_map,
batch_used, new_size);
batch->map_next = (void *) batch->map + batch_used;
assert(batch_used + sz < batch->bo->size);
}
if (batch_used + sz >= BATCH_SZ && !batch->no_wrap) {
intel_batchbuffer_flush(brw);
} else if (batch_used + sz >= batch->batch.bo->size) {
const unsigned new_size =
MIN2(batch->batch.bo->size + batch->batch.bo->size / 2,
MAX_BATCH_SIZE);
grow_buffer(brw, &batch->batch.bo, &batch->batch.map,
&batch->batch.cpu_map, batch_used, new_size);
batch->map_next = (void *) batch->batch.map + batch_used;
assert(batch_used + sz < batch->batch.bo->size);
}

/* The intel_batchbuffer_flush() calls above might have changed
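In the new intel_batchbuffer_require_space() and brw_state_batch() paths above, a full buffer is flushed when wrapping is allowed; when it is not (batch->no_wrap), the buffer instead grows by half its current size, capped at MAX_BATCH_SIZE for the batch and MAX_STATE_SIZE for the statebuffer. A self-contained sketch of that arithmetic, using only the constants visible in this diff, showing the size sequences the MIN2(size + size / 2, max) formula produces:

#include <stdio.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))

#define BATCH_SZ       (20 * 1024)
#define STATE_SZ       (16 * 1024)
#define MAX_BATCH_SIZE (256 * 1024)
#define MAX_STATE_SIZE (64 * 1024)

static void
print_growth(const char *name, unsigned size, unsigned max)
{
   printf("%s:", name);
   while (size < max) {
      printf(" %u", size);
      size = MIN2(size + size / 2, max);  /* same formula the grow_buffer() callers use */
   }
   printf(" %u (cap)\n", size);
}

int main(void)
{
   /* batch: 20480 30720 46080 69120 103680 155520 233280 262144 (cap) */
   print_growth("batch", BATCH_SZ, MAX_BATCH_SIZE);
   /* state: 16384 24576 36864 55296 65536 (cap) */
   print_growth("state", STATE_SZ, MAX_STATE_SIZE);
   return 0;
}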
@@ -437,16 +432,16 @@ do_batch_dump(struct brw_context *brw)
if (batch->ring != RENDER_RING)
return;

uint32_t *batch_data = brw_bo_map(brw, batch->bo, MAP_READ);
uint32_t *state = brw_bo_map(brw, batch->state_bo, MAP_READ);
uint32_t *batch_data = brw_bo_map(brw, batch->batch.bo, MAP_READ);
uint32_t *state = brw_bo_map(brw, batch->state.bo, MAP_READ);
if (batch_data == NULL || state == NULL) {
fprintf(stderr, "WARNING: failed to map batchbuffer/statebuffer\n");
return;
}

uint32_t *end = batch_data + USED_BATCH(*batch);
uint32_t batch_gtt_offset = batch->bo->gtt_offset;
uint32_t state_gtt_offset = batch->state_bo->gtt_offset;
uint32_t batch_gtt_offset = batch->batch.bo->gtt_offset;
uint32_t state_gtt_offset = batch->state.bo->gtt_offset;
int length;

bool color = INTEL_DEBUG & DEBUG_COLOR;

@@ -567,8 +562,8 @@ do_batch_dump(struct brw_context *brw)
}
}

brw_bo_unmap(batch->bo);
brw_bo_unmap(batch->state_bo);
brw_bo_unmap(batch->batch.bo);
brw_bo_unmap(batch->state.bo);
}
#else
static void do_batch_dump(struct brw_context *brw) { }

@@ -590,7 +585,7 @@ brw_new_batch(struct brw_context *brw)
brw->batch.exec_count = 0;
brw->batch.aperture_space = 0;

brw_bo_unreference(brw->batch.state_bo);
brw_bo_unreference(brw->batch.state.bo);

/* Create a new batchbuffer and reset the associated state: */
intel_batchbuffer_reset_and_clear_render_cache(brw);

@@ -786,18 +781,18 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
struct intel_batchbuffer *batch = &brw->batch;
int ret = 0;

if (batch->batch_cpu_map) {
void *bo_map = brw_bo_map(brw, batch->bo, MAP_WRITE);
memcpy(bo_map, batch->batch_cpu_map, 4 * USED_BATCH(*batch));
if (batch->batch.cpu_map) {
void *bo_map = brw_bo_map(brw, batch->batch.bo, MAP_WRITE);
memcpy(bo_map, batch->batch.cpu_map, 4 * USED_BATCH(*batch));
}

if (batch->state_cpu_map) {
void *bo_map = brw_bo_map(brw, batch->state_bo, MAP_WRITE);
memcpy(bo_map, batch->state_cpu_map, batch->state_used);
if (batch->state.cpu_map) {
void *bo_map = brw_bo_map(brw, batch->state.bo, MAP_WRITE);
memcpy(bo_map, batch->state.cpu_map, batch->state_used);
}

brw_bo_unmap(batch->bo);
brw_bo_unmap(batch->state_bo);
brw_bo_unmap(batch->batch.bo);
brw_bo_unmap(batch->state.bo);

if (!brw->screen->no_hw) {
/* The requirement for using I915_EXEC_NO_RELOC are:

@@ -825,19 +820,19 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
uint32_t hw_ctx = batch->ring == RENDER_RING ? brw->hw_ctx : 0;

/* Set statebuffer relocations */
const unsigned state_index = batch->state_bo->index;
const unsigned state_index = batch->state.bo->index;
if (state_index < batch->exec_count &&
batch->exec_bos[state_index] == batch->state_bo) {
batch->exec_bos[state_index] == batch->state.bo) {
struct drm_i915_gem_exec_object2 *entry =
&batch->validation_list[state_index];
assert(entry->handle == batch->state_bo->gem_handle);
assert(entry->handle == batch->state.bo->gem_handle);
entry->relocation_count = batch->state_relocs.reloc_count;
entry->relocs_ptr = (uintptr_t) batch->state_relocs.relocs;
}

/* Set batchbuffer relocations */
struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
assert(entry->handle == batch->bo->gem_handle);
assert(entry->handle == batch->batch.bo->gem_handle);
entry->relocation_count = batch->batch_relocs.reloc_count;
entry->relocs_ptr = (uintptr_t) batch->batch_relocs.relocs;

@@ -899,7 +894,7 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
intel_upload_finish(brw);

if (brw->throttle_batch[0] == NULL) {
brw->throttle_batch[0] = brw->batch.bo;
brw->throttle_batch[0] = brw->batch.batch.bo;
brw_bo_reference(brw->throttle_batch[0]);
}

@@ -921,7 +916,7 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,

if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
fprintf(stderr, "waiting for idle\n");
brw_bo_wait_rendering(brw->batch.bo);
brw_bo_wait_rendering(brw->batch.batch.bo);
}

/* Start a new batch buffer. */

@@ -994,7 +989,7 @@ brw_batch_reloc(struct intel_batchbuffer *batch, uint32_t batch_offset,
struct brw_bo *target, uint32_t target_offset,
unsigned int reloc_flags)
{
assert(batch_offset <= batch->bo->size - sizeof(uint32_t));
assert(batch_offset <= batch->batch.bo->size - sizeof(uint32_t));

return emit_reloc(batch, &batch->batch_relocs, batch_offset,
target, target_offset, reloc_flags);

@@ -1005,7 +1000,7 @@ brw_state_reloc(struct intel_batchbuffer *batch, uint32_t state_offset,
struct brw_bo *target, uint32_t target_offset,
unsigned int reloc_flags)
{
assert(state_offset <= batch->state_bo->size - sizeof(uint32_t));
assert(state_offset <= batch->state.bo->size - sizeof(uint32_t));

return emit_reloc(batch, &batch->state_relocs, state_offset,
target, target_offset, reloc_flags);

@@ -1045,22 +1040,20 @@ brw_state_batch(struct brw_context *brw,
{
struct intel_batchbuffer *batch = &brw->batch;

assert(size < batch->bo->size);
assert(size < batch->state.bo->size);

uint32_t offset = ALIGN(batch->state_used, alignment);

if (offset + size >= STATE_SZ) {
if (!batch->no_wrap) {
intel_batchbuffer_flush(brw);
offset = ALIGN(batch->state_used, alignment);
} else {
const unsigned new_size =
MIN2(batch->state_bo->size + batch->state_bo->size / 2,
MAX_STATE_SIZE);
grow_buffer(brw, &batch->state_bo, &batch->state_map,
&batch->state_cpu_map, batch->state_used, new_size);
assert(offset + size < batch->state_bo->size);
}
if (offset + size >= STATE_SZ && !batch->no_wrap) {
intel_batchbuffer_flush(brw);
offset = ALIGN(batch->state_used, alignment);
} else if (offset + size >= batch->state.bo->size) {
const unsigned new_size =
MIN2(batch->state.bo->size + batch->state.bo->size / 2,
MAX_STATE_SIZE);
grow_buffer(brw, &batch->state.bo, &batch->state.map,
&batch->state.cpu_map, batch->state_used, new_size);
assert(offset + size < batch->state.bo->size);
}

if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {

@@ -1072,7 +1065,7 @@ brw_state_batch(struct brw_context *brw,
batch->state_used = offset + size;

*out_offset = offset;
return batch->state_map + (offset >> 2);
return batch->state.map + (offset >> 2);
}

void

@@ -10,6 +10,15 @@
extern "C" {
#endif

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
* Address, which means that we can't put binding tables beyond 64kB. This
* effectively limits the maximum statebuffer size to 64kB.
*/
#define MAX_STATE_SIZE (64 * 1024)

struct intel_batchbuffer;

void intel_batchbuffer_init(struct brw_context *brw);

@@ -55,7 +64,8 @@ uint64_t brw_state_reloc(struct intel_batchbuffer *batch,
uint32_t target_offset,
unsigned flags);

#define USED_BATCH(batch) ((uintptr_t)((batch).map_next - (batch).map))
#define USED_BATCH(_batch) \
((uintptr_t)((_batch).map_next - (_batch).batch.map))

static inline uint32_t float_as_int(float f)
{

@@ -113,8 +123,8 @@ intel_batchbuffer_advance(struct brw_context *brw)
static inline bool
brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p)
{
return (char *) p >= (char *) batch->state_map &&
(char *) p < (char *) batch->state_map + batch->state_bo->size;
return (char *) p >= (char *) batch->state.map &&
(char *) p < (char *) batch->state.map + batch->state.bo->size;
}

#define BEGIN_BATCH(n) do { \

@@ -131,7 +141,7 @@ brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p)
#define OUT_BATCH_F(f) OUT_BATCH(float_as_int((f)))

#define OUT_RELOC(buf, flags, delta) do { \
uint32_t __offset = (__map - brw->batch.map) * 4; \
uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
uint32_t reloc = \
brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
OUT_BATCH(reloc); \

@@ -139,7 +149,7 @@ brw_ptr_in_state_buffer(struct intel_batchbuffer *batch, void *p)

/* Handle 48-bit address relocations for Gen8+ */
#define OUT_RELOC64(buf, flags, delta) do { \
uint32_t __offset = (__map - brw->batch.map) * 4; \
uint32_t __offset = (__map - brw->batch.batch.map) * 4; \
uint64_t reloc64 = \
brw_batch_reloc(&brw->batch, __offset, (buf), (delta), (flags)); \
OUT_BATCH(reloc64); \
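With the switch to batch.map, USED_BATCH() still yields a dword count: map_next and batch.map are both uint32_t pointers, so their difference counts 32-bit entries, which is why intel_batchbuffer_require_space() earlier in this diff multiplies the result by 4 to get a byte count. A tiny standalone illustration of that arithmetic (local names only, nothing here is driver API):

#include <assert.h>
#include <stdint.h>

int main(void)
{
   uint32_t batch_map[8] = { 0 };
   uint32_t *map_next = batch_map + 5;      /* five dwords emitted so far */

   /* Same shape as USED_BATCH(): pointer difference in dword units. */
   uintptr_t used_dwords = (uintptr_t)(map_next - batch_map);
   assert(used_dwords == 5);
   assert(used_dwords * 4 == 20);           /* bytes, as used by require_space */
   return 0;
}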
@@ -207,7 +207,13 @@ intel_miptree_supports_ccs(struct brw_context *brw,
if (!brw->mesa_format_supports_render[mt->format])
return false;

return true;
if (devinfo->gen >= 9) {
mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
const enum isl_format isl_format =
brw_isl_format_for_mesa_format(linear_format);
return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
} else
return true;
}

static bool

@@ -250,7 +256,7 @@ intel_miptree_supports_hiz(const struct brw_context *brw,
* our HW tends to support more linear formats than sRGB ones, we use this
* format variant for check for CCS_E compatibility.
*/
static bool
MAYBE_UNUSED static bool
format_ccs_e_compat_with_miptree(const struct gen_device_info *devinfo,
const struct intel_mipmap_tree *mt,
enum isl_format access_format)

@@ -284,13 +290,12 @@ intel_miptree_supports_ccs_e(struct brw_context *brw,
if (!intel_miptree_supports_ccs(brw, mt))
return false;

/* Many window system buffers are sRGB even if they are never rendered as
* sRGB. For those, we want CCS_E for when sRGBEncode is false. When the
* surface is used as sRGB, we fall back to CCS_D.
/* Fast clear can be also used to clear srgb surfaces by using equivalent
* linear format. This trick, however, can't be extended to be used with
* lossless compression and therefore a check is needed to see if the format
* really is linear.
*/
mesa_format linear_format = _mesa_get_srgb_format_linear(mt->format);
enum isl_format isl_format = brw_isl_format_for_mesa_format(linear_format);
return isl_format_supports_ccs_e(&brw->screen->devinfo, isl_format);
return _mesa_get_srgb_format_linear(mt->format) == mt->format;
}

/**

@@ -2685,27 +2690,29 @@ intel_miptree_render_aux_usage(struct brw_context *brw,
return ISL_AUX_USAGE_MCS;

case ISL_AUX_USAGE_CCS_D:
return mt->mcs_buf ? ISL_AUX_USAGE_CCS_D : ISL_AUX_USAGE_NONE;
/* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of
* the single-sampled color renderbuffers because the CCS buffer isn't
* supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
* enabled because otherwise the surface state will be programmed with
* the linear equivalent format anyway.
*/
if (isl_format_is_srgb(render_format) &&
_mesa_get_srgb_format_linear(mt->format) != mt->format) {
return ISL_AUX_USAGE_NONE;
} else if (!mt->mcs_buf) {
return ISL_AUX_USAGE_NONE;
} else {
return ISL_AUX_USAGE_CCS_D;
}

case ISL_AUX_USAGE_CCS_E: {
/* If the format supports CCS_E and is compatible with the miptree,
* then we can use it.
/* Lossless compression is not supported for SRGB formats, it
* should be impossible to get here with such surfaces.
*/
if (format_ccs_e_compat_with_miptree(&brw->screen->devinfo,
mt, render_format))
return ISL_AUX_USAGE_CCS_E;
assert(!isl_format_is_srgb(render_format) ||
_mesa_get_srgb_format_linear(mt->format) == mt->format);

/* Otherwise, we have to fall back to CCS_D */

/* gen9 hardware technically supports non-0/1 clear colors with sRGB
* formats. However, there are issues with blending where it doesn't
* properly apply the sRGB curve to the clear color when blending.
*/
if (blend_enabled && isl_format_is_srgb(render_format) &&
!isl_color_value_is_zero_one(mt->fast_clear_color, render_format))
return ISL_AUX_USAGE_NONE;

return ISL_AUX_USAGE_CCS_D;
return ISL_AUX_USAGE_CCS_E;
}

default:

@@ -774,7 +774,7 @@ intel_query_image(__DRIimage *image, int attrib, int *value)
*value = image->pitch;
return true;
case __DRI_IMAGE_ATTRIB_HANDLE:
*value = image->bo->gem_handle;
*value = brw_bo_export_gem_handle(image->bo);
return true;
case __DRI_IMAGE_ATTRIB_NAME:
return !brw_bo_flink(image->bo, (uint32_t *) value);

@@ -122,6 +122,56 @@ adjust_for_oes_float_texture(const struct gl_context *ctx,
return format;
}

/**
* Returns a corresponding base format for a given internal floating point
* format as specifed by OES_texture_float.
*/
static GLenum
oes_float_internal_format(const struct gl_context *ctx,
GLenum format, GLenum type)
{
switch (type) {
case GL_FLOAT:
if (ctx->Extensions.OES_texture_float) {
switch (format) {
case GL_RGBA32F:
return GL_RGBA;
case GL_RGB32F:
return GL_RGB;
case GL_ALPHA32F_ARB:
return GL_ALPHA;
case GL_LUMINANCE32F_ARB:
return GL_LUMINANCE;
case GL_LUMINANCE_ALPHA32F_ARB:
return GL_LUMINANCE_ALPHA;
default:
break;
}
}
break;

case GL_HALF_FLOAT_OES:
if (ctx->Extensions.OES_texture_half_float) {
switch (format) {
case GL_RGBA16F:
return GL_RGBA;
case GL_RGB16F:
return GL_RGB;
case GL_ALPHA16F_ARB:
return GL_ALPHA;
case GL_LUMINANCE16F_ARB:
return GL_LUMINANCE;
case GL_LUMINANCE_ALPHA16F_ARB:
return GL_LUMINANCE_ALPHA;
default:
break;
}
}
break;
}
return format;
}


/**
* Install gl_texture_image in a gl_texture_object according to the target
@@ -2155,6 +2205,10 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
return GL_TRUE;
}

GLenum internalFormat = _mesa_is_gles(ctx) ?
oes_float_internal_format(ctx, texImage->InternalFormat, type) :
texImage->InternalFormat;

/* OpenGL ES 1.x and OpenGL ES 2.0 impose additional restrictions on the
* combinations of format, internalFormat, and type that can be used.
* Formats and types that require additional extensions (e.g., GL_FLOAT

@@ -2162,7 +2216,7 @@ texsubimage_error_check(struct gl_context *ctx, GLuint dimensions,
*/
if (_mesa_is_gles(ctx) &&
texture_format_error_check_gles(ctx, format, type,
texImage->InternalFormat,
internalFormat,
dimensions, callerName)) {
return GL_TRUE;
}
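On GLES, texImage->InternalFormat may hold a sized float format even though the application created the texture through OES_texture_float / OES_texture_half_float using an unsized base format, so texsubimage_error_check() above maps it back with oes_float_internal_format() before calling texture_format_error_check_gles(). A small standalone mirror of the half-float rows of that mapping; the enum values are the standard ones from the GL headers, and the function name is local to this sketch:

#include <assert.h>

/* Enum values as defined in the GL headers. */
#define GL_RGB            0x1907
#define GL_RGBA           0x1908
#define GL_RGBA16F        0x881A
#define GL_RGB16F         0x881B
#define GL_HALF_FLOAT_OES 0x8D61

/* Mirrors the GL_HALF_FLOAT_OES branch of oes_float_internal_format(). */
static unsigned
half_float_base_format(unsigned internal_format)
{
   switch (internal_format) {
   case GL_RGBA16F: return GL_RGBA;
   case GL_RGB16F:  return GL_RGB;
   default:         return internal_format;
   }
}

int main(void)
{
   /* The sized format stored on the texture image is checked against the
    * unsized format the application originally passed.
    */
   assert(half_float_base_format(GL_RGBA16F) == GL_RGBA);
   assert(half_float_base_format(GL_RGB16F) == GL_RGB);
   return 0;
}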
@@ -5222,7 +5222,7 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
defined = 0;

inst2 = (glsl_to_tgsi_instruction *) inst->next;
do {
while (!inst2->is_tail_sentinel()) {
if (inst->op == inst2->op &&
inst2->dst[defined].file == PROGRAM_UNDEFINED &&
inst->src[0].file == inst2->src[0].file &&

@@ -5231,9 +5231,9 @@ glsl_to_tgsi_visitor::merge_two_dsts(void)
inst->src[0].swizzle == inst2->src[0].swizzle)
break;
inst2 = (glsl_to_tgsi_instruction *) inst2->next;
} while (inst2);
}

if (!inst2) {
if (inst2->is_tail_sentinel()) {
/* Undefined destinations are not allowed, substitute with an unused
* temporary register.
*/

@@ -1170,7 +1170,7 @@ void
disk_cache_put_key(struct disk_cache *cache, const cache_key key)
{
const uint32_t *key_chunk = (const uint32_t *) key;
int i = *key_chunk & CACHE_INDEX_KEY_MASK;
int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK;
unsigned char *entry;

entry = &cache->stored_keys[i * CACHE_KEY_SIZE];

@@ -1189,7 +1189,7 @@ bool
disk_cache_has_key(struct disk_cache *cache, const cache_key key)
{
const uint32_t *key_chunk = (const uint32_t *) key;
int i = *key_chunk & CACHE_INDEX_KEY_MASK;
int i = CPU_TO_LE32(*key_chunk) & CACHE_INDEX_KEY_MASK;
unsigned char *entry;

entry = &cache->stored_keys[i * CACHE_KEY_SIZE];
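Running the first four key bytes through CPU_TO_LE32 before masking makes the index slot independent of host byte order: the conversion is a no-op on little-endian hosts and a byte swap on big-endian ones, so both interpret the leading key bytes the same way. A minimal sketch of the equivalent byte-wise computation; the mask below is a placeholder for illustration, not the real CACHE_INDEX_KEY_MASK value.

#include <assert.h>
#include <stdint.h>

/* Placeholder for illustration; the driver uses CACHE_INDEX_KEY_MASK. */
#define EXAMPLE_INDEX_MASK 0xffff

/* Interpret the first 4 key bytes as a little-endian u32 regardless of the
 * host byte order -- the same value CPU_TO_LE32(*(const uint32_t *)key)
 * produces.
 */
static uint32_t
key_index(const unsigned char *key)
{
   uint32_t le = (uint32_t)key[0]       |
                 (uint32_t)key[1] << 8  |
                 (uint32_t)key[2] << 16 |
                 (uint32_t)key[3] << 24;
   return le & EXAMPLE_INDEX_MASK;
}

int main(void)
{
   const unsigned char key[20] = { 0xde, 0xad, 0xbe, 0xef };  /* leading bytes of a cache key */
   assert(key_index(key) == (0xefbeadde & EXAMPLE_INDEX_MASK));
   return 0;
}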
@@ -16,6 +16,7 @@

#include <stdint.h>
#include <string.h>
#include "u_endian.h"
#include "sha1.h"

#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))

@@ -24,7 +25,7 @@
* blk0() and blk() perform the initial expand.
* I got the idea of expanding during the round function from SSLeay
*/
#if BYTE_ORDER == LITTLE_ENDIAN
#ifdef PIPE_ARCH_LITTLE_ENDIAN
# define blk0(i) (block->l[i] = (rol(block->l[i],24)&0xFF00FF00) \
|(rol(block->l[i],8)&0x00FF00FF))
#else
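The hunk above keys the endianness choice off PIPE_ARCH_LITTLE_ENDIAN from u_endian.h instead of the platform's BYTE_ORDER macro. On little-endian hosts blk0() must byte-swap each 32-bit word of the message block, since SHA-1 is defined on big-endian words, and the rol/mask pair is simply a branch-free 32-bit byte swap. A small standalone check of that identity; __builtin_bswap32 is the GCC/Clang builtin, used here only as a reference value:

#include <assert.h>
#include <stdint.h>

#define rol(value, bits) (((value) << (bits)) | ((value) >> (32 - (bits))))

/* Same expression as blk0() above, applied to a plain value. */
static uint32_t
swap32_like_blk0(uint32_t x)
{
   return (rol(x, 24) & 0xFF00FF00) | (rol(x, 8) & 0x00FF00FF);
}

int main(void)
{
   assert(swap32_like_blk0(0xAABBCCDD) == 0xDDCCBBAA);
   assert(swap32_like_blk0(0x01234567) == __builtin_bswap32(0x01234567));
   return 0;
}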