Compare commits
58 Commits
mesa-17.2.
...
mesa-17.2.
Author | SHA1 | Date | |
---|---|---|---|
|
d6d2b6b5ec | ||
|
cb778d563f | ||
|
7c3bd519e7 | ||
|
c6b3732967 | ||
|
e012ade1cf | ||
|
41e691d605 | ||
|
b6ae4400fc | ||
|
c47914276e | ||
|
536f852d42 | ||
|
fdc4f6e684 | ||
|
8cf9e5ab56 | ||
|
d5c71e44c4 | ||
|
53190187cc | ||
|
366e02f992 | ||
|
5cec093773 | ||
|
69394e3517 | ||
|
c0c1e05fb6 | ||
|
94b656cb5e | ||
|
5e0b556abc | ||
|
f087dad40d | ||
|
83bbfaddf1 | ||
|
a8eea040cd | ||
|
15ecbb2c07 | ||
|
7fef8d436a | ||
|
ea1bcfc063 | ||
|
6e2b9fba21 | ||
|
dcb634df92 | ||
|
20be71ba7c | ||
|
a3d1ea347a | ||
|
dc640aab63 | ||
|
c8076c8ea1 | ||
|
62f0bb2a87 | ||
|
40f06286f9 | ||
|
0f79eb7abe | ||
|
54c5568fa9 | ||
|
c2f314d721 | ||
|
fe04abc6e9 | ||
|
0e305f0518 | ||
|
cb9dae484a | ||
|
57ecf28668 | ||
|
78e2b539a4 | ||
|
86d4c203bd | ||
|
bda2975eef | ||
|
a4dc18efca | ||
|
8f77409e2b | ||
|
458f52618a | ||
|
402b8073ad | ||
|
6fe4852f59 | ||
|
d080f10fdf | ||
|
93b794f094 | ||
|
520786586d | ||
|
9ffe4cc6c0 | ||
|
2b0e70e2ae | ||
|
3a9b2e0e7d | ||
|
188010c68d | ||
|
b4473dd519 | ||
|
f5925b2897 | ||
|
702950d1ad |
@@ -2,3 +2,19 @@
|
||||
# causing regressions have been reverted.
|
||||
365d34540f331df57780dddf8da87235be0a6bcb mesa: correctly calculate the storage offset for i915
|
||||
de0e62e1065e2d9172acf3ab7c70bba0160125c8 st/mesa: correctly calculate the storage offset
|
||||
|
||||
# stable: Add loader::getCapability patches. It's rather invasive infra
|
||||
# not suitable as a bugfix.
|
||||
1bf703e4ea5c4f742bc7ba55d01e5afc3f4e11f9 dri_interface,egl,gallium: only expose RGBA visuals on Android
|
||||
be5773fa8dfe9255d9abaf5c7d5bbbd2d922da08 Android: fix compile error for DRI2 loader getCapability
|
||||
31a6750988d7dd431f72ff1ff11bfca83bde5d8c st/dri: NULL check before deref DRI loader .getCapability
|
||||
|
||||
# stable: The commit addresses code that did not land in the stable branch
|
||||
31bb8517a194af733deefe2d821537d994d39365 radv/gfx9: fix tile swizzle handling for gfx9
|
||||
|
||||
# stable: Commit is not applicable when 4fab67a4415 is missing.
|
||||
d496780fb2c7f2cf0e32b6a79dc528e5156dfcb3 intel/eu/validate: Look up types on demand in execution_type()
|
||||
|
||||
# fixes: Depend on preceding commit which adds new public GBM API
|
||||
3a5e3aa5a53cff55a5e31766d713a41ffa5a93d7 egl/drm: Fix misused x and y offsets in swrast_put_image2()
|
||||
fe2a6281b3b299998fe7399e7dbcc2077d773824 egl/drm: Fix misused x and y offsets in swrast_get_image()
|
||||
|
21
configure.ac
21
configure.ac
@@ -826,6 +826,27 @@ AC_CHECK_HEADER([sys/sysctl.h], [DEFINES="$DEFINES -DHAVE_SYS_SYSCTL_H"])
|
||||
AC_CHECK_FUNC([strtof], [DEFINES="$DEFINES -DHAVE_STRTOF"])
|
||||
AC_CHECK_FUNC([mkostemp], [DEFINES="$DEFINES -DHAVE_MKOSTEMP"])
|
||||
|
||||
AC_MSG_CHECKING([whether strtod has locale support])
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[
|
||||
#define _GNU_SOURCE
|
||||
#include <stdlib.h>
|
||||
#include <locale.h>
|
||||
#ifdef HAVE_XLOCALE_H
|
||||
#include <xlocale.h>
|
||||
#endif
|
||||
int main() {
|
||||
locale_t loc = newlocale(LC_CTYPE_MASK, "C", NULL);
|
||||
const char *s = "1.0";
|
||||
char *end;
|
||||
double d = strtod_l(s, end, loc);
|
||||
float f = strtof_l(s, end, loc);
|
||||
freelocale(loc);
|
||||
return 0;
|
||||
}]])],
|
||||
[DEFINES="$DEFINES -DHAVE_STRTOD_L"];
|
||||
AC_MSG_RESULT([yes]),
|
||||
AC_MSG_RESULT([no]))
|
||||
|
||||
dnl Check to see if dlopen is in default libraries (like Solaris, which
|
||||
dnl has it in libc), or if libdl is needed to get it.
|
||||
AC_CHECK_FUNC([dlopen], [DEFINES="$DEFINES -DHAVE_DLOPEN"],
|
||||
|
@@ -130,27 +130,6 @@ mesa/demos repository.</p>
|
||||
runtime</p>
|
||||
|
||||
<dl>
|
||||
<dt><code>EGL_DRIVERS_PATH</code></dt>
|
||||
<dd>
|
||||
|
||||
<p>By default, the main library will look for drivers in the directory where
|
||||
the drivers are installed to. This variable specifies a list of
|
||||
colon-separated directories where the main library will look for drivers, in
|
||||
addition to the default directory. This variable is ignored for setuid/setgid
|
||||
binaries.</p>
|
||||
|
||||
<p>This variable is usually set to test an uninstalled build. For example, one
|
||||
may set</p>
|
||||
|
||||
<pre>
|
||||
$ export LD_LIBRARY_PATH=$mesa/lib
|
||||
$ export EGL_DRIVERS_PATH=$mesa/lib/egl
|
||||
</pre>
|
||||
|
||||
<p>to test a build without installation</p>
|
||||
|
||||
</dd>
|
||||
|
||||
<dt><code>EGL_DRIVER</code></dt>
|
||||
<dd>
|
||||
|
||||
|
@@ -14,7 +14,7 @@
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 17.2.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 17.2.0 Release Notes / September 4, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 17.2.0 is a new development release.
|
||||
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD.
|
||||
9484ad96b4bb6cda5bbf1aef52dfa35183dc21aa6258a2991c245996c2fdaf85 mesa-17.2.0.tar.gz
|
||||
3123448f770eae58bc73e15480e78909defb892f10ab777e9116c9b218094943 mesa-17.2.0.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
@@ -56,9 +57,156 @@ Note: some of the new features are only available with certain drivers.
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
TBD
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68365">Bug 68365</a> - [SNB Bisected]Piglit spec_ARB_framebuffer_object_fbo-blit-stretch fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77240">Bug 77240</a> - khrplatform.h not installed if EGL is disabled</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95530">Bug 95530</a> - Stellaris - colored overlay of sectors doesn't render on i965</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96449">Bug 96449</a> - Dying Light reports OpenGL version 3.0 with mesa-git</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96958">Bug 96958</a> - [SKL] Improper rendering in Europa Universalis IV</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97957">Bug 97957</a> - Awful screen tearing in a separate X server with DRI3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98238">Bug 98238</a> - Witcher 2: objects are black when changing lod on Radeon Pitcairn</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99467">Bug 99467</a> - [radv] DOOM 2016 + wine. Green screen everywhere (but can be started)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100070">Bug 100070</a> - Rocket League: grass gets rendered incorrectly</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100242">Bug 100242</a> - radeon buffer allocation failure during startup of Factorio</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100620">Bug 100620</a> - [SKL] 48-bit addresses break DOOM</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100690">Bug 100690</a> - [Regression, bisected] TotalWar: Warhammer corrupted graphics</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100785">Bug 100785</a> - [regression, bisected] arb_gpu_shader5 piglit fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100871">Bug 100871</a> - gles cts hangs mesa indefinitely</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100877">Bug 100877</a> - vulkan/tests/block_pool_no_free regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100892">Bug 100892</a> - Polaris 12: winsys init bad switch (missing break) initializing addrlib</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100937">Bug 100937</a> - Mesa fails to build with GCC 4.8</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100945">Bug 100945</a> - Build failure in GNOME Continuous</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100988">Bug 100988</a> - glXGetCurrentDisplay() no longer works for FakeGLX contexts?</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101071">Bug 101071</a> - compiling glsl fails with undefined reference to `pthread_create'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101088">Bug 101088</a> - `gallium: remove pipe_index_buffer and set_index_buffer` causes glitches and crash in gallium nine</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101110">Bug 101110</a> - Build failure in GNOME Continuous</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101189">Bug 101189</a> - Latest git fails to compile with radeon</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101252">Bug 101252</a> - eglGetDisplay() is not thread safe</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101254">Bug 101254</a> - VDPAU videos don't start playing with r600 gallium driver</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101283">Bug 101283</a> - skylake: page fault accessing address 0</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101284">Bug 101284</a> - [G45] ES2-CTS.functional.texture.specification.basic_copytexsubimage2d.cube_rgba</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101294">Bug 101294</a> - radeonsi minecraft forge splash freeze since 17.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101306">Bug 101306</a> - [BXT] gles asserts in cts</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101326">Bug 101326</a> - gallium/wgl: Allow context creation without prior SetPixelFormat()</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101334">Bug 101334</a> - AMD SI cards: Some vulkan apps freeze the system</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101336">Bug 101336</a> - glcpp-test.sh regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101340">Bug 101340</a> - i915_surface.c:108:4: error: too few arguments to function ‘util_blitter_default_src_texture’</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101360">Bug 101360</a> - Assertion failure comparing result of ballotARB</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101401">Bug 101401</a> - [REGRESSION][BISECTED] GDM fails to start after 8ec4975cd83365c791a1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101418">Bug 101418</a> - Build failure in GNOME Continuous</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101451">Bug 101451</a> - [G33] ES2-CTS.functional.clipping.polygon regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101464">Bug 101464</a> - PrimitiveRestartNV inside a render list causes a crash</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101471">Bug 101471</a> - Mesa fails to build: unknown typename bool</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101535">Bug 101535</a> - [bisected] [Skylake] Kwin won't start and glxgears coredumps</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101538">Bug 101538</a> - From "Use isl for hiz layouts" commit onwards, everything crashes with Mesa</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101539">Bug 101539</a> - [Regresion] [IVB] Segment fault in recent commit in intel_miptree_level_has_hiz under Ivy bridge</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101558">Bug 101558</a> - [regression][bisected] MPV playing video via opengl "randomly" results in only part of the window / screen being rendered with Mesa GIT.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101596">Bug 101596</a> - Blender renders black UI elements</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101607">Bug 101607</a> - Regression in anisotropic filtering from "i965: Convert fs sampler state to use genxml"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101657">Bug 101657</a> - strtod.c:32:10: fatal error: xlocale.h: No such file or directory</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101666">Bug 101666</a> - bitfieldExtract is marked as a built-in function on OpenGL ES 3.0, but was added in OpenGL ES 3.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101683">Bug 101683</a> - Some games hang while loading when compositing is shut off or absent</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101703">Bug 101703</a> - No stencil buffer allocated when requested by GLUT</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101704">Bug 101704</a> - [regression][bisected] glReadPixels() from pbuffer failing in Android CTS camera tests</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101766">Bug 101766</a> - Assertion `!"invalid type"' failed when constant expression involves literal of different type</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101774">Bug 101774</a> - gen_clflush.h:37:7: error: implicit declaration of function ‘__builtin_ia32_clflush’</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101775">Bug 101775</a> - Xorg segfault since 147d7fb "st/mesa: add a winsys buffers list in st_context"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101829">Bug 101829</a> - read-after-free in st_framebuffer_validate</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101831">Bug 101831</a> - Build failure in GNOME Continuous</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101851">Bug 101851</a> - [regression] libEGL_common.a undefined reference to '__gxx_personality_v0'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101867">Bug 101867</a> - Launch options window renders black in Feral Games in current Mesa trunk</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101876">Bug 101876</a> - SIGSEGV when launching Steam</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101910">Bug 101910</a> - [BYT] ES31-CTS.functional.copy_image.non_compressed.viewclass_96_bits.rgb32f_rgb32f</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101925">Bug 101925</a> - playstore/webview crash</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101961">Bug 101961</a> - Serious Sam Fusion hangs system completely</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101982">Bug 101982</a> - Weston crashes when running an OpenGL program on i965</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101983">Bug 101983</a> - [G33] ES2-CTS.functional.shaders.struct.uniform.sampler_nested* regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102024">Bug 102024</a> - FORMAT_FEATURE_SAMPLED_IMAGE_BIT not supported for D16_UNORM and D32_SFLOAT</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102148">Bug 102148</a> - Crash when running qopenglwidget example on mesa llvmpipe win32</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102241">Bug 102241</a> - gallium/wgl: SwapBuffers freezing regularly with swap interval enabled</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102308">Bug 102308</a> - segfault in glCompressedTextureSubImage3D</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<ul>
|
||||
|
199
docs/relnotes/17.2.1.html
Normal file
199
docs/relnotes/17.2.1.html
Normal file
@@ -0,0 +1,199 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 17.2.1 Release Notes / September 17, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 17.2.1 is a bug fix release which fixes bugs found since the 17.2.0 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 17.2.1 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100613">Bug 100613</a> - Regression in Mesa 17 on s390x (zSystems)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101709">Bug 101709</a> - [llvmpipe] piglit gl-1.0-scissor-offscreen regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102454">Bug 102454</a> - glibc 2.26 doesn't provide anymore xlocale.h</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102467">Bug 102467</a> - src/mesa/state_tracker/st_cb_readpixels.c:178]: (warning) Redundant assignment</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=102502">Bug 102502</a> - [bisected] Kodi crashes since commit 707d2e8b - gallium: fold u_trim_pipe_prim call from st/mesa to drivers</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Bas Nieuwenhuizen (4):</p>
|
||||
<ul>
|
||||
<li>radv: Actually set the cmd_buffer usage_flags.</li>
|
||||
<li>radv: Fix vkCopyImage with both depth and stencil aspects.</li>
|
||||
<li>radv: Disable multilayer & multilevel DCC.</li>
|
||||
<li>radv: Don't allocate CMASK for linear images.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ben Crocker (1):</p>
|
||||
<ul>
|
||||
<li>llvmpipe: lp_build_gather_elem_vec BE fix for 3x16 load</li>
|
||||
</ul>
|
||||
|
||||
<p>Brian Paul (1):</p>
|
||||
<ul>
|
||||
<li>llvmpipe: initialize llvmpipe->dirty with LP_NEW_SCISSOR</li>
|
||||
</ul>
|
||||
|
||||
<p>Charmaine Lee (1):</p>
|
||||
<ul>
|
||||
<li>vbo: fix offset in minmax cache key</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (12):</p>
|
||||
<ul>
|
||||
<li>radv: disable 1d/2d linear optimisation on gfx9.</li>
|
||||
<li>radv/gfx9: set descriptor up for base_mip to level range.</li>
|
||||
<li>Revert "radv: disable support for VEGA for now."</li>
|
||||
<li>radv/winsys: use amdgpu_bo_va_op_raw.</li>
|
||||
<li>radv/gfx9: allocate events from uncached VA space</li>
|
||||
<li>radv: use simpler indirect packet 3 if possible.</li>
|
||||
<li>radv: don't use iview for meta image width/height.</li>
|
||||
<li>radv: handle GFX9 1D textures</li>
|
||||
<li>radv/gfx9: set mip0-depth correctly for 2d arrays/3d images</li>
|
||||
<li>radv/ac: bump params array for image atomic comp swap</li>
|
||||
<li>radv/gfx9: fix image resource handling.</li>
|
||||
<li>radv/winsys: fix flags vs va_flags thinko.</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (7):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 17.2.0</li>
|
||||
<li>cherry-ignore: add getCapability patches</li>
|
||||
<li>cherry-ignore: ignore gfx9 tile swizzle fix</li>
|
||||
<li>cherry-ignore: add execution_type() fix to the list</li>
|
||||
<li>cherry-ignore: add EGL+gbm swast patches</li>
|
||||
<li>egl/x11/dri3: adding missing __DRI_BACKGROUND_CALLABLE extension</li>
|
||||
<li>Update version to 17.2.1</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Engestrom (3):</p>
|
||||
<ul>
|
||||
<li>util: improve compiler guard</li>
|
||||
<li>mesa/st: remove unwanted backup file</li>
|
||||
<li>docs/egl: remove reference to EGL_DRIVERS_PATH</li>
|
||||
</ul>
|
||||
|
||||
<p>Grazvydas Ignotas (1):</p>
|
||||
<ul>
|
||||
<li>radv: don't assert on empty hash table</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (2):</p>
|
||||
<ul>
|
||||
<li>anv/formats: Nicely handle unknown VkFormat enums</li>
|
||||
<li>spirv: Add support for the HelperInvocation builtin</li>
|
||||
</ul>
|
||||
|
||||
<p>Karol Herbst (1):</p>
|
||||
<ul>
|
||||
<li>nvc0: write 0 to pipeline_statistics.cs_invocations</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>i965: Fix crash in fallback GTT mapping.</li>
|
||||
<li>i965: Set "Subslice Hashing Mode" to 16x16 on Apollolake.</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (1):</p>
|
||||
<ul>
|
||||
<li>st/mesa: skip draw calls with pipe_draw_info::count == 0</li>
|
||||
</ul>
|
||||
|
||||
<p>Michael Olbrich (1):</p>
|
||||
<ul>
|
||||
<li>egl/dri2: only destroy created objects</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: apply a mask to gl_SampleMaskIn in the PS prolog</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (4):</p>
|
||||
<ul>
|
||||
<li>radeonsi/gfx9: always flush DB metadata on framebuffer changes</li>
|
||||
<li>st/glsl_to_tgsi: only the first (inner-most) array reference can be a 2D index</li>
|
||||
<li>ac/surface: match Z and stencil tile config</li>
|
||||
<li>glsl: fix glsl_struct_field size calculations for shader cache</li>
|
||||
</ul>
|
||||
|
||||
<p>Ray Strode (1):</p>
|
||||
<ul>
|
||||
<li>gallivm: correct channel shift logic on big endian</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (1):</p>
|
||||
<ul>
|
||||
<li>freedreno: skip batch-cache for compute shaders</li>
|
||||
</ul>
|
||||
|
||||
<p>Roland Scheidegger (1):</p>
|
||||
<ul>
|
||||
<li>st/mesa: fix view template initialization in try_pbo_readpixels</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: update dirty_level_mask before dispatching</li>
|
||||
</ul>
|
||||
|
||||
<p>Timothy Arceri (9):</p>
|
||||
<ul>
|
||||
<li>glsl: allow NULL to be passed to encode_type_to_blob()</li>
|
||||
<li>glsl: stop adding pointers from gl_shader_variable to the cache</li>
|
||||
<li>glsl: stop adding pointers from glsl_struct_field to the cache</li>
|
||||
<li>glsl: add has_uniform_storage() helper to shader cache</li>
|
||||
<li>glsl: don't write uniform storage offset if there isn't one</li>
|
||||
<li>glsl: always write a name/label string to the cache</li>
|
||||
<li>compiler: move pointers to the start of shader_info</li>
|
||||
<li>glsl: stop adding pointers from shader_info to the cache</li>
|
||||
<li>glsl: stop adding pointers from bindless structs to the cache</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -157,6 +157,19 @@ def check_header(env, header):
|
||||
env = conf.Finish()
|
||||
return have_header
|
||||
|
||||
def check_functions(env, functions):
|
||||
'''Check if all of the functions exist'''
|
||||
|
||||
conf = SCons.Script.Configure(env)
|
||||
have_functions = True
|
||||
|
||||
for function in functions:
|
||||
if not conf.CheckFunc(function):
|
||||
have_functions = False
|
||||
|
||||
env = conf.Finish()
|
||||
return have_functions
|
||||
|
||||
def check_prog(env, prog):
|
||||
"""Check whether this program exists."""
|
||||
|
||||
@@ -339,6 +352,9 @@ def generate(env):
|
||||
if check_header(env, 'xlocale.h'):
|
||||
cppdefines += ['HAVE_XLOCALE_H']
|
||||
|
||||
if check_functions(env, ['strtod_l', 'strtof_l']):
|
||||
cppdefines += ['HAVE_STRTOD_L']
|
||||
|
||||
if platform == 'windows':
|
||||
cppdefines += [
|
||||
'WIN32',
|
||||
|
@@ -3309,13 +3309,13 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
|
||||
|
||||
int count;
|
||||
enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
|
||||
bool is_array = glsl_sampler_type_is_array(type);
|
||||
bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
|
||||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
|
||||
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
|
||||
dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
|
||||
|
||||
count = image_type_to_components_count(dim,
|
||||
glsl_sampler_type_is_array(type));
|
||||
bool gfx9_1d = ctx->options->chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
|
||||
count = image_type_to_components_count(dim, is_array);
|
||||
|
||||
if (is_ms) {
|
||||
LLVMValueRef fmask_load_address[3];
|
||||
@@ -3323,7 +3323,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
|
||||
|
||||
fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
|
||||
fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], "");
|
||||
if (glsl_sampler_type_is_array(type))
|
||||
if (is_array)
|
||||
fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], "");
|
||||
else
|
||||
fmask_load_address[2] = NULL;
|
||||
@@ -3338,7 +3338,7 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
|
||||
sample_index,
|
||||
get_sampler_desc(ctx, instr->variables[0], DESC_FMASK));
|
||||
}
|
||||
if (count == 1) {
|
||||
if (count == 1 && !gfx9_1d) {
|
||||
if (instr->src[0].ssa->num_components)
|
||||
res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
|
||||
else
|
||||
@@ -3348,13 +3348,22 @@ static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
|
||||
if (is_ms)
|
||||
count--;
|
||||
for (chan = 0; chan < count; ++chan) {
|
||||
coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
|
||||
coords[chan] = llvm_extract_elem(ctx, src0, chan);
|
||||
}
|
||||
|
||||
if (add_frag_pos) {
|
||||
for (chan = 0; chan < count; ++chan)
|
||||
coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
|
||||
}
|
||||
|
||||
if (gfx9_1d) {
|
||||
if (is_array) {
|
||||
coords[2] = coords[1];
|
||||
coords[1] = ctx->ac.i32_0;
|
||||
} else
|
||||
coords[1] = ctx->ac.i32_0;
|
||||
count++;
|
||||
}
|
||||
|
||||
if (is_ms) {
|
||||
coords[count] = sample_index;
|
||||
count++;
|
||||
@@ -3490,7 +3499,7 @@ static void visit_image_store(struct nir_to_llvm_context *ctx,
|
||||
static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
|
||||
const nir_intrinsic_instr *instr)
|
||||
{
|
||||
LLVMValueRef params[6];
|
||||
LLVMValueRef params[7];
|
||||
int param_count = 0;
|
||||
const nir_variable *var = instr->variables[0]->var;
|
||||
|
||||
@@ -3591,14 +3600,22 @@ static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
|
||||
|
||||
res = ac_build_image_opcode(&ctx->ac, &args);
|
||||
|
||||
LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
|
||||
|
||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
|
||||
glsl_sampler_type_is_array(type)) {
|
||||
LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
|
||||
LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
|
||||
LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
|
||||
z = LLVMBuildSDiv(ctx->builder, z, six, "");
|
||||
res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
|
||||
}
|
||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
|
||||
glsl_sampler_type_is_array(type)) {
|
||||
LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, res, two, "");
|
||||
res = LLVMBuildInsertElement(ctx->builder, res, layers,
|
||||
ctx->ac.i32_1, "");
|
||||
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -4455,23 +4472,39 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
|
||||
|
||||
/* pack derivatives */
|
||||
if (ddx || ddy) {
|
||||
int num_src_deriv_channels, num_dest_deriv_channels;
|
||||
switch (instr->sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
num_deriv_comp = 3;
|
||||
num_src_deriv_channels = 3;
|
||||
num_dest_deriv_channels = 3;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
default:
|
||||
num_src_deriv_channels = 2;
|
||||
num_dest_deriv_channels = 2;
|
||||
num_deriv_comp = 2;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
num_deriv_comp = 1;
|
||||
num_src_deriv_channels = 1;
|
||||
if (ctx->options->chip_class >= GFX9) {
|
||||
num_dest_deriv_channels = 2;
|
||||
num_deriv_comp = 2;
|
||||
} else {
|
||||
num_dest_deriv_channels = 1;
|
||||
num_deriv_comp = 1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < num_deriv_comp; i++) {
|
||||
for (unsigned i = 0; i < num_src_deriv_channels; i++) {
|
||||
derivs[i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddx, i));
|
||||
derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
|
||||
derivs[num_dest_deriv_channels + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
|
||||
}
|
||||
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
|
||||
derivs[i] = ctx->ac.f32_0;
|
||||
derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4512,6 +4545,23 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
|
||||
}
|
||||
address[count++] = coords[2];
|
||||
}
|
||||
|
||||
if (ctx->options->chip_class >= GFX9) {
|
||||
LLVMValueRef filler;
|
||||
if (instr->op == nir_texop_txf)
|
||||
filler = ctx->ac.i32_0;
|
||||
else
|
||||
filler = LLVMConstReal(ctx->f32, 0.5);
|
||||
|
||||
if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
|
||||
if (instr->is_array) {
|
||||
address[count] = address[count - 1];
|
||||
address[count - 1] = filler;
|
||||
count++;
|
||||
} else
|
||||
address[count++] = filler;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Pack LOD */
|
||||
@@ -4606,6 +4656,14 @@ static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
|
||||
LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
|
||||
z = LLVMBuildSDiv(ctx->builder, z, six, "");
|
||||
result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
|
||||
} else if (ctx->options->chip_class >= GFX9 &&
|
||||
instr->op == nir_texop_txs &&
|
||||
instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
|
||||
instr->is_array) {
|
||||
LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
|
||||
LLVMValueRef layers = LLVMBuildExtractElement(ctx->builder, result, two, "");
|
||||
result = LLVMBuildInsertElement(ctx->builder, result, layers,
|
||||
ctx->ac.i32_1, "");
|
||||
} else if (instr->dest.ssa.num_components != 4)
|
||||
result = trim_vector(ctx, result, instr->dest.ssa.num_components);
|
||||
|
||||
|
@@ -553,15 +553,35 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
|
||||
AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
|
||||
AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;
|
||||
|
||||
/* noStencil = 0 can result in a depth part that is incompatible with
|
||||
* mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in
|
||||
* this case, we may end up setting stencil_adjusted).
|
||||
/* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit)
|
||||
* for Z and stencil. This can cause a number of problems which we work
|
||||
* around here:
|
||||
*
|
||||
* TODO: update addrlib to a newer version, remove this, and
|
||||
* use flags.matchStencilTileCfg = 1 as an alternative fix.
|
||||
* - a depth part that is incompatible with mipmapped texturing
|
||||
* - at least on Stoney, entirely incompatible Z/S aspects (e.g.
|
||||
* incorrect tiling applied to the stencil part, stencil buffer
|
||||
* memory accesses that go out of bounds) even without mipmapping
|
||||
*
|
||||
* Some piglit tests that are prone to different types of related
|
||||
* failures:
|
||||
* ./bin/ext_framebuffer_multisample-upsample 2 stencil
|
||||
* ./bin/framebuffer-blit-levels {draw,read} stencil
|
||||
* ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
|
||||
* ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
|
||||
* ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
|
||||
*/
|
||||
if (config->info.levels > 1)
|
||||
int stencil_tile_idx = -1;
|
||||
|
||||
if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
|
||||
(config->info.levels > 1 || info->family == CHIP_STONEY)) {
|
||||
/* Compute stencilTileIdx that is compatible with the (depth)
|
||||
* tileIdx. This degrades the depth surface if necessary to
|
||||
* ensure that a matching stencilTileIdx exists. */
|
||||
AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
|
||||
|
||||
/* Keep the depth mip-tail compatible with texturing. */
|
||||
AddrSurfInfoIn.flags.noStencil = 1;
|
||||
}
|
||||
|
||||
/* Set preferred macrotile parameters. This is usually required
|
||||
* for shared resources. This is for 2D tiling only. */
|
||||
@@ -643,12 +663,33 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
|
||||
if (level > 0)
|
||||
continue;
|
||||
|
||||
/* Check that we actually got a TC-compatible HTILE if
|
||||
* we requested it (only for level 0, since we're not
|
||||
* supporting HTILE on higher mip levels anyway). */
|
||||
assert(AddrSurfInfoOut.tcCompatible ||
|
||||
!AddrSurfInfoIn.flags.tcCompatible ||
|
||||
AddrSurfInfoIn.flags.matchStencilTileCfg);
|
||||
|
||||
if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
|
||||
if (!AddrSurfInfoOut.tcCompatible) {
|
||||
AddrSurfInfoIn.flags.tcCompatible = 0;
|
||||
surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
|
||||
}
|
||||
|
||||
AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
|
||||
AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
|
||||
stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
|
||||
|
||||
assert(stencil_tile_idx >= 0);
|
||||
}
|
||||
|
||||
gfx6_surface_settings(info, &AddrSurfInfoOut, surf);
|
||||
}
|
||||
}
|
||||
|
||||
/* Calculate texture layout information for stencil. */
|
||||
if (surf->flags & RADEON_SURF_SBUFFER) {
|
||||
AddrSurfInfoIn.tileIndex = stencil_tile_idx;
|
||||
AddrSurfInfoIn.bpp = 8;
|
||||
AddrSurfInfoIn.flags.depth = 0;
|
||||
AddrSurfInfoIn.flags.stencil = 1;
|
||||
|
@@ -1984,6 +1984,7 @@ VkResult radv_BeginCommandBuffer(
|
||||
|
||||
memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
|
||||
cmd_buffer->state.last_primitive_reset_en = -1;
|
||||
cmd_buffer->usage_flags = pBeginInfo->flags;
|
||||
|
||||
/* setup initial configuration into command buffer */
|
||||
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
|
||||
@@ -2788,20 +2789,30 @@ radv_emit_indirect_draw(struct radv_cmd_buffer *cmd_buffer,
|
||||
radeon_emit(cs, indirect_va);
|
||||
radeon_emit(cs, indirect_va >> 32);
|
||||
|
||||
radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
|
||||
PKT3_DRAW_INDIRECT_MULTI,
|
||||
8, false));
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) |
|
||||
S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
|
||||
S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
|
||||
radeon_emit(cs, draw_count); /* count */
|
||||
radeon_emit(cs, count_va); /* count_addr */
|
||||
radeon_emit(cs, count_va >> 32);
|
||||
radeon_emit(cs, stride); /* stride */
|
||||
radeon_emit(cs, di_src_sel);
|
||||
if (draw_count == 1 && !count_va && !draw_id_enable) {
|
||||
radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT :
|
||||
PKT3_DRAW_INDIRECT, 3, false));
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, di_src_sel);
|
||||
} else {
|
||||
radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI :
|
||||
PKT3_DRAW_INDIRECT_MULTI,
|
||||
8, false));
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, (base_reg - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2);
|
||||
radeon_emit(cs, (((base_reg + 8) - SI_SH_REG_OFFSET) >> 2) |
|
||||
S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
|
||||
S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
|
||||
radeon_emit(cs, draw_count); /* count */
|
||||
radeon_emit(cs, count_va); /* count_addr */
|
||||
radeon_emit(cs, count_va >> 32);
|
||||
radeon_emit(cs, stride); /* stride */
|
||||
radeon_emit(cs, di_src_sel);
|
||||
}
|
||||
|
||||
radv_cmd_buffer_trace_emit(cmd_buffer);
|
||||
}
|
||||
|
||||
|
@@ -2813,7 +2813,7 @@ VkResult radv_CreateEvent(
|
||||
|
||||
event->bo = device->ws->buffer_create(device->ws, 8, 8,
|
||||
RADEON_DOMAIN_GTT,
|
||||
RADEON_FLAG_CPU_ACCESS);
|
||||
RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS);
|
||||
if (!event->bo) {
|
||||
vk_free2(&device->alloc, pAllocator, event);
|
||||
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
|
||||
@@ -3115,8 +3115,8 @@ radv_initialise_color_surface(struct radv_device *device,
|
||||
}
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
uint32_t max_slice = radv_surface_layer_count(iview);
|
||||
unsigned mip0_depth = iview->base_layer + max_slice - 1;
|
||||
unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
|
||||
(iview->extent.depth - 1) : (iview->image->info.array_size - 1);
|
||||
|
||||
cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
|
||||
cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
|
||||
|
@@ -34,7 +34,7 @@
|
||||
#include "util/debug.h"
|
||||
#include "util/u_atomic.h"
|
||||
static unsigned
|
||||
radv_choose_tiling(struct radv_device *Device,
|
||||
radv_choose_tiling(struct radv_device *device,
|
||||
const struct radv_image_create_info *create_info)
|
||||
{
|
||||
const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
|
||||
@@ -45,14 +45,15 @@ radv_choose_tiling(struct radv_device *Device,
|
||||
}
|
||||
|
||||
if (!vk_format_is_compressed(pCreateInfo->format) &&
|
||||
!vk_format_is_depth_or_stencil(pCreateInfo->format)) {
|
||||
!vk_format_is_depth_or_stencil(pCreateInfo->format)
|
||||
&& device->physical_device->rad_info.chip_class <= VI) {
|
||||
/* Choosing linear tiling here causes hangs in some VK CTS tests on GFX9, hence the chip_class <= VI restriction above. */
|
||||
/* Textures with a very small height are recommended to be linear. */
|
||||
if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
|
||||
/* Only very thin and long 2D textures should benefit from
|
||||
* linear_aligned. */
|
||||
(pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
|
||||
return RADEON_SURF_MODE_LINEAR_ALIGNED;
|
||||
|
||||
}
|
||||
|
||||
/* MSAA resources must be 2D tiled. */
|
||||
@@ -119,6 +120,7 @@ radv_init_surface(struct radv_device *device,
|
||||
VK_IMAGE_USAGE_STORAGE_BIT)) ||
|
||||
(pCreateInfo->flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) ||
|
||||
(pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) ||
|
||||
pCreateInfo->mipLevels > 1 || pCreateInfo->arrayLayers > 1 ||
|
||||
device->physical_device->rad_info.chip_class < VI ||
|
||||
create_info->scanout || (device->debug_flags & RADV_DEBUG_NO_DCC) ||
|
||||
!radv_is_colorbuffer_format_supported(pCreateInfo->format, &blendable))
|
||||
@@ -279,10 +281,14 @@ si_set_mutable_tex_desc_fields(struct radv_device *device,
|
||||
}
|
||||
|
||||
static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
|
||||
unsigned nr_layers, unsigned nr_samples, bool is_storage_image)
|
||||
unsigned nr_layers, unsigned nr_samples, bool is_storage_image, bool gfx9)
|
||||
{
|
||||
if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
|
||||
return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;
|
||||
|
||||
/* GFX9 allocates 1D textures as 2D. */
|
||||
if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
|
||||
image_type = VK_IMAGE_TYPE_2D;
|
||||
switch (image_type) {
|
||||
case VK_IMAGE_TYPE_1D:
|
||||
return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
|
||||
@@ -373,7 +379,7 @@ si_make_texture_descriptor(struct radv_device *device,
|
||||
}
|
||||
|
||||
type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
|
||||
is_storage_image);
|
||||
is_storage_image, device->physical_device->rad_info.chip_class >= GFX9);
|
||||
if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
|
||||
height = 1;
|
||||
depth = image->info.array_size;
|
||||
@@ -494,7 +500,7 @@ si_make_texture_descriptor(struct radv_device *device,
|
||||
S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
|
||||
S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
|
||||
S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
|
||||
S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false));
|
||||
S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false, false));
|
||||
fmask_state[4] = 0;
|
||||
fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
|
||||
fmask_state[6] = 0;
|
||||
@@ -832,8 +838,10 @@ radv_image_create(VkDevice _device,
|
||||
|
||||
if ((pCreateInfo->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) &&
|
||||
pCreateInfo->mipLevels == 1 &&
|
||||
!image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc)
|
||||
!image->surface.dcc_size && image->info.depth == 1 && can_cmask_dcc &&
|
||||
!image->surface.is_linear)
|
||||
radv_image_alloc_cmask(device, image);
|
||||
|
||||
if (image->info.samples > 1 && vk_format_is_color(pCreateInfo->format)) {
|
||||
radv_image_alloc_fmask(device, image);
|
||||
} else if (vk_format_is_depth(pCreateInfo->format)) {
|
||||
@@ -870,6 +878,7 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
|
||||
uint32_t blk_w;
|
||||
uint32_t *descriptor;
|
||||
uint32_t *fmask_descriptor;
|
||||
uint32_t hw_level = 0;
|
||||
|
||||
if (is_storage_image) {
|
||||
descriptor = iview->storage_descriptor;
|
||||
@@ -882,11 +891,13 @@ radv_image_view_make_descriptor(struct radv_image_view *iview,
|
||||
assert(image->surface.blk_w % vk_format_get_blockwidth(image->vk_format) == 0);
|
||||
blk_w = image->surface.blk_w / vk_format_get_blockwidth(image->vk_format) * vk_format_get_blockwidth(iview->vk_format);
|
||||
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9)
|
||||
hw_level = iview->base_mip;
|
||||
si_make_texture_descriptor(device, image, is_storage_image,
|
||||
iview->type,
|
||||
iview->vk_format,
|
||||
components,
|
||||
0, iview->level_count - 1,
|
||||
hw_level, hw_level + iview->level_count - 1,
|
||||
iview->base_layer,
|
||||
iview->base_layer + iview->layer_count - 1,
|
||||
iview->extent.width,
|
||||
@@ -1043,23 +1054,34 @@ radv_DestroyImage(VkDevice _device, VkImage _image,
|
||||
}
|
||||
|
||||
void radv_GetImageSubresourceLayout(
|
||||
VkDevice device,
|
||||
VkDevice _device,
|
||||
VkImage _image,
|
||||
const VkImageSubresource* pSubresource,
|
||||
VkSubresourceLayout* pLayout)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_image, image, _image);
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
int level = pSubresource->mipLevel;
|
||||
int layer = pSubresource->arrayLayer;
|
||||
struct radeon_surf *surface = &image->surface;
|
||||
|
||||
pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
|
||||
pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
|
||||
pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
|
||||
pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
|
||||
pLayout->size = surface->u.legacy.level[level].slice_size;
|
||||
if (image->type == VK_IMAGE_TYPE_3D)
|
||||
pLayout->size *= u_minify(image->info.depth, level);
|
||||
if (device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
pLayout->offset = surface->u.gfx9.offset[level] + surface->u.gfx9.surf_slice_size * layer;
|
||||
pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe;
|
||||
pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
|
||||
pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
|
||||
pLayout->size = surface->u.gfx9.surf_slice_size;
|
||||
if (image->type == VK_IMAGE_TYPE_3D)
|
||||
pLayout->size *= u_minify(image->info.depth, level);
|
||||
} else {
|
||||
pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
|
||||
pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
|
||||
pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
|
||||
pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
|
||||
pLayout->size = surface->u.legacy.level[level].slice_size;
|
||||
if (image->type == VK_IMAGE_TYPE_3D)
|
||||
pLayout->size *= u_minify(image->info.depth, level);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@@ -275,15 +275,20 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
|
||||
VkFilter blit_filter)
|
||||
{
|
||||
struct radv_device *device = cmd_buffer->device;
|
||||
uint32_t src_width = radv_minify(src_iview->image->info.width, src_iview->base_mip);
|
||||
uint32_t src_height = radv_minify(src_iview->image->info.height, src_iview->base_mip);
|
||||
uint32_t src_depth = radv_minify(src_iview->image->info.depth, src_iview->base_mip);
|
||||
uint32_t dst_width = radv_minify(dest_iview->image->info.width, dest_iview->base_mip);
|
||||
uint32_t dst_height = radv_minify(dest_iview->image->info.height, dest_iview->base_mip);
|
||||
|
||||
assert(src_image->info.samples == dest_image->info.samples);
|
||||
|
||||
float vertex_push_constants[5] = {
|
||||
(float)src_offset_0.x / (float)src_iview->extent.width,
|
||||
(float)src_offset_0.y / (float)src_iview->extent.height,
|
||||
(float)src_offset_1.x / (float)src_iview->extent.width,
|
||||
(float)src_offset_1.y / (float)src_iview->extent.height,
|
||||
(float)src_offset_0.z / (float)src_iview->extent.depth,
|
||||
(float)src_offset_0.x / (float)src_width,
|
||||
(float)src_offset_0.y / (float)src_height,
|
||||
(float)src_offset_1.x / (float)src_width,
|
||||
(float)src_offset_1.y / (float)src_height,
|
||||
(float)src_offset_0.z / (float)src_depth,
|
||||
};
|
||||
|
||||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
@@ -310,8 +315,8 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
|
||||
.pAttachments = (VkImageView[]) {
|
||||
radv_image_view_to_handle(dest_iview),
|
||||
},
|
||||
.width = dest_iview->extent.width,
|
||||
.height = dest_iview->extent.height,
|
||||
.width = dst_width,
|
||||
.height = dst_height,
|
||||
.layers = 1,
|
||||
}, &cmd_buffer->pool->alloc, &fb);
|
||||
VkPipeline pipeline;
|
||||
|
@@ -53,7 +53,8 @@ enum blit2d_src_type {
|
||||
static void
|
||||
create_iview(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_meta_blit2d_surf *surf,
|
||||
struct radv_image_view *iview, VkFormat depth_format)
|
||||
struct radv_image_view *iview, VkFormat depth_format,
|
||||
VkImageAspectFlagBits aspects)
|
||||
{
|
||||
VkFormat format;
|
||||
|
||||
@@ -69,7 +70,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.format = format,
|
||||
.subresourceRange = {
|
||||
.aspectMask = surf->aspect_mask,
|
||||
.aspectMask = aspects,
|
||||
.baseMipLevel = surf->level,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = surf->layer,
|
||||
@@ -111,7 +112,8 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_meta_blit2d_surf *src_img,
|
||||
struct radv_meta_blit2d_buffer *src_buf,
|
||||
struct blit2d_src_temps *tmp,
|
||||
enum blit2d_src_type src_type, VkFormat depth_format)
|
||||
enum blit2d_src_type src_type, VkFormat depth_format,
|
||||
VkImageAspectFlagBits aspects)
|
||||
{
|
||||
struct radv_device *device = cmd_buffer->device;
|
||||
|
||||
@@ -138,7 +140,7 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
|
||||
VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
|
||||
&src_buf->pitch);
|
||||
} else {
|
||||
create_iview(cmd_buffer, src_img, &tmp->iview, depth_format);
|
||||
create_iview(cmd_buffer, src_img, &tmp->iview, depth_format, aspects);
|
||||
|
||||
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
device->meta_state.blit2d.p_layouts[src_type],
|
||||
@@ -175,9 +177,10 @@ blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
VkFormat depth_format,
|
||||
struct blit2d_dst_temps *tmp)
|
||||
struct blit2d_dst_temps *tmp,
|
||||
VkImageAspectFlagBits aspects)
|
||||
{
|
||||
create_iview(cmd_buffer, dst, &tmp->iview, depth_format);
|
||||
create_iview(cmd_buffer, dst, &tmp->iview, depth_format, aspects);
|
||||
|
||||
radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
|
||||
&(VkFramebufferCreateInfo) {
|
||||
@@ -250,106 +253,111 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_device *device = cmd_buffer->device;
|
||||
|
||||
for (unsigned r = 0; r < num_rects; ++r) {
|
||||
VkFormat depth_format = 0;
|
||||
if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
depth_format = vk_format_stencil_only(dst->image->vk_format);
|
||||
else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
depth_format = vk_format_depth_only(dst->image->vk_format);
|
||||
struct blit2d_src_temps src_temps;
|
||||
blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);
|
||||
unsigned i;
|
||||
for_each_bit(i, dst->aspect_mask) {
|
||||
unsigned aspect_mask = 1u << i;
|
||||
VkFormat depth_format = 0;
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
|
||||
depth_format = vk_format_stencil_only(dst->image->vk_format);
|
||||
else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
depth_format = vk_format_depth_only(dst->image->vk_format);
|
||||
struct blit2d_src_temps src_temps;
|
||||
blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format, aspect_mask);
|
||||
|
||||
struct blit2d_dst_temps dst_temps;
|
||||
blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
|
||||
rects[r].dst_y + rects[r].height, depth_format, &dst_temps);
|
||||
struct blit2d_dst_temps dst_temps;
|
||||
blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
|
||||
rects[r].dst_y + rects[r].height, depth_format, &dst_temps, aspect_mask);
|
||||
|
||||
float vertex_push_constants[4] = {
|
||||
rects[r].src_x,
|
||||
rects[r].src_y,
|
||||
rects[r].src_x + rects[r].width,
|
||||
rects[r].src_y + rects[r].height,
|
||||
};
|
||||
float vertex_push_constants[4] = {
|
||||
rects[r].src_x,
|
||||
rects[r].src_y,
|
||||
rects[r].src_x + rects[r].width,
|
||||
rects[r].src_y + rects[r].height,
|
||||
};
|
||||
|
||||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
device->meta_state.blit2d.p_layouts[src_type],
|
||||
VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
|
||||
vertex_push_constants);
|
||||
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
device->meta_state.blit2d.p_layouts[src_type],
|
||||
VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
|
||||
vertex_push_constants);
|
||||
|
||||
if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
|
||||
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
|
||||
unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
|
||||
|
||||
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
&(VkRenderPassBeginInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = device->meta_state.blit2d.render_passes[fs_key],
|
||||
.framebuffer = dst_temps.fb,
|
||||
.renderArea = {
|
||||
.offset = { rects[r].dst_x, rects[r].dst_y, },
|
||||
.extent = { rects[r].width, rects[r].height },
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = NULL,
|
||||
}, VK_SUBPASS_CONTENTS_INLINE);
|
||||
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
&(VkRenderPassBeginInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = device->meta_state.blit2d.render_passes[fs_key],
|
||||
.framebuffer = dst_temps.fb,
|
||||
.renderArea = {
|
||||
.offset = { rects[r].dst_x, rects[r].dst_y, },
|
||||
.extent = { rects[r].width, rects[r].height },
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = NULL,
|
||||
}, VK_SUBPASS_CONTENTS_INLINE);
|
||||
|
||||
|
||||
bind_pipeline(cmd_buffer, src_type, fs_key);
|
||||
} else if (dst->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||||
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
&(VkRenderPassBeginInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = device->meta_state.blit2d.depth_only_rp,
|
||||
.framebuffer = dst_temps.fb,
|
||||
.renderArea = {
|
||||
.offset = { rects[r].dst_x, rects[r].dst_y, },
|
||||
.extent = { rects[r].width, rects[r].height },
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = NULL,
|
||||
}, VK_SUBPASS_CONTENTS_INLINE);
|
||||
bind_pipeline(cmd_buffer, src_type, fs_key);
|
||||
} else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
|
||||
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
&(VkRenderPassBeginInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = device->meta_state.blit2d.depth_only_rp,
|
||||
.framebuffer = dst_temps.fb,
|
||||
.renderArea = {
|
||||
.offset = { rects[r].dst_x, rects[r].dst_y, },
|
||||
.extent = { rects[r].width, rects[r].height },
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = NULL,
|
||||
}, VK_SUBPASS_CONTENTS_INLINE);
|
||||
|
||||
|
||||
bind_depth_pipeline(cmd_buffer, src_type);
|
||||
bind_depth_pipeline(cmd_buffer, src_type);
|
||||
|
||||
} else if (dst->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
&(VkRenderPassBeginInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = device->meta_state.blit2d.stencil_only_rp,
|
||||
.framebuffer = dst_temps.fb,
|
||||
.renderArea = {
|
||||
.offset = { rects[r].dst_x, rects[r].dst_y, },
|
||||
.extent = { rects[r].width, rects[r].height },
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = NULL,
|
||||
}, VK_SUBPASS_CONTENTS_INLINE);
|
||||
} else if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
|
||||
&(VkRenderPassBeginInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
|
||||
.renderPass = device->meta_state.blit2d.stencil_only_rp,
|
||||
.framebuffer = dst_temps.fb,
|
||||
.renderArea = {
|
||||
.offset = { rects[r].dst_x, rects[r].dst_y, },
|
||||
.extent = { rects[r].width, rects[r].height },
|
||||
},
|
||||
.clearValueCount = 0,
|
||||
.pClearValues = NULL,
|
||||
}, VK_SUBPASS_CONTENTS_INLINE);
|
||||
|
||||
|
||||
bind_stencil_pipeline(cmd_buffer, src_type);
|
||||
bind_stencil_pipeline(cmd_buffer, src_type);
|
||||
} else
|
||||
unreachable("Processing blit2d with multiple aspects.");
|
||||
|
||||
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
|
||||
.x = rects[r].dst_x,
|
||||
.y = rects[r].dst_y,
|
||||
.width = rects[r].width,
|
||||
.height = rects[r].height,
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f
|
||||
});
|
||||
|
||||
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
|
||||
.offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
|
||||
.extent = (VkExtent2D) { rects[r].width, rects[r].height },
|
||||
});
|
||||
|
||||
|
||||
|
||||
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
|
||||
radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
|
||||
|
||||
/* At the point where we emit the draw call, all data from the
|
||||
* descriptor sets, etc. has been used. We are free to delete it.
|
||||
*/
|
||||
blit2d_unbind_dst(cmd_buffer, &dst_temps);
|
||||
}
|
||||
|
||||
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
|
||||
.x = rects[r].dst_x,
|
||||
.y = rects[r].dst_y,
|
||||
.width = rects[r].width,
|
||||
.height = rects[r].height,
|
||||
.minDepth = 0.0f,
|
||||
.maxDepth = 1.0f
|
||||
});
|
||||
|
||||
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
|
||||
.offset = (VkOffset2D) { rects[r].dst_x, rects[r].dst_y },
|
||||
.extent = (VkExtent2D) { rects[r].width, rects[r].height },
|
||||
});
|
||||
|
||||
|
||||
|
||||
radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
|
||||
radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
|
||||
|
||||
/* At the point where we emit the draw call, all data from the
|
||||
* descriptor sets, etc. has been used. We are free to delete it.
|
||||
*/
|
||||
blit2d_unbind_dst(cmd_buffer, &dst_temps);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1176,6 +1176,9 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
|
||||
{
|
||||
VkDevice device_h = radv_device_to_handle(cmd_buffer->device);
|
||||
struct radv_image_view iview;
|
||||
uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
|
||||
uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
|
||||
|
||||
radv_image_view_init(&iview, cmd_buffer->device,
|
||||
&(VkImageViewCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
@@ -1199,9 +1202,9 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
|
||||
.pAttachments = (VkImageView[]) {
|
||||
radv_image_view_to_handle(&iview),
|
||||
},
|
||||
.width = iview.extent.width,
|
||||
.height = iview.extent.height,
|
||||
.layers = 1
|
||||
.width = width,
|
||||
.height = height,
|
||||
.layers = 1
|
||||
},
|
||||
&cmd_buffer->pool->alloc,
|
||||
&fb);
|
||||
@@ -1257,8 +1260,8 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
|
||||
.renderArea = {
|
||||
.offset = { 0, 0, },
|
||||
.extent = {
|
||||
.width = iview.extent.width,
|
||||
.height = iview.extent.height,
|
||||
.width = width,
|
||||
.height = height,
|
||||
},
|
||||
},
|
||||
.renderPass = pass,
|
||||
@@ -1277,7 +1280,7 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
|
||||
VkClearRect clear_rect = {
|
||||
.rect = {
|
||||
.offset = { 0, 0 },
|
||||
.extent = { iview.extent.width, iview.extent.height },
|
||||
.extent = { width, height },
|
||||
},
|
||||
.baseArrayLayer = range->baseArrayLayer,
|
||||
.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */
|
||||
|
@@ -118,6 +118,9 @@ radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
|
||||
const uint32_t mask = cache->table_size - 1;
|
||||
const uint32_t start = (*(uint32_t *) sha1);
|
||||
|
||||
if (cache->table_size == 0)
|
||||
return NULL;
|
||||
|
||||
for (uint32_t i = 0; i < cache->table_size; i++) {
|
||||
const uint32_t index = (start + i) & mask;
|
||||
struct cache_entry *entry = cache->hash_table[index];
|
||||
|
@@ -51,7 +51,8 @@ enum radeon_bo_flag { /* bitfield */
|
||||
RADEON_FLAG_GTT_WC = (1 << 0),
|
||||
RADEON_FLAG_CPU_ACCESS = (1 << 1),
|
||||
RADEON_FLAG_NO_CPU_ACCESS = (1 << 2),
|
||||
RADEON_FLAG_VIRTUAL = (1 << 3)
|
||||
RADEON_FLAG_VIRTUAL = (1 << 3),
|
||||
RADEON_FLAG_VA_UNCACHED = (1 << 4),
|
||||
};
|
||||
|
||||
enum radeon_bo_usage { /* bitfield */
|
||||
|
@@ -39,6 +39,23 @@
|
||||
|
||||
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);
|
||||
|
||||
/*
 * Thin wrapper around amdgpu_bo_va_op_raw() for the VA operations issued in
 * this file (the visible callers pass AMDGPU_VA_OP_MAP / AMDGPU_VA_OP_UNMAP).
 *
 * dev    - amdgpu device handle forwarded to amdgpu_bo_va_op_raw().
 * bo     - buffer object the operation applies to.
 * offset - offset inside the buffer object.
 * size   - size of the range; aligned to getpagesize() via ALIGN() before
 *          being forwarded.
 * addr   - GPU virtual address of the range.
 * flags  - extra AMDGPU_VM_* flags requested by the caller; the readable,
 *          writeable and executable page flags are always OR'ed in.
 * ops    - operation code passed through unchanged.
 *
 * Returns the result of amdgpu_bo_va_op_raw() unchanged; the visible callers
 * treat any non-zero value as failure.
 */
static int
|
||||
radv_amdgpu_bo_va_op(amdgpu_device_handle dev,
|
||||
amdgpu_bo_handle bo,
|
||||
uint64_t offset,
|
||||
uint64_t size,
|
||||
uint64_t addr,
|
||||
uint64_t flags,
|
||||
uint32_t ops)
|
||||
{
|
||||
/* Align the size to the CPU page size before handing it to the raw call. */
size = ALIGN(size, getpagesize());
|
||||
/* Every mapping made through this helper is readable, writeable and
 * executable, in addition to whatever the caller asked for. */
flags |= (AMDGPU_VM_PAGE_READABLE |
|
||||
AMDGPU_VM_PAGE_WRITEABLE |
|
||||
AMDGPU_VM_PAGE_EXECUTABLE);
|
||||
return amdgpu_bo_va_op_raw(dev, bo, offset, size, addr,
|
||||
flags, ops);
|
||||
}
|
||||
|
||||
static void
|
||||
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
|
||||
const struct radv_amdgpu_map_range *range)
|
||||
@@ -49,8 +66,8 @@ radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
|
||||
return; /* TODO: PRT mapping */
|
||||
|
||||
p_atomic_inc(&range->bo->ref_count);
|
||||
int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
|
||||
range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
|
||||
int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
|
||||
range->offset + bo->va, 0, AMDGPU_VA_OP_MAP);
|
||||
if (r)
|
||||
abort();
|
||||
}
|
||||
@@ -64,8 +81,8 @@ radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
|
||||
if (!range->bo)
|
||||
return; /* TODO: PRT mapping */
|
||||
|
||||
int r = amdgpu_bo_va_op(range->bo->bo, range->bo_offset, range->size,
|
||||
range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
|
||||
int r = radv_amdgpu_bo_va_op(bo->ws->dev, range->bo->bo, range->bo_offset, range->size,
|
||||
range->offset + bo->va, 0, AMDGPU_VA_OP_UNMAP);
|
||||
if (r)
|
||||
abort();
|
||||
radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
|
||||
@@ -235,7 +252,7 @@ static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
|
||||
bo->ws->num_buffers--;
|
||||
pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
|
||||
}
|
||||
amdgpu_bo_va_op(bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
|
||||
radv_amdgpu_bo_va_op(bo->ws->dev, bo->bo, 0, bo->size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
|
||||
amdgpu_bo_free(bo->bo);
|
||||
}
|
||||
amdgpu_va_range_free(bo->va_handle);
|
||||
@@ -323,7 +340,11 @@ radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
|
||||
goto error_bo_alloc;
|
||||
}
|
||||
|
||||
r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
|
||||
|
||||
uint32_t va_flags = 0;
|
||||
if (flags & RADEON_FLAG_VA_UNCACHED)
|
||||
va_flags |= AMDGPU_VM_MTYPE_UC;
|
||||
r = radv_amdgpu_bo_va_op(ws->dev, buf_handle, 0, size, va, va_flags, AMDGPU_VA_OP_MAP);
|
||||
if (r)
|
||||
goto error_va_map;
|
||||
|
||||
@@ -399,7 +420,7 @@ radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
|
||||
if (r)
|
||||
goto error_query;
|
||||
|
||||
r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
|
||||
r = radv_amdgpu_bo_va_op(ws->dev, result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
|
||||
if (r)
|
||||
goto error_va_map;
|
||||
|
||||
|
@@ -46,11 +46,6 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
|
||||
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
|
||||
return false;
|
||||
|
||||
if (ws->info.chip_class >= GFX9) {
|
||||
fprintf(stderr, "radv: VEGA support not completed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* LLVM 5.0 is required for GFX9. */
|
||||
if (ws->info.chip_class >= GFX9 && HAVE_LLVM < 0x0500) {
|
||||
fprintf(stderr, "amdgpu: LLVM 5.0 is required, got LLVM %i.%i\n",
|
||||
|
@@ -74,11 +74,26 @@ compile_shaders(struct gl_context *ctx, struct gl_shader_program *prog) {
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
get_struct_type_field_and_pointer_sizes(size_t *s_field_size,
|
||||
size_t *s_field_ptrs)
|
||||
{
|
||||
*s_field_size = sizeof(glsl_struct_field);
|
||||
*s_field_ptrs =
|
||||
sizeof(((glsl_struct_field *)0)->type) +
|
||||
sizeof(((glsl_struct_field *)0)->name);
|
||||
}
|
||||
|
||||
static void
|
||||
encode_type_to_blob(struct blob *blob, const glsl_type *type)
|
||||
{
|
||||
uint32_t encoding;
|
||||
|
||||
if (!type) {
|
||||
blob_write_uint32(blob, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
switch (type->base_type) {
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_INT:
|
||||
@@ -122,11 +137,18 @@ encode_type_to_blob(struct blob *blob, const glsl_type *type)
|
||||
blob_write_uint32(blob, (type->base_type) << 24);
|
||||
blob_write_string(blob, type->name);
|
||||
blob_write_uint32(blob, type->length);
|
||||
blob_write_bytes(blob, type->fields.structure,
|
||||
sizeof(glsl_struct_field) * type->length);
|
||||
|
||||
size_t s_field_size, s_field_ptrs;
|
||||
get_struct_type_field_and_pointer_sizes(&s_field_size, &s_field_ptrs);
|
||||
|
||||
for (unsigned i = 0; i < type->length; i++) {
|
||||
encode_type_to_blob(blob, type->fields.structure[i].type);
|
||||
blob_write_string(blob, type->fields.structure[i].name);
|
||||
|
||||
/* Write the struct field skipping the pointers */
|
||||
blob_write_bytes(blob,
|
||||
((char *)&type->fields.structure[i]) + s_field_ptrs,
|
||||
s_field_size - s_field_ptrs);
|
||||
}
|
||||
|
||||
if (type->is_interface()) {
|
||||
@@ -149,6 +171,11 @@ static const glsl_type *
|
||||
decode_type_from_blob(struct blob_reader *blob)
|
||||
{
|
||||
uint32_t u = blob_read_uint32(blob);
|
||||
|
||||
if (u == 0) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
glsl_base_type base_type = (glsl_base_type) (u >> 24);
|
||||
|
||||
switch (base_type) {
|
||||
@@ -182,22 +209,33 @@ decode_type_from_blob(struct blob_reader *blob)
|
||||
case GLSL_TYPE_INTERFACE: {
|
||||
char *name = blob_read_string(blob);
|
||||
unsigned num_fields = blob_read_uint32(blob);
|
||||
glsl_struct_field *fields = (glsl_struct_field *)
|
||||
blob_read_bytes(blob, sizeof(glsl_struct_field) * num_fields);
|
||||
|
||||
size_t s_field_size, s_field_ptrs;
|
||||
get_struct_type_field_and_pointer_sizes(&s_field_size, &s_field_ptrs);
|
||||
|
||||
glsl_struct_field *fields =
|
||||
(glsl_struct_field *) malloc(s_field_size * num_fields);
|
||||
for (unsigned i = 0; i < num_fields; i++) {
|
||||
fields[i].type = decode_type_from_blob(blob);
|
||||
fields[i].name = blob_read_string(blob);
|
||||
|
||||
blob_copy_bytes(blob, ((uint8_t *) &fields[i]) + s_field_ptrs,
|
||||
s_field_size - s_field_ptrs);
|
||||
}
|
||||
|
||||
const glsl_type *t;
|
||||
if (base_type == GLSL_TYPE_INTERFACE) {
|
||||
enum glsl_interface_packing packing =
|
||||
(glsl_interface_packing) blob_read_uint32(blob);
|
||||
bool row_major = blob_read_uint32(blob);
|
||||
return glsl_type::get_interface_instance(fields, num_fields,
|
||||
packing, row_major, name);
|
||||
t = glsl_type::get_interface_instance(fields, num_fields, packing,
|
||||
row_major, name);
|
||||
} else {
|
||||
return glsl_type::get_record_instance(fields, num_fields, name);
|
||||
t = glsl_type::get_record_instance(fields, num_fields, name);
|
||||
}
|
||||
|
||||
free(fields);
|
||||
return t;
|
||||
}
|
||||
case GLSL_TYPE_VOID:
|
||||
case GLSL_TYPE_ERROR:
|
||||
@@ -555,6 +593,17 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg)
|
||||
MAX_FEEDBACK_BUFFERS);
|
||||
}
|
||||
|
||||
static bool
|
||||
has_uniform_storage(struct gl_shader_program *prog, unsigned idx)
|
||||
{
|
||||
if (!prog->data->UniformStorage[idx].builtin &&
|
||||
!prog->data->UniformStorage[idx].is_shader_storage &&
|
||||
prog->data->UniformStorage[idx].block_index == -1)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void
|
||||
write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
|
||||
{
|
||||
@@ -566,8 +615,6 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
|
||||
encode_type_to_blob(metadata, prog->data->UniformStorage[i].type);
|
||||
blob_write_uint32(metadata, prog->data->UniformStorage[i].array_elements);
|
||||
blob_write_string(metadata, prog->data->UniformStorage[i].name);
|
||||
blob_write_uint32(metadata, prog->data->UniformStorage[i].storage -
|
||||
prog->data->UniformDataSlots);
|
||||
blob_write_uint32(metadata, prog->data->UniformStorage[i].builtin);
|
||||
blob_write_uint32(metadata, prog->data->UniformStorage[i].remap_location);
|
||||
blob_write_uint32(metadata, prog->data->UniformStorage[i].block_index);
|
||||
@@ -586,6 +633,12 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
|
||||
prog->data->UniformStorage[i].top_level_array_size);
|
||||
blob_write_uint32(metadata,
|
||||
prog->data->UniformStorage[i].top_level_array_stride);
|
||||
|
||||
if (has_uniform_storage(prog, i)) {
|
||||
blob_write_uint32(metadata, prog->data->UniformStorage[i].storage -
|
||||
prog->data->UniformDataSlots);
|
||||
}
|
||||
|
||||
blob_write_bytes(metadata, prog->data->UniformStorage[i].opaque,
|
||||
sizeof(prog->data->UniformStorage[i].opaque));
|
||||
}
|
||||
@@ -597,9 +650,7 @@ write_uniforms(struct blob *metadata, struct gl_shader_program *prog)
|
||||
*/
|
||||
blob_write_uint32(metadata, prog->data->NumHiddenUniforms);
|
||||
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
|
||||
if (!prog->data->UniformStorage[i].builtin &&
|
||||
!prog->data->UniformStorage[i].is_shader_storage &&
|
||||
prog->data->UniformStorage[i].block_index == -1) {
|
||||
if (has_uniform_storage(prog, i)) {
|
||||
unsigned vec_size =
|
||||
prog->data->UniformStorage[i].type->component_slots() *
|
||||
MAX2(prog->data->UniformStorage[i].array_elements, 1);
|
||||
@@ -633,7 +684,6 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
|
||||
uniforms[i].type = decode_type_from_blob(metadata);
|
||||
uniforms[i].array_elements = blob_read_uint32(metadata);
|
||||
uniforms[i].name = ralloc_strdup(prog, blob_read_string (metadata));
|
||||
uniforms[i].storage = data + blob_read_uint32(metadata);
|
||||
uniforms[i].builtin = blob_read_uint32(metadata);
|
||||
uniforms[i].remap_location = blob_read_uint32(metadata);
|
||||
uniforms[i].block_index = blob_read_uint32(metadata);
|
||||
@@ -651,6 +701,10 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
|
||||
uniforms[i].top_level_array_stride = blob_read_uint32(metadata);
|
||||
prog->UniformHash->put(i, uniforms[i].name);
|
||||
|
||||
if (has_uniform_storage(prog, i)) {
|
||||
uniforms[i].storage = data + blob_read_uint32(metadata);
|
||||
}
|
||||
|
||||
memcpy(uniforms[i].opaque,
|
||||
blob_read_bytes(metadata, sizeof(uniforms[i].opaque)),
|
||||
sizeof(uniforms[i].opaque));
|
||||
@@ -659,9 +713,7 @@ read_uniforms(struct blob_reader *metadata, struct gl_shader_program *prog)
|
||||
/* Restore uniform values. */
|
||||
prog->data->NumHiddenUniforms = blob_read_uint32(metadata);
|
||||
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
|
||||
if (!prog->data->UniformStorage[i].builtin &&
|
||||
!prog->data->UniformStorage[i].is_shader_storage &&
|
||||
prog->data->UniformStorage[i].block_index == -1) {
|
||||
if (has_uniform_storage(prog, i)) {
|
||||
unsigned vec_size =
|
||||
prog->data->UniformStorage[i].type->component_slots() *
|
||||
MAX2(prog->data->UniformStorage[i].array_elements, 1);
|
||||
@@ -867,6 +919,18 @@ write_shader_subroutine_index(struct blob *metadata,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
get_shader_var_and_pointer_sizes(size_t *s_var_size, size_t *s_var_ptrs,
|
||||
const gl_shader_variable *var)
|
||||
{
|
||||
*s_var_size = sizeof(gl_shader_variable);
|
||||
*s_var_ptrs =
|
||||
sizeof(var->type) +
|
||||
sizeof(var->interface_type) +
|
||||
sizeof(var->outermost_struct_type) +
|
||||
sizeof(var->name);
|
||||
}
|
||||
|
||||
static void
|
||||
write_program_resource_data(struct blob *metadata,
|
||||
struct gl_shader_program *prog,
|
||||
@@ -878,16 +942,19 @@ write_program_resource_data(struct blob *metadata,
|
||||
case GL_PROGRAM_INPUT:
|
||||
case GL_PROGRAM_OUTPUT: {
|
||||
const gl_shader_variable *var = (gl_shader_variable *)res->Data;
|
||||
blob_write_bytes(metadata, var, sizeof(gl_shader_variable));
|
||||
|
||||
encode_type_to_blob(metadata, var->type);
|
||||
|
||||
if (var->interface_type)
|
||||
encode_type_to_blob(metadata, var->interface_type);
|
||||
|
||||
if (var->outermost_struct_type)
|
||||
encode_type_to_blob(metadata, var->outermost_struct_type);
|
||||
encode_type_to_blob(metadata, var->interface_type);
|
||||
encode_type_to_blob(metadata, var->outermost_struct_type);
|
||||
|
||||
blob_write_string(metadata, var->name);
|
||||
|
||||
size_t s_var_size, s_var_ptrs;
|
||||
get_shader_var_and_pointer_sizes(&s_var_size, &s_var_ptrs, var);
|
||||
|
||||
/* Write gl_shader_variable skipping over the pointers */
|
||||
blob_write_bytes(metadata, ((char *)var) + s_var_ptrs,
|
||||
s_var_size - s_var_ptrs);
|
||||
break;
|
||||
}
|
||||
case GL_UNIFORM_BLOCK:
|
||||
@@ -978,17 +1045,18 @@ read_program_resource_data(struct blob_reader *metadata,
|
||||
case GL_PROGRAM_OUTPUT: {
|
||||
gl_shader_variable *var = ralloc(prog, struct gl_shader_variable);
|
||||
|
||||
blob_copy_bytes(metadata, (uint8_t *) var, sizeof(gl_shader_variable));
|
||||
var->type = decode_type_from_blob(metadata);
|
||||
|
||||
if (var->interface_type)
|
||||
var->interface_type = decode_type_from_blob(metadata);
|
||||
|
||||
if (var->outermost_struct_type)
|
||||
var->outermost_struct_type = decode_type_from_blob(metadata);
|
||||
var->interface_type = decode_type_from_blob(metadata);
|
||||
var->outermost_struct_type = decode_type_from_blob(metadata);
|
||||
|
||||
var->name = ralloc_strdup(prog, blob_read_string(metadata));
|
||||
|
||||
size_t s_var_size, s_var_ptrs;
|
||||
get_shader_var_and_pointer_sizes(&s_var_size, &s_var_ptrs, var);
|
||||
|
||||
blob_copy_bytes(metadata, ((uint8_t *) var) + s_var_ptrs,
|
||||
s_var_size - s_var_ptrs);
|
||||
|
||||
res->Data = var;
|
||||
break;
|
||||
}
|
||||
@@ -1148,18 +1216,20 @@ write_shader_metadata(struct blob *metadata, gl_linked_shader *shader)
|
||||
blob_write_bytes(metadata, glprog->sh.ImageUnits,
|
||||
sizeof(glprog->sh.ImageUnits));
|
||||
|
||||
size_t ptr_size = sizeof(GLvoid *);
|
||||
|
||||
blob_write_uint32(metadata, glprog->sh.NumBindlessSamplers);
|
||||
blob_write_uint32(metadata, glprog->sh.HasBoundBindlessSampler);
|
||||
for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {
|
||||
blob_write_bytes(metadata, &glprog->sh.BindlessSamplers[i],
|
||||
sizeof(struct gl_bindless_sampler));
|
||||
sizeof(struct gl_bindless_sampler) - ptr_size);
|
||||
}
|
||||
|
||||
blob_write_uint32(metadata, glprog->sh.NumBindlessImages);
|
||||
blob_write_uint32(metadata, glprog->sh.HasBoundBindlessImage);
|
||||
for (i = 0; i < glprog->sh.NumBindlessImages; i++) {
|
||||
blob_write_bytes(metadata, &glprog->sh.BindlessImages[i],
|
||||
sizeof(struct gl_bindless_image));
|
||||
sizeof(struct gl_bindless_image) - ptr_size);
|
||||
}
|
||||
|
||||
write_shader_parameters(metadata, glprog->Parameters);
|
||||
@@ -1187,6 +1257,8 @@ read_shader_metadata(struct blob_reader *metadata,
|
||||
blob_copy_bytes(metadata, (uint8_t *) glprog->sh.ImageUnits,
|
||||
sizeof(glprog->sh.ImageUnits));
|
||||
|
||||
size_t ptr_size = sizeof(GLvoid *);
|
||||
|
||||
glprog->sh.NumBindlessSamplers = blob_read_uint32(metadata);
|
||||
glprog->sh.HasBoundBindlessSampler = blob_read_uint32(metadata);
|
||||
if (glprog->sh.NumBindlessSamplers > 0) {
|
||||
@@ -1196,7 +1268,7 @@ read_shader_metadata(struct blob_reader *metadata,
|
||||
|
||||
for (i = 0; i < glprog->sh.NumBindlessSamplers; i++) {
|
||||
blob_copy_bytes(metadata, (uint8_t *) &glprog->sh.BindlessSamplers[i],
|
||||
sizeof(struct gl_bindless_sampler));
|
||||
sizeof(struct gl_bindless_sampler) - ptr_size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1209,7 +1281,7 @@ read_shader_metadata(struct blob_reader *metadata,
|
||||
|
||||
for (i = 0; i < glprog->sh.NumBindlessImages; i++) {
|
||||
blob_copy_bytes(metadata, (uint8_t *) &glprog->sh.BindlessImages[i],
|
||||
sizeof(struct gl_bindless_image));
|
||||
sizeof(struct gl_bindless_image) - ptr_size);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1224,6 +1296,14 @@ create_binding_str(const char *key, unsigned value, void *closure)
|
||||
ralloc_asprintf_append(bindings_str, "%s:%u,", key, value);
|
||||
}
|
||||
|
||||
static void
|
||||
get_shader_info_and_pointer_sizes(size_t *s_info_size, size_t *s_info_ptrs,
|
||||
shader_info *info)
|
||||
{
|
||||
*s_info_size = sizeof(shader_info);
|
||||
*s_info_ptrs = sizeof(info->name) + sizeof(info->label);
|
||||
}
|
||||
|
||||
static void
|
||||
create_linked_shader_and_program(struct gl_context *ctx,
|
||||
gl_shader_stage stage,
|
||||
@@ -1242,12 +1322,16 @@ create_linked_shader_and_program(struct gl_context *ctx,
|
||||
|
||||
read_shader_metadata(metadata, glprog, linked);
|
||||
|
||||
glprog->info.name = ralloc_strdup(glprog, blob_read_string(metadata));
|
||||
glprog->info.label = ralloc_strdup(glprog, blob_read_string(metadata));
|
||||
|
||||
size_t s_info_size, s_info_ptrs;
|
||||
get_shader_info_and_pointer_sizes(&s_info_size, &s_info_ptrs,
|
||||
&glprog->info);
|
||||
|
||||
/* Restore shader info */
|
||||
blob_copy_bytes(metadata, (uint8_t *) &glprog->info, sizeof(shader_info));
|
||||
if (glprog->info.name)
|
||||
glprog->info.name = ralloc_strdup(glprog, blob_read_string(metadata));
|
||||
if (glprog->info.label)
|
||||
glprog->info.label = ralloc_strdup(glprog, blob_read_string(metadata));
|
||||
blob_copy_bytes(metadata, ((uint8_t *) &glprog->info) + s_info_ptrs,
|
||||
s_info_size - s_info_ptrs);
|
||||
|
||||
_mesa_reference_shader_program_data(ctx, &glprog->sh.data, prog->data);
|
||||
_mesa_reference_program(ctx, &linked->Program, glprog);
|
||||
@@ -1286,14 +1370,24 @@ shader_cache_write_program_metadata(struct gl_context *ctx,
|
||||
if (sh) {
|
||||
write_shader_metadata(metadata, sh);
|
||||
|
||||
/* Store nir shader info */
|
||||
blob_write_bytes(metadata, &sh->Program->info, sizeof(shader_info));
|
||||
|
||||
if (sh->Program->info.name)
|
||||
blob_write_string(metadata, sh->Program->info.name);
|
||||
else
|
||||
blob_write_string(metadata, "");
|
||||
|
||||
if (sh->Program->info.label)
|
||||
blob_write_string(metadata, sh->Program->info.label);
|
||||
else
|
||||
blob_write_string(metadata, "");
|
||||
|
||||
size_t s_info_size, s_info_ptrs;
|
||||
get_shader_info_and_pointer_sizes(&s_info_size, &s_info_ptrs,
|
||||
&sh->Program->info);
|
||||
|
||||
/* Store shader info */
|
||||
blob_write_bytes(metadata,
|
||||
((char *) &sh->Program->info) + s_info_ptrs,
|
||||
s_info_size - s_info_ptrs);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -32,14 +32,14 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct shader_info {
|
||||
/** The shader stage, such as MESA_SHADER_VERTEX. */
|
||||
gl_shader_stage stage;
|
||||
|
||||
const char *name;
|
||||
|
||||
/* Descriptive name provided by the client; may be NULL */
|
||||
const char *label;
|
||||
|
||||
/** The shader stage, such as MESA_SHADER_VERTEX. */
|
||||
gl_shader_stage stage;
|
||||
|
||||
/* Number of textures used by this shader */
|
||||
unsigned num_textures;
|
||||
/* Number of uniform buffers used by this shader */
|
||||
|
@@ -1121,6 +1121,10 @@ vtn_get_builtin_location(struct vtn_builder *b,
|
||||
*location = FRAG_RESULT_DEPTH;
|
||||
assert(*mode == nir_var_shader_out);
|
||||
break;
|
||||
case SpvBuiltInHelperInvocation:
|
||||
*location = SYSTEM_VALUE_HELPER_INVOCATION;
|
||||
set_mode_system_value(mode);
|
||||
break;
|
||||
case SpvBuiltInNumWorkgroups:
|
||||
*location = SYSTEM_VALUE_NUM_WORK_GROUPS;
|
||||
set_mode_system_value(mode);
|
||||
@@ -1161,7 +1165,6 @@ vtn_get_builtin_location(struct vtn_builder *b,
|
||||
*location = SYSTEM_VALUE_VIEW_INDEX;
|
||||
set_mode_system_value(mode);
|
||||
break;
|
||||
case SpvBuiltInHelperInvocation:
|
||||
default:
|
||||
unreachable("unsupported builtin");
|
||||
}
|
||||
|
@@ -945,9 +945,12 @@ dri2_display_destroy(_EGLDisplay *disp)
|
||||
zwp_linux_dmabuf_v1_destroy(dri2_dpy->wl_dmabuf);
|
||||
if (dri2_dpy->wl_shm)
|
||||
wl_shm_destroy(dri2_dpy->wl_shm);
|
||||
wl_registry_destroy(dri2_dpy->wl_registry);
|
||||
wl_event_queue_destroy(dri2_dpy->wl_queue);
|
||||
wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
|
||||
if (dri2_dpy->wl_registry)
|
||||
wl_registry_destroy(dri2_dpy->wl_registry);
|
||||
if (dri2_dpy->wl_queue)
|
||||
wl_event_queue_destroy(dri2_dpy->wl_queue);
|
||||
if (dri2_dpy->wl_dpy_wrapper)
|
||||
wl_proxy_wrapper_destroy(dri2_dpy->wl_dpy_wrapper);
|
||||
u_vector_finish(&dri2_dpy->wl_modifiers.argb8888);
|
||||
u_vector_finish(&dri2_dpy->wl_modifiers.xrgb8888);
|
||||
u_vector_finish(&dri2_dpy->wl_modifiers.rgb565);
|
||||
|
@@ -1319,6 +1319,7 @@ static const __DRIextension *dri3_image_loader_extensions[] = {
|
||||
&dri3_image_loader_extension.base,
|
||||
&image_lookup_extension.base,
|
||||
&use_invalidate.base,
|
||||
&background_callable_extension.base,
|
||||
NULL,
|
||||
};
|
||||
|
||||
|
@@ -650,7 +650,13 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
|
||||
for (i = 0; i < format_desc->nr_channels; i++) {
|
||||
struct util_format_channel_description chan_desc = format_desc->channel[i];
|
||||
unsigned blockbits = type.width;
|
||||
unsigned vec_nr = chan_desc.shift / type.width;
|
||||
unsigned vec_nr;
|
||||
|
||||
#ifdef PIPE_ARCH_BIG_ENDIAN
|
||||
vec_nr = (format_desc->block.bits - (chan_desc.shift + chan_desc.size)) / type.width;
|
||||
#else
|
||||
vec_nr = chan_desc.shift / type.width;
|
||||
#endif
|
||||
chan_desc.shift %= type.width;
|
||||
|
||||
output[i] = lp_build_extract_soa_chan(&bld,
|
||||
|
@@ -234,13 +234,39 @@ lp_build_gather_elem_vec(struct gallivm_state *gallivm,
|
||||
*/
|
||||
res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
|
||||
|
||||
if (vector_justify) {
|
||||
#ifdef PIPE_ARCH_BIG_ENDIAN
|
||||
if (vector_justify) {
|
||||
res = LLVMBuildShl(gallivm->builder, res,
|
||||
LLVMConstInt(dst_elem_type,
|
||||
dst_type.width - src_width, 0), "");
|
||||
#endif
|
||||
}
|
||||
if (src_width == 48) {
|
||||
/* Load 3x16 bit vector.
|
||||
* The sequence of loads on big-endian hardware proceeds as follows.
|
||||
* 16-bit fields are denoted by X, Y, Z, and 0. In memory, the sequence
|
||||
* of three fields appears in the order X, Y, Z.
|
||||
*
|
||||
* Load 32-bit word: 0.0.X.Y
|
||||
* Load 16-bit halfword: 0.0.0.Z
|
||||
* Rotate left: 0.X.Y.0
|
||||
* Bitwise OR: 0.X.Y.Z
|
||||
*
|
||||
* The order in which we need the fields in the result is 0.Z.Y.X,
|
||||
* the same as on little-endian; permute 16-bit fields accordingly
|
||||
* within 64-bit register:
|
||||
*/
|
||||
LLVMValueRef shuffles[4] = {
|
||||
lp_build_const_int32(gallivm, 2),
|
||||
lp_build_const_int32(gallivm, 1),
|
||||
lp_build_const_int32(gallivm, 0),
|
||||
lp_build_const_int32(gallivm, 3),
|
||||
};
|
||||
res = LLVMBuildBitCast(gallivm->builder, res,
|
||||
lp_build_vec_type(gallivm, lp_type_uint_vec(16, 4*16)), "");
|
||||
res = LLVMBuildShuffleVector(gallivm->builder, res, res, LLVMConstVector(shuffles, 4), "");
|
||||
res = LLVMBuildBitCast(gallivm->builder, res, dst_elem_type, "");
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
return res;
|
||||
|
@@ -428,13 +428,7 @@ fd_launch_grid(struct pipe_context *pctx, const struct pipe_grid_info *info)
|
||||
struct fd_batch *batch, *save_batch = NULL;
|
||||
unsigned i;
|
||||
|
||||
/* TODO maybe we don't want to allocate and flush a batch each time?
|
||||
* We could use a special bogus (ie. won't match any fb state) key
|
||||
* in the batch-case for compute shaders, and rely on the rest of
|
||||
* the dependency tracking mechanism to tell us when the compute
|
||||
* batch needs to be flushed?
|
||||
*/
|
||||
batch = fd_bc_alloc_batch(&ctx->screen->batch_cache, ctx);
|
||||
batch = fd_batch_create(ctx);
|
||||
fd_batch_reference(&save_batch, ctx->batch);
|
||||
fd_batch_reference(&ctx->batch, batch);
|
||||
|
||||
|
@@ -227,6 +227,12 @@ llvmpipe_create_context(struct pipe_screen *screen, void *priv,
|
||||
|
||||
lp_reset_counters();
|
||||
|
||||
/* If llvmpipe_set_scissor_states() is never called, we still need to
|
||||
* make sure that derived scissor state is computed.
|
||||
* See https://bugs.freedesktop.org/show_bug.cgi?id=101709
|
||||
*/
|
||||
llvmpipe->dirty |= LP_NEW_SCISSOR;
|
||||
|
||||
return &llvmpipe->pipe;
|
||||
|
||||
fail:
|
||||
|
@@ -346,6 +346,7 @@ nvc0_hw_get_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
|
||||
case PIPE_QUERY_PIPELINE_STATISTICS:
|
||||
for (i = 0; i < 10; ++i)
|
||||
res64[i] = data64[i * 2] - data64[24 + i * 2];
|
||||
result->pipeline_statistics.cs_invocations = 0;
|
||||
break;
|
||||
case NVC0_HW_QUERY_TFB_BUFFER_OFFSET:
|
||||
res32[0] = hq->data[1];
|
||||
|
@@ -557,6 +557,7 @@ struct r600_common_context {
|
||||
unsigned gpu_reset_counter;
|
||||
unsigned last_dirty_tex_counter;
|
||||
unsigned last_compressed_colortex_counter;
|
||||
unsigned last_num_draw_calls;
|
||||
|
||||
struct threaded_context *tc;
|
||||
struct u_suballocator *allocator_zeroed_memory;
|
||||
|
@@ -781,6 +781,11 @@ static void si_launch_grid(
|
||||
program->shader.compilation_failed)
|
||||
return;
|
||||
|
||||
if (sctx->b.last_num_draw_calls != sctx->b.num_draw_calls) {
|
||||
si_update_fb_dirtiness_after_rendering(sctx);
|
||||
sctx->b.last_num_draw_calls = sctx->b.num_draw_calls;
|
||||
}
|
||||
|
||||
si_decompress_compute_textures(sctx);
|
||||
|
||||
/* Add buffer sizes for memory checking in need_cs_space. */
|
||||
|
@@ -59,7 +59,8 @@
|
||||
#define SI_CONTEXT_WRITEBACK_GLOBAL_L2 (R600_CONTEXT_PRIVATE_FLAG << 4)
|
||||
/* gaps */
|
||||
/* Framebuffer caches. */
|
||||
#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 7)
|
||||
#define SI_CONTEXT_FLUSH_AND_INV_DB (R600_CONTEXT_PRIVATE_FLAG << 6)
|
||||
#define SI_CONTEXT_FLUSH_AND_INV_DB_META (R600_CONTEXT_PRIVATE_FLAG << 7)
|
||||
#define SI_CONTEXT_FLUSH_AND_INV_CB (R600_CONTEXT_PRIVATE_FLAG << 8)
|
||||
/* Engine synchronization. */
|
||||
#define SI_CONTEXT_VS_PARTIAL_FLUSH (R600_CONTEXT_PRIVATE_FLAG << 9)
|
||||
|
@@ -83,10 +83,10 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
|
||||
static void si_build_ps_epilog_function(struct si_shader_context *ctx,
|
||||
union si_shader_part_key *key);
|
||||
|
||||
/* Ideally pass the sample mask input to the PS epilog as v13, which
|
||||
/* Ideally pass the sample mask input to the PS epilog as v14, which
|
||||
* is its usual location, so that the shader doesn't have to add v_mov.
|
||||
*/
|
||||
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 13
|
||||
#define PS_EPILOG_SAMPLEMASK_MIN_LOC 14
|
||||
|
||||
enum {
|
||||
CONST_ADDR_SPACE = 2,
|
||||
@@ -4420,6 +4420,7 @@ static void create_function(struct si_shader_context *ctx)
|
||||
params[SI_PARAM_FRONT_FACE] = ctx->i32;
|
||||
shader->info.face_vgpr_index = 20;
|
||||
params[SI_PARAM_ANCILLARY] = ctx->i32;
|
||||
shader->info.ancillary_vgpr_index = 21;
|
||||
params[SI_PARAM_SAMPLE_COVERAGE] = ctx->f32;
|
||||
params[SI_PARAM_POS_FIXED_PT] = ctx->i32;
|
||||
num_params = SI_PARAM_POS_FIXED_PT+1;
|
||||
@@ -4495,6 +4496,7 @@ static void create_function(struct si_shader_context *ctx)
|
||||
S_0286D0_LINEAR_CENTER_ENA(1) |
|
||||
S_0286D0_LINEAR_CENTROID_ENA(1) |
|
||||
S_0286D0_FRONT_FACE_ENA(1) |
|
||||
S_0286D0_ANCILLARY_ENA(1) |
|
||||
S_0286D0_POS_FIXED_PT_ENA(1));
|
||||
}
|
||||
|
||||
@@ -5649,6 +5651,7 @@ static void si_get_ps_prolog_key(struct si_shader *shader,
|
||||
key->ps_prolog.states.force_linear_center_interp ||
|
||||
key->ps_prolog.states.bc_optimize_for_persp ||
|
||||
key->ps_prolog.states.bc_optimize_for_linear);
|
||||
key->ps_prolog.ancillary_vgpr_index = shader->info.ancillary_vgpr_index;
|
||||
|
||||
if (info->colors_read) {
|
||||
unsigned *color = shader->selector->color_attr_index;
|
||||
@@ -5758,7 +5761,8 @@ static bool si_need_ps_prolog(const union si_shader_part_key *key)
|
||||
key->ps_prolog.states.force_linear_center_interp ||
|
||||
key->ps_prolog.states.bc_optimize_for_persp ||
|
||||
key->ps_prolog.states.bc_optimize_for_linear ||
|
||||
key->ps_prolog.states.poly_stipple;
|
||||
key->ps_prolog.states.poly_stipple ||
|
||||
key->ps_prolog.states.samplemask_log_ps_iter;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -6405,6 +6409,7 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||
if (ctx.type == PIPE_SHADER_FRAGMENT) {
|
||||
shader->info.num_input_vgprs = 0;
|
||||
shader->info.face_vgpr_index = -1;
|
||||
shader->info.ancillary_vgpr_index = -1;
|
||||
|
||||
if (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_addr))
|
||||
shader->info.num_input_vgprs += 2;
|
||||
@@ -6434,8 +6439,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
|
||||
shader->info.face_vgpr_index = shader->info.num_input_vgprs;
|
||||
shader->info.num_input_vgprs += 1;
|
||||
}
|
||||
if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr))
|
||||
if (G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr)) {
|
||||
shader->info.ancillary_vgpr_index = shader->info.num_input_vgprs;
|
||||
shader->info.num_input_vgprs += 1;
|
||||
}
|
||||
if (G_0286CC_SAMPLE_COVERAGE_ENA(shader->config.spi_ps_input_addr))
|
||||
shader->info.num_input_vgprs += 1;
|
||||
if (G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr))
|
||||
@@ -7079,6 +7086,54 @@ static void si_build_ps_prolog_function(struct si_shader_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
/* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
|
||||
* says:
|
||||
*
|
||||
* "When per-sample shading is active due to the use of a fragment
|
||||
* input qualified by sample or due to the use of the gl_SampleID
|
||||
* or gl_SamplePosition variables, only the bit for the current
|
||||
* sample is set in gl_SampleMaskIn. When state specifies multiple
|
||||
* fragment shader invocations for a given fragment, the sample
|
||||
* mask for any single fragment shader invocation may specify a
|
||||
* subset of the covered samples for the fragment. In this case,
|
||||
* the bit corresponding to each covered sample will be set in
|
||||
* exactly one fragment shader invocation."
|
||||
*
|
||||
* The samplemask loaded by hardware is always the coverage of the
|
||||
* entire pixel/fragment, so mask bits out based on the sample ID.
|
||||
*/
|
||||
if (key->ps_prolog.states.samplemask_log_ps_iter) {
|
||||
/* The bit pattern matches that used by fixed function fragment
|
||||
* processing. */
|
||||
static const uint16_t ps_iter_masks[] = {
|
||||
0xffff, /* not used */
|
||||
0x5555,
|
||||
0x1111,
|
||||
0x0101,
|
||||
0x0001,
|
||||
};
|
||||
assert(key->ps_prolog.states.samplemask_log_ps_iter < ARRAY_SIZE(ps_iter_masks));
|
||||
|
||||
uint32_t ps_iter_mask = ps_iter_masks[key->ps_prolog.states.samplemask_log_ps_iter];
|
||||
unsigned ancillary_vgpr = key->ps_prolog.num_input_sgprs +
|
||||
key->ps_prolog.ancillary_vgpr_index;
|
||||
LLVMValueRef sampleid = unpack_param(ctx, ancillary_vgpr, 8, 4);
|
||||
LLVMValueRef samplemask = LLVMGetParam(func, ancillary_vgpr + 1);
|
||||
|
||||
samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->i32, "");
|
||||
samplemask = LLVMBuildAnd(
|
||||
gallivm->builder,
|
||||
samplemask,
|
||||
LLVMBuildShl(gallivm->builder,
|
||||
LLVMConstInt(ctx->i32, ps_iter_mask, false),
|
||||
sampleid, ""),
|
||||
"");
|
||||
samplemask = LLVMBuildBitCast(gallivm->builder, samplemask, ctx->f32, "");
|
||||
|
||||
ret = LLVMBuildInsertValue(gallivm->builder, ret, samplemask,
|
||||
ancillary_vgpr + 1, "");
|
||||
}
|
||||
|
||||
/* Tell LLVM to insert WQM instruction sequence when needed. */
|
||||
if (key->ps_prolog.wqm) {
|
||||
LLVMAddTargetDependentFunctionAttr(func,
|
||||
@@ -7276,6 +7331,12 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
|
||||
assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
|
||||
}
|
||||
|
||||
/* Samplemask fixup requires the sample ID. */
|
||||
if (shader->key.part.ps.prolog.samplemask_log_ps_iter) {
|
||||
shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
|
||||
assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr));
|
||||
}
|
||||
|
||||
/* The sample mask input is always enabled, because the API shader always
|
||||
* passes it through to the epilog. Disable it here if it's unused.
|
||||
*/
|
||||
@@ -7353,6 +7414,7 @@ int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
|
||||
shader->info.num_input_sgprs = mainp->info.num_input_sgprs;
|
||||
shader->info.num_input_vgprs = mainp->info.num_input_vgprs;
|
||||
shader->info.face_vgpr_index = mainp->info.face_vgpr_index;
|
||||
shader->info.ancillary_vgpr_index = mainp->info.ancillary_vgpr_index;
|
||||
memcpy(shader->info.vs_output_param_offset,
|
||||
mainp->info.vs_output_param_offset,
|
||||
sizeof(mainp->info.vs_output_param_offset));
|
||||
|
@@ -416,6 +416,7 @@ struct si_ps_prolog_bits {
|
||||
unsigned force_linear_center_interp:1;
|
||||
unsigned bc_optimize_for_persp:1;
|
||||
unsigned bc_optimize_for_linear:1;
|
||||
unsigned samplemask_log_ps_iter:3;
|
||||
};
|
||||
|
||||
/* Common PS bits between the shader key and the epilog key. */
|
||||
@@ -457,6 +458,7 @@ union si_shader_part_key {
|
||||
unsigned colors_read:8; /* color input components read */
|
||||
unsigned num_interp_inputs:5; /* BCOLOR is at this location */
|
||||
unsigned face_vgpr_index:5;
|
||||
unsigned ancillary_vgpr_index:5;
|
||||
unsigned wqm:1;
|
||||
char color_attr_index[2];
|
||||
char color_interp_vgpr_index[2]; /* -1 == constant */
|
||||
@@ -549,7 +551,8 @@ struct si_shader_info {
|
||||
ubyte vs_output_param_offset[SI_MAX_VS_OUTPUTS];
|
||||
ubyte num_input_sgprs;
|
||||
ubyte num_input_vgprs;
|
||||
char face_vgpr_index;
|
||||
signed char face_vgpr_index;
|
||||
signed char ancillary_vgpr_index;
|
||||
bool uses_instanceid;
|
||||
ubyte nr_pos_exports;
|
||||
ubyte nr_param_exports;
|
||||
|
@@ -2579,6 +2579,14 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
|
||||
sctx->b.flags |= SI_CONTEXT_INV_VMEM_L1 |
|
||||
SI_CONTEXT_INV_GLOBAL_L2 |
|
||||
SI_CONTEXT_FLUSH_AND_INV_DB;
|
||||
} else if (sctx->b.chip_class == GFX9) {
|
||||
/* It appears that DB metadata "leaks" in a sequence of:
|
||||
* - depth clear
|
||||
* - DCC decompress for shader image writes (with DB disabled)
|
||||
* - render with DEPTH_BEFORE_SHADER=1
|
||||
* Flushing DB metadata works around the problem.
|
||||
*/
|
||||
sctx->b.flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META;
|
||||
}
|
||||
|
||||
/* Take the maximum of the old and new count. If the new count is lower,
|
||||
|
@@ -910,7 +910,8 @@ void si_emit_cache_flush(struct si_context *sctx)
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
|
||||
}
|
||||
if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB) {
|
||||
if (rctx->flags & (SI_CONTEXT_FLUSH_AND_INV_DB |
|
||||
SI_CONTEXT_FLUSH_AND_INV_DB_META)) {
|
||||
/* Flush HTILE. SURFACE_SYNC will wait for idle. */
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
|
||||
|
@@ -1412,6 +1412,12 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
|
||||
sctx->framebuffer.nr_samples <= 1;
|
||||
key->part.ps.epilog.clamp_color = rs->clamp_fragment_color;
|
||||
|
||||
if (sctx->ps_iter_samples > 1 &&
|
||||
sel->info.reads_samplemask) {
|
||||
key->part.ps.prolog.samplemask_log_ps_iter =
|
||||
util_logbase2(util_next_power_of_two(sctx->ps_iter_samples));
|
||||
}
|
||||
|
||||
if (rs->force_persample_interp &&
|
||||
rs->multisample_enable &&
|
||||
sctx->framebuffer.nr_samples > 1 &&
|
||||
|
@@ -250,6 +250,15 @@ static const struct anv_format anv_formats[] = {
|
||||
|
||||
#undef fmt
|
||||
|
||||
static bool
|
||||
format_supported(VkFormat vk_format)
|
||||
{
|
||||
if (vk_format >= ARRAY_SIZE(anv_formats))
|
||||
return false;
|
||||
|
||||
return anv_formats[vk_format].isl_format != ISL_FORMAT_UNSUPPORTED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Exactly one bit must be set in \a aspect.
|
||||
*/
|
||||
@@ -257,10 +266,10 @@ struct anv_format
|
||||
anv_get_format(const struct gen_device_info *devinfo, VkFormat vk_format,
|
||||
VkImageAspectFlags aspect, VkImageTiling tiling)
|
||||
{
|
||||
struct anv_format format = anv_formats[vk_format];
|
||||
if (!format_supported(vk_format))
|
||||
return anv_formats[VK_FORMAT_UNDEFINED];
|
||||
|
||||
if (format.isl_format == ISL_FORMAT_UNSUPPORTED)
|
||||
return format;
|
||||
struct anv_format format = anv_formats[vk_format];
|
||||
|
||||
if (aspect == VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
assert(vk_format_aspects(vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT);
|
||||
@@ -391,7 +400,7 @@ anv_physical_device_get_format_properties(struct anv_physical_device *physical_d
|
||||
gen += 5;
|
||||
|
||||
VkFormatFeatureFlags linear = 0, tiled = 0, buffer = 0;
|
||||
if (anv_formats[format].isl_format == ISL_FORMAT_UNSUPPORTED) {
|
||||
if (!format_supported(format)) {
|
||||
/* Nothing to do here */
|
||||
} else if (vk_format_is_depth_or_stencil(format)) {
|
||||
tiled |= VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT;
|
||||
@@ -489,7 +498,7 @@ anv_get_image_format_properties(
|
||||
uint32_t maxArraySize;
|
||||
VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
|
||||
|
||||
if (anv_formats[info->format].isl_format == ISL_FORMAT_UNSUPPORTED)
|
||||
if (!format_supported(info->format))
|
||||
goto unsupported;
|
||||
|
||||
anv_physical_device_get_format_properties(physical_device, info->format,
|
||||
|
@@ -953,8 +953,10 @@ brw_bo_map(struct brw_context *brw, struct brw_bo *bo, unsigned flags)
|
||||
* We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
|
||||
*/
|
||||
if (!map && !(flags & MAP_RAW)) {
|
||||
perf_debug("Fallback GTT mapping for %s with access flags %x\n",
|
||||
bo->name, flags);
|
||||
if (brw) {
|
||||
perf_debug("Fallback GTT mapping for %s with access flags %x\n",
|
||||
bo->name, flags);
|
||||
}
|
||||
map = brw_bo_map_gtt(brw, bo, flags);
|
||||
}
|
||||
|
||||
|
@@ -1617,6 +1617,13 @@ enum brw_pixel_shader_coverage_mask_mode {
|
||||
# define GEN8_HIZ_PMA_MASK_BITS \
|
||||
REG_MASK(GEN8_HIZ_NP_PMA_FIX_ENABLE | GEN8_HIZ_NP_EARLY_Z_FAILS_DISABLE)
|
||||
|
||||
#define GEN7_GT_MODE 0x7008
|
||||
# define GEN9_SUBSLICE_HASHING_8x8 (0 << 8)
|
||||
# define GEN9_SUBSLICE_HASHING_16x4 (1 << 8)
|
||||
# define GEN9_SUBSLICE_HASHING_8x4 (2 << 8)
|
||||
# define GEN9_SUBSLICE_HASHING_16x16 (3 << 8)
|
||||
# define GEN9_SUBSLICE_HASHING_MASK_BITS REG_MASK(3 << 8)
|
||||
|
||||
/* Predicate registers */
|
||||
#define MI_PREDICATE_SRC0 0x2400
|
||||
#define MI_PREDICATE_SRC1 0x2408
|
||||
|
@@ -72,6 +72,15 @@ brw_upload_initial_gpu_state(struct brw_context *brw)
|
||||
GEN9_FLOAT_BLEND_OPTIMIZATION_ENABLE |
|
||||
GEN9_PARTIAL_RESOLVE_DISABLE_IN_VC);
|
||||
ADVANCE_BATCH();
|
||||
|
||||
if (brw->is_broxton) {
|
||||
BEGIN_BATCH(3);
|
||||
OUT_BATCH(MI_LOAD_REGISTER_IMM | (3 - 2));
|
||||
OUT_BATCH(GEN7_GT_MODE);
|
||||
OUT_BATCH(GEN9_SUBSLICE_HASHING_MASK_BITS |
|
||||
GEN9_SUBSLICE_HASHING_16x16);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
}
|
||||
|
||||
if (brw->gen >= 8) {
|
||||
|
@@ -175,7 +175,7 @@ try_pbo_readpixels(struct st_context *st, struct st_renderbuffer *strb,
|
||||
|
||||
if (view_target != PIPE_TEXTURE_3D) {
|
||||
templ.u.tex.first_layer = surface->u.tex.first_layer;
|
||||
templ.u.tex.last_layer = templ.u.tex.last_layer;
|
||||
templ.u.tex.last_layer = templ.u.tex.first_layer;
|
||||
} else {
|
||||
addr.constants.layer_offset = surface->u.tex.first_layer;
|
||||
}
|
||||
|
@@ -198,9 +198,14 @@ st_draw_vbo(struct gl_context *ctx,
|
||||
|
||||
/* do actual drawing */
|
||||
for (i = 0; i < nr_prims; i++) {
|
||||
info.count = prims[i].count;
|
||||
|
||||
/* Skip no-op draw calls. */
|
||||
if (!info.count && !tfb_vertcount)
|
||||
continue;
|
||||
|
||||
info.mode = translate_prim(ctx, prims[i].mode);
|
||||
info.start = start + prims[i].start;
|
||||
info.count = prims[i].count;
|
||||
info.start_instance = prims[i].base_instance;
|
||||
info.instance_count = prims[i].num_instances;
|
||||
info.index_bias = prims[i].basevertex;
|
||||
|
@@ -1,479 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2015 Red Hat
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
* SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "st_nir.h"
|
||||
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
#include "program/program.h"
|
||||
#include "program/prog_statevars.h"
|
||||
#include "program/prog_parameter.h"
|
||||
#include "program/ir_to_mesa.h"
|
||||
#include "main/mtypes.h"
|
||||
#include "main/errors.h"
|
||||
#include "main/shaderapi.h"
|
||||
#include "main/uniforms.h"
|
||||
#include "util/string_to_uint_map.h"
|
||||
|
||||
#include "st_context.h"
|
||||
#include "st_program.h"
|
||||
|
||||
#include "compiler/nir/nir.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "compiler/glsl/glsl_to_nir.h"
|
||||
#include "compiler/glsl/ir.h"
|
||||
|
||||
|
||||
static int
|
||||
type_size(const struct glsl_type *type)
|
||||
{
|
||||
return type->count_attribute_slots(false);
|
||||
}
|
||||
|
||||
/* Depending on PIPE_CAP_TGSI_TEXCOORD (st->needs_texcoord_semantic) we
|
||||
* may need to fix up varying slots so the glsl->nir path is aligned
|
||||
* with the anything->tgsi->nir path.
|
||||
*/
|
||||
static void
|
||||
st_nir_fixup_varying_slots(struct st_context *st, struct exec_list *var_list)
|
||||
{
|
||||
if (st->needs_texcoord_semantic)
|
||||
return;
|
||||
|
||||
nir_foreach_variable(var, var_list) {
|
||||
if (var->data.location >= VARYING_SLOT_VAR0) {
|
||||
var->data.location += 9;
|
||||
} else if ((var->data.location >= VARYING_SLOT_TEX0) &&
|
||||
(var->data.location <= VARYING_SLOT_TEX7)) {
|
||||
var->data.location += VARYING_SLOT_VAR0 - VARYING_SLOT_TEX0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* input location assignment for VS inputs must be handled specially, so
|
||||
* that it is aligned w/ st's vbo state.
|
||||
* (This isn't the case with, for ex, FS inputs, which only need to agree
|
||||
* on varying-slot w/ the VS outputs)
|
||||
*/
|
||||
static void
|
||||
st_nir_assign_vs_in_locations(struct gl_program *prog, nir_shader *nir)
|
||||
{
|
||||
unsigned attr, num_inputs = 0;
|
||||
unsigned input_to_index[VERT_ATTRIB_MAX] = {0};
|
||||
|
||||
/* TODO de-duplicate w/ similar code in st_translate_vertex_program()? */
|
||||
for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) {
|
||||
if ((prog->info.inputs_read & BITFIELD64_BIT(attr)) != 0) {
|
||||
input_to_index[attr] = num_inputs;
|
||||
num_inputs++;
|
||||
if ((prog->info.double_inputs_read & BITFIELD64_BIT(attr)) != 0) {
|
||||
/* add placeholder for second part of a double attribute */
|
||||
num_inputs++;
|
||||
}
|
||||
} else {
|
||||
input_to_index[attr] = ~0;
|
||||
}
|
||||
}
|
||||
|
||||
/* bit of a hack, mirroring st_translate_vertex_program */
|
||||
input_to_index[VERT_ATTRIB_EDGEFLAG] = num_inputs;
|
||||
|
||||
nir->num_inputs = 0;
|
||||
nir_foreach_variable_safe(var, &nir->inputs) {
|
||||
attr = var->data.location;
|
||||
assert(attr < ARRAY_SIZE(input_to_index));
|
||||
|
||||
if (input_to_index[attr] != ~0u) {
|
||||
var->data.driver_location = input_to_index[attr];
|
||||
nir->num_inputs++;
|
||||
} else {
|
||||
/* Move unused input variables to the globals list (with no
|
||||
* initialization), to avoid confusing drivers looking through the
|
||||
* inputs array and expecting to find inputs with a driver_location
|
||||
* set.
|
||||
*/
|
||||
exec_node_remove(&var->node);
|
||||
var->data.mode = nir_var_global;
|
||||
exec_list_push_tail(&nir->globals, &var->node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
st_nir_lookup_parameter_index(const struct gl_program_parameter_list *params,
|
||||
const char *name)
|
||||
{
|
||||
int loc = _mesa_lookup_parameter_index(params, name);
|
||||
|
||||
/* is there a better way to do this? If we have something like:
|
||||
*
|
||||
* struct S {
|
||||
* float f;
|
||||
* vec4 v;
|
||||
* };
|
||||
* uniform S color;
|
||||
*
|
||||
* Then what we get in prog->Parameters looks like:
|
||||
*
|
||||
* 0: Name=color.f, Type=6, DataType=1406, Size=1
|
||||
* 1: Name=color.v, Type=6, DataType=8b52, Size=4
|
||||
*
|
||||
* So the name doesn't match up and _mesa_lookup_parameter_index()
|
||||
* fails. In this case just find the first matching "color.*"..
|
||||
*
|
||||
* Note for arrays you could end up w/ color[n].f, for example.
|
||||
*
|
||||
* glsl_to_tgsi works slightly differently in this regard. It is
|
||||
* emitting something more low level, so it just translates the
|
||||
* params list 1:1 to CONST[] regs. Going from GLSL IR to TGSI,
|
||||
* it just calculates the additional offset of struct field members
|
||||
* in glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) or
|
||||
* glsl_to_tgsi_visitor::visit(ir_dereference_array *ir). It never
|
||||
* needs to work backwards to get base var loc from the param-list
|
||||
* which already has them separated out.
|
||||
*/
|
||||
if (loc < 0) {
|
||||
int namelen = strlen(name);
|
||||
for (unsigned i = 0; i < params->NumParameters; i++) {
|
||||
struct gl_program_parameter *p = &params->Parameters[i];
|
||||
if ((strncmp(p->Name, name, namelen) == 0) &&
|
||||
((p->Name[namelen] == '.') || (p->Name[namelen] == '['))) {
|
||||
loc = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return loc;
|
||||
}
|
||||
|
||||
static void
|
||||
st_nir_assign_uniform_locations(struct gl_program *prog,
|
||||
struct gl_shader_program *shader_program,
|
||||
struct exec_list *uniform_list, unsigned *size)
|
||||
{
|
||||
int max = 0;
|
||||
int shaderidx = 0;
|
||||
|
||||
nir_foreach_variable(uniform, uniform_list) {
|
||||
int loc;
|
||||
|
||||
/*
|
||||
* UBO's have their own address spaces, so don't count them towards the
|
||||
* number of global uniforms
|
||||
*/
|
||||
if ((uniform->data.mode == nir_var_uniform || uniform->data.mode == nir_var_shader_storage) &&
|
||||
uniform->interface_type != NULL)
|
||||
continue;
|
||||
|
||||
if (uniform->type->is_sampler()) {
|
||||
unsigned val = 0;
|
||||
bool found = shader_program->UniformHash->get(val, uniform->name);
|
||||
loc = shaderidx++;
|
||||
assert(found);
|
||||
(void) found; /* silence unused var warning */
|
||||
/* this ensure that nir_lower_samplers looks at the correct
|
||||
* shader_program->UniformStorage[location]:
|
||||
*/
|
||||
uniform->data.location = val;
|
||||
} else if (strncmp(uniform->name, "gl_", 3) == 0) {
|
||||
const gl_state_index *const stateTokens = (gl_state_index *)uniform->state_slots[0].tokens;
|
||||
/* This state reference has already been setup by ir_to_mesa, but we'll
|
||||
* get the same index back here.
|
||||
*/
|
||||
loc = _mesa_add_state_reference(prog->Parameters, stateTokens);
|
||||
} else {
|
||||
loc = st_nir_lookup_parameter_index(prog->Parameters, uniform->name);
|
||||
}
|
||||
|
||||
uniform->data.driver_location = loc;
|
||||
|
||||
max = MAX2(max, loc + type_size(uniform->type));
|
||||
}
|
||||
*size = max;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
/* First half of converting glsl_to_nir.. this leaves things in a pre-
|
||||
* nir_lower_io state, so that shader variants can more easily insert/
|
||||
* replace variables, etc.
|
||||
*/
|
||||
nir_shader *
|
||||
st_glsl_to_nir(struct st_context *st, struct gl_program *prog,
|
||||
struct gl_shader_program *shader_program,
|
||||
gl_shader_stage stage)
|
||||
{
|
||||
struct pipe_screen *pscreen = st->pipe->screen;
|
||||
enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage);
|
||||
const nir_shader_compiler_options *options;
|
||||
nir_shader *nir;
|
||||
|
||||
assert(pscreen->get_compiler_options); /* drivers using NIR must implement this */
|
||||
|
||||
options = (const nir_shader_compiler_options *)
|
||||
pscreen->get_compiler_options(pscreen, PIPE_SHADER_IR_NIR, ptarget);
|
||||
assert(options);
|
||||
|
||||
if (prog->nir)
|
||||
return prog->nir;
|
||||
|
||||
nir = glsl_to_nir(shader_program, stage, options);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_io_to_temporaries,
|
||||
nir_shader_get_entrypoint(nir),
|
||||
true, true);
|
||||
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
|
||||
NIR_PASS_V(nir, nir_split_var_copies);
|
||||
NIR_PASS_V(nir, nir_lower_var_copies);
|
||||
NIR_PASS_V(nir, st_nir_lower_builtin);
|
||||
NIR_PASS_V(nir, nir_lower_atomics, shader_program);
|
||||
|
||||
/* fragment shaders may need : */
|
||||
if (stage == MESA_SHADER_FRAGMENT) {
|
||||
static const gl_state_index wposTransformState[STATE_LENGTH] = {
|
||||
STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
|
||||
};
|
||||
nir_lower_wpos_ytransform_options wpos_options = { { 0 } };
|
||||
struct pipe_screen *pscreen = st->pipe->screen;
|
||||
|
||||
memcpy(wpos_options.state_tokens, wposTransformState,
|
||||
sizeof(wpos_options.state_tokens));
|
||||
wpos_options.fs_coord_origin_upper_left =
|
||||
pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
|
||||
wpos_options.fs_coord_origin_lower_left =
|
||||
pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
|
||||
wpos_options.fs_coord_pixel_center_integer =
|
||||
pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
|
||||
wpos_options.fs_coord_pixel_center_half_integer =
|
||||
pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
|
||||
|
||||
if (nir_lower_wpos_ytransform(nir, &wpos_options)) {
|
||||
nir_validate_shader(nir);
|
||||
_mesa_add_state_reference(prog->Parameters, wposTransformState);
|
||||
}
|
||||
}
|
||||
|
||||
if (st->ctx->_Shader->Flags & GLSL_DUMP) {
|
||||
_mesa_log("\n");
|
||||
_mesa_log("NIR IR for linked %s program %d:\n",
|
||||
_mesa_shader_stage_to_string(stage),
|
||||
shader_program->Name);
|
||||
nir_print_shader(nir, _mesa_get_log_file());
|
||||
_mesa_log("\n\n");
|
||||
}
|
||||
|
||||
prog->nir = nir;
|
||||
|
||||
return nir;
|
||||
}
|
||||
|
||||
/* TODO any better helper somewhere to sort a list? */
|
||||
|
||||
static void
|
||||
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
|
||||
{
|
||||
nir_foreach_variable(var, var_list) {
|
||||
if (var->data.location > new_var->data.location) {
|
||||
exec_node_insert_node_before(&var->node, &new_var->node);
|
||||
return;
|
||||
}
|
||||
}
|
||||
exec_list_push_tail(var_list, &new_var->node);
|
||||
}
|
||||
|
||||
static void
|
||||
sort_varyings(struct exec_list *var_list)
|
||||
{
|
||||
struct exec_list new_list;
|
||||
exec_list_make_empty(&new_list);
|
||||
nir_foreach_variable_safe(var, var_list) {
|
||||
exec_node_remove(&var->node);
|
||||
insert_sorted(&new_list, var);
|
||||
}
|
||||
exec_list_move_nodes_to(&new_list, var_list);
|
||||
}
|
||||
|
||||
/* Second half of preparing nir from glsl, which happens after shader
|
||||
* variant lowering.
|
||||
*/
|
||||
void
|
||||
st_finalize_nir(struct st_context *st, struct gl_program *prog, nir_shader *nir)
|
||||
{
|
||||
struct pipe_screen *screen = st->pipe->screen;
|
||||
|
||||
NIR_PASS_V(nir, nir_split_var_copies);
|
||||
NIR_PASS_V(nir, nir_lower_var_copies);
|
||||
NIR_PASS_V(nir, nir_lower_io_types);
|
||||
|
||||
if (nir->stage == MESA_SHADER_VERTEX) {
|
||||
/* Needs special handling so drvloc matches the vbo state: */
|
||||
st_nir_assign_vs_in_locations(prog, nir);
|
||||
/* Re-lower global vars, to deal with any dead VS inputs. */
|
||||
NIR_PASS_V(nir, nir_lower_global_vars_to_local);
|
||||
|
||||
sort_varyings(&nir->outputs);
|
||||
nir_assign_var_locations(&nir->outputs,
|
||||
&nir->num_outputs,
|
||||
type_size);
|
||||
st_nir_fixup_varying_slots(st, &nir->outputs);
|
||||
} else if (nir->stage == MESA_SHADER_FRAGMENT) {
|
||||
sort_varyings(&nir->inputs);
|
||||
nir_assign_var_locations(&nir->inputs,
|
||||
&nir->num_inputs,
|
||||
type_size);
|
||||
st_nir_fixup_varying_slots(st, &nir->inputs);
|
||||
nir_assign_var_locations(&nir->outputs,
|
||||
&nir->num_outputs,
|
||||
type_size);
|
||||
} else if (nir->stage == MESA_SHADER_COMPUTE) {
|
||||
/* TODO? */
|
||||
} else {
|
||||
unreachable("invalid shader type for tgsi bypass\n");
|
||||
}
|
||||
|
||||
struct gl_shader_program *shader_program;
|
||||
switch (nir->stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
shader_program = ((struct st_vertex_program *)prog)->shader_program;
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
shader_program = ((struct st_fragment_program *)prog)->shader_program;
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
shader_program = ((struct st_compute_program *)prog)->shader_program;
|
||||
break;
|
||||
default:
|
||||
assert(!"should not be reached");
|
||||
return;
|
||||
}
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_atomics_to_ssbo,
|
||||
st->ctx->Const.Program[nir->stage].MaxAtomicBuffers);
|
||||
|
||||
st_nir_assign_uniform_locations(prog, shader_program,
|
||||
&nir->uniforms, &nir->num_uniforms);
|
||||
|
||||
NIR_PASS_V(nir, nir_lower_system_values);
|
||||
|
||||
if (screen->get_param(screen, PIPE_CAP_NIR_SAMPLERS_AS_DEREF))
|
||||
NIR_PASS_V(nir, nir_lower_samplers_as_deref, shader_program);
|
||||
else
|
||||
NIR_PASS_V(nir, nir_lower_samplers, shader_program);
|
||||
}
|
||||
|
||||
struct gl_program *
|
||||
st_nir_get_mesa_program(struct gl_context *ctx,
|
||||
struct gl_shader_program *shader_program,
|
||||
struct gl_linked_shader *shader)
|
||||
{
|
||||
struct gl_program *prog;
|
||||
|
||||
validate_ir_tree(shader->ir);
|
||||
|
||||
prog = shader->Program;
|
||||
|
||||
prog->Parameters = _mesa_new_parameter_list();
|
||||
|
||||
do_set_program_inouts(shader->ir, prog, shader->Stage);
|
||||
|
||||
_mesa_copy_linked_program_data(shader_program, shader);
|
||||
_mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
|
||||
prog->Parameters);
|
||||
|
||||
/* Make a pass over the IR to add state references for any built-in
|
||||
* uniforms that are used. This has to be done now (during linking).
|
||||
* Code generation doesn't happen until the first time this shader is
|
||||
* used for rendering. Waiting until then to generate the parameters is
|
||||
* too late. At that point, the values for the built-in uniforms won't
|
||||
* get sent to the shader.
|
||||
*/
|
||||
foreach_in_list(ir_instruction, node, shader->ir) {
|
||||
ir_variable *var = node->as_variable();
|
||||
|
||||
if ((var == NULL) || (var->data.mode != ir_var_uniform) ||
|
||||
(strncmp(var->name, "gl_", 3) != 0))
|
||||
continue;
|
||||
|
||||
const ir_state_slot *const slots = var->get_state_slots();
|
||||
assert(slots != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < var->get_num_state_slots(); i++) {
|
||||
_mesa_add_state_reference(prog->Parameters,
|
||||
(gl_state_index *) slots[i].tokens);
|
||||
}
|
||||
}
|
||||
|
||||
if (ctx->_Shader->Flags & GLSL_DUMP) {
|
||||
_mesa_log("\n");
|
||||
_mesa_log("GLSL IR for linked %s program %d:\n",
|
||||
_mesa_shader_stage_to_string(shader->Stage),
|
||||
shader_program->Name);
|
||||
_mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
|
||||
_mesa_log("\n\n");
|
||||
}
|
||||
|
||||
prog->ShadowSamplers = shader->shadow_samplers;
|
||||
prog->ExternalSamplersUsed = gl_external_samplers(prog);
|
||||
_mesa_update_shader_textures_used(shader_program, prog);
|
||||
|
||||
/* Avoid reallocation of the program parameter list, because the uniform
|
||||
* storage is only associated with the original parameter list.
|
||||
* This should be enough for Bitmap and DrawPixels constants.
|
||||
*/
|
||||
_mesa_reserve_parameter_storage(prog->Parameters, 8);
|
||||
|
||||
/* This has to be done last. Any operation the can cause
|
||||
* prog->ParameterValues to get reallocated (e.g., anything that adds a
|
||||
* program constant) has to happen before creating this linkage.
|
||||
*/
|
||||
_mesa_associate_uniform_storage(ctx, shader_program, prog, true);
|
||||
|
||||
struct st_vertex_program *stvp;
|
||||
struct st_fragment_program *stfp;
|
||||
struct st_compute_program *stcp;
|
||||
|
||||
switch (shader->Stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
stvp = (struct st_vertex_program *)prog;
|
||||
stvp->shader_program = shader_program;
|
||||
break;
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
stfp = (struct st_fragment_program *)prog;
|
||||
stfp->shader_program = shader_program;
|
||||
break;
|
||||
case MESA_SHADER_COMPUTE:
|
||||
stcp = (struct st_compute_program *)prog;
|
||||
stcp->shader_program = shader_program;
|
||||
break;
|
||||
default:
|
||||
assert(!"should not be reached");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return prog;
|
||||
}
|
||||
|
||||
} /* extern "C" */
|
@@ -2843,7 +2843,7 @@ glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
|
||||
ir->array->accept(this);
|
||||
src = this->result;
|
||||
|
||||
if (ir->array->ir_type != ir_type_dereference_array) {
|
||||
if (!src.has_index2) {
|
||||
switch (this->prog->Target) {
|
||||
case GL_TESS_CONTROL_PROGRAM_NV:
|
||||
is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) &&
|
||||
|
@@ -245,6 +245,7 @@ vbo_get_minmax_index(struct gl_context *ctx,
|
||||
_mesa_primitive_restart_index(ctx, ib->index_size);
|
||||
const char *indices;
|
||||
GLuint i;
|
||||
GLintptr offset = 0;
|
||||
|
||||
indices = (char *) ib->ptr + prim->start * ib->index_size;
|
||||
if (_mesa_is_bufferobj(ib->obj)) {
|
||||
@@ -254,7 +255,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
|
||||
count, min_index, max_index))
|
||||
return;
|
||||
|
||||
indices = ctx->Driver.MapBufferRange(ctx, (GLintptr) indices, size,
|
||||
offset = (GLintptr) indices;
|
||||
indices = ctx->Driver.MapBufferRange(ctx, offset, size,
|
||||
GL_MAP_READ_BIT, ib->obj,
|
||||
MAP_INTERNAL);
|
||||
}
|
||||
@@ -337,8 +339,8 @@ vbo_get_minmax_index(struct gl_context *ctx,
|
||||
}
|
||||
|
||||
if (_mesa_is_bufferobj(ib->obj)) {
|
||||
vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, prim->start, count,
|
||||
*min_index, *max_index);
|
||||
vbo_minmax_cache_store(ctx, ib->obj, ib->index_size, offset,
|
||||
count, *min_index, *max_index);
|
||||
ctx->Driver.UnmapBuffer(ctx, ib->obj, MAP_INTERNAL);
|
||||
}
|
||||
}
|
||||
|
@@ -26,12 +26,12 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#ifdef _GNU_SOURCE
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
|
||||
#include <locale.h>
|
||||
#ifdef HAVE_XLOCALE_H
|
||||
#include <xlocale.h>
|
||||
static locale_t loc;
|
||||
#endif
|
||||
static locale_t loc;
|
||||
#endif
|
||||
|
||||
#include "strtod.h"
|
||||
@@ -40,7 +40,7 @@ static locale_t loc;
|
||||
void
|
||||
_mesa_locale_init(void)
|
||||
{
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
|
||||
loc = newlocale(LC_CTYPE_MASK, "C", NULL);
|
||||
#endif
|
||||
}
|
||||
@@ -48,7 +48,7 @@ _mesa_locale_init(void)
|
||||
void
|
||||
_mesa_locale_fini(void)
|
||||
{
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
|
||||
freelocale(loc);
|
||||
#endif
|
||||
}
|
||||
@@ -60,7 +60,7 @@ _mesa_locale_fini(void)
|
||||
double
|
||||
_mesa_strtod(const char *s, char **end)
|
||||
{
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
|
||||
return strtod_l(s, end, loc);
|
||||
#else
|
||||
return strtod(s, end);
|
||||
@@ -75,7 +75,7 @@ _mesa_strtod(const char *s, char **end)
|
||||
float
|
||||
_mesa_strtof(const char *s, char **end)
|
||||
{
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_XLOCALE_H)
|
||||
#if defined(_GNU_SOURCE) && defined(HAVE_STRTOD_L)
|
||||
return strtof_l(s, end, loc);
|
||||
#elif defined(HAVE_STRTOF)
|
||||
return strtof(s, end);
|
||||
|
Reference in New Issue
Block a user