Compare commits
82 Commits
mesa-18.1.
...
mesa-18.1.
Author | SHA1 | Date | |
---|---|---|---|
|
f7e89b2f48 | ||
|
765fdbe2f8 | ||
|
ed54f93d91 | ||
|
07049c0b67 | ||
|
696be22905 | ||
|
bc67499beb | ||
|
d96eecbdd7 | ||
|
915d9166bf | ||
|
f0f66ee4ba | ||
|
0747f76b85 | ||
|
f77cae2c59 | ||
|
abe65eb58f | ||
|
6a1ef7ccb8 | ||
|
b5f154a860 | ||
|
fc0d0ad019 | ||
|
dd14a0f3e1 | ||
|
2a0015de3f | ||
|
3c1eaa596f | ||
|
564c882021 | ||
|
decb031bd8 | ||
|
e979b79cec | ||
|
19655023a9 | ||
|
4c07e44ae5 | ||
|
7540acb137 | ||
|
6bc8fcbc94 | ||
|
9b8c90fc67 | ||
|
c86a99adef | ||
|
a4609fe84f | ||
|
f3ec346ab1 | ||
|
ae39496831 | ||
|
2008ca24d7 | ||
|
87453e9fe1 | ||
|
156d0230a5 | ||
|
49f43bdcbf | ||
|
66bc41a3f7 | ||
|
b274062906 | ||
|
888b7fcaf4 | ||
|
c41a74622d | ||
|
dd355ee90e | ||
|
aa1f6934f8 | ||
|
87fb8bf6a8 | ||
|
b9725e3aa4 | ||
|
54b38ea495 | ||
|
f9500edb96 | ||
|
c001ecebe6 | ||
|
d9c527942f | ||
|
31c70b88a7 | ||
|
5a3107b7ec | ||
|
5bf748045c | ||
|
70e56e8ea3 | ||
|
0de0b5c406 | ||
|
98dfe161b0 | ||
|
bd60b6ef54 | ||
|
0376e1a79a | ||
|
dd173da7a2 | ||
|
d6533c02d0 | ||
|
3737e4df7c | ||
|
ab9e4f9733 | ||
|
5fb59f4686 | ||
|
47e22895eb | ||
|
c2e44a3540 | ||
|
0046ee83d7 | ||
|
cc4743a518 | ||
|
8b76c603dd | ||
|
3a85a6180b | ||
|
f5865f1f58 | ||
|
b7c27dc7a3 | ||
|
d6aacb54b5 | ||
|
f9bff89350 | ||
|
4599cebfb3 | ||
|
371e5bd93b | ||
|
c317fef670 | ||
|
96cefc2cbc | ||
|
8952c4370d | ||
|
1a2205636c | ||
|
44d164537f | ||
|
f0ad21dfce | ||
|
0dd373e841 | ||
|
d6245b0a38 | ||
|
0d0135276f | ||
|
675495b575 | ||
|
85d02bcf76 |
32
bin/.cherry-ignore
Normal file
32
bin/.cherry-ignore
Normal file
@@ -0,0 +1,32 @@
|
||||
d89f58a6b8436b59dcf3b896c0ccddabed3f78fd
|
||||
a7d0c53ab89ca86b705014925214101f5bc4187f
|
||||
|
||||
# These patches are ignored becuase Jason provided a version rebased on the 18.1
|
||||
# branch that was pulled instead
|
||||
#
|
||||
778e2881a04817e8c10c7a400bf1e37414420194
|
||||
3b54dd87f707a0fa40a1555bee64aeb06a381c27
|
||||
eeae4851494c16d2a6591550bfa6ef77d887ebe3
|
||||
a26693493570a9d0f0fba1be617e01ee7bfff4db
|
||||
0e7f3febf7e739c075a139ae641d65a0618752f3
|
||||
|
||||
# This has a warning that it fixes more than one commit, but isn't needed in
|
||||
# 18.1
|
||||
#
|
||||
a1220e73116bad74f39c1792a0b0cf0e4e5031db
|
||||
|
||||
# This doesn't apply and isn't necessary since
|
||||
# 1cc2e0cc6b47bd5efbf2af266405060785085e6b isn't in the 18.1 branch
|
||||
#
|
||||
587e712eda95c31d88ea9d20e59ad0ae59afef4f
|
||||
|
||||
# This requires too many previous patch, and Marek (the author) decided to
|
||||
# to drop it from stable
|
||||
#
|
||||
cac7ab1192eefdd8d8b3f25053fb006b5c330eb8
|
||||
|
||||
# This patch is excluded since it requires additional patches to be pulled,
|
||||
# and is mainly aimed at developers, who rarely (if ever) work in the
|
||||
# stable branch
|
||||
#
|
||||
a2f5292c82ad07731d633b36a663e46adc181db9
|
57
configure.ac
57
configure.ac
@@ -433,26 +433,40 @@ fi
|
||||
AM_CONDITIONAL([SSE41_SUPPORTED], [test x$SSE41_SUPPORTED = x1])
|
||||
AC_SUBST([SSE41_CFLAGS], $SSE41_CFLAGS)
|
||||
|
||||
dnl Check for new-style atomic builtins
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
dnl Check for new-style atomic builtins. We first check without linking to
|
||||
dnl -latomic.
|
||||
AC_MSG_CHECKING(whether __atomic_load_n is supported)
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[
|
||||
#include <stdint.h>
|
||||
int main() {
|
||||
int n;
|
||||
return __atomic_load_n(&n, __ATOMIC_ACQUIRE);
|
||||
}]])], GCC_ATOMIC_BUILTINS_SUPPORTED=1)
|
||||
if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
|
||||
struct {
|
||||
uint64_t *v;
|
||||
} x;
|
||||
return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE) &
|
||||
(int)__atomic_add_fetch(x.v, (uint64_t)1, __ATOMIC_ACQ_REL);
|
||||
}]])], GCC_ATOMIC_BUILTINS_SUPPORTED=yes, GCC_ATOMIC_BUILTINS_SUPPORTED=no)
|
||||
|
||||
dnl If that didn't work, we try linking with -latomic, which is needed on some
|
||||
dnl platforms.
|
||||
if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" != xyes; then
|
||||
save_LDFLAGS=$LDFLAGS
|
||||
LDFLAGS="$LDFLAGS -latomic"
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[
|
||||
#include <stdint.h>
|
||||
int main() {
|
||||
struct {
|
||||
uint64_t *v;
|
||||
} x;
|
||||
return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE) &
|
||||
(int)__atomic_add_fetch(x.v, (uint64_t)1, __ATOMIC_ACQ_REL);
|
||||
}]])], GCC_ATOMIC_BUILTINS_SUPPORTED=yes LIBATOMIC_LIBS="-latomic",
|
||||
GCC_ATOMIC_BUILTINS_SUPPORTED=no)
|
||||
LDFLAGS=$save_LDFLAGS
|
||||
fi
|
||||
AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_SUPPORTED)
|
||||
|
||||
if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = xyes; then
|
||||
DEFINES="$DEFINES -DUSE_GCC_ATOMIC_BUILTINS"
|
||||
dnl On some platforms, new-style atomics need a helper library
|
||||
AC_MSG_CHECKING(whether -latomic is needed)
|
||||
AC_LINK_IFELSE([AC_LANG_SOURCE([[
|
||||
#include <stdint.h>
|
||||
uint64_t v;
|
||||
int main() {
|
||||
return (int)__atomic_load_n(&v, __ATOMIC_ACQUIRE);
|
||||
}]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes)
|
||||
AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC)
|
||||
if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then
|
||||
LIBATOMIC_LIBS="-latomic"
|
||||
fi
|
||||
fi
|
||||
AC_SUBST([LIBATOMIC_LIBS])
|
||||
|
||||
@@ -2085,6 +2099,9 @@ if test -n "$with_vulkan_drivers"; then
|
||||
PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
|
||||
radeon_llvm_check $LLVM_REQUIRED_RADV "radv"
|
||||
require_x11_dri3 "radv"
|
||||
if test "x$acv_mako_found" = xno; then
|
||||
AC_MSG_ERROR([Python mako module v$PYTHON_MAKO_REQUIRED or higher not found])
|
||||
fi
|
||||
HAVE_RADEON_VULKAN=yes
|
||||
;;
|
||||
*)
|
||||
@@ -2202,13 +2219,13 @@ else
|
||||
have_vdpau_platform=no
|
||||
fi
|
||||
|
||||
if echo $platforms | grep -q "x11\|drm"; then
|
||||
if echo $platforms | egrep -q "x11|drm"; then
|
||||
have_omx_platform=yes
|
||||
else
|
||||
have_omx_platform=no
|
||||
fi
|
||||
|
||||
if echo $platforms | grep -q "x11\|drm\|wayland"; then
|
||||
if echo $platforms | egrep -q "x11|drm|wayland"; then
|
||||
have_va_platform=yes
|
||||
else
|
||||
have_va_platform=no
|
||||
|
@@ -31,7 +31,8 @@ Compatibility contexts may report a lower version depending on each driver.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
366a35f7530a016f2a8284fb0ee5759eeb216b4d6fa47f0e96b89ad2e43faf96 mesa-18.1.1.tar.gz
|
||||
d3312a2ede5aac14a47476b208b8e3a401367838330197c4588ab8ad420d7781 mesa-18.1.1.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
170
docs/relnotes/18.1.2.html
Normal file
170
docs/relnotes/18.1.2.html
Normal file
@@ -0,0 +1,170 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 18.1.2 Release Notes / June 15 2018</h1>
|
||||
|
||||
<p>
|
||||
Mesa 18.1.2 is a bug fix release which fixes bugs found since the 18.1.1 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 18.1.2 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation.
|
||||
Compatibility contexts may report a lower version depending on each driver.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
a644df23937f4078a2bd9a54349f6315c1955f5e3a4ac272832da51dea4d3c11 mesa-18.1.1.tar.gz
|
||||
070bf0648ba5b242d7303ceed32aed80842f4c0ba16e5acc1a650a46eadfb1f9 mesa-18.1.1.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>None<p>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Alex Smith (4):</p>
|
||||
<ul>
|
||||
<li>radv: Consolidate GFX9 merged shader lookup logic</li>
|
||||
<li>radv: Handle GFX9 merged shaders in radv_flush_constants()</li>
|
||||
<li>radeonsi: Fix crash on shaders using MSAA image load/store</li>
|
||||
<li>radv: Set active_stages the same whether or not shaders were cached</li>
|
||||
</ul>
|
||||
|
||||
<p>Andrew Galante (2):</p>
|
||||
<ul>
|
||||
<li>meson: Test for __atomic_add_fetch in atomic checks</li>
|
||||
<li>configure.ac: Test for __atomic_add_fetch in atomic checks</li>
|
||||
</ul>
|
||||
|
||||
<p>Bas Nieuwenhuizen (1):</p>
|
||||
<ul>
|
||||
<li>radv: Don't pass a TESS_EVAL shader when tesselation is not enabled.</li>
|
||||
</ul>
|
||||
|
||||
<p>Cameron Kumar (1):</p>
|
||||
<ul>
|
||||
<li>vulkan/wsi: Destroy swapchain images after terminating FIFO queues</li>
|
||||
</ul>
|
||||
|
||||
<p>Dylan Baker (6):</p>
|
||||
<ul>
|
||||
<li>docs/relnotes: Add sha256 sums for mesa 18.1.1</li>
|
||||
<li>cherry-ignore: add commits not to pull</li>
|
||||
<li>cherry-ignore: Add patches from Jason that he rebased on 18.1</li>
|
||||
<li>meson: work around gentoo applying -m32 to host compiler in cross builds</li>
|
||||
<li>cherry-ignore: Add another patch</li>
|
||||
<li>version: bump version for 18.1.2 release</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Engestrom (3):</p>
|
||||
<ul>
|
||||
<li>autotools: add missing android file to package</li>
|
||||
<li>configure: radv depends on mako</li>
|
||||
<li>i965: fix resource leak</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (10):</p>
|
||||
<ul>
|
||||
<li>intel/eu: Add some brw_get_default_ helpers</li>
|
||||
<li>intel/eu: Copy fields manually in brw_next_insn</li>
|
||||
<li>intel/eu: Set flag [sub]register number differently for 3src</li>
|
||||
<li>intel/blorp: Don't vertex fetch directly from clear values</li>
|
||||
<li>intel/isl: Add bounds-checking assertions in isl_format_get_layout</li>
|
||||
<li>intel/isl: Add bounds-checking assertions for the format_info table</li>
|
||||
<li>i965/screen: Refactor query_dma_buf_formats</li>
|
||||
<li>i965/screen: Use RGBA non-sRGB formats for images</li>
|
||||
<li>anv: Set fence/semaphore types to NONE in impl_cleanup</li>
|
||||
<li>i965/screen: Return false for unsupported formats in query_modifiers</li>
|
||||
</ul>
|
||||
|
||||
<p>Jordan Justen (1):</p>
|
||||
<ul>
|
||||
<li>mesa/program_binary: add implicit UseProgram after successful ProgramBinary</li>
|
||||
</ul>
|
||||
|
||||
<p>Juan A. Suarez Romero (1):</p>
|
||||
<ul>
|
||||
<li>glsl: Add ir_binop_vector_extract in NIR</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>i965: Fix batch-last mode to properly swap BOs.</li>
|
||||
<li>anv: Disable __gen_validate_value if NDEBUG is set.</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (1):</p>
|
||||
<ul>
|
||||
<li>r300g/swtcl: make pipe_context uploaders use malloc'd memory as before</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Turner (1):</p>
|
||||
<ul>
|
||||
<li>meson: Fix -latomic check</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (1):</p>
|
||||
<ul>
|
||||
<li>glx: Fix number of property values to read in glXImportContextEXT</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolas Boichat (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac/meson.build: Fix -latomic test</li>
|
||||
</ul>
|
||||
|
||||
<p>Philip Rebohle (1):</p>
|
||||
<ul>
|
||||
<li>radv: Use correct color format for fast clears</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (3):</p>
|
||||
<ul>
|
||||
<li>radv: fix a GPU hang when MRTs are sparse</li>
|
||||
<li>radv: fix missing ZRANGE_PRECISION(1) for GFX9+</li>
|
||||
<li>radv: add a workaround for DXVK hangs by setting amdgpu-skip-threshold</li>
|
||||
</ul>
|
||||
|
||||
<p>Scott D Phillips (1):</p>
|
||||
<ul>
|
||||
<li>intel/tools: add intel_sanitize_gpu to EXTRA_DIST</li>
|
||||
</ul>
|
||||
|
||||
<p>Thomas Petazzoni (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac: rework -latomic check</li>
|
||||
</ul>
|
||||
|
||||
<p>Timothy Arceri (2):</p>
|
||||
<ul>
|
||||
<li>ac: fix possible truncation of intrinsic name</li>
|
||||
<li>radeonsi: fix possible truncation on renderer string</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
167
docs/relnotes/18.1.3.html
Normal file
167
docs/relnotes/18.1.3.html
Normal file
@@ -0,0 +1,167 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 18.1.3 Release Notes / June 29 2018</h1>
|
||||
|
||||
<p>
|
||||
Mesa 18.1.3 is a bug fix release which fixes bugs found since the 18.1.2 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 18.1.2 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation.
|
||||
Compatibility contexts may report a lower version depending on each driver.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD mesa-18.1.3.tar.gz
|
||||
TBD mesa-18.1.3.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105396">Bug 105396</a> - tc compatible htile sets depth of htiles of discarded fragments to 1.0</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105399">Bug 105399</a> - [snb] GPU hang: after geometry shader emits no geometry, the program hangs</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106756">Bug 106756</a> - Wine 3.9 crashes with DXVK on Just Cause 3 and Quantum Break on VEGA but works ON POLARIS</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106774">Bug 106774</a> - GLSL IR copy propagates loads of SSBOs</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106903">Bug 106903</a> - radv: Fragment shader output goes to wrong attachments when render targets are sparse</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106907">Bug 106907</a> - Correct Transform Feedback Varyings information is expected after using ProgramBinary</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106912">Bug 106912</a> - radv: 16-bit depth buffer causes artifacts in Shadow Warrior 2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106980">Bug 106980</a> - Basemark GPU vulkan benchmark fails.</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
<p>Andrii Simiklit (1):</p>
|
||||
<ul>
|
||||
<li>i965/gen6/gs: Handle case where a GS doesn't allocate VUE</li>
|
||||
</ul>
|
||||
|
||||
<p>Bas Nieuwenhuizen (2):</p>
|
||||
<ul>
|
||||
<li>radv: Fix output for sparse MRTs.</li>
|
||||
<li>ac/surface: Set compressZ for stencil-only surfaces.</li>
|
||||
</ul>
|
||||
|
||||
<p>Christian Gmeiner (1):</p>
|
||||
<ul>
|
||||
<li>util/bitset: include util/macro.h</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (1):</p>
|
||||
<ul>
|
||||
<li>glsl: allow standalone semicolons outside main()</li>
|
||||
</ul>
|
||||
|
||||
<p>Dylan Baker (8):</p>
|
||||
<ul>
|
||||
<li>docs: Add release notes for 18.1.2</li>
|
||||
<li>cherry-ignore: Add 587e712eda95c31d88ea9d20e59ad0ae59afef4f</li>
|
||||
<li>meson: Fix auto option for va</li>
|
||||
<li>meson: Fix auto option for xvmc</li>
|
||||
<li>meson: Correct behavior of vdpau=auto</li>
|
||||
<li>cherry-ignore: Ignore cac7ab1192eefdd8d8b3f25053fb006b5c330eb8</li>
|
||||
<li>cherry-ignore: add a2f5292c82ad07731d633b36a663e46adc181db9</li>
|
||||
<li>VERSION: bump version to 18.1.3</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (2):</p>
|
||||
<ul>
|
||||
<li>configure: use compliant grep regex checks</li>
|
||||
<li>glsl/tests/glcpp: reinstate "error out if no tests found"</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Engestrom (3):</p>
|
||||
<ul>
|
||||
<li>radv: fix reported number of available VGPRs</li>
|
||||
<li>radv: fix bitwise check</li>
|
||||
<li>meson: fix i965/anv/isl genX static lib names</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (2):</p>
|
||||
<ul>
|
||||
<li>glsl: Don't copy propagate from SSBO or shared variables either</li>
|
||||
<li>glsl: Don't copy propagate elements from SSBO or shared variables either</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (2):</p>
|
||||
<ul>
|
||||
<li>nir: Handle call instructions in foreach_src</li>
|
||||
<li>nir/validate: Use the type from the tail of call parameter derefs</li>
|
||||
</ul>
|
||||
|
||||
<p>Lukas Rusak (2):</p>
|
||||
<ul>
|
||||
<li>meson: only build vl_winsys_dri.c when x11 platform is used</li>
|
||||
<li>meson: fix private libs when building without glx</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (5):</p>
|
||||
<ul>
|
||||
<li>radeonsi/gfx9: fix si_get_buffer_from_descriptors for 48-bit pointers</li>
|
||||
<li>ac/gpu_info: report real total memory sizes</li>
|
||||
<li>ac/gpu_info: add kernel_flushes_hdp_before_ib</li>
|
||||
<li>radeonsi: always put persistent buffers into GTT on radeon</li>
|
||||
<li>mesa: fix glGetInteger64v for arrays of integers</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (1):</p>
|
||||
<ul>
|
||||
<li>freedreno/ir3: fix base_vertex</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (6):</p>
|
||||
<ul>
|
||||
<li>radv: don't fast clear HTILE for 16-bit depth surfaces on GFX8</li>
|
||||
<li>radv: update the ZRANGE_PRECISION value for the TC-compat bug</li>
|
||||
<li>radv: fix emitting the TCS regs on GFX9</li>
|
||||
<li>radv: fix HTILE metadata initialization in presence of subpass clears</li>
|
||||
<li>radv: ignore pInheritanceInfo for primary command buffers</li>
|
||||
<li>radv: use separate bind points for the dynamic buffers</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (1):</p>
|
||||
<ul>
|
||||
<li>glsl: serialize data from glTransformFeedbackVaryings</li>
|
||||
</ul>
|
||||
|
||||
<p>Tomeu Vizoso (1):</p>
|
||||
<ul>
|
||||
<li>virgl: Remove debugging left-overs</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
78
meson.build
78
meson.build
@@ -420,16 +420,17 @@ elif not (with_gallium_r300 or with_gallium_r600 or with_gallium_radeonsi or
|
||||
else
|
||||
_vdpau = 'false'
|
||||
endif
|
||||
elif _vdpau == 'auto'
|
||||
_vdpau = 'true'
|
||||
endif
|
||||
with_gallium_vdpau = _vdpau == 'true'
|
||||
dep_vdpau = null_dep
|
||||
if with_gallium_vdpau
|
||||
dep_vdpau = dependency('vdpau', version : '>= 1.1')
|
||||
dep_vdpau = declare_dependency(
|
||||
compile_args : run_command(prog_pkgconfig, ['vdpau', '--cflags']).stdout().split()
|
||||
)
|
||||
with_gallium_vdpau = false
|
||||
if _vdpau != 'false'
|
||||
dep_vdpau = dependency('vdpau', version : '>= 1.1', required : _vdpau == 'true')
|
||||
if dep_vdpau.found()
|
||||
dep_vdpau = declare_dependency(
|
||||
compile_args : run_command(prog_pkgconfig, ['vdpau', '--cflags']).stdout().split()
|
||||
)
|
||||
with_gallium_vdpau = true
|
||||
endif
|
||||
endif
|
||||
|
||||
if with_gallium_vdpau
|
||||
@@ -459,13 +460,12 @@ elif not (with_gallium_r600 or with_gallium_nouveau)
|
||||
else
|
||||
_xvmc = 'false'
|
||||
endif
|
||||
elif _xvmc == 'auto'
|
||||
_xvmc = 'true'
|
||||
endif
|
||||
with_gallium_xvmc = _xvmc == 'true'
|
||||
dep_xvmc = null_dep
|
||||
if with_gallium_xvmc
|
||||
dep_xvmc = dependency('xvmc', version : '>= 1.0.6')
|
||||
with_gallium_xvmc = false
|
||||
if _xvmc != 'false'
|
||||
dep_xvmc = dependency('xvmc', version : '>= 1.0.6', required : _xvmc == 'true')
|
||||
with_gallium_xvmc = dep_xvmc.found()
|
||||
endif
|
||||
|
||||
xvmc_drivers_path = get_option('xvmc-libs-path')
|
||||
@@ -581,13 +581,16 @@ elif not (with_gallium_r600 or with_gallium_radeonsi or with_gallium_nouveau)
|
||||
elif _va == 'auto'
|
||||
_va = 'true'
|
||||
endif
|
||||
with_gallium_va = _va == 'true'
|
||||
with_gallium_va = false
|
||||
dep_va = null_dep
|
||||
if with_gallium_va
|
||||
dep_va = dependency('libva', version : '>= 0.39.0')
|
||||
dep_va_headers = declare_dependency(
|
||||
compile_args : run_command(prog_pkgconfig, ['libva', '--cflags']).stdout().split()
|
||||
)
|
||||
if _va != 'false'
|
||||
dep_va = dependency('libva', version : '>= 0.38.0', required : _va == 'true')
|
||||
if dep_va.found()
|
||||
dep_va_headers = declare_dependency(
|
||||
compile_args : run_command(prog_pkgconfig, ['libva', '--cflags']).stdout().split()
|
||||
)
|
||||
with_gallium_va = true
|
||||
endif
|
||||
endif
|
||||
|
||||
va_drivers_path = get_option('va-libs-path')
|
||||
@@ -836,7 +839,15 @@ endif
|
||||
# Check for GCC style atomics
|
||||
dep_atomic = null_dep
|
||||
|
||||
if cc.compiles('int main() { int n; return __atomic_load_n(&n, __ATOMIC_ACQUIRE); }',
|
||||
if cc.compiles('''#include <stdint.h>
|
||||
int main() {
|
||||
struct {
|
||||
uint64_t *v;
|
||||
} x;
|
||||
return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE) &
|
||||
(int)__atomic_add_fetch(x.v, (uint64_t)1, __ATOMIC_ACQ_REL);
|
||||
|
||||
}''',
|
||||
name : 'GCC atomic builtins')
|
||||
pre_args += '-DUSE_GCC_ATOMIC_BUILTINS'
|
||||
|
||||
@@ -848,8 +859,11 @@ if cc.compiles('int main() { int n; return __atomic_load_n(&n, __ATOMIC_ACQUIRE)
|
||||
# as ARM.
|
||||
if not cc.links('''#include <stdint.h>
|
||||
int main() {
|
||||
uint64_t n;
|
||||
return (int)__atomic_load_n(&n, __ATOMIC_ACQUIRE);
|
||||
struct {
|
||||
uint64_t *v;
|
||||
} x;
|
||||
return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE) &
|
||||
(int)__atomic_add_fetch(x.v, (uint64_t)1, __ATOMIC_ACQ_REL);
|
||||
}''',
|
||||
name : 'GCC atomic builtins required -latomic')
|
||||
dep_atomic = cc.find_library('atomic')
|
||||
@@ -873,7 +887,7 @@ endif
|
||||
# TODO: it should be possible to use an exe_wrapper to run the binary during
|
||||
# the build.
|
||||
if meson.is_cross_build()
|
||||
if not (build_machine.cpu_family() == 'x86_64' and host_machine.cpu_family() == 'x86'
|
||||
if not (build_machine.cpu_family().startswith('x86') and host_machine.cpu_family() == 'x86'
|
||||
and build_machine.system() == host_machine.system())
|
||||
message('Cross compiling to x86 from non-x86, disabling asm')
|
||||
with_asm = false
|
||||
@@ -1329,18 +1343,24 @@ endforeach
|
||||
|
||||
inc_include = include_directories('include')
|
||||
|
||||
gl_priv_reqs = [
|
||||
'x11', 'xext', 'xdamage >= 1.1', 'xfixes', 'x11-xcb', 'xcb',
|
||||
'xcb-glx >= 1.8.1']
|
||||
gl_priv_reqs = []
|
||||
|
||||
if with_glx == 'xlib' or with_glx == 'gallium-xlib'
|
||||
gl_priv_reqs += ['x11', 'xext', 'xcb']
|
||||
elif with_glx == 'dri'
|
||||
gl_priv_reqs += [
|
||||
'x11', 'xext', 'xdamage >= 1.1', 'xfixes', 'x11-xcb', 'xcb',
|
||||
'xcb-glx >= 1.8.1']
|
||||
if with_dri_platform == 'drm'
|
||||
gl_priv_reqs += 'xcb-dri2 >= 1.8'
|
||||
endif
|
||||
endif
|
||||
if dep_libdrm.found()
|
||||
gl_priv_reqs += 'libdrm >= 2.4.75'
|
||||
endif
|
||||
if dep_xxf86vm.found()
|
||||
gl_priv_reqs += 'xxf86vm'
|
||||
endif
|
||||
if with_dri_platform == 'drm'
|
||||
gl_priv_reqs += 'xcb-dri2 >= 1.8'
|
||||
endif
|
||||
|
||||
gl_priv_libs = []
|
||||
if dep_thread.found()
|
||||
|
@@ -97,7 +97,6 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
|
||||
struct amdgpu_gpu_info *amdinfo)
|
||||
{
|
||||
struct amdgpu_buffer_size_alignments alignment_info = {};
|
||||
struct amdgpu_heap_info vram, vram_vis, gtt;
|
||||
struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {};
|
||||
struct drm_amdgpu_info_hw_ip uvd_enc = {}, vce = {}, vcn_dec = {};
|
||||
struct drm_amdgpu_info_hw_ip vcn_enc = {}, gfx = {};
|
||||
@@ -131,26 +130,6 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
|
||||
return false;
|
||||
}
|
||||
|
||||
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
|
||||
&vram_vis);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, >t);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, &dma);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
|
||||
@@ -255,6 +234,60 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info->drm_minor >= 9) {
|
||||
struct drm_amdgpu_memory_info meminfo;
|
||||
|
||||
r = amdgpu_query_info(dev, AMDGPU_INFO_MEMORY, sizeof(meminfo), &meminfo);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_info(memory) failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Note: usable_heap_size values can be random and can't be relied on. */
|
||||
info->gart_size = meminfo.gtt.total_heap_size;
|
||||
info->vram_size = meminfo.vram.total_heap_size;
|
||||
info->vram_vis_size = meminfo.cpu_accessible_vram.total_heap_size;
|
||||
|
||||
info->max_alloc_size = MAX2(meminfo.vram.max_allocation,
|
||||
meminfo.gtt.max_allocation);
|
||||
} else {
|
||||
/* This is a deprecated interface, which reports usable sizes
|
||||
* (total minus pinned), but the pinned size computation is
|
||||
* buggy, so the values returned from these functions can be
|
||||
* random.
|
||||
*/
|
||||
struct amdgpu_heap_info vram, vram_vis, gtt;
|
||||
|
||||
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
|
||||
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
|
||||
&vram_vis);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, >t);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
info->gart_size = gtt.heap_size;
|
||||
info->vram_size = vram.heap_size;
|
||||
info->vram_vis_size = vram_vis.heap_size;
|
||||
|
||||
/* The kernel can split large buffers in VRAM but not in GTT, so large
|
||||
* allocations can fail or cause buffer movement failures in the kernel.
|
||||
*/
|
||||
info->max_alloc_size = MAX2(info->vram_size * 0.9, info->gart_size * 0.7);
|
||||
}
|
||||
|
||||
/* Set chip identification. */
|
||||
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
|
||||
info->vce_harvest_config = amdinfo->vce_harvest_config;
|
||||
@@ -287,15 +320,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
|
||||
!(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
|
||||
|
||||
/* Set hardware information. */
|
||||
info->gart_size = gtt.heap_size;
|
||||
info->vram_size = vram.heap_size;
|
||||
info->vram_vis_size = vram_vis.heap_size;
|
||||
info->gds_size = gds.gds_total_size;
|
||||
info->gds_gfx_partition_size = gds.gds_gfx_partition_size;
|
||||
/* The kernel can split large buffers in VRAM but not in GTT, so large
|
||||
* allocations can fail or cause buffer movement failures in the kernel.
|
||||
*/
|
||||
info->max_alloc_size = MIN2(info->vram_size * 0.9, info->gart_size * 0.7);
|
||||
/* convert the shader clock from KHz to MHz */
|
||||
info->max_shader_clock = amdinfo->max_engine_clk / 1000;
|
||||
info->max_se = amdinfo->num_shader_engines;
|
||||
@@ -316,6 +342,8 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
|
||||
/* TODO: Enable this once the kernel handles it efficiently. */
|
||||
info->has_local_buffers = info->drm_minor >= 20 &&
|
||||
!info->has_dedicated_vram;
|
||||
info->kernel_flushes_hdp_before_ib = true;
|
||||
|
||||
info->num_render_backends = amdinfo->rb_pipes;
|
||||
info->clock_crystal_freq = amdinfo->gpu_counter_freq;
|
||||
if (!info->clock_crystal_freq) {
|
||||
@@ -458,6 +486,7 @@ void ac_print_gpu_info(struct radeon_info *info)
|
||||
printf(" has_fence_to_handle = %u\n", info->has_fence_to_handle);
|
||||
printf(" has_ctx_priority = %u\n", info->has_ctx_priority);
|
||||
printf(" has_local_buffers = %u\n", info->has_local_buffers);
|
||||
printf(" kernel_flushes_hdp_before_ib = %u\n", info->kernel_flushes_hdp_before_ib);
|
||||
|
||||
printf("Shader core info:\n");
|
||||
printf(" max_shader_clock = %i\n", info->max_shader_clock);
|
||||
|
@@ -96,6 +96,7 @@ struct radeon_info {
|
||||
bool has_fence_to_handle;
|
||||
bool has_ctx_priority;
|
||||
bool has_local_buffers;
|
||||
bool kernel_flushes_hdp_before_ib;
|
||||
|
||||
/* Shader cores. */
|
||||
uint32_t r600_max_quad_pipes; /* wave size / 16 */
|
||||
|
@@ -2866,7 +2866,7 @@ static LLVMValueRef
|
||||
ac_build_set_inactive(struct ac_llvm_context *ctx, LLVMValueRef src,
|
||||
LLVMValueRef inactive)
|
||||
{
|
||||
char name[32], type[8];
|
||||
char name[33], type[8];
|
||||
LLVMTypeRef src_type = LLVMTypeOf(src);
|
||||
src = ac_to_integer(ctx, src);
|
||||
inactive = ac_to_integer(ctx, inactive);
|
||||
|
@@ -624,7 +624,7 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
|
||||
config->info.levels == 1);
|
||||
|
||||
AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
|
||||
AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;
|
||||
AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
|
||||
|
||||
/* On CI/VI, the DB uses the same pitch and tile mode (except tilesplit)
|
||||
* for Z and stencil. This can cause a number of problems which we work
|
||||
|
@@ -559,20 +559,8 @@ radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
|
||||
gl_shader_stage stage,
|
||||
int idx)
|
||||
{
|
||||
if (stage == MESA_SHADER_VERTEX) {
|
||||
if (pipeline->shaders[MESA_SHADER_VERTEX])
|
||||
return &pipeline->shaders[MESA_SHADER_VERTEX]->info.user_sgprs_locs.shader_data[idx];
|
||||
if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
|
||||
return &pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.user_sgprs_locs.shader_data[idx];
|
||||
if (pipeline->shaders[MESA_SHADER_GEOMETRY])
|
||||
return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.user_sgprs_locs.shader_data[idx];
|
||||
} else if (stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
|
||||
return &pipeline->shaders[MESA_SHADER_TESS_EVAL]->info.user_sgprs_locs.shader_data[idx];
|
||||
if (pipeline->shaders[MESA_SHADER_GEOMETRY])
|
||||
return &pipeline->shaders[MESA_SHADER_GEOMETRY]->info.user_sgprs_locs.shader_data[idx];
|
||||
}
|
||||
return &pipeline->shaders[stage]->info.user_sgprs_locs.shader_data[idx];
|
||||
struct radv_shader_variant *shader = radv_get_shader(pipeline, stage);
|
||||
return &shader->info.user_sgprs_locs.shader_data[idx];
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1020,6 +1008,68 @@ radv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_ds_buffer_info *ds,
|
||||
struct radv_image *image, VkImageLayout layout,
|
||||
bool requires_cond_write)
|
||||
{
|
||||
uint32_t db_z_info = ds->db_z_info;
|
||||
uint32_t db_z_info_reg;
|
||||
|
||||
if (!radv_image_is_tc_compat_htile(image))
|
||||
return;
|
||||
|
||||
if (!radv_layout_has_htile(image, layout,
|
||||
radv_image_queue_family_mask(image,
|
||||
cmd_buffer->queue_family_index,
|
||||
cmd_buffer->queue_family_index))) {
|
||||
db_z_info &= C_028040_TILE_SURFACE_ENABLE;
|
||||
}
|
||||
|
||||
db_z_info &= C_028040_ZRANGE_PRECISION;
|
||||
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
|
||||
db_z_info_reg = R_028038_DB_Z_INFO;
|
||||
} else {
|
||||
db_z_info_reg = R_028040_DB_Z_INFO;
|
||||
}
|
||||
|
||||
/* When we don't know the last fast clear value we need to emit a
|
||||
* conditional packet, otherwise we can update DB_Z_INFO directly.
|
||||
*/
|
||||
if (requires_cond_write) {
|
||||
radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_WRITE, 7, 0));
|
||||
|
||||
const uint32_t write_space = 0 << 8; /* register */
|
||||
const uint32_t poll_space = 1 << 4; /* memory */
|
||||
const uint32_t function = 3 << 0; /* equal to the reference */
|
||||
const uint32_t options = write_space | poll_space | function;
|
||||
radeon_emit(cmd_buffer->cs, options);
|
||||
|
||||
/* poll address - location of the depth clear value */
|
||||
uint64_t va = radv_buffer_get_va(image->bo);
|
||||
va += image->offset + image->clear_value_offset;
|
||||
|
||||
/* In presence of stencil format, we have to adjust the base
|
||||
* address because the first value is the stencil clear value.
|
||||
*/
|
||||
if (vk_format_is_stencil(image->vk_format))
|
||||
va += 4;
|
||||
|
||||
radeon_emit(cmd_buffer->cs, va);
|
||||
radeon_emit(cmd_buffer->cs, va >> 32);
|
||||
|
||||
radeon_emit(cmd_buffer->cs, fui(0.0f)); /* reference value */
|
||||
radeon_emit(cmd_buffer->cs, (uint32_t)-1); /* comparison mask */
|
||||
radeon_emit(cmd_buffer->cs, db_z_info_reg >> 2); /* write address low */
|
||||
radeon_emit(cmd_buffer->cs, 0u); /* write address high */
|
||||
radeon_emit(cmd_buffer->cs, db_z_info);
|
||||
} else {
|
||||
radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_ds_buffer_info *ds,
|
||||
@@ -1078,6 +1128,9 @@ radv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
|
||||
}
|
||||
|
||||
/* Update the ZRANGE_PRECISION value for the TC-compat bug. */
|
||||
radv_update_zrange_precision(cmd_buffer, ds, image, layout, true);
|
||||
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
|
||||
ds->pa_su_poly_offset_db_fmt_cntl);
|
||||
}
|
||||
@@ -1119,6 +1172,35 @@ radv_set_depth_clear_regs(struct radv_cmd_buffer *cmd_buffer,
|
||||
radeon_emit(cmd_buffer->cs, ds_clear_value.stencil); /* R_028028_DB_STENCIL_CLEAR */
|
||||
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
radeon_emit(cmd_buffer->cs, fui(ds_clear_value.depth)); /* R_02802C_DB_DEPTH_CLEAR */
|
||||
|
||||
/* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
|
||||
* only needed when clearing Z to 0.0.
|
||||
*/
|
||||
if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
|
||||
ds_clear_value.depth == 0.0) {
|
||||
struct radv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
|
||||
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
|
||||
|
||||
if (!framebuffer || !subpass)
|
||||
return;
|
||||
|
||||
if (subpass->depth_stencil_attachment.attachment == VK_ATTACHMENT_UNUSED)
|
||||
return;
|
||||
|
||||
int idx = subpass->depth_stencil_attachment.attachment;
|
||||
VkImageLayout layout = subpass->depth_stencil_attachment.layout;
|
||||
struct radv_attachment_info *att = &framebuffer->attachments[idx];
|
||||
struct radv_image *image = att->attachment->image;
|
||||
|
||||
/* Only needed if the image is currently bound as the depth
|
||||
* surface.
|
||||
*/
|
||||
if (att->attachment->image != image)
|
||||
return;
|
||||
|
||||
radv_update_zrange_precision(cmd_buffer, &att->ds, image,
|
||||
layout, false);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1588,7 +1670,13 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radv_pipeline *pipeline = stages & VK_SHADER_STAGE_COMPUTE_BIT
|
||||
? cmd_buffer->state.compute_pipeline
|
||||
: cmd_buffer->state.pipeline;
|
||||
VkPipelineBindPoint bind_point = stages & VK_SHADER_STAGE_COMPUTE_BIT ?
|
||||
VK_PIPELINE_BIND_POINT_COMPUTE :
|
||||
VK_PIPELINE_BIND_POINT_GRAPHICS;
|
||||
struct radv_descriptor_state *descriptors_state =
|
||||
radv_get_descriptors_state(cmd_buffer, bind_point);
|
||||
struct radv_pipeline_layout *layout = pipeline->layout;
|
||||
struct radv_shader_variant *shader, *prev_shader;
|
||||
unsigned offset;
|
||||
void *ptr;
|
||||
uint64_t va;
|
||||
@@ -1604,7 +1692,8 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
|
||||
return;
|
||||
|
||||
memcpy(ptr, cmd_buffer->push_constants, layout->push_constant_size);
|
||||
memcpy((char*)ptr + layout->push_constant_size, cmd_buffer->dynamic_buffers,
|
||||
memcpy((char*)ptr + layout->push_constant_size,
|
||||
descriptors_state->dynamic_buffers,
|
||||
16 * layout->dynamic_offset_count);
|
||||
|
||||
va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
|
||||
@@ -1613,10 +1702,16 @@ radv_flush_constants(struct radv_cmd_buffer *cmd_buffer,
|
||||
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws,
|
||||
cmd_buffer->cs, MESA_SHADER_STAGES * 4);
|
||||
|
||||
prev_shader = NULL;
|
||||
radv_foreach_stage(stage, stages) {
|
||||
if (pipeline->shaders[stage]) {
|
||||
shader = radv_get_shader(pipeline, stage);
|
||||
|
||||
/* Avoid redundantly emitting the address for merged stages. */
|
||||
if (shader && shader != prev_shader) {
|
||||
radv_emit_userdata_address(cmd_buffer, pipeline, stage,
|
||||
AC_UD_PUSH_CONSTANTS, va);
|
||||
|
||||
prev_shader = shader;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1631,7 +1726,7 @@ radv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer,
|
||||
if ((pipeline_is_dirty ||
|
||||
(cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
|
||||
cmd_buffer->state.pipeline->vertex_elements.count &&
|
||||
radv_get_vertex_shader(cmd_buffer->state.pipeline)->info.info.vs.has_vertex_buffers) {
|
||||
radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.has_vertex_buffers) {
|
||||
struct radv_vertex_elements_info *velems = &cmd_buffer->state.pipeline->vertex_elements;
|
||||
unsigned vb_offset;
|
||||
void *vb_ptr;
|
||||
@@ -2102,7 +2197,8 @@ VkResult radv_BeginCommandBuffer(
|
||||
}
|
||||
}
|
||||
|
||||
if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) {
|
||||
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
|
||||
(pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
|
||||
assert(pBeginInfo->pInheritanceInfo);
|
||||
cmd_buffer->state.framebuffer = radv_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
|
||||
cmd_buffer->state.pass = radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
|
||||
@@ -2231,6 +2327,8 @@ void radv_CmdBindDescriptorSets(
|
||||
unsigned dyn_idx = 0;
|
||||
|
||||
const bool no_dynamic_bounds = cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
|
||||
struct radv_descriptor_state *descriptors_state =
|
||||
radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
|
||||
|
||||
for (unsigned i = 0; i < descriptorSetCount; ++i) {
|
||||
unsigned idx = i + firstSet;
|
||||
@@ -2239,7 +2337,7 @@ void radv_CmdBindDescriptorSets(
|
||||
|
||||
for(unsigned j = 0; j < set->layout->dynamic_offset_count; ++j, ++dyn_idx) {
|
||||
unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
|
||||
uint32_t *dst = cmd_buffer->dynamic_buffers + idx * 4;
|
||||
uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
|
||||
assert(dyn_idx < dynamicOffsetCount);
|
||||
|
||||
struct radv_descriptor_range *range = set->dynamic_descriptors + j;
|
||||
@@ -2956,7 +3054,7 @@ radv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer,
|
||||
struct radeon_winsys_cs *cs = cmd_buffer->cs;
|
||||
unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA
|
||||
: V_0287F0_DI_SRC_SEL_AUTO_INDEX;
|
||||
bool draw_id_enable = radv_get_vertex_shader(cmd_buffer->state.pipeline)->info.info.vs.needs_draw_id;
|
||||
bool draw_id_enable = radv_get_shader(cmd_buffer->state.pipeline, MESA_SHADER_VERTEX)->info.info.vs.needs_draw_id;
|
||||
uint32_t base_reg = cmd_buffer->state.pipeline->graphics.vtx_base_sgpr;
|
||||
assert(base_reg);
|
||||
|
||||
@@ -3711,6 +3809,20 @@ static void radv_initialize_htile(struct radv_cmd_buffer *cmd_buffer,
|
||||
size, clear_word);
|
||||
|
||||
state->flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
|
||||
|
||||
/* Initialize the depth clear registers and update the ZRANGE_PRECISION
|
||||
* value for the TC-compat bug (because ZRANGE_PRECISION is 1 by
|
||||
* default). This is only needed whean clearing Z to 0.0f.
|
||||
*/
|
||||
if (radv_image_is_tc_compat_htile(image) && clear_word == 0) {
|
||||
VkImageAspectFlags aspects = VK_IMAGE_ASPECT_DEPTH_BIT;
|
||||
VkClearDepthStencilValue value = {};
|
||||
|
||||
if (vk_format_is_stencil(image->vk_format))
|
||||
aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
|
||||
|
||||
radv_set_depth_clear_regs(cmd_buffer, image, value, aspects);
|
||||
}
|
||||
}
|
||||
|
||||
static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer,
|
||||
@@ -3725,14 +3837,7 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
|
||||
if (!radv_image_has_htile(image))
|
||||
return;
|
||||
|
||||
if (dst_layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL &&
|
||||
(pending_clears & vk_format_aspects(image->vk_format)) == vk_format_aspects(image->vk_format) &&
|
||||
cmd_buffer->state.render_area.offset.x == 0 && cmd_buffer->state.render_area.offset.y == 0 &&
|
||||
cmd_buffer->state.render_area.extent.width == image->info.width &&
|
||||
cmd_buffer->state.render_area.extent.height == image->info.height) {
|
||||
/* The clear will initialize htile. */
|
||||
return;
|
||||
} else if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
|
||||
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED &&
|
||||
radv_layout_has_htile(image, dst_layout, dst_queue_mask)) {
|
||||
/* TODO: merge with the clear if applicable */
|
||||
radv_initialize_htile(cmd_buffer, image, range, 0);
|
||||
|
@@ -3928,7 +3928,8 @@ radv_initialise_ds_surface(struct radv_device *device,
|
||||
ds->db_z_info = S_028038_FORMAT(format) |
|
||||
S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
|
||||
S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
|
||||
S_028038_MAXMIP(iview->image->info.levels - 1);
|
||||
S_028038_MAXMIP(iview->image->info.levels - 1) |
|
||||
S_028038_ZRANGE_PRECISION(1);
|
||||
ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
|
||||
S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
|
||||
|
||||
|
@@ -717,6 +717,14 @@ emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer,
|
||||
if ((clear_value.depth != 0.0 && clear_value.depth != 1.0) || !(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
|
||||
goto fail;
|
||||
|
||||
/* GFX8 only supports 32-bit depth surfaces but we can enable TC-compat
|
||||
* HTILE for 16-bit surfaces if no Z planes are compressed. Though,
|
||||
* fast HTILE clears don't seem to work.
|
||||
*/
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class == VI &&
|
||||
iview->image->vk_format == VK_FORMAT_D16_UNORM)
|
||||
goto fail;
|
||||
|
||||
if (vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT) {
|
||||
if (clear_value.stencil != 0 || !(aspects & VK_IMAGE_ASPECT_STENCIL_BIT))
|
||||
goto fail;
|
||||
@@ -1035,7 +1043,7 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
|
||||
goto fail;
|
||||
|
||||
/* DCC */
|
||||
ret = radv_format_pack_clear_color(iview->image->vk_format,
|
||||
ret = radv_format_pack_clear_color(iview->vk_format,
|
||||
clear_color, &clear_value);
|
||||
if (ret == false)
|
||||
goto fail;
|
||||
@@ -1056,7 +1064,7 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
|
||||
bool can_avoid_fast_clear_elim;
|
||||
bool need_decompress_pass = false;
|
||||
|
||||
vi_get_fast_clear_parameters(iview->image->vk_format,
|
||||
vi_get_fast_clear_parameters(iview->vk_format,
|
||||
&clear_value, &reset_value,
|
||||
&can_avoid_fast_clear_elim);
|
||||
|
||||
|
@@ -3051,7 +3051,6 @@ static void ac_nir_fixup_ls_hs_input_vgprs(struct radv_shader_context *ctx)
|
||||
LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
|
||||
ctx->ac.i32_0, "");
|
||||
ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
|
||||
ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, "");
|
||||
ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, "");
|
||||
ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, "");
|
||||
}
|
||||
|
@@ -504,6 +504,7 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
|
||||
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
|
||||
struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
|
||||
unsigned col_format = 0;
|
||||
unsigned num_targets;
|
||||
|
||||
for (unsigned i = 0; i < (blend->single_cb_enable ? 1 : subpass->color_count); ++i) {
|
||||
unsigned cf;
|
||||
@@ -523,6 +524,16 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
|
||||
col_format |= cf << (4 * i);
|
||||
}
|
||||
|
||||
/* If the i-th target format is set, all previous target formats must
|
||||
* be non-zero to avoid hangs.
|
||||
*/
|
||||
num_targets = (util_last_bit(col_format) + 3) / 4;
|
||||
for (unsigned i = 0; i < num_targets; i++) {
|
||||
if (!(col_format & (0xf << (i * 4)))) {
|
||||
col_format |= V_028714_SPI_SHADER_32_R << (i * 4);
|
||||
}
|
||||
}
|
||||
|
||||
blend->cb_shader_mask = ac_get_cb_shader_mask(col_format);
|
||||
|
||||
if (blend->mrt0_is_dual_src)
|
||||
@@ -611,7 +622,7 @@ radv_blend_check_commutativity(struct radv_blend_state *blend,
|
||||
(1u << VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA);
|
||||
|
||||
if (dst == VK_BLEND_FACTOR_ONE &&
|
||||
(src_allowed && (1u << src))) {
|
||||
(src_allowed & (1u << src))) {
|
||||
/* Addition is commutative, but floating point addition isn't
|
||||
* associative: subtle changes can be introduced via different
|
||||
* rounding. Be conservative, only enable for min and max.
|
||||
@@ -1583,21 +1594,25 @@ static void si_multiwave_lds_size_workaround(struct radv_device *device,
|
||||
}
|
||||
|
||||
struct radv_shader_variant *
|
||||
radv_get_vertex_shader(struct radv_pipeline *pipeline)
|
||||
radv_get_shader(struct radv_pipeline *pipeline,
|
||||
gl_shader_stage stage)
|
||||
{
|
||||
if (pipeline->shaders[MESA_SHADER_VERTEX])
|
||||
return pipeline->shaders[MESA_SHADER_VERTEX];
|
||||
if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
|
||||
return pipeline->shaders[MESA_SHADER_TESS_CTRL];
|
||||
return pipeline->shaders[MESA_SHADER_GEOMETRY];
|
||||
}
|
||||
|
||||
static struct radv_shader_variant *
|
||||
radv_get_tess_eval_shader(struct radv_pipeline *pipeline)
|
||||
{
|
||||
if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
|
||||
return pipeline->shaders[MESA_SHADER_TESS_EVAL];
|
||||
return pipeline->shaders[MESA_SHADER_GEOMETRY];
|
||||
if (stage == MESA_SHADER_VERTEX) {
|
||||
if (pipeline->shaders[MESA_SHADER_VERTEX])
|
||||
return pipeline->shaders[MESA_SHADER_VERTEX];
|
||||
if (pipeline->shaders[MESA_SHADER_TESS_CTRL])
|
||||
return pipeline->shaders[MESA_SHADER_TESS_CTRL];
|
||||
if (pipeline->shaders[MESA_SHADER_GEOMETRY])
|
||||
return pipeline->shaders[MESA_SHADER_GEOMETRY];
|
||||
} else if (stage == MESA_SHADER_TESS_EVAL) {
|
||||
if (!radv_pipeline_has_tess(pipeline))
|
||||
return NULL;
|
||||
if (pipeline->shaders[MESA_SHADER_TESS_EVAL])
|
||||
return pipeline->shaders[MESA_SHADER_TESS_EVAL];
|
||||
if (pipeline->shaders[MESA_SHADER_GEOMETRY])
|
||||
return pipeline->shaders[MESA_SHADER_GEOMETRY];
|
||||
}
|
||||
return pipeline->shaders[stage];
|
||||
}
|
||||
|
||||
static struct radv_tessellation_state
|
||||
@@ -1632,7 +1647,7 @@ calculate_tess_state(struct radv_pipeline *pipeline,
|
||||
S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp);
|
||||
tess.num_patches = num_patches;
|
||||
|
||||
struct radv_shader_variant *tes = radv_get_tess_eval_shader(pipeline);
|
||||
struct radv_shader_variant *tes = radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL);
|
||||
unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0;
|
||||
|
||||
switch (tes->info.tes.primitive_mode) {
|
||||
@@ -1960,6 +1975,8 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
_mesa_sha1_compute(modules[i]->nir->info.name,
|
||||
strlen(modules[i]->nir->info.name),
|
||||
modules[i]->sha1);
|
||||
|
||||
pipeline->active_stages |= mesa_to_vk_shader_stage(i);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1975,10 +1992,6 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
|
||||
if (radv_create_shader_variants_from_pipeline_cache(device, cache, hash, pipeline->shaders) &&
|
||||
(!modules[MESA_SHADER_GEOMETRY] || pipeline->gs_copy_shader)) {
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
|
||||
if (pipeline->shaders[i])
|
||||
pipeline->active_stages |= mesa_to_vk_shader_stage(i);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2010,7 +2023,6 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
|
||||
nir[i] = radv_shader_compile_to_nir(device, modules[i],
|
||||
stage ? stage->pName : "main", i,
|
||||
stage ? stage->pSpecializationInfo : NULL);
|
||||
pipeline->active_stages |= mesa_to_vk_shader_stage(i);
|
||||
|
||||
/* We don't want to alter meta shaders IR directly so clone it
|
||||
* first.
|
||||
@@ -3141,7 +3153,7 @@ radv_pipeline_generate_vgt_vertex_reuse(struct radeon_winsys_cs *cs,
|
||||
|
||||
unsigned vtx_reuse_depth = 30;
|
||||
if (radv_pipeline_has_tess(pipeline) &&
|
||||
radv_get_tess_eval_shader(pipeline)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
|
||||
radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.tes.spacing == TESS_SPACING_FRACTIONAL_ODD) {
|
||||
vtx_reuse_depth = 14;
|
||||
}
|
||||
radeon_set_context_reg(cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL,
|
||||
@@ -3301,7 +3313,7 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
|
||||
if (radv_pipeline_has_tess(pipeline)) {
|
||||
/* SWITCH_ON_EOI must be set if PrimID is used. */
|
||||
if (pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.info.uses_prim_id ||
|
||||
radv_get_tess_eval_shader(pipeline)->info.info.uses_prim_id)
|
||||
radv_get_shader(pipeline, MESA_SHADER_TESS_EVAL)->info.info.uses_prim_id)
|
||||
ia_multi_vgt_param.ia_switch_on_eoi = true;
|
||||
}
|
||||
|
||||
@@ -3491,7 +3503,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
|
||||
if (loc->sgpr_idx != -1) {
|
||||
pipeline->graphics.vtx_base_sgpr = pipeline->user_data_0[MESA_SHADER_VERTEX];
|
||||
pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
|
||||
if (radv_get_vertex_shader(pipeline)->info.info.vs.needs_draw_id)
|
||||
if (radv_get_shader(pipeline, MESA_SHADER_VERTEX)->info.info.vs.needs_draw_id)
|
||||
pipeline->graphics.vtx_emit_num = 3;
|
||||
else
|
||||
pipeline->graphics.vtx_emit_num = 2;
|
||||
|
@@ -928,6 +928,7 @@ struct radv_descriptor_state {
|
||||
uint32_t valid;
|
||||
struct radv_push_descriptor_set push_set;
|
||||
bool push_dirty;
|
||||
uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
|
||||
};
|
||||
|
||||
struct radv_cmd_state {
|
||||
@@ -1013,7 +1014,6 @@ struct radv_cmd_buffer {
|
||||
uint32_t queue_family_index;
|
||||
|
||||
uint8_t push_constants[MAX_PUSH_CONSTANTS_SIZE];
|
||||
uint32_t dynamic_buffers[4 * MAX_DYNAMIC_BUFFERS];
|
||||
VkShaderStageFlags push_constant_stages;
|
||||
struct radv_descriptor_set meta_push_descriptors;
|
||||
|
||||
@@ -1280,7 +1280,8 @@ struct radv_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
|
||||
gl_shader_stage stage,
|
||||
int idx);
|
||||
|
||||
struct radv_shader_variant *radv_get_vertex_shader(struct radv_pipeline *pipeline);
|
||||
struct radv_shader_variant *radv_get_shader(struct radv_pipeline *pipeline,
|
||||
gl_shader_stage stage);
|
||||
|
||||
struct radv_graphics_pipeline_create_info {
|
||||
bool use_rectlist;
|
||||
|
@@ -36,6 +36,7 @@
|
||||
|
||||
#include <llvm-c/Core.h>
|
||||
#include <llvm-c/TargetMachine.h>
|
||||
#include <llvm-c/Support.h>
|
||||
|
||||
#include "sid.h"
|
||||
#include "gfx9d.h"
|
||||
@@ -458,6 +459,82 @@ radv_fill_shader_variant(struct radv_device *device,
|
||||
memcpy(ptr, binary->code, binary->code_size);
|
||||
}
|
||||
|
||||
static void radv_init_llvm_target()
|
||||
{
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUAsmPrinter();
|
||||
|
||||
/* For inline assembly. */
|
||||
LLVMInitializeAMDGPUAsmParser();
|
||||
|
||||
/* Workaround for bug in llvm 4.0 that causes image intrinsics
|
||||
* to disappear.
|
||||
* https://reviews.llvm.org/D26348
|
||||
*
|
||||
* Workaround for bug in llvm that causes the GPU to hang in presence
|
||||
* of nested loops because there is an exec mask issue. The proper
|
||||
* solution is to fix LLVM but this might require a bunch of work.
|
||||
* https://bugs.llvm.org/show_bug.cgi?id=37744
|
||||
*
|
||||
* "mesa" is the prefix for error messages.
|
||||
*/
|
||||
const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false",
|
||||
"-amdgpu-skip-threshold=1" };
|
||||
LLVMParseCommandLineOptions(3, argv, NULL);
|
||||
}
|
||||
|
||||
static once_flag radv_init_llvm_target_once_flag = ONCE_FLAG_INIT;
|
||||
|
||||
static LLVMTargetRef radv_get_llvm_target(const char *triple)
|
||||
{
|
||||
LLVMTargetRef target = NULL;
|
||||
char *err_message = NULL;
|
||||
|
||||
call_once(&radv_init_llvm_target_once_flag, radv_init_llvm_target);
|
||||
|
||||
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
|
||||
fprintf(stderr, "Cannot find target for triple %s ", triple);
|
||||
if (err_message) {
|
||||
fprintf(stderr, "%s\n", err_message);
|
||||
}
|
||||
LLVMDisposeMessage(err_message);
|
||||
return NULL;
|
||||
}
|
||||
return target;
|
||||
}
|
||||
|
||||
static LLVMTargetMachineRef radv_create_target_machine(enum radeon_family family,
|
||||
enum ac_target_machine_options tm_options,
|
||||
const char **out_triple)
|
||||
{
|
||||
assert(family >= CHIP_TAHITI);
|
||||
char features[256];
|
||||
const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
|
||||
LLVMTargetRef target = radv_get_llvm_target(triple);
|
||||
|
||||
snprintf(features, sizeof(features),
|
||||
"+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
|
||||
tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
|
||||
tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
|
||||
tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
|
||||
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
|
||||
|
||||
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
|
||||
target,
|
||||
triple,
|
||||
ac_get_llvm_processor_name(family),
|
||||
features,
|
||||
LLVMCodeGenLevelDefault,
|
||||
LLVMRelocDefault,
|
||||
LLVMCodeModelDefault);
|
||||
|
||||
if (out_triple)
|
||||
*out_triple = triple;
|
||||
return tm;
|
||||
}
|
||||
|
||||
static struct radv_shader_variant *
|
||||
shader_variant_create(struct radv_device *device,
|
||||
struct radv_shader_module *module,
|
||||
@@ -491,7 +568,7 @@ shader_variant_create(struct radv_device *device,
|
||||
tm_options |= AC_TM_SUPPORTS_SPILL;
|
||||
if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
|
||||
tm_options |= AC_TM_SISCHED;
|
||||
tm = ac_create_target_machine(chip_family, tm_options);
|
||||
tm = radv_create_target_machine(chip_family, tm_options, NULL);
|
||||
|
||||
if (gs_copy_shader) {
|
||||
assert(shader_count == 1);
|
||||
@@ -731,7 +808,7 @@ radv_GetShaderInfoAMD(VkDevice _device,
|
||||
unsigned workgroup_size = local_size[0] * local_size[1] * local_size[2];
|
||||
|
||||
statistics.numAvailableVgprs = statistics.numPhysicalVgprs /
|
||||
ceil(workgroup_size / statistics.numPhysicalVgprs);
|
||||
ceil((double)workgroup_size / statistics.numPhysicalVgprs);
|
||||
|
||||
statistics.computeWorkGroupSize[0] = local_size[0];
|
||||
statistics.computeWorkGroupSize[1] = local_size[1];
|
||||
|
@@ -119,6 +119,9 @@ def test_unix(args):
|
||||
for l in diff:
|
||||
print(l, file=sys.stderr)
|
||||
|
||||
if not total:
|
||||
raise Exception('Could not find any tests.')
|
||||
|
||||
print('{}/{}'.format(passed, total), 'tests returned correct results')
|
||||
return total == passed
|
||||
|
||||
@@ -155,6 +158,9 @@ def _replace_test(args, replace):
|
||||
for l in diff:
|
||||
print(l, file=sys.stderr)
|
||||
|
||||
if not total:
|
||||
raise Exception('Could not find any tests.')
|
||||
|
||||
print('{}/{}'.format(passed, total), 'tests returned correct results')
|
||||
return total == passed
|
||||
|
||||
@@ -197,6 +203,9 @@ def test_valgrind(args):
|
||||
print('FAIL')
|
||||
print(log, file=sys.stderr)
|
||||
|
||||
if not total:
|
||||
raise Exception('Could not find any tests.')
|
||||
|
||||
print('{}/{}'.format(passed, total), 'tests returned correct results')
|
||||
return total == passed
|
||||
|
||||
|
@@ -2676,6 +2676,7 @@ external_declaration:
|
||||
| declaration { $$ = $1; }
|
||||
| pragma_statement { $$ = NULL; }
|
||||
| layout_defaults { $$ = $1; }
|
||||
| ';' { $$ = NULL; }
|
||||
;
|
||||
|
||||
function_definition:
|
||||
|
@@ -1928,6 +1928,15 @@ nir_visitor::visit(ir_expression *ir)
|
||||
unreachable("not reached");
|
||||
}
|
||||
break;
|
||||
case ir_binop_vector_extract: {
|
||||
result = nir_channel(&b, srcs[0], 0);
|
||||
for (unsigned i = 1; i < ir->operands[0]->type->vector_elements; i++) {
|
||||
nir_ssa_def *swizzled = nir_channel(&b, srcs[0], i);
|
||||
result = nir_bcsel(&b, nir_ieq(&b, srcs[1], nir_imm_int(&b, i)),
|
||||
swizzled, result);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
|
||||
case ir_triop_fma:
|
||||
|
@@ -347,6 +347,8 @@ ir_copy_propagation_visitor::add_copy(ir_assignment *ir)
|
||||
if (lhs_var != NULL && rhs_var != NULL && lhs_var != rhs_var) {
|
||||
if (lhs_var->data.mode != ir_var_shader_storage &&
|
||||
lhs_var->data.mode != ir_var_shader_shared &&
|
||||
rhs_var->data.mode != ir_var_shader_storage &&
|
||||
rhs_var->data.mode != ir_var_shader_shared &&
|
||||
lhs_var->data.precise == rhs_var->data.precise) {
|
||||
_mesa_hash_table_insert(acp, lhs_var, rhs_var);
|
||||
}
|
||||
|
@@ -544,6 +544,10 @@ ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
|
||||
if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
|
||||
return;
|
||||
|
||||
if (lhs->var->data.mode == ir_var_shader_storage ||
|
||||
lhs->var->data.mode == ir_var_shader_shared)
|
||||
return;
|
||||
|
||||
ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
|
||||
if (!rhs) {
|
||||
ir_swizzle *swiz = ir->rhs->as_swizzle();
|
||||
@@ -560,6 +564,10 @@ ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
|
||||
orig_swizzle[3] = swiz->mask.w;
|
||||
}
|
||||
|
||||
if (rhs->var->data.mode == ir_var_shader_storage ||
|
||||
rhs->var->data.mode == ir_var_shader_shared)
|
||||
return;
|
||||
|
||||
/* Move the swizzle channels out to the positions they match in the
|
||||
* destination. We don't want to have to rewrite the swizzle[]
|
||||
* array every time we clear a bit of the write_mask.
|
||||
|
@@ -323,6 +323,14 @@ write_xfb(struct blob *metadata, struct gl_shader_program *shProg)
|
||||
|
||||
blob_write_uint32(metadata, prog->info.stage);
|
||||
|
||||
/* Data set by glTransformFeedbackVaryings. */
|
||||
blob_write_uint32(metadata, shProg->TransformFeedback.BufferMode);
|
||||
blob_write_bytes(metadata, shProg->TransformFeedback.BufferStride,
|
||||
sizeof(shProg->TransformFeedback.BufferStride));
|
||||
blob_write_uint32(metadata, shProg->TransformFeedback.NumVarying);
|
||||
for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++)
|
||||
blob_write_string(metadata, shProg->TransformFeedback.VaryingNames[i]);
|
||||
|
||||
blob_write_uint32(metadata, ltf->NumOutputs);
|
||||
blob_write_uint32(metadata, ltf->ActiveBuffers);
|
||||
blob_write_uint32(metadata, ltf->NumVarying);
|
||||
@@ -352,6 +360,18 @@ read_xfb(struct blob_reader *metadata, struct gl_shader_program *shProg)
|
||||
if (xfb_stage == ~0u)
|
||||
return;
|
||||
|
||||
/* Data set by glTransformFeedbackVaryings. */
|
||||
shProg->TransformFeedback.BufferMode = blob_read_uint32(metadata);
|
||||
blob_copy_bytes(metadata, &shProg->TransformFeedback.BufferStride,
|
||||
sizeof(shProg->TransformFeedback.BufferStride));
|
||||
shProg->TransformFeedback.NumVarying = blob_read_uint32(metadata);
|
||||
shProg->TransformFeedback.VaryingNames = (char **)
|
||||
malloc(shProg->TransformFeedback.NumVarying * sizeof(GLchar *));
|
||||
/* Note, malloc used with VaryingNames. */
|
||||
for (unsigned i = 0; i < shProg->TransformFeedback.NumVarying; i++)
|
||||
shProg->TransformFeedback.VaryingNames[i] =
|
||||
strdup(blob_read_string(metadata));
|
||||
|
||||
struct gl_program *prog = shProg->_LinkedShaders[xfb_stage]->Program;
|
||||
struct gl_transform_feedback_info *ltf =
|
||||
rzalloc(prog, struct gl_transform_feedback_info);
|
||||
|
@@ -1373,6 +1373,20 @@ visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state)
|
||||
{
|
||||
if (instr->return_deref && !visit_deref_src(instr->return_deref, cb, state))
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < instr->num_params; i++) {
|
||||
if (!visit_deref_src(instr->params[i], cb, state))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb,
|
||||
void *state)
|
||||
@@ -1449,7 +1463,8 @@ nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state)
|
||||
return false;
|
||||
break;
|
||||
case nir_instr_type_call:
|
||||
/* Call instructions have no regular sources */
|
||||
if (!visit_call_src(nir_instr_as_call(instr), cb, state))
|
||||
return false;
|
||||
break;
|
||||
case nir_instr_type_load_const:
|
||||
/* Constant load instructions have no regular sources */
|
||||
|
@@ -567,14 +567,16 @@ validate_call_instr(nir_call_instr *instr, validate_state *state)
|
||||
if (instr->return_deref == NULL) {
|
||||
validate_assert(state, glsl_type_is_void(instr->callee->return_type));
|
||||
} else {
|
||||
validate_assert(state, instr->return_deref->deref.type == instr->callee->return_type);
|
||||
validate_assert(state, instr->callee->return_type ==
|
||||
nir_deref_tail(&instr->return_deref->deref)->type);
|
||||
validate_deref_var(instr, instr->return_deref, state);
|
||||
}
|
||||
|
||||
validate_assert(state, instr->num_params == instr->callee->num_params);
|
||||
|
||||
for (unsigned i = 0; i < instr->num_params; i++) {
|
||||
validate_assert(state, instr->callee->params[i].type == instr->params[i]->deref.type);
|
||||
validate_assert(state, instr->callee->params[i].type ==
|
||||
nir_deref_tail(&instr->params[i]->deref)->type);
|
||||
validate_deref_var(instr, instr->params[i], state);
|
||||
}
|
||||
}
|
||||
|
@@ -118,6 +118,7 @@ EXTRA_DIST = \
|
||||
postprocess/ADDING \
|
||||
rbug/README \
|
||||
target-helpers \
|
||||
util/u_debug_stack_android.cpp \
|
||||
util/u_format.csv \
|
||||
util/u_format_pack.py \
|
||||
util/u_format_parse.py \
|
||||
|
@@ -484,7 +484,7 @@ files_libgalliumvl = files(
|
||||
|
||||
vlwinsys_deps = []
|
||||
files_libgalliumvlwinsys = files('vl/vl_winsys.h')
|
||||
if with_dri2
|
||||
if with_dri2 and with_platform_x11
|
||||
files_libgalliumvlwinsys += files('vl/vl_winsys_dri.c')
|
||||
if with_dri3
|
||||
vlwinsys_deps += [
|
||||
|
@@ -2182,6 +2182,7 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
ctx->ir->outputs[n] = src[i];
|
||||
}
|
||||
break;
|
||||
case nir_intrinsic_load_base_vertex:
|
||||
case nir_intrinsic_load_first_vertex:
|
||||
if (!ctx->basevertex) {
|
||||
ctx->basevertex = create_driver_param(ctx, IR3_DP_VTXID_BASE);
|
||||
|
@@ -88,6 +88,8 @@ static void r300_destroy_context(struct pipe_context* context)
|
||||
|
||||
if (r300->uploader)
|
||||
u_upload_destroy(r300->uploader);
|
||||
if (r300->context.stream_uploader)
|
||||
u_upload_destroy(r300->context.stream_uploader);
|
||||
|
||||
/* XXX: This function assumes r300->query_list was initialized */
|
||||
r300_release_referenced_objects(r300);
|
||||
@@ -424,10 +426,11 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
|
||||
r300->context.create_video_codec = vl_create_decoder;
|
||||
r300->context.create_video_buffer = vl_video_buffer_create;
|
||||
|
||||
r300->uploader = u_upload_create(&r300->context, 1024 * 1024,
|
||||
r300->uploader = u_upload_create(&r300->context, 128 * 1024,
|
||||
PIPE_BIND_CUSTOM, PIPE_USAGE_STREAM, 0);
|
||||
r300->context.stream_uploader = r300->uploader;
|
||||
r300->context.const_uploader = r300->uploader;
|
||||
r300->context.stream_uploader = u_upload_create(&r300->context, 1024 * 1024,
|
||||
0, PIPE_USAGE_STREAM, 0);
|
||||
r300->context.const_uploader = r300->context.stream_uploader;
|
||||
|
||||
r300->blitter = util_blitter_create(&r300->context);
|
||||
if (r300->blitter == NULL)
|
||||
|
@@ -124,8 +124,7 @@ void si_init_resource_fields(struct si_screen *sscreen,
|
||||
/* Older kernels didn't always flush the HDP cache before
|
||||
* CS execution
|
||||
*/
|
||||
if (sscreen->info.drm_major == 2 &&
|
||||
sscreen->info.drm_minor < 40) {
|
||||
if (!sscreen->info.kernel_flushes_hdp_before_ib) {
|
||||
res->domains = RADEON_DOMAIN_GTT;
|
||||
res->flags |= RADEON_FLAG_GTT_WC;
|
||||
break;
|
||||
@@ -151,9 +150,12 @@ void si_init_resource_fields(struct si_screen *sscreen,
|
||||
* Write-combined CPU mappings are fine, the kernel
|
||||
* ensures all CPU writes finish before the GPU
|
||||
* executes a command stream.
|
||||
*
|
||||
* radeon doesn't have good BO move throttling, so put all
|
||||
* persistent buffers into GTT to prevent VRAM CPU page faults.
|
||||
*/
|
||||
if (sscreen->info.drm_major == 2 &&
|
||||
sscreen->info.drm_minor < 40)
|
||||
if (!sscreen->info.kernel_flushes_hdp_before_ib ||
|
||||
sscreen->info.drm_major == 2)
|
||||
res->domains = RADEON_DOMAIN_GTT;
|
||||
}
|
||||
|
||||
|
@@ -94,7 +94,7 @@ static uint32_t null_image_descriptor[8] = {
|
||||
* descriptor */
|
||||
};
|
||||
|
||||
static uint64_t si_desc_extract_buffer_address(uint32_t *desc)
|
||||
static uint64_t si_desc_extract_buffer_address(const uint32_t *desc)
|
||||
{
|
||||
uint64_t va = desc[0] |
|
||||
((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
|
||||
@@ -1054,7 +1054,7 @@ static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
|
||||
*size = desc[2];
|
||||
|
||||
assert(G_008F04_STRIDE(desc[1]) == 0);
|
||||
va = ((uint64_t)desc[1] << 32) | desc[0];
|
||||
va = si_desc_extract_buffer_address(desc);
|
||||
|
||||
assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
|
||||
*offset = va - res->gpu_address;
|
||||
|
@@ -178,7 +178,7 @@ struct si_screen {
|
||||
|
||||
struct radeon_info info;
|
||||
uint64_t debug_flags;
|
||||
char renderer_string[100];
|
||||
char renderer_string[183];
|
||||
|
||||
unsigned gs_table_depth;
|
||||
unsigned tess_offchip_block_dw_size;
|
||||
|
@@ -276,10 +276,16 @@ static void image_fetch_coords(
|
||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
LLVMBuilderRef builder = ctx->ac.builder;
|
||||
unsigned target = inst->Memory.Texture;
|
||||
const unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
|
||||
unsigned num_coords = tgsi_util_get_texture_coord_dim(target);
|
||||
LLVMValueRef tmp;
|
||||
int chan;
|
||||
|
||||
if (target == TGSI_TEXTURE_2D_MSAA ||
|
||||
target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
|
||||
/* Need the sample index as well. */
|
||||
num_coords++;
|
||||
}
|
||||
|
||||
for (chan = 0; chan < num_coords; ++chan) {
|
||||
tmp = lp_build_emit_fetch(bld_base, inst, src, chan);
|
||||
tmp = ac_to_integer(&ctx->ac, tmp);
|
||||
|
@@ -528,6 +528,7 @@ static bool do_winsys_init(struct radeon_drm_winsys *ws)
|
||||
ws->accel_working2 < 3);
|
||||
ws->info.tcc_cache_line_size = 64; /* TC L2 line size on GCN */
|
||||
ws->info.ib_start_alignment = 4096;
|
||||
ws->info.kernel_flushes_hdp_before_ib = ws->info.drm_minor >= 40;
|
||||
|
||||
ws->check_vm = strstr(debug_get_option("R600_DEBUG", ""), "check_vm") != NULL;
|
||||
|
||||
|
@@ -417,7 +417,6 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
|
||||
}
|
||||
|
||||
res = util_hash_table_get(qdws->bo_handles, (void*)(uintptr_t)handle);
|
||||
fprintf(stderr, "resource %p for handle %d, pfd=%d\n", res, handle, whandle->handle);
|
||||
if (res) {
|
||||
struct virgl_hw_res *r = NULL;
|
||||
virgl_drm_resource_reference(qdws, &r, res);
|
||||
@@ -431,7 +430,6 @@ virgl_drm_winsys_resource_create_handle(struct virgl_winsys *qws,
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_FD) {
|
||||
res->bo_handle = handle;
|
||||
} else {
|
||||
fprintf(stderr, "gem open handle %d\n", handle);
|
||||
memset(&open_arg, 0, sizeof(open_arg));
|
||||
open_arg.name = whandle->handle;
|
||||
if (drmIoctl(qdws->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) {
|
||||
|
@@ -1463,7 +1463,7 @@ glXImportContextEXT(Display *dpy, GLXContextID contextID)
|
||||
if (_XReply(dpy, (xReply *) & reply, 0, False) &&
|
||||
reply.n < (INT32_MAX / 2)) {
|
||||
|
||||
for (i = 0; i < reply.n * 2; i++) {
|
||||
for (i = 0; i < reply.n; i++) {
|
||||
int prop[2];
|
||||
|
||||
_XRead(dpy, (char *)prop, sizeof(prop));
|
||||
|
@@ -69,6 +69,8 @@ EXTRA_DIST = \
|
||||
dev/meson.build \
|
||||
genxml/meson.build \
|
||||
isl/meson.build \
|
||||
tools/intel_sanitize_gpu.c \
|
||||
tools/intel_sanitize_gpu.in \
|
||||
tools/meson.build \
|
||||
vulkan/meson.build \
|
||||
meson.build
|
||||
|
@@ -200,6 +200,14 @@ emit_urb_config(struct blorp_batch *batch,
|
||||
blorp_emit_urb_config(batch, vs_entry_size, sf_entry_size);
|
||||
}
|
||||
|
||||
#if GEN_GEN >= 7
|
||||
static void
|
||||
blorp_emit_memcpy(struct blorp_batch *batch,
|
||||
struct blorp_address dst,
|
||||
struct blorp_address src,
|
||||
uint32_t size);
|
||||
#endif
|
||||
|
||||
static void
|
||||
blorp_emit_vertex_data(struct blorp_batch *batch,
|
||||
const struct blorp_params *params,
|
||||
@@ -260,6 +268,31 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
|
||||
}
|
||||
|
||||
blorp_flush_range(batch, data, *size);
|
||||
|
||||
if (params->dst_clear_color_as_input) {
|
||||
#if GEN_GEN >= 7
|
||||
/* In this case, the clear color isn't known statically and instead
|
||||
* comes in through an indirect which we have to copy into the vertex
|
||||
* buffer before we execute the 3DPRIMITIVE. We already copied the
|
||||
* value of params->wm_inputs.clear_color into the vertex buffer in the
|
||||
* loop above. Now we emit code to stomp it from the GPU with the
|
||||
* actual clear color value.
|
||||
*/
|
||||
assert(num_varyings == 1);
|
||||
|
||||
/* The clear color is the first thing after the header */
|
||||
struct blorp_address clear_color_input_addr = *addr;
|
||||
clear_color_input_addr.offset += 16;
|
||||
|
||||
const unsigned clear_color_size =
|
||||
GEN_GEN < 10 ? batch->blorp->isl_dev->ss.clear_value_size : 4 * 4;
|
||||
blorp_emit_memcpy(batch, clear_color_input_addr,
|
||||
params->dst.clear_color_addr,
|
||||
clear_color_size);
|
||||
#else
|
||||
unreachable("MCS partial resolve is not a thing on SNB and earlier");
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -298,6 +331,7 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch,
|
||||
const struct blorp_params *params)
|
||||
{
|
||||
struct GENX(VERTEX_BUFFER_STATE) vb[3];
|
||||
uint32_t num_vbs = 2;
|
||||
memset(vb, 0, sizeof(vb));
|
||||
|
||||
struct blorp_address addr;
|
||||
@@ -308,15 +342,6 @@ blorp_emit_vertex_buffers(struct blorp_batch *batch,
|
||||
blorp_emit_input_varying_data(batch, params, &addr, &size);
|
||||
blorp_fill_vertex_buffer_state(batch, vb, 1, addr, size, 0);
|
||||
|
||||
uint32_t num_vbs = 2;
|
||||
if (params->dst_clear_color_as_input) {
|
||||
const unsigned clear_color_size =
|
||||
GEN_GEN < 10 ? batch->blorp->isl_dev->ss.clear_value_size : 4 * 4;
|
||||
blorp_fill_vertex_buffer_state(batch, vb, num_vbs++,
|
||||
params->dst.clear_color_addr,
|
||||
clear_color_size, 0);
|
||||
}
|
||||
|
||||
const unsigned num_dwords = 1 + num_vbs * GENX(VERTEX_BUFFER_STATE_length);
|
||||
uint32_t *dw = blorp_emitn(batch, GENX(3DSTATE_VERTEX_BUFFERS), num_dwords);
|
||||
if (!dw)
|
||||
@@ -449,49 +474,21 @@ blorp_emit_vertex_elements(struct blorp_batch *batch,
|
||||
};
|
||||
slot++;
|
||||
|
||||
if (params->dst_clear_color_as_input) {
|
||||
/* If the caller wants the destination indirect clear color, redirect
|
||||
* to vertex buffer 2 where we stored it earlier. The only users of
|
||||
* an indirect clear color source have that as their only vertex
|
||||
* attribute.
|
||||
*/
|
||||
assert(num_varyings == 1);
|
||||
for (unsigned i = 0; i < num_varyings; ++i) {
|
||||
ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
|
||||
.VertexBufferIndex = 2,
|
||||
.VertexBufferIndex = 1,
|
||||
.Valid = true,
|
||||
.SourceElementOffset = 0,
|
||||
.Component0Control = VFCOMP_STORE_SRC,
|
||||
#if GEN_GEN >= 9
|
||||
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
|
||||
.SourceElementOffset = 16 + i * 4 * sizeof(float),
|
||||
.Component0Control = VFCOMP_STORE_SRC,
|
||||
.Component1Control = VFCOMP_STORE_SRC,
|
||||
.Component2Control = VFCOMP_STORE_SRC,
|
||||
.Component3Control = VFCOMP_STORE_SRC,
|
||||
#else
|
||||
/* Clear colors on gen7-8 are for bits out of one dword */
|
||||
.SourceElementFormat = ISL_FORMAT_R32_FLOAT,
|
||||
.Component1Control = VFCOMP_STORE_0,
|
||||
.Component2Control = VFCOMP_STORE_0,
|
||||
.Component3Control = VFCOMP_STORE_0,
|
||||
#if GEN_GEN <= 5
|
||||
.DestinationElementOffset = slot * 4,
|
||||
#endif
|
||||
};
|
||||
slot++;
|
||||
} else {
|
||||
for (unsigned i = 0; i < num_varyings; ++i) {
|
||||
ve[slot] = (struct GENX(VERTEX_ELEMENT_STATE)) {
|
||||
.VertexBufferIndex = 1,
|
||||
.Valid = true,
|
||||
.SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
|
||||
.SourceElementOffset = 16 + i * 4 * sizeof(float),
|
||||
.Component0Control = VFCOMP_STORE_SRC,
|
||||
.Component1Control = VFCOMP_STORE_SRC,
|
||||
.Component2Control = VFCOMP_STORE_SRC,
|
||||
.Component3Control = VFCOMP_STORE_SRC,
|
||||
#if GEN_GEN <= 5
|
||||
.DestinationElementOffset = slot * 4,
|
||||
#endif
|
||||
};
|
||||
slot++;
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned num_dwords =
|
||||
@@ -1244,7 +1241,7 @@ blorp_emit_pipeline(struct blorp_batch *batch,
|
||||
|
||||
#endif /* GEN_GEN >= 6 */
|
||||
|
||||
#if GEN_GEN >= 7 && GEN_GEN < 10
|
||||
#if GEN_GEN >= 7
|
||||
static void
|
||||
blorp_emit_memcpy(struct blorp_batch *batch,
|
||||
struct blorp_address dst,
|
||||
|
@@ -126,6 +126,35 @@ brw_swizzle_immediate(enum brw_reg_type type, uint32_t x, unsigned swz)
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
brw_get_default_exec_size(struct brw_codegen *p)
|
||||
{
|
||||
return brw_inst_exec_size(p->devinfo, p->current);
|
||||
}
|
||||
|
||||
unsigned
|
||||
brw_get_default_group(struct brw_codegen *p)
|
||||
{
|
||||
if (p->devinfo->gen >= 6) {
|
||||
unsigned group = brw_inst_qtr_control(p->devinfo, p->current) * 8;
|
||||
if (p->devinfo->gen >= 7)
|
||||
group += brw_inst_nib_control(p->devinfo, p->current) * 4;
|
||||
return group;
|
||||
} else {
|
||||
unsigned qtr_control = brw_inst_qtr_control(p->devinfo, p->current);
|
||||
if (qtr_control == BRW_COMPRESSION_COMPRESSED)
|
||||
return 0;
|
||||
else
|
||||
return qtr_control * 8;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned
|
||||
brw_get_default_access_mode(struct brw_codegen *p)
|
||||
{
|
||||
return brw_inst_access_mode(p->devinfo, p->current);
|
||||
}
|
||||
|
||||
void
|
||||
brw_set_default_exec_size(struct brw_codegen *p, unsigned value)
|
||||
{
|
||||
|
@@ -107,6 +107,9 @@ struct brw_codegen {
|
||||
|
||||
void brw_pop_insn_state( struct brw_codegen *p );
|
||||
void brw_push_insn_state( struct brw_codegen *p );
|
||||
unsigned brw_get_default_exec_size(struct brw_codegen *p);
|
||||
unsigned brw_get_default_group(struct brw_codegen *p);
|
||||
unsigned brw_get_default_access_mode(struct brw_codegen *p);
|
||||
void brw_set_default_exec_size(struct brw_codegen *p, unsigned value);
|
||||
void brw_set_default_mask_control( struct brw_codegen *p, unsigned value );
|
||||
void brw_set_default_saturate( struct brw_codegen *p, bool enable );
|
||||
|
@@ -621,6 +621,101 @@ gen7_set_dp_scratch_message(struct brw_codegen *p,
|
||||
brw_inst_set_scratch_addr_offset(devinfo, inst, addr_offset);
|
||||
}
|
||||
|
||||
struct brw_insn_state {
|
||||
/* One of BRW_EXECUTE_* */
|
||||
unsigned exec_size:3;
|
||||
|
||||
/* Group in units of channels */
|
||||
unsigned group:5;
|
||||
|
||||
/* Compression control on gen4-5 */
|
||||
bool compressed:1;
|
||||
|
||||
/* One of BRW_MASK_* */
|
||||
unsigned mask_control:1;
|
||||
|
||||
bool saturate:1;
|
||||
|
||||
/* One of BRW_ALIGN_* */
|
||||
unsigned access_mode:1;
|
||||
|
||||
/* One of BRW_PREDICATE_* */
|
||||
enum brw_predicate predicate:4;
|
||||
|
||||
bool pred_inv:1;
|
||||
|
||||
/* Flag subreg. Bottom bit is subreg, top bit is reg */
|
||||
unsigned flag_subreg:2;
|
||||
|
||||
bool acc_wr_control:1;
|
||||
};
|
||||
|
||||
static struct brw_insn_state
|
||||
brw_inst_get_state(const struct gen_device_info *devinfo,
|
||||
const brw_inst *insn)
|
||||
{
|
||||
struct brw_insn_state state = { };
|
||||
|
||||
state.exec_size = brw_inst_exec_size(devinfo, insn);
|
||||
if (devinfo->gen >= 6) {
|
||||
state.group = brw_inst_qtr_control(devinfo, insn) * 8;
|
||||
if (devinfo->gen >= 7)
|
||||
state.group += brw_inst_nib_control(devinfo, insn) * 4;
|
||||
} else {
|
||||
unsigned qtr_control = brw_inst_qtr_control(devinfo, insn);
|
||||
if (qtr_control == BRW_COMPRESSION_COMPRESSED) {
|
||||
state.group = 0;
|
||||
state.compressed = true;
|
||||
} else {
|
||||
state.group = qtr_control * 8;
|
||||
state.compressed = false;
|
||||
}
|
||||
}
|
||||
state.access_mode = brw_inst_access_mode(devinfo, insn);
|
||||
state.mask_control = brw_inst_mask_control(devinfo, insn);
|
||||
state.saturate = brw_inst_saturate(devinfo, insn);
|
||||
state.predicate = brw_inst_pred_control(devinfo, insn);
|
||||
state.pred_inv = brw_inst_pred_inv(devinfo, insn);
|
||||
|
||||
state.flag_subreg = brw_inst_flag_subreg_nr(devinfo, insn);
|
||||
if (devinfo->gen >= 7)
|
||||
state.flag_subreg += brw_inst_flag_reg_nr(devinfo, insn) * 2;
|
||||
|
||||
if (devinfo->gen >= 6)
|
||||
state.acc_wr_control = brw_inst_acc_wr_control(devinfo, insn);
|
||||
|
||||
return state;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_inst_set_state(const struct gen_device_info *devinfo,
|
||||
brw_inst *insn,
|
||||
const struct brw_insn_state *state)
|
||||
{
|
||||
brw_inst_set_exec_size(devinfo, insn, state->exec_size);
|
||||
brw_inst_set_group(devinfo, insn, state->group);
|
||||
brw_inst_set_compression(devinfo, insn, state->compressed);
|
||||
brw_inst_set_access_mode(devinfo, insn, state->access_mode);
|
||||
brw_inst_set_mask_control(devinfo, insn, state->mask_control);
|
||||
brw_inst_set_saturate(devinfo, insn, state->saturate);
|
||||
brw_inst_set_pred_control(devinfo, insn, state->predicate);
|
||||
brw_inst_set_pred_inv(devinfo, insn, state->pred_inv);
|
||||
|
||||
if (is_3src(devinfo, brw_inst_opcode(devinfo, insn)) &&
|
||||
state->access_mode == BRW_ALIGN_16) {
|
||||
brw_inst_set_3src_a16_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
|
||||
if (devinfo->gen >= 7)
|
||||
brw_inst_set_3src_a16_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
|
||||
} else {
|
||||
brw_inst_set_flag_subreg_nr(devinfo, insn, state->flag_subreg % 2);
|
||||
if (devinfo->gen >= 7)
|
||||
brw_inst_set_flag_reg_nr(devinfo, insn, state->flag_subreg / 2);
|
||||
}
|
||||
|
||||
if (devinfo->gen >= 6)
|
||||
brw_inst_set_acc_wr_control(devinfo, insn, state->acc_wr_control);
|
||||
}
|
||||
|
||||
#define next_insn brw_next_insn
|
||||
brw_inst *
|
||||
brw_next_insn(struct brw_codegen *p, unsigned opcode)
|
||||
@@ -635,9 +730,14 @@ brw_next_insn(struct brw_codegen *p, unsigned opcode)
|
||||
|
||||
p->next_insn_offset += 16;
|
||||
insn = &p->store[p->nr_insn++];
|
||||
memcpy(insn, p->current, sizeof(*insn));
|
||||
|
||||
memset(insn, 0, sizeof(*insn));
|
||||
brw_inst_set_opcode(devinfo, insn, opcode);
|
||||
|
||||
/* Apply the default instruction state */
|
||||
struct brw_insn_state current = brw_inst_get_state(devinfo, p->current);
|
||||
brw_inst_set_state(devinfo, insn, ¤t);
|
||||
|
||||
return insn;
|
||||
}
|
||||
|
||||
@@ -997,7 +1097,7 @@ brw_MOV(struct brw_codegen *p, struct brw_reg dest, struct brw_reg src0)
|
||||
* each element twice.
|
||||
*/
|
||||
if (devinfo->gen == 7 && !devinfo->is_haswell &&
|
||||
brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1 &&
|
||||
brw_get_default_access_mode(p) == BRW_ALIGN_1 &&
|
||||
dest.type == BRW_REGISTER_TYPE_DF &&
|
||||
(src0.type == BRW_REGISTER_TYPE_F ||
|
||||
src0.type == BRW_REGISTER_TYPE_D ||
|
||||
@@ -1119,7 +1219,7 @@ brw_inst *
|
||||
brw_F32TO16(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
const bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
|
||||
const bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
|
||||
/* The F32TO16 instruction doesn't support 32-bit destination types in
|
||||
* Align1 mode, and neither does the Gen8 implementation in terms of a
|
||||
* converting MOV. Gen7 does zero out the high 16 bits in Align16 mode as
|
||||
@@ -1166,7 +1266,7 @@ brw_inst *
|
||||
brw_F16TO32(struct brw_codegen *p, struct brw_reg dst, struct brw_reg src)
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
bool align16 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_16;
|
||||
bool align16 = brw_get_default_access_mode(p) == BRW_ALIGN_16;
|
||||
|
||||
if (align16) {
|
||||
assert(src.type == BRW_REGISTER_TYPE_UD);
|
||||
@@ -1337,8 +1437,7 @@ gen6_IF(struct brw_codegen *p, enum brw_conditional_mod conditional,
|
||||
insn = next_insn(p, BRW_OPCODE_IF);
|
||||
|
||||
brw_set_dest(p, insn, brw_imm_w(0));
|
||||
brw_inst_set_exec_size(devinfo, insn,
|
||||
brw_inst_exec_size(devinfo, p->current));
|
||||
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
|
||||
brw_inst_set_gen6_jump_count(devinfo, insn, 0);
|
||||
brw_set_src0(p, insn, src0);
|
||||
brw_set_src1(p, insn, src1);
|
||||
@@ -1624,8 +1723,7 @@ brw_BREAK(struct brw_codegen *p)
|
||||
p->if_depth_in_loop[p->loop_stack_depth]);
|
||||
}
|
||||
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
|
||||
brw_inst_set_exec_size(devinfo, insn,
|
||||
brw_inst_exec_size(devinfo, p->current));
|
||||
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
|
||||
|
||||
return insn;
|
||||
}
|
||||
@@ -1650,8 +1748,7 @@ brw_CONT(struct brw_codegen *p)
|
||||
p->if_depth_in_loop[p->loop_stack_depth]);
|
||||
}
|
||||
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
|
||||
brw_inst_set_exec_size(devinfo, insn,
|
||||
brw_inst_exec_size(devinfo, p->current));
|
||||
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
|
||||
return insn;
|
||||
}
|
||||
|
||||
@@ -1671,8 +1768,7 @@ gen6_HALT(struct brw_codegen *p)
|
||||
}
|
||||
|
||||
brw_inst_set_qtr_control(devinfo, insn, BRW_COMPRESSION_NONE);
|
||||
brw_inst_set_exec_size(devinfo, insn,
|
||||
brw_inst_exec_size(devinfo, p->current));
|
||||
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
|
||||
return insn;
|
||||
}
|
||||
|
||||
@@ -1778,8 +1874,7 @@ brw_WHILE(struct brw_codegen *p)
|
||||
brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
|
||||
}
|
||||
|
||||
brw_inst_set_exec_size(devinfo, insn,
|
||||
brw_inst_exec_size(devinfo, p->current));
|
||||
brw_inst_set_exec_size(devinfo, insn, brw_get_default_exec_size(p));
|
||||
|
||||
} else {
|
||||
if (p->single_program_flow) {
|
||||
@@ -2207,7 +2302,7 @@ void brw_oword_block_read(struct brw_codegen *p,
|
||||
const unsigned target_cache =
|
||||
(devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_CONSTANT_CACHE :
|
||||
BRW_DATAPORT_READ_TARGET_DATA_CACHE);
|
||||
const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
|
||||
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
|
||||
|
||||
/* On newer hardware, offset is in units of owords. */
|
||||
if (devinfo->gen >= 6)
|
||||
@@ -2277,7 +2372,7 @@ void brw_fb_WRITE(struct brw_codegen *p,
|
||||
unsigned msg_type;
|
||||
struct brw_reg dest, src0;
|
||||
|
||||
if (brw_inst_exec_size(devinfo, p->current) >= BRW_EXECUTE_16)
|
||||
if (brw_get_default_exec_size(p) >= BRW_EXECUTE_16)
|
||||
dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
|
||||
else
|
||||
dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
|
||||
@@ -2330,7 +2425,7 @@ gen9_fb_READ(struct brw_codegen *p,
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
assert(devinfo->gen >= 9);
|
||||
const unsigned msg_subtype =
|
||||
brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16 ? 0 : 1;
|
||||
brw_get_default_exec_size(p) == BRW_EXECUTE_16 ? 0 : 1;
|
||||
brw_inst *insn = next_insn(p, BRW_OPCODE_SENDC);
|
||||
|
||||
brw_set_dest(p, insn, dst);
|
||||
@@ -2341,8 +2436,7 @@ gen9_fb_READ(struct brw_codegen *p,
|
||||
GEN6_SFID_DATAPORT_RENDER_CACHE,
|
||||
msg_length, true /* header_present */,
|
||||
response_length);
|
||||
brw_inst_set_rt_slot_group(devinfo, insn,
|
||||
brw_inst_qtr_control(devinfo, p->current) / 2);
|
||||
brw_inst_set_rt_slot_group(devinfo, insn, brw_get_default_group(p) / 16);
|
||||
|
||||
return insn;
|
||||
}
|
||||
@@ -2837,11 +2931,9 @@ brw_surface_payload_size(struct brw_codegen *p,
|
||||
bool has_simd4x2,
|
||||
bool has_simd16)
|
||||
{
|
||||
if (has_simd4x2 &&
|
||||
brw_inst_access_mode(p->devinfo, p->current) == BRW_ALIGN_16)
|
||||
if (has_simd4x2 && brw_get_default_access_mode(p) == BRW_ALIGN_16)
|
||||
return 1;
|
||||
else if (has_simd16 &&
|
||||
brw_inst_exec_size(p->devinfo, p->current) == BRW_EXECUTE_16)
|
||||
else if (has_simd16 && brw_get_default_exec_size(p) == BRW_EXECUTE_16)
|
||||
return 2 * num_channels;
|
||||
else
|
||||
return num_channels;
|
||||
@@ -2859,8 +2951,8 @@ brw_set_dp_untyped_atomic_message(struct brw_codegen *p,
|
||||
(response_expected ? 1 << 5 : 0); /* Return data expected */
|
||||
|
||||
if (devinfo->gen >= 8 || devinfo->is_haswell) {
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if (brw_get_default_exec_size(p) != BRW_EXECUTE_16)
|
||||
msg_control |= 1 << 4; /* SIMD8 mode */
|
||||
|
||||
brw_inst_set_dp_msg_type(devinfo, insn,
|
||||
@@ -2873,7 +2965,7 @@ brw_set_dp_untyped_atomic_message(struct brw_codegen *p,
|
||||
brw_inst_set_dp_msg_type(devinfo, insn,
|
||||
GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP);
|
||||
|
||||
if (brw_inst_exec_size(devinfo, p->current) != BRW_EXECUTE_16)
|
||||
if (brw_get_default_exec_size(p) != BRW_EXECUTE_16)
|
||||
msg_control |= 1 << 4; /* SIMD8 mode */
|
||||
}
|
||||
|
||||
@@ -2894,7 +2986,7 @@ brw_untyped_atomic(struct brw_codegen *p,
|
||||
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
||||
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
||||
GEN7_SFID_DATAPORT_DATA_CACHE);
|
||||
const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
|
||||
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
||||
/* Mask out unused components -- This is especially important in Align16
|
||||
* mode on generations that don't have native support for SIMD4x2 atomics,
|
||||
* because unused but enabled components will cause the dataport to perform
|
||||
@@ -2921,8 +3013,8 @@ brw_set_dp_untyped_surface_read_message(struct brw_codegen *p,
|
||||
/* Set mask of 32-bit channels to drop. */
|
||||
unsigned msg_control = 0xf & (0xf << num_channels);
|
||||
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
|
||||
msg_control |= 1 << 4; /* SIMD16 mode */
|
||||
else
|
||||
msg_control |= 2 << 4; /* SIMD8 mode */
|
||||
@@ -2965,8 +3057,8 @@ brw_set_dp_untyped_surface_write_message(struct brw_codegen *p,
|
||||
/* Set mask of 32-bit channels to drop. */
|
||||
unsigned msg_control = 0xf & (0xf << num_channels);
|
||||
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
|
||||
msg_control |= 1 << 4; /* SIMD16 mode */
|
||||
else
|
||||
msg_control |= 2 << 4; /* SIMD8 mode */
|
||||
@@ -2996,7 +3088,7 @@ brw_untyped_surface_write(struct brw_codegen *p,
|
||||
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
||||
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
||||
GEN7_SFID_DATAPORT_DATA_CACHE);
|
||||
const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
|
||||
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
||||
/* Mask out unused components -- See comment in brw_untyped_atomic(). */
|
||||
const unsigned mask = devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
|
||||
WRITEMASK_X : WRITEMASK_XYZW;
|
||||
@@ -3034,7 +3126,7 @@ brw_byte_scattered_read(struct brw_codegen *p,
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
assert(devinfo->gen > 7 || devinfo->is_haswell);
|
||||
assert(brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
|
||||
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
|
||||
const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
|
||||
|
||||
struct brw_inst *insn = brw_send_indirect_surface_message(
|
||||
@@ -3045,7 +3137,7 @@ brw_byte_scattered_read(struct brw_codegen *p,
|
||||
unsigned msg_control =
|
||||
brw_byte_scattered_data_element_from_bit_size(bit_size) << 2;
|
||||
|
||||
if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16)
|
||||
if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
|
||||
msg_control |= 1; /* SIMD16 mode */
|
||||
else
|
||||
msg_control |= 0; /* SIMD8 mode */
|
||||
@@ -3065,7 +3157,7 @@ brw_byte_scattered_write(struct brw_codegen *p,
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
assert(devinfo->gen > 7 || devinfo->is_haswell);
|
||||
assert(brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
|
||||
assert(brw_get_default_access_mode(p) == BRW_ALIGN_1);
|
||||
const unsigned sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
|
||||
|
||||
struct brw_inst *insn = brw_send_indirect_surface_message(
|
||||
@@ -3075,7 +3167,7 @@ brw_byte_scattered_write(struct brw_codegen *p,
|
||||
unsigned msg_control =
|
||||
brw_byte_scattered_data_element_from_bit_size(bit_size) << 2;
|
||||
|
||||
if (brw_inst_exec_size(devinfo, p->current) == BRW_EXECUTE_16)
|
||||
if (brw_get_default_exec_size(p) == BRW_EXECUTE_16)
|
||||
msg_control |= 1;
|
||||
else
|
||||
msg_control |= 0;
|
||||
@@ -3097,8 +3189,8 @@ brw_set_dp_typed_atomic_message(struct brw_codegen *p,
|
||||
(response_expected ? 1 << 5 : 0); /* Return data expected */
|
||||
|
||||
if (devinfo->gen >= 8 || devinfo->is_haswell) {
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if ((brw_get_default_group(p) / 8) % 2 == 1)
|
||||
msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
|
||||
|
||||
brw_inst_set_dp_msg_type(devinfo, insn,
|
||||
@@ -3112,7 +3204,7 @@ brw_set_dp_typed_atomic_message(struct brw_codegen *p,
|
||||
brw_inst_set_dp_msg_type(devinfo, insn,
|
||||
GEN7_DATAPORT_RC_TYPED_ATOMIC_OP);
|
||||
|
||||
if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
|
||||
if ((brw_get_default_group(p) / 8) % 2 == 1)
|
||||
msg_control |= 1 << 4; /* Use high 8 slots of the sample mask */
|
||||
}
|
||||
|
||||
@@ -3132,7 +3224,7 @@ brw_typed_atomic(struct brw_codegen *p,
|
||||
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
||||
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
||||
GEN6_SFID_DATAPORT_RENDER_CACHE);
|
||||
const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
|
||||
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
||||
/* Mask out unused components -- See comment in brw_untyped_atomic(). */
|
||||
const unsigned mask = align1 ? WRITEMASK_XYZW : WRITEMASK_X;
|
||||
struct brw_inst *insn = brw_send_indirect_surface_message(
|
||||
@@ -3155,8 +3247,8 @@ brw_set_dp_typed_surface_read_message(struct brw_codegen *p,
|
||||
unsigned msg_control = 0xf & (0xf << num_channels);
|
||||
|
||||
if (devinfo->gen >= 8 || devinfo->is_haswell) {
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if ((brw_get_default_group(p) / 8) % 2 == 1)
|
||||
msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
|
||||
else
|
||||
msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
|
||||
@@ -3165,8 +3257,8 @@ brw_set_dp_typed_surface_read_message(struct brw_codegen *p,
|
||||
brw_inst_set_dp_msg_type(devinfo, insn,
|
||||
HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ);
|
||||
} else {
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if ((brw_get_default_group(p) / 8) % 2 == 1)
|
||||
msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
|
||||
}
|
||||
|
||||
@@ -3210,8 +3302,8 @@ brw_set_dp_typed_surface_write_message(struct brw_codegen *p,
|
||||
unsigned msg_control = 0xf & (0xf << num_channels);
|
||||
|
||||
if (devinfo->gen >= 8 || devinfo->is_haswell) {
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if ((brw_get_default_group(p) / 8) % 2 == 1)
|
||||
msg_control |= 2 << 4; /* Use high 8 slots of the sample mask */
|
||||
else
|
||||
msg_control |= 1 << 4; /* Use low 8 slots of the sample mask */
|
||||
@@ -3221,8 +3313,8 @@ brw_set_dp_typed_surface_write_message(struct brw_codegen *p,
|
||||
HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE);
|
||||
|
||||
} else {
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_inst_qtr_control(devinfo, p->current) % 2 == 1)
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
if ((brw_get_default_group(p) / 8) % 2 == 1)
|
||||
msg_control |= 1 << 5; /* Use high 8 slots of the sample mask */
|
||||
}
|
||||
|
||||
@@ -3245,7 +3337,7 @@ brw_typed_surface_write(struct brw_codegen *p,
|
||||
const unsigned sfid = (devinfo->gen >= 8 || devinfo->is_haswell ?
|
||||
HSW_SFID_DATAPORT_DATA_CACHE_1 :
|
||||
GEN6_SFID_DATAPORT_RENDER_CACHE);
|
||||
const bool align1 = (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1);
|
||||
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
||||
/* Mask out unused components -- See comment in brw_untyped_atomic(). */
|
||||
const unsigned mask = (devinfo->gen == 7 && !devinfo->is_haswell && !align1 ?
|
||||
WRITEMASK_X : WRITEMASK_XYZW);
|
||||
@@ -3345,7 +3437,7 @@ brw_pixel_interpolator_query(struct brw_codegen *p,
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
struct brw_inst *insn;
|
||||
const uint16_t exec_size = brw_inst_exec_size(devinfo, p->current);
|
||||
const uint16_t exec_size = brw_get_default_exec_size(p);
|
||||
|
||||
/* brw_send_indirect_message will automatically use a direct send message
|
||||
* if data is actually immediate.
|
||||
@@ -3369,8 +3461,8 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
||||
struct brw_reg mask)
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
const unsigned exec_size = 1 << brw_inst_exec_size(devinfo, p->current);
|
||||
const unsigned qtr_control = brw_inst_qtr_control(devinfo, p->current);
|
||||
const unsigned exec_size = 1 << brw_get_default_exec_size(p);
|
||||
const unsigned qtr_control = brw_get_default_group(p) / 8;
|
||||
brw_inst *inst;
|
||||
|
||||
assert(devinfo->gen >= 7);
|
||||
@@ -3378,7 +3470,7 @@ brw_find_live_channel(struct brw_codegen *p, struct brw_reg dst,
|
||||
|
||||
brw_push_insn_state(p);
|
||||
|
||||
if (brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1) {
|
||||
if (brw_get_default_access_mode(p) == BRW_ALIGN_1) {
|
||||
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
|
||||
|
||||
if (devinfo->gen >= 8) {
|
||||
@@ -3485,7 +3577,7 @@ brw_broadcast(struct brw_codegen *p,
|
||||
struct brw_reg idx)
|
||||
{
|
||||
const struct gen_device_info *devinfo = p->devinfo;
|
||||
const bool align1 = brw_inst_access_mode(devinfo, p->current) == BRW_ALIGN_1;
|
||||
const bool align1 = brw_get_default_access_mode(p) == BRW_ALIGN_1;
|
||||
brw_inst *inst;
|
||||
|
||||
brw_push_insn_state(p);
|
||||
|
@@ -2039,7 +2039,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
assert(devinfo->gen == 6);
|
||||
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
|
||||
} else {
|
||||
brw_IF(p, brw_inst_exec_size(devinfo, p->current));
|
||||
brw_IF(p, brw_get_default_exec_size(p));
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -2051,7 +2051,7 @@ fs_generator::generate_code(const cfg_t *cfg, int dispatch_width)
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_DO:
|
||||
brw_DO(p, brw_inst_exec_size(devinfo, p->current));
|
||||
brw_DO(p, brw_get_default_exec_size(p));
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_BREAK:
|
||||
|
@@ -350,27 +350,27 @@ gen6_gs_visitor::emit_thread_end()
|
||||
int max_usable_mrf = FIRST_SPILL_MRF(devinfo->gen);
|
||||
|
||||
/* Issue the FF_SYNC message and obtain the initial VUE handle. */
|
||||
this->current_annotation = "gen6 thread end: ff_sync";
|
||||
|
||||
vec4_instruction *inst = NULL;
|
||||
if (prog->info.has_transform_feedback_varyings) {
|
||||
src_reg sol_temp(this, glsl_type::uvec4_type);
|
||||
emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
|
||||
dst_reg(this->svbi),
|
||||
this->vertex_count,
|
||||
this->prim_count,
|
||||
sol_temp);
|
||||
inst = emit(GS_OPCODE_FF_SYNC,
|
||||
dst_reg(this->temp), this->prim_count, this->svbi);
|
||||
} else {
|
||||
inst = emit(GS_OPCODE_FF_SYNC,
|
||||
dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
|
||||
}
|
||||
inst->base_mrf = base_mrf;
|
||||
|
||||
emit(CMP(dst_null_ud(), this->vertex_count, brw_imm_ud(0u), BRW_CONDITIONAL_G));
|
||||
emit(IF(BRW_PREDICATE_NORMAL));
|
||||
{
|
||||
this->current_annotation = "gen6 thread end: ff_sync";
|
||||
|
||||
vec4_instruction *inst;
|
||||
if (prog->info.has_transform_feedback_varyings) {
|
||||
src_reg sol_temp(this, glsl_type::uvec4_type);
|
||||
emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
|
||||
dst_reg(this->svbi),
|
||||
this->vertex_count,
|
||||
this->prim_count,
|
||||
sol_temp);
|
||||
inst = emit(GS_OPCODE_FF_SYNC,
|
||||
dst_reg(this->temp), this->prim_count, this->svbi);
|
||||
} else {
|
||||
inst = emit(GS_OPCODE_FF_SYNC,
|
||||
dst_reg(this->temp), this->prim_count, brw_imm_ud(0u));
|
||||
}
|
||||
inst->base_mrf = base_mrf;
|
||||
|
||||
/* Loop over all buffered vertices and emit URB write messages */
|
||||
this->current_annotation = "gen6 thread end: urb writes init";
|
||||
src_reg vertex(this, glsl_type::uint_type);
|
||||
@@ -414,7 +414,7 @@ gen6_gs_visitor::emit_thread_end()
|
||||
dst_reg reg = dst_reg(MRF, mrf);
|
||||
reg.type = output_reg[varying][0].type;
|
||||
data.type = reg.type;
|
||||
vec4_instruction *inst = emit(MOV(reg, data));
|
||||
inst = emit(MOV(reg, data));
|
||||
inst->force_writemask_all = true;
|
||||
|
||||
mrf++;
|
||||
@@ -460,7 +460,7 @@ gen6_gs_visitor::emit_thread_end()
|
||||
*
|
||||
* However, this would lead us to end the program with an ENDIF opcode,
|
||||
* which we want to avoid, so what we do is that we always request a new
|
||||
* VUE handle every time we do a URB WRITE, even for the last vertex we emit.
|
||||
* VUE handle every time, even if GS produces no output.
|
||||
* With this we make sure that whether we have emitted at least one vertex
|
||||
* or none at all, we have to finish the thread without writing to the URB,
|
||||
* which works for both cases by setting the COMPLETE and UNUSED flags in
|
||||
@@ -476,7 +476,7 @@ gen6_gs_visitor::emit_thread_end()
|
||||
emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
|
||||
}
|
||||
|
||||
vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
|
||||
inst = emit(GS_OPCODE_THREAD_END);
|
||||
inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
|
||||
inst->base_mrf = base_mrf;
|
||||
inst->mlen = 1;
|
||||
|
@@ -389,6 +389,9 @@ enum isl_format {
|
||||
ISL_FORMAT_GEN9_CCS_64BPP,
|
||||
ISL_FORMAT_GEN9_CCS_128BPP,
|
||||
|
||||
/* An upper bound on the supported format enumerations */
|
||||
ISL_NUM_FORMATS,
|
||||
|
||||
/* Hardware doesn't understand this out-of-band value */
|
||||
ISL_FORMAT_UNSUPPORTED = UINT16_MAX,
|
||||
};
|
||||
@@ -1422,6 +1425,8 @@ isl_device_get_sample_counts(struct isl_device *dev);
|
||||
static inline const struct isl_format_layout * ATTRIBUTE_CONST
|
||||
isl_format_get_layout(enum isl_format fmt)
|
||||
{
|
||||
assert(fmt != ISL_FORMAT_UNSUPPORTED);
|
||||
assert(fmt < ISL_NUM_FORMATS);
|
||||
return &isl_format_layouts[fmt];
|
||||
}
|
||||
|
||||
@@ -1430,7 +1435,7 @@ bool isl_format_is_valid(enum isl_format);
|
||||
static inline const char * ATTRIBUTE_CONST
|
||||
isl_format_get_name(enum isl_format fmt)
|
||||
{
|
||||
return isl_format_layouts[fmt].name;
|
||||
return isl_format_get_layout(fmt)->name;
|
||||
}
|
||||
|
||||
bool isl_format_supports_rendering(const struct gen_device_info *devinfo,
|
||||
@@ -1545,7 +1550,7 @@ isl_format_block_is_1x1x1(enum isl_format fmt)
|
||||
static inline bool
|
||||
isl_format_is_srgb(enum isl_format fmt)
|
||||
{
|
||||
return isl_format_layouts[fmt].colorspace == ISL_COLORSPACE_SRGB;
|
||||
return isl_format_get_layout(fmt)->colorspace == ISL_COLORSPACE_SRGB;
|
||||
}
|
||||
|
||||
enum isl_format isl_format_srgb_to_linear(enum isl_format fmt);
|
||||
@@ -1555,10 +1560,13 @@ isl_format_is_rgb(enum isl_format fmt)
|
||||
{
|
||||
if (isl_format_is_yuv(fmt))
|
||||
return false;
|
||||
return isl_format_layouts[fmt].channels.r.bits > 0 &&
|
||||
isl_format_layouts[fmt].channels.g.bits > 0 &&
|
||||
isl_format_layouts[fmt].channels.b.bits > 0 &&
|
||||
isl_format_layouts[fmt].channels.a.bits == 0;
|
||||
|
||||
const struct isl_format_layout *fmtl = isl_format_get_layout(fmt);
|
||||
|
||||
return fmtl->channels.r.bits > 0 &&
|
||||
fmtl->channels.g.bits > 0 &&
|
||||
fmtl->channels.b.bits > 0 &&
|
||||
fmtl->channels.a.bits == 0;
|
||||
}
|
||||
|
||||
enum isl_format isl_format_rgb_to_rgba(enum isl_format rgb) ATTRIBUTE_CONST;
|
||||
|
@@ -365,11 +365,19 @@ format_gen(const struct gen_device_info *devinfo)
|
||||
return devinfo->gen * 10 + (devinfo->is_g4x || devinfo->is_haswell) * 5;
|
||||
}
|
||||
|
||||
static bool
|
||||
format_info_exists(enum isl_format format)
|
||||
{
|
||||
assert(format != ISL_FORMAT_UNSUPPORTED);
|
||||
assert(format < ISL_NUM_FORMATS);
|
||||
return format < ARRAY_SIZE(format_info) && format_info[format].exists;
|
||||
}
|
||||
|
||||
bool
|
||||
isl_format_supports_rendering(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
return format_gen(devinfo) >= format_info[format].render_target;
|
||||
@@ -379,7 +387,7 @@ bool
|
||||
isl_format_supports_alpha_blending(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
return format_gen(devinfo) >= format_info[format].alpha_blend;
|
||||
@@ -389,7 +397,7 @@ bool
|
||||
isl_format_supports_sampling(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
if (devinfo->is_baytrail) {
|
||||
@@ -422,7 +430,7 @@ bool
|
||||
isl_format_supports_filtering(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
if (devinfo->is_baytrail) {
|
||||
@@ -455,7 +463,7 @@ bool
|
||||
isl_format_supports_vertex_fetch(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
/* For vertex fetch, Bay Trail supports the same set of formats as Haswell
|
||||
@@ -474,7 +482,7 @@ bool
|
||||
isl_format_supports_typed_writes(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
return format_gen(devinfo) >= format_info[format].typed_write;
|
||||
@@ -495,7 +503,7 @@ bool
|
||||
isl_format_supports_typed_reads(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
return format_gen(devinfo) >= format_info[format].typed_read;
|
||||
@@ -533,7 +541,7 @@ bool
|
||||
isl_format_supports_ccs_e(const struct gen_device_info *devinfo,
|
||||
enum isl_format format)
|
||||
{
|
||||
if (!format_info[format].exists)
|
||||
if (!format_info_exists(format))
|
||||
return false;
|
||||
|
||||
/* For simplicity, only report that a format supports CCS_E if blorp can
|
||||
|
@@ -312,6 +312,6 @@ isl_buffer_fill_image_param(const struct isl_device *dev,
|
||||
{
|
||||
*param = image_param_defaults;
|
||||
|
||||
param->stride[0] = isl_format_layouts[format].bpb / 8;
|
||||
param->stride[0] = isl_format_get_layout(format)->bpb / 8;
|
||||
param->size[0] = size / param->stride[0];
|
||||
}
|
||||
|
@@ -54,7 +54,7 @@ foreach g : [['40', isl_gen4_files], ['50', []], ['60', isl_gen6_files],
|
||||
['90', isl_gen9_files], ['100', []], ['110', []]]
|
||||
_gen = g[0]
|
||||
isl_gen_libs += static_library(
|
||||
'libisl_gen@0@'.format(_gen),
|
||||
'isl_gen@0@'.format(_gen),
|
||||
[g[1], isl_gen_files, gen_xml_pack],
|
||||
include_directories : [inc_common, inc_intel],
|
||||
c_args : [c_vis_args, no_override_init_args,
|
||||
|
@@ -36,7 +36,9 @@
|
||||
#include <valgrind.h>
|
||||
#include <memcheck.h>
|
||||
#define VG(x) x
|
||||
#ifndef NDEBUG
|
||||
#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
|
||||
#endif
|
||||
#else
|
||||
#define VG(x)
|
||||
#endif
|
||||
|
@@ -311,18 +311,21 @@ anv_fence_impl_cleanup(struct anv_device *device,
|
||||
switch (impl->type) {
|
||||
case ANV_FENCE_TYPE_NONE:
|
||||
/* Dummy. Nothing to do */
|
||||
return;
|
||||
break;
|
||||
|
||||
case ANV_FENCE_TYPE_BO:
|
||||
anv_bo_pool_free(&device->batch_bo_pool, &impl->bo.bo);
|
||||
return;
|
||||
break;
|
||||
|
||||
case ANV_FENCE_TYPE_SYNCOBJ:
|
||||
anv_gem_syncobj_destroy(device, impl->syncobj);
|
||||
return;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid fence type");
|
||||
}
|
||||
|
||||
unreachable("Invalid fence type");
|
||||
impl->type = ANV_FENCE_TYPE_NONE;
|
||||
}
|
||||
|
||||
void anv_DestroyFence(
|
||||
@@ -359,10 +362,8 @@ VkResult anv_ResetFences(
|
||||
* first restored. The remaining operations described therefore
|
||||
* operate on the restored payload.
|
||||
*/
|
||||
if (fence->temporary.type != ANV_FENCE_TYPE_NONE) {
|
||||
if (fence->temporary.type != ANV_FENCE_TYPE_NONE)
|
||||
anv_fence_impl_cleanup(device, &fence->temporary);
|
||||
fence->temporary.type = ANV_FENCE_TYPE_NONE;
|
||||
}
|
||||
|
||||
struct anv_fence_impl *impl = &fence->permanent;
|
||||
|
||||
@@ -914,22 +915,25 @@ anv_semaphore_impl_cleanup(struct anv_device *device,
|
||||
case ANV_SEMAPHORE_TYPE_NONE:
|
||||
case ANV_SEMAPHORE_TYPE_DUMMY:
|
||||
/* Dummy. Nothing to do */
|
||||
return;
|
||||
break;
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_BO:
|
||||
anv_bo_cache_release(device, &device->bo_cache, impl->bo);
|
||||
return;
|
||||
break;
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_SYNC_FILE:
|
||||
close(impl->fd);
|
||||
return;
|
||||
break;
|
||||
|
||||
case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
|
||||
anv_gem_syncobj_destroy(device, impl->syncobj);
|
||||
return;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid semaphore type");
|
||||
}
|
||||
|
||||
unreachable("Invalid semaphore type");
|
||||
impl->type = ANV_SEMAPHORE_TYPE_NONE;
|
||||
}
|
||||
|
||||
void
|
||||
@@ -940,7 +944,6 @@ anv_semaphore_reset_temporary(struct anv_device *device,
|
||||
return;
|
||||
|
||||
anv_semaphore_impl_cleanup(device, &semaphore->temporary);
|
||||
semaphore->temporary.type = ANV_SEMAPHORE_TYPE_NONE;
|
||||
}
|
||||
|
||||
void anv_DestroySemaphore(
|
||||
|
@@ -104,7 +104,7 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', ['gen7_cmd_buffer.c']],
|
||||
['100', ['gen8_cmd_buffer.c']], ['110', ['gen8_cmd_buffer.c']]]
|
||||
_gen = g[0]
|
||||
libanv_gen_libs += static_library(
|
||||
'libanv_gen@0@'.format(_gen),
|
||||
'anv_gen@0@'.format(_gen),
|
||||
[anv_gen_files, g[1], block_entrypoints],
|
||||
include_directories : [
|
||||
inc_common, inc_compiler, inc_drm_uapi, inc_intel, inc_vulkan_util,
|
||||
|
@@ -969,11 +969,16 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
|
||||
} else {
|
||||
/* Move the batch to the end of the validation list */
|
||||
struct drm_i915_gem_exec_object2 tmp;
|
||||
struct brw_bo *tmp_bo;
|
||||
const unsigned index = batch->exec_count - 1;
|
||||
|
||||
tmp = *entry;
|
||||
*entry = batch->validation_list[index];
|
||||
batch->validation_list[index] = tmp;
|
||||
|
||||
tmp_bo = batch->exec_bos[0];
|
||||
batch->exec_bos[0] = batch->exec_bos[index];
|
||||
batch->exec_bos[index] = tmp_bo;
|
||||
}
|
||||
|
||||
ret = execbuffer(dri_screen->fd, batch, hw_ctx,
|
||||
|
@@ -915,8 +915,10 @@ miptree_create_for_planar_image(struct brw_context *brw,
|
||||
image->strides[index],
|
||||
tiling,
|
||||
MIPTREE_CREATE_NO_AUX);
|
||||
if (mt == NULL)
|
||||
if (mt == NULL) {
|
||||
intel_miptree_release(&planar_mt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
mt->target = target;
|
||||
|
||||
|
@@ -335,6 +335,10 @@ modifier_is_supported(const struct gen_device_info *devinfo,
|
||||
}
|
||||
|
||||
mesa_format format = driImageFormatToGLFormat(dri_format);
|
||||
/* Whether or not we support compression is based on the RGBA non-sRGB
|
||||
* version of the format.
|
||||
*/
|
||||
format = _mesa_format_fallback_rgbx_to_rgba(format);
|
||||
format = _mesa_get_srgb_format_linear(format);
|
||||
if (!isl_format_supports_ccs_e(devinfo,
|
||||
brw_isl_format_for_mesa_format(format)))
|
||||
@@ -1088,6 +1092,11 @@ intel_create_image_from_fds_common(__DRIscreen *dri_screen,
|
||||
image->strides[index] = strides[index];
|
||||
|
||||
mesa_format format = driImageFormatToGLFormat(f->planes[i].dri_format);
|
||||
/* The images we will create are actually based on the RGBA non-sRGB
|
||||
* version of the format.
|
||||
*/
|
||||
format = _mesa_format_fallback_rgbx_to_rgba(format);
|
||||
format = _mesa_get_srgb_format_linear(format);
|
||||
|
||||
ok = isl_surf_init(&screen->isl_dev, &surf,
|
||||
.dim = ISL_SURF_DIM_2D,
|
||||
@@ -1255,24 +1264,35 @@ intel_create_image_from_dma_bufs(__DRIscreen *dri_screen,
|
||||
loaderPrivate);
|
||||
}
|
||||
|
||||
static bool
|
||||
intel_image_format_is_supported(const struct intel_image_format *fmt)
|
||||
{
|
||||
if (fmt->fourcc == __DRI_IMAGE_FOURCC_SARGB8888)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static GLboolean
|
||||
intel_query_dma_buf_formats(__DRIscreen *screen, int max,
|
||||
int *formats, int *count)
|
||||
{
|
||||
int i, j = 0;
|
||||
int num_formats = 0, i;
|
||||
|
||||
if (max == 0) {
|
||||
*count = ARRAY_SIZE(intel_image_formats) - 1; /* not SARGB */
|
||||
return true;
|
||||
for (i = 0; i < ARRAY_SIZE(intel_image_formats); i++) {
|
||||
if (!intel_image_format_is_supported(&intel_image_formats[i]))
|
||||
continue;
|
||||
|
||||
num_formats++;
|
||||
if (max == 0)
|
||||
continue;
|
||||
|
||||
formats[num_formats - 1] = intel_image_formats[i].fourcc;
|
||||
if (num_formats >= max)
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < (ARRAY_SIZE(intel_image_formats)) && j < max; i++) {
|
||||
if (intel_image_formats[i].fourcc == __DRI_IMAGE_FOURCC_SARGB8888)
|
||||
continue;
|
||||
formats[j++] = intel_image_formats[i].fourcc;
|
||||
}
|
||||
|
||||
*count = j;
|
||||
*count = num_formats;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1290,6 +1310,9 @@ intel_query_dma_buf_modifiers(__DRIscreen *_screen, int fourcc, int max,
|
||||
if (f == NULL)
|
||||
return false;
|
||||
|
||||
if (!intel_image_format_is_supported(f))
|
||||
return false;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(supported_modifiers); i++) {
|
||||
uint64_t modifier = supported_modifiers[i].modifier;
|
||||
if (!modifier_is_supported(&screen->devinfo, f, 0, modifier))
|
||||
|
@@ -138,7 +138,7 @@ files_i965 = files(
|
||||
i965_gen_libs = []
|
||||
foreach v : ['40', '45', '50', '60', '70', '75', '80', '90', '100', '110']
|
||||
i965_gen_libs += static_library(
|
||||
'libi965_gen@0@'.format(v),
|
||||
'i965_gen@0@'.format(v),
|
||||
['genX_blorp_exec.c', 'genX_state_upload.c', gen_xml_pack],
|
||||
include_directories : [inc_common, inc_intel, inc_dri_common],
|
||||
c_args : [
|
||||
|
@@ -1969,7 +1969,7 @@ _mesa_GetInteger64v(GLenum pname, GLint64 *params)
|
||||
|
||||
case TYPE_INT_N:
|
||||
for (i = 0; i < v.value_int_n.n; i++)
|
||||
params[i] = INT_TO_BOOLEAN(v.value_int_n.ints[i]);
|
||||
params[i] = v.value_int_n.ints[i];
|
||||
break;
|
||||
|
||||
case TYPE_UINT_4:
|
||||
|
@@ -33,6 +33,8 @@
|
||||
#include "compiler/glsl/serialize.h"
|
||||
#include "main/errors.h"
|
||||
#include "main/mtypes.h"
|
||||
#include "main/shaderapi.h"
|
||||
#include "util/bitscan.h"
|
||||
#include "util/crc32.h"
|
||||
#include "program_binary.h"
|
||||
#include "program/prog_parameter.h"
|
||||
@@ -282,10 +284,39 @@ _mesa_program_binary(struct gl_context *ctx, struct gl_shader_program *sh_prog,
|
||||
struct blob_reader blob;
|
||||
blob_reader_init(&blob, payload, length - header_size);
|
||||
|
||||
unsigned programs_in_use = 0;
|
||||
if (ctx->_Shader)
|
||||
for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
|
||||
if (ctx->_Shader->CurrentProgram[stage] &&
|
||||
ctx->_Shader->CurrentProgram[stage]->Id == sh_prog->Name) {
|
||||
programs_in_use |= 1 << stage;
|
||||
}
|
||||
}
|
||||
|
||||
if (!read_program_payload(ctx, &blob, binary_format, sh_prog)) {
|
||||
sh_prog->data->LinkStatus = LINKING_FAILURE;
|
||||
return;
|
||||
}
|
||||
|
||||
/* From section 7.3 (Program Objects) of the OpenGL 4.5 spec:
|
||||
*
|
||||
* "If LinkProgram or ProgramBinary successfully re-links a program
|
||||
* object that is active for any shader stage, then the newly generated
|
||||
* executable code will be installed as part of the current rendering
|
||||
* state for all shader stages where the program is active.
|
||||
* Additionally, the newly generated executable code is made part of
|
||||
* the state of any program pipeline for all stages where the program
|
||||
* is attached."
|
||||
*/
|
||||
while (programs_in_use) {
|
||||
const int stage = u_bit_scan(&programs_in_use);
|
||||
|
||||
struct gl_program *prog = NULL;
|
||||
if (sh_prog->_LinkedShaders[stage])
|
||||
prog = sh_prog->_LinkedShaders[stage]->Program;
|
||||
|
||||
_mesa_use_program(ctx, stage, sh_prog, prog, ctx->_Shader);
|
||||
}
|
||||
|
||||
sh_prog->data->LinkStatus = LINKING_SKIPPED;
|
||||
}
|
||||
|
@@ -32,6 +32,7 @@
|
||||
#define BITSET_H
|
||||
|
||||
#include "util/bitscan.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
/****************************************************************************
|
||||
* generic bitset implementation
|
||||
|
@@ -1235,9 +1235,6 @@ x11_swapchain_destroy(struct wsi_swapchain *anv_chain,
|
||||
struct x11_swapchain *chain = (struct x11_swapchain *)anv_chain;
|
||||
xcb_void_cookie_t cookie;
|
||||
|
||||
for (uint32_t i = 0; i < chain->base.image_count; i++)
|
||||
x11_image_finish(chain, pAllocator, &chain->images[i]);
|
||||
|
||||
if (chain->threaded) {
|
||||
chain->status = VK_ERROR_OUT_OF_DATE_KHR;
|
||||
/* Push a UINT32_MAX to wake up the manager */
|
||||
@@ -1247,6 +1244,9 @@ x11_swapchain_destroy(struct wsi_swapchain *anv_chain,
|
||||
wsi_queue_destroy(&chain->present_queue);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < chain->base.image_count; i++)
|
||||
x11_image_finish(chain, pAllocator, &chain->images[i]);
|
||||
|
||||
xcb_unregister_for_special_event(chain->conn, chain->special_event);
|
||||
cookie = xcb_present_select_input_checked(chain->conn, chain->event_id,
|
||||
chain->window,
|
||||
|
Reference in New Issue
Block a user