Compare commits

83 Commits
mesa-17.1. ... mesa-17.1.

e60d010ef4
9bb0350752
03a0fdbdea
02b5d52bc3
0069a613ac
b7ad1fc50b
2cce0c2971
065643fe67
f47380a530
8943444b03
8d5debbcc6
c2ed693501
f9c7c9f48a
3700dc5f97
e8eb5e80f1
62cbd6d1dd
84f756994f
940e50f754
8b7ba85705
ccf9de7a59
4c2da8e40d
352263385b
9eadeb7964
c56792f758
bf425b48ea
83c15002fb
a49cad4896
0d8ed99411
6e9b965e4e
f174362ce7
365c326861
6af4b0fdf3
b53e528e18
527cbf088e
6227edd978
fae8aeae44
604304d528
7872aae4b7
acd0a0a926
1ef236050c
f1d6d6bb1b
e8368dbd6b
c0f7d34247
e3a2ff07e7
0889282a7d
d0201078d5
3ec9e03501
175b0442df
7d4dcbcad9
ea294dd259
1161debb60
6e7d5532f3
3427a2e52e
8714f8da9d
decd6b4c34
f60875e211
5ab872d64a
9bc4ee1c8e
b708c2961e
538975fdf8
3734a7de6c
423dab9d32
18fd7249c5
f66de22af4
8bd7d8c042
ffb46c8826
c8226d3782
6f062ba893
891dafc8e7
2a7279fa8f
ae960d7dee
3a193c009b
4889bb6af3
845c238ce2
67acca073a
7b10ed6a12
5a8d7ef65a
98564569d0
6348a02e27
c24bdf046e
15b5e5996a
8cfaa8ad66
4908b1e909
@@ -59,6 +59,7 @@ LOCAL_CFLAGS += \
 	-DHAVE_PTHREAD=1 \
 	-DHAVE_DLOPEN \
 	-DHAVE_DL_ITERATE_PHDR \
+	-DMAJOR_IN_SYSMACROS \
 	-fvisibility=hidden \
 	-Wno-sign-compare
@@ -1,4 +1,14 @@
-# This commit depends on 9fd9a7d0ba3 and 678d568c7b2, neither of which is in branch.
+# stable: This commit depends on 9fd9a7d0ba3 and 678d568c7b2, neither
+# of which is in branch.
 b84b631c6381d9b36bca5d0e7cc67dd23af188c1 radeonsi: load patch_id for TES-as-ES when exporting for PS
-# This commit addressed an earlier commit 126d5ad which did not land in branch.
+# fixes: This commit addressed an earlier commit 126d5ad which did not
+# land in branch.
 9da104593386f6e8ddec8f0d9d288aceb8908fe1 radv: fix regression in descriptor set freeing.
+# stable: This commit addressed an earlier commit 944455217b which did
+# not land in branch.
+b28938ffce0580e89e6012826900da2b6013b0df st/glsl_to_tgsi: use correct writemask when converting generic intrinsics
+# stable: This commit depends on 330d0607e and 61d8f3387d, neither of
+# which is in branch.
+c12f8305a8ae4fd5d78a9ab8bbda790a711d5bed nv50,nvc0: remove IDX from bufctx immediately, to avoid conflicts with clear
+# fixes: Genuine false positive.
+5d87667fed1bd5ab850abdfb3a10db8c8c21c330 bin/get-fixes-pick-list.sh: better identify multiple "fixes:" tags
15 configure.ac

@@ -97,7 +97,7 @@ XSHMFENCE_REQUIRED=1.1
 XVMC_REQUIRED=1.0.6
 PYTHON_MAKO_REQUIRED=0.8.0
 LIBSENSORS_REQUIRED=4.0.0
-ZLIB_REQUIRED=1.2.8
+ZLIB_REQUIRED=1.2.3
 
 dnl LLVM versions
 LLVM_REQUIRED_GALLIUM=3.3.0
@@ -837,6 +837,11 @@ dnl is not valid for that platform.
 if test "x$android" = xno; then
     test -z "$PTHREAD_LIBS" && PTHREAD_LIBS="-lpthread"
 fi
+dnl According to the manual when using pthreads, one should add -pthread to
+dnl both compile and link-time arguments.
+dnl In practise that should be sufficient for all platforms, since any
+dnl platforms build with GCC and Clang support the flag.
+PTHREAD_LIBS="$PTHREAD_LIBS -pthread"
 
 dnl pthread-stubs is mandatory on BSD platforms, due to the nature of the
 dnl project. Even then there's a notable issue as described in the project README
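The dnl comment above records the rationale: per the pthreads manual, -pthread belongs in both the compile-time and link-time arguments. A minimal sketch of the failure this hunk addresses (bug 101071 above, "undefined reference to `pthread_create'"); the file name is hypothetical:

/* thread_demo.c - builds with: cc -pthread thread_demo.c
 * Linking without -pthread (or -lpthread) fails with
 * "undefined reference to `pthread_create'". */
#include <pthread.h>
#include <stdio.h>

static void *worker(void *arg)
{
   printf("hello from thread\n");
   return NULL;
}

int main(void)
{
   pthread_t t;
   if (pthread_create(&t, NULL, worker, NULL) != 0)
      return 1;
   pthread_join(t, NULL);
   return 0;
}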
@@ -2476,10 +2481,10 @@ if test -n "$with_gallium_drivers"; then
         xswr)
             llvm_require_version $LLVM_REQUIRED_SWR "swr"
 
-            swr_require_cxx_feature_flags "C++14" "__cplusplus >= 201402L" \
-                "-std=c++14" \
-                SWR_CXX14_CXXFLAGS
-            AC_SUBST([SWR_CXX14_CXXFLAGS])
+            swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
+                ",-std=c++11" \
+                SWR_CXX11_CXXFLAGS
+            AC_SUBST([SWR_CXX11_CXXFLAGS])
 
             swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
                 ",-mavx,-march=core-avx" \
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
 
 <h2>SHA256 checksums</h2>
 <pre>
-TBD
+0d2020c2115db0d13a5be0075abf0da143290f69f5817a2f277861e89166a3e1 mesa-17.1.2.tar.gz
+0937804f43746339b1f9540d8f9c8b4a1bb3d3eec0e4020eac283b8799798239 mesa-17.1.2.tar.xz
 </pre>
 

156 docs/relnotes/17.1.3.html Normal file
@@ -0,0 +1,156 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>

<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>

<iframe src="../contents.html"></iframe>
<div class="content">

<h1>Mesa 17.1.3 Release Notes / June 19, 2017</h1>

<p>
Mesa 17.1.3 is a bug fix release which fixes bugs found since the 17.1.2 release.
</p>
<p>
Mesa 17.1.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>

<h2>SHA256 checksums</h2>
<pre>
81ae9127286ff8d631e466d258608d6dea9854fe7bee2e8521da44c7544f01e5 mesa-17.1.3.tar.gz
5f1ee9a8aea2880f887884df2dea0c16dd1b13eb42fd2e52265db0dc1b380e8c mesa-17.1.3.tar.xz
</pre>

<h2>New features</h2>
<p>None</p>

<h2>Bug fixes</h2>

<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100988">Bug 100988</a> - glXGetCurrentDisplay() no longer works for FakeGLX contexts?</li>
</ul>

<h2>Changes</h2>

<p>Bas Nieuwenhuizen (3):</p>
<ul>
<li>radv: Set both compute and graphics SGPRS on descriptor set flush.</li>
<li>radv: Dirty all descriptors sets when changing the pipeline.</li>
<li>radv: Remove SI num RB override for occlusion queries.</li>
</ul>

<p>Brian Paul (1):</p>
<ul>
<li>xlib: fix glXGetCurrentDisplay() failure</li>
</ul>

<p>Chad Versace (1):</p>
<ul>
<li>i965/dri: Fix bad GL error in intel_create_winsys_renderbuffer()</li>
</ul>

<p>Chuck Atkins (1):</p>
<ul>
<li>configure.ac: Reduce zlib requirement from 1.2.8 to 1.2.3.</li>
</ul>

<p>Dave Airlie (3):</p>
<ul>
<li>radv: expose integrated device type for APUs.</li>
<li>radv: set fmask state to all 0s when no fmask. (v2)</li>
<li>glsl/lower_distance: only set max_array_access for 1D clip dist arrays</li>
</ul>

<p>Emil Velikov (1):</p>
<ul>
<li>Update version to 17.1.3</li>
</ul>

<p>Grazvydas Ignotas (1):</p>
<ul>
<li>radv: fix trace dumping for !use_ib_bos</li>
</ul>

<p>Jason Ekstrand (4):</p>
<ul>
<li>i965/blorp: Take a layer range in intel_hiz_exec</li>
<li>i965: Move the pre-depth-clear flush/stalls to intel_hiz_exec</li>
<li>i965: Perform HiZ flush/stall prior to HiZ resolves</li>
<li>i965: Mark depth surfaces as needing a HiZ resolve after blitting</li>
</ul>

<p>José Fonseca (1):</p>
<ul>
<li>automake: Link all libGL.so variants with -Bsymbolic.</li>
</ul>

<p>Juan A. Suarez Romero (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.1.2</li>
</ul>

<p>Lucas Stach (1):</p>
<ul>
<li>etnaviv: always do cpu_fini in transfer_unmap</li>
</ul>

<p>Lyude (1):</p>
<ul>
<li>nvc0: disable BGRA8 images on Fermi</li>
</ul>

<p>Marek Olšák (3):</p>
<ul>
<li>st/mesa: don't load cached TGSI shaders on demand</li>
<li>radeonsi: fix a GPU hang with tessellation on 2-CU configs</li>
<li>radeonsi: disable the patch ID workaround on SI when the patch ID isn't used (v2)</li>
</ul>

<p>Nicolai Hähnle (1):</p>
<ul>
<li>radv: fewer than 8 RBs are possible</li>
</ul>

<p>Nicolas Dechesne (1):</p>
<ul>
<li>util/rand_xor: add missing include statements</li>
</ul>

<p>Tapani Pälli (1):</p>
<ul>
<li>egl: fix _eglQuerySurface in EGL_BUFFER_AGE_EXT case</li>
</ul>

<p>Thomas Hellstrom (1):</p>
<ul>
<li>dri3/GLX: Fix drawable invalidation v2</li>
</ul>

<p>Tim Rowley (1):</p>
<ul>
<li>swr: relax c++ requirement from c++14 to c++11</li>
</ul>

</div>
</body>
</html>

219 docs/relnotes/17.1.4.html Normal file
@@ -0,0 +1,219 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>

<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>

<iframe src="../contents.html"></iframe>
<div class="content">

<h1>Mesa 17.1.4 Release Notes / June 30, 2017</h1>

<p>
Mesa 17.1.4 is a bug fix release which fixes bugs found since the 17.1.3 release.
</p>
<p>
Mesa 17.1.4 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>

<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>

<h2>New features</h2>
<p>None</p>

<h2>Bug fixes</h2>

<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77240">Bug 77240</a> - khrplatform.h not installed if EGL is disabled</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95530">Bug 95530</a> - Stellaris - colored overlay of sectors doesn't render on i965</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96958">Bug 96958</a> - [SKL] Improper rendering in Europa Universalis IV</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99467">Bug 99467</a> - [radv] DOOM 2016 + wine. Green screen everywhere (but can be started)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101071">Bug 101071</a> - compiling glsl fails with undefined reference to `pthread_create'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101252">Bug 101252</a> - eglGetDisplay() is not thread safe</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101294">Bug 101294</a> - radeonsi minecraft forge splash freeze since 17.1</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101451">Bug 101451</a> - [G33] ES2-CTS.functional.clipping.polygon regression</li>
</ul>

<h2>Changes</h2>

<p>Alex Deucher (1):</p>
<ul>
<li>radeonsi: add new polaris12 pci id</li>
</ul>

<p>Andres Gomez (3):</p>
<ul>
<li>cherry-ignore: 17.1.4 rejected commits</li>
<li>cherry-ignore: bin/get-fixes-pick-list.sh: better identify multiple "fixes:" tags</li>
<li>Update version to 17.1.4</li>
</ul>

<p>Anuj Phogat (2):</p>
<ul>
<li>i965: Add and initialize l3_banks field for gen7+</li>
<li>i965: Fix broxton 2x6 l3 config</li>
</ul>

<p>Ben Crocker (1):</p>
<ul>
<li>egl_dri2: swrastGetDrawableInfo: set *x, *y [v2]</li>
</ul>

<p>Brian Paul (2):</p>
<ul>
<li>svga: check return value from svga_set_shader( SVGA3D_SHADERTYPE_GS, NULL)</li>
<li>gallium/vbuf: avoid segfault when we get invalid glDrawRangeElements()</li>
</ul>

<p>Chad Versace (1):</p>
<ul>
<li>egl/android: Change order of EGLConfig generation (v2)</li>
</ul>

<p>Chandu Babu N (1):</p>
<ul>
<li>change va max_entrypoints</li>
</ul>

<p>Charmaine Lee (1):</p>
<ul>
<li>svga: use the winsys interface to invalidate surface</li>
</ul>

<p>Emil Velikov (3):</p>
<ul>
<li>docs: add sha256 checksums for 17.1.3</li>
<li>configure.ac: add -pthread to PTHREAD_LIBS</li>
<li>radeonsi: include ac_binary.h for struct ac_shader_binary</li>
</ul>

<p>Eric Engestrom (3):</p>
<ul>
<li>egl: properly count configs</li>
<li>egl/display: only detect the platform once</li>
<li>egl/display: make platform detection thread-safe</li>
</ul>

<p>Eric Le Bihan (1):</p>
<ul>
<li>Fix khrplatform.h not installed if EGL is disabled.</li>
</ul>

<p>Iago Toral Quiroga (1):</p>
<ul>
<li>i965: update MaxTextureRectSize to match PRMs and comply with OpenGL 4.1+</li>
</ul>

<p>Ilia Mirkin (2):</p>
<ul>
<li>nv50/ir: fetch indirect sources BEFORE the op that uses them</li>
<li>nv50/ir: fix combineLd/St to update existing records as necessary</li>
</ul>

<p>Jason Ekstrand (10):</p>
<ul>
<li>i965: Flush around state base address</li>
<li>i965: Take a uint64_t immediate in emit_pipe_control_write</li>
<li>i965: Unify the two emit_pipe_control functions</li>
<li>i965: Do an end-of-pipe sync prior to STATE_BASE_ADDRESS</li>
<li>i965/blorp: Do an end-of-pipe sync around CCS ops</li>
<li>i965: Do an end-of-pipe sync after flushes</li>
<li>i965: Disable the interleaved vertex optimization when instancing</li>
<li>i965: Set step_rate = 0 for interleaved vertex buffers</li>
<li>spirv: Work around the Doom shader bug</li>
<li>i965: Clamp clear colors to the representable range</li>
</ul>

<p>Jonas Kulla (1):</p>
<ul>
<li>anv: Fix L3 cache programming on Bay Trail</li>
</ul>

<p>Kenneth Graunke (1):</p>
<ul>
<li>i965: Ignore anisotropic filtering in nearest mode.</li>
</ul>

<p>Lucas Stach (7):</p>
<ul>
<li>etnaviv: don't try RS blit if blit region is unaligned</li>
<li>etnaviv: use padded width/height for resource copies</li>
<li>etnaviv: remove bogus assert</li>
<li>etnaviv: replace translate_clear_color with util_pack_color</li>
<li>etnaviv: mask correct channel for RB swapped rendertargets</li>
<li>etnaviv: advertise correct max LOD bias</li>
<li>etnaviv: only flush resource to self if no scanout buffer exists</li>
</ul>

<p>Marek Olšák (4):</p>
<ul>
<li>winsys/amdgpu: fix a deadlock when waiting for submission_in_progress</li>
<li>mesa: flush vertices before changing viewports</li>
<li>mesa: flush vertices before updating ctx->_Shader</li>
<li>st/mesa: fix pipe_rasterizer_state::scissor with multiple viewports</li>
</ul>

<p>Michel Dänzer (1):</p>
<ul>
<li>gallium/util: Break recursion in pipe_resource_reference</li>
</ul>

<p>Nicolai Hähnle (2):</p>
<ul>
<li>gallium/radeon/gfx9: fix PBO texture uploads to compressed textures</li>
<li>amd/common: fix off-by-one in sid_tables.py</li>
</ul>

<p>Pierre Moreau (1):</p>
<ul>
<li>nv50/ir: Properly fold constants in SPLIT operation</li>
</ul>

<p>Rob Herring (1):</p>
<ul>
<li>Android: major/minor/makedev live in &lt;sys/sysmacros.h&gt;</li>
</ul>

<p>Topi Pohjolainen (2):</p>
<ul>
<li>i965: Add an end-of-pipe sync helper</li>
<li>i965/gen4: Set depth offset when there is stencil attachment only</li>
</ul>

<p>Ville Syrjälä (2):</p>
<ul>
<li>i915: Fix gl_Fragcoord interpolation</li>
<li>i915: Fix wpos_tex vs. -1 comparison</li>
</ul>

</div>
</body>
</html>
@@ -213,6 +213,7 @@ CHIPSET(0x6985, POLARIS12_, POLARIS12)
 CHIPSET(0x6986, POLARIS12_, POLARIS12)
 CHIPSET(0x6987, POLARIS12_, POLARIS12)
+CHIPSET(0x6995, POLARIS12_, POLARIS12)
 CHIPSET(0x6997, POLARIS12_, POLARIS12)
 CHIPSET(0x699F, POLARIS12_, POLARIS12)
 
 CHIPSET(0x6860, VEGA10_, VEGA10)
@@ -110,7 +110,7 @@ class IntTable:
         [static] const typename name[] = { ... };
         to filp.
         """
-        idxs = sorted(self.idxs) + [-1]
+        idxs = sorted(self.idxs) + [len(self.table)]
 
         fragments = [
             ('\t/* %s */ %s' % (
@@ -1268,38 +1268,39 @@ emit_stage_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
 
 static void
 radv_emit_descriptor_set_userdata(struct radv_cmd_buffer *cmd_buffer,
-                                  struct radv_pipeline *pipeline,
                                   VkShaderStageFlags stages,
                                   struct radv_descriptor_set *set,
                                   unsigned idx)
 {
-	if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
-		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
-						   idx, set->va,
-						   MESA_SHADER_FRAGMENT);
+	if (cmd_buffer->state.pipeline) {
+		if (stages & VK_SHADER_STAGE_FRAGMENT_BIT)
+			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
+							   idx, set->va,
+							   MESA_SHADER_FRAGMENT);
 
-	if (stages & VK_SHADER_STAGE_VERTEX_BIT)
-		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
-						   idx, set->va,
-						   MESA_SHADER_VERTEX);
+		if (stages & VK_SHADER_STAGE_VERTEX_BIT)
+			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
+							   idx, set->va,
+							   MESA_SHADER_VERTEX);
 
-	if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(pipeline))
-		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
-						   idx, set->va,
-						   MESA_SHADER_GEOMETRY);
+		if ((stages & VK_SHADER_STAGE_GEOMETRY_BIT) && radv_pipeline_has_gs(cmd_buffer->state.pipeline))
+			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
+							   idx, set->va,
+							   MESA_SHADER_GEOMETRY);
 
-	if ((stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) && radv_pipeline_has_tess(pipeline))
-		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
-						   idx, set->va,
-						   MESA_SHADER_TESS_CTRL);
+		if ((stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) && radv_pipeline_has_tess(cmd_buffer->state.pipeline))
+			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
+							   idx, set->va,
+							   MESA_SHADER_TESS_CTRL);
 
-	if ((stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) && radv_pipeline_has_tess(pipeline))
-		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
-						   idx, set->va,
-						   MESA_SHADER_TESS_EVAL);
+		if ((stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) && radv_pipeline_has_tess(cmd_buffer->state.pipeline))
+			emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.pipeline,
+							   idx, set->va,
+							   MESA_SHADER_TESS_EVAL);
+	}
 
-	if (stages & VK_SHADER_STAGE_COMPUTE_BIT)
-		emit_stage_descriptor_set_userdata(cmd_buffer, pipeline,
+	if (cmd_buffer->state.compute_pipeline && (stages & VK_SHADER_STAGE_COMPUTE_BIT))
+		emit_stage_descriptor_set_userdata(cmd_buffer, cmd_buffer->state.compute_pipeline,
 						   idx, set->va,
 						   MESA_SHADER_COMPUTE);
 }
@@ -1324,7 +1325,6 @@ radv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer)
 
 static void
 radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
-		       struct radv_pipeline *pipeline,
 		       VkShaderStageFlags stages)
 {
 	unsigned i;
@@ -1345,7 +1345,7 @@ radv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer,
 		if (!set)
 			continue;
 
-		radv_emit_descriptor_set_userdata(cmd_buffer, pipeline, stages, set, i);
+		radv_emit_descriptor_set_userdata(cmd_buffer, stages, set, i);
 	}
 	cmd_buffer->state.descriptors_dirty = 0;
 	cmd_buffer->state.push_descriptors_dirty = false;
@@ -1515,8 +1515,7 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
 
 	radv_emit_primitive_reset_state(cmd_buffer, indexed_draw);
 
-	radv_flush_descriptors(cmd_buffer, cmd_buffer->state.pipeline,
-			       VK_SHADER_STAGE_ALL_GRAPHICS);
+	radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_ALL_GRAPHICS);
 	radv_flush_constants(cmd_buffer, cmd_buffer->state.pipeline,
 			     VK_SHADER_STAGE_ALL_GRAPHICS);
 
@@ -2153,6 +2152,13 @@ radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer)
 	assert(cmd_buffer->cs->cdw <= cdw_max);
 }
 
+static void radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer)
+{
+	for (unsigned i = 0; i < MAX_SETS; i++) {
+		if (cmd_buffer->state.descriptors[i])
+			cmd_buffer->state.descriptors_dirty |= (1u << i);
+	}
+}
 
 void radv_CmdBindPipeline(
 	VkCommandBuffer commandBuffer,
@@ -2162,10 +2168,7 @@ void radv_CmdBindPipeline(
 	RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
 	RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
 
-	for (unsigned i = 0; i < MAX_SETS; i++) {
-		if (cmd_buffer->state.descriptors[i])
-			cmd_buffer->state.descriptors_dirty |= (1 << i);
-	}
+	radv_mark_descriptor_sets_dirty(cmd_buffer);
 
 	switch (pipelineBindPoint) {
 	case VK_PIPELINE_BIND_POINT_COMPUTE:
@@ -2174,6 +2177,9 @@ void radv_CmdBindPipeline(
 		break;
 	case VK_PIPELINE_BIND_POINT_GRAPHICS:
 		cmd_buffer->state.pipeline = pipeline;
+		if (!pipeline)
+			break;
+
 		cmd_buffer->state.vertex_descriptors_dirty = true;
 		cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE;
 		cmd_buffer->push_constant_stages |= pipeline->active_stages;
@@ -2336,7 +2342,6 @@ void radv_CmdSetStencilReference(
 	cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
 }
 
-
 void radv_CmdExecuteCommands(
 	VkCommandBuffer commandBuffer,
 	uint32_t commandBufferCount,
@@ -2381,6 +2386,7 @@ void radv_CmdExecuteCommands(
 		primary->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_ALL;
 		primary->state.last_primitive_reset_en = -1;
 		primary->state.last_primitive_reset_index = 0;
+		radv_mark_descriptor_sets_dirty(primary);
 	}
 }
 
@@ -2757,8 +2763,7 @@ static void
 radv_flush_compute_state(struct radv_cmd_buffer *cmd_buffer)
 {
 	radv_emit_compute_pipeline(cmd_buffer);
-	radv_flush_descriptors(cmd_buffer, cmd_buffer->state.compute_pipeline,
-			       VK_SHADER_STAGE_COMPUTE_BIT);
+	radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT);
 	radv_flush_constants(cmd_buffer, cmd_buffer->state.compute_pipeline,
 			     VK_SHADER_STAGE_COMPUTE_BIT);
 	si_emit_cache_flush(cmd_buffer);
@@ -676,7 +676,7 @@ void radv_GetPhysicalDeviceProperties(
 		.driverVersion = radv_get_driver_version(),
 		.vendorID = 0x1002,
 		.deviceID = pdevice->rad_info.pci_id,
-		.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
+		.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
 		.limits = limits,
 		.sparseProperties = {0},
 	};
@@ -382,7 +382,8 @@ si_make_texture_descriptor(struct radv_device *device,
 			S_008F24_LAST_ARRAY(last_layer);
 		fmask_state[6] = 0;
 		fmask_state[7] = 0;
-	}
+	} else if (fmask_state)
+		memset(fmask_state, 0, 8 * 4);
 }
 
 static void
@@ -51,10 +51,10 @@ void
 radv_meta_restore(const struct radv_meta_saved_state *state,
 		  struct radv_cmd_buffer *cmd_buffer)
 {
-	cmd_buffer->state.pipeline = state->old_pipeline;
+	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
+			     radv_pipeline_to_handle(state->old_pipeline));
 
 	cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
-	cmd_buffer->state.descriptors_dirty |= (1u << 0);
 	memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
 	       sizeof(state->old_vertex_bindings));
 
@@ -114,7 +114,6 @@ radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
 			  radv_pipeline_to_handle(state->old_pipeline));
 
 	cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
-	cmd_buffer->state.descriptors_dirty |= (1u << 0);
 
 	if (push_constant_size) {
 		memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size);
@@ -44,11 +44,6 @@ static unsigned get_max_db(struct radv_device *device)
 	unsigned num_db = device->physical_device->rad_info.num_render_backends;
 	MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;
 
-	if (device->physical_device->rad_info.chip_class == SI)
-		num_db = 8;
-	else
-		num_db = MAX2(8, num_db);
-
 	/* Otherwise we need to change the query reset procedure */
 	assert(rb_mask == ((1ull << num_db) - 1));
 
@@ -931,6 +931,9 @@ static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
 {
 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
 	void *ret = NULL;
+
+	if (!cs->ib_buffer)
+		return NULL;
 	for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
 		struct radv_amdgpu_winsys_bo *bo;
 
@@ -949,10 +952,15 @@ static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
 			uint32_t trace_id)
 {
 	struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
+	void *ib = cs->base.buf;
+	int num_dw = cs->base.cdw;
 
-	ac_parse_ib(file,
-		    radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
-		    cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class,
+	if (cs->ws->use_ib_bos) {
+		ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
+		num_dw = cs->ib.size;
+	}
+	assert(ib);
+	ac_parse_ib(file, ib, num_dw, trace_id, "main IB", cs->ws->info.chip_class,
 		    radv_amdgpu_winsys_get_cpu_addr, cs);
 }
 
|
@@ -167,7 +167,6 @@ lower_distance_visitor::visit(ir_variable *ir)
|
||||
/* Clone the old var so that we inherit all of its properties */
|
||||
*new_var = ir->clone(ralloc_parent(ir), NULL);
|
||||
(*new_var)->name = ralloc_strdup(*new_var, GLSL_CLIP_VAR_NAME);
|
||||
(*new_var)->data.max_array_access = new_size - 1;
|
||||
(*new_var)->data.location = VARYING_SLOT_CLIP_DIST0;
|
||||
|
||||
if (!ir->type->fields.array->is_array()) {
|
||||
@@ -182,6 +181,7 @@ lower_distance_visitor::visit(ir_variable *ir)
|
||||
this->shader_stage == MESA_SHADER_GEOMETRY)));
|
||||
|
||||
assert (ir->type->fields.array == glsl_type::float_type);
|
||||
(*new_var)->data.max_array_access = new_size - 1;
|
||||
|
||||
/* And change the properties that we need to change */
|
||||
(*new_var)->type = glsl_type::get_array_instance(glsl_type::vec4_type,
|
||||
|
@@ -288,6 +288,20 @@ struct vtn_variable {
    nir_variable *var;
    nir_variable **members;
 
+   /**
+    * In some early released versions of GLSLang, it implemented all function
+    * calls by making copies of all parameters into temporary variables and
+    * passing those variables into the function.  It even did so for samplers
+    * and images which violates the SPIR-V spec.  Unfortunately, two games
+    * (Talos Principle and Doom) shipped with this old version of GLSLang and
+    * also happen to pass samplers into functions.  Talos Principle received
+    * an update fairly shortly after release with an updated GLSLang.  Doom,
+    * on the other hand, has never received an update so we need to work
+    * around this GLSLang issue in SPIR-V -> NIR.  Hopefully, we can drop this
+    * hack at some point in the future.
+    */
+   struct vtn_access_chain *copy_prop_sampler;
+
    struct vtn_access_chain chain;
 };
 
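The comment block above documents the workaround added for the Doom shader bug. A minimal sketch of the link-chasing idea behind copy_prop_sampler, with hypothetical simplified types (the real code resolves the chain via recursion in vtn_access_chain_to_deref):

/* Storing into a sampler variable records the source chain instead of
 * copying data; any later dereference follows the recorded link. */
struct var {
   struct var *copy_prop_sampler; /* set by the "store", NULL otherwise */
};

static struct var *resolve(struct var *v)
{
   /* Chase the links so nested copies (a copy of a copy) also resolve. */
   while (v->copy_prop_sampler)
      v = v->copy_prop_sampler;
   return v;
}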
@@ -96,6 +96,10 @@ rewrite_deref_types(nir_deref *deref, const struct glsl_type *type)
 nir_deref_var *
 vtn_access_chain_to_deref(struct vtn_builder *b, struct vtn_access_chain *chain)
 {
+   /* Do on-the-fly copy propagation for samplers. */
+   if (chain->var->copy_prop_sampler)
+      return vtn_access_chain_to_deref(b, chain->var->copy_prop_sampler);
+
    nir_deref_var *deref_var;
    if (chain->var->var) {
       deref_var = nir_deref_var_create(b, chain->var->var);
@@ -1609,6 +1613,16 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
    case SpvOpStore: {
       struct vtn_access_chain *dest =
          vtn_value(b, w[1], vtn_value_type_access_chain)->access_chain;
+
+      if (glsl_type_is_sampler(dest->var->type->type)) {
+         vtn_warn("OpStore of a sampler detected. Doing on-the-fly copy "
+                  "propagation to workaround the problem.");
+         assert(dest->var->copy_prop_sampler == NULL);
+         dest->var->copy_prop_sampler =
+            vtn_value(b, w[2], vtn_value_type_access_chain)->access_chain;
+         break;
+      }
+
       struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]);
       vtn_variable_store(b, src, dest);
       break;
@@ -163,9 +163,6 @@ pkgconfigdir = $(libdir)/pkgconfig
 
 pkgconfig_DATA = main/egl.pc
 
-khrdir = $(includedir)/KHR
-khr_HEADERS = $(top_srcdir)/include/KHR/khrplatform.h
-
 egldir = $(includedir)/EGL
 egl_HEADERS = \
 	$(top_srcdir)/include/EGL/eglext.h \
@@ -609,10 +609,10 @@ droid_query_buffer_age(_EGLDriver *drv,
 
    if (update_buffers(dri2_surf) < 0) {
       _eglError(EGL_BAD_ALLOC, "droid_query_buffer_age");
-      return 0;
+      return -1;
    }
 
-   return dri2_surf->back->age;
+   return dri2_surf->back ? dri2_surf->back->age : 0;
 }
 
 static EGLBoolean
@@ -1005,20 +1005,39 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
    unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 };
    int count, i, j;
 
+   /* The nesting of loops is significant here. Also significant is the order
+    * of the HAL pixel formats. Many Android apps (such as Google's official
+    * NDK GLES2 example app), and even portions the core framework code (such
+    * as SystemServiceManager in Nougat), incorrectly choose their EGLConfig.
+    * They neglect to match the EGLConfig's EGL_NATIVE_VISUAL_ID against the
+    * window's native format, and instead choose the first EGLConfig whose
+    * channel sizes match those of the native window format while ignoring the
+    * channel *ordering*.
+    *
+    * We can detect such buggy clients in logcat when they call
+    * eglCreateSurface, by detecting the mismatch between the EGLConfig's
+    * format and the window's format.
+    *
+    * As a workaround, we generate EGLConfigs such that all EGLConfigs for HAL
+    * pixel format i precede those for HAL pixel format i+1. In my
+    * (chadversary) testing on Android Nougat, this was good enough to pacify
+    * the buggy clients.
+    */
    count = 0;
-   for (i = 0; dri2_dpy->driver_configs[i]; i++) {
+   for (i = 0; i < ARRAY_SIZE(visuals); i++) {
       const EGLint surface_type = EGL_WINDOW_BIT | EGL_PBUFFER_BIT;
       struct dri2_egl_config *dri2_conf;
 
-      for (j = 0; j < ARRAY_SIZE(visuals); j++) {
-         config_attrs[1] = visuals[j].format;
-         config_attrs[3] = visuals[j].format;
+      for (j = 0; dri2_dpy->driver_configs[j]; j++) {
+         config_attrs[1] = visuals[i].format;
+         config_attrs[3] = visuals[i].format;
 
-         dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[i],
-            count + 1, surface_type, config_attrs, visuals[j].rgba_masks);
+         dri2_conf = dri2_add_config(dpy, dri2_dpy->driver_configs[j],
+            count + 1, surface_type, config_attrs, visuals[i].rgba_masks);
          if (dri2_conf) {
-            count++;
-            format_count[j]++;
+            if (dri2_conf->base.ConfigID == count + 1)
+               count++;
+            format_count[i]++;
          }
       }
    }
@@ -463,7 +463,7 @@ dri2_drm_query_buffer_age(_EGLDriver *drv,
 
    if (get_back_bo(dri2_surf) < 0) {
       _eglError(EGL_BAD_ALLOC, "dri2_query_buffer_age");
-      return 0;
+      return -1;
    }
 
    return dri2_surf->back->age;
@@ -630,7 +630,8 @@ drm_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp)
       dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i],
                                   count + 1, EGL_WINDOW_BIT, attr_list, NULL);
       if (dri2_conf) {
-         count++;
+         if (dri2_conf->base.ConfigID == count + 1)
+            count++;
          format_count[j]++;
       }
    }
@@ -212,7 +212,8 @@ surfaceless_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
                                      count + 1, EGL_PBUFFER_BIT, NULL, visuals[j].rgba_masks);
 
          if (dri2_conf) {
-            count++;
+            if (dri2_conf->base.ConfigID == count + 1)
+               count++;
             format_count[j]++;
          }
       }
@@ -808,7 +808,7 @@ dri2_wl_query_buffer_age(_EGLDriver *drv,
 
    if (get_back_bo(dri2_surf) < 0) {
       _eglError(EGL_BAD_ALLOC, "dri2_query_buffer_age");
-      return 0;
+      return -1;
    }
 
    return dri2_surf->back->age;
@@ -1128,7 +1128,8 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp)
       dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i],
                                   count + 1, EGL_WINDOW_BIT, NULL, visuals[j].rgba_masks);
       if (dri2_conf) {
-         count++;
+         if (dri2_conf->base.ConfigID == count + 1)
+            count++;
          format_count[j]++;
       }
    }
@@ -110,7 +110,7 @@ swrastGetDrawableInfo(__DRIdrawable * draw,
    xcb_get_geometry_reply_t *reply;
    xcb_generic_error_t *error;
 
-   *w = *h = 0;
+   *x = *y = *w = *h = 0;
    cookie = xcb_get_geometry (dri2_dpy->conn, dri2_surf->drawable);
    reply = xcb_get_geometry_reply (dri2_dpy->conn, cookie, &error);
    if (reply == NULL)
@@ -120,6 +120,8 @@ swrastGetDrawableInfo(__DRIdrawable * draw,
       _eglLog(_EGL_WARNING, "error in xcb_get_geometry");
       free(error);
    } else {
+      *x = reply->x;
+      *y = reply->y;
       *w = reply->width;
       *h = reply->height;
    }
@@ -772,7 +774,8 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
       dri2_conf = dri2_add_config(disp, config, count + 1, surface_type,
                                   config_attrs, rgba_masks);
       if (dri2_conf)
-         count++;
+         if (dri2_conf->base.ConfigID == count + 1)
+            count++;
 
       /* Allow a 24-bit RGB visual to match a 32-bit RGBA EGLConfig.
        * Otherwise it will only match a 32-bit RGBA visual. On a
@@ -787,7 +790,8 @@ dri2_x11_add_configs_for_visuals(struct dri2_egl_display *dri2_dpy,
          dri2_conf = dri2_add_config(disp, config, count + 1, surface_type,
                                      config_attrs, rgba_masks);
          if (dri2_conf)
-            count++;
+            if (dri2_conf->base.ConfigID == count + 1)
+               count++;
       }
    }
 }
@@ -36,6 +36,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "c11/threads.h"
+#include "util/u_atomic.h"
 
 #include "eglcontext.h"
 #include "eglcurrent.h"
@@ -180,25 +181,32 @@ _eglNativePlatformDetectNativeDisplay(void *nativeDisplay)
 _EGLPlatformType
 _eglGetNativePlatform(void *nativeDisplay)
 {
-   static _EGLPlatformType native_platform;
-   char *detection_method;
-
-   native_platform = _eglGetNativePlatformFromEnv();
-   detection_method = "environment overwrite";
+   static _EGLPlatformType native_platform = _EGL_INVALID_PLATFORM;
+   _EGLPlatformType detected_platform = native_platform;
 
-   if (native_platform == _EGL_INVALID_PLATFORM) {
-      native_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay);
-      detection_method = "autodetected";
-   }
+   if (detected_platform == _EGL_INVALID_PLATFORM) {
+      const char *detection_method;
 
-   if (native_platform == _EGL_INVALID_PLATFORM) {
-      native_platform = _EGL_NATIVE_PLATFORM;
-      detection_method = "build-time configuration";
-   }
+      detected_platform = _eglGetNativePlatformFromEnv();
+      detection_method = "environment overwrite";
 
-   _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)",
-           egl_platforms[native_platform].name, detection_method);
+      if (detected_platform == _EGL_INVALID_PLATFORM) {
+         detected_platform = _eglNativePlatformDetectNativeDisplay(nativeDisplay);
+         detection_method = "autodetected";
+      }
+
+      if (detected_platform == _EGL_INVALID_PLATFORM) {
+         detected_platform = _EGL_NATIVE_PLATFORM;
+         detection_method = "build-time configuration";
+      }
+
+      _eglLog(_EGL_DEBUG, "Native platform type: %s (%s)",
+              egl_platforms[detected_platform].name, detection_method);
+
+      p_atomic_cmpxchg(&native_platform, _EGL_INVALID_PLATFORM,
+                       detected_platform);
+   }
 
    return native_platform;
 }
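A minimal sketch of the detect-once pattern the hunk above implements, using C11 atomics in place of Mesa's p_atomic_cmpxchg (all names hypothetical):

#include <stdatomic.h>

#define INVALID_PLATFORM -1

/* Every thread may compute its own detection result, but only the
 * first compare-exchange publishes one; later threads see the
 * already-published value, so all callers agree. */
static _Atomic int native_platform = INVALID_PLATFORM;

static int detect_platform(void) { return 2; /* stand-in detection */ }

int get_native_platform(void)
{
   int cached = atomic_load(&native_platform);
   if (cached == INVALID_PLATFORM) {
      int detected = detect_platform();
      int expected = INVALID_PLATFORM;
      atomic_compare_exchange_strong(&native_platform, &expected, detected);
   }
   return atomic_load(&native_platform);
}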
@@ -409,7 +409,11 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
          _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");
          return EGL_FALSE;
       }
-      *value = drv->API.QueryBufferAge(drv, dpy, surface);
+      EGLint result = drv->API.QueryBufferAge(drv, dpy, surface);
+      /* error happened */
+      if (result < 0)
+         return EGL_FALSE;
+      *value = result;
       break;
    default:
       _eglError(EGL_BAD_ATTRIBUTE, "eglQuerySurface");
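The hunk above (together with the platform changes returning -1) establishes a convention: a negative backend return means "error already raised", and the caller maps it to EGL_FALSE instead of writing it to *value. A minimal sketch of that convention (all names hypothetical):

/* Backend: returns the buffer age, or a negative value on error. */
static int query_buffer_age_backend(int have_back)
{
   return have_back ? 3 : -1; /* -1 signals "error already raised" */
}

/* Frontend: only stores the result when it is valid. */
int query_surface_age(int have_back, int *value)
{
   int result = query_buffer_age_backend(have_back);
   if (result < 0)
      return 0; /* EGL_FALSE */
   *value = result;
   return 1; /* EGL_TRUE */
}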
@@ -137,8 +137,14 @@ pipe_resource_reference(struct pipe_resource **ptr, struct pipe_resource *tex)
 
    if (pipe_reference_described(&(*ptr)->reference, &tex->reference,
                                 (debug_reference_descriptor)debug_describe_resource)) {
-      pipe_resource_reference(&old_tex->next, NULL);
-      old_tex->screen->resource_destroy(old_tex->screen, old_tex);
+      /* Avoid recursion, which would prevent inlining this function */
+      do {
+         struct pipe_resource *next = old_tex->next;
+
+         old_tex->screen->resource_destroy(old_tex->screen, old_tex);
+         old_tex = next;
+      } while (pipe_reference_described(&old_tex->reference, NULL,
+                                        (debug_reference_descriptor)debug_describe_resource));
    }
    *ptr = tex;
 }
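The hunk above replaces a recursive release of the ->next chain with a loop. A minimal sketch of the same idea on a simplified refcounted list (free() stands in for screen->resource_destroy; types are hypothetical):

#include <stdlib.h>

struct resource {
   int refcount;
   struct resource *next;
};

/* Release a chain of linked resources iteratively: destroying a node
 * never re-enters this function through its ->next pointer, so the
 * caller stays small enough to inline. */
void resource_unref_chain(struct resource *res)
{
   /* Keep walking only while each node's last reference is dropped. */
   while (res && --res->refcount == 0) {
      struct resource *next = res->next;
      free(res); /* stand-in for screen->resource_destroy() */
      res = next;
   }
}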
@@ -423,8 +423,22 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
          unsigned size = vb->stride ? num_vertices * vb->stride
                                     : sizeof(double)*4;
 
-         if (offset+size > vb->buffer->width0) {
+         if (offset + size > vb->buffer->width0) {
+            /* Don't try to map past end of buffer. This often happens when
+             * we're translating an attribute that's at offset > 0 from the
+             * start of the vertex. If we'd subtract attrib's offset from
+             * the size, this probably wouldn't happen.
+             */
             size = vb->buffer->width0 - offset;
+
+            /* Also adjust num_vertices. A common user error is to call
+             * glDrawRangeElements() with incorrect 'end' argument. The 'end
+             * value should be the max index value, but people often
+             * accidentally add one to this value. This adjustment avoids
+             * crashing (by reading past the end of a hardware buffer mapping)
+             * when people do that.
+             */
+            num_vertices = (size + vb->stride - 1) / vb->stride;
          }
 
          map = pipe_buffer_map_range(mgr->pipe, vb->buffer, offset, size,
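A minimal sketch of the clamping arithmetic the hunk above introduces (a hypothetical helper; Mesa does this inline, and a non-zero stride is assumed):

/* Clamp the mapped range to the buffer and re-derive how many whole
 * (possibly partial, hence the round-up) vertices still fit, so neither
 * the map nor the translate loop reads past the end of the buffer. */
static unsigned clamp_vertices(unsigned offset, unsigned stride,
                               unsigned num_vertices, unsigned width0,
                               unsigned *size)
{
   *size = num_vertices * stride;
   if (offset + *size > width0) {
      *size = width0 - offset;
      num_vertices = (*size + stride - 1) / stride; /* round up */
   }
   return num_vertices;
}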
@@ -48,7 +48,7 @@ etna_blend_state_create(struct pipe_context *pctx,
     * - NOT source factor is ONE and destination factor ZERO for both rgb and
     *   alpha (which would mean that blending is effectively disabled)
     */
-   bool enable = rt0->blend_enable &&
+   co->enable = rt0->blend_enable &&
                  !(rt0->rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
                    rt0->rgb_dst_factor == PIPE_BLENDFACTOR_ZERO &&
                    rt0->alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
@@ -59,17 +59,11 @@ etna_blend_state_create(struct pipe_context *pctx,
     * - NOT source factor is equal to destination factor for both rgb abd
     *   alpha (which would effectively that mean alpha is not separate)
     */
-   bool separate_alpha = enable &&
+   bool separate_alpha = co->enable &&
                          !(rt0->rgb_src_factor == rt0->alpha_src_factor &&
                            rt0->rgb_dst_factor == rt0->alpha_dst_factor);
 
-   /* If the complete render target is written, set full_overwrite:
-    * - The color mask is 1111
-    * - No blending is used
-    */
-   bool full_overwrite = (rt0->colormask == 15) && !enable;
-
-   if (enable) {
+   if (co->enable) {
       co->PE_ALPHA_CONFIG =
          VIVS_PE_ALPHA_CONFIG_BLEND_ENABLE_COLOR |
          COND(separate_alpha, VIVS_PE_ALPHA_CONFIG_BLEND_SEPARATE_ALPHA) |
@@ -83,10 +77,6 @@ etna_blend_state_create(struct pipe_context *pctx,
       co->PE_ALPHA_CONFIG = 0;
    }
 
-   co->PE_COLOR_FORMAT =
-         VIVS_PE_COLOR_FORMAT_COMPONENTS(rt0->colormask) |
-         COND(full_overwrite, VIVS_PE_COLOR_FORMAT_OVERWRITE);
-
    co->PE_LOGIC_OP =
          VIVS_PE_LOGIC_OP_OP(so->logicop_enable ? so->logicop_func : LOGIC_OP_COPY) |
          0x000E4000 /* ??? */;
@@ -107,3 +97,35 @@ etna_blend_state_create(struct pipe_context *pctx,
 
    return co;
 }
+
+bool
+etna_update_blend(struct etna_context *ctx)
+{
+   struct pipe_framebuffer_state *pfb = &ctx->framebuffer_s;
+   struct pipe_blend_state *pblend = ctx->blend;
+   struct etna_blend_state *blend = etna_blend_state(pblend);
+   const struct pipe_rt_blend_state *rt0 = &pblend->rt[0];
+   uint32_t colormask;
+
+   if (pfb->cbufs[0] &&
+       translate_rs_format_rb_swap(pfb->cbufs[0]->texture->format)) {
+      colormask = rt0->colormask & (PIPE_MASK_A | PIPE_MASK_G);
+      if (rt0->colormask & PIPE_MASK_R)
+         colormask |= PIPE_MASK_B;
+      if (rt0->colormask & PIPE_MASK_B)
+         colormask |= PIPE_MASK_R;
+   } else {
+      colormask = rt0->colormask;
+   }
+
+   /* If the complete render target is written, set full_overwrite:
+    * - The color mask is 1111
+    * - No blending is used
+    */
+   bool full_overwrite = (rt0->colormask == 0xf) && !blend->enable;
+   blend->PE_COLOR_FORMAT =
+         VIVS_PE_COLOR_FORMAT_COMPONENTS(colormask) |
+         COND(full_overwrite, VIVS_PE_COLOR_FORMAT_OVERWRITE);
+
+   return true;
+}
@@ -30,9 +30,13 @@
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 
+struct etna_context;
+
 struct etna_blend_state {
    struct pipe_blend_state base;
 
+   bool enable;
+
    uint32_t PE_ALPHA_CONFIG;
    uint32_t PE_COLOR_FORMAT;
    uint32_t PE_LOGIC_OP;
@@ -49,4 +53,7 @@ void *
 etna_blend_state_create(struct pipe_context *pctx,
                         const struct pipe_blend_state *so);
 
+bool
+etna_update_blend(struct etna_context *ctx);
+
 #endif
@@ -100,13 +100,24 @@ etna_rs_gen_clear_surface(struct etna_context *ctx, struct etna_surface *surf,
    });
 }
 
+static inline uint32_t
+pack_rgba(enum pipe_format format, const float *rgba)
+{
+   union util_color uc;
+   util_pack_color(rgba, format, &uc);
+   if (util_format_get_blocksize(format) == 2)
+      return uc.ui[0] << 16 | uc.ui[0];
+   else
+      return uc.ui[0];
+}
+
 static void
 etna_blit_clear_color(struct pipe_context *pctx, struct pipe_surface *dst,
                       const union pipe_color_union *color)
 {
    struct etna_context *ctx = etna_context(pctx);
    struct etna_surface *surf = etna_surface(dst);
-   uint32_t new_clear_value = translate_clear_color(surf->base.format, color);
+   uint32_t new_clear_value = pack_rgba(surf->base.format, color->f);
 
    if (surf->surf.ts_size) { /* TS: use precompiled clear command */
       ctx->framebuffer.TS_COLOR_CLEAR_VALUE = new_clear_value;
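pack_rgba() above relies on replicating a 16-bit packed color into both halves of the 32-bit clear register. A minimal sketch of that trick in isolation (the helper name is hypothetical):

#include <stdint.h>

/* The clear-value register is 32 bits wide, so a 16-bit packed color
 * (e.g. B5G6R5) must be duplicated into both halves; 32-bit formats
 * are used as-is. */
static uint32_t clear_pattern(uint32_t packed, int bytes_per_pixel)
{
   return bytes_per_pixel == 2 ? (packed << 16) | packed : packed;
}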
@@ -287,8 +298,6 @@ etna_resource_copy_region(struct pipe_context *pctx, struct pipe_resource *dst,
 
    /* The resource must be of the same format. */
    assert(src->format == dst->format);
-   /* Resources with nr_samples > 1 are not allowed. */
-   assert(src->nr_samples <= 1 && dst->nr_samples <= 1);
 
    /* XXX we can use the RS as a literal copy engine here
     * the only complexity is tiling; the size of the boxes needs to be aligned
@@ -448,7 +457,8 @@ etna_try_rs_blit(struct pipe_context *pctx,
    if (width > src_lev->padded_width ||
        width > dst_lev->padded_width * msaa_xscale ||
        height > src_lev->padded_height ||
-       height > dst_lev->padded_height * msaa_yscale)
+       height > dst_lev->padded_height * msaa_yscale ||
+       width & (w_align - 1) || height & (h_align - 1))
       goto manual;
 
    if (src->base.nr_samples > 1) {
@@ -593,10 +603,11 @@ etna_flush_resource(struct pipe_context *pctx, struct pipe_resource *prsc)
 {
    struct etna_resource *rsc = etna_resource(prsc);
 
-   if (rsc->scanout &&
-       etna_resource_older(etna_resource(rsc->scanout->prime), rsc)) {
-      etna_copy_resource(pctx, rsc->scanout->prime, prsc, 0, 0);
-      etna_resource(rsc->scanout->prime)->seqno = rsc->seqno;
+   if (rsc->scanout) {
+      if (etna_resource_older(etna_resource(rsc->scanout->prime), rsc)) {
+         etna_copy_resource(pctx, rsc->scanout->prime, prsc, 0, 0);
+         etna_resource(rsc->scanout->prime)->seqno = rsc->seqno;
+      }
    } else if (etna_resource_needs_flush(rsc)) {
       etna_copy_resource(pctx, prsc, prsc, 0, 0);
       rsc->flush_seqno = rsc->seqno;
@@ -627,9 +638,9 @@ etna_copy_resource(struct pipe_context *pctx, struct pipe_resource *dst,
    for (int level = first_level; level <= last_level; level++) {
       blit.src.level = blit.dst.level = level;
       blit.src.box.width = blit.dst.box.width =
-         MIN2(src_priv->levels[level].width, dst_priv->levels[level].width);
+         MIN2(src_priv->levels[level].padded_width, dst_priv->levels[level].padded_width);
      blit.src.box.height = blit.dst.box.height =
-         MIN2(src_priv->levels[level].height, dst_priv->levels[level].height);
+         MIN2(src_priv->levels[level].padded_height, dst_priv->levels[level].padded_height);
 
       for (int layer = 0; layer < dst->array_size; layer++) {
          blit.src.box.z = blit.dst.box.z = layer;
@@ -341,6 +341,8 @@ etna_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 static float
 etna_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
 {
+   struct etna_screen *screen = etna_screen(pscreen);
+
    switch (param) {
    case PIPE_CAPF_MAX_LINE_WIDTH:
    case PIPE_CAPF_MAX_LINE_WIDTH_AA:
@@ -350,7 +352,7 @@ etna_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
    case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
       return 16.0f;
    case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
-      return 16.0f;
+      return util_last_bit(screen->specs.max_texture_size);
    case PIPE_CAPF_GUARD_BAND_LEFT:
    case PIPE_CAPF_GUARD_BAND_TOP:
    case PIPE_CAPF_GUARD_BAND_RIGHT:
@@ -29,6 +29,7 @@
 
 #include "hw/common.xml.h"
 
+#include "etnaviv_blend.h"
 #include "etnaviv_clear_blit.h"
 #include "etnaviv_context.h"
 #include "etnaviv_format.h"
@@ -624,6 +625,9 @@ static const struct etna_state_updater etna_state_updates[] = {
    },
    {
       etna_shader_link, ETNA_DIRTY_SHADER,
+   },
+   {
+      etna_update_blend, ETNA_DIRTY_BLEND | ETNA_DIRTY_FRAMEBUFFER
    }
 };
 
@@ -70,6 +70,9 @@ etna_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans)
    if (rsc->texture && !etna_resource_newer(rsc, etna_resource(rsc->texture)))
       rsc = etna_resource(rsc->texture); /* switch to using the texture resource */
 
+   if (trans->rsc)
+      etna_bo_cpu_fini(etna_resource(trans->rsc)->bo);
+
    if (ptrans->usage & PIPE_TRANSFER_WRITE) {
       if (trans->rsc) {
          /* We have a temporary resource due to either tile status or
@@ -105,15 +108,15 @@ etna_transfer_unmap(struct pipe_context *pctx, struct pipe_transfer *ptrans)
       }
 
       rsc->seqno++;
-      etna_bo_cpu_fini(rsc->bo);
 
       if (rsc->base.bind & PIPE_BIND_SAMPLER_VIEW) {
          /* XXX do we need to flush the CPU cache too or start a write barrier
          * to make sure the GPU sees it? */
          ctx->dirty |= ETNA_DIRTY_TEXTURE_CACHES;
       }
    }
 
+   if (!trans->rsc)
+      etna_bo_cpu_fini(rsc->bo);
+
    pipe_resource_reference(&trans->rsc, NULL);
    pipe_resource_reference(&ptrans->resource, NULL);
    slab_free(&ctx->transfer_pool, trans);
|
@@ -405,53 +405,6 @@ etna_layout_multiple(unsigned layout, unsigned pixel_pipes, bool rs_align,
|
||||
}
|
||||
}
|
||||
|
||||
/* return 32-bit clear pattern for color */
|
||||
static inline uint32_t
|
||||
translate_clear_color(enum pipe_format format,
|
||||
const union pipe_color_union *color)
|
||||
{
|
||||
uint32_t clear_value = 0;
|
||||
|
||||
// XXX util_pack_color
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
clear_value = etna_cfloat_to_uintN(color->f[2], 8) |
|
||||
(etna_cfloat_to_uintN(color->f[1], 8) << 8) |
|
||||
(etna_cfloat_to_uintN(color->f[0], 8) << 16) |
|
||||
(etna_cfloat_to_uintN(color->f[3], 8) << 24);
|
||||
break;
|
||||
case PIPE_FORMAT_B4G4R4X4_UNORM:
|
||||
case PIPE_FORMAT_B4G4R4A4_UNORM:
|
||||
clear_value = etna_cfloat_to_uintN(color->f[2], 4) |
|
||||
(etna_cfloat_to_uintN(color->f[1], 4) << 4) |
|
||||
(etna_cfloat_to_uintN(color->f[0], 4) << 8) |
|
||||
(etna_cfloat_to_uintN(color->f[3], 4) << 12);
|
||||
clear_value |= clear_value << 16;
|
||||
break;
|
||||
case PIPE_FORMAT_B5G5R5X1_UNORM:
|
||||
case PIPE_FORMAT_B5G5R5A1_UNORM:
|
||||
clear_value = etna_cfloat_to_uintN(color->f[2], 5) |
|
||||
(etna_cfloat_to_uintN(color->f[1], 5) << 5) |
|
||||
(etna_cfloat_to_uintN(color->f[0], 5) << 10) |
|
||||
(etna_cfloat_to_uintN(color->f[3], 1) << 15);
|
||||
clear_value |= clear_value << 16;
|
||||
break;
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
clear_value = etna_cfloat_to_uintN(color->f[2], 5) |
|
||||
(etna_cfloat_to_uintN(color->f[1], 6) << 5) |
|
||||
(etna_cfloat_to_uintN(color->f[0], 5) << 11);
|
||||
clear_value |= clear_value << 16;
|
||||
break;
|
||||
default:
|
||||
DBG("Unhandled pipe format for color clear: %i", format);
|
||||
}
|
||||
|
||||
return clear_value;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
translate_clear_depth_stencil(enum pipe_format format, float depth,
|
||||
unsigned stencil)
|
||||
|
@@ -2628,6 +2628,10 @@ Converter::handleLOAD(Value *dst0[4])
|
||||
const int r = tgsi.getSrc(0).getIndex(0);
|
||||
int c;
|
||||
std::vector<Value *> off, src, ldv, def;
|
||||
Value *ind = NULL;
|
||||
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
|
||||
|
||||
switch (tgsi.getSrc(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
@@ -2654,8 +2658,8 @@ Converter::handleLOAD(Value *dst0[4])
|
||||
|
||||
Instruction *ld = mkLoad(TYPE_U32, dst0[c], sym, off);
|
||||
ld->cache = tgsi.getCacheMode();
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
ld->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
|
||||
if (ind)
|
||||
ld->setIndirect(0, 1, ind);
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_IMAGE: {
|
||||
@@ -2677,8 +2681,8 @@ Converter::handleLOAD(Value *dst0[4])
|
||||
ld->tex.mask = tgsi.getDst(0).getMask();
|
||||
ld->tex.format = getImageFormat(code, r);
|
||||
ld->cache = tgsi.getCacheMode();
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
ld->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
|
||||
if (ind)
|
||||
ld->setIndirectR(ind);
|
||||
|
||||
FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi)
|
||||
if (dst0[c] != def[c])
|
||||
@@ -2766,6 +2770,10 @@ Converter::handleSTORE()
|
||||
const int r = tgsi.getDst(0).getIndex(0);
|
||||
int c;
|
||||
std::vector<Value *> off, src, dummy;
|
||||
Value *ind = NULL;
|
||||
|
||||
if (tgsi.getDst(0).isIndirect(0))
|
||||
ind = fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0);
|
||||
|
||||
switch (tgsi.getDst(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
@@ -2788,8 +2796,8 @@ Converter::handleSTORE()
|
||||
|
||||
Instruction *st = mkStore(OP_STORE, TYPE_U32, sym, off, fetchSrc(1, c));
|
||||
st->cache = tgsi.getCacheMode();
|
||||
if (tgsi.getDst(0).isIndirect(0))
|
||||
st->setIndirect(0, 1, fetchSrc(tgsi.getDst(0).getIndirect(0), 0, 0));
|
||||
if (ind)
|
||||
st->setIndirect(0, 1, ind);
|
||||
}
|
||||
break;
|
||||
case TGSI_FILE_IMAGE: {
|
||||
@@ -2807,8 +2815,8 @@ Converter::handleSTORE()
|
||||
st->tex.mask = tgsi.getDst(0).getMask();
|
||||
st->tex.format = getImageFormat(code, r);
|
||||
st->cache = tgsi.getCacheMode();
|
||||
if (tgsi.getDst(0).isIndirect(0))
|
||||
st->setIndirectR(fetchSrc(tgsi.getDst(0).getIndirect(0), 0, NULL));
|
||||
if (ind)
|
||||
st->setIndirectR(ind);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
@@ -2877,6 +2885,10 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
||||
std::vector<Value *> srcv;
|
||||
std::vector<Value *> defv;
|
||||
LValue *dst = getScratch();
|
||||
Value *ind = NULL;
|
||||
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
|
||||
|
||||
switch (tgsi.getSrc(0).getFile()) {
|
||||
case TGSI_FILE_BUFFER:
|
||||
@@ -2886,23 +2898,21 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
||||
continue;
|
||||
|
||||
Instruction *insn;
|
||||
Value *off = fetchSrc(1, c), *off2 = NULL;
|
||||
Value *off = fetchSrc(1, c);
|
||||
Value *sym;
|
||||
if (tgsi.getSrc(1).getFile() == TGSI_FILE_IMMEDIATE)
|
||||
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c,
|
||||
tgsi.getSrc(1).getValueU32(c, info));
|
||||
else
|
||||
sym = makeSym(tgsi.getSrc(0).getFile(), r, -1, c, 0);
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
off2 = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
|
||||
if (subOp == NV50_IR_SUBOP_ATOM_CAS)
|
||||
insn = mkOp3(OP_ATOM, ty, dst, sym, fetchSrc(2, c), fetchSrc(3, c));
|
||||
else
|
||||
insn = mkOp2(OP_ATOM, ty, dst, sym, fetchSrc(2, c));
|
||||
if (tgsi.getSrc(1).getFile() != TGSI_FILE_IMMEDIATE)
|
||||
insn->setIndirect(0, 0, off);
|
||||
if (off2)
|
||||
insn->setIndirect(0, 1, off2);
|
||||
if (ind)
|
||||
insn->setIndirect(0, 1, ind);
|
||||
insn->subOp = subOp;
|
||||
}
|
||||
for (int c = 0; c < 4; ++c)
|
||||
@@ -2925,8 +2935,8 @@ Converter::handleATOM(Value *dst0[4], DataType ty, uint16_t subOp)
|
||||
tex->tex.mask = 1;
|
||||
tex->tex.format = getImageFormat(code, r);
|
||||
tex->setType(ty);
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
tex->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
|
||||
if (ind)
|
||||
tex->setIndirectR(ind);
|
||||
|
||||
for (int c = 0; c < 4; ++c)
|
||||
if (dst0[c])
|
||||
@@ -3798,12 +3808,14 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
break;
|
||||
case TGSI_OPCODE_RESQ:
|
||||
if (tgsi.getSrc(0).getFile() == TGSI_FILE_BUFFER) {
|
||||
Value *ind = NULL;
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
|
||||
geni = mkOp1(OP_BUFQ, TYPE_U32, dst0[0],
|
||||
makeSym(tgsi.getSrc(0).getFile(),
|
||||
tgsi.getSrc(0).getIndex(0), -1, 0, 0));
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
geni->setIndirect(0, 1,
|
||||
fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));
|
||||
if (ind)
|
||||
geni->setIndirect(0, 1, ind);
|
||||
} else {
|
||||
assert(tgsi.getSrc(0).getFile() == TGSI_FILE_IMAGE);
|
||||
|
||||
@@ -3816,10 +3828,11 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
}
|
||||
texi->tex.r = tgsi.getSrc(0).getIndex(0);
|
||||
texi->tex.target = getImageTarget(code, texi->tex.r);
|
||||
bb->insertTail(texi);
|
||||
|
||||
if (tgsi.getSrc(0).isIndirect(0))
|
||||
texi->setIndirectR(fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, NULL));
|
||||
|
||||
bb->insertTail(texi);
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_IBFE:
|
||||
|
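All of the handleLOAD/handleSTORE/handleATOM/RESQ hunks above make the same change: the indirect source used to be re-fetched with fetchSrc() at each use site (once per enabled channel), and is now fetched a single time up front into `ind` and reused. A minimal, hedged sketch of the pattern (identifiers from the diff, the loop shape is hypothetical):

   /* Before: one fetch per channel, emitted in the middle of the memory op. */
   for (c = 0; c < 4; c++)
      if (tgsi.getSrc(0).isIndirect(0))
         insn->setIndirect(0, 1, fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0));

   /* After: fetch once, before the memory op, and reuse the SSA value. */
   Value *ind = NULL;
   if (tgsi.getSrc(0).isIndirect(0))
      ind = fetchSrc(tgsi.getSrc(0).getIndirect(0), 0, 0);
   for (c = 0; c < 4; c++)
      if (ind)
         insn->setIndirect(0, 1, ind);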
@@ -938,8 +938,9 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
bld.setPosition(i, false);

uint8_t size = i->getDef(0)->reg.size;
uint32_t mask = (1ULL << size) - 1;
assert(size <= 32);
uint8_t bitsize = size * 8;
uint32_t mask = (1ULL << bitsize) - 1;
assert(bitsize <= 32);

uint64_t val = imm0.reg.data.u64;
for (int8_t d = 0; i->defExists(d); ++d) {
@@ -947,7 +948,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
assert(def->reg.size == size);

newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), TYPE_U32);
val >>= size;
val >>= bitsize;
}
delete_Instruction(prog, i);
break;
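The bitsize fix above is a units bug: reg.size is in bytes, while both the mask and the shift in the split loop need bits. A worked fragment for a 32-bit register (size = 4):

   uint8_t size = 4;                           /* reg.size, in bytes */
   uint32_t old_mask = (1ULL << size) - 1;     /* 0xF: masked 4 bits, not 4 bytes */
   uint8_t bitsize = size * 8;                 /* 32 */
   uint32_t new_mask = (1ULL << bitsize) - 1;  /* 0xFFFFFFFF, as intended */
   /* likewise the loop must advance by bits: val >>= bitsize */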
@@ -2485,6 +2486,10 @@ MemoryOpt::combineLd(Record *rec, Instruction *ld)

assert(sizeRc + sizeLd <= 16 && offRc != offLd);

// lock any stores that overlap with the load being merged into the
// existing record.
lockStores(ld);

for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j);

if (offLd < offRc) {
@@ -2541,6 +2546,10 @@ MemoryOpt::combineSt(Record *rec, Instruction *st)
if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
return false;

// remove any existing load/store records for the store being merged into
// the existing record.
purgeRecords(st, DATA_FILE_COUNT);

st->takeExtraSources(0, extra); // save predicate and indirect address

if (offRc < offSt) {
@@ -90,11 +90,20 @@ nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
bindings &= ~(PIPE_BIND_LINEAR |
PIPE_BIND_SHARED);

if (bindings & PIPE_BIND_SHADER_IMAGE && sample_count > 1 &&
nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
/* MS images are currently unsupported on Maxwell because they have to
* be handled explicitly. */
return false;
if (bindings & PIPE_BIND_SHADER_IMAGE) {
if (sample_count > 1 &&
nouveau_screen(pscreen)->class_3d >= GM107_3D_CLASS) {
/* MS images are currently unsupported on Maxwell because they have to
* be handled explicitly. */
return false;
}

if (format == PIPE_FORMAT_B8G8R8A8_UNORM &&
nouveau_screen(pscreen)->class_3d < NVE4_3D_CLASS) {
/* This should work on Fermi, but for currently unknown reasons it
* does not and results in breaking reads from pbos. */
return false;
}
}

return (( nvc0_format_table[format].usage |
@@ -1965,6 +1965,8 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,
unsigned level = templ->u.tex.level;
unsigned width = u_minify(tex->width0, level);
unsigned height = u_minify(tex->height0, level);
unsigned width0 = tex->width0;
unsigned height0 = tex->height0;

if (tex->target != PIPE_BUFFER && templ->format != tex->format) {
const struct util_format_description *tex_desc
@@ -1983,11 +1985,14 @@ static struct pipe_surface *r600_create_surface(struct pipe_context *pipe,

width = nblks_x * templ_desc->block.width;
height = nblks_y * templ_desc->block.height;

width0 = util_format_get_nblocksx(tex->format, width0);
height0 = util_format_get_nblocksy(tex->format, height0);
}
}

return r600_create_surface_custom(pipe, tex, templ,
tex->width0, tex->height0,
width0, height0,
width, height);
}

@@ -366,6 +366,7 @@ struct si_context {
struct si_shader_selector *last_tcs;
int last_num_tcs_input_cp;
int last_tes_sh_base;
bool last_tess_uses_primid;
unsigned last_num_patches;

/* Debug state. */

@@ -72,9 +72,9 @@
#include <llvm-c/TargetMachine.h>
#include "tgsi/tgsi_scan.h"
#include "util/u_queue.h"
#include "si_state.h"

struct ac_shader_binary;
#include "ac_binary.h"
#include "si_state.h"

#define SI_MAX_VS_OUTPUTS 40

@@ -101,6 +101,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
* It would be wrong to think that TCS = TES. */
struct si_shader_selector *tcs =
sctx->tcs_shader.cso ? sctx->tcs_shader.cso : sctx->tes_shader.cso;
unsigned tess_uses_primid = sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id;
bool has_primid_instancing_bug = sctx->b.chip_class == SI &&
sctx->b.screen->info.max_se == 1;
unsigned tes_sh_base = sctx->shader_userdata.sh_base[PIPE_SHADER_TESS_EVAL];
unsigned num_tcs_input_cp = info->vertices_per_patch;
unsigned num_tcs_output_cp, num_tcs_inputs, num_tcs_outputs;
@@ -114,7 +117,9 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->last_ls == ls->current &&
sctx->last_tcs == tcs &&
sctx->last_tes_sh_base == tes_sh_base &&
sctx->last_num_tcs_input_cp == num_tcs_input_cp) {
sctx->last_num_tcs_input_cp == num_tcs_input_cp &&
(!has_primid_instancing_bug ||
(sctx->last_tess_uses_primid == tess_uses_primid))) {
*num_patches = sctx->last_num_patches;
return;
}
@@ -123,6 +128,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
sctx->last_tcs = tcs;
sctx->last_tes_sh_base = tes_sh_base;
sctx->last_num_tcs_input_cp = num_tcs_input_cp;
sctx->last_tess_uses_primid = tess_uses_primid;

/* This calculates how shader inputs and outputs among VS, TCS, and TES
* are laid out in LDS. */
@@ -155,8 +161,12 @@ static void si_emit_derived_tess_state(struct si_context *sctx,

/* Make sure that the data fits in LDS. This assumes the shaders only
* use LDS for the inputs and outputs.
*
* While CIK can use 64K per threadgroup, there is a hang on Stoney
* with 2 CUs if we use more than 32K. The closed Vulkan driver also
* uses 32K at most on all GCN chips.
*/
hardware_lds_size = sctx->b.chip_class >= CIK ? 65536 : 32768;
hardware_lds_size = 32768;
*num_patches = MIN2(*num_patches, hardware_lds_size / (input_patch_size +
output_patch_size));

@@ -174,22 +184,21 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
if (sctx->b.chip_class == SI) {
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
*num_patches = MIN2(*num_patches, one_wave);

if (sctx->screen->b.info.max_se == 1) {
/* The VGT HS block increments the patch ID unconditionally
* within a single threadgroup. This results in incorrect
* patch IDs when instanced draws are used.
*
* The intended solution is to restrict threadgroups to
* a single instance by setting SWITCH_ON_EOI, which
* should cause IA to split instances up. However, this
* doesn't work correctly on SI when there is no other
* SE to switch to.
*/
*num_patches = 1;
}
}

/* The VGT HS block increments the patch ID unconditionally
* within a single threadgroup. This results in incorrect
* patch IDs when instanced draws are used.
*
* The intended solution is to restrict threadgroups to
* a single instance by setting SWITCH_ON_EOI, which
* should cause IA to split instances up. However, this
* doesn't work correctly on SI when there is no other
* SE to switch to.
*/
if (has_primid_instancing_bug)
*num_patches = 1;

sctx->last_num_patches = *num_patches;

output_patch0_offset = input_patch_size * *num_patches;
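The num_patches clamp above is plain arithmetic; with the new flat 32K limit and hypothetical patch sizes it behaves like this (MIN2 is Mesa's minimum macro):

   unsigned hardware_lds_size = 32768;   /* bytes; now used on all chips */
   unsigned input_patch_size = 1536;     /* hypothetical example values */
   unsigned output_patch_size = 2048;
   /* 32768 / (1536 + 2048) = 9, so at most 9 patches per threadgroup */
   num_patches = MIN2(num_patches,
                      hardware_lds_size / (input_patch_size + output_patch_size));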
@@ -362,7 +362,21 @@ svga_screen_cache_flush(struct svga_screen *svgascreen,
/* It is now safe to invalidate the surface content.
* It will be done using the current context.
*/
svga->swc->surface_invalidate(svga->swc, entry->handle);
if (svga->swc->surface_invalidate(svga->swc, entry->handle) != PIPE_OK) {
enum pipe_error ret;

/* Even though surface invalidation here is done after the command
* buffer is flushed, it is still possible that it will
* fail because there might be just enough of this command that is
* filling up the command buffer, so in this case we will call
* the winsys flush directly to flush the buffer.
* Note, we don't want to call svga_context_flush() here because
* this function itself is called inside svga_context_flush().
*/
svga->swc->flush(svga->swc, NULL);
ret = svga->swc->surface_invalidate(svga->swc, entry->handle);
assert(ret == PIPE_OK);
}

/* add the entry to the invalidated list */
LIST_ADD(&entry->head, &cache->invalidated);
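The construct above is a try/flush/retry idiom for a full command buffer. A condensed, hypothetical helper with the same shape (not part of the diff; uses the winsys types visible above):

   static enum pipe_error
   invalidate_with_retry(struct svga_context *svga,
                         struct svga_winsys_surface *surf)
   {
      enum pipe_error ret = svga->swc->surface_invalidate(svga->swc, surf);
      if (ret != PIPE_OK) {
         /* The command didn't fit; flush the winsys buffer and retry once.
          * An empty buffer is assumed to always have room. */
         svga->swc->flush(svga->swc, NULL);
         ret = svga->swc->surface_invalidate(svga->swc, surf);
      }
      return ret;
   }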
@@ -190,6 +190,8 @@ emit_hw_gs(struct svga_context *svga, unsigned dirty)
* Needs to unbind the geometry shader.
*/
ret = svga_set_shader(svga, SVGA3D_SHADERTYPE_GS, NULL);
if (ret != PIPE_OK)
goto done;
svga->state.hw_draw.gs = NULL;
}
goto done;
@@ -502,10 +502,10 @@ svga_validate_surface_view(struct svga_context *svga, struct svga_surface *s)
* need to update the host-side copy with the invalid
* content when the associated mob is first bound to the surface.
*/
ret = SVGA3D_InvalidateGBSurface(svga->swc, stex->handle);
if (ret != PIPE_OK) {
s = NULL;
goto done;
if (svga->swc->surface_invalidate(svga->swc, stex->handle) != PIPE_OK) {
svga_context_flush(svga, NULL);
ret = svga->swc->surface_invalidate(svga->swc, stex->handle);
assert(ret == PIPE_OK);
}
stex->validated = TRUE;
}
@@ -394,7 +394,7 @@ struct svga_winsys_context
/**
* Invalidate the content of this surface
*/
void
enum pipe_error
(*surface_invalidate)(struct svga_winsys_context *swc,
struct svga_winsys_surface *surface);

@@ -22,7 +22,7 @@
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc

AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(SWR_CXX14_CXXFLAGS)
AM_CXXFLAGS = $(GALLIUM_DRIVER_CFLAGS) $(SWR_CXX11_CXXFLAGS)

noinst_LTLIBRARIES = libmesaswr.la

@@ -32,7 +32,7 @@ COMMON_CXXFLAGS = \
-fno-strict-aliasing \
$(GALLIUM_DRIVER_CFLAGS) \
$(LLVM_CXXFLAGS) \
$(SWR_CXX14_CXXFLAGS) \
$(SWR_CXX11_CXXFLAGS) \
-I$(builddir)/rasterizer/codegen \
-I$(builddir)/rasterizer/jitter \
-I$(builddir)/rasterizer/archrast \

@@ -38,7 +38,7 @@ loadersource = env.ParseSourceList('Makefile.sources', [

if not env['msvc'] :
env.Append(CCFLAGS = [
'-std=c++14',
'-std=c++11',
])

swrroot = '#src/gallium/drivers/swr/'
@@ -953,26 +953,27 @@ public:


private:
template <typename MaskT>
INLINE __m128i expandThenBlend4(uint32_t* min, uint32_t* max) // @llvm_func_start
{
__m128i vMin = _mm_set1_epi32(*min);
__m128i vMax = _mm_set1_epi32(*max);
return _simd_blend4_epi32<MaskT::value>(vMin, vMax);
} // @llvm_func_end

INLINE void CalcTileSampleOffsets(int numSamples) // @llvm_func_start
{
auto expandThenBlend4 = [](uint32_t* min, uint32_t* max, auto mask)
{
__m128i vMin = _mm_set1_epi32(*min);
__m128i vMax = _mm_set1_epi32(*max);
return _simd_blend4_epi32<decltype(mask)::value>(vMin, vMax);
};

{
auto minXi = std::min_element(std::begin(_xi), &_xi[numSamples]);
auto maxXi = std::max_element(std::begin(_xi), &_xi[numSamples]);
std::integral_constant<int, 0xA> xMask;
using xMask = std::integral_constant<int, 0xA>;
// BR(max), BL(min), UR(max), UL(min)
tileSampleOffsetsX = expandThenBlend4(minXi, maxXi, xMask);

tileSampleOffsetsX = expandThenBlend4<xMask>(minXi, maxXi);

auto minYi = std::min_element(std::begin(_yi), &_yi[numSamples]);
auto maxYi = std::max_element(std::begin(_yi), &_yi[numSamples]);
std::integral_constant<int, 0xC> yMask;
using yMask = std::integral_constant<int, 0xC>;
// BR(max), BL(min), UR(max), UL(min)
tileSampleOffsetsY = expandThenBlend4(minYi, maxYi, yMask);
tileSampleOffsetsY = expandThenBlend4<yMask>(minYi, maxYi);
}; // @llvm_func_end
// scalar sample values
uint32_t _xi[SWR_MAX_NUM_MULTISAMPLES];
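This rewrite exists because generic lambdas (the `auto mask` parameter) are a C++14 feature, and the build hunks above drop SWR back to C++11; the lambda becomes a member template that is instantiated explicitly. A standalone illustration of the same transformation, with hypothetical names:

   #include <type_traits>

   // C++14: generic lambda, mask type deduced from the argument.
   //   auto blend = [](auto mask) { return decltype(mask)::value; };
   //   blend(std::integral_constant<int, 0xA>{});

   // C++11 replacement: an explicit function template.
   template <typename MaskT>
   int blend() { return MaskT::value; }

   int use_it()
   {
      using xMask = std::integral_constant<int, 0xA>;
      return blend<xMask>();   // explicit instantiation replaces deduction
   }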
@@ -101,6 +101,8 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile profile,
if (num_entrypoints == 0)
return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;

assert(*num_entrypoints <= ctx->max_entrypoints);

return VA_STATUS_SUCCESS;
}

@@ -169,7 +169,7 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
*ctx->vtable = vtable;
*ctx->vtable_vpp = vtable_vpp;
ctx->max_profiles = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH - PIPE_VIDEO_PROFILE_UNKNOWN;
ctx->max_entrypoints = 1;
ctx->max_entrypoints = 2;
ctx->max_attributes = 1;
ctx->max_image_formats = VL_VA_MAX_IMAGE_FORMATS;
ctx->max_subpic_formats = 1;
|
||||
lib@GL_LIB@_la_LDFLAGS = \
|
||||
-no-undefined \
|
||||
-version-number $(GL_MAJOR):$(GL_MINOR):$(GL_TINY) \
|
||||
$(BSYMBOLIC) \
|
||||
$(GC_SECTIONS) \
|
||||
$(LD_NO_UNDEFINED)
|
||||
|
||||
|
@@ -747,10 +747,13 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
p_atomic_dec(&cs->sparse_buffers[i].bo->num_cs_references);
amdgpu_winsys_bo_reference(&cs->sparse_buffers[i].bo, NULL);
}
for (i = 0; i < cs->num_fence_dependencies; i++)
amdgpu_fence_reference(&cs->fence_dependencies[i], NULL);

cs->num_real_buffers = 0;
cs->num_slab_buffers = 0;
cs->num_sparse_buffers = 0;
cs->num_fence_dependencies = 0;
amdgpu_fence_reference(&cs->fence, NULL);

memset(cs->buffer_indices_hashlist, -1, sizeof(cs->buffer_indices_hashlist));
@@ -765,7 +768,7 @@ static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
FREE(cs->handles);
FREE(cs->slab_buffers);
FREE(cs->sparse_buffers);
FREE(cs->request.dependencies);
FREE(cs->fence_dependencies);
}


@@ -976,7 +979,6 @@ static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs,
{
struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_winsys_bo *bo = buffer->bo;
struct amdgpu_cs_fence *dep;
unsigned new_num_fences = 0;

for (unsigned j = 0; j < bo->num_fences; ++j) {
@@ -998,21 +1000,21 @@ static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs,
if (!(buffer->usage & RADEON_USAGE_SYNCHRONIZED))
continue;

if (bo_fence->submission_in_progress)
os_wait_until_zero(&bo_fence->submission_in_progress,
PIPE_TIMEOUT_INFINITE);

idx = cs->request.number_of_dependencies++;
if (idx >= cs->max_dependencies) {
idx = cs->num_fence_dependencies++;
if (idx >= cs->max_fence_dependencies) {
unsigned size;
const unsigned increment = 8;

cs->max_dependencies = idx + 8;
size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
cs->request.dependencies = realloc(cs->request.dependencies, size);
cs->max_fence_dependencies = idx + increment;
size = cs->max_fence_dependencies * sizeof(cs->fence_dependencies[0]);
cs->fence_dependencies = realloc(cs->fence_dependencies, size);
/* Clear the newly-allocated elements. */
memset(cs->fence_dependencies + idx, 0,
increment * sizeof(cs->fence_dependencies[0]));
}

dep = &cs->request.dependencies[idx];
memcpy(dep, &bo_fence->fence, sizeof(*dep));
amdgpu_fence_reference(&cs->fence_dependencies[idx],
(struct pipe_fence_handle*)bo_fence);
}

for (unsigned j = new_num_fences; j < bo->num_fences; ++j)
@@ -1083,7 +1085,7 @@ static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
{
struct amdgpu_cs_context *cs = acs->csc;

cs->request.number_of_dependencies = 0;
cs->num_fence_dependencies = 0;

amdgpu_add_fence_dependencies_list(acs, cs->fence, cs->num_real_buffers, cs->real_buffers);
amdgpu_add_fence_dependencies_list(acs, cs->fence, cs->num_slab_buffers, cs->slab_buffers);
@@ -1131,7 +1133,30 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
struct amdgpu_winsys *ws = acs->ctx->ws;
struct amdgpu_cs_context *cs = acs->cst;
int i, r;
struct amdgpu_cs_fence *dependencies = NULL;

/* Set dependencies (input fences). */
if (cs->num_fence_dependencies) {
dependencies = alloca(sizeof(dependencies[0]) *
cs->num_fence_dependencies);
unsigned num = 0;

for (i = 0; i < cs->num_fence_dependencies; i++) {
struct amdgpu_fence *fence =
(struct amdgpu_fence*)cs->fence_dependencies[i];

/* Past fences can't be unsubmitted because we have only 1 CS thread. */
assert(!fence->submission_in_progress);
memcpy(&dependencies[num++], &fence->fence, sizeof(dependencies[0]));
}
cs->request.dependencies = dependencies;
cs->request.number_of_dependencies = num;
} else {
cs->request.dependencies = NULL;
cs->request.number_of_dependencies = 0;
}

/* Set the output fence. */
cs->request.fence_info.handle = NULL;
if (amdgpu_cs_has_user_fence(cs)) {
cs->request.fence_info.handle = acs->ctx->user_fence_bo;
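Two idioms are worth noting in the hunks above: the dependency array grows by a fixed increment with only the new tail zeroed, and the submit path stages fences into a stack-allocated array. A self-contained, hypothetical condensation of the growth logic:

   #include <stdlib.h>
   #include <string.h>

   struct pipe_fence_handle;   /* opaque, as in the winsys */

   static void grow_fence_array(struct pipe_fence_handle ***deps,
                                unsigned idx, unsigned *max)
   {
      const unsigned increment = 8;

      if (idx >= *max) {
         *max = idx + increment;
         *deps = realloc(*deps, *max * sizeof((*deps)[0]));
         /* Clear only the new tail so fence references start out NULL. */
         memset(*deps + idx, 0, increment * sizeof((*deps)[0]));
      }
   }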
@@ -105,7 +105,9 @@ struct amdgpu_cs_context {
unsigned last_added_bo_usage;
uint64_t last_added_bo_priority_usage;

unsigned max_dependencies;
struct pipe_fence_handle **fence_dependencies;
unsigned num_fence_dependencies;
unsigned max_fence_dependencies;

struct pipe_fence_handle *fence;

@@ -176,7 +176,7 @@ vmw_svga_winsys_surface_unmap(struct svga_winsys_context *swc,
mtx_unlock(&vsrf->mutex);
}

void
enum pipe_error
vmw_svga_winsys_surface_invalidate(struct svga_winsys_context *swc,
struct svga_winsys_surface *surf)
{
@@ -186,6 +186,7 @@ vmw_svga_winsys_surface_invalidate(struct svga_winsys_context *swc,
* when guest-backed surface is enabled, that implies DMA is always enabled;
* hence, surface invalidation is not needed.
*/
return PIPE_OK;
}

void
@@ -94,7 +94,7 @@ void
vmw_svga_winsys_surface_unmap(struct svga_winsys_context *swc,
struct svga_winsys_surface *srf,
boolean *rebind);
void
enum pipe_error
vmw_svga_winsys_surface_invalidate(struct svga_winsys_context *swc,
struct svga_winsys_surface *srf);

@@ -235,6 +235,11 @@ dri3_bind_context(struct glx_context *context, struct glx_context *old,
if (!(*psc->core->bindContext) (pcp->driContext, dri_draw, dri_read))
return GLXBadContext;

if (dri_draw)
(*psc->f->invalidate)(dri_draw);
if (dri_read && dri_read != dri_draw)
(*psc->f->invalidate)(dri_read);

return Success;
}

@@ -493,6 +498,7 @@ dri3_flush_front_buffer(__DRIdrawable *driDrawable, void *loaderPrivate)

loader_dri3_flush(draw, __DRI2_FLUSH_DRAWABLE, __DRI2_THROTTLE_FLUSHFRONT);

(*psc->f->invalidate)(driDrawable);
loader_dri3_wait_gl(draw);
}

@@ -132,6 +132,7 @@ static const struct gen_device_info gen_device_info_snb_gt2 = {
static const struct gen_device_info gen_device_info_ivb_gt1 = {
GEN7_FEATURES, .is_ivybridge = true, .gt = 1,
.num_slices = 1,
.l3_banks = 2,
.max_vs_threads = 36,
.max_tcs_threads = 36,
.max_tes_threads = 36,
@@ -156,6 +157,7 @@ static const struct gen_device_info gen_device_info_ivb_gt1 = {
static const struct gen_device_info gen_device_info_ivb_gt2 = {
GEN7_FEATURES, .is_ivybridge = true, .gt = 2,
.num_slices = 1,
.l3_banks = 4,
.max_vs_threads = 128,
.max_tcs_threads = 128,
.max_tes_threads = 128,
@@ -180,6 +182,7 @@ static const struct gen_device_info gen_device_info_ivb_gt2 = {
static const struct gen_device_info gen_device_info_byt = {
GEN7_FEATURES, .is_baytrail = true, .gt = 1,
.num_slices = 1,
.l3_banks = 1,
.has_llc = false,
.max_vs_threads = 36,
.max_tcs_threads = 36,
@@ -211,6 +214,7 @@ static const struct gen_device_info gen_device_info_byt = {
static const struct gen_device_info gen_device_info_hsw_gt1 = {
HSW_FEATURES, .gt = 1,
.num_slices = 1,
.l3_banks = 2,
.max_vs_threads = 70,
.max_tcs_threads = 70,
.max_tes_threads = 70,
@@ -235,6 +239,7 @@ static const struct gen_device_info gen_device_info_hsw_gt1 = {
static const struct gen_device_info gen_device_info_hsw_gt2 = {
HSW_FEATURES, .gt = 2,
.num_slices = 1,
.l3_banks = 4,
.max_vs_threads = 280,
.max_tcs_threads = 256,
.max_tes_threads = 280,
@@ -259,6 +264,7 @@ static const struct gen_device_info gen_device_info_hsw_gt2 = {
static const struct gen_device_info gen_device_info_hsw_gt3 = {
HSW_FEATURES, .gt = 3,
.num_slices = 2,
.l3_banks = 8,
.max_vs_threads = 280,
.max_tcs_threads = 256,
.max_tes_threads = 280,
@@ -299,6 +305,7 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = {
static const struct gen_device_info gen_device_info_bdw_gt1 = {
GEN8_FEATURES, .gt = 1,
.num_slices = 1,
.l3_banks = 2,
.max_cs_threads = 42,
.urb = {
.size = 192,
@@ -318,6 +325,7 @@ static const struct gen_device_info gen_device_info_bdw_gt1 = {
static const struct gen_device_info gen_device_info_bdw_gt2 = {
GEN8_FEATURES, .gt = 2,
.num_slices = 1,
.l3_banks = 4,
.max_cs_threads = 56,
.urb = {
.size = 384,
@@ -337,6 +345,7 @@ static const struct gen_device_info gen_device_info_bdw_gt2 = {
static const struct gen_device_info gen_device_info_bdw_gt3 = {
GEN8_FEATURES, .gt = 3,
.num_slices = 2,
.l3_banks = 8,
.max_cs_threads = 56,
.urb = {
.size = 384,
@@ -357,6 +366,7 @@ static const struct gen_device_info gen_device_info_chv = {
GEN8_FEATURES, .is_cherryview = 1, .gt = 1,
.has_llc = false,
.num_slices = 1,
.l3_banks = 2,
.max_vs_threads = 80,
.max_tcs_threads = 80,
.max_tes_threads = 80,
@@ -457,22 +467,26 @@ static const struct gen_device_info gen_device_info_chv = {
static const struct gen_device_info gen_device_info_skl_gt1 = {
GEN9_FEATURES, .gt = 1,
.num_slices = 1,
.l3_banks = 2,
.urb.size = 192,
};

static const struct gen_device_info gen_device_info_skl_gt2 = {
GEN9_FEATURES, .gt = 2,
.num_slices = 1,
.l3_banks = 4,
};

static const struct gen_device_info gen_device_info_skl_gt3 = {
GEN9_FEATURES, .gt = 3,
.num_slices = 2,
.l3_banks = 8,
};

static const struct gen_device_info gen_device_info_skl_gt4 = {
GEN9_FEATURES, .gt = 4,
.num_slices = 3,
.l3_banks = 12,
/* From the "L3 Allocation and Programming" documentation:
*
* "URB is limited to 1008KB due to programming restrictions. This is not a
@@ -485,11 +499,13 @@ static const struct gen_device_info gen_device_info_skl_gt4 = {
};

static const struct gen_device_info gen_device_info_bxt = {
GEN9_LP_FEATURES
GEN9_LP_FEATURES,
.l3_banks = 2,
};

static const struct gen_device_info gen_device_info_bxt_2x6 = {
GEN9_LP_FEATURES_2X6
GEN9_LP_FEATURES_2X6,
.l3_banks = 1,
};
/*
* Note: for all KBL SKUs, the PRM says SKL for GS entries, not SKL+.
@@ -504,6 +520,7 @@ static const struct gen_device_info gen_device_info_kbl_gt1 = {
.max_cs_threads = 7 * 6,
.urb.size = 192,
.num_slices = 1,
.l3_banks = 2,
};

static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
@@ -513,6 +530,7 @@ static const struct gen_device_info gen_device_info_kbl_gt1_5 = {

.max_cs_threads = 7 * 6,
.num_slices = 1,
.l3_banks = 4,
};

static const struct gen_device_info gen_device_info_kbl_gt2 = {
@@ -521,6 +539,7 @@ static const struct gen_device_info gen_device_info_kbl_gt2 = {
.gt = 2,

.num_slices = 1,
.l3_banks = 4,
};

static const struct gen_device_info gen_device_info_kbl_gt3 = {
@@ -529,6 +548,7 @@ static const struct gen_device_info gen_device_info_kbl_gt3 = {
.gt = 3,

.num_slices = 2,
.l3_banks = 8,
};

static const struct gen_device_info gen_device_info_kbl_gt4 = {
@@ -548,12 +568,15 @@ static const struct gen_device_info gen_device_info_kbl_gt4 = {
*/
.urb.size = 1008 / 3,
.num_slices = 3,
.l3_banks = 12,
};

static const struct gen_device_info gen_device_info_glk = {
GEN9_LP_FEATURES
GEN9_LP_FEATURES,
.l3_banks = 2,
};

/*TODO: Initialize l3_banks when we know the number. */
static const struct gen_device_info gen_device_info_glk_2x6 = {
GEN9_LP_FEATURES_2X6
};

@@ -96,6 +96,7 @@ struct gen_device_info
* to change, so we program @max_cs_threads as the lower maximum.
*/
unsigned num_slices;
unsigned l3_banks;
unsigned max_vs_threads; /**< Maximum Vertex Shader threads */
unsigned max_tcs_threads; /**< Maximum Hull Shader threads */
unsigned max_tes_threads; /**< Maximum Domain Shader threads */

@@ -101,6 +101,20 @@ static const struct gen_l3_config chv_l3_configs[] = {
{{ 0 }}
};

/**
* BXT 2x6 validated L3 configurations. \sa ivb_l3_configs.
*/
static const struct gen_l3_config bxt_2x6_l3_configs[] = {
/* SLM URB ALL DC RO IS C T */
{{ 0, 32, 48, 0, 0, 0, 0, 0 }},
{{ 0, 32, 0, 8, 40, 0, 0, 0 }},
{{ 0, 32, 0, 32, 16, 0, 0, 0 }},
{{ 16, 16, 48, 0, 0, 0, 0, 0 }},
{{ 16, 16, 0, 40, 8, 0, 0, 0 }},
{{ 16, 16, 0, 16, 32, 0, 0, 0 }},
{{ 0 }}
};

/**
* Return a zero-terminated array of validated L3 configurations for the
* specified device.
@@ -116,6 +130,8 @@ get_l3_configs(const struct gen_device_info *devinfo)
return (devinfo->is_cherryview ? chv_l3_configs : bdw_l3_configs);

case 9:
if (devinfo->l3_banks == 1)
return bxt_2x6_l3_configs;
return chv_l3_configs;

default:
@@ -822,7 +822,7 @@ genX(cmd_buffer_config_l3)(struct anv_cmd_buffer *cmd_buffer,
anv_pack_struct(&l3cr2, GENX(L3CNTLREG2),
.SLMEnable = has_slm,
.URBLowBandwidth = urb_low_bw,
.URBAllocation = cfg->n[GEN_L3P_URB],
.URBAllocation = cfg->n[GEN_L3P_URB] - n0_urb,
#if !GEN_IS_HASWELL
.ALLAllocation = cfg->n[GEN_L3P_ALL],
#endif

@@ -245,3 +245,6 @@ es2api/glapi_mapi_tmp.h: glapi/gen/gl_and_es_API.xml $(glapi_gen_mapi_deps)
$(srcdir)/glapi/gen/gl_and_es_API.xml > $@

include $(top_srcdir)/install-lib-links.mk

khrdir = $(includedir)/KHR
khr_HEADERS = $(top_srcdir)/include/KHR/khrplatform.h

@@ -79,12 +79,13 @@
#define I915_CTXREG_STATE4 0
#define I915_CTXREG_LI 1
#define I915_CTXREG_LIS2 2
#define I915_CTXREG_LIS4 3
#define I915_CTXREG_LIS5 4
#define I915_CTXREG_LIS6 5
#define I915_CTXREG_BF_STENCIL_OPS 6
#define I915_CTXREG_BF_STENCIL_MASKS 7
#define I915_CTX_SETUP_SIZE 8
#define I915_CTXREG_LIS3 3
#define I915_CTXREG_LIS4 4
#define I915_CTXREG_LIS5 5
#define I915_CTXREG_LIS6 6
#define I915_CTXREG_BF_STENCIL_OPS 7
#define I915_CTXREG_BF_STENCIL_MASKS 8
#define I915_CTX_SETUP_SIZE 9

#define I915_BLENDREG_IAB 0
#define I915_BLENDREG_BLENDCOLOR0 1
@@ -116,6 +117,7 @@ enum {
};

#define I915_TEX_UNITS 8
#define I915_WPOS_TEX_INVALID 0xff

#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
@@ -1063,7 +1063,7 @@ check_wpos(struct i915_fragment_program *p)
GLint i;
unsigned unit = 0;

p->wpos_tex = -1;
p->wpos_tex = I915_WPOS_TEX_INVALID;

if ((inputs & VARYING_BIT_POS) == 0)
return;
@@ -1238,6 +1238,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
const GLbitfield64 inputsRead = p->FragProg.info.inputs_read;
GLuint s4 = i915->state.Ctx[I915_CTXREG_LIS4] & ~S4_VFMT_MASK;
GLuint s2 = S2_TEXCOORD_NONE;
GLuint s3 = 0;
int i, offset = 0;

/* Important:
@@ -1252,12 +1253,10 @@ i915ValidateFragmentProgram(struct i915_context *i915)
intel->coloroffset = 0;
intel->specoffset = 0;

if (inputsRead & VARYING_BITS_TEX_ANY || p->wpos_tex != -1) {
EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);
}
else {
EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_3F_VIEWPORT, S4_VFMT_XYZ, 12);
}
/* Always emit W to get consistent perspective
* correct interpolation of primary/secondary colors.
*/
EMIT_ATTR(_TNL_ATTRIB_POS, EMIT_4F_VIEWPORT, S4_VFMT_XYZW, 16);

/* Handle gl_PointSize builtin var here */
if (ctx->Point._Attenuated || ctx->VertexProgram.PointSizeEnabled)
@@ -1303,6 +1302,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
*/
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(wpos_size));
s3 |= S3_TEXCOORD_PERSPECTIVE_DISABLE(i);

intel->wpos_offset = offset;
EMIT_PAD(wpos_size);
@@ -1310,6 +1310,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
}

if (s2 != i915->state.Ctx[I915_CTXREG_LIS2] ||
s3 != i915->state.Ctx[I915_CTXREG_LIS3] ||
s4 != i915->state.Ctx[I915_CTXREG_LIS4]) {
I915_STATECHANGE(i915, I915_UPLOAD_CTX);

@@ -1328,6 +1329,7 @@ i915ValidateFragmentProgram(struct i915_context *i915)
intel->vertex_size >>= 2;

i915->state.Ctx[I915_CTXREG_LIS2] = s2;
i915->state.Ctx[I915_CTXREG_LIS3] = s3;
i915->state.Ctx[I915_CTXREG_LIS4] = s4;

assert(intel->vtbl.check_vertex_size(intel, intel->vertex_size));
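The sentinel change (-1 to I915_WPOS_TEX_INVALID, defined as 0xff in the i915_context.h hunk above) matters if wpos_tex is ever held in an unsigned 8-bit field, where a comparison against -1 can never be true. A hedged two-line illustration of the pitfall (the field width is an assumption here, suggested by the 0xff constant):

   unsigned char wpos_tex = -1;                       /* stores 0xff */
   int never = (wpos_tex == -1);                      /* 255 == -1 after promotion: false */
   int valid = (wpos_tex == I915_WPOS_TEX_INVALID);   /* 255 == 255: true */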
@@ -482,7 +482,7 @@ i915_init_program(struct i915_context *i915, struct i915_fragment_program *p)
p->decl_t = 0;
p->temp_flag = 0xffff000;
p->utemp_flag = ~0x7;
p->wpos_tex = -1;
p->wpos_tex = I915_WPOS_TEX_INVALID;
p->depth_written = 0;
p->nr_params = 0;

@@ -925,11 +925,12 @@ i915_init_packets(struct i915_context *i915)
* piece changes.
*/
i915->state.Ctx[I915_CTXREG_LI] = (_3DSTATE_LOAD_STATE_IMMEDIATE_1 |
I1_LOAD_S(2) |
I1_LOAD_S(4) |
I1_LOAD_S(5) | I1_LOAD_S(6) | (3));
I1_LOAD_S(2) | I1_LOAD_S(3) |
I1_LOAD_S(4) | I1_LOAD_S(5) |
I1_LOAD_S(6) | (4));
i915->state.Ctx[I915_CTXREG_LIS2] = 0;
i915->state.Ctx[I915_CTXREG_LIS4] = 0;
i915->state.Ctx[I915_CTXREG_LIS3] = 0;
i915->state.Ctx[I915_CTXREG_LIS5] = 0;

if (i915->intel.ctx.Visual.rgbBits == 16)
@@ -176,7 +176,7 @@ i915_emit_invarient_state(struct intel_context *intel)
{
BATCH_LOCALS;

BEGIN_BATCH(17);
BEGIN_BATCH(15);

OUT_BATCH(_3DSTATE_AA_CMD |
AA_LINE_ECAAR_WIDTH_ENABLE |
@@ -200,11 +200,6 @@ i915_emit_invarient_state(struct intel_context *intel)
CSB_TCB(3, 3) |
CSB_TCB(4, 4) | CSB_TCB(5, 5) | CSB_TCB(6, 6) | CSB_TCB(7, 7));

/* Need to initialize this to zero.
*/
OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | (0));
OUT_BATCH(0);

OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD);
OUT_BATCH(0);
OUT_BATCH(0);
@@ -93,7 +93,11 @@
#define S2_TEX_COUNT_SHIFT_830 12
#define S2_VERTEX_1_WIDTH_SHIFT_830 0
#define S2_VERTEX_0_WIDTH_SHIFT_830 6
/* S3 not interesting */

#define S3_TEXCOORD_WRAP_SHORTEST_TCX(unit) (1<<((unit)*4+3))
#define S3_TEXCOORD_WRAP_SHORTEST_TCY(unit) (1<<((unit)*4+2))
#define S3_TEXCOORD_WRAP_SHORTEST_TCZ(unit) (1<<((unit)*4+1))
#define S3_TEXCOORD_PERSPECTIVE_DISABLE(unit) (1<<((unit)*4+0))

#define S4_POINT_WIDTH_SHIFT 23
#define S4_POINT_WIDTH_MASK (0x1ff<<23)
@@ -887,9 +887,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
* and again afterwards to ensure that the resolve is complete before we
* do any more regular drawing.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

struct blorp_batch batch;
blorp_batch_init(&brw->blorp, &batch, brw, 0);
@@ -899,9 +897,7 @@ do_single_blorp_clear(struct brw_context *brw, struct gl_framebuffer *fb,
x0, y0, x1, y1);
blorp_batch_finish(&batch);

brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);

/* Now that the fast clear has occurred, put the buffer in
* INTEL_FAST_CLEAR_STATE_CLEAR so that we won't waste time doing
@@ -1001,9 +997,7 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
* and again afterwards to ensure that the resolve is complete before we
* do any more regular drawing.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);


struct blorp_batch batch;
@@ -1014,9 +1008,7 @@ brw_blorp_resolve_color(struct brw_context *brw, struct intel_mipmap_tree *mt,
blorp_batch_finish(&batch);

/* See comment above */
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_CS_STALL);
brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);
}

static void
@@ -1047,7 +1039,8 @@ gen6_blorp_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
*/
void
intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer, enum blorp_hiz_op op)
unsigned int level, unsigned int start_layer,
unsigned int num_layers, enum blorp_hiz_op op)
{
const char *opname = NULL;

@@ -1066,12 +1059,85 @@ intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
break;
}

DBG("%s %s to mt %p level %d layer %d\n",
__func__, opname, mt, level, layer);
DBG("%s %s to mt %p level %d layers %d-%d\n",
__func__, opname, mt, level, start_layer, start_layer + num_layers - 1);

/* The following stalls and flushes are only documented to be required for
* HiZ clear operations. However, they also seem to be required for the
* HiZ resolve operation which is basically the same as a fast clear only a
* different value is written into the HiZ surface.
*/
if (op == BLORP_HIZ_OP_DEPTH_CLEAR || op == BLORP_HIZ_OP_HIZ_RESOLVE) {
if (brw->gen == 6) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
*
* "If other rendering operations have preceded this clear, a
* PIPE_CONTROL with write cache flush enabled and Z-inhibit
* disabled must be issued before the rectangle primitive used for
* the depth buffer clear operation.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
} else if (brw->gen >= 7) {
/*
* From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
*
* If other rendering operations have preceded this clear, a
* PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
* enabled must be issued before the rectangle primitive used for
* the depth buffer clear operation.
*
* Same applies for Gen8 and Gen9.
*
* In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
* PIPE_CONTROL, Depth Cache Flush Enable:
*
* This bit must not be set when Depth Stall Enable bit is set in
* this packet.
*
* This is confirmed to hold for real, HSW gets immediate gpu hangs.
*
* Therefore issue two pipe control flushes, one for cache flush and
* another for depth stall.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);

brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
}
}

if (brw->gen >= 8) {
gen8_hiz_exec(brw, mt, level, layer, op);
for (unsigned a = 0; a < num_layers; a++)
gen8_hiz_exec(brw, mt, level, start_layer + a, op);
} else {
gen6_blorp_hiz_exec(brw, mt, level, layer, op);
for (unsigned a = 0; a < num_layers; a++)
gen6_blorp_hiz_exec(brw, mt, level, start_layer + a, op);
}


/* The following stalls and flushes are only documented to be required for
* HiZ clear operations. However, they also seem to be required for the
* HiZ resolve operation which is basically the same as a fast clear only a
* different value is written into the HiZ surface.
*/
if (op == BLORP_HIZ_OP_DEPTH_CLEAR || op == BLORP_HIZ_OP_HIZ_RESOLVE) {
if (brw->gen == 6) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
* followed by a PIPE_CONTROL command with DEPTH_STALL bit set
* and Then followed by Depth FLUSH'
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_STALL);

brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
}
}
}
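With the new signature, callers hand intel_hiz_exec() a whole layer range and it loops internally, so the PIPE_CONTROL brackets above are emitted once per range instead of once per layer. Call sites migrate like this (matching the brw_clear.c hunk further below):

   /* Before: flush/stall brackets re-emitted for every layer. */
   for (unsigned layer = 0; layer < depth_irb->layer_count; layer++)
      intel_hiz_exec(brw, mt, depth_irb->mt_level,
                     depth_irb->mt_layer + layer, BLORP_HIZ_OP_DEPTH_CLEAR);

   /* After: a single call covers the range; non-layered FBOs pass 1. */
   intel_hiz_exec(brw, mt, depth_irb->mt_level,
                  depth_irb->mt_layer, depth_irb->layer_count,
                  BLORP_HIZ_OP_DEPTH_CLEAR);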
@@ -70,7 +70,8 @@ brw_blorp_resolve_color(struct brw_context *brw,

void
intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
unsigned int level, unsigned int layer, enum blorp_hiz_op op);
unsigned int level, unsigned int start_layer,
unsigned int num_layers, enum blorp_hiz_op op);

void gen6_blorp_exec(struct blorp_batch *batch,
const struct blorp_params *params);
@@ -175,71 +175,13 @@ brw_fast_clear_depth(struct gl_context *ctx)
mt->depth_clear_value = depth_clear_value;
}

if (brw->gen == 6) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 313:
*
* "If other rendering operations have preceded this clear, a
* PIPE_CONTROL with write cache flush enabled and Z-inhibit disabled
* must be issued before the rectangle primitive used for the depth
* buffer clear operation.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
} else if (brw->gen >= 7) {
/*
* From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
*
* If other rendering operations have preceded this clear, a
* PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
* enabled must be issued before the rectangle primitive used for the
* depth buffer clear operation.
*
* Same applies for Gen8 and Gen9.
*
* In addition, from the Ivybridge PRM, volume 2, 1.10.4.1 PIPE_CONTROL,
* Depth Cache Flush Enable:
*
* This bit must not be set when Depth Stall Enable bit is set in
* this packet.
*
* This is confirmed to hold for real, HSW gets immediate gpu hangs.
*
* Therefore issue two pipe control flushes, one for cache flush and
* another for depth stall.
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);

brw_emit_pipe_control_flush(brw, PIPE_CONTROL_DEPTH_STALL);
}

if (fb->MaxNumLayers > 0) {
for (unsigned layer = 0; layer < depth_irb->layer_count; layer++) {
intel_hiz_exec(brw, mt, depth_irb->mt_level,
depth_irb->mt_layer + layer,
BLORP_HIZ_OP_DEPTH_CLEAR);
}
} else {
intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer,
intel_hiz_exec(brw, mt, depth_irb->mt_level,
depth_irb->mt_layer, depth_irb->layer_count,
BLORP_HIZ_OP_DEPTH_CLEAR);
} else {
intel_hiz_exec(brw, mt, depth_irb->mt_level, depth_irb->mt_layer, 1,
BLORP_HIZ_OP_DEPTH_CLEAR);
}

if (brw->gen == 6) {
/* From the Sandy Bridge PRM, volume 2 part 1, page 314:
*
* "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be followed
* by a PIPE_CONTROL command with DEPTH_STALL bit set and Then
* followed by Depth FLUSH'
*/
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_STALL);

brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
PIPE_CONTROL_CS_STALL);
}

/* Now, the HiZ buffer contains data that needs to be resolved to the depth
@@ -555,7 +555,7 @@ brw_initialize_context_constants(struct brw_context *brw)
ctx->Const.Max3DTextureLevels = 12; /* 2048 */
ctx->Const.MaxArrayTextureLayers = brw->gen >= 7 ? 2048 : 512;
ctx->Const.MaxTextureMbytes = 1536;
ctx->Const.MaxTextureRectSize = 1 << 12;
ctx->Const.MaxTextureRectSize = brw->gen >= 7 ? 16384 : 8192;
ctx->Const.MaxTextureMaxAnisotropy = 16.0;
ctx->Const.MaxTextureLodBias = 15.0;
ctx->Const.StripTextureBorder = true;
@@ -1700,7 +1700,8 @@ void brw_fini_pipe_control(struct brw_context *brw);
void brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags);
void brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
struct brw_bo *bo, uint32_t offset,
uint32_t imm_lower, uint32_t imm_upper);
uint64_t imm);
void brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags);
void brw_emit_mi_flush(struct brw_context *brw);
void brw_emit_post_sync_nonzero_flush(struct brw_context *brw);
void brw_emit_depth_stall_flushes(struct brw_context *brw);
@@ -664,15 +664,16 @@ brw_prepare_vertices(struct brw_context *brw)
ptr = glarray->Ptr;
}
else if (interleaved != glarray->StrideB ||
glarray->InstanceDivisor != 0 ||
glarray->Ptr < ptr ||
(uintptr_t)(glarray->Ptr - ptr) + glarray->_ElementSize > interleaved)
{
/* If our stride is different from the first attribute's stride,
* or if the first attribute's stride didn't cover our element,
* disable the interleaved upload optimization. The second case
* can most commonly occur in cases where there is a single vertex
* and, for example, the data is stored on the application's
* stack.
* or if we are using an instance divisor or if the first
* attribute's stride didn't cover our element, disable the
* interleaved upload optimization. The second case can most
* commonly occur in cases where there is a single vertex and, for
* example, the data is stored on the application's stack.
*
* NOTE: This will also disable the optimization in cases where
* the data is in a different order than the array indices.
@@ -727,6 +728,7 @@ brw_prepare_vertices(struct brw_context *brw)
buffer, interleaved);
buffer->offset -= delta * interleaved;
buffer->size += delta * interleaved;
buffer->step_rate = 0;

for (i = 0; i < nr_uploads; i++) {
/* Then, just point upload[i] at upload[0]'s buffer. */
@@ -357,6 +357,46 @@ brw_meta_convert_fast_clear_color(const struct brw_context *brw,
break;
}

switch (_mesa_get_format_datatype(mt->format)) {
case GL_UNSIGNED_NORMALIZED:
for (int i = 0; i < 4; i++)
override_color.f[i] = CLAMP(override_color.f[i], 0.0f, 1.0f);
break;

case GL_SIGNED_NORMALIZED:
for (int i = 0; i < 4; i++)
override_color.f[i] = CLAMP(override_color.f[i], -1.0f, 1.0f);
break;

case GL_UNSIGNED_INT:
for (int i = 0; i < 4; i++) {
unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
if (bits < 32) {
uint32_t max = (1u << bits) - 1;
override_color.ui[i] = MIN2(override_color.ui[i], max);
}
}
break;

case GL_INT:
for (int i = 0; i < 4; i++) {
unsigned bits = _mesa_get_format_bits(mt->format, GL_RED_BITS + i);
if (bits < 32) {
int32_t max = (1 << (bits - 1)) - 1;
int32_t min = -(1 << (bits - 1));
override_color.i[i] = CLAMP(override_color.i[i], min, max);
}
}
break;

case GL_FLOAT:
if (!_mesa_is_format_signed(mt->format)) {
for (int i = 0; i < 4; i++)
override_color.f[i] = MAX2(override_color.f[i], 0.0f);
}
break;
}

if (!_mesa_format_has_color_component(mt->format, 3)) {
if (_mesa_is_format_integer_color(mt->format))
override_color.ui[3] = 1;
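The clamps above keep an app-supplied clear color representable in the surface format; for an 8-bit channel the limits evaluate to:

   unsigned bits = 8;
   uint32_t umax = (1u << bits) - 1;        /* 255 for GL_UNSIGNED_INT */
   int32_t smax = (1 << (bits - 1)) - 1;    /* 127 for GL_INT */
   int32_t smin = -(1 << (bits - 1));       /* -128 for GL_INT */
   /* e.g. an override value of 300 clamps to 255 (unsigned) or 127 (signed) */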
@@ -457,6 +457,12 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw,
brw->depthstencil.stencil_offset =
(stencil_draw_y & ~tile_mask_y) * stencil_mt->pitch +
(stencil_draw_x & ~tile_mask_x) * 64;
} else if (!depth_irb) {
brw->depthstencil.depth_offset =
intel_miptree_get_aligned_offset(
stencil_mt,
stencil_irb->draw_x & ~tile_mask_x,
stencil_irb->draw_y & ~tile_mask_y);
}
}
}
@@ -993,6 +999,37 @@ brw_upload_state_base_address(struct brw_context *brw)
* maybe this isn't required for us in particular.
*/

if (brw->gen >= 6) {
const unsigned dc_flush =
brw->gen >= 7 ? PIPE_CONTROL_DATA_CACHE_FLUSH : 0;

/* Emit a render target cache flush.
*
* This isn't documented anywhere in the PRM. However, it seems to be
* necessary prior to changing the surface state base adress. We've
* seen issues in Vulkan where we get GPU hangs when using multi-level
* command buffers which clear depth, reset state base address, and then
* go render stuff.
*
* Normally, in GL, we would trust the kernel to do sufficient stalls
* and flushes prior to executing our batch. However, it doesn't seem
* as if the kernel's flushing is always sufficient and we don't want to
* rely on it.
*
* We make this an end-of-pipe sync instead of a normal flush because we
* do not know the current status of the GPU. On Haswell at least,
* having a fast-clear operation in flight at the same time as a normal
* rendering operation can cause hangs. Since the kernel's flushing is
* insufficient, we need to ensure that any rendering operations from
* other processes are definitely complete before we try to do our own
* rendering. It's a bit of a big hammer but it appears to work.
*/
brw_emit_end_of_pipe_sync(brw,
PIPE_CONTROL_RENDER_TARGET_FLUSH |
PIPE_CONTROL_DEPTH_CACHE_FLUSH |
dc_flush);
}

if (brw->gen >= 8) {
uint32_t mocs_wb = brw->gen >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
int pkt_len = brw->gen >= 9 ? 19 : 16;
@@ -1096,6 +1133,13 @@ brw_upload_state_base_address(struct brw_context *brw)
ADVANCE_BATCH();
}

if (brw->gen >= 6) {
brw_emit_pipe_control_flush(brw,
PIPE_CONTROL_INSTRUCTION_INVALIDATE |
PIPE_CONTROL_STATE_CACHE_INVALIDATE |
PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
}

/* According to section 3.6.1 of VOL1 of the 965 PRM,
* STATE_BASE_ADDRESS updates require a reissue of:
*
@@ -87,6 +87,83 @@ gen7_cs_stall_every_four_pipe_controls(struct brw_context *brw, uint32_t flags)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
brw_emit_pipe_control(struct brw_context *brw, uint32_t flags,
|
||||
struct brw_bo *bo, uint32_t offset, uint64_t imm)
|
||||
{
|
||||
if (brw->gen >= 8) {
|
||||
if (brw->gen == 8)
|
||||
gen8_add_cs_stall_workaround_bits(&flags);
|
||||
|
||||
if (brw->gen == 9 &&
|
||||
(flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
|
||||
/* Hardware workaround: SKL
|
||||
*
|
||||
* Emit Pipe Control with all bits set to zero before emitting
|
||||
* a Pipe Control with VF Cache Invalidate set.
|
||||
*/
|
||||
brw_emit_pipe_control_flush(brw, 0);
|
||||
}
|
||||
|
||||
BEGIN_BATCH(6);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
|
||||
OUT_BATCH(flags);
|
||||
if (bo) {
|
||||
OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
I915_GEM_DOMAIN_INSTRUCTION, offset);
|
||||
} else {
|
||||
OUT_BATCH(0);
|
||||
OUT_BATCH(0);
|
||||
}
|
||||
OUT_BATCH(imm);
|
||||
OUT_BATCH(imm >> 32);
|
||||
ADVANCE_BATCH();
|
||||
} else if (brw->gen >= 6) {
|
||||
if (brw->gen == 6 &&
|
||||
(flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
|
||||
/* Hardware workaround: SNB B-Spec says:
|
||||
*
|
||||
* [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
|
||||
* Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
|
||||
* required.
|
||||
*/
|
||||
brw_emit_post_sync_nonzero_flush(brw);
|
||||
}
|
||||
|
||||
flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);
|
||||
|
||||
/* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
|
||||
* on later platforms. We always use PPGTT on Gen7+.
|
||||
*/
|
||||
unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;
|
||||
|
||||
BEGIN_BATCH(5);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
|
||||
OUT_BATCH(flags);
|
||||
if (bo) {
|
||||
OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
gen6_gtt | offset);
|
||||
} else {
|
||||
OUT_BATCH(0);
|
||||
}
|
||||
OUT_BATCH(imm);
|
||||
OUT_BATCH(imm >> 32);
|
||||
ADVANCE_BATCH();
|
||||
} else {
|
||||
BEGIN_BATCH(4);
|
||||
OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
|
||||
if (bo) {
|
||||
OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
|
||||
PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
|
||||
} else {
|
||||
OUT_BATCH(0);
|
||||
}
|
||||
OUT_BATCH(imm);
|
||||
OUT_BATCH(imm >> 32);
|
||||
ADVANCE_BATCH();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit a PIPE_CONTROL with various flushing flags.
|
||||
*
|
||||
@@ -107,64 +184,14 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
       * caches are coherent with memory once the specified R/O caches are
       * invalidated.  On pre-Gen6 hardware the (implicit) R/O cache
       * invalidation seems to happen at the bottom of the pipeline together
       * with any write cache flush, so this shouldn't be a concern.
       * with any write cache flush, so this shouldn't be a concern.  In order
       * to ensure a full stall, we do an end-of-pipe sync.
       */
      brw_emit_pipe_control_flush(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS) |
                                  PIPE_CONTROL_CS_STALL);
      brw_emit_end_of_pipe_sync(brw, (flags & PIPE_CONTROL_CACHE_FLUSH_BITS));
      flags &= ~(PIPE_CONTROL_CACHE_FLUSH_BITS | PIPE_CONTROL_CS_STALL);
   }

   if (brw->gen >= 8) {
      if (brw->gen == 8)
         gen8_add_cs_stall_workaround_bits(&flags);

      if (brw->gen == 9 &&
          (flags & PIPE_CONTROL_VF_CACHE_INVALIDATE)) {
         /* Hardware workaround: SKL
          *
          * Emit Pipe Control with all bits set to zero before emitting
          * a Pipe Control with VF Cache Invalidate set.
          */
         brw_emit_pipe_control_flush(brw, 0);
      }

      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
      OUT_BATCH(flags);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else if (brw->gen >= 6) {
      if (brw->gen == 6 &&
          (flags & PIPE_CONTROL_RENDER_TARGET_FLUSH)) {
         /* Hardware workaround: SNB B-Spec says:
          *
          *   [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush
          *   Enable = 1, a PIPE_CONTROL with any non-zero post-sync-op is
          *   required.
          */
         brw_emit_post_sync_nonzero_flush(brw);
      }

      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
      OUT_BATCH(flags);
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
      OUT_BATCH(0);
      OUT_BATCH(0);
      OUT_BATCH(0);
      ADVANCE_BATCH();
   }
   brw_emit_pipe_control(brw, flags, NULL, 0, 0);
}

/**
@@ -178,45 +205,9 @@ brw_emit_pipe_control_flush(struct brw_context *brw, uint32_t flags)
void
brw_emit_pipe_control_write(struct brw_context *brw, uint32_t flags,
                            struct brw_bo *bo, uint32_t offset,
                            uint32_t imm_lower, uint32_t imm_upper)
                            uint64_t imm)
{
   if (brw->gen >= 8) {
      if (brw->gen == 8)
         gen8_add_cs_stall_workaround_bits(&flags);

      BEGIN_BATCH(6);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (6 - 2));
      OUT_BATCH(flags);
      OUT_RELOC64(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  offset);
      OUT_BATCH(imm_lower);
      OUT_BATCH(imm_upper);
      ADVANCE_BATCH();
   } else if (brw->gen >= 6) {
      flags |= gen7_cs_stall_every_four_pipe_controls(brw, flags);

      /* PPGTT/GGTT is selected by DW2 bit 2 on Sandybridge, but DW1 bit 24
       * on later platforms.  We always use PPGTT on Gen7+.
       */
      unsigned gen6_gtt = brw->gen == 6 ? PIPE_CONTROL_GLOBAL_GTT_WRITE : 0;

      BEGIN_BATCH(5);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | (5 - 2));
      OUT_BATCH(flags);
      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                gen6_gtt | offset);
      OUT_BATCH(imm_lower);
      OUT_BATCH(imm_upper);
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(4);
      OUT_BATCH(_3DSTATE_PIPE_CONTROL | flags | (4 - 2));
      OUT_RELOC(bo, I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                PIPE_CONTROL_GLOBAL_GTT_WRITE | offset);
      OUT_BATCH(imm_lower);
      OUT_BATCH(imm_upper);
      ADVANCE_BATCH();
   }
   brw_emit_pipe_control(brw, flags, bo, offset, imm);
}
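
The signature change above folds the two 32-bit immediate halves into a single 64-bit value that the emitter splits itself (OUT_BATCH(imm); OUT_BATCH(imm >> 32);). A minimal before/after sketch of a call site, with made-up immediate values and bo/offset/flags as in the diff:

    /* Old convention: low and high dwords passed separately. */
    brw_emit_pipe_control_write(brw, flags, bo, offset,
                                0xdeadbeef /* imm_lower */,
                                0x00000001 /* imm_upper */);

    /* New convention: a single uint64_t immediate. */
    brw_emit_pipe_control_write(brw, flags, bo, offset,
                                (uint64_t)0x00000001 << 32 | 0xdeadbeef);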

/**
@@ -264,8 +255,7 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
   brw_emit_pipe_control_write(brw,
                               PIPE_CONTROL_WRITE_IMMEDIATE
                               | PIPE_CONTROL_DEPTH_STALL,
                               brw->workaround_bo, 0,
                               0, 0);
                               brw->workaround_bo, 0, 0);
}


@@ -278,11 +268,9 @@ gen7_emit_cs_stall_flush(struct brw_context *brw)
   brw_emit_pipe_control_write(brw,
                               PIPE_CONTROL_CS_STALL
                               | PIPE_CONTROL_WRITE_IMMEDIATE,
                               brw->workaround_bo, 0,
                               0, 0);
                               brw->workaround_bo, 0, 0);
}


/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
@@ -328,7 +316,106 @@ brw_emit_post_sync_nonzero_flush(struct brw_context *brw)
                               PIPE_CONTROL_STALL_AT_SCOREBOARD);

   brw_emit_pipe_control_write(brw, PIPE_CONTROL_WRITE_IMMEDIATE,
                               brw->workaround_bo, 0, 0, 0);
                               brw->workaround_bo, 0, 0);
}

/*
 * From Sandybridge PRM, volume 2, "1.7.2 End-of-Pipe Synchronization":
 *
 *    Write synchronization is a special case of end-of-pipe
 *    synchronization that requires that the render cache and/or depth
 *    related caches are flushed to memory, where the data will become
 *    globally visible.  This type of synchronization is required prior to
 *    SW (CPU) actually reading the result data from memory, or initiating
 *    an operation that will use as a read surface (such as a texture
 *    surface) a previous render target and/or depth/stencil buffer
 *
 *
 * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
 *
 *    Exercising the write cache flush bits (Render Target Cache Flush
 *    Enable, Depth Cache Flush Enable, DC Flush) in PIPE_CONTROL only
 *    ensures the write caches are flushed and doesn't guarantee the data
 *    is globally visible.
 *
 *    SW can track the completion of the end-of-pipe-synchronization by
 *    using "Notify Enable" and "PostSync Operation - Write Immediate
 *    Data" in the PIPE_CONTROL command.
 */
void
brw_emit_end_of_pipe_sync(struct brw_context *brw, uint32_t flags)
{
   if (brw->gen >= 6) {
      /* From Sandybridge PRM, volume 2, "1.7.3.1 Writing a Value to Memory":
       *
       *    "The most common action to perform upon reaching a synchronization
       *    point is to write a value out to memory.  An immediate value
       *    (included with the synchronization command) may be written."
       *
       *
       * From Broadwell PRM, volume 7, "End-of-Pipe Synchronization":
       *
       *    "In case the data flushed out by the render engine is to be read
       *    back in to the render engine in coherent manner, then the render
       *    engine has to wait for the fence completion before accessing the
       *    flushed data.  This can be achieved by following means on various
       *    products: PIPE_CONTROL command with CS Stall and the required
       *    write caches flushed with Post-Sync-Operation as Write Immediate
       *    Data.
       *
       *    Example:
       *       - Workload-1 (3D/GPGPU/MEDIA)
       *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write Immediate
       *         Data, Required Write Cache Flush bits set)
       *       - Workload-2 (Can use the data produced or output by Workload-1)
       */
      brw_emit_pipe_control_write(brw,
                                  flags | PIPE_CONTROL_CS_STALL |
                                  PIPE_CONTROL_WRITE_IMMEDIATE,
                                  brw->workaround_bo, 0, 0);

      if (brw->is_haswell) {
         /* Haswell needs additional work-arounds:
          *
          * From Haswell PRM, volume 2, part 1, "End-of-Pipe Synchronization":
          *
          *    Option 1:
          *    PIPE_CONTROL command with the CS Stall and the required write
          *    caches flushed with Post-SyncOperation as Write Immediate Data
          *    followed by eight dummy MI_STORE_DATA_IMM (write to scratch
          *    space) commands.
          *
          *    Example:
          *       - Workload-1
          *       - PIPE_CONTROL (CS Stall, Post-Sync-Operation Write
          *         Immediate Data, Required Write Cache Flush bits set)
          *       - MI_STORE_DATA_IMM (8 times) (Dummy data, Scratch Address)
          *       - Workload-2 (Can use the data produced or output by
          *         Workload-1)
          *
          * Unfortunately, both the PRMs and the internal docs are a bit
          * out-of-date in this regard.  What the Windows driver does (and
          * this appears to actually work) is to emit a register read from the
          * memory address written by the pipe control above.
          *
          * What register we load into doesn't matter.  We choose an indirect
          * rendering register because we know it always exists and it's one
          * of the first registers the command parser allows us to write.  If
          * you don't have command parser support in your kernel (pre-4.2),
          * this will get turned into MI_NOOP and you won't get the
          * workaround.  Unfortunately, there's just not much we can do in
          * that case.  This register is perfectly safe to write since we
          * always re-load all of the indirect draw registers right before
          * 3DPRIMITIVE when needed anyway.
          */
         brw_load_register_mem(brw, GEN7_3DPRIM_START_INSTANCE,
                               brw->workaround_bo,
                               I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
      }
   } else {
      /* On gen4-5, a regular pipe control seems to suffice. */
      brw_emit_pipe_control_flush(brw, flags);
   }
}
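
For callers, the new helper reads as a one-liner. A hedged usage sketch (not a call site taken from this diff): to make render-target writes globally visible before the CPU or a later texture fetch consumes them, something like the following suffices, with the Haswell register-read workaround applied internally:

    /* Flush the render cache and stall until the data reaches memory. */
    brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_RENDER_TARGET_FLUSH);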

/* Emit a pipelined flush to either flush render and texture cache for

@@ -97,7 +97,7 @@ brw_write_timestamp(struct brw_context *brw, struct brw_bo *query_bo, int idx)
      flags |= PIPE_CONTROL_CS_STALL;

   brw_emit_pipe_control_write(brw, flags,
                               query_bo, idx * sizeof(uint64_t), 0, 0);
                               query_bo, idx * sizeof(uint64_t), 0);
}

/**
@@ -112,8 +112,7 @@ brw_write_depth_count(struct brw_context *brw, struct brw_bo *query_bo, int idx)
      flags |= PIPE_CONTROL_CS_STALL;

   brw_emit_pipe_control_write(brw, flags,
                               query_bo, idx * sizeof(uint64_t),
                               0, 0);
                               query_bo, idx * sizeof(uint64_t), 0);
}

/**

@@ -450,8 +450,10 @@ brw_update_sampler_state(struct brw_context *brw,
   /* Enable anisotropic filtering if desired. */
   unsigned max_anisotropy = BRW_ANISORATIO_2;
   if (sampler->MaxAnisotropy > 1.0f) {
      min_filter = BRW_MAPFILTER_ANISOTROPIC;
      mag_filter = BRW_MAPFILTER_ANISOTROPIC;
      if (min_filter == BRW_MAPFILTER_LINEAR)
         min_filter = BRW_MAPFILTER_ANISOTROPIC;
      if (mag_filter == BRW_MAPFILTER_LINEAR)
         mag_filter = BRW_MAPFILTER_ANISOTROPIC;

      if (sampler->MaxAnisotropy > 2.0f) {
         max_anisotropy =
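
The sampler hunk above fixes a silent filter override: the old code forced both filters to ANISOTROPIC whenever MaxAnisotropy exceeded 1.0, even when the application had requested NEAREST. A sketch of the behavioral difference (constants as in the diff; the GL state is illustrative):

    /* App sets GL_TEXTURE_MIN_FILTER = GL_NEAREST, MaxAnisotropy = 4.0.
     * Old: min_filter forced to BRW_MAPFILTER_ANISOTROPIC; NEAREST lost.
     * New: only LINEAR filters are promoted, so NEAREST survives: */
    if (min_filter == BRW_MAPFILTER_LINEAR)
       min_filter = BRW_MAPFILTER_ANISOTROPIC;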

@@ -63,7 +63,7 @@ set_query_availability(struct brw_context *brw, struct brw_query_object *query,
      brw_emit_pipe_control_write(brw,
                                  PIPE_CONTROL_WRITE_IMMEDIATE,
                                  query->bo, 2 * sizeof(uint64_t),
                                  available, 0);
                                  available);
   }
}


@@ -513,7 +513,7 @@ gen8_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
    */
   brw_emit_pipe_control_write(brw,
                               PIPE_CONTROL_WRITE_IMMEDIATE,
                               brw->workaround_bo, 0, 0, 0);
                               brw->workaround_bo, 0, 0);

   /* Emit 3DSTATE_WM_HZ_OP again to disable the state overrides. */
   BEGIN_BATCH(5);

@@ -180,7 +180,7 @@ genX(blorp_exec)(struct blorp_batch *batch,
   assert(batch->blorp->driver_ctx == batch->driver_batch);
   struct brw_context *brw = batch->driver_batch;
   struct gl_context *ctx = &brw->ctx;
   const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1920 : 1500;
   const uint32_t estimated_max_batch_usage = GEN_GEN >= 8 ? 1920 : 1700;
   bool check_aperture_failed_once = false;

   /* Flush the sampler and render caches.  We definitely need to flush the

@@ -329,6 +329,7 @@ intel_miptree_blit(struct brw_context *brw,
   intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
   intel_miptree_resolve_color(brw, src_mt, src_level, src_slice, 1, 0);
   intel_miptree_resolve_color(brw, dst_mt, dst_level, dst_slice, 1, 0);
   intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_slice);

   if (src_flip)
      src_y = minify(src_mt->physical_height0, src_level - src_mt->first_level) - src_y - height;
@@ -387,6 +388,7 @@ intel_miptree_copy(struct brw_context *brw,
   intel_miptree_slice_resolve_depth(brw, dst_mt, dst_level, dst_slice);
   intel_miptree_resolve_color(brw, src_mt, src_level, src_slice, 1, 0);
   intel_miptree_resolve_color(brw, dst_mt, dst_level, dst_slice, 1, 0);
   intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, dst_slice);

   uint32_t src_image_x, src_image_y;
   intel_miptree_get_image_offset(src_mt, src_level, src_slice,

@@ -442,13 +442,9 @@ intel_create_renderbuffer(mesa_format format, unsigned num_samples)
   struct intel_renderbuffer *irb;
   struct gl_renderbuffer *rb;

   GET_CURRENT_CONTEXT(ctx);

   irb = CALLOC_STRUCT(intel_renderbuffer);
   if (!irb) {
      _mesa_error(ctx, GL_OUT_OF_MEMORY, "creating renderbuffer");
   if (!irb)
      return NULL;
   }

   rb = &irb->Base.Base;
   irb->layer_count = 1;

@@ -1992,7 +1992,7 @@ intel_miptree_slice_resolve(struct brw_context *brw,
   if (!item || item->need != need)
      return false;

   intel_hiz_exec(brw, mt, level, layer, need);
   intel_hiz_exec(brw, mt, level, layer, 1, need);
   intel_resolve_map_remove(item);
   return true;
}
@@ -2028,7 +2028,7 @@ intel_miptree_all_slices_resolve(struct brw_context *brw,
      if (map->need != need)
         continue;

      intel_hiz_exec(brw, mt, map->level, map->layer, need);
      intel_hiz_exec(brw, mt, map->level, map->layer, 1, need);
      intel_resolve_map_remove(map);
      did_resolve = true;
   }

@@ -1029,10 +1029,6 @@ intel_miptree_unmap(struct brw_context *brw,
                    unsigned int level,
                    unsigned int slice);

void
intel_hiz_exec(struct brw_context *brw, struct intel_mipmap_tree *mt,
               unsigned int level, unsigned int layer, enum blorp_hiz_op op);

bool
intel_miptree_sample_with_hiz(struct brw_context *brw,
                              struct intel_mipmap_tree *mt);

@@ -76,6 +76,7 @@ lib@GL_LIB@_la_LIBADD = \
lib@GL_LIB@_la_LDFLAGS = \
	-no-undefined \
	-version-number $(GL_MAJOR):$(GL_MINOR):$(GL_PATCH) \
	$(BSYMBOLIC) \
	$(GC_SECTIONS) \
	$(LD_NO_UNDEFINED)


@@ -379,13 +379,13 @@ glXQueryServerString(Display *dpy, int screen, int name)

/*** GLX_VERSION_1_2 ***/

/* declare here to avoid including xmesa.h */
extern Display *XMesaGetCurrentDisplay(void);

Display PUBLIC *
glXGetCurrentDisplay(void)
{
   /* Same code as in libGL's glxext.c */
   __GLXcontext *gc = (__GLXcontext *) glXGetCurrentContext();
   if (NULL == gc) return NULL;
   return gc->currentDpy;
   return XMesaGetCurrentDisplay();
}


@@ -37,7 +37,6 @@
 * work properly.
 */
typedef struct __GLXcontextRec {
   Display *currentDpy;
   GLboolean isDirect;
   GLXDrawable currentDrawable;
   GLXDrawable currentReadable;

@@ -1304,6 +1304,14 @@ XMesaBuffer XMesaGetCurrentReadBuffer( void )
}


Display *XMesaGetCurrentDisplay(void)
{
   GET_CURRENT_CONTEXT(ctx);
   XMesaContext xmctx = XMESA_CONTEXT(ctx);
   return xmctx ? xmctx->display : NULL;
}


GLboolean XMesaSetFXmode( GLint mode )
{

@@ -240,6 +240,12 @@ extern XMesaBuffer XMesaGetCurrentBuffer( void );
extern XMesaBuffer XMesaGetCurrentReadBuffer( void );


/*
 * Return display of current context.
 */
extern Display *XMesaGetCurrentDisplay( void );


/*
 * Swap the front and back buffers for the given buffer.  No action is
 * taken if the buffer is not double buffered.
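
With these xlib-GL changes, glXGetCurrentDisplay() asks the XMesa layer for the live display rather than dereferencing the fake __GLXcontext. A minimal caller, using only standard Xlib and GLX entry points (nothing here is specific to this patch):

    #include <stdio.h>
    #include <GL/glx.h>

    /* Reports the display backing the current context, or NULL when no
     * context is bound. */
    Display *dpy = glXGetCurrentDisplay();
    if (dpy)
       printf("current display: %s\n", DisplayString(dpy));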

@@ -469,6 +469,8 @@ _mesa_bind_pipeline(struct gl_context *ctx,
    * considered current."
    */
   if (&ctx->Shader != ctx->_Shader) {
      FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);

      if (pipe != NULL) {
         /* Bind the pipeline to the current program and
          * restore the pipeline state
@@ -480,8 +482,6 @@ _mesa_bind_pipeline(struct gl_context *ctx,
                                     ctx->Pipeline.Default);
      }

      FLUSH_VERTICES(ctx, _NEW_PROGRAM | _NEW_PROGRAM_CONSTANTS);

      for (i = 0; i < MESA_SHADER_STAGES; i++) {
         struct gl_program *prog = ctx->_Shader->CurrentProgram[i];
         if (prog) {

@@ -40,6 +40,8 @@ set_viewport_no_notify(struct gl_context *ctx, unsigned idx,
                       GLfloat x, GLfloat y,
                       GLfloat width, GLfloat height)
{
   FLUSH_VERTICES(ctx, _NEW_VIEWPORT);

   /* clamp width and height to the implementation dependent range */
   width = MIN2(width, (GLfloat) ctx->Const.MaxViewportWidth);
   height = MIN2(height, (GLfloat) ctx->Const.MaxViewportHeight);
@@ -71,7 +73,6 @@ set_viewport_no_notify(struct gl_context *ctx, unsigned idx,
   ctx->ViewportArray[idx].Width = width;
   ctx->ViewportArray[idx].Y = y;
   ctx->ViewportArray[idx].Height = height;
   ctx->NewState |= _NEW_VIEWPORT;
}

struct gl_viewport_inputs {
@@ -240,9 +241,10 @@ set_depth_range_no_notify(struct gl_context *ctx, unsigned idx,
       ctx->ViewportArray[idx].Far == farval)
      return;

   FLUSH_VERTICES(ctx, _NEW_VIEWPORT);

   ctx->ViewportArray[idx].Near = CLAMP(nearval, 0.0, 1.0);
   ctx->ViewportArray[idx].Far = CLAMP(farval, 0.0, 1.0);
   ctx->NewState |= _NEW_VIEWPORT;
}

void
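
Both files above apply the same rule: FLUSH_VERTICES() must run before the state words are overwritten, so that any vertices still queued by the vbo module are drawn with the state they were queued under. A schematic of the pattern (set_some_state is a hypothetical mutator, not a function from this diff):

    static void
    set_some_state(struct gl_context *ctx, GLfloat x)
    {
       /* Draw anything already queued using the OLD state; this also
        * ORs _NEW_VIEWPORT into ctx->NewState for the next validation. */
       FLUSH_VERTICES(ctx, _NEW_VIEWPORT);

       /* Only now is it safe to overwrite the state. */
       ctx->ViewportArray[0].X = x;
    }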

@@ -261,7 +261,7 @@ static void update_raster_state( struct st_context *st )
      _mesa_geometric_samples(ctx->DrawBuffer) > 1;

   /* _NEW_SCISSOR */
   raster->scissor = ctx->Scissor.EnableFlags;
   raster->scissor = !!ctx->Scissor.EnableFlags;

   /* _NEW_FRAG_CLAMP */
   raster->clamp_fragment_color = !st->clamp_frag_color_in_shader &&
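
The double negation matters because the gallium rasterizer state keeps scissor as a one-bit bitfield, while Scissor.EnableFlags is a per-viewport bitmask. A sketch of the failure mode, with the struct reduced to the one relevant member (field width as declared in pipe_rasterizer_state):

    struct {
       unsigned scissor:1;
    } raster;

    unsigned EnableFlags = 0x2;      /* scissor enabled on viewport 1 only */

    raster.scissor = EnableFlags;    /* truncates to bit 0 -> 0: wrong     */
    raster.scissor = !!EnableFlags;  /* any non-zero mask  -> 1: correct   */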

@@ -22,7 +22,7 @@
 */

#include <stdio.h>

#include "st_debug.h"
#include "st_program.h"
#include "st_shader_cache.h"
#include "compiler/glsl/program.h"
@@ -383,6 +383,11 @@ st_load_tgsi_from_disk_cache(struct gl_context *ctx,
      _mesa_associate_uniform_storage(ctx, prog, glprog->Parameters,
                                      false);

      /* Create Gallium shaders now instead of on demand. */
      if (ST_DEBUG & DEBUG_PRECOMPILE ||
          st->shader_has_one_variant[glprog->info.stage])
         st_precompile_shader_variant(st, glprog);

      free(buffer);
   } else {
      /* Failed to find a matching cached shader so fallback to recompile.

@@ -25,6 +25,7 @@
#if defined(__linux__)
#include <sys/file.h>
#include <unistd.h>
#include <fcntl.h>
#else
#include <time.h>
#endif