Compare commits
77 Commits
mesa-12.0.
...
12.0
Author | SHA1 | Date | |
---|---|---|---|
|
13953f012d | ||
|
36e3f2542d | ||
|
555885a0bf | ||
|
ab62405953 | ||
|
806de4a224 | ||
|
2b87bb9b90 | ||
|
689ca381b5 | ||
|
cc2894d376 | ||
|
febf22ff55 | ||
|
3c7b53bba3 | ||
|
c880deef41 | ||
|
09973d9a99 | ||
|
36a54c27fd | ||
|
57708155d2 | ||
|
cf18ee4fcc | ||
|
76816e70a9 | ||
|
c0934035a5 | ||
|
08a9f69a8b | ||
|
d780f89966 | ||
|
a02edabb67 | ||
|
70bb67febc | ||
|
9126479017 | ||
|
f76da483a2 | ||
|
4ac5633618 | ||
|
32d7a060fa | ||
|
eb96145c74 | ||
|
ddd048bbf5 | ||
|
236ecd3c4e | ||
|
81e78ee65c | ||
|
6ebb536800 | ||
|
89a8fd71af | ||
|
8a293e6a0c | ||
|
231ace7eec | ||
|
c07386e2c8 | ||
|
bb4195ca26 | ||
|
0386f956b3 | ||
|
eb9127d224 | ||
|
d37d8d81d5 | ||
|
630c41e2aa | ||
|
d278c15a17 | ||
|
ce56dfca9a | ||
|
3197612a1a | ||
|
6d919a6fc6 | ||
|
f71c3734ce | ||
|
6b1c3c3aa0 | ||
|
01579a9d00 | ||
|
cd9a116558 | ||
|
4a5cce8bd5 | ||
|
b4c28b1755 | ||
|
4f71f93878 | ||
|
a9e5a98c19 | ||
|
c1cb184488 | ||
|
e3ef7da79c | ||
|
9666f75b1b | ||
|
0afbb9d052 | ||
|
bd114e6be6 | ||
|
29bac28a04 | ||
|
31aa3c014b | ||
|
b65a812d60 | ||
|
5dd6e23ad8 | ||
|
422b584c00 | ||
|
b1bced0d1f | ||
|
9baee818b6 | ||
|
68dd6ad433 | ||
|
6bcdb0611f | ||
|
0703bab2cd | ||
|
a7b662633e | ||
|
faa684802f | ||
|
9a844035c0 | ||
|
5f4284fd36 | ||
|
a4cd90283a | ||
|
0934f29c50 | ||
|
e6bc5248aa | ||
|
352902218e | ||
|
6e77fbc8d7 | ||
|
7b9d7257b2 | ||
|
3776e97f9d |
@@ -40,7 +40,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
|
||||
--enable-vdpau \
|
||||
--enable-xa \
|
||||
--enable-xvmc \
|
||||
--disable-llvm-shared-libs \
|
||||
--enable-llvm-shared-libs \
|
||||
--with-egl-platforms=x11,wayland,drm,surfaceless \
|
||||
--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
|
||||
--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr \
|
||||
|
@@ -23,3 +23,6 @@ f2b9b0c730e345bcffa9eadabb25af3ab02642f2 i965: Add missing BRW_NEW_FS_PROG_DATA
|
||||
# Patches depend on the fence_finish() gallium API change and corresponding driver work
|
||||
f240ad98bc05281ea7013d91973cb5f932ae9434 st/mesa: unduplicate st_check_sync code
|
||||
b687f766fddb7b39479cd9ee0427984029ea3559 st/mesa: allow multiple concurrent waiters in ClientWaitSync
|
||||
|
||||
# Commit was reverted shortly after it landed in master
|
||||
a39ad185932eab4f25a0cb2b112c10d8700ef242 configure.ac: honour LLVM_LIBDIR when linking against LLVM
|
||||
|
39
bin/get-typod-pick-list.sh
Executable file
39
bin/get-typod-pick-list.sh
Executable file
@@ -0,0 +1,39 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Script for generating a list of candidates which have typos in the nomination line
|
||||
#
|
||||
# Usage examples:
|
||||
#
|
||||
# $ bin/get-typod-pick-list.sh
|
||||
# $ bin/get-typod-pick-list.sh > picklist
|
||||
# $ bin/get-typod-pick-list.sh | tee picklist
|
||||
|
||||
# NB:
|
||||
# This script intentionally _never_ checks for a specific version tag.
|
||||
# Should we consider folding it into the original get-pick-list.sh?
|
||||
|
||||
# Grep for commits with "cherry picked from commit" in the commit message.
|
||||
git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
|
||||
grep "cherry picked from commit" |\
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||
|
||||
# Grep for commits that were marked as a candidate for the stable tree.
|
||||
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' HEAD..origin/master |\
|
||||
while read sha
|
||||
do
|
||||
# Check to see whether the patch is on the ignore list.
|
||||
if [ -f bin/.cherry-ignore ] ; then
|
||||
if grep -q ^$sha bin/.cherry-ignore ; then
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check to see if it has already been picked over.
|
||||
if grep -q ^$sha already_picked ; then
|
||||
continue
|
||||
fi
|
||||
|
||||
git log -n1 --pretty=oneline $sha | cat
|
||||
done
|
||||
|
||||
rm -f already_picked
|
321
docs/relnotes/12.0.4.html
Normal file
321
docs/relnotes/12.0.4.html
Normal file
@@ -0,0 +1,321 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 12.0.4 Release Notes / November 10, 2016</h1>
|
||||
|
||||
<p>
|
||||
Mesa 12.0.4 is a bug fix release which fixes bugs found since the 12.0.3 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 12.0.4 implements the OpenGL 4.3 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.3. OpenGL
|
||||
4.3 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
22026ce4f1c6a7908b0d10ff057decec0a5633afe7f38a0cef5c08d0689f02a6 mesa-12.0.4.tar.gz
|
||||
5d6003da867d3f54e5000b4acdfc37e6cce5b6a4459274fdad73e24bd2f0065e mesa-12.0.4.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71759">Bug 71759</a> - Intel driver fails with "intel_do_flush_locked failed: No such file or directory" if buffer imported with EGL_NATIVE_PIXMAP_KHR</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94354">Bug 94354</a> - R9285 Unigine Valley perf regression since radeonsi: use re-Z</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96770">Bug 96770</a> - include/GL/mesa_glinterop.h:62: error: redefinition of typedef ‘GLXContext’</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97231">Bug 97231</a> - GL_DEPTH_CLAMP doesn't clamp to the far plane</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97233">Bug 97233</a> - vkQuake VkSpecializationMapEntry related bug</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97260">Bug 97260</a> - R9 290 low performance in Linux 4.7</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97549">Bug 97549</a> - [SNB, BXT] up to 40% perf drop from "loader/dri3: Overhaul dri3_update_num_back" commit</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97887">Bug 97887</a> - llvm segfault in janusvr -render vive</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98025">Bug 98025</a> - [radeonsi] incorrect primitive restart index used</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98134">Bug 98134</a> - dEQP-GLES31.functional.debug.negative_coverage.get_error.buffer.draw_buffers wants a different GL error code</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98326">Bug 98326</a> - [dEQP, EGL] pbuffer depth/stencil tests fail</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Axel Davy (4):</p>
|
||||
<ul>
|
||||
<li>gallium/util: Really allow aliasing of dst for u_box_union_*</li>
|
||||
<li>st/nine: Fix the calculation of the number of vs inputs</li>
|
||||
<li>st/nine: Fix mistake in Volume9 UnlockBox</li>
|
||||
<li>st/nine: Fix locking CubeTexture surfaces.</li>
|
||||
</ul>
|
||||
|
||||
<p>Brendan King (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac: fix the name of the Wayland Scanner pc file</li>
|
||||
</ul>
|
||||
|
||||
<p>Brian Paul (1):</p>
|
||||
<ul>
|
||||
<li>st/mesa: fix swizzle issue in st_create_sampler_view_from_stobj()</li>
|
||||
</ul>
|
||||
|
||||
<p>Chad Versace (3):</p>
|
||||
<ul>
|
||||
<li>egl: Fix truncation error in _eglParseSyncAttribList64</li>
|
||||
<li>i965/sync: Fix uninitalized usage and leak of mutex</li>
|
||||
<li>egl: Don't advertise unsupported platform extensions</li>
|
||||
</ul>
|
||||
|
||||
<p>Chuanbo Weng (1):</p>
|
||||
<ul>
|
||||
<li>gbm: fix potential NULL deref of mapImage/unmapImage.</li>
|
||||
</ul>
|
||||
|
||||
<p>Chuck Atkins (1):</p>
|
||||
<ul>
|
||||
<li>autoconf: Make header install distinct for various APIs (v2)</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (3):</p>
|
||||
<ul>
|
||||
<li>anv: initialise and increment send_sbc</li>
|
||||
<li>anv/wsi: fix apps that acquire multiple images up front</li>
|
||||
<li>Revert "st/vdpau: use linear layout for output surfaces"</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (12):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 12.0.3</li>
|
||||
<li>cherry-ignore: add non-applicable i965 commit</li>
|
||||
<li>cherry-ignore: add vaapi encode fix</li>
|
||||
<li>cherry-ignore: add EGL_KHR_debug fix</li>
|
||||
<li>cherry-ignore: add update_renderbuffer_read_surfaces()</li>
|
||||
<li>isl/gen6: correctly check msaa layout samples count</li>
|
||||
<li>egl/x11: don't crash if dri2_dpy->conn is NULL</li>
|
||||
<li>get-pick-list.sh: Require explicit "12.0" for nominating stable patches</li>
|
||||
<li>automake: don't forget to pick wglext.h in the tarball</li>
|
||||
<li>cherry-ignore: add N/A EGL revert</li>
|
||||
<li>cherry-ignore: add ClientWaitSync fixes</li>
|
||||
<li>Update version to 12.0.4</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (5):</p>
|
||||
<ul>
|
||||
<li>travis: Parse configure.ac to pick an updated LIBDRM_VERSION.</li>
|
||||
<li>travis: Update to the Ubuntu Trusty image.</li>
|
||||
<li>travis: Enable vc4 in libdrm to satisfy vc4 test build dependency.</li>
|
||||
<li>travis: Upgrade LLVM dependency to 3.5 and enable LLVM drivers.</li>
|
||||
<li>gallium: Fix install-gallium-links.mk on non-bash /bin/sh</li>
|
||||
</ul>
|
||||
|
||||
<p>Hans de Goede (1):</p>
|
||||
<ul>
|
||||
<li>pipe_loader_sw: Fix fd leak when instantiated via pipe_loader_sw_probe_kms</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (1):</p>
|
||||
<ul>
|
||||
<li>glsl: Fix cut-and-paste bug in hierarchical visitor ir_expression::accept</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (16):</p>
|
||||
<ul>
|
||||
<li>nv30: set usage to staging so that the buffer is allocated in GART</li>
|
||||
<li>a3xx: make sure to actually clamp depth as requested</li>
|
||||
<li>a3xx: make use of software clipping when hw can't handle it</li>
|
||||
<li>a3xx: use window scissor to simulate viewport xy clip</li>
|
||||
<li>main: GL_RGB10_A2UI does not come with GL 3.0/EXT_texture_integer</li>
|
||||
<li>mesa/formatquery: limit ES target support, fix core context support</li>
|
||||
<li>nir: fix definition of pack_uvec2_to_uint</li>
|
||||
<li>gm107/ir: AL2P writes to a predicate register</li>
|
||||
<li>st/mesa: fix is_scissor_enabled when X/Y are negative</li>
|
||||
<li>nvc0/ir: fix overwriting of value backing non-constant gather offset</li>
|
||||
<li>nv50/ir: copy over value's register id when resolving merge of a phi</li>
|
||||
<li>nvc0/ir: fix textureGather with a single offset</li>
|
||||
<li>gm107/ir: fix texturing with indirect samplers</li>
|
||||
<li>gm107/ir: fix bit offset of tex lod setting for indirect texturing</li>
|
||||
<li>nv50,nvc0: avoid reading out of bounds when getting bogus so info</li>
|
||||
<li>nv50/ir: process texture offset sources as regular sources</li>
|
||||
</ul>
|
||||
|
||||
<p>James Legg (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: Fix primitive restart when index changes</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (9):</p>
|
||||
<ul>
|
||||
<li>nir/spirv: Swap the argument order for AtomicCompareExchange</li>
|
||||
<li>nir/spirv: Use the correct sources for CompareExchange on images</li>
|
||||
<li>nir/spirv: Break variable decoration handling into a helper</li>
|
||||
<li>nir/spirv: Refactor variable deocration handling</li>
|
||||
<li>nir/spirv/cfg: Handle switches whose break block is a loop continue</li>
|
||||
<li>nir/spirv/cfg: Detect switch_break after loop_break/continue</li>
|
||||
<li>nir: Add a nop intrinsic</li>
|
||||
<li>nir/spirv/cfg: Use a nop intrinsic for tagging the ends of blocks</li>
|
||||
<li>intel/blorp: Rework our usage of ralloc when compiling shaders</li>
|
||||
</ul>
|
||||
|
||||
<p>Jonathan Gray (3):</p>
|
||||
<ul>
|
||||
<li>genxml: add generated headers to EXTRA_DIST</li>
|
||||
<li>mapi: automake: set VISIBILITY_CFLAGS for shared glapi</li>
|
||||
<li>mesa: automake: include mesa_glinterop.h in distfile</li>
|
||||
</ul>
|
||||
|
||||
<p>Julien Isorce (1):</p>
|
||||
<ul>
|
||||
<li>st/va: also honors interlaced preference when providing a video format</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (8):</p>
|
||||
<ul>
|
||||
<li>nir: Call nir_metadata_preserve from nir_lower_alu_to_scalar().</li>
|
||||
<li>mesa: Expose RESET_NOTIFICATION_STRATEGY with KHR_robustness.</li>
|
||||
<li>i965: Fix missing _NEW_TRANSFORM in Gen8+ 3DSTATE_DS atom.</li>
|
||||
<li>i965: Add missing BRW_NEW_VS_PROG_DATA to 3DSTATE_CLIP.</li>
|
||||
<li>i965: Move BRW_NEW_FRAGMENT_PROGRAM from 3DSTATE_PS to PS_EXTRA.</li>
|
||||
<li>i965: Add missing BRW_NEW_CS_PROG_DATA to compute constant atom.</li>
|
||||
<li>i965: Add missing BRW_CS_PROG_DATA to CS work group surface atom.</li>
|
||||
<li>i965: Fix gl_InvocationID in dual object GS where invocations == 1.</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (12):</p>
|
||||
<ul>
|
||||
<li>radeonsi: fix cubemaps viewed as 2D</li>
|
||||
<li>radeonsi: take compute shader and dispatch indirect memory usage into account</li>
|
||||
<li>radeonsi: fix FP64 UBO loads with indirect uniform block indexing</li>
|
||||
<li>mesa: fix glGetFramebufferAttachmentParameteriv w/ on-demand FRONT_BACK alloc</li>
|
||||
<li>radeonsi: fix interpolateAt opcodes for .zw components</li>
|
||||
<li>radeonsi: fix texture border colors for compute shaders</li>
|
||||
<li>radeonsi: disable ReZ</li>
|
||||
<li>gallium/radeon: make sure the address of separate CMASK is aligned properly</li>
|
||||
<li>winsys/amdgpu: fix radeon_surf::macro_tile_index for imported textures</li>
|
||||
<li>egl: use util/macros.h</li>
|
||||
<li>egl: make interop ABI visible again</li>
|
||||
<li>glx: make interop ABI visible again</li>
|
||||
</ul>
|
||||
|
||||
<p>Mario Kleiner (1):</p>
|
||||
<ul>
|
||||
<li>glx: Perform check for valid fbconfig against proper X-Screen.</li>
|
||||
</ul>
|
||||
|
||||
<p>Martin Peres (2):</p>
|
||||
<ul>
|
||||
<li>loader/dri3: add get_dri_screen() to the vtable</li>
|
||||
<li>loader/dri3: import prime buffers in the currently-bound screen</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Whitlock (5):</p>
|
||||
<ul>
|
||||
<li>egl/android: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
|
||||
<li>gallium/auxiliary: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
|
||||
<li>st/dri: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
|
||||
<li>st/xa: replace call to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
|
||||
<li>gallium/winsys: replace calls to dup(2) with fcntl(F_DUPFD_CLOEXEC)</li>
|
||||
</ul>
|
||||
|
||||
<p>Max Staudt (1):</p>
|
||||
<ul>
|
||||
<li>r300g: Set R300_VAP_CNTL on RSxxx to avoid triangle flickering</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (1):</p>
|
||||
<ul>
|
||||
<li>loader/dri3: Overhaul dri3_update_num_back</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicholas Bishop (2):</p>
|
||||
<ul>
|
||||
<li>gbm: return appropriate error when queryImage() fails</li>
|
||||
<li>st/dri: check pipe_screen->resource_get_handle() return value</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (10):</p>
|
||||
<ul>
|
||||
<li>gallium/radeon: cleanup and fix branch emits</li>
|
||||
<li>st/glsl_to_tgsi: disable on-the-fly peephole for 64-bit operations</li>
|
||||
<li>st/glsl_to_tgsi: simplify translate_tex_offset</li>
|
||||
<li>st/glsl_to_tgsi: fix textureGatherOffset with indirectly loaded offsets</li>
|
||||
<li>st/mesa: fix vertex elements setup for doubles</li>
|
||||
<li>radeonsi: fix indirect loads of 64 bit constants</li>
|
||||
<li>st/glsl_to_tgsi: fix atomic counter addressing</li>
|
||||
<li>st/glsl_to_tgsi: fix block copies of arrays of doubles</li>
|
||||
<li>st/mesa: only set primitive_restart when the restart index is in range</li>
|
||||
<li>radeonsi: fix 64-bit loads from LDS</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (4):</p>
|
||||
<ul>
|
||||
<li>nvc0/ir: fix subops for IMAD</li>
|
||||
<li>gk110/ir: fix wrong emission of OP_NOT</li>
|
||||
<li>nvc0: use correct bufctx when invalidating CP textures</li>
|
||||
<li>nvc0/ir: fix emission of IMAD with NEG modifiers</li>
|
||||
</ul>
|
||||
|
||||
<p>Stencel, Joanna (1):</p>
|
||||
<ul>
|
||||
<li>egl/wayland: add missing destroy_window callback</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (5):</p>
|
||||
<ul>
|
||||
<li>egl: stop claiming support for pbuffer + msaa</li>
|
||||
<li>egl/dri2: set max values for pbuffer width and height</li>
|
||||
<li>egl: add check that eglCreateContext gets a valid config</li>
|
||||
<li>mesa: fix error handling in DrawBuffers</li>
|
||||
<li>egl: set preserved behavior for surface only if config supports it</li>
|
||||
</ul>
|
||||
|
||||
<p>Tim Rowley (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac: add llvm inteljitevents component if enabled</li>
|
||||
</ul>
|
||||
|
||||
<p>Vedran Miletić (1):</p>
|
||||
<ul>
|
||||
<li>clover: Fix build against clang SVN >= r273191</li>
|
||||
</ul>
|
||||
|
||||
<p>Vinson Lee (1):</p>
|
||||
<ul>
|
||||
<li>Revert "mesa_glinterop: remove inclusion of GLX header"</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
138
docs/relnotes/12.0.5.html
Normal file
138
docs/relnotes/12.0.5.html
Normal file
@@ -0,0 +1,138 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 12.0.5 Release Notes / December 5, 2016</h1>
|
||||
|
||||
<p>
|
||||
Mesa 12.0.5 is a bug fix release which fixes bugs found since the 12.0.4 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 12.0.5 implements the OpenGL 4.3 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.3. OpenGL
|
||||
4.3 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
44d08a27d98bfeacd864381189e434d98afbf451689d01f80380dc1d66450e5b mesa-12.0.5.tar.gz
|
||||
2b0a972d8282860a11291c09c3ef01ac45171405951eb21a83c45ed2b4321924 mesa-12.0.5.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=77662">Bug 77662</a> - Fail to render to different faces of depth-stencil cube map</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97779">Bug 97779</a> - [regression, bisected][BDW, GPU hang] stuck on render ring, always reproducible</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98415">Bug 98415</a> - Vulkan Driver JSON file contains incorrect field</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Adam Jackson (2):</p>
|
||||
<ul>
|
||||
<li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
|
||||
<li>glx/glvnd: Fix dispatch function names and indices</li>
|
||||
</ul>
|
||||
|
||||
<p>Anuj Phogat (1):</p>
|
||||
<ul>
|
||||
<li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (4):</p>
|
||||
<ul>
|
||||
<li>docs: add release notes for 12.0.4</li>
|
||||
<li>docs: add sha256 checksums for 12.0.4</li>
|
||||
<li>cherry-ignore: add reverted LLVM_LIBDIR patch</li>
|
||||
<li>Update version to 12.0.5</li>
|
||||
</ul>
|
||||
|
||||
<p>Haixia Shi (1):</p>
|
||||
<ul>
|
||||
<li>mesa: change state query return value for RGB565</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (3):</p>
|
||||
<ul>
|
||||
<li>i965/fs/generator: Don't use the address immediate for MOV_INDIRECT</li>
|
||||
<li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
|
||||
<li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (1):</p>
|
||||
<ul>
|
||||
<li>intel: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (13):</p>
|
||||
<ul>
|
||||
<li>gallium/radeon: fix behavior of GLSL findLSB(0)</li>
|
||||
<li>gallium/radeon: make sure HTILE address is aligned properly</li>
|
||||
<li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
|
||||
<li>gallium/radeon: unify viewport emission code</li>
|
||||
<li>gallium/radeon: set VPORT_ZMIN/MAX registers correctly</li>
|
||||
<li>radeonsi: fix gl_PatchVerticesIn for tessellation evaluation shader</li>
|
||||
<li>radeonsi: fix a crash in imageSize for cubemap arrays</li>
|
||||
<li>radeonsi: emit TA_CS_BC_BASE_ADDR on SI only if the kernel allows it</li>
|
||||
<li>gallium/radeon: add support for sharing textures with DCC between processes</li>
|
||||
<li>radeonsi: always set all blend registers</li>
|
||||
<li>radeonsi: set CB_BLEND1_CONTROL.ENABLE for dual source blending</li>
|
||||
<li>radeonsi: disable RB+ blend optimizations for dual source blending</li>
|
||||
<li>radeonsi: silence runtime warnings with LLVM 3.9</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Turner (1):</p>
|
||||
<ul>
|
||||
<li>anv: Replace "abi_versions" with correct "api_version".</li>
|
||||
</ul>
|
||||
|
||||
<p>Nanley Chery (1):</p>
|
||||
<ul>
|
||||
<li>mesa/fbobject: Update CubeMapFace when reusing textures</li>
|
||||
</ul>
|
||||
|
||||
<p>Steinar H. Gunderson (1):</p>
|
||||
<ul>
|
||||
<li>Fix races during _mesa_HashWalk().</li>
|
||||
</ul>
|
||||
|
||||
<p>Tim Rowley (3):</p>
|
||||
<ul>
|
||||
<li>swr: [rasterizer jitter] cleanup supporting different llvm versions</li>
|
||||
<li>swr: [rasterizer jitter] fix llvm-3.7 compile</li>
|
||||
<li>swr: [rasterizer] add support for llvm-3.9</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
148
docs/relnotes/12.0.6.html
Normal file
148
docs/relnotes/12.0.6.html
Normal file
@@ -0,0 +1,148 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 12.0.6 Release Notes / January 23, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 12.0.6 is a bug fix release which fixes bugs found since the 12.0.5 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 12.0.6 implements the OpenGL 4.3 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.3. OpenGL
|
||||
4.3 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
65339ba5d76a45225b8b56f9a1da9db15c569e1d163760faa2921da0a8461741 mesa-12.0.6.tar.gz
|
||||
7d6da9744c1022a4c2ab6ad01a206984d00443fb691568011d01b3dd97e36448 mesa-12.0.6.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] "Assertion `bkref' failed" reproducible with glmark2</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Chad Versace (3):</p>
|
||||
<ul>
|
||||
<li>i965/mt: Disable aux surfaces after making miptree shareable</li>
|
||||
<li>i965/mt: Disable HiZ when sharing depth buffer externally (v2)</li>
|
||||
<li>anv: Handle vkGetPhysicalDeviceQueueFamilyProperties with count == 0</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (5):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 12.0.5</li>
|
||||
<li>get-typod-pick-list.sh: add new script</li>
|
||||
<li>automake: use shared llvm libs for make distcheck</li>
|
||||
<li>egl/wayland: use the destroy_window_callback for swrast</li>
|
||||
<li>Update version to 12.0.6</li>
|
||||
</ul>
|
||||
|
||||
<p>Fredrik Höglund (1):</p>
|
||||
<ul>
|
||||
<li>dri3: Fix MakeCurrent without a default framebuffer</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (1):</p>
|
||||
<ul>
|
||||
<li>nouveau: take extra push space into account for pushbuf_space calls</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (19):</p>
|
||||
<ul>
|
||||
<li>spirv/nir: Fix some texture opcode asserts</li>
|
||||
<li>spirv/nir: Add support for shadow samplers that return vec4</li>
|
||||
<li>spirv/nir: Properly handle gather components</li>
|
||||
<li>anv/pipeline: Set binding_table.gather_texture_start</li>
|
||||
<li>nir: Add a helper for determining the type of a texture source</li>
|
||||
<li>nir/lower_tex: Add some helpers for working with tex sources</li>
|
||||
<li>nir/lower_tex: Add support for lowering coordinate offsets</li>
|
||||
<li>i965/nir: Enable NIR lowering of txf and rect offsets</li>
|
||||
<li>i965: Get rid of the do_lower_unnormalized_offsets pass</li>
|
||||
<li>spirv/nir: Don't increment coord_components for array lod queries</li>
|
||||
<li>anv/image: Assert that the image format is actually supported</li>
|
||||
<li>spirv/nir: Move opcode selection higher up in handle_texture</li>
|
||||
<li>spirv/nir: Refactor type handling in handle_texture</li>
|
||||
<li>nir/spirv: Refactor coordinate handling in handle_texture</li>
|
||||
<li>spirv/nir: Handle texture projectors</li>
|
||||
<li>spirv/nir: Add support for ImageQuerySamples</li>
|
||||
<li>anv/device: Return the right error for failed maps</li>
|
||||
<li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
|
||||
<li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.</li>
|
||||
<li>i965: Properly flush in hsw_pause_transform_feedback().</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (6):</p>
|
||||
<ul>
|
||||
<li>cso: don't release sampler states that are bound</li>
|
||||
<li>radeonsi: always restore sampler states when unbinding sampler views</li>
|
||||
<li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
|
||||
<li>radeonsi: disable CE on SI + AMDGPU</li>
|
||||
<li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
|
||||
<li>gallium/radeon: fix the draw-calls HUD query</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Turner (3):</p>
|
||||
<ul>
|
||||
<li>i965/fs: Rename opt_copy_propagate -> opt_copy_propagation.</li>
|
||||
<li>i965/fs: Add unit tests for copy propagation pass.</li>
|
||||
<li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (1):</p>
|
||||
<ul>
|
||||
<li>cso: Don't restore nr_samplers in cso_restore_fragment_samplers</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: enable WQM in PS prolog when needed</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -1234,6 +1234,50 @@ nir_tex_instr_is_query(nir_tex_instr *instr)
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns the ALU base type (nir_type_int or nir_type_float) of source
 * number `src` of the texture instruction `instr`.
 *
 * The answer is selected from the source's src_type; for coordinate and LOD
 * sources it additionally depends on the instruction's opcode.  `src` is
 * used to index instr->src[] directly and is not bounds-checked here.
 * Any src_type not listed below hits unreachable().
 */
static inline nir_alu_type
|
||||
nir_tex_instr_src_type(nir_tex_instr *instr, unsigned src)
|
||||
{
|
||||
switch (instr->src[src].src_type) {
|
||||
/* Coordinates: integer for the txf-family opcodes and samples_identical,
 * float for every other opcode.
 */
case nir_tex_src_coord:
|
||||
switch (instr->op) {
|
||||
case nir_texop_txf:
|
||||
case nir_texop_txf_ms:
|
||||
case nir_texop_txf_ms_mcs:
|
||||
case nir_texop_samples_identical:
|
||||
return nir_type_int;
|
||||
|
||||
default:
|
||||
return nir_type_float;
|
||||
}
|
||||
|
||||
/* LOD: integer for txs and txf, float for every other opcode. */
case nir_tex_src_lod:
|
||||
switch (instr->op) {
|
||||
case nir_texop_txs:
|
||||
case nir_texop_txf:
|
||||
return nir_type_int;
|
||||
|
||||
default:
|
||||
return nir_type_float;
|
||||
}
|
||||
|
||||
/* These source kinds are float regardless of opcode. */
case nir_tex_src_projector:
|
||||
case nir_tex_src_comparitor:
|
||||
case nir_tex_src_bias:
|
||||
case nir_tex_src_ddx:
|
||||
case nir_tex_src_ddy:
|
||||
return nir_type_float;
|
||||
|
||||
/* These source kinds are integer regardless of opcode. */
case nir_tex_src_offset:
|
||||
case nir_tex_src_ms_index:
|
||||
case nir_tex_src_texture_offset:
|
||||
case nir_tex_src_sampler_offset:
|
||||
return nir_type_int;
|
||||
|
||||
default:
|
||||
unreachable("Invalid texture source type");
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
|
||||
{
|
||||
@@ -2344,6 +2388,16 @@ typedef struct nir_lower_tex_options {
|
||||
*/
|
||||
unsigned lower_txp;
|
||||
|
||||
/**
|
||||
* If true, lower away nir_tex_src_offset for all texelfetch instructions.
|
||||
*/
|
||||
bool lower_txf_offset;
|
||||
|
||||
/**
|
||||
* If true, lower away nir_tex_src_offset for all rect textures.
|
||||
*/
|
||||
bool lower_rect_offset;
|
||||
|
||||
/**
|
||||
* If true, lower rect textures to 2D, using txs to fetch the
|
||||
* texture dimensions and dividing the texture coords by the
|
||||
|
@@ -38,16 +38,39 @@
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
/**
 * Searches the sources of `tex` for one whose src_type equals `src_type`.
 *
 * Returns the index of the first matching source, or -1 when none of the
 * tex->num_srcs sources match.  The return type is signed so that callers
 * can test for the -1 "not found" result.
 */
static int
|
||||
tex_instr_find_src(nir_tex_instr *tex, nir_tex_src_type src_type)
|
||||
{
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
if (tex->src[i].src_type == src_type)
|
||||
return i;
|
||||
}
|
||||
|
||||
/* No source of the requested type is present. */
return -1;
|
||||
}
|
||||
|
||||
static void
|
||||
tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
|
||||
{
|
||||
assert(src_idx < tex->num_srcs);
|
||||
|
||||
/* First rewrite the source to NIR_SRC_INIT */
|
||||
nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);
|
||||
|
||||
/* Now, move all of the other sources down */
|
||||
for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
|
||||
tex->src[i-1].src_type = tex->src[i].src_type;
|
||||
nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
|
||||
}
|
||||
tex->num_srcs--;
|
||||
}
|
||||
|
||||
static void
|
||||
project_src(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
/* Find the projector in the srcs list, if present. */
|
||||
unsigned proj_index;
|
||||
for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
|
||||
if (tex->src[proj_index].src_type == nir_tex_src_projector)
|
||||
break;
|
||||
}
|
||||
if (proj_index == tex->num_srcs)
|
||||
int proj_index = tex_instr_find_src(tex, nir_tex_src_projector);
|
||||
if (proj_index < 0)
|
||||
return;
|
||||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
@@ -102,18 +125,57 @@ project_src(nir_builder *b, nir_tex_instr *tex)
|
||||
nir_src_for_ssa(projected));
|
||||
}
|
||||
|
||||
/* Now move the later tex sources down the array so that the projector
|
||||
* disappears.
|
||||
*/
|
||||
nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
|
||||
NIR_SRC_INIT);
|
||||
for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
|
||||
tex->src[i-1].src_type = tex->src[i].src_type;
|
||||
nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
|
||||
}
|
||||
tex->num_srcs--;
|
||||
tex_instr_remove_src(tex, proj_index);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_offset(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
int offset_index = tex_instr_find_src(tex, nir_tex_src_offset);
|
||||
if (offset_index < 0)
|
||||
return false;
|
||||
|
||||
int coord_index = tex_instr_find_src(tex, nir_tex_src_coord);
|
||||
assert(coord_index >= 0);
|
||||
|
||||
assert(tex->src[offset_index].src.is_ssa);
|
||||
assert(tex->src[coord_index].src.is_ssa);
|
||||
nir_ssa_def *offset = tex->src[offset_index].src.ssa;
|
||||
nir_ssa_def *coord = tex->src[coord_index].src.ssa;
|
||||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
nir_ssa_def *offset_coord;
|
||||
if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
|
||||
assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT);
|
||||
offset_coord = nir_fadd(b, coord, nir_i2f(b, offset));
|
||||
} else {
|
||||
offset_coord = nir_iadd(b, coord, offset);
|
||||
}
|
||||
|
||||
if (tex->is_array) {
|
||||
/* The offset is not applied to the array index */
|
||||
if (tex->coord_components == 2) {
|
||||
offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
|
||||
nir_channel(b, coord, 1));
|
||||
} else if (tex->coord_components == 3) {
|
||||
offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
|
||||
nir_channel(b, offset_coord, 1),
|
||||
nir_channel(b, coord, 2));
|
||||
} else {
|
||||
unreachable("Invalid number of components");
|
||||
}
|
||||
}
|
||||
|
||||
nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
|
||||
nir_src_for_ssa(offset_coord));
|
||||
|
||||
tex_instr_remove_src(tex, offset_index);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static nir_ssa_def *
|
||||
get_texture_size(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
@@ -444,6 +506,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
|
||||
(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
|
||||
options->lower_rect_offset)) {
|
||||
progress = lower_offset(b, tex) || progress;
|
||||
}
|
||||
|
||||
if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
|
||||
lower_rect(b, tex);
|
||||
progress = true;
|
||||
|
@@ -1335,54 +1335,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
} else {
|
||||
image_type = sampled.sampler->var->var->interface_type;
|
||||
}
|
||||
|
||||
nir_tex_src srcs[8]; /* 8 should be enough */
|
||||
nir_tex_src *p = srcs;
|
||||
|
||||
unsigned idx = 4;
|
||||
|
||||
bool has_coord = false;
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleImplicitLod:
|
||||
case SpvOpImageSampleExplicitLod:
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjImplicitLod:
|
||||
case SpvOpImageSampleProjExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
case SpvOpImageFetch:
|
||||
case SpvOpImageGather:
|
||||
case SpvOpImageDrefGather:
|
||||
case SpvOpImageQueryLod: {
|
||||
/* All these types have the coordinate as their first real argument */
|
||||
struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]);
|
||||
has_coord = true;
|
||||
p->src = nir_src_for_ssa(coord->def);
|
||||
p->src_type = nir_tex_src_coord;
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* These all have an explicit depth value as their next source */
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* For OpImageQuerySizeLod, we always have an LOD */
|
||||
if (opcode == SpvOpImageQuerySizeLod)
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
|
||||
const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image_type);
|
||||
const bool is_array = glsl_sampler_type_is_array(image_type);
|
||||
const bool is_shadow = glsl_sampler_type_is_shadow(image_type);
|
||||
|
||||
/* Figure out the base texture operation */
|
||||
nir_texop texop;
|
||||
@@ -1428,10 +1383,108 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
break;
|
||||
|
||||
case SpvOpImageQuerySamples:
|
||||
texop = nir_texop_texture_samples;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Unhandled opcode");
|
||||
}
|
||||
|
||||
nir_tex_src srcs[8]; /* 8 should be enough */
|
||||
nir_tex_src *p = srcs;
|
||||
|
||||
unsigned idx = 4;
|
||||
|
||||
struct nir_ssa_def *coord;
|
||||
unsigned coord_components;
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleImplicitLod:
|
||||
case SpvOpImageSampleExplicitLod:
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjImplicitLod:
|
||||
case SpvOpImageSampleProjExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
case SpvOpImageFetch:
|
||||
case SpvOpImageGather:
|
||||
case SpvOpImageDrefGather:
|
||||
case SpvOpImageQueryLod: {
|
||||
/* All these types have the coordinate as their first real argument */
|
||||
switch (sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
case GLSL_SAMPLER_DIM_BUF:
|
||||
coord_components = 1;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
coord_components = 2;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
coord_components = 3;
|
||||
break;
|
||||
default:
|
||||
assert("Invalid sampler type");
|
||||
}
|
||||
|
||||
if (is_array && texop != nir_texop_lod)
|
||||
coord_components++;
|
||||
|
||||
coord = vtn_ssa_value(b, w[idx++])->def;
|
||||
p->src = nir_src_for_ssa(coord);
|
||||
p->src_type = nir_tex_src_coord;
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
coord = NULL;
|
||||
coord_components = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleProjImplicitLod:
|
||||
case SpvOpImageSampleProjExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
/* These have the projector as the last coordinate component */
|
||||
p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
|
||||
p->src_type = nir_tex_src_projector;
|
||||
p++;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned gather_component = 0;
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
case SpvOpImageDrefGather:
|
||||
/* These all have an explicit depth value as their next source */
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
|
||||
break;
|
||||
|
||||
case SpvOpImageGather:
|
||||
/* This has a component as its next source */
|
||||
gather_component =
|
||||
vtn_value(b, w[idx++], vtn_value_type_constant)->constant->value.u[0];
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* For OpImageQuerySizeLod, we always have an LOD */
|
||||
if (opcode == SpvOpImageQuerySizeLod)
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
|
||||
|
||||
/* Now we need to handle some number of optional arguments */
|
||||
if (idx < count) {
|
||||
uint32_t operands = w[idx++];
|
||||
@@ -1444,12 +1497,12 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
|
||||
if (operands & SpvImageOperandsLodMask) {
|
||||
assert(texop == nir_texop_txl || texop == nir_texop_txf ||
|
||||
texop == nir_texop_txf_ms || texop == nir_texop_txs);
|
||||
texop == nir_texop_txs);
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
|
||||
}
|
||||
|
||||
if (operands & SpvImageOperandsGradMask) {
|
||||
assert(texop == nir_texop_tex);
|
||||
assert(texop == nir_texop_txl);
|
||||
texop = nir_texop_txd;
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx);
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy);
|
||||
@@ -1476,35 +1529,13 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
|
||||
memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
|
||||
|
||||
instr->sampler_dim = glsl_get_sampler_dim(image_type);
|
||||
instr->is_array = glsl_sampler_type_is_array(image_type);
|
||||
instr->is_shadow = glsl_sampler_type_is_shadow(image_type);
|
||||
instr->is_new_style_shadow = instr->is_shadow;
|
||||
|
||||
if (has_coord) {
|
||||
switch (instr->sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
case GLSL_SAMPLER_DIM_BUF:
|
||||
instr->coord_components = 1;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
instr->coord_components = 2;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
instr->coord_components = 3;
|
||||
break;
|
||||
default:
|
||||
assert("Invalid sampler type");
|
||||
}
|
||||
|
||||
if (instr->is_array)
|
||||
instr->coord_components++;
|
||||
} else {
|
||||
instr->coord_components = 0;
|
||||
}
|
||||
instr->coord_components = coord_components;
|
||||
instr->sampler_dim = sampler_dim;
|
||||
instr->is_array = is_array;
|
||||
instr->is_shadow = is_shadow;
|
||||
instr->is_new_style_shadow =
|
||||
is_shadow && glsl_get_components(ret_type->type) == 1;
|
||||
instr->component = gather_component;
|
||||
|
||||
switch (glsl_get_sampler_result_type(image_type)) {
|
||||
case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break;
|
||||
|
@@ -527,12 +527,13 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
|
||||
nir_variable *phi_var = phi_entry->data;
|
||||
|
||||
for (unsigned i = 3; i < count; i += 2) {
|
||||
struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
|
||||
struct vtn_block *pred =
|
||||
vtn_value(b, w[i + 1], vtn_value_type_block)->block;
|
||||
|
||||
b->nb.cursor = nir_after_instr(&pred->end_nop->instr);
|
||||
|
||||
struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
|
||||
|
||||
vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
|
||||
}
|
||||
|
||||
|
@@ -1706,6 +1706,8 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
|
||||
dri2_surf->format = WL_SHM_FORMAT_ARGB8888;
|
||||
|
||||
dri2_surf->wl_win = window;
|
||||
dri2_surf->wl_win->private = dri2_surf;
|
||||
dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
|
||||
|
||||
dri2_surf->base.Width = -1;
|
||||
dri2_surf->base.Height = -1;
|
||||
|
@@ -188,7 +188,9 @@ cso_insert_state(struct cso_cache *sc,
|
||||
void *state)
|
||||
{
|
||||
struct cso_hash *hash = _cso_hash_for_type(sc, type);
|
||||
sanitize_hash(sc, hash, type, sc->max_size);
|
||||
|
||||
if (type != CSO_SAMPLER)
|
||||
sanitize_hash(sc, hash, type, sc->max_size);
|
||||
|
||||
return cso_hash_insert(hash, hash_key, state);
|
||||
}
|
||||
|
@@ -1268,7 +1268,6 @@ cso_restore_fragment_samplers(struct cso_context *ctx)
|
||||
{
|
||||
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
|
||||
|
||||
info->nr_samplers = ctx->nr_fragment_samplers_saved;
|
||||
memcpy(info->samplers, ctx->fragment_samplers_saved,
|
||||
sizeof(info->samplers));
|
||||
cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
|
||||
|
@@ -73,7 +73,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) {
|
||||
if (!dec->cmds)
|
||||
return;
|
||||
|
||||
nouveau_pushbuf_space(push, 8, 2, 0);
|
||||
nouveau_pushbuf_space(push, 16, 2, 0);
|
||||
nouveau_bufctx_reset(dec->bufctx, NV31_VIDEO_BIND_CMD);
|
||||
|
||||
#define BCTX_ARGS dec->bufctx, NV31_VIDEO_BIND_CMD, NOUVEAU_BO_RD
|
||||
|
@@ -127,7 +127,7 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
|
||||
|
||||
refn.bo = mt->base.bo;
|
||||
refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
|
||||
if (nouveau_pushbuf_space(push, 16, 1, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 1, 0) ||
|
||||
nouveau_pushbuf_refn (push, &refn, 1))
|
||||
return;
|
||||
|
||||
|
@@ -431,7 +431,7 @@ nv30_transfer_rect_sifm(XFER_ARGS)
|
||||
si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
|
||||
}
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32, 6, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 64, 6, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -516,7 +516,7 @@ nv30_transfer_rect_m2mf(XFER_ARGS)
|
||||
while (h) {
|
||||
unsigned lines = (h > 2047) ? 2047 : h;
|
||||
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -709,7 +709,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
|
||||
lines = (pages > 2047) ? 2047 : pages;
|
||||
pages -= lines;
|
||||
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -733,7 +733,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
|
||||
}
|
||||
|
||||
if (size) {
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
|
@@ -294,7 +294,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
|
||||
PUSH_DATAf(push, color->f[2]);
|
||||
PUSH_DATAf(push, color->f[3]);
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
|
||||
@@ -388,7 +388,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
|
||||
mode |= NV50_3D_CLEAR_BUFFERS_S;
|
||||
}
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
|
||||
@@ -742,7 +742,7 @@ nv50_clear_buffer(struct pipe_context *pipe,
|
||||
PUSH_DATAf(push, color.f[2]);
|
||||
PUSH_DATAf(push, color.f[3]);
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
|
@@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (push, prim);
|
||||
|
||||
nouveau_pushbuf_space(push, 8, 0, 1);
|
||||
nouveau_pushbuf_space(push, 16, 0, 1);
|
||||
PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
|
||||
|
||||
switch (index_size) {
|
||||
|
@@ -273,7 +273,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
|
||||
|
||||
/* So lets test if the fence is working? */
|
||||
nouveau_pushbuf_space(push[0], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[0], 16, 1, 0);
|
||||
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[0], SUBC_BSP(0x240), 3);
|
||||
PUSH_DATAh(push[0], dec->fence_bo->offset);
|
||||
@@ -284,7 +284,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[0], 0);
|
||||
PUSH_KICK (push[0]);
|
||||
|
||||
nouveau_pushbuf_space(push[1], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[1], 16, 1, 0);
|
||||
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[1], SUBC_VP(0x240), 3);
|
||||
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
|
||||
@@ -295,7 +295,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[1], 0);
|
||||
PUSH_KICK (push[1]);
|
||||
|
||||
nouveau_pushbuf_space(push[2], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[2], 16, 1, 0);
|
||||
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[2], SUBC_PPP(0x240), 3);
|
||||
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
|
||||
|
@@ -47,7 +47,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
int ret;
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
unsigned fence_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -61,10 +60,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!dec->bitplane_bo)
|
||||
num_refs--;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
|
||||
for (i = 0; i < num_buffers; i++)
|
||||
bsp_size += num_bytes[i];
|
||||
@@ -112,7 +107,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
|
||||
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
|
||||
|
||||
nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32, num_refs, 0);
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
bsp_addr = bsp_bo->offset >> 8;
|
||||
|
@@ -93,13 +93,8 @@ nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_pushbuf *push = dec->pushbuf[2];
|
||||
unsigned ppp_caps = 0x10;
|
||||
unsigned fence_extra = 0;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
|
||||
nouveau_pushbuf_space(push, 32, 4, 0);
|
||||
|
||||
switch (codec) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG12: {
|
||||
|
@@ -76,7 +76,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
u32 fence_extra = 0, codec_extra = 0;
|
||||
u32 codec_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -88,10 +88,6 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
};
|
||||
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
|
||||
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
|
||||
codec_extra += 2;
|
||||
@@ -115,8 +111,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
|
||||
nv98_decoder_kick_ref(dec, target);
|
||||
|
||||
nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
|
||||
6 + codec_extra + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
|
||||
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
|
@@ -403,7 +403,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
|
||||
if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
|
||||
nvc0_hw_query_fifo_wait(nvc0, q);
|
||||
|
||||
nouveau_pushbuf_space(push, 16, 2, 0);
|
||||
nouveau_pushbuf_space(push, 32, 2, 0);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
|
||||
|
@@ -799,7 +799,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
|
||||
}
|
||||
|
||||
while (num_instances--) {
|
||||
nouveau_pushbuf_space(push, 9, 0, 1);
|
||||
nouveau_pushbuf_space(push, 16, 0, 1);
|
||||
BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (push, mode);
|
||||
BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
|
||||
|
@@ -297,7 +297,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
|
||||
|
||||
/* So lets test if the fence is working? */
|
||||
nouveau_pushbuf_space(push[0], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[0], 16, 1, 0);
|
||||
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
|
||||
PUSH_DATAh(push[0], dec->fence_bo->offset);
|
||||
@@ -308,7 +308,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[0], 0);
|
||||
PUSH_KICK (push[0]);
|
||||
|
||||
nouveau_pushbuf_space(push[1], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[1], 16, 1, 0);
|
||||
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
|
||||
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
|
||||
@@ -319,7 +319,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[1], 0);
|
||||
PUSH_KICK (push[1]);
|
||||
|
||||
nouveau_pushbuf_space(push[2], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[2], 16, 1, 0);
|
||||
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
|
||||
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
|
||||
|
@@ -143,7 +143,6 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
uint32_t caps;
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
unsigned fence_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -157,15 +156,11 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!dec->bitplane_bo)
|
||||
num_refs--;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
caps = nouveau_vp3_bsp_end(dec, desc);
|
||||
|
||||
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
|
||||
|
||||
nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32, num_refs, 0);
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
bsp_addr = bsp_bo->offset >> 8;
|
||||
|
@@ -93,13 +93,8 @@ nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_pushbuf *push = dec->pushbuf[2];
|
||||
unsigned ppp_caps = 0x10;
|
||||
unsigned fence_extra = 0;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
|
||||
nouveau_pushbuf_space(push, 32, 4, 0);
|
||||
|
||||
switch (codec) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG12: {
|
||||
|
@@ -76,7 +76,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
u32 fence_extra = 0, codec_extra = 0;
|
||||
u32 codec_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -88,10 +88,6 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
};
|
||||
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
|
||||
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
|
||||
codec_extra += 2;
|
||||
@@ -115,8 +111,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
|
||||
nvc0_decoder_kick_ref(dec, target);
|
||||
|
||||
nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
|
||||
6 + codec_extra + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
|
||||
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
|
@@ -473,6 +473,7 @@ static void *evergreen_create_rs_state(struct pipe_context *ctx,
|
||||
r600_init_command_buffer(&rs->buffer, 30);
|
||||
|
||||
rs->scissor_enable = state->scissor;
|
||||
rs->clip_halfz = state->clip_halfz;
|
||||
rs->flatshade = state->flatshade;
|
||||
rs->sprite_coord_enable = state->sprite_coord_enable;
|
||||
rs->two_side = state->light_twoside;
|
||||
|
@@ -1862,8 +1862,8 @@
|
||||
#define R_0283F8_SQ_VTX_SEMANTIC_30 0x000283F8
|
||||
#define R_0283FC_SQ_VTX_SEMANTIC_31 0x000283FC
|
||||
#define R_0288F0_SQ_VTX_SEMANTIC_CLEAR 0x000288F0
|
||||
#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x000282D0
|
||||
#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x000282D4
|
||||
#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0
|
||||
#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4
|
||||
#define R_028400_VGT_MAX_VTX_INDX 0x00028400
|
||||
#define R_028404_VGT_MIN_VTX_INDX 0x00028404
|
||||
#define R_028408_VGT_INDX_OFFSET 0x00028408
|
||||
|
@@ -308,6 +308,7 @@ void r600_begin_new_cs(struct r600_context *ctx)
|
||||
ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
r600_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
|
||||
ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
r600_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
|
||||
if (ctx->b.chip_class <= EVERGREEN) {
|
||||
r600_mark_atom_dirty(ctx, &ctx->config_state.atom);
|
||||
|
@@ -274,6 +274,7 @@ struct r600_rasterizer_state {
|
||||
bool offset_enable;
|
||||
bool scissor_enable;
|
||||
bool multisample_enable;
|
||||
bool clip_halfz;
|
||||
};
|
||||
|
||||
struct r600_poly_offset_state {
|
||||
|
@@ -459,6 +459,7 @@ static void *r600_create_rs_state(struct pipe_context *ctx,
|
||||
r600_init_command_buffer(&rs->buffer, 30);
|
||||
|
||||
rs->scissor_enable = state->scissor;
|
||||
rs->clip_halfz = state->clip_halfz;
|
||||
rs->flatshade = state->flatshade;
|
||||
rs->sprite_coord_enable = state->sprite_coord_enable;
|
||||
rs->two_side = state->light_twoside;
|
||||
|
@@ -364,7 +364,7 @@ static void r600_bind_rs_state(struct pipe_context *ctx, void *state)
|
||||
r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
|
||||
}
|
||||
|
||||
r600_set_scissor_enable(&rctx->b, rs->scissor_enable);
|
||||
r600_viewport_set_rast_deps(&rctx->b, rs->scissor_enable, rs->clip_halfz);
|
||||
|
||||
/* Re-emit PA_SC_LINE_STIPPLE. */
|
||||
rctx->last_primitive_type = -1;
|
||||
|
@@ -366,6 +366,10 @@ struct r600_common_screen {
|
||||
void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
|
||||
struct r600_texture *rtex,
|
||||
struct radeon_bo_metadata *md);
|
||||
|
||||
void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
|
||||
struct r600_texture *rtex,
|
||||
struct radeon_bo_metadata *md);
|
||||
};
|
||||
|
||||
/* This encapsulates a state or an operation which can emitted into the GPU
|
||||
@@ -430,6 +434,7 @@ struct r600_scissors {
|
||||
struct r600_viewports {
|
||||
struct r600_atom atom;
|
||||
unsigned dirty_mask;
|
||||
unsigned depth_range_dirty_mask;
|
||||
struct pipe_viewport_state states[R600_MAX_VIEWPORTS];
|
||||
struct r600_signed_scissor as_scissor[R600_MAX_VIEWPORTS];
|
||||
};
|
||||
@@ -469,6 +474,7 @@ struct r600_common_context {
|
||||
struct r600_scissors scissors;
|
||||
struct r600_viewports viewports;
|
||||
bool scissor_enabled;
|
||||
bool clip_halfz;
|
||||
bool vs_writes_viewport_index;
|
||||
bool vs_disables_clipping_viewport;
|
||||
|
||||
@@ -669,7 +675,8 @@ void r600_init_context_texture_functions(struct r600_common_context *rctx);
|
||||
/* r600_viewport.c */
|
||||
void evergreen_apply_scissor_bug_workaround(struct r600_common_context *rctx,
|
||||
struct pipe_scissor_state *scissor);
|
||||
void r600_set_scissor_enable(struct r600_common_context *rctx, bool enable);
|
||||
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
|
||||
bool scissor_enable, bool clip_halfz);
|
||||
void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
||||
struct tgsi_shader_info *info);
|
||||
void r600_init_viewport_functions(struct r600_common_context *rctx);
|
||||
|
@@ -1139,7 +1139,7 @@ err:
|
||||
static struct pipe_driver_query_info r600_driver_query_list[] = {
|
||||
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
|
||||
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
|
||||
X("draw-calls", DRAW_CALLS, UINT64, CUMULATIVE),
|
||||
X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
|
||||
X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
|
||||
X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
|
||||
X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
|
||||
|
@@ -723,10 +723,11 @@ static void r600_texture_alloc_cmask_separate(struct r600_common_screen *rscreen
|
||||
}
|
||||
|
||||
static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
||||
struct r600_texture *rtex)
|
||||
struct r600_texture *rtex,
|
||||
unsigned *base_align)
|
||||
{
|
||||
unsigned cl_width, cl_height, width, height;
|
||||
unsigned slice_elements, slice_bytes, pipe_interleave_bytes, base_align;
|
||||
unsigned slice_elements, slice_bytes, pipe_interleave_bytes;
|
||||
unsigned num_pipes = rscreen->info.num_tile_pipes;
|
||||
|
||||
if (rscreen->chip_class <= EVERGREEN &&
|
||||
@@ -788,7 +789,7 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
||||
slice_bytes = slice_elements * 4;
|
||||
|
||||
pipe_interleave_bytes = rscreen->info.pipe_interleave_bytes;
|
||||
base_align = num_pipes * pipe_interleave_bytes;
|
||||
*base_align = num_pipes * pipe_interleave_bytes;
|
||||
|
||||
rtex->htile.pitch = width;
|
||||
rtex->htile.height = height;
|
||||
@@ -796,20 +797,22 @@ static unsigned r600_texture_get_htile_size(struct r600_common_screen *rscreen,
|
||||
rtex->htile.yalign = cl_height * 8;
|
||||
|
||||
return (util_max_layer(&rtex->resource.b.b, 0) + 1) *
|
||||
align(slice_bytes, base_align);
|
||||
align(slice_bytes, *base_align);
|
||||
}
|
||||
|
||||
static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
|
||||
struct r600_texture *rtex)
|
||||
{
|
||||
unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex);
|
||||
unsigned alignment = 0;
|
||||
unsigned htile_size = r600_texture_get_htile_size(rscreen, rtex,
|
||||
&alignment);
|
||||
|
||||
if (!htile_size)
|
||||
return;
|
||||
|
||||
rtex->htile_buffer = (struct r600_resource*)
|
||||
pipe_buffer_create(&rscreen->b, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT, htile_size);
|
||||
r600_aligned_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
|
||||
htile_size, alignment);
|
||||
if (rtex->htile_buffer == NULL) {
|
||||
/* this is not a fatal error as we can still keep rendering
|
||||
* without htile buffer */
|
||||
@@ -965,8 +968,12 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||
}
|
||||
}
|
||||
|
||||
if (!buf && rtex->surface.dcc_size &&
|
||||
!(rscreen->debug_flags & DBG_NO_DCC)) {
|
||||
/* Shared textures must always set up DCC here.
|
||||
* If it's not present, it will be disabled by
|
||||
* apply_opaque_metadata later.
|
||||
*/
|
||||
if (rtex->surface.dcc_size &&
|
||||
(buf || !(rscreen->debug_flags & DBG_NO_DCC))) {
|
||||
/* Reserve space for the DCC buffer. */
|
||||
rtex->dcc_offset = align64(rtex->size, rtex->surface.dcc_alignment);
|
||||
rtex->size = rtex->dcc_offset + rtex->surface.dcc_size;
|
||||
@@ -993,7 +1000,9 @@ r600_texture_create_object(struct pipe_screen *screen,
|
||||
rtex->cmask.offset, rtex->cmask.size,
|
||||
0xCCCCCCCC, R600_COHERENCY_NONE);
|
||||
}
|
||||
if (rtex->dcc_offset) {
|
||||
|
||||
/* Initialize DCC only if the texture is not being imported. */
|
||||
if (!buf && rtex->dcc_offset) {
|
||||
r600_screen_clear_buffer(rscreen, &rtex->resource.b.b,
|
||||
rtex->dcc_offset,
|
||||
rtex->surface.dcc_size,
|
||||
@@ -1159,6 +1168,10 @@ static struct pipe_resource *r600_texture_from_handle(struct pipe_screen *screen
|
||||
|
||||
rtex->resource.is_shared = true;
|
||||
rtex->resource.external_usage = usage;
|
||||
|
||||
if (rscreen->apply_opaque_metadata)
|
||||
rscreen->apply_opaque_metadata(rscreen, rtex, &metadata);
|
||||
|
||||
return &rtex->resource.b.b;
|
||||
}
|
||||
|
||||
|
@@ -22,6 +22,7 @@
|
||||
*/
|
||||
|
||||
#include "r600_cs.h"
|
||||
#include "util/u_viewport.h"
|
||||
#include "tgsi/tgsi_scan.h"
|
||||
|
||||
#define GET_MAX_SCISSOR(rctx) (rctx->chip_class >= EVERGREEN ? 16384 : 8192)
|
||||
@@ -260,6 +261,7 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
|
||||
const struct pipe_viewport_state *state)
|
||||
{
|
||||
struct r600_common_context *rctx = (struct r600_common_context *)ctx;
|
||||
unsigned mask;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < num_viewports; i++) {
|
||||
@@ -270,13 +272,28 @@ static void r600_set_viewport_states(struct pipe_context *ctx,
|
||||
&rctx->viewports.as_scissor[index]);
|
||||
}
|
||||
|
||||
rctx->viewports.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
|
||||
rctx->scissors.dirty_mask |= ((1 << num_viewports) - 1) << start_slot;
|
||||
mask = ((1 << num_viewports) - 1) << start_slot;
|
||||
rctx->viewports.dirty_mask |= mask;
|
||||
rctx->viewports.depth_range_dirty_mask |= mask;
|
||||
rctx->scissors.dirty_mask |= mask;
|
||||
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
||||
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||
}
|
||||
|
||||
static void r600_emit_viewports(struct r600_common_context *rctx, struct r600_atom *atom)
|
||||
static void r600_emit_one_viewport(struct r600_common_context *rctx,
|
||||
struct pipe_viewport_state *state)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||
|
||||
radeon_emit(cs, fui(state->scale[0]));
|
||||
radeon_emit(cs, fui(state->translate[0]));
|
||||
radeon_emit(cs, fui(state->scale[1]));
|
||||
radeon_emit(cs, fui(state->translate[1]));
|
||||
radeon_emit(cs, fui(state->scale[2]));
|
||||
radeon_emit(cs, fui(state->translate[2]));
|
||||
}
|
||||
|
||||
static void r600_emit_viewports(struct r600_common_context *rctx)
|
||||
{
|
||||
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||
struct pipe_viewport_state *states = rctx->viewports.states;
|
||||
@@ -288,12 +305,7 @@ static void r600_emit_viewports(struct r600_common_context *rctx, struct r600_at
|
||||
return;
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE, 6);
|
||||
radeon_emit(cs, fui(states[0].scale[0]));
|
||||
radeon_emit(cs, fui(states[0].translate[0]));
|
||||
radeon_emit(cs, fui(states[0].scale[1]));
|
||||
radeon_emit(cs, fui(states[0].translate[1]));
|
||||
radeon_emit(cs, fui(states[0].scale[2]));
|
||||
radeon_emit(cs, fui(states[0].translate[2]));
|
||||
r600_emit_one_viewport(rctx, &states[0]);
|
||||
rctx->viewports.dirty_mask &= ~1; /* clear one bit */
|
||||
return;
|
||||
}
|
||||
@@ -305,25 +317,70 @@ static void r600_emit_viewports(struct r600_common_context *rctx, struct r600_at
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_02843C_PA_CL_VPORT_XSCALE +
|
||||
start * 4 * 6, count * 6);
|
||||
for (i = start; i < start+count; i++) {
|
||||
radeon_emit(cs, fui(states[i].scale[0]));
|
||||
radeon_emit(cs, fui(states[i].translate[0]));
|
||||
radeon_emit(cs, fui(states[i].scale[1]));
|
||||
radeon_emit(cs, fui(states[i].translate[1]));
|
||||
radeon_emit(cs, fui(states[i].scale[2]));
|
||||
radeon_emit(cs, fui(states[i].translate[2]));
|
||||
}
|
||||
for (i = start; i < start+count; i++)
|
||||
r600_emit_one_viewport(rctx, &states[i]);
|
||||
}
|
||||
rctx->viewports.dirty_mask = 0;
|
||||
}
|
||||
|
||||
void r600_set_scissor_enable(struct r600_common_context *rctx, bool enable)
|
||||
static void r600_emit_depth_ranges(struct r600_common_context *rctx)
|
||||
{
|
||||
if (rctx->scissor_enabled != enable) {
|
||||
rctx->scissor_enabled = enable;
|
||||
struct radeon_winsys_cs *cs = rctx->gfx.cs;
|
||||
struct pipe_viewport_state *states = rctx->viewports.states;
|
||||
unsigned mask = rctx->viewports.depth_range_dirty_mask;
|
||||
float zmin, zmax;
|
||||
|
||||
/* The simple case: Only 1 viewport is active. */
|
||||
if (!rctx->vs_writes_viewport_index) {
|
||||
if (!(mask & 1))
|
||||
return;
|
||||
|
||||
util_viewport_zmin_zmax(&states[0], rctx->clip_halfz, &zmin, &zmax);
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0, 2);
|
||||
radeon_emit(cs, fui(zmin));
|
||||
radeon_emit(cs, fui(zmax));
|
||||
rctx->viewports.depth_range_dirty_mask &= ~1; /* clear one bit */
|
||||
return;
|
||||
}
|
||||
|
||||
while (mask) {
|
||||
int start, count, i;
|
||||
|
||||
u_bit_scan_consecutive_range(&mask, &start, &count);
|
||||
|
||||
radeon_set_context_reg_seq(cs, R_0282D0_PA_SC_VPORT_ZMIN_0 +
|
||||
start * 4 * 2, count * 2);
|
||||
for (i = start; i < start+count; i++) {
|
||||
util_viewport_zmin_zmax(&states[i], rctx->clip_halfz, &zmin, &zmax);
|
||||
radeon_emit(cs, fui(zmin));
|
||||
radeon_emit(cs, fui(zmax));
|
||||
}
|
||||
}
|
||||
rctx->viewports.depth_range_dirty_mask = 0;
|
||||
}
|
||||
|
||||
static void r600_emit_viewport_states(struct r600_common_context *rctx,
|
||||
struct r600_atom *atom)
|
||||
{
|
||||
r600_emit_viewports(rctx);
|
||||
r600_emit_depth_ranges(rctx);
|
||||
}
|
||||
|
||||
/* Set viewport dependencies on pipe_rasterizer_state. */
|
||||
void r600_viewport_set_rast_deps(struct r600_common_context *rctx,
|
||||
bool scissor_enable, bool clip_halfz)
|
||||
{
|
||||
if (rctx->scissor_enabled != scissor_enable) {
|
||||
rctx->scissor_enabled = scissor_enable;
|
||||
rctx->scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||
}
|
||||
if (rctx->clip_halfz != clip_halfz) {
|
||||
rctx->clip_halfz = clip_halfz;
|
||||
rctx->viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -357,14 +414,16 @@ void r600_update_vs_writes_viewport_index(struct r600_common_context *rctx,
|
||||
|
||||
if (rctx->scissors.dirty_mask)
|
||||
rctx->set_atom_dirty(rctx, &rctx->scissors.atom, true);
|
||||
if (rctx->viewports.dirty_mask)
|
||||
|
||||
if (rctx->viewports.dirty_mask ||
|
||||
rctx->viewports.depth_range_dirty_mask)
|
||||
rctx->set_atom_dirty(rctx, &rctx->viewports.atom, true);
|
||||
}
|
||||
|
||||
void r600_init_viewport_functions(struct r600_common_context *rctx)
|
||||
{
|
||||
rctx->scissors.atom.emit = r600_emit_scissors;
|
||||
rctx->viewports.atom.emit = r600_emit_viewports;
|
||||
rctx->viewports.atom.emit = r600_emit_viewport_states;
|
||||
|
||||
rctx->scissors.atom.num_dw = (2 + 16 * 2) + 6;
|
||||
rctx->viewports.atom.num_dw = 2 + 16 * 6;
|
||||
|
@@ -241,5 +241,7 @@
|
||||
#define S_028254_BR_Y(x) (((unsigned)(x) & 0x7FFF) << 16)
|
||||
#define G_028254_BR_Y(x) (((x) >> 16) & 0x7FFF)
|
||||
#define C_028254_BR_Y 0x8000FFFF
|
||||
#define R_0282D0_PA_SC_VPORT_ZMIN_0 0x0282D0
|
||||
#define R_0282D4_PA_SC_VPORT_ZMAX_0 0x0282D4
|
||||
|
||||
#endif
|
||||
|
@@ -1303,23 +1303,32 @@ static void emit_lsb(const struct lp_build_tgsi_action * action,
|
||||
struct lp_build_emit_data * emit_data)
|
||||
{
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
LLVMBuilderRef builder = gallivm->builder;
|
||||
LLVMValueRef args[2] = {
|
||||
emit_data->args[0],
|
||||
|
||||
/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
|
||||
* add special code to check for x=0. The reason is that
|
||||
* the LLVM behavior for x=0 is different from what we
|
||||
* need here.
|
||||
*
|
||||
* The hardware already implements the correct behavior.
|
||||
* need here. However, LLVM also assumes that ffs(x) is
|
||||
* in [0, 31], but GLSL expects that ffs(0) = -1, so
|
||||
* a conditional assignment to handle 0 is still required.
|
||||
*/
|
||||
lp_build_const_int32(gallivm, 1)
|
||||
LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
|
||||
};
|
||||
|
||||
emit_data->output[emit_data->chan] =
|
||||
LLVMValueRef lsb =
|
||||
lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
|
||||
emit_data->dst_type, args, ARRAY_SIZE(args),
|
||||
LLVMReadNoneAttribute);
|
||||
|
||||
/* TODO: We need an intrinsic to skip this conditional. */
|
||||
/* Check for zero: */
|
||||
emit_data->output[emit_data->chan] =
|
||||
LLVMBuildSelect(builder,
|
||||
LLVMBuildICmp(builder, LLVMIntEQ, args[0],
|
||||
bld_base->uint_bld.zero, ""),
|
||||
lp_build_const_int32(gallivm, -1), lsb, "");
|
||||
}
|
||||
|
||||
/* Find the last bit set. */
|
||||
|
@@ -376,7 +376,9 @@ si_decompress_sampler_color_textures(struct si_context *sctx,
|
||||
assert(view);
|
||||
|
||||
tex = (struct r600_texture *)view->texture;
|
||||
assert(tex->cmask.size || tex->fmask.size || tex->dcc_offset);
|
||||
/* CMASK or DCC can be discarded and we can still end up here. */
|
||||
if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
|
||||
continue;
|
||||
|
||||
si_blit_decompress_color(&sctx->b.b, tex,
|
||||
view->u.tex.first_level, view->u.tex.last_level,
|
||||
|
@@ -202,7 +202,12 @@ static void si_initialize_compute(struct si_context *sctx)
|
||||
radeon_emit(cs, bc_va >> 8); /* R_030E00_TA_CS_BC_BASE_ADDR */
|
||||
radeon_emit(cs, bc_va >> 40); /* R_030E04_TA_CS_BC_BASE_ADDR_HI */
|
||||
} else {
|
||||
radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR, bc_va >> 8);
|
||||
if (sctx->screen->b.info.drm_major == 3 ||
|
||||
(sctx->screen->b.info.drm_major == 2 &&
|
||||
sctx->screen->b.info.drm_minor >= 48)) {
|
||||
radeon_set_config_reg(cs, R_00950C_TA_CS_BC_BASE_ADDR,
|
||||
bc_va >> 8);
|
||||
}
|
||||
}
|
||||
|
||||
sctx->cs_shader_state.emitted_program = NULL;
|
||||
|
@@ -311,6 +311,7 @@ static void si_set_sampler_view(struct si_context *sctx,
|
||||
unsigned slot, struct pipe_sampler_view *view)
|
||||
{
|
||||
struct si_sampler_view *rview = (struct si_sampler_view*)view;
|
||||
uint32_t *desc = views->desc.list + slot * 16;
|
||||
|
||||
if (view && view->texture && view->texture->target != PIPE_BUFFER &&
|
||||
G_008F28_COMPRESSION_EN(rview->state[6]) &&
|
||||
@@ -346,9 +347,14 @@ static void si_set_sampler_view(struct si_context *sctx,
|
||||
views->desc.enabled_mask |= 1u << slot;
|
||||
} else {
|
||||
pipe_sampler_view_reference(&views->views[slot], NULL);
|
||||
memcpy(views->desc.list + slot*16, null_texture_descriptor, 8*4);
|
||||
memcpy(desc, null_texture_descriptor, 8*4);
|
||||
/* Only clear the lower dwords of FMASK. */
|
||||
memcpy(views->desc.list + slot*16 + 8, null_texture_descriptor, 4*4);
|
||||
memcpy(desc + 8, null_texture_descriptor, 4*4);
|
||||
/* Re-set the sampler state if we are transitioning from FMASK. */
|
||||
if (views->sampler_states[slot])
|
||||
memcpy(desc + 12,
|
||||
views->sampler_states[slot], 4*4);
|
||||
|
||||
views->desc.enabled_mask &= ~(1u << slot);
|
||||
}
|
||||
|
||||
@@ -631,10 +637,10 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
|
||||
/* If FMASK is bound, don't overwrite it.
|
||||
* The sampler state will be set after FMASK is unbound.
|
||||
*/
|
||||
if (samplers->views.views[i] &&
|
||||
samplers->views.views[i]->texture &&
|
||||
samplers->views.views[i]->texture->target != PIPE_BUFFER &&
|
||||
((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size)
|
||||
if (samplers->views.views[slot] &&
|
||||
samplers->views.views[slot]->texture &&
|
||||
samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
|
||||
((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
|
||||
continue;
|
||||
|
||||
memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
|
||||
|
@@ -231,6 +231,7 @@ void si_begin_new_cs(struct si_context *ctx)
|
||||
|
||||
ctx->b.scissors.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
ctx->b.viewports.dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
ctx->b.viewports.depth_range_dirty_mask = (1 << R600_MAX_VIEWPORTS) - 1;
|
||||
si_mark_atom_dirty(ctx, &ctx->b.scissors.atom);
|
||||
si_mark_atom_dirty(ctx, &ctx->b.viewports.atom);
|
||||
|
||||
|
@@ -147,7 +147,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||
sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
|
||||
si_context_gfx_flush, sctx);
|
||||
|
||||
if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
|
||||
/* SI + AMDGPU + CE = GPU hang */
|
||||
if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib &&
|
||||
sscreen->b.chip_class != SI &&
|
||||
/* These can't use CE due to a power gating bug in the kernel. */
|
||||
sscreen->b.family != CHIP_CARRIZO &&
|
||||
sscreen->b.family != CHIP_STONEY) {
|
||||
sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
|
||||
if (!sctx->ce_ib)
|
||||
goto fail;
|
||||
|
@@ -1667,7 +1667,12 @@ static void declare_system_value(
|
||||
}
|
||||
|
||||
case TGSI_SEMANTIC_VERTICESIN:
|
||||
value = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
|
||||
if (ctx->type == PIPE_SHADER_TESS_CTRL)
|
||||
value = unpack_param(ctx, SI_PARAM_TCS_OUT_LAYOUT, 26, 6);
|
||||
else if (ctx->type == PIPE_SHADER_TESS_EVAL)
|
||||
value = unpack_param(ctx, SI_PARAM_TCS_OFFCHIP_LAYOUT, 9, 7);
|
||||
else
|
||||
assert(!"invalid shader stage for TGSI_SEMANTIC_VERTICESIN");
|
||||
break;
|
||||
|
||||
case TGSI_SEMANTIC_TESSINNER:
|
||||
@@ -4028,7 +4033,7 @@ static void resq_fetch_args(
|
||||
const struct tgsi_full_instruction *inst = emit_data->inst;
|
||||
const struct tgsi_full_src_register *reg = &inst->Src[0];
|
||||
|
||||
emit_data->dst_type = LLVMVectorType(bld_base->base.elem_type, 4);
|
||||
emit_data->dst_type = ctx->v4i32;
|
||||
|
||||
if (reg->Register.File == TGSI_FILE_BUFFER) {
|
||||
emit_data->args[0] = shader_buffer_fetch_rsrc(ctx, reg);
|
||||
@@ -4079,9 +4084,7 @@ static void resq_emit(
|
||||
LLVMValueRef imm6 = lp_build_const_int32(gallivm, 6);
|
||||
|
||||
LLVMValueRef z = LLVMBuildExtractElement(builder, out, imm2, "");
|
||||
z = LLVMBuildBitCast(builder, z, bld_base->uint_bld.elem_type, "");
|
||||
z = LLVMBuildSDiv(builder, z, imm6, "");
|
||||
z = LLVMBuildBitCast(builder, z, bld_base->base.elem_type, "");
|
||||
out = LLVMBuildInsertElement(builder, out, z, imm2, "");
|
||||
}
|
||||
}
|
||||
@@ -5862,6 +5865,9 @@ void si_shader_binary_read_config(struct radeon_shader_binary *binary,
|
||||
conf->scratch_bytes_per_wave =
|
||||
G_00B860_WAVESIZE(value) * 256 * 4 * 1;
|
||||
break;
|
||||
case 0x4:
|
||||
case 0x8:
|
||||
break; /* just spilling stats, not important */
|
||||
default:
|
||||
{
|
||||
static bool printed;
|
||||
@@ -7232,6 +7238,12 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
|
||||
linear_sample[i], base + 10 + i, "");
|
||||
}
|
||||
|
||||
/* Tell LLVM to insert WQM instruction sequence when needed. */
|
||||
if (key->ps_prolog.wqm) {
|
||||
LLVMAddTargetDependentFunctionAttr(func,
|
||||
"amdgpu-ps-wqm-outputs", "");
|
||||
}
|
||||
|
||||
/* Compile. */
|
||||
LLVMBuildRet(gallivm->builder, ret);
|
||||
radeon_llvm_finalize_module(&ctx.radeon_bld);
|
||||
@@ -7382,6 +7394,9 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
|
||||
prolog_key.ps_prolog.colors_read = info->colors_read;
|
||||
prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
|
||||
prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
|
||||
prolog_key.ps_prolog.wqm = info->uses_derivatives &&
|
||||
(prolog_key.ps_prolog.colors_read ||
|
||||
prolog_key.ps_prolog.states.force_persample_interp);
|
||||
|
||||
if (info->colors_read) {
|
||||
unsigned *color = shader->selector->color_attr_index;
|
||||
|
@@ -355,6 +355,7 @@ union si_shader_part_key {
|
||||
unsigned colors_read:8; /* color input components read */
|
||||
unsigned num_interp_inputs:5; /* BCOLOR is at this location */
|
||||
unsigned face_vgpr_index:5;
|
||||
unsigned wqm:1;
|
||||
char color_attr_index[2];
|
||||
char color_interp_vgpr_index[2]; /* -1 == constant */
|
||||
} ps_prolog;
|
||||
|
@@ -461,16 +461,19 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
||||
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED);
|
||||
|
||||
/* Only set dual source blending for MRT0 to avoid a hang. */
|
||||
if (i >= 1 && blend->dual_src_blend)
|
||||
continue;
|
||||
if (i >= 1 && blend->dual_src_blend) {
|
||||
/* Vulkan does this for dual source blending. */
|
||||
if (i == 1)
|
||||
blend_cntl |= S_028780_ENABLE(1);
|
||||
|
||||
if (!state->rt[j].colormask)
|
||||
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* cb_render_state will disable unused ones */
|
||||
blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i);
|
||||
|
||||
if (!state->rt[j].blend_enable) {
|
||||
if (!state->rt[j].colormask || !state->rt[j].blend_enable) {
|
||||
si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl);
|
||||
continue;
|
||||
}
|
||||
@@ -551,6 +554,17 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
||||
}
|
||||
|
||||
if (sctx->b.family == CHIP_STONEY) {
|
||||
/* Disable RB+ blend optimizations for dual source blending.
|
||||
* Vulkan does this.
|
||||
*/
|
||||
if (blend->dual_src_blend) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
sx_mrt_blend_opt[i] =
|
||||
S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) |
|
||||
S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < 8; i++)
|
||||
si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4,
|
||||
sx_mrt_blend_opt[i]);
|
||||
@@ -728,6 +742,7 @@ static void *si_create_rs_state(struct pipe_context *ctx,
|
||||
}
|
||||
|
||||
rs->scissor_enable = state->scissor;
|
||||
rs->clip_halfz = state->clip_halfz;
|
||||
rs->two_side = state->light_twoside;
|
||||
rs->multisample_enable = state->multisample;
|
||||
rs->force_persample_interp = state->force_persample_interp;
|
||||
@@ -857,7 +872,7 @@ static void si_bind_rs_state(struct pipe_context *ctx, void *state)
|
||||
si_mark_atom_dirty(sctx, &sctx->msaa_sample_locs.atom);
|
||||
}
|
||||
|
||||
r600_set_scissor_enable(&sctx->b, rs->scissor_enable);
|
||||
r600_viewport_set_rast_deps(&sctx->b, rs->scissor_enable, rs->clip_halfz);
|
||||
|
||||
si_pm4_bind_state(sctx, rasterizer, rs);
|
||||
si_update_poly_offset_state(sctx);
|
||||
@@ -3427,6 +3442,11 @@ void si_init_state_functions(struct si_context *sctx)
|
||||
si_init_config(sctx);
|
||||
}
|
||||
|
||||
static uint32_t si_get_bo_metadata_word1(struct r600_common_screen *rscreen)
|
||||
{
|
||||
return (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
|
||||
}
|
||||
|
||||
static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
|
||||
struct r600_texture *rtex,
|
||||
struct radeon_bo_metadata *md)
|
||||
@@ -3461,7 +3481,7 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
|
||||
md->metadata[0] = 1; /* metadata image format version 1 */
|
||||
|
||||
/* TILE_MODE_INDEX is ambiguous without a PCI ID. */
|
||||
md->metadata[1] = (ATI_VENDOR_ID << 16) | rscreen->info.pci_id;
|
||||
md->metadata[1] = si_get_bo_metadata_word1(rscreen);
|
||||
|
||||
si_make_texture_descriptor(sscreen, rtex, true,
|
||||
res->target, res->format,
|
||||
@@ -3485,9 +3505,37 @@ static void si_query_opaque_metadata(struct r600_common_screen *rscreen,
|
||||
md->size_metadata = (11 + res->last_level) * 4;
|
||||
}
|
||||
|
||||
static void si_apply_opaque_metadata(struct r600_common_screen *rscreen,
|
||||
struct r600_texture *rtex,
|
||||
struct radeon_bo_metadata *md)
|
||||
{
|
||||
uint32_t *desc = &md->metadata[2];
|
||||
|
||||
if (rscreen->chip_class < VI)
|
||||
return;
|
||||
|
||||
/* Return if DCC is enabled. The texture should be set up with it
|
||||
* already.
|
||||
*/
|
||||
if (md->size_metadata >= 11 * 4 &&
|
||||
md->metadata[0] != 0 &&
|
||||
md->metadata[1] == si_get_bo_metadata_word1(rscreen) &&
|
||||
G_008F28_COMPRESSION_EN(desc[6])) {
|
||||
assert(rtex->dcc_offset == ((uint64_t)desc[7] << 8));
|
||||
return;
|
||||
}
|
||||
|
||||
/* Disable DCC. These are always set by texture_from_handle and must
|
||||
* be cleared here.
|
||||
*/
|
||||
rtex->dcc_offset = 0;
|
||||
rtex->cb_color_info &= ~VI_S_028C70_DCC_ENABLE(1);
|
||||
}
|
||||
|
||||
void si_init_screen_state_functions(struct si_screen *sscreen)
|
||||
{
|
||||
sscreen->b.query_opaque_metadata = si_query_opaque_metadata;
|
||||
sscreen->b.apply_opaque_metadata = si_apply_opaque_metadata;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -78,6 +78,7 @@ struct si_state_rasterizer {
|
||||
bool clamp_fragment_color;
|
||||
bool rasterizer_discard;
|
||||
bool scissor_enable;
|
||||
bool clip_halfz;
|
||||
};
|
||||
|
||||
struct si_dsa_stencil_ref_part {
|
||||
|
@@ -35,11 +35,13 @@
|
||||
#include "JitManager.h"
|
||||
#include "fetch_jit.h"
|
||||
|
||||
#pragma push_macro("DEBUG")
|
||||
#undef DEBUG
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include "llvm/ADT/Triple.h"
|
||||
#endif
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
|
||||
#include "llvm/Support/MemoryBuffer.h"
|
||||
#include "llvm/Support/SourceMgr.h"
|
||||
@@ -53,6 +55,8 @@
|
||||
#include "llvm/ExecutionEngine/JITEventListener.h"
|
||||
#endif
|
||||
|
||||
#pragma pop_macro("DEBUG")
|
||||
|
||||
#include "core/state.h"
|
||||
|
||||
#include "state_llvm.h"
|
||||
@@ -237,6 +241,13 @@ bool JitManager::SetupModuleFromIR(const uint8_t *pIR)
|
||||
return false;
|
||||
}
|
||||
|
||||
#if HAVE_LLVM == 0x307
|
||||
// llvm-3.7 has mismatched setDataLyout/getDataLayout APIs
|
||||
newModule->setDataLayout(*mpExec->getDataLayout());
|
||||
#else
|
||||
newModule->setDataLayout(mpExec->getDataLayout());
|
||||
#endif
|
||||
|
||||
mpCurrentModule = newModule.get();
|
||||
#if defined(_WIN32)
|
||||
// Needed for MCJIT on windows
|
||||
@@ -251,7 +262,6 @@ bool JitManager::SetupModuleFromIR(const uint8_t *pIR)
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Dump function x86 assembly to file.
|
||||
/// @note This should only be called after the module has been jitted to x86 and the
|
||||
|
@@ -54,7 +54,7 @@
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_LLVM
|
||||
#define HAVE_LLVM (LLVM_VERSION_MAJOR << 8) || LLVM_VERSION_MINOR
|
||||
#define HAVE_LLVM ((LLVM_VERSION_MAJOR << 8) | LLVM_VERSION_MINOR)
|
||||
#endif
|
||||
|
||||
#include "llvm/IR/Verifier.h"
|
||||
@@ -66,8 +66,12 @@
|
||||
|
||||
#if HAVE_LLVM == 0x306
|
||||
#include "llvm/PassManager.h"
|
||||
using FunctionPassManager = llvm::FunctionPassManager;
|
||||
using PassManager = llvm::PassManager;
|
||||
#else
|
||||
#include "llvm/IR/LegacyPassManager.h"
|
||||
using FunctionPassManager = llvm::legacy::FunctionPassManager;
|
||||
using PassManager = llvm::legacy::PassManager;
|
||||
#endif
|
||||
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
@@ -77,6 +81,7 @@
|
||||
#include "llvm/Transforms/IPO.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Support/Host.h"
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
|
||||
|
||||
#pragma pop_macro("DEBUG")
|
||||
|
@@ -31,7 +31,6 @@
|
||||
#include "blend_jit.h"
|
||||
#include "builder.h"
|
||||
#include "state_llvm.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
|
||||
#include <sstream>
|
||||
|
||||
@@ -725,12 +724,7 @@ struct BlendJit : public Builder
|
||||
|
||||
JitManager::DumpToFile(blendFunc, "");
|
||||
|
||||
#if HAVE_LLVM == 0x306
|
||||
FunctionPassManager
|
||||
#else
|
||||
llvm::legacy::FunctionPassManager
|
||||
#endif
|
||||
passes(JM()->mpCurrentModule);
|
||||
::FunctionPassManager passes(JM()->mpCurrentModule);
|
||||
|
||||
passes.add(createBreakCriticalEdgesPass());
|
||||
passes.add(createCFGSimplificationPass());
|
||||
|
@@ -30,8 +30,6 @@
|
||||
#include "builder.h"
|
||||
#include "common/rdtsc_buckets.h"
|
||||
|
||||
#include "llvm/Support/DynamicLibrary.h"
|
||||
|
||||
void __cdecl CallPrint(const char* fmt, ...);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -322,6 +320,32 @@ CallInst *Builder::CALL(Value *Callee, const std::initializer_list<Value*> &args
|
||||
return CALLA(Callee, args);
|
||||
}
|
||||
|
||||
#if HAVE_LLVM > 0x306
|
||||
CallInst *Builder::CALL(Value *Callee, Value* arg)
|
||||
{
|
||||
std::vector<Value*> args;
|
||||
args.push_back(arg);
|
||||
return CALLA(Callee, args);
|
||||
}
|
||||
|
||||
CallInst *Builder::CALL2(Value *Callee, Value* arg1, Value* arg2)
|
||||
{
|
||||
std::vector<Value*> args;
|
||||
args.push_back(arg1);
|
||||
args.push_back(arg2);
|
||||
return CALLA(Callee, args);
|
||||
}
|
||||
|
||||
CallInst *Builder::CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3)
|
||||
{
|
||||
std::vector<Value*> args;
|
||||
args.push_back(arg1);
|
||||
args.push_back(arg2);
|
||||
args.push_back(arg3);
|
||||
return CALLA(Callee, args);
|
||||
}
|
||||
#endif
|
||||
|
||||
Value *Builder::VRCP(Value *va)
|
||||
{
|
||||
return FDIV(VIMMED1(1.0f), va); // 1 / a
|
||||
@@ -676,20 +700,22 @@ Value *Builder::PSHUFB(Value* a, Value* b)
|
||||
/// lower 8 values are used.
|
||||
Value *Builder::PMOVSXBD(Value* a)
|
||||
{
|
||||
Value* res;
|
||||
// llvm-3.9 removed the pmovsxbd intrinsic
|
||||
#if HAVE_LLVM < 0x309
|
||||
// use avx2 byte sign extend instruction if available
|
||||
if(JM()->mArch.AVX2())
|
||||
{
|
||||
res = VPMOVSXBD(a);
|
||||
Function *pmovsxbd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxbd);
|
||||
return CALL(pmovsxbd, std::initializer_list<Value*>{a});
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
// VPMOVSXBD output type
|
||||
Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
|
||||
// Extract 8 values from 128bit lane and sign extend
|
||||
res = S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
|
||||
return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -698,20 +724,22 @@ Value *Builder::PMOVSXBD(Value* a)
|
||||
/// @param a - 128bit SIMD lane(8x16bit) of 16bit integer values.
|
||||
Value *Builder::PMOVSXWD(Value* a)
|
||||
{
|
||||
Value* res;
|
||||
// llvm-3.9 removed the pmovsxwd intrinsic
|
||||
#if HAVE_LLVM < 0x309
|
||||
// use avx2 word sign extend if available
|
||||
if(JM()->mArch.AVX2())
|
||||
{
|
||||
res = VPMOVSXWD(a);
|
||||
Function *pmovsxwd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmovsxwd);
|
||||
return CALL(pmovsxwd, std::initializer_list<Value*>{a});
|
||||
}
|
||||
else
|
||||
#endif
|
||||
{
|
||||
// VPMOVSXWD output type
|
||||
Type* v8x32Ty = VectorType::get(mInt32Ty, 8);
|
||||
// Extract 8 values from 128bit lane and sign extend
|
||||
res = S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
|
||||
return S_EXT(VSHUFFLE(a, a, C<int>({0, 1, 2, 3, 4, 5, 6, 7})), v8x32Ty);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
@@ -726,8 +754,7 @@ Value *Builder::PERMD(Value* a, Value* idx)
|
||||
// use avx2 permute instruction if available
|
||||
if(JM()->mArch.AVX2())
|
||||
{
|
||||
// llvm 3.6.0 swapped the order of the args to vpermd
|
||||
res = VPERMD(idx, a);
|
||||
res = VPERMD(a, idx);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -852,9 +879,15 @@ Value *Builder::CVTPS2PH(Value* a, Value* rounding)
|
||||
|
||||
Value *Builder::PMAXSD(Value* a, Value* b)
|
||||
{
|
||||
// llvm-3.9 removed the pmax intrinsics
|
||||
#if HAVE_LLVM >= 0x309
|
||||
Value* cmp = ICMP_SGT(a, b);
|
||||
return SELECT(cmp, a, b);
|
||||
#else
|
||||
if (JM()->mArch.AVX2())
|
||||
{
|
||||
return VPMAXSD(a, b);
|
||||
Function* pmaxsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmaxs_d);
|
||||
return CALL(pmaxsd, {a, b});
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -877,13 +910,20 @@ Value *Builder::PMAXSD(Value* a, Value* b)
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
Value *Builder::PMINSD(Value* a, Value* b)
|
||||
{
|
||||
// llvm-3.9 removed the pmin intrinsics
|
||||
#if HAVE_LLVM >= 0x309
|
||||
Value* cmp = ICMP_SLT(a, b);
|
||||
return SELECT(cmp, a, b);
|
||||
#else
|
||||
if (JM()->mArch.AVX2())
|
||||
{
|
||||
return VPMINSD(a, b);
|
||||
Function* pminsd = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_pmins_d);
|
||||
return CALL(pminsd, {a, b});
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -906,6 +946,7 @@ Value *Builder::PMINSD(Value* a, Value* b)
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
|
||||
|
@@ -72,6 +72,12 @@ int32_t S_IMMED(Value* i);
|
||||
Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
|
||||
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
|
||||
CallInst *CALL(Value *Callee, const std::initializer_list<Value*> &args);
|
||||
#if HAVE_LLVM > 0x306
|
||||
CallInst *CALL(Value *Callee) { return CALLA(Callee); }
|
||||
CallInst *CALL(Value *Callee, Value* arg);
|
||||
CallInst *CALL2(Value *Callee, Value* arg1, Value* arg2);
|
||||
CallInst *CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3);
|
||||
#endif
|
||||
|
||||
LoadInst *LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name = "");
|
||||
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
|
||||
|
@@ -31,7 +31,6 @@
|
||||
#include "fetch_jit.h"
|
||||
#include "builder.h"
|
||||
#include "state_llvm.h"
|
||||
#include "llvm/IR/DataLayout.h"
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
|
||||
@@ -181,12 +180,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
||||
|
||||
verifyFunction(*fetch);
|
||||
|
||||
#if HAVE_LLVM == 0x306
|
||||
FunctionPassManager
|
||||
#else
|
||||
llvm::legacy::FunctionPassManager
|
||||
#endif
|
||||
setupPasses(JM()->mpCurrentModule);
|
||||
::FunctionPassManager setupPasses(JM()->mpCurrentModule);
|
||||
|
||||
///@todo We don't need the CFG passes for fetch. (e.g. BreakCriticalEdges and CFGSimplification)
|
||||
setupPasses.add(createBreakCriticalEdgesPass());
|
||||
@@ -198,12 +192,7 @@ Function* FetchJit::Create(const FETCH_COMPILE_STATE& fetchState)
|
||||
|
||||
JitManager::DumpToFile(fetch, "se");
|
||||
|
||||
#if HAVE_LLVM == 0x306
|
||||
FunctionPassManager
|
||||
#else
|
||||
llvm::legacy::FunctionPassManager
|
||||
#endif
|
||||
optPasses(JM()->mpCurrentModule);
|
||||
::FunctionPassManager optPasses(JM()->mpCurrentModule);
|
||||
|
||||
///@todo Haven't touched these either. Need to remove some of these and add others.
|
||||
optPasses.add(createCFGSimplificationPass());
|
||||
|
@@ -91,8 +91,6 @@ intrinsics = [
|
||||
["VRCPPS", "x86_avx_rcp_ps_256", ["a"]],
|
||||
["VMINPS", "x86_avx_min_ps_256", ["a", "b"]],
|
||||
["VMAXPS", "x86_avx_max_ps_256", ["a", "b"]],
|
||||
["VPMINSD", "x86_avx2_pmins_d", ["a", "b"]],
|
||||
["VPMAXSD", "x86_avx2_pmaxs_d", ["a", "b"]],
|
||||
["VROUND", "x86_avx_round_ps_256", ["a", "rounding"]],
|
||||
["VCMPPS", "x86_avx_cmp_ps_256", ["a", "b", "cmpop"]],
|
||||
["VBLENDVPS", "x86_avx_blendv_ps_256", ["a", "b", "mask"]],
|
||||
@@ -100,9 +98,7 @@ intrinsics = [
|
||||
["VMASKLOADD", "x86_avx2_maskload_d_256", ["src", "mask"]],
|
||||
["VMASKMOVPS", "x86_avx_maskload_ps_256", ["src", "mask"]],
|
||||
["VPSHUFB", "x86_avx2_pshuf_b", ["a", "b"]],
|
||||
["VPMOVSXBD", "x86_avx2_pmovsxbd", ["a"]], # sign extend packed 8bit components
|
||||
["VPMOVSXWD", "x86_avx2_pmovsxwd", ["a"]], # sign extend packed 16bit components
|
||||
["VPERMD", "x86_avx2_permd", ["idx", "a"]],
|
||||
["VPERMD", "x86_avx2_permd", ["a", "idx"]],
|
||||
["VPERMPS", "x86_avx2_permps", ["idx", "a"]],
|
||||
["VCVTPH2PS", "x86_vcvtph2ps_256", ["a"]],
|
||||
["VCVTPS2PH", "x86_vcvtps2ph_256", ["a", "round"]],
|
||||
@@ -110,7 +106,6 @@ intrinsics = [
|
||||
["VPTESTC", "x86_avx_ptestc_256", ["a", "b"]],
|
||||
["VPTESTZ", "x86_avx_ptestz_256", ["a", "b"]],
|
||||
["VFMADDPS", "x86_fma_vfmadd_ps_256", ["a", "b", "c"]],
|
||||
["VCVTTPS2DQ", "x86_avx_cvtt_ps2dq_256", ["a"]],
|
||||
["VMOVMSKPS", "x86_avx_movmsk_ps_256", ["a"]],
|
||||
["INTERRUPT", "x86_int", ["a"]],
|
||||
]
|
||||
@@ -352,7 +347,29 @@ def generate_x86_cpp(output_file):
|
||||
'Value *Builder::%s(%s)' % (inst[0], args),
|
||||
'{',
|
||||
' Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::%s);' % inst[1],
|
||||
]
|
||||
if inst[0] == "VPERMD":
|
||||
rev_args = ''
|
||||
first = True
|
||||
for arg in reversed(inst[2]):
|
||||
if not first:
|
||||
rev_args += ', '
|
||||
rev_args += arg
|
||||
first = False
|
||||
|
||||
output_lines += [
|
||||
'#if (HAVE_LLVM == 0x306) && (LLVM_VERSION_PATCH == 0)',
|
||||
' return CALL(func, std::initializer_list<Value*>{%s});' % rev_args,
|
||||
'#else',
|
||||
]
|
||||
output_lines += [
|
||||
' return CALL(func, std::initializer_list<Value*>{%s});' % pass_args,
|
||||
]
|
||||
if inst[0] == "VPERMD":
|
||||
output_lines += [
|
||||
'#endif',
|
||||
]
|
||||
output_lines += [
|
||||
'}',
|
||||
'',
|
||||
]
|
||||
|
@@ -292,12 +292,7 @@ struct StreamOutJit : public Builder
|
||||
|
||||
JitManager::DumpToFile(soFunc, "SoFunc");
|
||||
|
||||
#if HAVE_LLVM == 0x306
|
||||
FunctionPassManager
|
||||
#else
|
||||
llvm::legacy::FunctionPassManager
|
||||
#endif
|
||||
passes(JM()->mpCurrentModule);
|
||||
::FunctionPassManager passes(JM()->mpCurrentModule);
|
||||
|
||||
passes.add(createBreakCriticalEdgesPass());
|
||||
passes.add(createCFGSimplificationPass());
|
||||
|
@@ -209,18 +209,24 @@ dri3_bind_context(struct glx_context *context, struct glx_context *old,
|
||||
struct dri3_context *pcp = (struct dri3_context *) context;
|
||||
struct dri3_screen *psc = (struct dri3_screen *) pcp->base.psc;
|
||||
struct dri3_drawable *pdraw, *pread;
|
||||
__DRIdrawable *dri_draw = NULL, *dri_read = NULL;
|
||||
|
||||
pdraw = (struct dri3_drawable *) driFetchDrawable(context, draw);
|
||||
pread = (struct dri3_drawable *) driFetchDrawable(context, read);
|
||||
|
||||
driReleaseDrawables(&pcp->base);
|
||||
|
||||
if (pdraw == NULL || pread == NULL)
|
||||
if (pdraw)
|
||||
dri_draw = pdraw->loader_drawable.dri_drawable;
|
||||
else if (draw != None)
|
||||
return GLXBadDrawable;
|
||||
|
||||
if (!(*psc->core->bindContext) (pcp->driContext,
|
||||
pdraw->loader_drawable.dri_drawable,
|
||||
pread->loader_drawable.dri_drawable))
|
||||
if (pread)
|
||||
dri_read = pread->loader_drawable.dri_drawable;
|
||||
else if (read != None)
|
||||
return GLXBadDrawable;
|
||||
|
||||
if (!(*psc->core->bindContext) (pcp->driContext, dri_draw, dri_read))
|
||||
return GLXBadContext;
|
||||
|
||||
return Success;
|
||||
|
@@ -17,16 +17,19 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
|
||||
#define __ATTRIB(field) \
|
||||
[DI_##field] = "glX"#field
|
||||
|
||||
__ATTRIB(BindSwapBarrierSGIX),
|
||||
__ATTRIB(BindTexImageEXT),
|
||||
// glXChooseFBConfig implemented by libglvnd
|
||||
__ATTRIB(ChooseFBConfigSGIX),
|
||||
// glXChooseVisual implemented by libglvnd
|
||||
// glXCopyContext implemented by libglvnd
|
||||
__ATTRIB(CopySubBufferMESA),
|
||||
// glXCreateContext implemented by libglvnd
|
||||
__ATTRIB(CreateContextAttribsARB),
|
||||
__ATTRIB(CreateContextWithConfigSGIX),
|
||||
__ATTRIB(CreateGLXPbufferSGIX),
|
||||
// glXCreateGLXPixmap implemented by libglvnd
|
||||
__ATTRIB(CreateGLXPixmapMESA),
|
||||
__ATTRIB(CreateGLXPixmapWithConfigSGIX),
|
||||
// glXCreateNewContext implemented by libglvnd
|
||||
// glXCreatePbuffer implemented by libglvnd
|
||||
@@ -51,54 +54,50 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
|
||||
__ATTRIB(GetFBConfigAttribSGIX),
|
||||
__ATTRIB(GetFBConfigFromVisualSGIX),
|
||||
// glXGetFBConfigs implemented by libglvnd
|
||||
__ATTRIB(GetMscRateOML),
|
||||
// glXGetProcAddress implemented by libglvnd
|
||||
// glXGetProcAddressARB implemented by libglvnd
|
||||
__ATTRIB(GetScreenDriver),
|
||||
// glXGetSelectedEvent implemented by libglvnd
|
||||
__ATTRIB(GetSelectedEventSGIX),
|
||||
__ATTRIB(GetSwapIntervalMESA),
|
||||
__ATTRIB(GetSyncValuesOML),
|
||||
__ATTRIB(GetVideoSyncSGI),
|
||||
// glXGetVisualFromFBConfig implemented by libglvnd
|
||||
__ATTRIB(GetVisualFromFBConfigSGIX),
|
||||
// glXImportContextEXT implemented by libglvnd
|
||||
// glXIsDirect implemented by libglvnd
|
||||
__ATTRIB(JoinSwapGroupSGIX),
|
||||
// glXMakeContextCurrent implemented by libglvnd
|
||||
// glXMakeCurrent implemented by libglvnd
|
||||
// glXQueryContext implemented by libglvnd
|
||||
__ATTRIB(QueryContextInfoEXT),
|
||||
__ATTRIB(QueryCurrentRendererIntegerMESA),
|
||||
__ATTRIB(QueryCurrentRendererStringMESA),
|
||||
// glXQueryDrawable implemented by libglvnd
|
||||
// glXQueryExtension implemented by libglvnd
|
||||
// glXQueryExtensionsString implemented by libglvnd
|
||||
__ATTRIB(QueryGLXPbufferSGIX),
|
||||
__ATTRIB(QueryMaxSwapBarriersSGIX),
|
||||
__ATTRIB(QueryRendererIntegerMESA),
|
||||
__ATTRIB(QueryRendererStringMESA),
|
||||
// glXQueryServerString implemented by libglvnd
|
||||
// glXQueryVersion implemented by libglvnd
|
||||
__ATTRIB(ReleaseBuffersMESA),
|
||||
__ATTRIB(ReleaseTexImageEXT),
|
||||
// glXSelectEvent implemented by libglvnd
|
||||
__ATTRIB(SelectEventSGIX),
|
||||
// glXSwapBuffers implemented by libglvnd
|
||||
__ATTRIB(SwapBuffersMscOML),
|
||||
__ATTRIB(SwapIntervalMESA),
|
||||
__ATTRIB(SwapIntervalSGI),
|
||||
// glXUseXFont implemented by libglvnd
|
||||
__ATTRIB(WaitForMscOML),
|
||||
__ATTRIB(WaitForSbcOML),
|
||||
// glXWaitGL implemented by libglvnd
|
||||
__ATTRIB(WaitVideoSyncSGI),
|
||||
// glXWaitX implemented by libglvnd
|
||||
|
||||
__ATTRIB(glXBindSwapBarrierSGIX),
|
||||
__ATTRIB(glXCopySubBufferMESA),
|
||||
__ATTRIB(glXCreateGLXPixmapMESA),
|
||||
__ATTRIB(glXGetMscRateOML),
|
||||
__ATTRIB(glXGetScreenDriver),
|
||||
__ATTRIB(glXGetSwapIntervalMESA),
|
||||
__ATTRIB(glXGetSyncValuesOML),
|
||||
__ATTRIB(glXJoinSwapGroupSGIX),
|
||||
__ATTRIB(glXQueryCurrentRendererIntegerMESA),
|
||||
__ATTRIB(glXQueryCurrentRendererStringMESA),
|
||||
__ATTRIB(glXQueryMaxSwapBarriersSGIX),
|
||||
__ATTRIB(glXQueryRendererIntegerMESA),
|
||||
__ATTRIB(glXQueryRendererStringMESA),
|
||||
__ATTRIB(glXReleaseBuffersMESA),
|
||||
__ATTRIB(glXSwapBuffersMscOML),
|
||||
__ATTRIB(glXSwapIntervalMESA),
|
||||
__ATTRIB(glXWaitForMscOML),
|
||||
__ATTRIB(glXWaitForSbcOML),
|
||||
|
||||
#undef __ATTRIB
|
||||
};
|
||||
|
||||
@@ -557,49 +556,49 @@ static int dispatch_WaitVideoSyncSGI(int divisor, int remainder,
|
||||
|
||||
|
||||
|
||||
static void dispatch_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable,
|
||||
static void dispatch_BindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable,
|
||||
int barrier)
|
||||
{
|
||||
PFNGLXBINDSWAPBARRIERSGIXPROC pglXBindSwapBarrierSGIX;
|
||||
PFNGLXBINDSWAPBARRIERSGIXPROC pBindSwapBarrierSGIX;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXBindSwapBarrierSGIX);
|
||||
if (pglXBindSwapBarrierSGIX == NULL)
|
||||
__FETCH_FUNCTION_PTR(BindSwapBarrierSGIX);
|
||||
if (pBindSwapBarrierSGIX == NULL)
|
||||
return;
|
||||
|
||||
(*pglXBindSwapBarrierSGIX)(dpy, drawable, barrier);
|
||||
(*pBindSwapBarrierSGIX)(dpy, drawable, barrier);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void dispatch_glXCopySubBufferMESA(Display *dpy, GLXDrawable drawable,
|
||||
static void dispatch_CopySubBufferMESA(Display *dpy, GLXDrawable drawable,
|
||||
int x, int y, int width, int height)
|
||||
{
|
||||
PFNGLXCOPYSUBBUFFERMESAPROC pglXCopySubBufferMESA;
|
||||
PFNGLXCOPYSUBBUFFERMESAPROC pCopySubBufferMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXCopySubBufferMESA);
|
||||
if (pglXCopySubBufferMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(CopySubBufferMESA);
|
||||
if (pCopySubBufferMESA == NULL)
|
||||
return;
|
||||
|
||||
(*pglXCopySubBufferMESA)(dpy, drawable, x, y, width, height);
|
||||
(*pCopySubBufferMESA)(dpy, drawable, x, y, width, height);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy,
|
||||
static GLXPixmap dispatch_CreateGLXPixmapMESA(Display *dpy,
|
||||
XVisualInfo *visinfo,
|
||||
Pixmap pixmap, Colormap cmap)
|
||||
{
|
||||
PFNGLXCREATEGLXPIXMAPMESAPROC pglXCreateGLXPixmapMESA;
|
||||
PFNGLXCREATEGLXPIXMAPMESAPROC pCreateGLXPixmapMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
GLXPixmap ret;
|
||||
|
||||
@@ -607,11 +606,11 @@ static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy,
|
||||
if (dd == NULL)
|
||||
return None;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXCreateGLXPixmapMESA);
|
||||
if (pglXCreateGLXPixmapMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(CreateGLXPixmapMESA);
|
||||
if (pCreateGLXPixmapMESA == NULL)
|
||||
return None;
|
||||
|
||||
ret = (*pglXCreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap);
|
||||
ret = (*pCreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap);
|
||||
if (AddDrawableMapping(dpy, ret, dd)) {
|
||||
/* XXX: Call glXDestroyGLXPixmap which lives in libglvnd. If we're not
|
||||
* allowed to call it from here, should we extend __glXDispatchTableIndices ?
|
||||
@@ -624,47 +623,47 @@ static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy,
|
||||
|
||||
|
||||
|
||||
static GLboolean dispatch_glXGetMscRateOML(Display *dpy, GLXDrawable drawable,
|
||||
static GLboolean dispatch_GetMscRateOML(Display *dpy, GLXDrawable drawable,
|
||||
int32_t *numerator, int32_t *denominator)
|
||||
{
|
||||
PFNGLXGETMSCRATEOMLPROC pglXGetMscRateOML;
|
||||
PFNGLXGETMSCRATEOMLPROC pGetMscRateOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return GL_FALSE;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetMscRateOML);
|
||||
if (pglXGetMscRateOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetMscRateOML);
|
||||
if (pGetMscRateOML == NULL)
|
||||
return GL_FALSE;
|
||||
|
||||
return (*pglXGetMscRateOML)(dpy, drawable, numerator, denominator);
|
||||
return (*pGetMscRateOML)(dpy, drawable, numerator, denominator);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const char *dispatch_glXGetScreenDriver(Display *dpy, int scrNum)
|
||||
static const char *dispatch_GetScreenDriver(Display *dpy, int scrNum)
|
||||
{
|
||||
typedef const char *(*fn_glXGetScreenDriver_ptr)(Display *dpy, int scrNum);
|
||||
fn_glXGetScreenDriver_ptr pglXGetScreenDriver;
|
||||
fn_glXGetScreenDriver_ptr pGetScreenDriver;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, scrNum);
|
||||
if (dd == NULL)
|
||||
return NULL;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetScreenDriver);
|
||||
if (pglXGetScreenDriver == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetScreenDriver);
|
||||
if (pGetScreenDriver == NULL)
|
||||
return NULL;
|
||||
|
||||
return (*pglXGetScreenDriver)(dpy, scrNum);
|
||||
return (*pGetScreenDriver)(dpy, scrNum);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int dispatch_glXGetSwapIntervalMESA(void)
|
||||
static int dispatch_GetSwapIntervalMESA(void)
|
||||
{
|
||||
PFNGLXGETSWAPINTERVALMESAPROC pglXGetSwapIntervalMESA;
|
||||
PFNGLXGETSWAPINTERVALMESAPROC pGetSwapIntervalMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -674,57 +673,57 @@ static int dispatch_glXGetSwapIntervalMESA(void)
|
||||
if (dd == NULL)
|
||||
return 0;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetSwapIntervalMESA);
|
||||
if (pglXGetSwapIntervalMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetSwapIntervalMESA);
|
||||
if (pGetSwapIntervalMESA == NULL)
|
||||
return 0;
|
||||
|
||||
return (*pglXGetSwapIntervalMESA)();
|
||||
return (*pGetSwapIntervalMESA)();
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXGetSyncValuesOML(Display *dpy, GLXDrawable drawable,
|
||||
static Bool dispatch_GetSyncValuesOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t *ust, int64_t *msc, int64_t *sbc)
|
||||
{
|
||||
PFNGLXGETSYNCVALUESOMLPROC pglXGetSyncValuesOML;
|
||||
PFNGLXGETSYNCVALUESOMLPROC pGetSyncValuesOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetSyncValuesOML);
|
||||
if (pglXGetSyncValuesOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetSyncValuesOML);
|
||||
if (pGetSyncValuesOML == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXGetSyncValuesOML)(dpy, drawable, ust, msc, sbc);
|
||||
return (*pGetSyncValuesOML)(dpy, drawable, ust, msc, sbc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void dispatch_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable,
|
||||
static void dispatch_JoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable,
|
||||
GLXDrawable member)
|
||||
{
|
||||
PFNGLXJOINSWAPGROUPSGIXPROC pglXJoinSwapGroupSGIX;
|
||||
PFNGLXJOINSWAPGROUPSGIXPROC pJoinSwapGroupSGIX;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXJoinSwapGroupSGIX);
|
||||
if (pglXJoinSwapGroupSGIX == NULL)
|
||||
__FETCH_FUNCTION_PTR(JoinSwapGroupSGIX);
|
||||
if (pJoinSwapGroupSGIX == NULL)
|
||||
return;
|
||||
|
||||
(*pglXJoinSwapGroupSGIX)(dpy, drawable, member);
|
||||
(*pJoinSwapGroupSGIX)(dpy, drawable, member);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXQueryCurrentRendererIntegerMESA(int attribute,
|
||||
static Bool dispatch_QueryCurrentRendererIntegerMESA(int attribute,
|
||||
unsigned int *value)
|
||||
{
|
||||
PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC pglXQueryCurrentRendererIntegerMESA;
|
||||
PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC pQueryCurrentRendererIntegerMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -734,18 +733,18 @@ static Bool dispatch_glXQueryCurrentRendererIntegerMESA(int attribute,
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryCurrentRendererIntegerMESA);
|
||||
if (pglXQueryCurrentRendererIntegerMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryCurrentRendererIntegerMESA);
|
||||
if (pQueryCurrentRendererIntegerMESA == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXQueryCurrentRendererIntegerMESA)(attribute, value);
|
||||
return (*pQueryCurrentRendererIntegerMESA)(attribute, value);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const char *dispatch_glXQueryCurrentRendererStringMESA(int attribute)
|
||||
static const char *dispatch_QueryCurrentRendererStringMESA(int attribute)
|
||||
{
|
||||
PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC pglXQueryCurrentRendererStringMESA;
|
||||
PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC pQueryCurrentRendererStringMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -755,114 +754,114 @@ static const char *dispatch_glXQueryCurrentRendererStringMESA(int attribute)
|
||||
if (dd == NULL)
|
||||
return NULL;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryCurrentRendererStringMESA);
|
||||
if (pglXQueryCurrentRendererStringMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryCurrentRendererStringMESA);
|
||||
if (pQueryCurrentRendererStringMESA == NULL)
|
||||
return NULL;
|
||||
|
||||
return (*pglXQueryCurrentRendererStringMESA)(attribute);
|
||||
return (*pQueryCurrentRendererStringMESA)(attribute);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen,
|
||||
static Bool dispatch_QueryMaxSwapBarriersSGIX(Display *dpy, int screen,
|
||||
int *max)
|
||||
{
|
||||
PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC pglXQueryMaxSwapBarriersSGIX;
|
||||
PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC pQueryMaxSwapBarriersSGIX;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, screen);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryMaxSwapBarriersSGIX);
|
||||
if (pglXQueryMaxSwapBarriersSGIX == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryMaxSwapBarriersSGIX);
|
||||
if (pQueryMaxSwapBarriersSGIX == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXQueryMaxSwapBarriersSGIX)(dpy, screen, max);
|
||||
return (*pQueryMaxSwapBarriersSGIX)(dpy, screen, max);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXQueryRendererIntegerMESA(Display *dpy, int screen,
|
||||
static Bool dispatch_QueryRendererIntegerMESA(Display *dpy, int screen,
|
||||
int renderer, int attribute,
|
||||
unsigned int *value)
|
||||
{
|
||||
PFNGLXQUERYRENDERERINTEGERMESAPROC pglXQueryRendererIntegerMESA;
|
||||
PFNGLXQUERYRENDERERINTEGERMESAPROC pQueryRendererIntegerMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, screen);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryRendererIntegerMESA);
|
||||
if (pglXQueryRendererIntegerMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryRendererIntegerMESA);
|
||||
if (pQueryRendererIntegerMESA == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXQueryRendererIntegerMESA)(dpy, screen, renderer, attribute, value);
|
||||
return (*pQueryRendererIntegerMESA)(dpy, screen, renderer, attribute, value);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const char *dispatch_glXQueryRendererStringMESA(Display *dpy, int screen,
|
||||
static const char *dispatch_QueryRendererStringMESA(Display *dpy, int screen,
|
||||
int renderer, int attribute)
|
||||
{
|
||||
PFNGLXQUERYRENDERERSTRINGMESAPROC pglXQueryRendererStringMESA;
|
||||
PFNGLXQUERYRENDERERSTRINGMESAPROC pQueryRendererStringMESA;
|
||||
__GLXvendorInfo *dd = NULL;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, screen);
|
||||
if (dd == NULL)
|
||||
return NULL;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryRendererStringMESA);
|
||||
if (pglXQueryRendererStringMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryRendererStringMESA);
|
||||
if (pQueryRendererStringMESA == NULL)
|
||||
return NULL;
|
||||
|
||||
return (*pglXQueryRendererStringMESA)(dpy, screen, renderer, attribute);
|
||||
return (*pQueryRendererStringMESA)(dpy, screen, renderer, attribute);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXReleaseBuffersMESA(Display *dpy, GLXDrawable d)
|
||||
static Bool dispatch_ReleaseBuffersMESA(Display *dpy, GLXDrawable d)
|
||||
{
|
||||
PFNGLXRELEASEBUFFERSMESAPROC pglXReleaseBuffersMESA;
|
||||
PFNGLXRELEASEBUFFERSMESAPROC pReleaseBuffersMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, d);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXReleaseBuffersMESA);
|
||||
if (pglXReleaseBuffersMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(ReleaseBuffersMESA);
|
||||
if (pReleaseBuffersMESA == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXReleaseBuffersMESA)(dpy, d);
|
||||
return (*pReleaseBuffersMESA)(dpy, d);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int64_t dispatch_glXSwapBuffersMscOML(Display *dpy, GLXDrawable drawable,
|
||||
static int64_t dispatch_SwapBuffersMscOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t target_msc, int64_t divisor,
|
||||
int64_t remainder)
|
||||
{
|
||||
PFNGLXSWAPBUFFERSMSCOMLPROC pglXSwapBuffersMscOML;
|
||||
PFNGLXSWAPBUFFERSMSCOMLPROC pSwapBuffersMscOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return 0;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXSwapBuffersMscOML);
|
||||
if (pglXSwapBuffersMscOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(SwapBuffersMscOML);
|
||||
if (pSwapBuffersMscOML == NULL)
|
||||
return 0;
|
||||
|
||||
return (*pglXSwapBuffersMscOML)(dpy, drawable, target_msc, divisor, remainder);
|
||||
return (*pSwapBuffersMscOML)(dpy, drawable, target_msc, divisor, remainder);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int dispatch_glXSwapIntervalMESA(unsigned int interval)
|
||||
static int dispatch_SwapIntervalMESA(unsigned int interval)
|
||||
{
|
||||
PFNGLXSWAPINTERVALMESAPROC pglXSwapIntervalMESA;
|
||||
PFNGLXSWAPINTERVALMESAPROC pSwapIntervalMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -872,52 +871,52 @@ static int dispatch_glXSwapIntervalMESA(unsigned int interval)
|
||||
if (dd == NULL)
|
||||
return 0;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXSwapIntervalMESA);
|
||||
if (pglXSwapIntervalMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(SwapIntervalMESA);
|
||||
if (pSwapIntervalMESA == NULL)
|
||||
return 0;
|
||||
|
||||
return (*pglXSwapIntervalMESA)(interval);
|
||||
return (*pSwapIntervalMESA)(interval);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXWaitForMscOML(Display *dpy, GLXDrawable drawable,
|
||||
static Bool dispatch_WaitForMscOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t target_msc, int64_t divisor,
|
||||
int64_t remainder, int64_t *ust,
|
||||
int64_t *msc, int64_t *sbc)
|
||||
{
|
||||
PFNGLXWAITFORMSCOMLPROC pglXWaitForMscOML;
|
||||
PFNGLXWAITFORMSCOMLPROC pWaitForMscOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXWaitForMscOML);
|
||||
if (pglXWaitForMscOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(WaitForMscOML);
|
||||
if (pWaitForMscOML == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXWaitForMscOML)(dpy, drawable, target_msc, divisor, remainder, ust, msc, sbc);
|
||||
return (*pWaitForMscOML)(dpy, drawable, target_msc, divisor, remainder, ust, msc, sbc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXWaitForSbcOML(Display *dpy, GLXDrawable drawable,
|
||||
static Bool dispatch_WaitForSbcOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t target_sbc, int64_t *ust,
|
||||
int64_t *msc, int64_t *sbc)
|
||||
{
|
||||
PFNGLXWAITFORSBCOMLPROC pglXWaitForSbcOML;
|
||||
PFNGLXWAITFORSBCOMLPROC pWaitForSbcOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXWaitForSbcOML);
|
||||
if (pglXWaitForSbcOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(WaitForSbcOML);
|
||||
if (pWaitForSbcOML == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXWaitForSbcOML)(dpy, drawable, target_sbc, ust, msc, sbc);
|
||||
return (*pWaitForSbcOML)(dpy, drawable, target_sbc, ust, msc, sbc);
|
||||
}
|
||||
|
||||
#undef __FETCH_FUNCTION_PTR
|
||||
@@ -928,45 +927,44 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = {
|
||||
#define __ATTRIB(field) \
|
||||
[DI_##field] = (void *)dispatch_##field
|
||||
|
||||
__ATTRIB(BindTexImageEXT),
|
||||
__ATTRIB(BindSwapBarrierSGIX),
|
||||
__ATTRIB(BindTexImageEXT),
|
||||
__ATTRIB(ChooseFBConfigSGIX),
|
||||
__ATTRIB(CopySubBufferMESA),
|
||||
__ATTRIB(CreateContextAttribsARB),
|
||||
__ATTRIB(CreateContextWithConfigSGIX),
|
||||
__ATTRIB(CreateGLXPbufferSGIX),
|
||||
__ATTRIB(CreateGLXPixmapMESA),
|
||||
__ATTRIB(CreateGLXPixmapWithConfigSGIX),
|
||||
__ATTRIB(DestroyGLXPbufferSGIX),
|
||||
__ATTRIB(GetContextIDEXT),
|
||||
__ATTRIB(GetCurrentDisplayEXT),
|
||||
__ATTRIB(GetFBConfigAttribSGIX),
|
||||
__ATTRIB(GetFBConfigFromVisualSGIX),
|
||||
__ATTRIB(GetMscRateOML),
|
||||
__ATTRIB(GetScreenDriver),
|
||||
__ATTRIB(GetSelectedEventSGIX),
|
||||
__ATTRIB(GetSwapIntervalMESA),
|
||||
__ATTRIB(GetSyncValuesOML),
|
||||
__ATTRIB(GetVideoSyncSGI),
|
||||
__ATTRIB(GetVisualFromFBConfigSGIX),
|
||||
__ATTRIB(JoinSwapGroupSGIX),
|
||||
__ATTRIB(QueryContextInfoEXT),
|
||||
__ATTRIB(QueryCurrentRendererIntegerMESA),
|
||||
__ATTRIB(QueryCurrentRendererStringMESA),
|
||||
__ATTRIB(QueryGLXPbufferSGIX),
|
||||
__ATTRIB(QueryMaxSwapBarriersSGIX),
|
||||
__ATTRIB(QueryRendererIntegerMESA),
|
||||
__ATTRIB(QueryRendererStringMESA),
|
||||
__ATTRIB(ReleaseBuffersMESA),
|
||||
__ATTRIB(ReleaseTexImageEXT),
|
||||
__ATTRIB(SelectEventSGIX),
|
||||
__ATTRIB(SwapBuffersMscOML),
|
||||
__ATTRIB(SwapIntervalMESA),
|
||||
__ATTRIB(SwapIntervalSGI),
|
||||
__ATTRIB(WaitForMscOML),
|
||||
__ATTRIB(WaitForSbcOML),
|
||||
__ATTRIB(WaitVideoSyncSGI),
|
||||
__ATTRIB(glXBindSwapBarrierSGIX),
|
||||
__ATTRIB(glXCopySubBufferMESA),
|
||||
__ATTRIB(glXCreateGLXPixmapMESA),
|
||||
__ATTRIB(glXGetMscRateOML),
|
||||
__ATTRIB(glXGetScreenDriver),
|
||||
__ATTRIB(glXGetSwapIntervalMESA),
|
||||
__ATTRIB(glXGetSyncValuesOML),
|
||||
__ATTRIB(glXJoinSwapGroupSGIX),
|
||||
__ATTRIB(glXQueryCurrentRendererIntegerMESA),
|
||||
__ATTRIB(glXQueryCurrentRendererStringMESA),
|
||||
__ATTRIB(glXQueryMaxSwapBarriersSGIX),
|
||||
__ATTRIB(glXQueryRendererIntegerMESA),
|
||||
__ATTRIB(glXQueryRendererStringMESA),
|
||||
__ATTRIB(glXReleaseBuffersMESA),
|
||||
__ATTRIB(glXSwapBuffersMscOML),
|
||||
__ATTRIB(glXSwapIntervalMESA),
|
||||
__ATTRIB(glXWaitForMscOML),
|
||||
__ATTRIB(glXWaitForSbcOML),
|
||||
|
||||
[DI_LAST_INDEX] = NULL,
|
||||
#undef __ATTRIB
|
||||
|
@@ -6,16 +6,19 @@
|
||||
#define __glxlibglvnd_dispatchindex_h__
|
||||
|
||||
typedef enum __GLXdispatchIndex {
|
||||
DI_BindSwapBarrierSGIX,
|
||||
DI_BindTexImageEXT,
|
||||
// ChooseFBConfig implemented by libglvnd
|
||||
DI_ChooseFBConfigSGIX,
|
||||
// ChooseVisual implemented by libglvnd
|
||||
// CopyContext implemented by libglvnd
|
||||
DI_CopySubBufferMESA,
|
||||
// CreateContext implemented by libglvnd
|
||||
DI_CreateContextAttribsARB,
|
||||
DI_CreateContextWithConfigSGIX,
|
||||
DI_CreateGLXPbufferSGIX,
|
||||
// CreateGLXPixmap implemented by libglvnd
|
||||
DI_CreateGLXPixmapMESA,
|
||||
DI_CreateGLXPixmapWithConfigSGIX,
|
||||
// CreateNewContext implemented by libglvnd
|
||||
// CreatePbuffer implemented by libglvnd
|
||||
@@ -40,6 +43,7 @@ typedef enum __GLXdispatchIndex {
|
||||
DI_GetFBConfigAttribSGIX,
|
||||
DI_GetFBConfigFromVisualSGIX,
|
||||
// GetFBConfigs implemented by libglvnd
|
||||
DI_GetMscRateOML,
|
||||
// GetProcAddress implemented by libglvnd
|
||||
// GetProcAddressARB implemented by libglvnd
|
||||
// GetSelectedEvent implemented by libglvnd
|
||||
@@ -47,45 +51,41 @@ typedef enum __GLXdispatchIndex {
|
||||
DI_GetVideoSyncSGI,
|
||||
// GetVisualFromFBConfig implemented by libglvnd
|
||||
DI_GetVisualFromFBConfigSGIX,
|
||||
DI_GetScreenDriver,
|
||||
DI_GetSwapIntervalMESA,
|
||||
DI_GetSyncValuesOML,
|
||||
// ImportContextEXT implemented by libglvnd
|
||||
// IsDirect implemented by libglvnd
|
||||
DI_JoinSwapGroupSGIX,
|
||||
// MakeContextCurrent implemented by libglvnd
|
||||
// MakeCurrent implemented by libglvnd
|
||||
// QueryContext implemented by libglvnd
|
||||
DI_QueryContextInfoEXT,
|
||||
DI_QueryCurrentRendererIntegerMESA,
|
||||
DI_QueryCurrentRendererStringMESA,
|
||||
// QueryDrawable implemented by libglvnd
|
||||
// QueryExtension implemented by libglvnd
|
||||
// QueryExtensionsString implemented by libglvnd
|
||||
DI_QueryGLXPbufferSGIX,
|
||||
DI_QueryMaxSwapBarriersSGIX,
|
||||
DI_QueryRendererIntegerMESA,
|
||||
DI_QueryRendererStringMESA,
|
||||
// QueryServerString implemented by libglvnd
|
||||
// QueryVersion implemented by libglvnd
|
||||
DI_ReleaseBuffersMESA,
|
||||
DI_ReleaseTexImageEXT,
|
||||
// SelectEvent implemented by libglvnd
|
||||
DI_SelectEventSGIX,
|
||||
// SwapBuffers implemented by libglvnd
|
||||
DI_SwapBuffersMscOML,
|
||||
DI_SwapIntervalMESA,
|
||||
DI_SwapIntervalSGI,
|
||||
// UseXFont implemented by libglvnd
|
||||
// WaitGL implemented by libglvnd
|
||||
DI_WaitForMscOML,
|
||||
DI_WaitForSbcOML,
|
||||
DI_WaitVideoSyncSGI,
|
||||
// WaitX implemented by libglvnd
|
||||
DI_glXBindSwapBarrierSGIX,
|
||||
DI_glXCopySubBufferMESA,
|
||||
DI_glXCreateGLXPixmapMESA,
|
||||
DI_glXGetMscRateOML,
|
||||
DI_glXGetScreenDriver,
|
||||
DI_glXGetSwapIntervalMESA,
|
||||
DI_glXGetSyncValuesOML,
|
||||
DI_glXJoinSwapGroupSGIX,
|
||||
DI_glXQueryCurrentRendererIntegerMESA,
|
||||
DI_glXQueryCurrentRendererStringMESA,
|
||||
DI_glXQueryMaxSwapBarriersSGIX,
|
||||
DI_glXQueryRendererIntegerMESA,
|
||||
DI_glXQueryRendererStringMESA,
|
||||
DI_glXReleaseBuffersMESA,
|
||||
DI_glXSwapBuffersMscOML,
|
||||
DI_glXSwapIntervalMESA,
|
||||
DI_glXWaitForMscOML,
|
||||
DI_glXWaitForSbcOML,
|
||||
DI_LAST_INDEX
|
||||
} __GLXdispatchIndex;
|
||||
|
||||
|
@@ -50,6 +50,9 @@ static void __glXGLVNDSetDispatchIndex(const GLubyte *procName, int index)
|
||||
{
|
||||
unsigned internalIndex = FindGLXFunction(procName);
|
||||
|
||||
if (internalIndex == DI_FUNCTION_COUNT)
|
||||
return; /* unknown or static dispatch */
|
||||
|
||||
__glXDispatchTableIndices[internalIndex] = index;
|
||||
}
|
||||
|
||||
|
@@ -489,6 +489,7 @@ anv_descriptor_set_destroy(struct anv_device *device,
|
||||
struct surface_state_free_list_entry *entry =
|
||||
set->buffer_views[b].surface_state.map;
|
||||
entry->next = pool->surface_state_free_list;
|
||||
entry->offset = set->buffer_views[b].surface_state.offset;
|
||||
pool->surface_state_free_list = entry;
|
||||
}
|
||||
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
@@ -582,7 +583,14 @@ void anv_GetPhysicalDeviceQueueFamilyProperties(
|
||||
return;
|
||||
}
|
||||
|
||||
assert(*pCount >= 1);
|
||||
/* The spec implicitly allows the incoming count to be 0. From the Vulkan
|
||||
* 1.0.38 spec, Section 4.1 Physical Devices:
|
||||
*
|
||||
* If the value referenced by pQueueFamilyPropertyCount is not 0 [then
|
||||
* do stuff].
|
||||
*/
|
||||
if (*pCount == 0)
|
||||
return;
|
||||
|
||||
*pQueueFamilyProperties = (VkQueueFamilyProperties) {
|
||||
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
|
||||
@@ -1160,6 +1168,9 @@ VkResult anv_AllocateMemory(
|
||||
|
||||
mem->type_index = pAllocateInfo->memoryTypeIndex;
|
||||
|
||||
mem->map = NULL;
|
||||
mem->map_size = 0;
|
||||
|
||||
*pMem = anv_device_memory_to_handle(mem);
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -1181,6 +1192,9 @@ void anv_FreeMemory(
|
||||
if (mem == NULL)
|
||||
return;
|
||||
|
||||
if (mem->map)
|
||||
anv_UnmapMemory(_device, _mem);
|
||||
|
||||
if (mem->bo.map)
|
||||
anv_gem_munmap(mem->bo.map, mem->bo.size);
|
||||
|
||||
@@ -1227,8 +1241,12 @@ VkResult anv_MapMemory(
|
||||
/* Let's map whole pages */
|
||||
map_size = align_u64(map_size, 4096);
|
||||
|
||||
mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
|
||||
map_offset, map_size, gem_flags);
|
||||
void *map = anv_gem_mmap(device, mem->bo.gem_handle,
|
||||
map_offset, map_size, gem_flags);
|
||||
if (map == MAP_FAILED)
|
||||
return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
|
||||
|
||||
mem->map = map;
|
||||
mem->map_size = map_size;
|
||||
|
||||
*ppData = mem->map + (offset - map_offset);
|
||||
@@ -1246,6 +1264,9 @@ void anv_UnmapMemory(
|
||||
return;
|
||||
|
||||
anv_gem_munmap(mem->map, mem->map_size);
|
||||
|
||||
mem->map = NULL;
|
||||
mem->map_size = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -88,10 +88,8 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
|
||||
};
|
||||
|
||||
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
|
||||
if (ret != 0) {
|
||||
/* FIXME: Is NULL the right error return? Cf MAP_INVALID */
|
||||
return NULL;
|
||||
}
|
||||
if (ret != 0)
|
||||
return MAP_FAILED;
|
||||
|
||||
VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1));
|
||||
return (void *)(uintptr_t) gem_mmap.addr_ptr;
|
||||
|
@@ -129,10 +129,13 @@ make_surface(const struct anv_device *dev,
|
||||
image->extent = anv_sanitize_image_extent(vk_info->imageType,
|
||||
vk_info->extent);
|
||||
|
||||
enum isl_format format = anv_get_isl_format(&dev->info, vk_info->format,
|
||||
aspect, vk_info->tiling);
|
||||
assert(format != ISL_FORMAT_UNSUPPORTED);
|
||||
|
||||
ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl,
|
||||
.dim = vk_to_isl_surf_dim[vk_info->imageType],
|
||||
.format = anv_get_isl_format(&dev->info, vk_info->format,
|
||||
aspect, vk_info->tiling),
|
||||
.format = format,
|
||||
.width = image->extent.width,
|
||||
.height = image->extent.height,
|
||||
.depth = image->extent.depth,
|
||||
|
@@ -392,6 +392,7 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
|
||||
{
|
||||
prog_data->binding_table.size_bytes = 0;
|
||||
prog_data->binding_table.texture_start = bias;
|
||||
prog_data->binding_table.gather_texture_start = bias;
|
||||
prog_data->binding_table.ubo_start = bias;
|
||||
prog_data->binding_table.ssbo_start = bias;
|
||||
prog_data->binding_table.image_start = bias;
|
||||
|
@@ -2,6 +2,6 @@
|
||||
"file_format_version": "1.0.0",
|
||||
"ICD": {
|
||||
"library_path": "@build_libdir@/libvulkan_intel.so",
|
||||
"abi_versions": "1.0.3"
|
||||
"api_version": "1.0.3"
|
||||
}
|
||||
}
|
||||
|
@@ -1194,22 +1194,25 @@ void genX(CmdEndRenderPass)(
|
||||
}
|
||||
|
||||
static void
|
||||
emit_ps_depth_count(struct anv_batch *batch,
|
||||
emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *bo, uint32_t offset)
|
||||
{
|
||||
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.DestinationAddressType = DAT_PPGTT;
|
||||
pc.PostSyncOperation = WritePSDepthCount;
|
||||
pc.DepthStallEnable = true;
|
||||
pc.Address = (struct anv_address) { bo, offset };
|
||||
|
||||
if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
emit_query_availability(struct anv_batch *batch,
|
||||
emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *bo, uint32_t offset)
|
||||
{
|
||||
anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.DestinationAddressType = DAT_PPGTT;
|
||||
pc.PostSyncOperation = WriteImmediateData;
|
||||
pc.Address = (struct anv_address) { bo, offset };
|
||||
@@ -1242,7 +1245,7 @@ void genX(CmdBeginQuery)(
|
||||
|
||||
switch (pool->type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
|
||||
emit_ps_depth_count(cmd_buffer, &pool->bo,
|
||||
query * sizeof(struct anv_query_pool_slot));
|
||||
break;
|
||||
|
||||
@@ -1262,10 +1265,10 @@ void genX(CmdEndQuery)(
|
||||
|
||||
switch (pool->type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
|
||||
emit_ps_depth_count(cmd_buffer, &pool->bo,
|
||||
query * sizeof(struct anv_query_pool_slot) + 8);
|
||||
|
||||
emit_query_availability(&cmd_buffer->batch, &pool->bo,
|
||||
emit_query_availability(cmd_buffer, &pool->bo,
|
||||
query * sizeof(struct anv_query_pool_slot) + 16);
|
||||
break;
|
||||
|
||||
@@ -1307,11 +1310,14 @@ void genX(CmdWriteTimestamp)(
|
||||
pc.DestinationAddressType = DAT_PPGTT,
|
||||
pc.PostSyncOperation = WriteTimestamp,
|
||||
pc.Address = (struct anv_address) { &pool->bo, offset };
|
||||
|
||||
if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
|
||||
pc.CommandStreamerStallEnable = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16);
|
||||
emit_query_availability(cmd_buffer, &pool->bo, query + 16);
|
||||
}
|
||||
|
||||
#if GEN_GEN > 7 || GEN_IS_HASWELL
|
||||
|
@@ -2,6 +2,6 @@
|
||||
"file_format_version": "1.0.0",
|
||||
"ICD": {
|
||||
"library_path": "libvulkan_intel.so",
|
||||
"abi_versions": "1.0.3"
|
||||
"api_version": "1.0.3"
|
||||
}
|
||||
}
|
||||
|
@@ -74,6 +74,7 @@ TEST_LIBS = \
|
||||
|
||||
TESTS = \
|
||||
test_fs_cmod_propagation \
|
||||
test_fs_copy_propagation \
|
||||
test_fs_saturate_propagation \
|
||||
test_eu_compact \
|
||||
test_vf_float_conversions \
|
||||
@@ -89,6 +90,12 @@ test_fs_cmod_propagation_LDADD = \
|
||||
$(top_builddir)/src/gtest/libgtest.la \
|
||||
$(TEST_LIBS)
|
||||
|
||||
test_fs_copy_propagation_SOURCES = \
|
||||
test_fs_copy_propagation.cpp
|
||||
test_fs_copy_propagation_LDADD = \
|
||||
$(top_builddir)/src/gtest/libgtest.la \
|
||||
$(TEST_LIBS)
|
||||
|
||||
test_fs_saturate_propagation_SOURCES = \
|
||||
test_fs_saturate_propagation.cpp
|
||||
test_fs_saturate_propagation_LDADD = \
|
||||
|
@@ -134,7 +134,6 @@ i965_FILES = \
|
||||
brw_gs_surface_state.c \
|
||||
brw_link.cpp \
|
||||
brw_lower_texture_gradients.cpp \
|
||||
brw_lower_unnormalized_offset.cpp \
|
||||
brw_meta_util.c \
|
||||
brw_meta_util.h \
|
||||
brw_misc_state.c \
|
||||
|
@@ -1824,7 +1824,6 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
|
||||
bool brw_do_cubemap_normalize(struct exec_list *instructions);
|
||||
bool brw_lower_texture_gradients(struct brw_context *brw,
|
||||
struct exec_list *instructions);
|
||||
bool brw_do_lower_unnormalized_offset(struct exec_list *instructions);
|
||||
|
||||
extern const char * const conditional_modifier[16];
|
||||
extern const char *const pred_ctrl_align16[16];
|
||||
|
@@ -336,7 +336,7 @@ static const struct brw_device_info brw_device_info_chv = {
|
||||
.max_gs_threads = 336, \
|
||||
.max_hs_threads = 336, \
|
||||
.max_ds_threads = 336, \
|
||||
.max_wm_threads = 64 * 9, \
|
||||
.max_wm_threads = 64 * 12, \
|
||||
.max_cs_threads = 56, \
|
||||
.urb = { \
|
||||
.size = 384, \
|
||||
@@ -389,7 +389,7 @@ static const struct brw_device_info brw_device_info_bxt = {
|
||||
.max_hs_threads = 112,
|
||||
.max_ds_threads = 112,
|
||||
.max_gs_threads = 112,
|
||||
.max_wm_threads = 64 * 3,
|
||||
.max_wm_threads = 64 * 4,
|
||||
.max_cs_threads = 6 * 6,
|
||||
.urb = {
|
||||
.size = 192,
|
||||
@@ -412,7 +412,7 @@ static const struct brw_device_info brw_device_info_bxt_2x6 = {
|
||||
.max_hs_threads = 56, /* XXX: guess */
|
||||
.max_ds_threads = 56,
|
||||
.max_gs_threads = 56,
|
||||
.max_wm_threads = 64 * 2,
|
||||
.max_wm_threads = 64 * 4,
|
||||
.max_cs_threads = 6 * 6,
|
||||
.urb = {
|
||||
.size = 128,
|
||||
@@ -439,7 +439,7 @@ static const struct brw_device_info brw_device_info_kbl_gt1 = {
|
||||
.gt = 1,
|
||||
|
||||
.max_cs_threads = 7 * 6,
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2,
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 4,
|
||||
.urb.size = 192,
|
||||
.num_slices = 1,
|
||||
};
|
||||
@@ -449,7 +449,7 @@ static const struct brw_device_info brw_device_info_kbl_gt1_5 = {
|
||||
.gt = 1,
|
||||
|
||||
.max_cs_threads = 7 * 6,
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3,
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 4,
|
||||
.num_slices = 1,
|
||||
};
|
||||
|
||||
@@ -457,7 +457,7 @@ static const struct brw_device_info brw_device_info_kbl_gt2 = {
|
||||
GEN9_FEATURES,
|
||||
.gt = 2,
|
||||
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3,
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 4,
|
||||
.num_slices = 1,
|
||||
};
|
||||
|
||||
@@ -465,7 +465,7 @@ static const struct brw_device_info brw_device_info_kbl_gt3 = {
|
||||
GEN9_FEATURES,
|
||||
.gt = 3,
|
||||
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6,
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 8,
|
||||
.num_slices = 2,
|
||||
};
|
||||
|
||||
@@ -473,7 +473,7 @@ static const struct brw_device_info brw_device_info_kbl_gt4 = {
|
||||
GEN9_FEATURES,
|
||||
.gt = 4,
|
||||
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9,
|
||||
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 12,
|
||||
/*
|
||||
* From the "L3 Allocation and Programming" documentation:
|
||||
*
|
||||
|
@@ -3885,6 +3885,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
|
||||
*/
|
||||
setup_color_payload(bld, key, &sources[length], src0_alpha, 1);
|
||||
length++;
|
||||
} else if (key->replicate_alpha && inst->target != 0) {
|
||||
/* Handle the case when fragment shader doesn't write to draw buffer
|
||||
* zero. No need to call setup_color_payload() for src0_alpha because
|
||||
* alpha value will be undefined.
|
||||
*/
|
||||
length++;
|
||||
}
|
||||
|
||||
setup_color_payload(bld, key, &sources[length], color0, components);
|
||||
@@ -5823,7 +5829,7 @@ fs_visitor::optimize()
|
||||
|
||||
OPT(opt_algebraic);
|
||||
OPT(opt_cse);
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
OPT(opt_predicated_break, this);
|
||||
OPT(opt_cmod_propagation);
|
||||
OPT(dead_code_eliminate);
|
||||
@@ -5849,12 +5855,12 @@ fs_visitor::optimize()
|
||||
OPT(lower_logical_sends);
|
||||
|
||||
if (progress) {
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
/* Only run after logical send lowering because it's easier to implement
|
||||
* in terms of physical sends.
|
||||
*/
|
||||
if (OPT(opt_zero_samples))
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
/* Run after logical send lowering to give it a chance to CSE the
|
||||
* LOAD_PAYLOAD instructions created to construct the payloads of
|
||||
* e.g. texturing messages in cases where it wasn't possible to CSE the
|
||||
@@ -5883,7 +5889,7 @@ fs_visitor::optimize()
|
||||
}
|
||||
|
||||
if (OPT(lower_d2x)) {
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
OPT(dead_code_eliminate);
|
||||
}
|
||||
|
||||
@@ -5893,7 +5899,7 @@ fs_visitor::optimize()
|
||||
if (devinfo->gen <= 5 && OPT(lower_minmax)) {
|
||||
OPT(opt_cmod_propagation);
|
||||
OPT(opt_cse);
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
OPT(dead_code_eliminate);
|
||||
}
|
||||
|
||||
|
@@ -133,11 +133,11 @@ public:
|
||||
bool opt_redundant_discard_jumps();
|
||||
bool opt_cse();
|
||||
bool opt_cse_local(bblock_t *block);
|
||||
bool opt_copy_propagate();
|
||||
bool opt_copy_propagation();
|
||||
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
|
||||
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
|
||||
bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
|
||||
exec_list *acp);
|
||||
bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
|
||||
exec_list *acp);
|
||||
bool opt_drop_redundant_mov_to_flags();
|
||||
bool opt_register_renaming();
|
||||
bool register_coalesce();
|
||||
|
@@ -129,7 +129,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
|
||||
foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
|
||||
acp[next_acp] = entry;
|
||||
|
||||
/* opt_copy_propagate_local populates out_acp with copies created
|
||||
/* opt_copy_propagation_local populates out_acp with copies created
|
||||
* in a block which are still live at the end of the block. This
|
||||
* is exactly what we want in the COPY set.
|
||||
*/
|
||||
@@ -445,7 +445,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
if (entry->saturate) {
|
||||
switch(inst->opcode) {
|
||||
case BRW_OPCODE_SEL:
|
||||
if (inst->src[1].file != IMM ||
|
||||
if ((inst->conditional_mod != BRW_CONDITIONAL_GE &&
|
||||
inst->conditional_mod != BRW_CONDITIONAL_L) ||
|
||||
inst->src[1].file != IMM ||
|
||||
inst->src[1].f < 0.0 ||
|
||||
inst->src[1].f > 1.0) {
|
||||
return false;
|
||||
@@ -759,8 +761,8 @@ can_propagate_from(fs_inst *inst)
|
||||
* list.
|
||||
*/
|
||||
bool
|
||||
fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
|
||||
exec_list *acp)
|
||||
fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
|
||||
exec_list *acp)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
@@ -844,7 +846,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::opt_copy_propagate()
|
||||
fs_visitor::opt_copy_propagation()
|
||||
{
|
||||
bool progress = false;
|
||||
void *copy_prop_ctx = ralloc_context(NULL);
|
||||
@@ -857,8 +859,8 @@ fs_visitor::opt_copy_propagate()
|
||||
* the set of copies available at the end of the block.
|
||||
*/
|
||||
foreach_block (block, cfg) {
|
||||
progress = opt_copy_propagate_local(copy_prop_ctx, block,
|
||||
out_acp[block->num]) || progress;
|
||||
progress = opt_copy_propagation_local(copy_prop_ctx, block,
|
||||
out_acp[block->num]) || progress;
|
||||
}
|
||||
|
||||
/* Do dataflow analysis for those available copies. */
|
||||
@@ -877,7 +879,8 @@ fs_visitor::opt_copy_propagate()
|
||||
}
|
||||
}
|
||||
|
||||
progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress;
|
||||
progress = opt_copy_propagation_local(copy_prop_ctx, block, in_acp) ||
|
||||
progress;
|
||||
}
|
||||
|
||||
for (int i = 0; i < cfg->num_blocks; i++)
|
||||
|
@@ -385,34 +385,33 @@ fs_generator::generate_mov_indirect(fs_inst *inst,
|
||||
indirect_byte_offset =
|
||||
retype(spread(indirect_byte_offset, 2), BRW_REGISTER_TYPE_UW);
|
||||
|
||||
struct brw_reg ind_src;
|
||||
if (devinfo->gen < 8) {
|
||||
/* From the Haswell PRM section "Register Region Restrictions":
|
||||
*
|
||||
* "The lower bits of the AddressImmediate must not overflow to
|
||||
* change the register address. The lower 5 bits of Address
|
||||
* Immediate when added to lower 5 bits of address register gives
|
||||
* the sub-register offset. The upper bits of Address Immediate
|
||||
* when added to upper bits of address register gives the register
|
||||
* address. Any overflow from sub-register offset is dropped."
|
||||
*
|
||||
* This restriction is only listed in the Haswell PRM but emperical
|
||||
* testing indicates that it applies on all older generations and is
|
||||
* lifted on Broadwell.
|
||||
*
|
||||
* Since the indirect may cause us to cross a register boundary, this
|
||||
* makes the base offset almost useless. We could try and do
|
||||
* something clever where we use a actual base offset if
|
||||
* base_offset % 32 == 0 but that would mean we were generating
|
||||
* different code depending on the base offset. Instead, for the
|
||||
* sake of consistency, we'll just do the add ourselves.
|
||||
*/
|
||||
brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
|
||||
ind_src = brw_VxH_indirect(0, 0);
|
||||
} else {
|
||||
brw_MOV(p, addr, indirect_byte_offset);
|
||||
ind_src = brw_VxH_indirect(0, imm_byte_offset);
|
||||
}
|
||||
/* There are a number of reasons why we don't use the base offset here.
|
||||
* One reason is that the field is only 9 bits which means we can only
|
||||
* use it to access the first 16 GRFs. Also, from the Haswell PRM
|
||||
* section "Register Region Restrictions":
|
||||
*
|
||||
* "The lower bits of the AddressImmediate must not overflow to
|
||||
* change the register address. The lower 5 bits of Address
|
||||
* Immediate when added to lower 5 bits of address register gives
|
||||
* the sub-register offset. The upper bits of Address Immediate
|
||||
* when added to upper bits of address register gives the register
|
||||
* address. Any overflow from sub-register offset is dropped."
|
||||
*
|
||||
* Since the indirect may cause us to cross a register boundary, this
|
||||
* makes the base offset almost useless. We could try and do something
|
||||
* clever where we use a actual base offset if base_offset % 32 == 0 but
|
||||
* that would mean we were generating different code depending on the
|
||||
* base offset. Instead, for the sake of consistency, we'll just do the
|
||||
* add ourselves. This restriction is only listed in the Haswell PRM
|
||||
* but empirical testing indicates that it applies on all older
|
||||
* generations and is lifted on Broadwell.
|
||||
*
|
||||
* In the end, while base_offset is nice to look at in the generated
|
||||
* code, using it saves us 0 instructions and would require quite a bit
|
||||
* of case-by-case work. It's just not worth it.
|
||||
*/
|
||||
brw_ADD(p, addr, indirect_byte_offset, brw_imm_uw(imm_byte_offset));
|
||||
struct brw_reg ind_src = brw_VxH_indirect(0, 0);
|
||||
|
||||
brw_inst *mov = brw_MOV(p, dst, retype(ind_src, dst.type));
|
||||
|
||||
|
@@ -126,7 +126,6 @@ process_glsl_ir(gl_shader_stage stage,
|
||||
do_vec_index_to_cond_assign(shader->ir);
|
||||
lower_vector_insert(shader->ir, true);
|
||||
lower_offset_arrays(shader->ir);
|
||||
brw_do_lower_unnormalized_offset(shader->ir);
|
||||
lower_noise(shader->ir);
|
||||
lower_quadop_vector(shader->ir, false);
|
||||
|
||||
|
@@ -1,106 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2013 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_lower_unnormalized_offset.cpp
|
||||
*
|
||||
* IR lower pass to convert a texture offset into an adjusted coordinate,
|
||||
* for use with unnormalized coordinates. At least the gather4* messages
|
||||
* on Ivybridge and Haswell make a mess with nonzero offsets.
|
||||
*
|
||||
* \author Chris Forbes <chrisf@ijw.co.nz>
|
||||
*/
|
||||
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "compiler/glsl/ir.h"
|
||||
#include "compiler/glsl/ir_builder.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
class brw_lower_unnormalized_offset_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
brw_lower_unnormalized_offset_visitor()
|
||||
{
|
||||
progress = false;
|
||||
}
|
||||
|
||||
ir_visitor_status visit_leave(ir_texture *ir);
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
ir_visitor_status
|
||||
brw_lower_unnormalized_offset_visitor::visit_leave(ir_texture *ir)
|
||||
{
|
||||
if (!ir->offset)
|
||||
return visit_continue;
|
||||
|
||||
if (ir->op == ir_tg4 || ir->op == ir_tex) {
|
||||
if (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_RECT)
|
||||
return visit_continue;
|
||||
}
|
||||
else if (ir->op != ir_txf) {
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
void *mem_ctx = ralloc_parent(ir);
|
||||
|
||||
if (ir->op == ir_txf) {
|
||||
/* It appears that the ld instruction used for txf does its
|
||||
* address bounds check before adding in the offset. To work
|
||||
* around this, just add the integer offset to the integer texel
|
||||
* coordinate, and don't put the offset in the header.
|
||||
*/
|
||||
ir_variable *var = new(mem_ctx) ir_variable(ir->coordinate->type,
|
||||
"coordinate",
|
||||
ir_var_temporary);
|
||||
base_ir->insert_before(var);
|
||||
base_ir->insert_before(assign(var, ir->coordinate));
|
||||
base_ir->insert_before(assign(var,
|
||||
add(swizzle_for_size(var, ir->offset->type->vector_elements), ir->offset),
|
||||
(1 << ir->offset->type->vector_elements) - 1));
|
||||
|
||||
ir->coordinate = new(mem_ctx) ir_dereference_variable(var);
|
||||
} else {
|
||||
ir->coordinate = add(ir->coordinate, i2f(ir->offset));
|
||||
}
|
||||
|
||||
ir->offset = NULL;
|
||||
|
||||
progress = true;
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
bool
|
||||
brw_do_lower_unnormalized_offset(exec_list *instructions)
|
||||
{
|
||||
brw_lower_unnormalized_offset_visitor v;
|
||||
|
||||
visit_list_elements(&v, instructions);
|
||||
|
||||
return v.progress;
|
||||
}
|
||||
|
||||
}
|
@@ -419,6 +419,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
|
||||
|
||||
static const nir_lower_tex_options tex_options = {
|
||||
.lower_txp = ~0,
|
||||
.lower_txf_offset = true,
|
||||
.lower_rect_offset = true,
|
||||
};
|
||||
|
||||
OPT(nir_lower_tex, &tex_options);
|
||||
|
@@ -201,6 +201,9 @@ hsw_pause_transform_feedback(struct gl_context *ctx,
|
||||
(struct brw_transform_feedback_object *) obj;
|
||||
|
||||
if (brw->is_haswell) {
|
||||
/* Flush any drawing so that the counters have the right values. */
|
||||
brw_emit_mi_flush(brw);
|
||||
|
||||
/* Save the SOL buffer offset register values. */
|
||||
for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
|
||||
BEGIN_BATCH(3);
|
||||
|
@@ -984,6 +984,19 @@ intel_miptree_reference(struct intel_mipmap_tree **dst,
|
||||
*dst = src;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_hiz_buffer_free(struct intel_miptree_aux_buffer *hiz_buf)
|
||||
{
|
||||
if (hiz_buf == NULL)
|
||||
return;
|
||||
|
||||
if (hiz_buf->mt)
|
||||
intel_miptree_release(&hiz_buf->mt);
|
||||
else
|
||||
drm_intel_bo_unreference(hiz_buf->bo);
|
||||
|
||||
free(hiz_buf);
|
||||
}
|
||||
|
||||
void
|
||||
intel_miptree_release(struct intel_mipmap_tree **mt)
|
||||
@@ -999,13 +1012,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
|
||||
|
||||
drm_intel_bo_unreference((*mt)->bo);
|
||||
intel_miptree_release(&(*mt)->stencil_mt);
|
||||
if ((*mt)->hiz_buf) {
|
||||
if ((*mt)->hiz_buf->mt)
|
||||
intel_miptree_release(&(*mt)->hiz_buf->mt);
|
||||
else
|
||||
drm_intel_bo_unreference((*mt)->hiz_buf->bo);
|
||||
free((*mt)->hiz_buf);
|
||||
}
|
||||
intel_miptree_hiz_buffer_free((*mt)->hiz_buf);
|
||||
intel_miptree_release(&(*mt)->mcs_mt);
|
||||
intel_resolve_map_clear(&(*mt)->hiz_map);
|
||||
|
||||
@@ -2184,6 +2191,8 @@ intel_miptree_resolve_color(struct brw_context *brw,
|
||||
* then discard the MCS buffer, if present. We also set the fast_clear_state
|
||||
* to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
|
||||
* allocated in the future.
|
||||
*
|
||||
* HiZ is similarly unsafe with shared buffers.
|
||||
*/
|
||||
void
|
||||
intel_miptree_make_shareable(struct brw_context *brw,
|
||||
@@ -2201,6 +2210,14 @@ intel_miptree_make_shareable(struct brw_context *brw,
|
||||
intel_miptree_release(&mt->mcs_mt);
|
||||
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
|
||||
}
|
||||
|
||||
if (mt->hiz_buf) {
|
||||
intel_miptree_all_slices_resolve_depth(brw, mt);
|
||||
intel_miptree_hiz_buffer_free(mt->hiz_buf);
|
||||
mt->hiz_buf = NULL;
|
||||
}
|
||||
|
||||
mt->disable_aux_buffers = true;
|
||||
}
|
||||
|
||||
|
||||
|
213
src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp
Normal file
213
src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "brw_fs.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "program/program.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
class copy_propagation_test : public ::testing::Test {
|
||||
virtual void SetUp();
|
||||
|
||||
public:
|
||||
struct brw_compiler *compiler;
|
||||
struct brw_device_info *devinfo;
|
||||
struct gl_context *ctx;
|
||||
struct brw_wm_prog_data *prog_data;
|
||||
struct gl_shader_program *shader_prog;
|
||||
fs_visitor *v;
|
||||
};
|
||||
|
||||
class copy_propagation_fs_visitor : public fs_visitor
|
||||
{
|
||||
public:
|
||||
copy_propagation_fs_visitor(struct brw_compiler *compiler,
|
||||
struct brw_wm_prog_data *prog_data,
|
||||
nir_shader *shader)
|
||||
: fs_visitor(compiler, NULL, NULL, NULL,
|
||||
&prog_data->base, (struct gl_program *) NULL,
|
||||
shader, 8, -1) {}
|
||||
};
|
||||
|
||||
|
||||
void copy_propagation_test::SetUp()
|
||||
{
|
||||
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
|
||||
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
|
||||
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
|
||||
compiler->devinfo = devinfo;
|
||||
|
||||
prog_data = ralloc(NULL, struct brw_wm_prog_data);
|
||||
nir_shader *shader =
|
||||
nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
v = new copy_propagation_fs_visitor(compiler, prog_data, shader);
|
||||
|
||||
devinfo->gen = 4;
|
||||
}
|
||||
|
||||
static fs_inst *
|
||||
instruction(bblock_t *block, int num)
|
||||
{
|
||||
fs_inst *inst = (fs_inst *)block->start();
|
||||
for (int i = 0; i < num; i++) {
|
||||
inst = (fs_inst *)inst->next;
|
||||
}
|
||||
return inst;
|
||||
}
|
||||
|
||||
static bool
|
||||
copy_propagation(fs_visitor *v)
|
||||
{
|
||||
const bool print = getenv("TEST_DEBUG");
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "= Before =\n");
|
||||
v->cfg->dump(v);
|
||||
}
|
||||
|
||||
bool ret = v->opt_copy_propagation();
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "\n= After =\n");
|
||||
v->cfg->dump(v);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
TEST_F(copy_propagation_test, basic)
|
||||
{
|
||||
const fs_builder &bld = v->bld;
|
||||
fs_reg vgrf0 = v->vgrf(glsl_type::float_type);
|
||||
fs_reg vgrf1 = v->vgrf(glsl_type::float_type);
|
||||
fs_reg vgrf2 = v->vgrf(glsl_type::float_type);
|
||||
fs_reg vgrf3 = v->vgrf(glsl_type::float_type);
|
||||
bld.MOV(vgrf0, vgrf2);
|
||||
bld.ADD(vgrf1, vgrf0, vgrf3);
|
||||
|
||||
/* = Before =
|
||||
*
|
||||
* 0: mov(8) vgrf0 vgrf2
|
||||
* 1: add(8) vgrf1 vgrf0 vgrf3
|
||||
*
|
||||
* = After =
|
||||
* 0: mov(8) vgrf0 vgrf2
|
||||
* 1: add(8) vgrf1 vgrf2 vgrf3
|
||||
*/
|
||||
|
||||
v->calculate_cfg();
|
||||
bblock_t *block0 = v->cfg->blocks[0];
|
||||
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
EXPECT_TRUE(copy_propagation(v));
|
||||
EXPECT_EQ(0, block0->start_ip);
|
||||
EXPECT_EQ(1, block0->end_ip);
|
||||
|
||||
fs_inst *mov = instruction(block0, 0);
|
||||
EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
|
||||
EXPECT_TRUE(mov->dst.equals(vgrf0));
|
||||
EXPECT_TRUE(mov->src[0].equals(vgrf2));
|
||||
|
||||
fs_inst *add = instruction(block0, 1);
|
||||
EXPECT_EQ(BRW_OPCODE_ADD, add->opcode);
|
||||
EXPECT_TRUE(add->dst.equals(vgrf1));
|
||||
EXPECT_TRUE(add->src[0].equals(vgrf2));
|
||||
EXPECT_TRUE(add->src[1].equals(vgrf3));
|
||||
}
|
||||
|
||||
/**
 * Table-driven check of copy propagation from a saturating MOV into a SEL
 * whose second source is a float immediate.
 *
 * Each row builds "mov.sat vgrf0, vgrf1" followed by
 * "sel.<cmod> vgrf2, vgrf0, <imm>", runs the pass, and verifies whether the
 * pass returned true and whether the SEL picked up both the saturate flag
 * and the MOV's source.
 */
TEST_F(copy_propagation_test, maxmax_sat_imm)
{
   const fs_builder &bld = v->bld;

   /* Allocation order matters: these are vgrf0, vgrf1 and vgrf2. */
   fs_reg sat_dst = v->vgrf(glsl_type::float_type);
   fs_reg sat_src = v->vgrf(glsl_type::float_type);
   fs_reg sel_dst = v->vgrf(glsl_type::float_type);

   struct sat_imm_case {
      enum brw_conditional_mod conditional_mod;
      float immediate;
      bool expected_result;
   };

   /* GE / L with 0.1, 0.5 or 0.9 expect propagation; GE / L with -1.5 or
    * 1.5, and every other conditional modifier, expect none.
    * NOTE(review): presumably the immediate has to lie within [0, 1] for
    * the saturate to be folded -- confirm against opt_copy_propagation().
    */
   static const sat_imm_case cases[] = {
      /*   conditional mod,     imm, expected_result */
      { BRW_CONDITIONAL_GE  ,  0.1f, true  },
      { BRW_CONDITIONAL_L   ,  0.1f, true  },
      { BRW_CONDITIONAL_GE  ,  0.5f, true  },
      { BRW_CONDITIONAL_L   ,  0.5f, true  },
      { BRW_CONDITIONAL_GE  ,  0.9f, true  },
      { BRW_CONDITIONAL_L   ,  0.9f, true  },
      { BRW_CONDITIONAL_GE  , -1.5f, false },
      { BRW_CONDITIONAL_L   , -1.5f, false },
      { BRW_CONDITIONAL_GE  ,  1.5f, false },
      { BRW_CONDITIONAL_L   ,  1.5f, false },

      { BRW_CONDITIONAL_NONE,  0.5f, false },
      { BRW_CONDITIONAL_Z   ,  0.5f, false },
      { BRW_CONDITIONAL_NZ  ,  0.5f, false },
      { BRW_CONDITIONAL_G   ,  0.5f, false },
      { BRW_CONDITIONAL_LE  ,  0.5f, false },
      { BRW_CONDITIONAL_R   ,  0.5f, false },
      { BRW_CONDITIONAL_O   ,  0.5f, false },
      { BRW_CONDITIONAL_U   ,  0.5f, false },
   };
   const unsigned num_cases = sizeof(cases) / sizeof(cases[0]);

   for (unsigned i = 0; i < num_cases; i++) {
      const sat_imm_case &tc = cases[i];

      fs_inst *sat_mov = set_saturate(true, bld.MOV(sat_dst, sat_src));
      fs_inst *select =
         set_condmod(tc.conditional_mod,
                     bld.SEL(sel_dst, sat_dst, brw_imm_f(tc.immediate)));

      v->calculate_cfg();

      bblock_t *block = v->cfg->blocks[0];

      EXPECT_EQ(0, block->start_ip);
      EXPECT_EQ(1, block->end_ip);

      EXPECT_EQ(tc.expected_result, copy_propagation(v));
      EXPECT_EQ(0, block->start_ip);
      EXPECT_EQ(1, block->end_ip);

      /* The saturating MOV is expected to be untouched in every case. */
      EXPECT_EQ(BRW_OPCODE_MOV, sat_mov->opcode);
      EXPECT_TRUE(sat_mov->saturate);
      EXPECT_TRUE(sat_mov->dst.equals(sat_dst));
      EXPECT_TRUE(sat_mov->src[0].equals(sat_src));

      /* The SEL keeps its opcode, conditional modifier and destination;
       * it gains the saturate flag only when propagation is expected.
       */
      EXPECT_EQ(BRW_OPCODE_SEL, select->opcode);
      EXPECT_EQ(tc.conditional_mod, select->conditional_mod);
      EXPECT_EQ(tc.expected_result, select->saturate);
      EXPECT_TRUE(select->dst.equals(sel_dst));

      /* Propagated: first source is the MOV's source; otherwise it is
       * still the MOV's destination.
       */
      const fs_reg &expected_src0 = tc.expected_result ? sat_src : sat_dst;
      EXPECT_TRUE(select->src[0].equals(expected_src0));
      EXPECT_TRUE(select->src[1].equals(brw_imm_f(tc.immediate)));

      /* Drop the CFG so the next iteration's calculate_cfg() starts fresh. */
      delete v->cfg;
      v->cfg = NULL;
   }
}
|
@@ -2848,6 +2848,7 @@ reuse_framebuffer_texture_attachment(struct gl_framebuffer *fb,
|
||||
dst_att->Type = src_att->Type;
|
||||
dst_att->Complete = src_att->Complete;
|
||||
dst_att->TextureLevel = src_att->TextureLevel;
|
||||
dst_att->CubeMapFace = src_att->CubeMapFace;
|
||||
dst_att->Zoffset = src_att->Zoffset;
|
||||
dst_att->Layered = src_att->Layered;
|
||||
}
|
||||
|
@@ -857,7 +857,7 @@ _mesa_get_color_read_format(struct gl_context *ctx)
|
||||
if (format == MESA_FORMAT_B8G8R8A8_UNORM)
|
||||
return GL_BGRA;
|
||||
else if (format == MESA_FORMAT_B5G6R5_UNORM)
|
||||
return GL_BGR;
|
||||
return GL_RGB;
|
||||
else if (format == MESA_FORMAT_R_UNORM8)
|
||||
return GL_RED;
|
||||
|
||||
@@ -892,7 +892,7 @@ _mesa_get_color_read_type(struct gl_context *ctx)
|
||||
const GLenum data_type = _mesa_get_format_datatype(format);
|
||||
|
||||
if (format == MESA_FORMAT_B5G6R5_UNORM)
|
||||
return GL_UNSIGNED_SHORT_5_6_5_REV;
|
||||
return GL_UNSIGNED_SHORT_5_6_5;
|
||||
|
||||
switch (data_type) {
|
||||
case GL_SIGNED_NORMALIZED:
|
||||
|
@@ -59,7 +59,6 @@ struct _mesa_HashTable {
|
||||
struct hash_table *ht;
|
||||
GLuint MaxKey; /**< highest key inserted so far */
|
||||
mtx_t Mutex; /**< mutual exclusion lock */
|
||||
mtx_t WalkMutex; /**< for _mesa_HashWalk() */
|
||||
GLboolean InDeleteAll; /**< Debug check */
|
||||
/** Value that would be in the table for DELETED_KEY_VALUE. */
|
||||
void *deleted_key_data;
|
||||
@@ -129,8 +128,11 @@ _mesa_NewHashTable(void)
|
||||
}
|
||||
|
||||
_mesa_hash_table_set_deleted_key(table->ht, uint_key(DELETED_KEY_VALUE));
|
||||
mtx_init(&table->Mutex, mtx_plain);
|
||||
mtx_init(&table->WalkMutex, mtx_plain);
|
||||
/*
|
||||
* Needs to be recursive, since the callback in _mesa_HashWalk()
|
||||
* is allowed to call _mesa_HashRemove().
|
||||
*/
|
||||
mtx_init(&table->Mutex, mtx_recursive);
|
||||
}
|
||||
else {
|
||||
_mesa_error_no_memory(__func__);
|
||||
@@ -161,7 +163,6 @@ _mesa_DeleteHashTable(struct _mesa_HashTable *table)
|
||||
_mesa_hash_table_destroy(table->ht, NULL);
|
||||
|
||||
mtx_destroy(&table->Mutex);
|
||||
mtx_destroy(&table->WalkMutex);
|
||||
free(table);
|
||||
}
|
||||
|
||||
@@ -401,11 +402,6 @@ _mesa_HashDeleteAll(struct _mesa_HashTable *table,
|
||||
|
||||
/**
|
||||
* Walk over all entries in a hash table, calling callback function for each.
|
||||
* Note: we use a separate mutex in this function to avoid a recursive
|
||||
* locking deadlock (in case the callback calls _mesa_HashRemove()) and to
|
||||
* prevent multiple threads/contexts from getting tangled up.
|
||||
* A lock-less version of this function could be used when the table will
|
||||
* not be modified.
|
||||
* \param table the hash table to walk
|
||||
* \param callback the callback function
|
||||
* \param userData arbitrary pointer to pass along to the callback
|
||||
@@ -422,13 +418,13 @@ _mesa_HashWalk(const struct _mesa_HashTable *table,
|
||||
|
||||
assert(table);
|
||||
assert(callback);
|
||||
mtx_lock(&table2->WalkMutex);
|
||||
mtx_lock(&table2->Mutex);
|
||||
hash_table_foreach(table->ht, entry) {
|
||||
callback((uintptr_t)entry->key, entry->data, userData);
|
||||
}
|
||||
if (table->deleted_key_data)
|
||||
callback(DELETED_KEY_VALUE, table->deleted_key_data, userData);
|
||||
mtx_unlock(&table2->WalkMutex);
|
||||
mtx_unlock(&table2->Mutex);
|
||||
}
|
||||
|
||||
static void
|
||||
|
Reference in New Issue
Block a user