Compare commits
85 Commits
mesa-18.3.
...
mesa-18.3.
Author | SHA1 | Date | |
---|---|---|---|
|
b26488dead | ||
|
a41881fcaa | ||
|
55f3a4fac3 | ||
|
d000488c2e | ||
|
4aa92b54e5 | ||
|
08ab660bf5 | ||
|
4bb51927aa | ||
|
7662965ce9 | ||
|
6cea56e2c2 | ||
|
5b48a26072 | ||
|
d3f49ece4e | ||
|
ecad528a11 | ||
|
f036a040bb | ||
|
5694279c14 | ||
|
75340edb27 | ||
|
dafa02c980 | ||
|
36258308a7 | ||
|
f1eccd091d | ||
|
945aa87408 | ||
|
b3b0a97f69 | ||
|
3545986962 | ||
|
a9c0e146ef | ||
|
541eb984ea | ||
|
fb63b1b3bf | ||
|
08834a3721 | ||
|
f04d57ff1f | ||
|
45c3bf14ca | ||
|
2180aa1bb2 | ||
|
fdb66dd155 | ||
|
e868c77615 | ||
|
a19ddce953 | ||
|
7bf9cf29dc | ||
|
e0eba40ae4 | ||
|
1a2b227fce | ||
|
6beaa2d7fb | ||
|
434f19a8dc | ||
|
7475d7727f | ||
|
190a79f462 | ||
|
871aea89fd | ||
|
f2c1d7acd0 | ||
|
5e8af9e609 | ||
|
f072585522 | ||
|
f275e16c9e | ||
|
813f0a8296 | ||
|
b280cdb59e | ||
|
3b9e9e4723 | ||
|
12586d5846 | ||
|
e362fe26ea | ||
|
220490cf5f | ||
|
991f9ea553 | ||
|
84f59f6bbc | ||
|
e1374ce107 | ||
|
0b4d381ee0 | ||
|
6050d6f1cf | ||
|
cadab68f95 | ||
|
8f45b22c11 | ||
|
fea0bca1be | ||
|
32c0f59c48 | ||
|
2733d26011 | ||
|
8a6c154496 | ||
|
9d45651005 | ||
|
77ac39c359 | ||
|
ae91c29a25 | ||
|
b6cd30de3a | ||
|
a1605e77d2 | ||
|
f5b6f5ad64 | ||
|
93db1e7153 | ||
|
313c1487b7 | ||
|
98a661f2b1 | ||
|
ea2bf29ed9 | ||
|
252beed945 | ||
|
5f25cfdaf6 | ||
|
92273935a5 | ||
|
8f1c75e9a0 | ||
|
e38d275a86 | ||
|
f0eee7df43 | ||
|
bd9edb5f2e | ||
|
cad3d0735d | ||
|
4b91802bef | ||
|
5d2cfa64c1 | ||
|
220705036c | ||
|
fa11468db4 | ||
|
029dced476 | ||
|
ec40bc62a5 | ||
|
8320a07221 |
@@ -2,3 +2,37 @@
|
||||
c02390f8fcd367c7350db568feabb2f062efca14 egl/wayland: rather obvious build fix
|
||||
# fixes: The commit addresses b4476138d5ad3f8d30c14ee61f2f375edfdbab2a
|
||||
ff6f1dd0d3c6b4c15ca51b478b2884d14f6a1e06 meson: libfreedreno depends upon libdrm (for fence support)
|
||||
|
||||
# fixes: This commit requires commits aeaf8dbd097 and 7484bc894b9 which did not
|
||||
# land in branch.
|
||||
f67dea5e19ef14187be0e8d0f61b1f764c7ccb4f radv: Fix multiview depth clears
|
||||
|
||||
# stable The commits aren't suitable in their present form.
|
||||
bfe31c5e461a1330d6f606bf5310685eff1198dd nir/builder: Add nir_i2i and nir_u2u helpers which take a bit size
|
||||
abfe674c54bee6f8fdcae411b07db89c10b9d530 spirv: Handle arbitrary bit sizes for deref array indices
|
||||
|
||||
# warn The commits refer stale sha, yet don't fix anything in particular.
|
||||
98984b7cdd79c15cc7331c791f8be61e873b8bbd Revert "mapi/new: sort by slot number"
|
||||
9f86f1da7c68b5b900cd6f60925610ff1225a72d egl: add glvnd entrypoints for EGL_MESA_query_driver
|
||||
|
||||
# stable Explicit 19.0 only nomination.
|
||||
38f542783faa360020b77fdd76b97f207a9e0068 v50,nvc0: add explicit settings for recent caps
|
||||
|
||||
# stable Explicit 19.0 only nominations.
|
||||
399215eb7a0517463e5757c598d6cff6ae2301d0 nvc0: add support for handling indirect draws with attrib conversion
|
||||
4443b6ddf2e08d06f3d0457cf20a2e04244cde37 nvc0/ir: always use CG mode for loads from atomic-only buffers
|
||||
5de5beedf21306b01730085f8e03d8f424729016 nvc0/ir: fix second tex argument after levelZero optimization
|
||||
162352e6711b3ceab114686f7a3248074339e7f7 nvc0: fix 3d images on kepler
|
||||
e00799d3dc0595dc3998dbf199ceec8b1eece966 nv50,nvc0: use condition for occlusion queries when already complete
|
||||
6adb9b38bfb1f6ee4c94596bf0744225aa8e967a nvc0: stick zero values for the compute invocation counts
|
||||
04593d9a73ea257a36cc3b9fb5cd41427beaaea5 gk110/ir: Add rcp f64 implementation
|
||||
7937408052a1896f0b08b0110bb8a1790eeee351 gk110/ir: Add rsq f64 implementation
|
||||
656ad060518d067a3b311db8c2de2a396fb41898 gk110/ir: Use the new rcp/rsq in library
|
||||
12669d29705a26478aa691cb454149628be65f17 gk104/ir: Use the new rcp/rsq in library
|
||||
815a8e59c6d462a7008653ea9e3010d40b6ba589 gm107/ir: add fp64 rcp
|
||||
cce495572136a606dd2a35e79f45080c3796e2cc gm107/ir: add fp64 rsq
|
||||
6010d7b8e8bee1bcea2b329cf6d3b44c5fc3ca66 gallium: add PIPE_CAP_MAX_VARYINGS
|
||||
cbd1ad6165f0aea7fb7c6fd1b36ad5317dd65cb7 st/mesa: require RGBA2, RGB4, and RGBA4 to be renderable
|
||||
|
||||
# stable The commit addresses functionality not present in branch
|
||||
1b8983c25be19073c02fe9630e949be55f8280fa radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8
|
||||
|
@@ -13,12 +13,12 @@
|
||||
|
||||
is_stable_nomination()
|
||||
{
|
||||
git show --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
|
||||
git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-stable"
|
||||
}
|
||||
|
||||
is_typod_nomination()
|
||||
{
|
||||
git show --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
|
||||
git show --pretty=medium --summary "$1" | grep -q -i -o "CC:.*mesa-dev"
|
||||
}
|
||||
|
||||
fixes=
|
||||
@@ -44,7 +44,7 @@ is_sha_nomination()
|
||||
# Treat only the current line
|
||||
id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
|
||||
fixes_count=$(($fixes_count-1))
|
||||
if ! git show $id &>/dev/null; then
|
||||
if ! git show $id >/dev/null 2>&1; then
|
||||
echo WARNING: Commit $1 lists invalid sha $id
|
||||
fi
|
||||
done
|
||||
@@ -143,7 +143,7 @@ do
|
||||
esac
|
||||
|
||||
printf "[ %8s ] " "$tag"
|
||||
git --no-pager show --summary --oneline $sha
|
||||
git --no-pager show --no-patch --oneline $sha
|
||||
done
|
||||
|
||||
rm -f already_picked
|
||||
|
@@ -1864,6 +1864,7 @@ for plat in $platforms; do
|
||||
;;
|
||||
|
||||
drm)
|
||||
test "x$enable_egl" = "xyes" &&
|
||||
test "x$enable_gbm" = "xno" &&
|
||||
AC_MSG_ERROR([EGL platform drm needs gbm])
|
||||
DEFINES="$DEFINES -DHAVE_DRM_PLATFORM"
|
||||
|
@@ -31,7 +31,8 @@ Compatibility contexts may report a lower version depending on each driver.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
1cde4fafd40cd1ad4ee3a13b364b7a0175a08b7afdd127fb46f918c1e1dfd4b0 mesa-18.3.2.tar.gz
|
||||
f7ce7181c07b6d8e0132da879af1729523a6c8aa87f79a9d59dfd064024cfb35 mesa-18.3.2.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
208
docs/relnotes/18.3.3.html
Normal file
208
docs/relnotes/18.3.3.html
Normal file
@@ -0,0 +1,208 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 18.3.3 Release Notes / January 31, 2019</h1>
|
||||
|
||||
<p>
|
||||
Mesa 18.3.3 is a bug fix release which fixes bugs found since the 18.3.2 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 18.3.3 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation.
|
||||
Compatibility contexts may report a lower version depending on each driver.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
6b9893942fe8011c7736d51448deb6ef80ece2257e0fac27b02e997a6605d5e4 mesa-18.3.3.tar.gz
|
||||
2ab6886a6966c532ccbcc3b240925e681464b658244f0cbed752615af3936299 mesa-18.3.3.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108877">Bug 108877</a> - OpenGL CTS gl43 test cases were interrupted due to segment fault</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109023">Bug 109023</a> - error: inlining failed in call to always_inline ‘__m512 _mm512_and_ps(__m512, __m512)’: target specific option mismatch</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109129">Bug 109129</a> - format_types.h:1220: undefined reference to `_mm256_cvtps_ph'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109229">Bug 109229</a> - glLinkProgram locks up for ~30 seconds</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109242">Bug 109242</a> - [RADV] The Witcher 3 system freeze</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109488">Bug 109488</a> - Mesa 18.3.2 crash on a specific fragment shader (assert triggered) / already fixed on the master branch.</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Andres Gomez (2):</p>
|
||||
<ul>
|
||||
<li>bin/get-pick-list.sh: fix the oneline printing</li>
|
||||
<li>bin/get-pick-list.sh: fix redirection in sh</li>
|
||||
</ul>
|
||||
|
||||
<p>Axel Davy (1):</p>
|
||||
<ul>
|
||||
<li>st/nine: Immediately upload user provided textures</li>
|
||||
</ul>
|
||||
|
||||
<p>Bas Nieuwenhuizen (3):</p>
|
||||
<ul>
|
||||
<li>radv: Only use 32 KiB per threadgroup on Stoney.</li>
|
||||
<li>radv: Set partial_vs_wave for pipelines with just GS, not tess.</li>
|
||||
<li>nir: Account for atomics in copy propagation.</li>
|
||||
</ul>
|
||||
|
||||
<p>Bruce Cherniak (1):</p>
|
||||
<ul>
|
||||
<li>gallium/swr: Fix multi-context sync fence deadlock.</li>
|
||||
</ul>
|
||||
|
||||
<p>Carsten Haitzler (Rasterman) (2):</p>
|
||||
<ul>
|
||||
<li>vc4: Use named parameters for the NEON inline asm.</li>
|
||||
<li>vc4: Declare the cpu pointers as being modified in NEON asm.</li>
|
||||
</ul>
|
||||
|
||||
<p>Danylo Piliaiev (1):</p>
|
||||
<ul>
|
||||
<li>glsl: Fix copying function's out to temp if dereferenced by array</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (3):</p>
|
||||
<ul>
|
||||
<li>dri_interface: add put shm image2 (v2)</li>
|
||||
<li>glx: add support for putimageshm2 path (v2)</li>
|
||||
<li>gallium: use put image shm2 path (v2)</li>
|
||||
</ul>
|
||||
|
||||
<p>Dylan Baker (4):</p>
|
||||
<ul>
|
||||
<li>meson: allow building dri driver without window system if osmesa is classic</li>
|
||||
<li>meson: fix swr KNL build</li>
|
||||
<li>meson: Fix compiler checks for SWR with ICC</li>
|
||||
<li>meson: Add warnings and errors when using ICC</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (4):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 18.3.2</li>
|
||||
<li>cherry-ignore: radv: Fix multiview depth clears</li>
|
||||
<li>cherry-ignore: spirv: Handle arbitrary bit sizes for deref array indices</li>
|
||||
<li>cherry-ignore: WARNING: Commit XXX lists invalid sha</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (2):</p>
|
||||
<ul>
|
||||
<li>vc4: Don't leak the GPU fd for renderonly usage.</li>
|
||||
<li>vc4: Enable NEON asm on meson cross-builds.</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Engestrom (2):</p>
|
||||
<ul>
|
||||
<li>configure: EGL requirements only apply if EGL is built</li>
|
||||
<li>meson/vdpau: add missing soversion</li>
|
||||
</ul>
|
||||
|
||||
<p>Iago Toral Quiroga (1):</p>
|
||||
<ul>
|
||||
<li>anv/device: fix maximum number of images supported</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (3):</p>
|
||||
<ul>
|
||||
<li>anv/nir: Rework arguments to apply_pipeline_layout</li>
|
||||
<li>anv: Only parse pImmutableSamplers if the descriptor has samplers</li>
|
||||
<li>nir/xfb: Fix offset accounting for dvec3/4</li>
|
||||
</ul>
|
||||
|
||||
<p>Karol Herbst (2):</p>
|
||||
<ul>
|
||||
<li>nv50/ir: disable tryCollapseChainedMULs in ConstantFolding for precise instructions</li>
|
||||
<li>glsl/lower_output_reads: set invariant and precise flags on temporaries</li>
|
||||
</ul>
|
||||
|
||||
<p>Lionel Landwerlin (1):</p>
|
||||
<ul>
|
||||
<li>anv: fix invalid binding table index computation</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (4):</p>
|
||||
<ul>
|
||||
<li>radeonsi: also apply the GS hang workaround to draws without tessellation</li>
|
||||
<li>radeonsi: fix a u_blitter crash after a shader with FBFETCH</li>
|
||||
<li>radeonsi: fix rendering to tiny viewports where the viewport center is > 8K</li>
|
||||
<li>st/mesa: purge framebuffers when unbinding a context</li>
|
||||
</ul>
|
||||
|
||||
<p>Niklas Haas (1):</p>
|
||||
<ul>
|
||||
<li>radv: correctly use vulkan 1.0 by default</li>
|
||||
</ul>
|
||||
|
||||
<p>Pierre Moreau (1):</p>
|
||||
<ul>
|
||||
<li>meson: Fix with_gallium_icd to with_opencl_icd</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (1):</p>
|
||||
<ul>
|
||||
<li>loader: fix the no-modifiers case</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (1):</p>
|
||||
<ul>
|
||||
<li>radv: clean up setting partial_es_wave for distributed tess on VI</li>
|
||||
</ul>
|
||||
|
||||
<p>Timothy Arceri (5):</p>
|
||||
<ul>
|
||||
<li>ac/nir_to_llvm: fix interpolateAt* for arrays</li>
|
||||
<li>ac/nir_to_llvm: fix clamp shadow reference for more hardware</li>
|
||||
<li>radv/ac: fix some fp16 handling</li>
|
||||
<li>glsl: use remap location when serialising uniform program resource data</li>
|
||||
<li>glsl: Copy function out to temp if we don't directly ref a variable</li>
|
||||
</ul>
|
||||
|
||||
<p>Tomeu Vizoso (1):</p>
|
||||
<ul>
|
||||
<li>etnaviv: Consolidate buffer references from framebuffers</li>
|
||||
</ul>
|
||||
|
||||
<p>Vinson Lee (1):</p>
|
||||
<ul>
|
||||
<li>meson: Fix typo.</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
|
179
docs/relnotes/18.3.4.html
Normal file
179
docs/relnotes/18.3.4.html
Normal file
@@ -0,0 +1,179 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 18.3.4 Release Notes / February 18, 2019</h1>
|
||||
|
||||
<p>
|
||||
Mesa 18.3.4 is a bug fix release which fixes bugs found since the 18.3.3 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 18.3.4 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation.
|
||||
Compatibility contexts may report a lower version depending on each driver.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109107">Bug 109107</a> - gallium/st/va: change va max_profiles when using Radeon VCN Hardware</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109401">Bug 109401</a> - [DXVK] Project Cars rendering problems</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109543">Bug 109543</a> - After upgrade mesa to 19.0.0~rc1 all vulkan based application stop working ["vulkan-cube" received SIGSEGV in radv_pipeline_init_blend_state at ../src/amd/vulkan/radv_pipeline.c:699]</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109603">Bug 109603</a> - nir_instr_as_deref: Assertion `parent && parent->type == nir_instr_type_deref' failed.</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Bart Oldeman (1):</p>
|
||||
<ul>
|
||||
<li>gallium-xlib: query MIT-SHM before using it.</li>
|
||||
</ul>
|
||||
|
||||
<p>Bas Nieuwenhuizen (2):</p>
|
||||
<ul>
|
||||
<li>radv: Only look at pImmutableSamples if the descriptor has a sampler.</li>
|
||||
<li>amd/common: Use correct writemask for shared memory stores.</li>
|
||||
</ul>
|
||||
|
||||
<p>Dylan Baker (2):</p>
|
||||
<ul>
|
||||
<li>get-pick-list: Add --pretty=medium to the arguments for Cc patches</li>
|
||||
<li>meson: Add dependency on genxml to anvil</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (5):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 18.3.3</li>
|
||||
<li>cherry-ignore: nv50,nvc0: add explicit settings for recent caps</li>
|
||||
<li>cherry-ignore: add more 19.0 only nominations from Ilia</li>
|
||||
<li>cherry-ignore: radv: fix using LOAD_CONTEXT_REG with old GFX ME firmwares on GFX8</li>
|
||||
<li>Update version to 18.3.4</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (1):</p>
|
||||
<ul>
|
||||
<li>vc4: Fix copy-and-paste fail in backport of NEON asm fixes.</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Engestrom (2):</p>
|
||||
<ul>
|
||||
<li>xvmc: fix string comparison</li>
|
||||
<li>xvmc: fix string comparison</li>
|
||||
</ul>
|
||||
|
||||
<p>Ernestas Kulik (2):</p>
|
||||
<ul>
|
||||
<li>vc4: Fix leak in HW queries error path</li>
|
||||
<li>v3d: Fix leak in resource setup error path</li>
|
||||
</ul>
|
||||
|
||||
<p>Iago Toral Quiroga (1):</p>
|
||||
<ul>
|
||||
<li>intel/compiler: do not copy-propagate strided regions to ddx/ddy arguments</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (1):</p>
|
||||
<ul>
|
||||
<li>nvc0: we have 16k-sized framebuffers, fix default scissors</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (3):</p>
|
||||
<ul>
|
||||
<li>intel/fs: Handle IMAGE_SIZE in size_read() and is_send_from_grf()</li>
|
||||
<li>intel/fs: Do the grf127 hack on SIMD8 instructions in SIMD16 mode</li>
|
||||
<li>nir/deref: Rematerialize parents in rematerialize_derefs_in_use_blocks</li>
|
||||
</ul>
|
||||
|
||||
<p>Juan A. Suarez Romero (1):</p>
|
||||
<ul>
|
||||
<li>anv/cmd_buffer: check for NULL framebuffer</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (1):</p>
|
||||
<ul>
|
||||
<li>st/mesa: Limit GL_MAX_[NATIVE_]PROGRAM_PARAMETERS_ARB to 2048</li>
|
||||
</ul>
|
||||
|
||||
<p>Kristian H. Kristensen (1):</p>
|
||||
<ul>
|
||||
<li>freedreno/a6xx: Emit blitter dst with OUT_RELOCW</li>
|
||||
</ul>
|
||||
|
||||
<p>Leo Liu (2):</p>
|
||||
<ul>
|
||||
<li>st/va: fix the incorrect max profiles report</li>
|
||||
<li>st/va/vp9: set max reference as default of VP9 reference number</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (4):</p>
|
||||
<ul>
|
||||
<li>meson: drop the xcb-xrandr version requirement</li>
|
||||
<li>gallium/u_threaded: fix EXPLICIT_FLUSH for flush offsets > 0</li>
|
||||
<li>radeonsi: fix EXPLICIT_FLUSH for flush offsets > 0</li>
|
||||
<li>winsys/amdgpu: don't drop manually added fence dependencies</li>
|
||||
</ul>
|
||||
|
||||
<p>Mario Kleiner (2):</p>
|
||||
<ul>
|
||||
<li>egl/wayland: Allow client->server format conversion for PRIME offload. (v2)</li>
|
||||
<li>egl/wayland-drm: Only announce formats via wl_drm which the driver supports.</li>
|
||||
</ul>
|
||||
|
||||
<p>Oscar Blumberg (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: Fix guardband computation for large render targets</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (1):</p>
|
||||
<ul>
|
||||
<li>freedreno: stop frob'ing pipe_resource::nr_samples</li>
|
||||
</ul>
|
||||
|
||||
<p>Rodrigo Vivi (1):</p>
|
||||
<ul>
|
||||
<li>intel: Add more PCI Device IDs for Coffee Lake and Ice Lake.</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (2):</p>
|
||||
<ul>
|
||||
<li>radv: fix compiler issues with GCC 9</li>
|
||||
<li>radv: always export gl_SampleMask when the fragment shader uses it</li>
|
||||
</ul>
|
||||
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -589,7 +589,7 @@ struct __DRIdamageExtensionRec {
|
||||
* SWRast Loader extension.
|
||||
*/
|
||||
#define __DRI_SWRAST_LOADER "DRI_SWRastLoader"
|
||||
#define __DRI_SWRAST_LOADER_VERSION 4
|
||||
#define __DRI_SWRAST_LOADER_VERSION 5
|
||||
struct __DRIswrastLoaderExtensionRec {
|
||||
__DRIextension base;
|
||||
|
||||
@@ -649,6 +649,23 @@ struct __DRIswrastLoaderExtensionRec {
|
||||
void (*getImageShm)(__DRIdrawable *readable,
|
||||
int x, int y, int width, int height,
|
||||
int shmid, void *loaderPrivate);
|
||||
|
||||
/**
|
||||
* Put shm image to drawable (v2)
|
||||
*
|
||||
* The original version fixes srcx/y to 0, and expected
|
||||
* the offset to be adjusted. This version allows src x,y
|
||||
* to not be included in the offset. This is needed to
|
||||
* avoid certain overflow checks in the X server, that
|
||||
* result in lost rendering.
|
||||
*
|
||||
* \since 5
|
||||
*/
|
||||
void (*putImageShm2)(__DRIdrawable *drawable, int op,
|
||||
int x, int y,
|
||||
int width, int height, int stride,
|
||||
int shmid, char *shmaddr, unsigned offset,
|
||||
void *loaderPrivate);
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -171,6 +171,7 @@ CHIPSET(0x3185, glk_2x6, "Intel(R) UHD Graphics 600 (Geminilake 2x6)")
|
||||
CHIPSET(0x3E90, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
|
||||
CHIPSET(0x3E93, cfl_gt1, "Intel(R) UHD Graphics 610 (Coffeelake 2x6 GT1)")
|
||||
CHIPSET(0x3E99, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
|
||||
CHIPSET(0x3E9C, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
|
||||
CHIPSET(0x3E91, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)")
|
||||
CHIPSET(0x3E92, cfl_gt2, "Intel(R) UHD Graphics 630 (Coffeelake 3x8 GT2)")
|
||||
CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
|
||||
@@ -203,6 +204,10 @@ CHIPSET(0x5A54, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
|
||||
CHIPSET(0x8A50, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
|
||||
CHIPSET(0x8A51, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
|
||||
CHIPSET(0x8A52, icl_8x8, "Intel(R) HD Graphics (Ice Lake 8x8 GT2)")
|
||||
CHIPSET(0x8A56, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
|
||||
CHIPSET(0x8A57, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
|
||||
CHIPSET(0x8A58, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
|
||||
CHIPSET(0x8A59, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
|
||||
CHIPSET(0x8A5A, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
|
||||
CHIPSET(0x8A5B, icl_4x8, "Intel(R) HD Graphics (Ice Lake 4x8 GT1)")
|
||||
CHIPSET(0x8A5C, icl_6x8, "Intel(R) HD Graphics (Ice Lake 6x8 GT1.5)")
|
||||
|
18
meson.build
18
meson.build
@@ -1,4 +1,4 @@
|
||||
# Copyright © 2017-2018 Intel Corporation
|
||||
# Copyright © 2017-2019 Intel Corporation
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -165,6 +165,14 @@ with_gallium_svga = _drivers.contains('svga')
|
||||
with_gallium_virgl = _drivers.contains('virgl')
|
||||
with_gallium_swr = _drivers.contains('swr')
|
||||
|
||||
if cc.get_id() == 'intel'
|
||||
if meson.version().version_compare('< 0.49.0')
|
||||
error('Meson does not have sufficient support of ICC before 0.49.0 to compile mesa')
|
||||
elif with_gallium_swr and meson.version().version_compare('== 0.49.0')
|
||||
warning('Meson as of 0.49.0 is sufficient for compiling mesa with ICC, but there are some caveats with SWR. 0.49.1 should resolve all of these')
|
||||
endif
|
||||
endif
|
||||
|
||||
with_gallium = _drivers.length() != 0 and _drivers != ['']
|
||||
|
||||
if with_gallium and system_has_kms_drm
|
||||
@@ -385,8 +393,8 @@ if with_any_vk and (with_platform_x11 and not with_dri3)
|
||||
error('Vulkan drivers require dri3 for X11 support')
|
||||
endif
|
||||
if with_dri
|
||||
if with_glx == 'disabled' and not with_egl and not with_gbm
|
||||
error('building dri drivers require at least one windowing system')
|
||||
if with_glx == 'disabled' and not with_egl and not with_gbm and with_osmesa != 'classic'
|
||||
error('building dri drivers require at least one windowing system or classic osmesa')
|
||||
endif
|
||||
endif
|
||||
|
||||
@@ -671,7 +679,7 @@ if _opencl != 'disabled'
|
||||
else
|
||||
dep_clc = null_dep
|
||||
with_gallium_opencl = false
|
||||
with_gallium_icd = false
|
||||
with_opencl_icd = false
|
||||
endif
|
||||
|
||||
gl_pkgconfig_c_flags = []
|
||||
@@ -1399,7 +1407,7 @@ if with_platform_x11
|
||||
dep_xcb_xfixes = dependency('xcb-xfixes')
|
||||
endif
|
||||
if with_xlib_lease
|
||||
dep_xcb_xrandr = dependency('xcb-randr', version : '>= 1.12')
|
||||
dep_xcb_xrandr = dependency('xcb-randr')
|
||||
dep_xlib_xrandr = dependency('xrandr', version : '>= 1.3')
|
||||
endif
|
||||
endif
|
||||
|
@@ -2072,7 +2072,7 @@ visit_store_var(struct ac_nir_context *ctx,
|
||||
int writemask = instr->const_index[0];
|
||||
LLVMValueRef address = get_src(ctx, instr->src[0]);
|
||||
LLVMValueRef val = get_src(ctx, instr->src[1]);
|
||||
if (util_is_power_of_two_nonzero(writemask)) {
|
||||
if (writemask == (1u << ac_get_llvm_num_components(val)) - 1) {
|
||||
val = LLVMBuildBitCast(
|
||||
ctx->ac.builder, val,
|
||||
LLVMGetElementType(LLVMTypeOf(address)), "");
|
||||
@@ -2802,15 +2802,16 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
|
||||
const nir_intrinsic_instr *instr)
|
||||
{
|
||||
LLVMValueRef result[4];
|
||||
LLVMValueRef interp_param, attr_number;
|
||||
LLVMValueRef interp_param;
|
||||
unsigned location;
|
||||
unsigned chan;
|
||||
LLVMValueRef src_c0 = NULL;
|
||||
LLVMValueRef src_c1 = NULL;
|
||||
LLVMValueRef src0 = NULL;
|
||||
|
||||
nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
|
||||
int input_index = ctx->abi->fs_input_attr_indices[var->data.location - VARYING_SLOT_VAR0];
|
||||
nir_deref_instr *deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref_instr);
|
||||
int input_base = ctx->abi->fs_input_attr_indices[var->data.location - VARYING_SLOT_VAR0];
|
||||
switch (instr->intrinsic) {
|
||||
case nir_intrinsic_interp_deref_at_centroid:
|
||||
location = INTERP_CENTROID;
|
||||
@@ -2840,7 +2841,6 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
|
||||
src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
|
||||
}
|
||||
interp_param = ctx->abi->lookup_interp_param(ctx->abi, var->data.interpolation, location);
|
||||
attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);
|
||||
|
||||
if (location == INTERP_CENTER) {
|
||||
LLVMValueRef ij_out[2];
|
||||
@@ -2878,26 +2878,65 @@ static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
|
||||
|
||||
}
|
||||
|
||||
LLVMValueRef array_idx = ctx->ac.i32_0;
|
||||
while(deref_instr->deref_type != nir_deref_type_var) {
|
||||
if (deref_instr->deref_type == nir_deref_type_array) {
|
||||
unsigned array_size = glsl_get_aoa_size(deref_instr->type);
|
||||
if (!array_size)
|
||||
array_size = 1;
|
||||
|
||||
LLVMValueRef offset;
|
||||
nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index);
|
||||
if (const_value) {
|
||||
offset = LLVMConstInt(ctx->ac.i32, array_size * const_value->u32[0], false);
|
||||
} else {
|
||||
LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
|
||||
|
||||
offset = LLVMBuildMul(ctx->ac.builder, indirect,
|
||||
LLVMConstInt(ctx->ac.i32, array_size, false), "");
|
||||
}
|
||||
|
||||
array_idx = LLVMBuildAdd(ctx->ac.builder, array_idx, offset, "");
|
||||
deref_instr = nir_src_as_deref(deref_instr->parent);
|
||||
} else {
|
||||
unreachable("Unsupported deref type");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
unsigned input_array_size = glsl_get_aoa_size(var->type);
|
||||
if (!input_array_size)
|
||||
input_array_size = 1;
|
||||
|
||||
for (chan = 0; chan < 4; chan++) {
|
||||
LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, input_array_size));
|
||||
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
|
||||
|
||||
if (interp_param) {
|
||||
interp_param = LLVMBuildBitCast(ctx->ac.builder,
|
||||
interp_param, ctx->ac.v2f32, "");
|
||||
LLVMValueRef i = LLVMBuildExtractElement(
|
||||
ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
|
||||
LLVMValueRef j = LLVMBuildExtractElement(
|
||||
ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
|
||||
for (unsigned idx = 0; idx < input_array_size; ++idx) {
|
||||
LLVMValueRef v, attr_number;
|
||||
|
||||
result[chan] = ac_build_fs_interp(&ctx->ac,
|
||||
llvm_chan, attr_number,
|
||||
ctx->abi->prim_mask, i, j);
|
||||
} else {
|
||||
result[chan] = ac_build_fs_interp_mov(&ctx->ac,
|
||||
LLVMConstInt(ctx->ac.i32, 2, false),
|
||||
llvm_chan, attr_number,
|
||||
ctx->abi->prim_mask);
|
||||
attr_number = LLVMConstInt(ctx->ac.i32, input_base + idx, false);
|
||||
if (interp_param) {
|
||||
interp_param = LLVMBuildBitCast(ctx->ac.builder,
|
||||
interp_param, ctx->ac.v2f32, "");
|
||||
LLVMValueRef i = LLVMBuildExtractElement(
|
||||
ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
|
||||
LLVMValueRef j = LLVMBuildExtractElement(
|
||||
ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
|
||||
|
||||
v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
|
||||
ctx->abi->prim_mask, i, j);
|
||||
} else {
|
||||
v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false),
|
||||
llvm_chan, attr_number, ctx->abi->prim_mask);
|
||||
}
|
||||
|
||||
gather = LLVMBuildInsertElement(ctx->ac.builder, gather, v,
|
||||
LLVMConstInt(ctx->ac.i32, idx, false), "");
|
||||
}
|
||||
|
||||
result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, array_idx, "");
|
||||
|
||||
}
|
||||
return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
|
||||
var->data.location_frac);
|
||||
@@ -3460,7 +3499,7 @@ static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
|
||||
* It's unnecessary if the original texture format was
|
||||
* Z32_FLOAT, but we don't know that here.
|
||||
*/
|
||||
if (args.compare && ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
|
||||
if (args.compare && ctx->ac.chip_class >= VI && ctx->abi->clamp_shadow_reference)
|
||||
args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));
|
||||
|
||||
/* pack derivatives */
|
||||
@@ -3851,7 +3890,7 @@ ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
bool is_16bit = glsl_type_is_16bit(variable->type);
|
||||
bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
|
||||
LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
|
||||
for (unsigned i = 0; i < attrib_count; ++i) {
|
||||
for (unsigned chan = 0; chan < 4; chan++) {
|
||||
|
@@ -84,7 +84,9 @@ VkResult radv_CreateDescriptorSetLayout(
|
||||
uint32_t immutable_sampler_count = 0;
|
||||
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
|
||||
max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
|
||||
if (pCreateInfo->pBindings[j].pImmutableSamplers)
|
||||
if ((pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
|
||||
pCreateInfo->pBindings[j].descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
|
||||
pCreateInfo->pBindings[j].pImmutableSamplers)
|
||||
immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
|
||||
}
|
||||
|
||||
@@ -182,7 +184,9 @@ VkResult radv_CreateDescriptorSetLayout(
|
||||
set_layout->has_variable_descriptors = true;
|
||||
}
|
||||
|
||||
if (binding->pImmutableSamplers) {
|
||||
if ((binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ||
|
||||
binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER) &&
|
||||
binding->pImmutableSamplers) {
|
||||
set_layout->binding[b].immutable_samplers_offset = samplers_offset;
|
||||
set_layout->binding[b].immutable_samplers_equal =
|
||||
has_equal_immutable_samplers(binding->pImmutableSamplers, binding->descriptorCount);
|
||||
|
@@ -525,7 +525,7 @@ VkResult radv_CreateInstance(
|
||||
pCreateInfo->pApplicationInfo->apiVersion != 0) {
|
||||
client_version = pCreateInfo->pApplicationInfo->apiVersion;
|
||||
} else {
|
||||
radv_EnumerateInstanceVersion(&client_version);
|
||||
client_version = VK_API_VERSION_1_0;
|
||||
}
|
||||
|
||||
instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
|
||||
|
@@ -849,54 +849,60 @@ build_pipeline(struct radv_device *device,
|
||||
.subpass = 0,
|
||||
};
|
||||
|
||||
switch(aspect) {
|
||||
case VK_IMAGE_ASPECT_COLOR_BIT:
|
||||
vk_pipeline_info.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = (VkPipelineColorBlendAttachmentState []) {
|
||||
{ .colorWriteMask =
|
||||
VK_COLOR_COMPONENT_A_BIT |
|
||||
VK_COLOR_COMPONENT_R_BIT |
|
||||
VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT },
|
||||
VkPipelineColorBlendStateCreateInfo color_blend_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
|
||||
.attachmentCount = 1,
|
||||
.pAttachments = (VkPipelineColorBlendAttachmentState []) {
|
||||
{
|
||||
.colorWriteMask = VK_COLOR_COMPONENT_A_BIT |
|
||||
VK_COLOR_COMPONENT_R_BIT |
|
||||
VK_COLOR_COMPONENT_G_BIT |
|
||||
VK_COLOR_COMPONENT_B_BIT },
|
||||
}
|
||||
};
|
||||
|
||||
VkPipelineDepthStencilStateCreateInfo depth_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.depthTestEnable = true,
|
||||
.depthWriteEnable = true,
|
||||
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
|
||||
};
|
||||
|
||||
VkPipelineDepthStencilStateCreateInfo stencil_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.depthTestEnable = false,
|
||||
.depthWriteEnable = false,
|
||||
.stencilTestEnable = true,
|
||||
.front = {
|
||||
.failOp = VK_STENCIL_OP_REPLACE,
|
||||
.passOp = VK_STENCIL_OP_REPLACE,
|
||||
.depthFailOp = VK_STENCIL_OP_REPLACE,
|
||||
.compareOp = VK_COMPARE_OP_ALWAYS,
|
||||
.compareMask = 0xff,
|
||||
.writeMask = 0xff,
|
||||
.reference = 0
|
||||
},
|
||||
.back = {
|
||||
.failOp = VK_STENCIL_OP_REPLACE,
|
||||
.passOp = VK_STENCIL_OP_REPLACE,
|
||||
.depthFailOp = VK_STENCIL_OP_REPLACE,
|
||||
.compareOp = VK_COMPARE_OP_ALWAYS,
|
||||
.compareMask = 0xff,
|
||||
.writeMask = 0xff,
|
||||
.reference = 0
|
||||
},
|
||||
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
|
||||
};
|
||||
|
||||
switch(aspect) {
|
||||
case VK_IMAGE_ASPECT_COLOR_BIT:
|
||||
vk_pipeline_info.pColorBlendState = &color_blend_info;
|
||||
break;
|
||||
case VK_IMAGE_ASPECT_DEPTH_BIT:
|
||||
vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.depthTestEnable = true,
|
||||
.depthWriteEnable = true,
|
||||
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
|
||||
};
|
||||
vk_pipeline_info.pDepthStencilState = &depth_info;
|
||||
break;
|
||||
case VK_IMAGE_ASPECT_STENCIL_BIT:
|
||||
vk_pipeline_info.pDepthStencilState = &(VkPipelineDepthStencilStateCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
|
||||
.depthTestEnable = false,
|
||||
.depthWriteEnable = false,
|
||||
.stencilTestEnable = true,
|
||||
.front = {
|
||||
.failOp = VK_STENCIL_OP_REPLACE,
|
||||
.passOp = VK_STENCIL_OP_REPLACE,
|
||||
.depthFailOp = VK_STENCIL_OP_REPLACE,
|
||||
.compareOp = VK_COMPARE_OP_ALWAYS,
|
||||
.compareMask = 0xff,
|
||||
.writeMask = 0xff,
|
||||
.reference = 0
|
||||
},
|
||||
.back = {
|
||||
.failOp = VK_STENCIL_OP_REPLACE,
|
||||
.passOp = VK_STENCIL_OP_REPLACE,
|
||||
.depthFailOp = VK_STENCIL_OP_REPLACE,
|
||||
.compareOp = VK_COMPARE_OP_ALWAYS,
|
||||
.compareMask = 0xff,
|
||||
.writeMask = 0xff,
|
||||
.reference = 0
|
||||
},
|
||||
.depthCompareOp = VK_COMPARE_OP_ALWAYS,
|
||||
};
|
||||
vk_pipeline_info.pDepthStencilState = &stencil_info;
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled aspect");
|
||||
|
@@ -256,7 +256,16 @@ get_tcs_num_patches(struct radv_shader_context *ctx)
|
||||
/* Make sure that the data fits in LDS. This assumes the shaders only
|
||||
* use LDS for the inputs and outputs.
|
||||
*/
|
||||
hardware_lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
|
||||
hardware_lds_size = 32768;
|
||||
|
||||
/* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
|
||||
* threadgroup, even though there is more than 32 KiB LDS.
|
||||
*
|
||||
* Test: dEQP-VK.tessellation.shader_input_output.barrier
|
||||
*/
|
||||
if (ctx->options->chip_class >= CIK && ctx->options->family != CHIP_STONEY)
|
||||
hardware_lds_size = 65536;
|
||||
|
||||
num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
|
||||
/* Make sure the output data fits in the offchip buffer */
|
||||
num_patches = MIN2(num_patches, (ctx->options->tess_offchip_block_dw_size * 4) / output_patch_size);
|
||||
@@ -2160,7 +2169,7 @@ handle_fs_input_decl(struct radv_shader_context *ctx,
|
||||
|
||||
interp = lookup_interp_param(&ctx->abi, variable->data.interpolation, interp_type);
|
||||
}
|
||||
bool is_16bit = glsl_type_is_16bit(variable->type);
|
||||
bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
|
||||
LLVMTypeRef type = is_16bit ? ctx->ac.i16 : ctx->ac.i32;
|
||||
if (interp == NULL)
|
||||
interp = LLVMGetUndef(type);
|
||||
|
@@ -3179,11 +3179,11 @@ radv_compute_db_shader_control(const struct radv_device *device,
|
||||
bool disable_rbplus = device->physical_device->has_rbplus &&
|
||||
!device->physical_device->rbplus_allowed;
|
||||
|
||||
/* Do not enable the gl_SampleMask fragment shader output if MSAA is
|
||||
* disabled.
|
||||
/* It shouldn't be needed to export gl_SampleMask when MSAA is disabled
|
||||
* but this appears to break Project Cars (DXVK). See
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=109401
|
||||
*/
|
||||
bool mask_export_enable = ms->num_samples > 1 &&
|
||||
ps->info.info.ps.writes_sample_mask;
|
||||
bool mask_export_enable = ps->info.info.ps.writes_sample_mask;
|
||||
|
||||
return S_02880C_Z_EXPORT_ENABLE(ps->info.info.ps.writes_z) |
|
||||
S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.info.ps.writes_stencil) |
|
||||
@@ -3371,14 +3371,8 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
|
||||
else
|
||||
ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */
|
||||
|
||||
ia_multi_vgt_param.partial_es_wave = false;
|
||||
if (pipeline->device->has_distributed_tess) {
|
||||
if (radv_pipeline_has_gs(pipeline)) {
|
||||
if (device->physical_device->rad_info.chip_class <= VI)
|
||||
ia_multi_vgt_param.partial_es_wave = true;
|
||||
}
|
||||
}
|
||||
/* GS requirement. */
|
||||
ia_multi_vgt_param.partial_es_wave = false;
|
||||
if (radv_pipeline_has_gs(pipeline) && device->physical_device->rad_info.chip_class <= VI)
|
||||
if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pipeline->device->gs_table_depth - 3)
|
||||
ia_multi_vgt_param.partial_es_wave = true;
|
||||
@@ -3424,13 +3418,8 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
|
||||
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
|
||||
if (device->has_distributed_tess) {
|
||||
if (radv_pipeline_has_gs(pipeline)) {
|
||||
if (device->physical_device->rad_info.family == CHIP_TONGA ||
|
||||
device->physical_device->rad_info.family == CHIP_FIJI ||
|
||||
device->physical_device->rad_info.family == CHIP_POLARIS10 ||
|
||||
device->physical_device->rad_info.family == CHIP_POLARIS11 ||
|
||||
device->physical_device->rad_info.family == CHIP_POLARIS12 ||
|
||||
device->physical_device->rad_info.family == CHIP_VEGAM)
|
||||
ia_multi_vgt_param.partial_vs_wave = true;
|
||||
if (device->physical_device->rad_info.chip_class <= VI)
|
||||
ia_multi_vgt_param.partial_es_wave = true;
|
||||
} else {
|
||||
ia_multi_vgt_param.partial_vs_wave = true;
|
||||
}
|
||||
@@ -3448,6 +3437,26 @@ radv_compute_ia_multi_vgt_param_helpers(struct radv_pipeline *pipeline,
|
||||
ia_multi_vgt_param.partial_vs_wave = true;
|
||||
}
|
||||
|
||||
if (radv_pipeline_has_gs(pipeline)) {
|
||||
/* On these chips there is the possibility of a hang if the
|
||||
* pipeline uses a GS and partial_vs_wave is not set.
|
||||
*
|
||||
* This mostly does not hit 4-SE chips, as those typically set
|
||||
* ia_switch_on_eoi and then partial_vs_wave is set for pipelines
|
||||
* with GS due to another workaround.
|
||||
*
|
||||
* Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242
|
||||
*/
|
||||
if (device->physical_device->rad_info.family == CHIP_TONGA ||
|
||||
device->physical_device->rad_info.family == CHIP_FIJI ||
|
||||
device->physical_device->rad_info.family == CHIP_POLARIS10 ||
|
||||
device->physical_device->rad_info.family == CHIP_POLARIS11 ||
|
||||
device->physical_device->rad_info.family == CHIP_POLARIS12 ||
|
||||
device->physical_device->rad_info.family == CHIP_VEGAM) {
|
||||
ia_multi_vgt_param.partial_vs_wave = true;
|
||||
}
|
||||
}
|
||||
|
||||
ia_multi_vgt_param.base =
|
||||
S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) |
|
||||
/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
|
||||
|
@@ -363,31 +363,29 @@ copy_index_derefs_to_temps(ir_instruction *ir, void *data)
|
||||
ir = a->array->as_dereference();
|
||||
|
||||
ir_rvalue *idx = a->array_index;
|
||||
if (idx->as_dereference_variable()) {
|
||||
ir_variable *var = idx->variable_referenced();
|
||||
ir_variable *var = idx->variable_referenced();
|
||||
|
||||
/* If the index is read only it cannot change so there is no need
|
||||
* to copy it.
|
||||
*/
|
||||
if (var->data.read_only || var->data.memory_read_only)
|
||||
return;
|
||||
/* If the index is read only it cannot change so there is no need
|
||||
* to copy it.
|
||||
*/
|
||||
if (!var || var->data.read_only || var->data.memory_read_only)
|
||||
return;
|
||||
|
||||
ir_variable *tmp = new(d->mem_ctx) ir_variable(idx->type, "idx_tmp",
|
||||
ir_var_temporary);
|
||||
d->before_instructions->push_tail(tmp);
|
||||
ir_variable *tmp = new(d->mem_ctx) ir_variable(idx->type, "idx_tmp",
|
||||
ir_var_temporary);
|
||||
d->before_instructions->push_tail(tmp);
|
||||
|
||||
ir_dereference_variable *const deref_tmp_1 =
|
||||
new(d->mem_ctx) ir_dereference_variable(tmp);
|
||||
ir_assignment *const assignment =
|
||||
new(d->mem_ctx) ir_assignment(deref_tmp_1,
|
||||
idx->clone(d->mem_ctx, NULL));
|
||||
d->before_instructions->push_tail(assignment);
|
||||
ir_dereference_variable *const deref_tmp_1 =
|
||||
new(d->mem_ctx) ir_dereference_variable(tmp);
|
||||
ir_assignment *const assignment =
|
||||
new(d->mem_ctx) ir_assignment(deref_tmp_1,
|
||||
idx->clone(d->mem_ctx, NULL));
|
||||
d->before_instructions->push_tail(assignment);
|
||||
|
||||
/* Replace the array index with a dereference of the new temporary */
|
||||
ir_dereference_variable *const deref_tmp_2 =
|
||||
new(d->mem_ctx) ir_dereference_variable(tmp);
|
||||
a->array_index = deref_tmp_2;
|
||||
}
|
||||
/* Replace the array index with a dereference of the new temporary */
|
||||
ir_dereference_variable *const deref_tmp_2 =
|
||||
new(d->mem_ctx) ir_dereference_variable(tmp);
|
||||
a->array_index = deref_tmp_2;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -402,7 +400,8 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
|
||||
* nothing needs to be done to fix the parameter.
|
||||
*/
|
||||
if (formal_type == actual->type
|
||||
&& (expr == NULL || expr->operation != ir_binop_vector_extract))
|
||||
&& (expr == NULL || expr->operation != ir_binop_vector_extract)
|
||||
&& actual->as_dereference_variable())
|
||||
return;
|
||||
|
||||
/* An array index could also be an out variable so we need to make a copy
|
||||
@@ -456,7 +455,7 @@ fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type,
|
||||
ir_dereference_variable *const deref_tmp_1 =
|
||||
new(mem_ctx) ir_dereference_variable(tmp);
|
||||
ir_assignment *const assignment =
|
||||
new(mem_ctx) ir_assignment(deref_tmp_1, actual);
|
||||
new(mem_ctx) ir_assignment(deref_tmp_1, actual->clone(mem_ctx, NULL));
|
||||
before_instructions->push_tail(assignment);
|
||||
}
|
||||
|
||||
|
@@ -101,6 +101,10 @@ output_read_remover::visit(ir_dereference_variable *ir)
|
||||
void *var_ctx = ralloc_parent(ir->var);
|
||||
temp = new(var_ctx) ir_variable(ir->var->type, ir->var->name,
|
||||
ir_var_temporary);
|
||||
/* copy flags which affect arithematical precision */
|
||||
temp->data.invariant = ir->var->data.invariant;
|
||||
temp->data.precise = ir->var->data.precise;
|
||||
temp->data.precision = ir->var->data.precision;
|
||||
_mesa_hash_table_insert(replacements, ir->var, temp);
|
||||
ir->var->insert_after(temp);
|
||||
}
|
||||
|
@@ -764,6 +764,12 @@ get_shader_var_and_pointer_sizes(size_t *s_var_size, size_t *s_var_ptrs,
|
||||
sizeof(var->name);
|
||||
}
|
||||
|
||||
enum uniform_type
|
||||
{
|
||||
uniform_remapped,
|
||||
uniform_not_remapped
|
||||
};
|
||||
|
||||
static void
|
||||
write_program_resource_data(struct blob *metadata,
|
||||
struct gl_shader_program *prog,
|
||||
@@ -816,12 +822,19 @@ write_program_resource_data(struct blob *metadata,
|
||||
case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
|
||||
case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
|
||||
case GL_UNIFORM:
|
||||
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
|
||||
if (strcmp(((gl_uniform_storage *)res->Data)->name,
|
||||
prog->data->UniformStorage[i].name) == 0) {
|
||||
blob_write_uint32(metadata, i);
|
||||
break;
|
||||
if (((gl_uniform_storage *)res->Data)->builtin ||
|
||||
res->Type != GL_UNIFORM) {
|
||||
blob_write_uint32(metadata, uniform_not_remapped);
|
||||
for (unsigned i = 0; i < prog->data->NumUniformStorage; i++) {
|
||||
if (strcmp(((gl_uniform_storage *)res->Data)->name,
|
||||
prog->data->UniformStorage[i].name) == 0) {
|
||||
blob_write_uint32(metadata, i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
blob_write_uint32(metadata, uniform_remapped);
|
||||
blob_write_uint32(metadata, ((gl_uniform_storage *)res->Data)->remap_location);
|
||||
}
|
||||
break;
|
||||
case GL_ATOMIC_COUNTER_BUFFER:
|
||||
@@ -906,9 +919,15 @@ read_program_resource_data(struct blob_reader *metadata,
|
||||
case GL_COMPUTE_SUBROUTINE_UNIFORM:
|
||||
case GL_TESS_CONTROL_SUBROUTINE_UNIFORM:
|
||||
case GL_TESS_EVALUATION_SUBROUTINE_UNIFORM:
|
||||
case GL_UNIFORM:
|
||||
res->Data = &prog->data->UniformStorage[blob_read_uint32(metadata)];
|
||||
case GL_UNIFORM: {
|
||||
enum uniform_type type = (enum uniform_type) blob_read_uint32(metadata);
|
||||
if (type == uniform_not_remapped) {
|
||||
res->Data = &prog->data->UniformStorage[blob_read_uint32(metadata)];
|
||||
} else {
|
||||
res->Data = prog->UniformRemapTable[blob_read_uint32(metadata)];
|
||||
}
|
||||
break;
|
||||
}
|
||||
case GL_ATOMIC_COUNTER_BUFFER:
|
||||
res->Data = &prog->data->AtomicBuffers[blob_read_uint32(metadata)];
|
||||
break;
|
||||
|
@@ -490,10 +490,9 @@ nir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
|
||||
_mesa_hash_table_clear(state.cache, NULL);
|
||||
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
if (instr->type == nir_instr_type_deref) {
|
||||
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr));
|
||||
if (instr->type == nir_instr_type_deref &&
|
||||
nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
|
||||
continue;
|
||||
}
|
||||
|
||||
state.builder.cursor = nir_before_instr(instr);
|
||||
nir_foreach_src(instr, rematerialize_deref_src, &state);
|
||||
|
@@ -76,13 +76,13 @@ add_var_xfb_outputs(nir_xfb_info *xfb,
|
||||
nir_xfb_output_info *output = &xfb->outputs[xfb->output_count++];
|
||||
|
||||
output->buffer = var->data.xfb_buffer;
|
||||
output->offset = *offset;
|
||||
output->offset = *offset + s * 16;
|
||||
output->location = *location;
|
||||
output->component_mask = (comp_mask >> (s * 4)) & 0xf;
|
||||
|
||||
(*location)++;
|
||||
*offset += comp_slots * 4;
|
||||
}
|
||||
*offset += comp_slots * 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -143,9 +143,19 @@ gather_vars_written(struct copy_prop_var_state *state,
|
||||
written->modes = nir_var_shader_out;
|
||||
break;
|
||||
|
||||
case nir_intrinsic_deref_atomic_add:
|
||||
case nir_intrinsic_deref_atomic_imin:
|
||||
case nir_intrinsic_deref_atomic_umin:
|
||||
case nir_intrinsic_deref_atomic_imax:
|
||||
case nir_intrinsic_deref_atomic_umax:
|
||||
case nir_intrinsic_deref_atomic_and:
|
||||
case nir_intrinsic_deref_atomic_or:
|
||||
case nir_intrinsic_deref_atomic_xor:
|
||||
case nir_intrinsic_deref_atomic_exchange:
|
||||
case nir_intrinsic_deref_atomic_comp_swap:
|
||||
case nir_intrinsic_store_deref:
|
||||
case nir_intrinsic_copy_deref: {
|
||||
/* Destination in _both_ store_deref and copy_deref is src[0]. */
|
||||
/* Destination in all of store_deref, copy_deref and the atomics is src[0]. */
|
||||
nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
|
||||
|
||||
uintptr_t mask = intrin->intrinsic == nir_intrinsic_store_deref ?
|
||||
@@ -750,6 +760,19 @@ copy_prop_vars_block(struct copy_prop_var_state *state,
|
||||
break;
|
||||
}
|
||||
|
||||
case nir_intrinsic_deref_atomic_add:
|
||||
case nir_intrinsic_deref_atomic_imin:
|
||||
case nir_intrinsic_deref_atomic_umin:
|
||||
case nir_intrinsic_deref_atomic_imax:
|
||||
case nir_intrinsic_deref_atomic_umax:
|
||||
case nir_intrinsic_deref_atomic_and:
|
||||
case nir_intrinsic_deref_atomic_or:
|
||||
case nir_intrinsic_deref_atomic_xor:
|
||||
case nir_intrinsic_deref_atomic_exchange:
|
||||
case nir_intrinsic_deref_atomic_comp_swap:
|
||||
kill_aliases(copies, nir_src_as_deref(intrin->src[0]), 0xf);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@@ -2819,7 +2819,8 @@ dri2_bind_wayland_display_wl(_EGLDriver *drv, _EGLDisplay *disp,
|
||||
const struct wayland_drm_callbacks wl_drm_callbacks = {
|
||||
.authenticate = (int(*)(void *, uint32_t)) dri2_dpy->vtbl->authenticate,
|
||||
.reference_buffer = dri2_wl_reference_buffer,
|
||||
.release_buffer = dri2_wl_release_buffer
|
||||
.release_buffer = dri2_wl_release_buffer,
|
||||
.is_format_supported = dri2_wl_is_format_supported
|
||||
};
|
||||
int flags = 0;
|
||||
uint64_t cap;
|
||||
|
@@ -457,6 +457,8 @@ EGLBoolean
|
||||
dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp);
|
||||
void
|
||||
dri2_teardown_wayland(struct dri2_egl_display *dri2_dpy);
|
||||
bool
|
||||
dri2_wl_is_format_supported(void* user_data, uint32_t format);
|
||||
#else
|
||||
static inline EGLBoolean
|
||||
dri2_initialize_wayland(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
|
@@ -59,49 +59,57 @@ static const struct dri2_wl_visual {
|
||||
uint32_t wl_drm_format;
|
||||
uint32_t wl_shm_format;
|
||||
int dri_image_format;
|
||||
/* alt_dri_image_format is a substitute wl_buffer format to use for a
|
||||
* wl-server unsupported dri_image_format, ie. some other dri_image_format in
|
||||
* the table, of the same precision but with different channel ordering, or
|
||||
* __DRI_IMAGE_FORMAT_NONE if an alternate format is not needed or supported.
|
||||
* The code checks if alt_dri_image_format can be used as a fallback for a
|
||||
* dri_image_format for a given wl-server implementation.
|
||||
*/
|
||||
int alt_dri_image_format;
|
||||
int bpp;
|
||||
unsigned int rgba_masks[4];
|
||||
} dri2_wl_visuals[] = {
|
||||
{
|
||||
"XRGB2101010",
|
||||
WL_DRM_FORMAT_XRGB2101010, WL_SHM_FORMAT_XRGB2101010,
|
||||
__DRI_IMAGE_FORMAT_XRGB2101010, 32,
|
||||
__DRI_IMAGE_FORMAT_XRGB2101010, __DRI_IMAGE_FORMAT_XBGR2101010, 32,
|
||||
{ 0x3ff00000, 0x000ffc00, 0x000003ff, 0x00000000 }
|
||||
},
|
||||
{
|
||||
"ARGB2101010",
|
||||
WL_DRM_FORMAT_ARGB2101010, WL_SHM_FORMAT_ARGB2101010,
|
||||
__DRI_IMAGE_FORMAT_ARGB2101010, 32,
|
||||
__DRI_IMAGE_FORMAT_ARGB2101010, __DRI_IMAGE_FORMAT_ABGR2101010, 32,
|
||||
{ 0x3ff00000, 0x000ffc00, 0x000003ff, 0xc0000000 }
|
||||
},
|
||||
{
|
||||
"XBGR2101010",
|
||||
WL_DRM_FORMAT_XBGR2101010, WL_SHM_FORMAT_XBGR2101010,
|
||||
__DRI_IMAGE_FORMAT_XBGR2101010, 32,
|
||||
__DRI_IMAGE_FORMAT_XBGR2101010, __DRI_IMAGE_FORMAT_XRGB2101010, 32,
|
||||
{ 0x000003ff, 0x000ffc00, 0x3ff00000, 0x00000000 }
|
||||
},
|
||||
{
|
||||
"ABGR2101010",
|
||||
WL_DRM_FORMAT_ABGR2101010, WL_SHM_FORMAT_ABGR2101010,
|
||||
__DRI_IMAGE_FORMAT_ABGR2101010, 32,
|
||||
__DRI_IMAGE_FORMAT_ABGR2101010, __DRI_IMAGE_FORMAT_ARGB2101010, 32,
|
||||
{ 0x000003ff, 0x000ffc00, 0x3ff00000, 0xc0000000 }
|
||||
},
|
||||
{
|
||||
"XRGB8888",
|
||||
WL_DRM_FORMAT_XRGB8888, WL_SHM_FORMAT_XRGB8888,
|
||||
__DRI_IMAGE_FORMAT_XRGB8888, 32,
|
||||
__DRI_IMAGE_FORMAT_XRGB8888, __DRI_IMAGE_FORMAT_NONE, 32,
|
||||
{ 0x00ff0000, 0x0000ff00, 0x000000ff, 0x00000000 }
|
||||
},
|
||||
{
|
||||
"ARGB8888",
|
||||
WL_DRM_FORMAT_ARGB8888, WL_SHM_FORMAT_ARGB8888,
|
||||
__DRI_IMAGE_FORMAT_ARGB8888, 32,
|
||||
__DRI_IMAGE_FORMAT_ARGB8888, __DRI_IMAGE_FORMAT_NONE, 32,
|
||||
{ 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 }
|
||||
},
|
||||
{
|
||||
"RGB565",
|
||||
WL_DRM_FORMAT_RGB565, WL_SHM_FORMAT_RGB565,
|
||||
__DRI_IMAGE_FORMAT_RGB565, 16,
|
||||
__DRI_IMAGE_FORMAT_RGB565, __DRI_IMAGE_FORMAT_NONE, 16,
|
||||
{ 0xf800, 0x07e0, 0x001f, 0x0000 }
|
||||
},
|
||||
};
|
||||
@@ -166,6 +174,24 @@ dri2_wl_visual_idx_from_shm_format(uint32_t shm_format)
|
||||
return -1;
|
||||
}
|
||||
|
||||
bool
|
||||
dri2_wl_is_format_supported(void* user_data, uint32_t format)
|
||||
{
|
||||
_EGLDisplay *disp = (_EGLDisplay *) user_data;
|
||||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
|
||||
int j = dri2_wl_visual_idx_from_fourcc(format);
|
||||
|
||||
if (j == -1)
|
||||
return false;
|
||||
|
||||
for (int i = 0; dri2_dpy->driver_configs[i]; i++)
|
||||
if (j == dri2_wl_visual_idx_from_config(dri2_dpy,
|
||||
dri2_dpy->driver_configs[i]))
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int
|
||||
roundtrip(struct dri2_egl_display *dri2_dpy)
|
||||
{
|
||||
@@ -461,15 +487,29 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
|
||||
int use_flags;
|
||||
int visual_idx;
|
||||
unsigned int dri_image_format;
|
||||
unsigned int linear_dri_image_format;
|
||||
uint64_t *modifiers;
|
||||
int num_modifiers;
|
||||
|
||||
visual_idx = dri2_wl_visual_idx_from_fourcc(dri2_surf->format);
|
||||
assert(visual_idx != -1);
|
||||
dri_image_format = dri2_wl_visuals[visual_idx].dri_image_format;
|
||||
linear_dri_image_format = dri_image_format;
|
||||
modifiers = u_vector_tail(&dri2_dpy->wl_modifiers[visual_idx]);
|
||||
num_modifiers = u_vector_length(&dri2_dpy->wl_modifiers[visual_idx]);
|
||||
|
||||
/* Substitute dri image format if server does not support original format */
|
||||
if (!(dri2_dpy->formats & (1 << visual_idx)))
|
||||
linear_dri_image_format = dri2_wl_visuals[visual_idx].alt_dri_image_format;
|
||||
|
||||
/* These asserts hold, as long as dri2_wl_visuals[] is self-consistent and
|
||||
* the PRIME substitution logic in dri2_wl_add_configs_for_visuals() is free
|
||||
* of bugs.
|
||||
*/
|
||||
assert(linear_dri_image_format != __DRI_IMAGE_FORMAT_NONE);
|
||||
assert(dri2_dpy->formats &
|
||||
(1 << dri2_wl_visual_idx_from_dri_image_format(linear_dri_image_format)));
|
||||
|
||||
/* There might be a buffer release already queued that wasn't processed */
|
||||
wl_display_dispatch_queue_pending(dri2_dpy->wl_dpy, dri2_surf->wl_queue);
|
||||
|
||||
@@ -516,7 +556,7 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
|
||||
dri2_dpy->image->createImageWithModifiers(dri2_dpy->dri_screen,
|
||||
dri2_surf->base.Width,
|
||||
dri2_surf->base.Height,
|
||||
dri_image_format,
|
||||
linear_dri_image_format,
|
||||
&linear_mod,
|
||||
1,
|
||||
NULL);
|
||||
@@ -525,7 +565,7 @@ get_back_bo(struct dri2_egl_surface *dri2_surf)
|
||||
dri2_dpy->image->createImage(dri2_dpy->dri_screen,
|
||||
dri2_surf->base.Width,
|
||||
dri2_surf->base.Height,
|
||||
dri_image_format,
|
||||
linear_dri_image_format,
|
||||
use_flags |
|
||||
__DRI_IMAGE_USE_LINEAR,
|
||||
NULL);
|
||||
@@ -1298,8 +1338,11 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
|
||||
unsigned int format_count[ARRAY_SIZE(dri2_wl_visuals)] = { 0 };
|
||||
unsigned int count = 0;
|
||||
bool assigned;
|
||||
|
||||
for (unsigned i = 0; dri2_dpy->driver_configs[i]; i++) {
|
||||
assigned = false;
|
||||
|
||||
for (unsigned j = 0; j < ARRAY_SIZE(dri2_wl_visuals); j++) {
|
||||
struct dri2_egl_config *dri2_conf;
|
||||
|
||||
@@ -1312,6 +1355,43 @@ dri2_wl_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
if (dri2_conf->base.ConfigID == count + 1)
|
||||
count++;
|
||||
format_count[j]++;
|
||||
assigned = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!assigned && dri2_dpy->is_different_gpu) {
|
||||
struct dri2_egl_config *dri2_conf;
|
||||
int alt_dri_image_format, c, s;
|
||||
|
||||
/* No match for config. Try if we can blitImage convert to a visual */
|
||||
c = dri2_wl_visual_idx_from_config(dri2_dpy,
|
||||
dri2_dpy->driver_configs[i]);
|
||||
|
||||
if (c == -1)
|
||||
continue;
|
||||
|
||||
/* Find optimal target visual for blitImage conversion, if any. */
|
||||
alt_dri_image_format = dri2_wl_visuals[c].alt_dri_image_format;
|
||||
s = dri2_wl_visual_idx_from_dri_image_format(alt_dri_image_format);
|
||||
|
||||
if (s == -1 || !(dri2_dpy->formats & (1 << s)))
|
||||
continue;
|
||||
|
||||
/* Visual s works for the Wayland server, and c can be converted into s
|
||||
* by our client gpu during PRIME blitImage conversion to a linear
|
||||
* wl_buffer, so add visual c as supported by the client renderer.
|
||||
*/
|
||||
dri2_conf = dri2_add_config(disp, dri2_dpy->driver_configs[i],
|
||||
count + 1, EGL_WINDOW_BIT, NULL,
|
||||
dri2_wl_visuals[c].rgba_masks);
|
||||
if (dri2_conf) {
|
||||
if (dri2_conf->base.ConfigID == count + 1)
|
||||
count++;
|
||||
format_count[c]++;
|
||||
if (format_count[c] == 1)
|
||||
_eglLog(_EGL_DEBUG, "Client format %s to server format %s via "
|
||||
"PRIME blitImage.", dri2_wl_visuals[c].format_name,
|
||||
dri2_wl_visuals[s].format_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -111,6 +111,8 @@ drm_create_buffer(struct wl_client *client, struct wl_resource *resource,
|
||||
uint32_t stride, uint32_t format)
|
||||
{
|
||||
switch (format) {
|
||||
case WL_DRM_FORMAT_ABGR2101010:
|
||||
case WL_DRM_FORMAT_XBGR2101010:
|
||||
case WL_DRM_FORMAT_ARGB2101010:
|
||||
case WL_DRM_FORMAT_XRGB2101010:
|
||||
case WL_DRM_FORMAT_ARGB8888:
|
||||
@@ -210,10 +212,31 @@ bind_drm(struct wl_client *client, void *data, uint32_t version, uint32_t id)
|
||||
wl_resource_set_implementation(resource, &drm_interface, data, NULL);
|
||||
|
||||
wl_resource_post_event(resource, WL_DRM_DEVICE, drm->device_name);
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
WL_DRM_FORMAT_ARGB2101010);
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
WL_DRM_FORMAT_XRGB2101010);
|
||||
|
||||
if (drm->callbacks.is_format_supported(drm->user_data,
|
||||
WL_DRM_FORMAT_ARGB2101010)) {
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
WL_DRM_FORMAT_ARGB2101010);
|
||||
}
|
||||
|
||||
if (drm->callbacks.is_format_supported(drm->user_data,
|
||||
WL_DRM_FORMAT_XRGB2101010)) {
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
WL_DRM_FORMAT_XRGB2101010);
|
||||
}
|
||||
|
||||
if (drm->callbacks.is_format_supported(drm->user_data,
|
||||
WL_DRM_FORMAT_ABGR2101010)) {
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
WL_DRM_FORMAT_ABGR2101010);
|
||||
}
|
||||
|
||||
if (drm->callbacks.is_format_supported(drm->user_data,
|
||||
WL_DRM_FORMAT_XBGR2101010)) {
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
WL_DRM_FORMAT_XBGR2101010);
|
||||
}
|
||||
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
WL_DRM_FORMAT_ARGB8888);
|
||||
wl_resource_post_event(resource, WL_DRM_FORMAT,
|
||||
|
@@ -14,6 +14,8 @@ struct wayland_drm_callbacks {
|
||||
struct wl_drm_buffer *buffer);
|
||||
|
||||
void (*release_buffer)(void *user_data, struct wl_drm_buffer *buffer);
|
||||
|
||||
bool (*is_format_supported)(void *user_data, uint32_t format);
|
||||
};
|
||||
|
||||
|
||||
|
@@ -1524,7 +1524,8 @@ tc_buffer_do_flush_region(struct threaded_context *tc,
|
||||
if (ttrans->staging) {
|
||||
struct pipe_box src_box;
|
||||
|
||||
u_box_1d(ttrans->offset + box->x % tc->map_buffer_alignment,
|
||||
u_box_1d(ttrans->offset + ttrans->b.box.x % tc->map_buffer_alignment +
|
||||
(box->x - ttrans->b.box.x),
|
||||
box->width, &src_box);
|
||||
|
||||
/* Copy the staging buffer into the original one. */
|
||||
|
@@ -60,6 +60,8 @@ etna_context_destroy(struct pipe_context *pctx)
|
||||
{
|
||||
struct etna_context *ctx = etna_context(pctx);
|
||||
|
||||
util_copy_framebuffer_state(&ctx->framebuffer_s, NULL);
|
||||
|
||||
if (ctx->primconvert)
|
||||
util_primconvert_destroy(ctx->primconvert);
|
||||
|
||||
@@ -296,10 +298,10 @@ etna_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
|
||||
if (DBG_ENABLED(ETNA_DBG_FLUSH_ALL))
|
||||
pctx->flush(pctx, NULL, 0);
|
||||
|
||||
if (ctx->framebuffer.cbuf)
|
||||
etna_resource(ctx->framebuffer.cbuf->texture)->seqno++;
|
||||
if (ctx->framebuffer.zsbuf)
|
||||
etna_resource(ctx->framebuffer.zsbuf->texture)->seqno++;
|
||||
if (ctx->framebuffer_s.cbufs[0])
|
||||
etna_resource(ctx->framebuffer_s.cbufs[0]->texture)->seqno++;
|
||||
if (ctx->framebuffer_s.zsbuf)
|
||||
etna_resource(ctx->framebuffer_s.zsbuf->texture)->seqno++;
|
||||
if (info->index_size && indexbuf != info->index.resource)
|
||||
pipe_resource_reference(&indexbuf, NULL);
|
||||
}
|
||||
|
@@ -182,7 +182,6 @@ struct compiled_viewport_state {
|
||||
|
||||
/* Compiled pipe_framebuffer_state */
|
||||
struct compiled_framebuffer_state {
|
||||
struct pipe_surface *cbuf, *zsbuf; /* keep reference to surfaces */
|
||||
uint32_t GL_MULTI_SAMPLE_CONFIG;
|
||||
uint32_t PE_COLOR_FORMAT;
|
||||
uint32_t PE_DEPTH_CONFIG;
|
||||
|
@@ -37,6 +37,7 @@
|
||||
#include "etnaviv_surface.h"
|
||||
#include "etnaviv_translate.h"
|
||||
#include "etnaviv_util.h"
|
||||
#include "util/u_framebuffer.h"
|
||||
#include "util/u_helpers.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_math.h"
|
||||
@@ -130,7 +131,6 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
|
||||
assert(res->layout & ETNA_LAYOUT_BIT_TILE); /* Cannot render to linear surfaces */
|
||||
etna_update_render_resource(pctx, cbuf->base.texture);
|
||||
|
||||
pipe_surface_reference(&cs->cbuf, &cbuf->base);
|
||||
cs->PE_COLOR_FORMAT =
|
||||
VIVS_PE_COLOR_FORMAT_FORMAT(translate_rs_format(cbuf->base.format)) |
|
||||
VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK |
|
||||
@@ -182,7 +182,6 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
|
||||
|
||||
nr_samples_color = cbuf->base.texture->nr_samples;
|
||||
} else {
|
||||
pipe_surface_reference(&cs->cbuf, NULL);
|
||||
/* Clearing VIVS_PE_COLOR_FORMAT_COMPONENTS__MASK and
|
||||
* VIVS_PE_COLOR_FORMAT_OVERWRITE prevents us from overwriting the
|
||||
* color target */
|
||||
@@ -201,7 +200,6 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
|
||||
|
||||
etna_update_render_resource(pctx, zsbuf->base.texture);
|
||||
|
||||
pipe_surface_reference(&cs->zsbuf, &zsbuf->base);
|
||||
assert(res->layout &ETNA_LAYOUT_BIT_TILE); /* Cannot render to linear surfaces */
|
||||
|
||||
uint32_t depth_format = translate_depth_format(zsbuf->base.format);
|
||||
@@ -252,7 +250,6 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
|
||||
|
||||
nr_samples_depth = zsbuf->base.texture->nr_samples;
|
||||
} else {
|
||||
pipe_surface_reference(&cs->zsbuf, NULL);
|
||||
cs->PE_DEPTH_CONFIG = VIVS_PE_DEPTH_CONFIG_DEPTH_MODE_NONE;
|
||||
cs->PE_DEPTH_ADDR.bo = NULL;
|
||||
cs->PE_DEPTH_STRIDE = 0;
|
||||
@@ -325,7 +322,8 @@ etna_set_framebuffer_state(struct pipe_context *pctx,
|
||||
*/
|
||||
cs->PE_LOGIC_OP = VIVS_PE_LOGIC_OP_SINGLE_BUFFER(ctx->specs.single_buffer ? 3 : 0);
|
||||
|
||||
ctx->framebuffer_s = *sv; /* keep copy of original structure */
|
||||
/* keep copy of original structure */
|
||||
util_copy_framebuffer_state(&ctx->framebuffer_s, sv);
|
||||
ctx->dirty |= ETNA_DIRTY_FRAMEBUFFER | ETNA_DIRTY_DERIVE_TS;
|
||||
}
|
||||
|
||||
|
@@ -430,7 +430,7 @@ emit_blit_texture(struct fd_ringbuffer *ring, const struct pipe_blit_info *info)
|
||||
OUT_RING(ring, A6XX_RB_2D_DST_INFO_COLOR_FORMAT(dfmt) |
|
||||
A6XX_RB_2D_DST_INFO_TILE_MODE(dtile) |
|
||||
A6XX_RB_2D_DST_INFO_COLOR_SWAP(dswap));
|
||||
OUT_RELOC(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
|
||||
OUT_RELOCW(ring, dst->bo, doff, 0, 0); /* RB_2D_DST_LO/HI */
|
||||
OUT_RING(ring, A6XX_RB_2D_DST_SIZE_PITCH(dpitch));
|
||||
OUT_RING(ring, 0x00000000);
|
||||
OUT_RING(ring, 0x00000000);
|
||||
|
@@ -839,8 +839,7 @@ fd_resource_create(struct pipe_screen *pscreen,
|
||||
|
||||
rsc->internal_format = format;
|
||||
rsc->cpp = util_format_get_blocksize(format);
|
||||
prsc->nr_samples = MAX2(1, prsc->nr_samples);
|
||||
rsc->cpp *= prsc->nr_samples;
|
||||
rsc->cpp *= fd_resource_nr_samples(prsc);
|
||||
|
||||
assert(rsc->cpp);
|
||||
|
||||
@@ -924,9 +923,9 @@ fd_resource_from_handle(struct pipe_screen *pscreen,
|
||||
if (!rsc->bo)
|
||||
goto fail;
|
||||
|
||||
prsc->nr_samples = MAX2(1, prsc->nr_samples);
|
||||
rsc->internal_format = tmpl->format;
|
||||
rsc->cpp = prsc->nr_samples * util_format_get_blocksize(tmpl->format);
|
||||
rsc->cpp = util_format_get_blocksize(tmpl->format);
|
||||
rsc->cpp *= fd_resource_nr_samples(prsc);
|
||||
slice->pitch = handle->stride / rsc->cpp;
|
||||
slice->offset = handle->offset;
|
||||
slice->size0 = handle->stride * prsc->height0;
|
||||
|
@@ -178,6 +178,15 @@ fd_resource_level_linear(struct pipe_resource *prsc, int level)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* access # of samples, with 0 normalized to 1 (which is what we care about
|
||||
* most of the time)
|
||||
*/
|
||||
static inline unsigned
|
||||
fd_resource_nr_samples(struct pipe_resource *prsc)
|
||||
{
|
||||
return MAX2(1, prsc->nr_samples);
|
||||
}
|
||||
|
||||
void fd_blitter_pipe_begin(struct fd_context *ctx, bool render_cond, bool discard,
|
||||
enum fd_render_stage stage);
|
||||
void fd_blitter_pipe_end(struct fd_context *ctx);
|
||||
|
@@ -31,6 +31,7 @@
|
||||
|
||||
#include "freedreno_texture.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_util.h"
|
||||
|
||||
static void
|
||||
@@ -83,7 +84,7 @@ static void set_sampler_views(struct fd_texture_stateobj *tex,
|
||||
tex->num_textures = util_last_bit(tex->valid_textures);
|
||||
|
||||
for (i = 0; i < tex->num_textures; i++) {
|
||||
uint nr_samples = tex->textures[i]->texture->nr_samples;
|
||||
uint nr_samples = fd_resource_nr_samples(tex->textures[i]->texture);
|
||||
samplers |= (nr_samples >> 1) << (i * 2);
|
||||
}
|
||||
|
||||
|
@@ -1044,7 +1044,7 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
break;
|
||||
}
|
||||
case OP_MUL:
|
||||
if (i->dType == TYPE_F32)
|
||||
if (i->dType == TYPE_F32 && !i->precise)
|
||||
tryCollapseChainedMULs(i, s, imm0);
|
||||
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
|
||||
|
@@ -1279,8 +1279,8 @@ nvc0_screen_create(struct nouveau_device *dev)
|
||||
for (i = 0; i < NVC0_MAX_VIEWPORTS; i++) {
|
||||
BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(i)), 3);
|
||||
PUSH_DATA (push, 1);
|
||||
PUSH_DATA (push, 8192 << 16);
|
||||
PUSH_DATA (push, 8192 << 16);
|
||||
PUSH_DATA (push, 16384 << 16);
|
||||
PUSH_DATA (push, 16384 << 16);
|
||||
}
|
||||
|
||||
#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
|
||||
|
@@ -521,10 +521,13 @@ static void si_buffer_do_flush_region(struct pipe_context *ctx,
|
||||
struct r600_resource *rbuffer = r600_resource(transfer->resource);
|
||||
|
||||
if (stransfer->staging) {
|
||||
unsigned src_offset = stransfer->offset +
|
||||
transfer->box.x % SI_MAP_BUFFER_ALIGNMENT +
|
||||
(box->x - transfer->box.x);
|
||||
|
||||
/* Copy the staging buffer into the original one. */
|
||||
si_copy_buffer((struct si_context*)ctx, transfer->resource,
|
||||
&stransfer->staging->b.b, box->x,
|
||||
stransfer->offset + box->x % SI_MAP_BUFFER_ALIGNMENT,
|
||||
&stransfer->staging->b.b, box->x, src_offset,
|
||||
box->width);
|
||||
}
|
||||
|
||||
|
@@ -348,20 +348,11 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
|
||||
key->u.uses_gs)
|
||||
partial_vs_wave = true;
|
||||
|
||||
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
|
||||
/* Needed for 028B6C_DISTRIBUTION_MODE != 0. (implies >= VI) */
|
||||
if (sscreen->has_distributed_tess) {
|
||||
if (key->u.uses_gs) {
|
||||
if (sscreen->info.chip_class <= VI)
|
||||
if (sscreen->info.chip_class == VI)
|
||||
partial_es_wave = true;
|
||||
|
||||
/* GPU hang workaround. */
|
||||
if (sscreen->info.family == CHIP_TONGA ||
|
||||
sscreen->info.family == CHIP_FIJI ||
|
||||
sscreen->info.family == CHIP_POLARIS10 ||
|
||||
sscreen->info.family == CHIP_POLARIS11 ||
|
||||
sscreen->info.family == CHIP_POLARIS12 ||
|
||||
sscreen->info.family == CHIP_VEGAM)
|
||||
partial_vs_wave = true;
|
||||
} else {
|
||||
partial_vs_wave = true;
|
||||
}
|
||||
@@ -417,6 +408,18 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
|
||||
if (sscreen->info.max_se == 4 && !wd_switch_on_eop)
|
||||
ia_switch_on_eoi = true;
|
||||
|
||||
/* HW engineers suggested that PARTIAL_VS_WAVE_ON should be set
|
||||
* to work around a GS hang.
|
||||
*/
|
||||
if (key->u.uses_gs &&
|
||||
(sscreen->info.family == CHIP_TONGA ||
|
||||
sscreen->info.family == CHIP_FIJI ||
|
||||
sscreen->info.family == CHIP_POLARIS10 ||
|
||||
sscreen->info.family == CHIP_POLARIS11 ||
|
||||
sscreen->info.family == CHIP_POLARIS12 ||
|
||||
sscreen->info.family == CHIP_VEGAM))
|
||||
partial_vs_wave = true;
|
||||
|
||||
/* Required by Hawaii and, for some special cases, by VI. */
|
||||
if (ia_switch_on_eoi &&
|
||||
(sscreen->info.family == CHIP_HAWAII ||
|
||||
|
@@ -1662,7 +1662,7 @@ static inline void si_shader_selector_key(struct pipe_context *ctx,
|
||||
key->part.ps.epilog.alpha_func = si_get_alpha_test_func(sctx);
|
||||
|
||||
/* ps_uses_fbfetch is true only if the color buffer is bound. */
|
||||
if (sctx->ps_uses_fbfetch) {
|
||||
if (sctx->ps_uses_fbfetch && !sctx->blitter->running) {
|
||||
struct pipe_surface *cb0 = sctx->framebuffer.state.cbufs[0];
|
||||
struct pipe_resource *tex = cb0->texture;
|
||||
|
||||
|
@@ -146,6 +146,8 @@ static void si_emit_one_scissor(struct si_context *ctx,
|
||||
S_028254_BR_Y(final.maxy));
|
||||
}
|
||||
|
||||
#define MAX_PA_SU_HARDWARE_SCREEN_OFFSET 8176
|
||||
|
||||
static void si_emit_guardband(struct si_context *ctx)
|
||||
{
|
||||
const struct si_state_rasterizer *rs = ctx->queued.named.rasterizer;
|
||||
@@ -179,13 +181,22 @@ static void si_emit_guardband(struct si_context *ctx)
|
||||
int hw_screen_offset_x = (vp_as_scissor.maxx + vp_as_scissor.minx) / 2;
|
||||
int hw_screen_offset_y = (vp_as_scissor.maxy + vp_as_scissor.miny) / 2;
|
||||
|
||||
const unsigned hw_screen_offset_max = 8176;
|
||||
/* SI-CI need to align the offset to an ubertile consisting of all SEs. */
|
||||
const unsigned hw_screen_offset_alignment =
|
||||
ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
|
||||
|
||||
hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, hw_screen_offset_max);
|
||||
hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, hw_screen_offset_max);
|
||||
/* Indexed by quantization modes */
|
||||
static unsigned max_viewport_size[] = {65535, 16383, 4095};
|
||||
|
||||
/* Ensure that the whole viewport stays representable in
|
||||
* absolute coordinates.
|
||||
* See comment in si_set_viewport_states.
|
||||
*/
|
||||
assert(vp_as_scissor.maxx <= max_viewport_size[vp_as_scissor.quant_mode] &&
|
||||
vp_as_scissor.maxy <= max_viewport_size[vp_as_scissor.quant_mode]);
|
||||
|
||||
hw_screen_offset_x = CLAMP(hw_screen_offset_x, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
|
||||
hw_screen_offset_y = CLAMP(hw_screen_offset_y, 0, MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
|
||||
|
||||
/* Align the screen offset by dropping the low bits. */
|
||||
hw_screen_offset_x &= ~(hw_screen_offset_alignment - 1);
|
||||
@@ -218,7 +229,6 @@ static void si_emit_guardband(struct si_context *ctx)
|
||||
*
|
||||
* The viewport range is [-max_viewport_size/2, max_viewport_size/2].
|
||||
*/
|
||||
static unsigned max_viewport_size[] = {65535, 16383, 4095};
|
||||
assert(vp_as_scissor.quant_mode < ARRAY_SIZE(max_viewport_size));
|
||||
max_range = max_viewport_size[vp_as_scissor.quant_mode] / 2;
|
||||
left = (-max_range - vp.translate[0]) / vp.scale[0];
|
||||
@@ -332,6 +342,22 @@ static void si_set_viewport_states(struct pipe_context *pctx,
|
||||
unsigned h = scissor->maxy - scissor->miny;
|
||||
unsigned max_extent = MAX2(w, h);
|
||||
|
||||
int max_corner = MAX2(scissor->maxx, scissor->maxy);
|
||||
|
||||
unsigned center_x = (scissor->maxx + scissor->minx) / 2;
|
||||
unsigned center_y = (scissor->maxy + scissor->miny) / 2;
|
||||
unsigned max_center = MAX2(center_x, center_y);
|
||||
|
||||
/* PA_SU_HARDWARE_SCREEN_OFFSET can't center viewports whose
|
||||
* center start farther than MAX_PA_SU_HARDWARE_SCREEN_OFFSET.
|
||||
* (for example, a 1x1 viewport in the lower right corner of
|
||||
* 16Kx16K) Such viewports need a greater guardband, so they
|
||||
* have to use a worse quantization mode.
|
||||
*/
|
||||
unsigned distance_off_center =
|
||||
MAX2(0, (int)max_center - MAX_PA_SU_HARDWARE_SCREEN_OFFSET);
|
||||
max_extent += distance_off_center;
|
||||
|
||||
/* Determine the best quantization mode (subpixel precision),
|
||||
* but also leave enough space for the guardband.
|
||||
*
|
||||
@@ -343,7 +369,22 @@ static void si_set_viewport_states(struct pipe_context *pctx,
|
||||
if (ctx->family == CHIP_RAVEN)
|
||||
max_extent = 16384; /* Use QUANT_MODE == 16_8. */
|
||||
|
||||
if (max_extent <= 1024) /* 4K scanline area for guardband */
|
||||
/* Another constraint is that all coordinates in the viewport
|
||||
* are representable in fixed point with respect to the
|
||||
* surface origin.
|
||||
*
|
||||
* It means that PA_SU_HARDWARE_SCREEN_OFFSET can't be given
|
||||
* an offset that would make the upper corner of the viewport
|
||||
* greater than the maximum representable number post
|
||||
* quantization, ie 2^quant_bits.
|
||||
*
|
||||
* This does not matter for 14.10 and 16.8 formats since the
|
||||
* offset is already limited at 8k, but it means we can't use
|
||||
* 12.12 if we are drawing to some pixels outside the lower
|
||||
* 4k x 4k of the render target.
|
||||
*/
|
||||
|
||||
if (max_extent <= 1024 && max_corner < 4096) /* 4K scanline area for guardband */
|
||||
scissor->quant_mode = SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH;
|
||||
else if (max_extent <= 4096) /* 16K scanline area for guardband */
|
||||
scissor->quant_mode = SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH;
|
||||
|
@@ -190,11 +190,7 @@ swr_arch_libs = []
|
||||
swr_arch_defines = []
|
||||
|
||||
swr_avx_args = cpp.first_supported_argument(
|
||||
'-target-cpu=sandybridge', '-mavx', '-march=core-avx', '-tp=sandybridge',
|
||||
prefix : '''
|
||||
#if !defined(__AVX__)
|
||||
# error
|
||||
#endif ''',
|
||||
'-mavx', '-target-cpu=sandybridge', '-march=core-avx', '-tp=sandybridge',
|
||||
)
|
||||
if swr_avx_args == []
|
||||
error('Cannot find AVX support for swr. (these are required for SWR an all architectures.)')
|
||||
@@ -215,18 +211,10 @@ endif
|
||||
|
||||
if with_swr_arches.contains('avx2')
|
||||
swr_avx2_args = cpp.first_supported_argument(
|
||||
'-target-cpu=haswell', '-march=core-avx2', '-tp=haswell',
|
||||
prefix : '''
|
||||
#if !defined(__AVX2__)
|
||||
# error
|
||||
#endif ''',
|
||||
'-march=core-avx2', '-target-cpu=haswell', '-tp=haswell',
|
||||
)
|
||||
if swr_avx2_args == []
|
||||
if cpp.has_argument(['-mavx2', '-mfma', '-mbmi2', '-mf16c'],
|
||||
prefix : '''
|
||||
#if !defined(__AVX2__)
|
||||
# error
|
||||
#endif ''')
|
||||
if cpp.has_argument(['-mavx2', '-mfma', '-mbmi2', '-mf16c'])
|
||||
swr_avx2_args = ['-mavx2', '-mfma', '-mbmi2', '-mf16c']
|
||||
else
|
||||
error('Cannot find AVX2 support for swr.')
|
||||
@@ -248,11 +236,7 @@ endif
|
||||
|
||||
if with_swr_arches.contains('knl')
|
||||
swr_knl_args = cpp.first_supported_argument(
|
||||
'-target-cpu=mic-knl', '-march=knl', '-xMIC-AVX512',
|
||||
prefix : '''
|
||||
#if !defined(__AVX512F__) || !defined(__AVX512ER__)
|
||||
# error
|
||||
#endif ''',
|
||||
'-march=knl', '-target-cpu=mic-knl', '-xMIC-AVX512',
|
||||
)
|
||||
if swr_knl_args == []
|
||||
error('Cannot find KNL support for swr.')
|
||||
@@ -264,7 +248,7 @@ if with_swr_arches.contains('knl')
|
||||
[files_swr_common, files_swr_arch],
|
||||
cpp_args : [
|
||||
swr_cpp_args, swr_knl_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512',
|
||||
'-DKNOB_ARCH_KNIGHTS',
|
||||
'-DSIMD_ARCH_KNIGHTS',
|
||||
],
|
||||
link_args : [ld_args_gc_sections],
|
||||
include_directories : [swr_incs],
|
||||
@@ -276,11 +260,7 @@ endif
|
||||
|
||||
if with_swr_arches.contains('skx')
|
||||
swr_skx_args = cpp.first_supported_argument(
|
||||
'-target-cpu=x86-skylake', '-march=skylake-avx512', '-xCORE-AVX512',
|
||||
prefix : '''
|
||||
#if !defined(__AVX512F__) || !defined(__AVX512BW__)
|
||||
# error
|
||||
#endif ''',
|
||||
'-march=skylake-avx512', '-target-cpu=x86-skylake', '-xCORE-AVX512',
|
||||
)
|
||||
if swr_skx_args == []
|
||||
error('Cannot find SKX support for swr.')
|
||||
|
@@ -50,7 +50,9 @@ swr_fence_cb(uint64_t userData, uint64_t userData2, uint64_t userData3)
|
||||
swr_fence_do_work(fence);
|
||||
|
||||
/* Correct value is in SwrSync data, and not the fence write field. */
|
||||
fence->read = userData2;
|
||||
/* Contexts may not finish in order, but fence value always increases */
|
||||
if (fence->read < userData2)
|
||||
fence->read = userData2;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -669,7 +669,7 @@ v3d_resource_create_with_modifiers(struct pipe_screen *pscreen,
|
||||
rsc->tiled = false;
|
||||
} else {
|
||||
fprintf(stderr, "Unsupported modifier requested\n");
|
||||
return NULL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
rsc->internal_format = prsc->format;
|
||||
|
@@ -81,8 +81,10 @@ files_libvc4 = files(
|
||||
'vc4_uniforms.c',
|
||||
)
|
||||
|
||||
vc4_c_args = []
|
||||
|
||||
libvc4_neon = []
|
||||
if with_asm_arch == 'arm'
|
||||
if host_machine.cpu_family() == 'arm'
|
||||
libvc4_neon = static_library(
|
||||
'vc4_neon',
|
||||
'vc4_tiling_lt_neon.c',
|
||||
@@ -91,12 +93,12 @@ if with_asm_arch == 'arm'
|
||||
],
|
||||
c_args : '-mfpu=neon',
|
||||
)
|
||||
vc4_c_args += '-DUSE_ARM_ASM'
|
||||
endif
|
||||
|
||||
simpenrose_c_args = []
|
||||
dep_simpenrose = dependency('simpenrose', required : false)
|
||||
if dep_simpenrose.found()
|
||||
simpenrose_c_args = '-DUSE_VC4_SIMULATOR'
|
||||
vc4_c_args += '-DUSE_VC4_SIMULATOR'
|
||||
endif
|
||||
|
||||
libvc4 = static_library(
|
||||
@@ -107,7 +109,7 @@ libvc4 = static_library(
|
||||
inc_gallium_drivers, inc_drm_uapi,
|
||||
],
|
||||
link_with: libvc4_neon,
|
||||
c_args : [c_vis_args, simpenrose_c_args],
|
||||
c_args : [c_vis_args, vc4_c_args],
|
||||
cpp_args : [cpp_vis_args],
|
||||
dependencies : [dep_simpenrose, dep_libdrm, dep_valgrind, idep_nir_headers],
|
||||
build_by_default : false,
|
||||
|
@@ -132,7 +132,7 @@ vc4_create_batch_query(struct pipe_context *pctx, unsigned num_queries,
|
||||
|
||||
/* We can't mix HW and non-HW queries. */
|
||||
if (nhwqueries && nhwqueries != num_queries)
|
||||
return NULL;
|
||||
goto err_free_query;
|
||||
|
||||
if (!nhwqueries)
|
||||
return (struct pipe_query *)query;
|
||||
|
@@ -73,42 +73,46 @@ vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp)
|
||||
/* Load from the GPU in one shot, no interleave, to
|
||||
* d0-d7.
|
||||
*/
|
||||
"vldm %0, {q0, q1, q2, q3}\n"
|
||||
"vldm %[gpu], {q0, q1, q2, q3}\n"
|
||||
/* Store each 8-byte line to cpu-side destination,
|
||||
* incrementing it by the stride each time.
|
||||
*/
|
||||
"vst1.8 d0, [%1], %2\n"
|
||||
"vst1.8 d1, [%1], %2\n"
|
||||
"vst1.8 d2, [%1], %2\n"
|
||||
"vst1.8 d3, [%1], %2\n"
|
||||
"vst1.8 d4, [%1], %2\n"
|
||||
"vst1.8 d5, [%1], %2\n"
|
||||
"vst1.8 d6, [%1], %2\n"
|
||||
"vst1.8 d7, [%1]\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu_stride)
|
||||
"vst1.8 d0, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d1, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d2, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d3, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d4, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d5, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d6, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d7, [%[cpu]]\n"
|
||||
: [cpu] "+r"(cpu)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "q0", "q1", "q2", "q3");
|
||||
} else {
|
||||
assert(gpu_stride == 16);
|
||||
void *cpu2 = cpu + 8;
|
||||
__asm__ volatile (
|
||||
/* Load from the GPU in one shot, no interleave, to
|
||||
* d0-d7.
|
||||
*/
|
||||
"vldm %0, {q0, q1, q2, q3};\n"
|
||||
"vldm %[gpu], {q0, q1, q2, q3};\n"
|
||||
/* Store each 16-byte line in 2 parts to the cpu-side
|
||||
* destination. (vld1 can only store one d-register
|
||||
* at a time).
|
||||
*/
|
||||
"vst1.8 d0, [%1], %3\n"
|
||||
"vst1.8 d1, [%2], %3\n"
|
||||
"vst1.8 d2, [%1], %3\n"
|
||||
"vst1.8 d3, [%2], %3\n"
|
||||
"vst1.8 d4, [%1], %3\n"
|
||||
"vst1.8 d5, [%2], %3\n"
|
||||
"vst1.8 d6, [%1]\n"
|
||||
"vst1.8 d7, [%2]\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu + 8), "r"(cpu_stride)
|
||||
"vst1.8 d0, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d1, [%[cpu2]],%[cpu_stride]\n"
|
||||
"vst1.8 d2, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d3, [%[cpu2]],%[cpu_stride]\n"
|
||||
"vst1.8 d4, [%[cpu]], %[cpu_stride]\n"
|
||||
"vst1.8 d5, [%[cpu2]],%[cpu_stride]\n"
|
||||
"vst1.8 d6, [%[cpu]]\n"
|
||||
"vst1.8 d7, [%[cpu2]]\n"
|
||||
: [cpu] "+r"(cpu),
|
||||
[cpu2] "+r"(cpu2)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "q0", "q1", "q2", "q3");
|
||||
}
|
||||
#elif defined (PIPE_ARCH_AARCH64)
|
||||
@@ -117,42 +121,46 @@ vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp)
|
||||
/* Load from the GPU in one shot, no interleave, to
|
||||
* d0-d7.
|
||||
*/
|
||||
"ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%0]\n"
|
||||
"ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
|
||||
/* Store each 8-byte line to cpu-side destination,
|
||||
* incrementing it by the stride each time.
|
||||
*/
|
||||
"st1 {v0.D}[0], [%1], %2\n"
|
||||
"st1 {v0.D}[1], [%1], %2\n"
|
||||
"st1 {v1.D}[0], [%1], %2\n"
|
||||
"st1 {v1.D}[1], [%1], %2\n"
|
||||
"st1 {v2.D}[0], [%1], %2\n"
|
||||
"st1 {v2.D}[1], [%1], %2\n"
|
||||
"st1 {v3.D}[0], [%1], %2\n"
|
||||
"st1 {v3.D}[1], [%1]\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu_stride)
|
||||
"st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v3.D}[1], [%[cpu]]\n"
|
||||
: [cpu] "+r"(cpu)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "v0", "v1", "v2", "v3");
|
||||
} else {
|
||||
assert(gpu_stride == 16);
|
||||
void *cpu2 = cpu + 8;
|
||||
__asm__ volatile (
|
||||
/* Load from the GPU in one shot, no interleave, to
|
||||
* d0-d7.
|
||||
*/
|
||||
"ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%0]\n"
|
||||
"ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
|
||||
/* Store each 16-byte line in 2 parts to the cpu-side
|
||||
* destination. (vld1 can only store one d-register
|
||||
* at a time).
|
||||
*/
|
||||
"st1 {v0.D}[0], [%1], %3\n"
|
||||
"st1 {v0.D}[1], [%2], %3\n"
|
||||
"st1 {v1.D}[0], [%1], %3\n"
|
||||
"st1 {v1.D}[1], [%2], %3\n"
|
||||
"st1 {v2.D}[0], [%1], %3\n"
|
||||
"st1 {v2.D}[1], [%2], %3\n"
|
||||
"st1 {v3.D}[0], [%1]\n"
|
||||
"st1 {v3.D}[1], [%2]\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu + 8), "r"(cpu_stride)
|
||||
"st1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n"
|
||||
"st1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n"
|
||||
"st1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"st1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n"
|
||||
"st1 {v3.D}[0], [%[cpu]]\n"
|
||||
"st1 {v3.D}[1], [%[cpu2]]\n"
|
||||
: [cpu] "+r"(cpu),
|
||||
[cpu2] "+r"(cpu2)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "v0", "v1", "v2", "v3");
|
||||
}
|
||||
#else
|
||||
@@ -174,40 +182,44 @@ vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp)
|
||||
/* Load each 8-byte line from cpu-side source,
|
||||
* incrementing it by the stride each time.
|
||||
*/
|
||||
"vld1.8 d0, [%1], %2\n"
|
||||
"vld1.8 d1, [%1], %2\n"
|
||||
"vld1.8 d2, [%1], %2\n"
|
||||
"vld1.8 d3, [%1], %2\n"
|
||||
"vld1.8 d4, [%1], %2\n"
|
||||
"vld1.8 d5, [%1], %2\n"
|
||||
"vld1.8 d6, [%1], %2\n"
|
||||
"vld1.8 d7, [%1]\n"
|
||||
"vld1.8 d0, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d1, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d2, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d3, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d4, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d5, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d6, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d7, [%[cpu]]\n"
|
||||
/* Load from the GPU in one shot, no interleave, to
|
||||
* d0-d7.
|
||||
*/
|
||||
"vstm %0, {q0, q1, q2, q3}\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu_stride)
|
||||
"vstm %[gpu], {q0, q1, q2, q3}\n"
|
||||
: [cpu] "+r"(cpu)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "q0", "q1", "q2", "q3");
|
||||
} else {
|
||||
assert(gpu_stride == 16);
|
||||
void *cpu2 = cpu + 8;
|
||||
__asm__ volatile (
|
||||
/* Load each 16-byte line in 2 parts from the cpu-side
|
||||
* destination. (vld1 can only store one d-register
|
||||
* at a time).
|
||||
*/
|
||||
"vld1.8 d0, [%1], %3\n"
|
||||
"vld1.8 d1, [%2], %3\n"
|
||||
"vld1.8 d2, [%1], %3\n"
|
||||
"vld1.8 d3, [%2], %3\n"
|
||||
"vld1.8 d4, [%1], %3\n"
|
||||
"vld1.8 d5, [%2], %3\n"
|
||||
"vld1.8 d6, [%1]\n"
|
||||
"vld1.8 d7, [%2]\n"
|
||||
"vld1.8 d0, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d1, [%[cpu2]],%[cpu_stride]\n"
|
||||
"vld1.8 d2, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d3, [%[cpu2]],%[cpu_stride]\n"
|
||||
"vld1.8 d4, [%[cpu]], %[cpu_stride]\n"
|
||||
"vld1.8 d5, [%[cpu2]],%[cpu_stride]\n"
|
||||
"vld1.8 d6, [%[cpu]]\n"
|
||||
"vld1.8 d7, [%[cpu2]]\n"
|
||||
/* Store to the GPU in one shot, no interleave. */
|
||||
"vstm %0, {q0, q1, q2, q3}\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu + 8), "r"(cpu_stride)
|
||||
"vstm %[gpu], {q0, q1, q2, q3}\n"
|
||||
: [cpu] "+r"(cpu),
|
||||
[cpu2] "+r"(cpu2)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "q0", "q1", "q2", "q3");
|
||||
}
|
||||
#elif defined (PIPE_ARCH_AARCH64)
|
||||
@@ -216,38 +228,42 @@ vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp)
|
||||
/* Load each 8-byte line from cpu-side source,
|
||||
* incrementing it by the stride each time.
|
||||
*/
|
||||
"ld1 {v0.D}[0], [%1], %2\n"
|
||||
"ld1 {v0.D}[1], [%1], %2\n"
|
||||
"ld1 {v1.D}[0], [%1], %2\n"
|
||||
"ld1 {v1.D}[1], [%1], %2\n"
|
||||
"ld1 {v2.D}[0], [%1], %2\n"
|
||||
"ld1 {v2.D}[1], [%1], %2\n"
|
||||
"ld1 {v3.D}[0], [%1], %2\n"
|
||||
"ld1 {v3.D}[1], [%1]\n"
|
||||
"ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v0.D}[1], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v1.D}[1], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v2.D}[1], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v3.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v3.D}[1], [%[cpu]]\n"
|
||||
/* Store to the GPU in one shot, no interleave. */
|
||||
"st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%0]\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu_stride)
|
||||
"st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
|
||||
: [cpu] "+r"(cpu)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "v0", "v1", "v2", "v3");
|
||||
} else {
|
||||
assert(gpu_stride == 16);
|
||||
void *cpu2 = cpu + 8;
|
||||
__asm__ volatile (
|
||||
/* Load each 16-byte line in 2 parts from the cpu-side
|
||||
* destination. (vld1 can only store one d-register
|
||||
* at a time).
|
||||
*/
|
||||
"ld1 {v0.D}[0], [%1], %3\n"
|
||||
"ld1 {v0.D}[1], [%2], %3\n"
|
||||
"ld1 {v1.D}[0], [%1], %3\n"
|
||||
"ld1 {v1.D}[1], [%2], %3\n"
|
||||
"ld1 {v2.D}[0], [%1], %3\n"
|
||||
"ld1 {v2.D}[1], [%2], %3\n"
|
||||
"ld1 {v3.D}[0], [%1]\n"
|
||||
"ld1 {v3.D}[1], [%2]\n"
|
||||
"ld1 {v0.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v0.D}[1], [%[cpu2]],%[cpu_stride]\n"
|
||||
"ld1 {v1.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v1.D}[1], [%[cpu2]],%[cpu_stride]\n"
|
||||
"ld1 {v2.D}[0], [%[cpu]], %[cpu_stride]\n"
|
||||
"ld1 {v2.D}[1], [%[cpu2]],%[cpu_stride]\n"
|
||||
"ld1 {v3.D}[0], [%[cpu]]\n"
|
||||
"ld1 {v3.D}[1], [%[cpu2]]\n"
|
||||
/* Store to the GPU in one shot, no interleave. */
|
||||
"st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%0]\n"
|
||||
:
|
||||
: "r"(gpu), "r"(cpu), "r"(cpu + 8), "r"(cpu_stride)
|
||||
"st1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%[gpu]]\n"
|
||||
: [cpu] "+r"(cpu),
|
||||
[cpu2] "+r"(cpu2)
|
||||
: [gpu] "r"(gpu),
|
||||
[cpu_stride] "r"(cpu_stride)
|
||||
: "v0", "v1", "v2", "v3");
|
||||
}
|
||||
#else
|
||||
|
@@ -70,7 +70,8 @@ enum pipe_video_profile
|
||||
PIPE_VIDEO_PROFILE_HEVC_MAIN_444,
|
||||
PIPE_VIDEO_PROFILE_JPEG_BASELINE,
|
||||
PIPE_VIDEO_PROFILE_VP9_PROFILE0,
|
||||
PIPE_VIDEO_PROFILE_VP9_PROFILE2
|
||||
PIPE_VIDEO_PROFILE_VP9_PROFILE2,
|
||||
PIPE_VIDEO_PROFILE_MAX
|
||||
};
|
||||
|
||||
/* Video caps, can be different for each codec/profile */
|
||||
|
@@ -20,7 +20,7 @@ struct drisw_loader_funcs
|
||||
void (*put_image2) (struct dri_drawable *dri_drawable,
|
||||
void *data, int x, int y, unsigned width, unsigned height, unsigned stride);
|
||||
void (*put_image_shm) (struct dri_drawable *dri_drawable,
|
||||
int shmid, char *shmaddr, unsigned offset,
|
||||
int shmid, char *shmaddr, unsigned offset, unsigned offset_x,
|
||||
int x, int y, unsigned width, unsigned height, unsigned stride);
|
||||
};
|
||||
|
||||
|
@@ -79,15 +79,21 @@ put_image2(__DRIdrawable *dPriv, void *data, int x, int y,
|
||||
|
||||
static inline void
|
||||
put_image_shm(__DRIdrawable *dPriv, int shmid, char *shmaddr,
|
||||
unsigned offset, int x, int y,
|
||||
unsigned offset, unsigned offset_x, int x, int y,
|
||||
unsigned width, unsigned height, unsigned stride)
|
||||
{
|
||||
__DRIscreen *sPriv = dPriv->driScreenPriv;
|
||||
const __DRIswrastLoaderExtension *loader = sPriv->swrast_loader;
|
||||
|
||||
loader->putImageShm(dPriv, __DRI_SWRAST_IMAGE_OP_SWAP,
|
||||
x, y, width, height, stride,
|
||||
shmid, shmaddr, offset, dPriv->loaderPrivate);
|
||||
/* if we have the newer interface, don't have to add the offset_x here. */
|
||||
if (loader->base.version > 4 && loader->putImageShm2)
|
||||
loader->putImageShm2(dPriv, __DRI_SWRAST_IMAGE_OP_SWAP,
|
||||
x, y, width, height, stride,
|
||||
shmid, shmaddr, offset, dPriv->loaderPrivate);
|
||||
else
|
||||
loader->putImageShm(dPriv, __DRI_SWRAST_IMAGE_OP_SWAP,
|
||||
x, y, width, height, stride,
|
||||
shmid, shmaddr, offset + offset_x, dPriv->loaderPrivate);
|
||||
}
|
||||
|
||||
static inline void
|
||||
@@ -179,12 +185,13 @@ drisw_put_image2(struct dri_drawable *drawable,
|
||||
static inline void
|
||||
drisw_put_image_shm(struct dri_drawable *drawable,
|
||||
int shmid, char *shmaddr, unsigned offset,
|
||||
unsigned offset_x,
|
||||
int x, int y, unsigned width, unsigned height,
|
||||
unsigned stride)
|
||||
{
|
||||
__DRIdrawable *dPriv = drawable->dPriv;
|
||||
|
||||
put_image_shm(dPriv, shmid, shmaddr, offset, x, y, width, height, stride);
|
||||
put_image_shm(dPriv, shmid, shmaddr, offset, offset_x, x, y, width, height, stride);
|
||||
}
|
||||
|
||||
static inline void
|
||||
|
@@ -668,6 +668,19 @@ NineSurface9_CopyMemToDefault( struct NineSurface9 *This,
|
||||
From->data, From->stride,
|
||||
0, /* depth = 1 */
|
||||
&src_box);
|
||||
if (From->texture == D3DRTYPE_TEXTURE) {
|
||||
struct NineTexture9 *tex =
|
||||
NineTexture9(From->base.base.container);
|
||||
/* D3DPOOL_SYSTEMMEM with buffer content passed
|
||||
* from the user: execute the upload right now.
|
||||
* It is possible it is enough to delay upload
|
||||
* until the surface refcount is 0, but the
|
||||
* bind refcount may not be 0, and thus the dtor
|
||||
* is not executed (and doesn't trigger the
|
||||
* pending_uploads_counter check). */
|
||||
if (!tex->managed_buffer)
|
||||
nine_csmt_process(This->base.base.device);
|
||||
}
|
||||
|
||||
if (This->data_conversion)
|
||||
(void) util_format_translate(This->format_conversion,
|
||||
|
@@ -175,7 +175,7 @@ VA_DRIVER_INIT_FUNC(VADriverContextP ctx)
|
||||
ctx->version_minor = 1;
|
||||
*ctx->vtable = vtable;
|
||||
*ctx->vtable_vpp = vtable_vpp;
|
||||
ctx->max_profiles = PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH - PIPE_VIDEO_PROFILE_UNKNOWN;
|
||||
ctx->max_profiles = PIPE_VIDEO_PROFILE_MAX - PIPE_VIDEO_PROFILE_UNKNOWN - 1;
|
||||
ctx->max_entrypoints = 2;
|
||||
ctx->max_attributes = 1;
|
||||
ctx->max_image_formats = VL_VA_MAX_IMAGE_FORMATS;
|
||||
|
@@ -28,6 +28,8 @@
|
||||
#include "vl/vl_vlc.h"
|
||||
#include "va_private.h"
|
||||
|
||||
#define NUM_VP9_REFS 8
|
||||
|
||||
void vlVaHandlePictureParameterBufferVP9(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
|
||||
{
|
||||
VADecPictureParameterBufferVP9 *vp9 = buf->data;
|
||||
@@ -79,8 +81,11 @@ void vlVaHandlePictureParameterBufferVP9(vlVaDriver *drv, vlVaContext *context,
|
||||
|
||||
context->desc.vp9.picture_parameter.bit_depth = vp9->bit_depth;
|
||||
|
||||
for (i = 0 ; i < 8 ; i++)
|
||||
for (i = 0 ; i < NUM_VP9_REFS ; i++)
|
||||
vlVaGetReferenceFrame(drv, vp9->reference_frames[i], &context->desc.vp9.ref[i]);
|
||||
|
||||
if (!context->decoder && !context->templat.max_references)
|
||||
context->templat.max_references = NUM_VP9_REFS;
|
||||
}
|
||||
|
||||
void vlVaHandleSliceParameterBufferVP9(vlVaContext *context, vlVaBuffer *buf)
|
||||
|
@@ -18,13 +18,20 @@
|
||||
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
# SOFTWARE.
|
||||
|
||||
VDPAU_MAJOR = 1
|
||||
VDPAU_MINOR = 0
|
||||
|
||||
libvdpau_st = static_library(
|
||||
'vdpau_st',
|
||||
files(
|
||||
'bitmap.c', 'decode.c', 'device.c', 'ftab.c', 'htab.c', 'mixer.c',
|
||||
'output.c', 'preemption.c', 'presentation.c', 'query.c', 'surface.c',
|
||||
),
|
||||
c_args : [c_vis_args, '-DVER_MAJOR=1', '-DVER_MINOR=0'],
|
||||
c_args : [
|
||||
c_vis_args,
|
||||
'-DVER_MAJOR=@0@'.format(VDPAU_MAJOR),
|
||||
'-DVER_MINOR=@0@'.format(VDPAU_MINOR),
|
||||
],
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_util, inc_gallium, inc_gallium_aux,
|
||||
],
|
||||
|
@@ -90,15 +90,15 @@ Status XvMCSetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int
|
||||
if (!attr)
|
||||
return XvMCBadContext;
|
||||
|
||||
if (strcmp(attr, XV_BRIGHTNESS))
|
||||
if (strcmp(attr, XV_BRIGHTNESS) == 0)
|
||||
context_priv->procamp.brightness = value / 1000.0f;
|
||||
else if (strcmp(attr, XV_CONTRAST))
|
||||
else if (strcmp(attr, XV_CONTRAST) == 0)
|
||||
context_priv->procamp.contrast = value / 1000.0f + 1.0f;
|
||||
else if (strcmp(attr, XV_SATURATION))
|
||||
else if (strcmp(attr, XV_SATURATION) == 0)
|
||||
context_priv->procamp.saturation = value / 1000.0f + 1.0f;
|
||||
else if (strcmp(attr, XV_HUE))
|
||||
else if (strcmp(attr, XV_HUE) == 0)
|
||||
context_priv->procamp.hue = value / 1000.0f;
|
||||
else if (strcmp(attr, XV_COLORSPACE))
|
||||
else if (strcmp(attr, XV_COLORSPACE) == 0)
|
||||
context_priv->color_standard = value ?
|
||||
VL_CSC_COLOR_STANDARD_BT_601 :
|
||||
VL_CSC_COLOR_STANDARD_BT_709;
|
||||
@@ -134,15 +134,15 @@ Status XvMCGetAttribute(Display *dpy, XvMCContext *context, Atom attribute, int
|
||||
if (!attr)
|
||||
return XvMCBadContext;
|
||||
|
||||
if (strcmp(attr, XV_BRIGHTNESS))
|
||||
if (strcmp(attr, XV_BRIGHTNESS) == 0)
|
||||
*value = context_priv->procamp.brightness * 1000;
|
||||
else if (strcmp(attr, XV_CONTRAST))
|
||||
else if (strcmp(attr, XV_CONTRAST) == 0)
|
||||
*value = context_priv->procamp.contrast * 1000 - 1000;
|
||||
else if (strcmp(attr, XV_SATURATION))
|
||||
else if (strcmp(attr, XV_SATURATION) == 0)
|
||||
*value = context_priv->procamp.saturation * 1000 + 1000;
|
||||
else if (strcmp(attr, XV_HUE))
|
||||
else if (strcmp(attr, XV_HUE) == 0)
|
||||
*value = context_priv->procamp.hue * 1000;
|
||||
else if (strcmp(attr, XV_COLORSPACE))
|
||||
else if (strcmp(attr, XV_COLORSPACE) == 0)
|
||||
*value = context_priv->color_standard == VL_CSC_COLOR_STANDARD_BT_709;
|
||||
else
|
||||
return BadName;
|
||||
|
@@ -123,11 +123,11 @@ void ParseArgs(int argc, char **argv, struct Config *config)
|
||||
|
||||
while (token && !fail)
|
||||
{
|
||||
if (strcmp(token, "i"))
|
||||
if (strcmp(token, "i") == 0)
|
||||
config->mb_types |= MB_TYPE_I;
|
||||
else if (strcmp(token, "p"))
|
||||
else if (strcmp(token, "p") == 0)
|
||||
config->mb_types |= MB_TYPE_P;
|
||||
else if (strcmp(token, "b"))
|
||||
else if (strcmp(token, "b") == 0)
|
||||
config->mb_types |= MB_TYPE_B;
|
||||
else
|
||||
fail = 1;
|
||||
|
@@ -54,13 +54,14 @@ libvdpau_gallium = shared_library(
|
||||
dep_thread, driver_r300, driver_r600, driver_radeonsi, driver_nouveau,
|
||||
],
|
||||
link_depends : vdpau_link_depends,
|
||||
soversion : '@0@.@1@.0'.format(VDPAU_MAJOR, VDPAU_MINOR),
|
||||
)
|
||||
foreach d : [[with_gallium_r300, 'r300'],
|
||||
[with_gallium_r600, 'r600'],
|
||||
[with_gallium_radeonsi, 'radeonsi'],
|
||||
[with_gallium_nouveau, 'nouveau']]
|
||||
if d[0]
|
||||
vdpau_drivers += 'libvdpau_@0@.so.1.0.0'.format(d[1])
|
||||
vdpau_drivers += 'libvdpau_@0@.so.@1@.@2@.0'.format(d[1], VDPAU_MAJOR, VDPAU_MINOR)
|
||||
endif
|
||||
endforeach
|
||||
|
||||
|
@@ -1217,8 +1217,6 @@ static void amdgpu_add_fence_dependencies_bo_lists(struct amdgpu_cs *acs)
|
||||
{
|
||||
struct amdgpu_cs_context *cs = acs->csc;
|
||||
|
||||
cs->num_fence_dependencies = 0;
|
||||
|
||||
amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_real_buffers, cs->real_buffers);
|
||||
amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_slab_buffers, cs->slab_buffers);
|
||||
amdgpu_add_fence_dependencies_bo_list(acs, cs->fence, cs->num_sparse_buffers, cs->sparse_buffers);
|
||||
|
@@ -244,15 +244,20 @@ dri_sw_displaytarget_display(struct sw_winsys *ws,
|
||||
unsigned width, height, x = 0, y = 0;
|
||||
unsigned blsize = util_format_get_blocksize(dri_sw_dt->format);
|
||||
unsigned offset = 0;
|
||||
unsigned offset_x = 0;
|
||||
char *data = dri_sw_dt->data;
|
||||
|
||||
bool is_shm = dri_sw_dt->shmid != -1;
|
||||
/* Set the width to 'stride / cpp'.
|
||||
*
|
||||
* PutImage correctly clips to the width of the dst drawable.
|
||||
*/
|
||||
if (box) {
|
||||
offset = (dri_sw_dt->stride * box->y) + box->x * blsize;
|
||||
offset = dri_sw_dt->stride * box->y;
|
||||
offset_x = box->x * blsize;
|
||||
data += offset;
|
||||
/* don't add x offset for shm, the put_image_shm will deal with it */
|
||||
if (!is_shm)
|
||||
data += offset_x;
|
||||
x = box->x;
|
||||
y = box->y;
|
||||
width = box->width;
|
||||
@@ -262,8 +267,8 @@ dri_sw_displaytarget_display(struct sw_winsys *ws,
|
||||
height = dri_sw_dt->height;
|
||||
}
|
||||
|
||||
if (dri_sw_dt->shmid != -1) {
|
||||
dri_sw_ws->lf->put_image_shm(dri_drawable, dri_sw_dt->shmid, dri_sw_dt->data, offset,
|
||||
if (is_shm) {
|
||||
dri_sw_ws->lf->put_image_shm(dri_drawable, dri_sw_dt->shmid, dri_sw_dt->data, offset, offset_x,
|
||||
x, y, width, height, dri_sw_dt->stride);
|
||||
return;
|
||||
}
|
||||
|
@@ -396,6 +396,7 @@ xlib_displaytarget_create(struct sw_winsys *winsys,
|
||||
{
|
||||
struct xlib_displaytarget *xlib_dt;
|
||||
unsigned nblocksy, size;
|
||||
int ignore;
|
||||
|
||||
xlib_dt = CALLOC_STRUCT(xlib_displaytarget);
|
||||
if (!xlib_dt)
|
||||
@@ -410,7 +411,8 @@ xlib_displaytarget_create(struct sw_winsys *winsys,
|
||||
xlib_dt->stride = align(util_format_get_stride(format, width), alignment);
|
||||
size = xlib_dt->stride * nblocksy;
|
||||
|
||||
if (!debug_get_option_xlib_no_shm()) {
|
||||
if (!debug_get_option_xlib_no_shm() &&
|
||||
XQueryExtension(xlib_dt->display, "MIT-SHM", &ignore, &ignore, &ignore)) {
|
||||
xlib_dt->data = alloc_shm(xlib_dt, size);
|
||||
if (xlib_dt->data) {
|
||||
xlib_dt->shm = True;
|
||||
|
@@ -37,5 +37,5 @@ vc4_drm_screen_create(int fd)
|
||||
struct pipe_screen *
|
||||
vc4_drm_screen_create_renderonly(struct renderonly *ro)
|
||||
{
|
||||
return vc4_screen_create(fcntl(ro->gpu_fd, F_DUPFD_CLOEXEC, 3), ro);
|
||||
return vc4_screen_create(ro->gpu_fd, ro);
|
||||
}
|
||||
|
@@ -201,7 +201,8 @@ bytes_per_line(unsigned pitch_bits, unsigned mul)
|
||||
|
||||
static void
|
||||
swrastXPutImage(__DRIdrawable * draw, int op,
|
||||
int x, int y, int w, int h, int stride,
|
||||
int srcx, int srcy, int x, int y,
|
||||
int w, int h, int stride,
|
||||
int shmid, char *data, void *loaderPrivate)
|
||||
{
|
||||
struct drisw_drawable *pdp = loaderPrivate;
|
||||
@@ -235,12 +236,12 @@ swrastXPutImage(__DRIdrawable * draw, int op,
|
||||
if (pdp->shminfo.shmid >= 0) {
|
||||
ximage->width = ximage->bytes_per_line / ((ximage->bits_per_pixel + 7)/ 8);
|
||||
ximage->height = h;
|
||||
XShmPutImage(dpy, drawable, gc, ximage, 0, 0, x, y, w, h, False);
|
||||
XShmPutImage(dpy, drawable, gc, ximage, srcx, srcy, x, y, w, h, False);
|
||||
XSync(dpy, False);
|
||||
} else {
|
||||
ximage->width = w;
|
||||
ximage->height = h;
|
||||
XPutImage(dpy, drawable, gc, ximage, 0, 0, x, y, w, h);
|
||||
XPutImage(dpy, drawable, gc, ximage, srcx, srcy, x, y, w, h);
|
||||
}
|
||||
ximage->data = NULL;
|
||||
}
|
||||
@@ -254,7 +255,21 @@ swrastPutImageShm(__DRIdrawable * draw, int op,
|
||||
struct drisw_drawable *pdp = loaderPrivate;
|
||||
|
||||
pdp->shminfo.shmaddr = shmaddr;
|
||||
swrastXPutImage(draw, op, x, y, w, h, stride, shmid,
|
||||
swrastXPutImage(draw, op, 0, 0, x, y, w, h, stride, shmid,
|
||||
shmaddr + offset, loaderPrivate);
|
||||
}
|
||||
|
||||
static void
|
||||
swrastPutImageShm2(__DRIdrawable * draw, int op,
|
||||
int x, int y,
|
||||
int w, int h, int stride,
|
||||
int shmid, char *shmaddr, unsigned offset,
|
||||
void *loaderPrivate)
|
||||
{
|
||||
struct drisw_drawable *pdp = loaderPrivate;
|
||||
|
||||
pdp->shminfo.shmaddr = shmaddr;
|
||||
swrastXPutImage(draw, op, x, 0, x, y, w, h, stride, shmid,
|
||||
shmaddr + offset, loaderPrivate);
|
||||
}
|
||||
|
||||
@@ -263,7 +278,7 @@ swrastPutImage2(__DRIdrawable * draw, int op,
|
||||
int x, int y, int w, int h, int stride,
|
||||
char *data, void *loaderPrivate)
|
||||
{
|
||||
swrastXPutImage(draw, op, x, y, w, h, stride, -1,
|
||||
swrastXPutImage(draw, op, 0, 0, x, y, w, h, stride, -1,
|
||||
data, loaderPrivate);
|
||||
}
|
||||
|
||||
@@ -272,7 +287,7 @@ swrastPutImage(__DRIdrawable * draw, int op,
|
||||
int x, int y, int w, int h,
|
||||
char *data, void *loaderPrivate)
|
||||
{
|
||||
swrastXPutImage(draw, op, x, y, w, h, 0, -1,
|
||||
swrastXPutImage(draw, op, 0, 0, x, y, w, h, 0, -1,
|
||||
data, loaderPrivate);
|
||||
}
|
||||
|
||||
@@ -340,7 +355,7 @@ swrastGetImageShm(__DRIdrawable * read,
|
||||
}
|
||||
|
||||
static const __DRIswrastLoaderExtension swrastLoaderExtension_shm = {
|
||||
.base = {__DRI_SWRAST_LOADER, 4 },
|
||||
.base = {__DRI_SWRAST_LOADER, 5 },
|
||||
|
||||
.getDrawableInfo = swrastGetDrawableInfo,
|
||||
.putImage = swrastPutImage,
|
||||
@@ -349,6 +364,7 @@ static const __DRIswrastLoaderExtension swrastLoaderExtension_shm = {
|
||||
.getImage2 = swrastGetImage2,
|
||||
.putImageShm = swrastPutImageShm,
|
||||
.getImageShm = swrastGetImageShm,
|
||||
.putImageShm2 = swrastPutImageShm2,
|
||||
};
|
||||
|
||||
static const __DRIextension *loader_extensions_shm[] = {
|
||||
|
@@ -251,6 +251,7 @@ fs_inst::is_send_from_grf() const
|
||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
||||
case SHADER_OPCODE_IMAGE_SIZE:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
|
||||
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
|
||||
@@ -892,6 +893,7 @@ fs_inst::size_read(int arg) const
|
||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
||||
case SHADER_OPCODE_IMAGE_SIZE:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
|
||||
|
@@ -371,6 +371,20 @@ can_take_stride(fs_inst *inst, unsigned arg, unsigned stride,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
instruction_requires_packed_data(fs_inst *inst)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case FS_OPCODE_DDX_FINE:
|
||||
case FS_OPCODE_DDX_COARSE:
|
||||
case FS_OPCODE_DDY_FINE:
|
||||
case FS_OPCODE_DDY_COARSE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
{
|
||||
@@ -417,6 +431,13 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
|
||||
return false;
|
||||
|
||||
/* Some instructions implemented in the generator backend, such as
|
||||
* derivatives, assume that their operands are packed so we can't
|
||||
* generally propagate strided regions to them.
|
||||
*/
|
||||
if (instruction_requires_packed_data(inst) && entry->src.stride > 1)
|
||||
return false;
|
||||
|
||||
/* Bail if the result of composing both strides would exceed the
|
||||
* hardware limit.
|
||||
*/
|
||||
|
@@ -667,15 +667,14 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
|
||||
* messages adding a node interference to the grf127_send_hack_node.
|
||||
* This node has a fixed asignment to grf127.
|
||||
*
|
||||
* We don't apply it to SIMD16 because previous code avoids any register
|
||||
* overlap between sources and destination.
|
||||
* We don't apply it to SIMD16 instructions because previous code avoids
|
||||
* any register overlap between sources and destination.
|
||||
*/
|
||||
ra_set_node_reg(g, grf127_send_hack_node, 127);
|
||||
if (dispatch_width == 8) {
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
if (inst->is_send_from_grf() && inst->dst.file == VGRF)
|
||||
ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
|
||||
}
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
if (inst->exec_size < 16 && inst->is_send_from_grf() &&
|
||||
inst->dst.file == VGRF)
|
||||
ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
|
||||
}
|
||||
|
||||
if (spilled_any_registers) {
|
||||
|
@@ -94,7 +94,22 @@ VkResult anv_CreateDescriptorSetLayout(
|
||||
uint32_t immutable_sampler_count = 0;
|
||||
for (uint32_t j = 0; j < pCreateInfo->bindingCount; j++) {
|
||||
max_binding = MAX2(max_binding, pCreateInfo->pBindings[j].binding);
|
||||
if (pCreateInfo->pBindings[j].pImmutableSamplers)
|
||||
|
||||
/* From the Vulkan 1.1.97 spec for VkDescriptorSetLayoutBinding:
|
||||
*
|
||||
* "If descriptorType specifies a VK_DESCRIPTOR_TYPE_SAMPLER or
|
||||
* VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER type descriptor, then
|
||||
* pImmutableSamplers can be used to initialize a set of immutable
|
||||
* samplers. [...] If descriptorType is not one of these descriptor
|
||||
* types, then pImmutableSamplers is ignored.
|
||||
*
|
||||
* We need to be careful here and only parse pImmutableSamplers if we
|
||||
* have one of the right descriptor types.
|
||||
*/
|
||||
VkDescriptorType desc_type = pCreateInfo->pBindings[j].descriptorType;
|
||||
if ((desc_type == VK_DESCRIPTOR_TYPE_SAMPLER ||
|
||||
desc_type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) &&
|
||||
pCreateInfo->pBindings[j].pImmutableSamplers)
|
||||
immutable_sampler_count += pCreateInfo->pBindings[j].descriptorCount;
|
||||
}
|
||||
|
||||
@@ -153,6 +168,12 @@ VkResult anv_CreateDescriptorSetLayout(
|
||||
if (binding == NULL)
|
||||
continue;
|
||||
|
||||
/* We temporarily stashed the pointer to the binding in the
|
||||
* immutable_samplers pointer. Now that we've pulled it back out
|
||||
* again, we reset immutable_samplers to NULL.
|
||||
*/
|
||||
set_layout->binding[b].immutable_samplers = NULL;
|
||||
|
||||
if (binding->descriptorCount == 0)
|
||||
continue;
|
||||
|
||||
@@ -170,6 +191,15 @@ VkResult anv_CreateDescriptorSetLayout(
|
||||
set_layout->binding[b].stage[s].sampler_index = sampler_count[s];
|
||||
sampler_count[s] += binding->descriptorCount;
|
||||
}
|
||||
|
||||
if (binding->pImmutableSamplers) {
|
||||
set_layout->binding[b].immutable_samplers = samplers;
|
||||
samplers += binding->descriptorCount;
|
||||
|
||||
for (uint32_t i = 0; i < binding->descriptorCount; i++)
|
||||
set_layout->binding[b].immutable_samplers[i] =
|
||||
anv_sampler_from_handle(binding->pImmutableSamplers[i]);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@@ -221,17 +251,6 @@ VkResult anv_CreateDescriptorSetLayout(
|
||||
break;
|
||||
}
|
||||
|
||||
if (binding->pImmutableSamplers) {
|
||||
set_layout->binding[b].immutable_samplers = samplers;
|
||||
samplers += binding->descriptorCount;
|
||||
|
||||
for (uint32_t i = 0; i < binding->descriptorCount; i++)
|
||||
set_layout->binding[b].immutable_samplers[i] =
|
||||
anv_sampler_from_handle(binding->pImmutableSamplers[i]);
|
||||
} else {
|
||||
set_layout->binding[b].immutable_samplers = NULL;
|
||||
}
|
||||
|
||||
set_layout->shader_stages |= binding->stageFlags;
|
||||
}
|
||||
|
||||
|
@@ -980,9 +980,12 @@ void anv_GetPhysicalDeviceProperties(
|
||||
const uint32_t max_samplers = (devinfo->gen >= 8 || devinfo->is_haswell) ?
|
||||
128 : 16;
|
||||
|
||||
const uint32_t max_images = devinfo->gen < 9 ? MAX_GEN8_IMAGES : MAX_IMAGES;
|
||||
|
||||
VkSampleCountFlags sample_counts =
|
||||
isl_device_get_sample_counts(&pdevice->isl_dev);
|
||||
|
||||
|
||||
VkPhysicalDeviceLimits limits = {
|
||||
.maxImageDimension1D = (1 << 14),
|
||||
.maxImageDimension2D = (1 << 14),
|
||||
@@ -1002,7 +1005,7 @@ void anv_GetPhysicalDeviceProperties(
|
||||
.maxPerStageDescriptorUniformBuffers = 64,
|
||||
.maxPerStageDescriptorStorageBuffers = 64,
|
||||
.maxPerStageDescriptorSampledImages = max_samplers,
|
||||
.maxPerStageDescriptorStorageImages = 64,
|
||||
.maxPerStageDescriptorStorageImages = max_images,
|
||||
.maxPerStageDescriptorInputAttachments = 64,
|
||||
.maxPerStageResources = 250,
|
||||
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
|
||||
@@ -1011,7 +1014,7 @@ void anv_GetPhysicalDeviceProperties(
|
||||
.maxDescriptorSetStorageBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorStorageBuffers */
|
||||
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
|
||||
.maxDescriptorSetSampledImages = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSampledImages */
|
||||
.maxDescriptorSetStorageImages = 6 * 64, /* number of stages * maxPerStageDescriptorStorageImages */
|
||||
.maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
|
||||
.maxDescriptorSetInputAttachments = 256,
|
||||
.maxVertexInputAttributes = MAX_VBS,
|
||||
.maxVertexInputBindings = MAX_VBS,
|
||||
|
@@ -40,7 +40,8 @@ bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask);
|
||||
bool anv_nir_lower_ycbcr_textures(nir_shader *shader,
|
||||
struct anv_pipeline_layout *layout);
|
||||
|
||||
void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
|
||||
void anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access,
|
||||
struct anv_pipeline_layout *layout,
|
||||
nir_shader *shader,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
|
@@ -428,7 +428,8 @@ setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n)
|
||||
}
|
||||
|
||||
void
|
||||
anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
|
||||
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
|
||||
bool robust_buffer_access,
|
||||
struct anv_pipeline_layout *layout,
|
||||
nir_shader *shader,
|
||||
struct brw_stage_prog_data *prog_data,
|
||||
@@ -439,7 +440,7 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
|
||||
struct apply_pipeline_layout_state state = {
|
||||
.shader = shader,
|
||||
.layout = layout,
|
||||
.add_bounds_checks = pipeline->device->robust_buffer_access,
|
||||
.add_bounds_checks = robust_buffer_access,
|
||||
};
|
||||
|
||||
void *mem_ctx = ralloc_context(NULL);
|
||||
@@ -518,8 +519,8 @@ anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
|
||||
}
|
||||
}
|
||||
|
||||
if (map->image_count > 0) {
|
||||
assert(map->image_count <= MAX_IMAGES);
|
||||
if (map->image_count > 0 && pdevice->compiler->devinfo->gen < 9) {
|
||||
assert(map->image_count <= MAX_GEN8_IMAGES);
|
||||
assert(shader->num_uniforms == prog_data->nr_params * 4);
|
||||
state.first_image_uniform = shader->num_uniforms;
|
||||
uint32_t *param = brw_stage_prog_data_add_params(prog_data,
|
||||
|
@@ -532,7 +532,9 @@ anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
|
||||
|
||||
/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
|
||||
if (layout) {
|
||||
anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data,
|
||||
anv_nir_apply_pipeline_layout(&pipeline->device->instance->physicalDevice,
|
||||
pipeline->device->robust_buffer_access,
|
||||
layout, nir, prog_data,
|
||||
&stage->bind_map);
|
||||
}
|
||||
|
||||
|
@@ -157,7 +157,8 @@ struct gen_l3_config;
|
||||
#define MAX_SCISSORS 16
|
||||
#define MAX_PUSH_CONSTANTS_SIZE 128
|
||||
#define MAX_DYNAMIC_BUFFERS 16
|
||||
#define MAX_IMAGES 8
|
||||
#define MAX_IMAGES 64
|
||||
#define MAX_GEN8_IMAGES 8
|
||||
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
|
||||
|
||||
/* The kernel relocation API has a limitation of a 32-bit delta value
|
||||
@@ -1874,7 +1875,7 @@ struct anv_push_constants {
|
||||
uint32_t base_work_group_id[3];
|
||||
|
||||
/* Image data for image_load_store on pre-SKL */
|
||||
struct brw_image_param images[MAX_IMAGES];
|
||||
struct brw_image_param images[MAX_GEN8_IMAGES];
|
||||
};
|
||||
|
||||
struct anv_dynamic_state {
|
||||
|
@@ -70,12 +70,36 @@ gen7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer)
|
||||
};
|
||||
|
||||
const int max = 0xffff;
|
||||
|
||||
uint32_t y_min = s->offset.y;
|
||||
uint32_t x_min = s->offset.x;
|
||||
uint32_t y_max = s->offset.y + s->extent.height - 1;
|
||||
uint32_t x_max = s->offset.x + s->extent.width - 1;
|
||||
|
||||
/* Do this math using int64_t so overflow gets clamped correctly. */
|
||||
if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
|
||||
y_min = clamp_int64((uint64_t) y_min,
|
||||
cmd_buffer->state.render_area.offset.y, max);
|
||||
x_min = clamp_int64((uint64_t) x_min,
|
||||
cmd_buffer->state.render_area.offset.x, max);
|
||||
y_max = clamp_int64((uint64_t) y_max, 0,
|
||||
cmd_buffer->state.render_area.offset.y +
|
||||
cmd_buffer->state.render_area.extent.height - 1);
|
||||
x_max = clamp_int64((uint64_t) x_max, 0,
|
||||
cmd_buffer->state.render_area.offset.x +
|
||||
cmd_buffer->state.render_area.extent.width - 1);
|
||||
} else if (fb) {
|
||||
y_min = clamp_int64((uint64_t) y_min, 0, max);
|
||||
x_min = clamp_int64((uint64_t) x_min, 0, max);
|
||||
y_max = clamp_int64((uint64_t) y_max, 0, fb->height - 1);
|
||||
x_max = clamp_int64((uint64_t) x_max, 0, fb->width - 1);
|
||||
}
|
||||
|
||||
struct GEN7_SCISSOR_RECT scissor = {
|
||||
/* Do this math using int64_t so overflow gets clamped correctly. */
|
||||
.ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
|
||||
.ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
|
||||
.ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, fb->height - 1),
|
||||
.ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, fb->width - 1)
|
||||
.ScissorRectangleYMin = y_min,
|
||||
.ScissorRectangleXMin = x_min,
|
||||
.ScissorRectangleYMax = y_max,
|
||||
.ScissorRectangleXMax = x_max
|
||||
};
|
||||
|
||||
if (s->extent.width <= 0 || s->extent.height <= 0) {
|
||||
|
@@ -1998,6 +1998,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
gl_shader_stage stage,
|
||||
struct anv_state *bt_state)
|
||||
{
|
||||
const struct gen_device_info *devinfo = &cmd_buffer->device->info;
|
||||
struct anv_subpass *subpass = cmd_buffer->state.subpass;
|
||||
struct anv_cmd_pipeline_state *pipe_state;
|
||||
struct anv_pipeline *pipeline;
|
||||
@@ -2055,7 +2056,8 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
if (map->surface_count == 0)
|
||||
goto out;
|
||||
|
||||
if (map->image_count > 0) {
|
||||
/* We only use push constant space for images before gen9 */
|
||||
if (map->image_count > 0 && devinfo->gen < 9) {
|
||||
VkResult result =
|
||||
anv_cmd_buffer_ensure_push_constant_field(cmd_buffer, stage, images);
|
||||
if (result != VK_SUCCESS)
|
||||
@@ -2168,11 +2170,15 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
surface_state = sstate.state;
|
||||
assert(surface_state.alloc_size);
|
||||
add_surface_state_relocs(cmd_buffer, sstate);
|
||||
if (devinfo->gen < 9) {
|
||||
assert(image < MAX_GEN8_IMAGES);
|
||||
struct brw_image_param *image_param =
|
||||
&cmd_buffer->state.push_constants[stage]->images[image];
|
||||
|
||||
struct brw_image_param *image_param =
|
||||
&cmd_buffer->state.push_constants[stage]->images[image++];
|
||||
|
||||
*image_param = desc->image_view->planes[binding->plane].storage_image_param;
|
||||
*image_param =
|
||||
desc->image_view->planes[binding->plane].storage_image_param;
|
||||
}
|
||||
image++;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -2217,11 +2223,14 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
|
||||
assert(surface_state.alloc_size);
|
||||
add_surface_reloc(cmd_buffer, surface_state,
|
||||
desc->buffer_view->address);
|
||||
if (devinfo->gen < 9) {
|
||||
assert(image < MAX_GEN8_IMAGES);
|
||||
struct brw_image_param *image_param =
|
||||
&cmd_buffer->state.push_constants[stage]->images[image];
|
||||
|
||||
struct brw_image_param *image_param =
|
||||
&cmd_buffer->state.push_constants[stage]->images[image++];
|
||||
|
||||
*image_param = desc->buffer_view->storage_image_param;
|
||||
*image_param = desc->buffer_view->storage_image_param;
|
||||
}
|
||||
image++;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@@ -1,4 +1,4 @@
|
||||
# Copyright © 2017-2018 Intel Corporation
|
||||
# Copyright © 2017-2019 Intel Corporation
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -176,7 +176,10 @@ endif
|
||||
|
||||
libanv_common = static_library(
|
||||
'anv_common',
|
||||
[libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h],
|
||||
[
|
||||
libanv_files, anv_entrypoints, anv_extensions_c, anv_extensions_h, sha1_h,
|
||||
gen_xml_pack,
|
||||
],
|
||||
include_directories : [
|
||||
inc_common, inc_intel, inc_compiler, inc_drm_uapi, inc_vulkan_util,
|
||||
inc_vulkan_wsi,
|
||||
|
@@ -1273,12 +1273,20 @@ dri3_alloc_render_buffer(struct loader_dri3_drawable *draw, unsigned int format,
|
||||
|
||||
free(mod_reply);
|
||||
|
||||
buffer->image = draw->ext->image->createImageWithModifiers(draw->dri_screen,
|
||||
width, height,
|
||||
format,
|
||||
modifiers,
|
||||
count,
|
||||
buffer);
|
||||
/* don't use createImageWithModifiers() if we have no
|
||||
* modifiers, other things depend on the use flags when
|
||||
* there are no modifiers to know that a buffer can be
|
||||
* shared.
|
||||
*/
|
||||
if (modifiers) {
|
||||
buffer->image = draw->ext->image->createImageWithModifiers(draw->dri_screen,
|
||||
width, height,
|
||||
format,
|
||||
modifiers,
|
||||
count,
|
||||
buffer);
|
||||
}
|
||||
|
||||
free(modifiers);
|
||||
}
|
||||
#endif
|
||||
|
@@ -222,8 +222,13 @@ void st_init_limits(struct pipe_screen *screen,
|
||||
pc->MaxUniformComponents = MIN2(pc->MaxUniformComponents,
|
||||
MAX_UNIFORMS * 4);
|
||||
|
||||
/* For ARB programs, prog_src_register::Index is a signed 13-bit number.
|
||||
* This gives us a limit of 4096 values - but we may need to generate
|
||||
* internal values in addition to what the source program uses. So, we
|
||||
* drop the limit one step lower, to 2048, to be safe.
|
||||
*/
|
||||
pc->MaxParameters =
|
||||
pc->MaxNativeParameters = pc->MaxUniformComponents / 4;
|
||||
pc->MaxNativeParameters = MIN2(pc->MaxUniformComponents / 4, 2048);
|
||||
pc->MaxInputComponents =
|
||||
screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_INPUTS) * 4;
|
||||
pc->MaxOutputComponents =
|
||||
|
@@ -1071,7 +1071,12 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
|
||||
st_framebuffers_purge(st);
|
||||
}
|
||||
else {
|
||||
GET_CURRENT_CONTEXT(ctx);
|
||||
|
||||
ret = _mesa_make_current(NULL, NULL, NULL);
|
||||
|
||||
if (ctx)
|
||||
st_framebuffers_purge(ctx->st);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
Reference in New Issue
Block a user