Compare commits
138 Commits
mesa-13.0.
...
mesa-13.0.
Author | SHA1 | Date | |
---|---|---|---|
|
71f3ff57fa | ||
|
8d622e91d4 | ||
|
1d561d8147 | ||
|
9f66954047 | ||
|
bebf672fc7 | ||
|
1b0715f05f | ||
|
06b96072c7 | ||
|
2248d24509 | ||
|
32cf2344c1 | ||
|
a05089106b | ||
|
dfe79fc3f1 | ||
|
dad81f4cd0 | ||
|
61d7a9dc21 | ||
|
279604b4fa | ||
|
d0e460c7b7 | ||
|
f00bc877a2 | ||
|
c3365b06ac | ||
|
2bdb22fdaa | ||
|
bbe50d9b03 | ||
|
0683feb18c | ||
|
7d172c6c35 | ||
|
f562f57646 | ||
|
f4e2c60858 | ||
|
b18c791a64 | ||
|
010886b120 | ||
|
473319075b | ||
|
0288655ce7 | ||
|
64f24795b9 | ||
|
784b362767 | ||
|
3abc968236 | ||
|
efe15de566 | ||
|
91df6e8aed | ||
|
00d2299007 | ||
|
ed48242e05 | ||
|
01ac2d3c5c | ||
|
1770ba4d8f | ||
|
60662cf26e | ||
|
fada7e5fda | ||
|
dc5ac1404c | ||
|
5c7fcaacd9 | ||
|
61e3b6d309 | ||
|
de2402aafb | ||
|
6086a15f1a | ||
|
70a8715ea7 | ||
|
0d934e4a39 | ||
|
40a92fd518 | ||
|
e1815ff6a2 | ||
|
f332448ef5 | ||
|
a8ba39aba6 | ||
|
f8230e841a | ||
|
c1ad22360d | ||
|
585d8777b0 | ||
|
3479dde5c6 | ||
|
df67d0590a | ||
|
129f6089a0 | ||
|
426b1156c7 | ||
|
92c061289e | ||
|
c50346e58b | ||
|
eb73e3c6d0 | ||
|
f6813d5dda | ||
|
0624d6c2a4 | ||
|
b5bfc9bcc0 | ||
|
01044bf446 | ||
|
cf94d126b6 | ||
|
697cf3c720 | ||
|
68d18cccff | ||
|
fa5d8de838 | ||
|
58c7c9d438 | ||
|
5210d157c4 | ||
|
f69b8510ba | ||
|
94278a48d1 | ||
|
70afb711fa | ||
|
a545a33b1a | ||
|
f084d3c7aa | ||
|
6bfc352f5a | ||
|
3255d10da4 | ||
|
c6c7e98208 | ||
|
9185a3385b | ||
|
b602f4f5bd | ||
|
12ba860584 | ||
|
b9fd9a693b | ||
|
830b1051ab | ||
|
15432c29be | ||
|
c148b51a83 | ||
|
704072afed | ||
|
d88fee9df0 | ||
|
ae28120652 | ||
|
501c380d87 | ||
|
645b47c32b | ||
|
d446f45567 | ||
|
ced6fd3508 | ||
|
5dd74b9c40 | ||
|
ee1118d1df | ||
|
f9cac64aee | ||
|
259e74f682 | ||
|
bf556db1c9 | ||
|
c0709db9cf | ||
|
c865a39636 | ||
|
83a83a89ea | ||
|
eba4c8fea0 | ||
|
8a1113ce3f | ||
|
fdb01cdb92 | ||
|
f33d1100b4 | ||
|
023e380ccc | ||
|
213791e86c | ||
|
0c54aa1568 | ||
|
e2a522b755 | ||
|
d5a941b7d9 | ||
|
8b3beddd03 | ||
|
fb51df52d0 | ||
|
f41850eaad | ||
|
37dcca9a78 | ||
|
17cd8edc37 | ||
|
512728415d | ||
|
5afc33bcb3 | ||
|
32d50d0f75 | ||
|
f7d13af063 | ||
|
071c058d9d | ||
|
b5ad01d579 | ||
|
a2ba1f4393 | ||
|
84317907b1 | ||
|
9c4ebd16e9 | ||
|
0ca96e995e | ||
|
23ecfe8f09 | ||
|
dcf5c05499 | ||
|
ca81a9f9df | ||
|
e8a2e40260 | ||
|
b5c7a1e2d7 | ||
|
94d521cd88 | ||
|
8c8d819065 | ||
|
a83fb211c0 | ||
|
19d2f006da | ||
|
1cc110517b | ||
|
0b63515f22 | ||
|
02c3e9033e | ||
|
1185212d79 | ||
|
a908680f3e | ||
|
c8ece92ded |
@@ -40,7 +40,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
|
||||
--enable-vdpau \
|
||||
--enable-xa \
|
||||
--enable-xvmc \
|
||||
--disable-llvm-shared-libs \
|
||||
--enable-llvm-shared-libs \
|
||||
--with-egl-platforms=x11,wayland,drm,surfaceless \
|
||||
--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
|
||||
--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr \
|
||||
|
@@ -3,3 +3,19 @@
|
||||
|
||||
# Commit was reverted shortly after it landed in master
|
||||
a39ad185932eab4f25a0cb2b112c10d8700ef242 configure.ac: honour LLVM_LIBDIR when linking against LLVM
|
||||
|
||||
# Commit fixes an earlier patch which is too invasive to be considered for stable.
|
||||
157971e450c34ec430c295ff922c2e597294aba3 i965/blit: Fix the src dimension sanity check in miptree_copy
|
||||
|
||||
# Similar to the above - depends on the series which introduces intel_miptree_copy
|
||||
b18cd8ce2c07c2d1a666fbff1f0d92d17dd5b22c i965/miptree: Use intel_miptree_copy for maps
|
||||
|
||||
# The commit is a backport of an identical anv one. The latter is not in stable
|
||||
# and neither is this one, since both depend on functionality which is not in stable.
|
||||
65cbb993d33976d9ee24eff01ade8ed9013617ca radv: Call nir_lower_constant_initializers.
|
||||
|
||||
# Commit causes regression on i915, and Nicolai requested that we drop it altogether.
|
||||
963311b71fd9900351a4a9dd1cd5f5db391f7e1b mesa/main: fix version/extension checks in _mesa_ClampColor
|
||||
|
||||
# Misnominated (only previous commit was meant to be for stable)
|
||||
36b9976e1f99e8070c67cb8a255793939db77d02 egl/wayland: Avoid race conditions when on non-main thread
|
||||
|
@@ -10,26 +10,28 @@
|
||||
# $ bin/get-extra-pick-list.sh | tee picklist
|
||||
|
||||
# Use the last branchpoint as our limit for the search
|
||||
# XXX: there should be a better way for this
|
||||
latest_branchpoint=`git branch | grep \* | cut -c 3-`-branchpoint
|
||||
latest_branchpoint=`git merge-base origin/master HEAD`
|
||||
|
||||
# Grep for commits with "cherry picked from commit" in the commit message.
|
||||
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
|
||||
grep "cherry picked from commit" |\
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' |\
|
||||
cut -c -8 |\
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||
|
||||
# For each cherry-picked commit...
|
||||
cat already_picked | cut -c -8 |\
|
||||
while read sha
|
||||
do
|
||||
# Check if the original commit is referenced in master
|
||||
# ... check if it's referenced (fixed by another) patch
|
||||
git log -n1 --pretty=oneline --grep=$sha $latest_branchpoint..origin/master |\
|
||||
cut -c -8 |\
|
||||
while read candidate
|
||||
do
|
||||
# Check if the potential fix, hasn't landed in branch yet.
|
||||
found=`git log -n1 --pretty=oneline --reverse --grep=$candidate $latest_branchpoint..HEAD |wc -l`
|
||||
if test $found = 0
|
||||
then
|
||||
echo Commit $candidate might need to be picked, as it references $sha
|
||||
# And flag up if it hasn't landed in branch yet.
|
||||
if grep -q ^$candidate already_picked ; then
|
||||
continue
|
||||
fi
|
||||
echo Commit $candidate references $sha
|
||||
done
|
||||
done
|
||||
|
||||
rm -f already_picked
|
||||
|
61
bin/get-fixes-pick-list.sh
Executable file
61
bin/get-fixes-pick-list.sh
Executable file
@@ -0,0 +1,61 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Script for generating a list of candidates [referenced by a Fixes tag] for
|
||||
# cherry-picking to a stable branch
|
||||
#
|
||||
# Usage examples:
|
||||
#
|
||||
# $ bin/get-fixes-pick-list.sh
|
||||
# $ bin/get-fixes-pick-list.sh > picklist
|
||||
# $ bin/get-fixes-pick-list.sh | tee picklist
|
||||
|
||||
# Use the last branchpoint as our limit for the search
|
||||
latest_branchpoint=`git merge-base origin/master HEAD`
|
||||
|
||||
# List all the commits between day 1 and the branch point...
|
||||
git log --reverse --pretty=%H $latest_branchpoint > already_landed
|
||||
|
||||
# ... and the ones cherry-picked.
|
||||
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
|
||||
grep "cherry picked from commit" |\
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||
|
||||
# Grep for commits with Fixes tag
|
||||
git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
|
||||
while read sha
|
||||
do
|
||||
# For each one try to extract the tag
|
||||
fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
|
||||
if [ "x$fixes_count" != x1 ] ; then
|
||||
echo WARNING: Commit $sha has nore than one Fixes tag
|
||||
fi
|
||||
fixes=`git show $sha | grep -i "fixes:" | head -n 1`
|
||||
# The following sed/cut combination is borrowed from GregKH
|
||||
id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`
|
||||
|
||||
# Bail out if we cannot find suitable id.
|
||||
# Any specific validation that the $id is valid, and not some junk, is
|
||||
# implied by the follow-up code
|
||||
if [ "x$id" = x ] ; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# Check if the offending commit is in branch.
|
||||
|
||||
# Be that cherry-picked ...
|
||||
# ... or landed before the branchpoint.
|
||||
if grep -q ^$id already_picked ||
|
||||
grep -q ^$id already_landed ; then
|
||||
|
||||
# Finally nominate the fix if it hasn't landed yet.
|
||||
if grep -q ^$sha already_picked ; then
|
||||
continue
|
||||
fi
|
||||
|
||||
echo Commit $sha fixes $id
|
||||
fi
|
||||
|
||||
done
|
||||
|
||||
rm -f already_picked
|
||||
rm -f already_landed
|
@@ -8,13 +8,16 @@
|
||||
# $ bin/get-pick-list.sh > picklist
|
||||
# $ bin/get-pick-list.sh | tee picklist
|
||||
|
||||
# Use the last branchpoint as our limit for the search
|
||||
latest_branchpoint=`git merge-base origin/master HEAD`
|
||||
|
||||
# Grep for commits with "cherry picked from commit" in the commit message.
|
||||
git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
|
||||
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
|
||||
grep "cherry picked from commit" |\
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||
|
||||
# Grep for commits that were marked as a candidate for the stable tree.
|
||||
git log --reverse --pretty=%H -i --grep='^\([[:space:]]*NOTE: .*[Cc]andidate\|CC:.*mesa-stable\)' HEAD..origin/master |\
|
||||
git log --reverse --pretty=%H -i --grep='^CC:.*13\.0.*mesa-stable' $latest_branchpoint..origin/master |\
|
||||
while read sha
|
||||
do
|
||||
# Check to see whether the patch is on the ignore list.
|
||||
|
42
bin/get-typod-pick-list.sh
Executable file
42
bin/get-typod-pick-list.sh
Executable file
@@ -0,0 +1,42 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Script for generating a list of candidates which have typos in the nomination line
|
||||
#
|
||||
# Usage examples:
|
||||
#
|
||||
# $ bin/get-typod-pick-list.sh
|
||||
# $ bin/get-typod-pick-list.sh > picklist
|
||||
# $ bin/get-typod-pick-list.sh | tee picklist
|
||||
|
||||
# NB:
|
||||
# This script intentionally _never_ checks for a specific version tag.
|
||||
# Should we consider folding it into the original get-pick-list.sh?
|
||||
|
||||
# Use the last branchpoint as our limit for the search
|
||||
latest_branchpoint=`git merge-base origin/master HEAD`
|
||||
|
||||
# Grep for commits with "cherry picked from commit" in the commit message.
|
||||
git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |\
|
||||
grep "cherry picked from commit" |\
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||
|
||||
# Grep for commits whose CC line names mesa-dev - presumably a typo'd mesa-stable nomination.
|
||||
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' $latest_branchpoint..origin/master |\
|
||||
while read sha
|
||||
do
|
||||
# Check to see whether the patch is on the ignore list.
|
||||
if [ -f bin/.cherry-ignore ] ; then
|
||||
if grep -q ^$sha bin/.cherry-ignore ; then
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check to see if it has already been picked over.
|
||||
if grep -q ^$sha already_picked ; then
|
||||
continue
|
||||
fi
|
||||
|
||||
git log -n1 --pretty=oneline $sha | cat
|
||||
done
|
||||
|
||||
rm -f already_picked
|
@@ -1377,6 +1377,9 @@ AC_ARG_ENABLE([driglx-direct],
|
||||
dnl
|
||||
dnl libGL configuration per driver
|
||||
dnl
|
||||
if test "x$enable_glx" != xno; then
|
||||
PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED])
|
||||
fi
|
||||
case "x$enable_glx" in
|
||||
xxlib | xgallium-xlib)
|
||||
# Xlib-based GLX
|
||||
@@ -1390,7 +1393,6 @@ xxlib | xgallium-xlib)
|
||||
;;
|
||||
xdri)
|
||||
# DRI-based GLX
|
||||
PKG_CHECK_MODULES([GLPROTO], [glproto >= $GLPROTO_REQUIRED])
|
||||
|
||||
# find the DRI deps for libGL
|
||||
dri_modules="x11 xext xdamage xfixes x11-xcb xcb xcb-glx >= $XCBGLX_REQUIRED"
|
||||
@@ -1648,7 +1650,7 @@ fi
|
||||
AC_ARG_WITH([vulkan-drivers],
|
||||
[AS_HELP_STRING([--with-vulkan-drivers@<:@=DIRS...@:>@],
|
||||
[comma delimited Vulkan drivers list, e.g.
|
||||
"intel"
|
||||
"intel,radeon"
|
||||
@<:@default=no@:>@])],
|
||||
[with_vulkan_drivers="$withval"],
|
||||
[with_vulkan_drivers="no"])
|
||||
|
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
55b07d056f9b855ba9d7c8b2ddc7d3b220a61c6ab1bdc73cbfc2f607721094c2 mesa-13.0.3.tar.gz
|
||||
d9aa8be5c176d00d0cd503cb2f64a5a403ea471ec819c022581414860d7ba40e mesa-13.0.3.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
255
docs/relnotes/13.0.4.html
Normal file
255
docs/relnotes/13.0.4.html
Normal file
@@ -0,0 +1,255 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 13.0.4 Release Notes / February 1, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 13.0.4 is a bug fix release which fixes bugs found since the 13.0.3 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 13.0.4 implements the OpenGL 4.4 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.4. OpenGL
|
||||
4.4 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
a78518030b0b7d77a6c426ac3ff40f4b27fb0e2cdb0dfbe685024a46cae59bad mesa-13.0.4.tar.gz
|
||||
a95d7ce8f7bd5f88585e4be3144a341236d8c0fc91f6feaec59bb8ba3120e726 mesa-13.0.4.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92634">Bug 92634</a> - gallium's vl_mpeg12_decoder does not work with st/va</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94512">Bug 94512</a> - X segfaults with glx-tls enabled in a x32 environment</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=94900">Bug 94900</a> - HD6950 GPU lockup loop with various steam games (octodad[always], saints row 4[always], dead island[always], grid autosport[sometimes])</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with "Fatal error: Cannot set display mode."</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98914">Bug 98914</a> - mesa-vdpau-drivers: breaks vdpau for mpeg2video</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99085">Bug 99085</a> - [EGL] dEQP-EGL.functional.sharing.gles2.multithread intermittent</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99097">Bug 99097</a> - [vulkancts] dEQP-VK.image.store regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99100">Bug 99100</a> - [SKL,BDW,BSW,KBL] dEQP-VK.glsl.return.return_in_dynamic_loop_dynamic_vertex regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99144">Bug 99144</a> - Incorrect rendering using glDrawArraysInstancedBaseInstance and first != 0 on Skylake</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99154">Bug 99154</a> - Link time error when using multiple builtin functions</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99158">Bug 99158</a> - vdpau segfaults and gpu locks with kodi on R9285</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99185">Bug 99185</a> - dEQP-EGL.functional.image.modify.tex_rgb5_a1_tex_subimage_rgba8</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99188">Bug 99188</a> - dEQP-EGL.functional.create_context_ext.robust_gl_30.rgb565_no_depth_no_stencil</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99210">Bug 99210</a> - ES3-CTS.functional.texture.mipmap.cube.generate.rgba5551_*</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] "Assertion `bkref' failed" reproducible with glmark2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Andres Rodriguez (2):</p>
|
||||
<ul>
|
||||
<li>vulkan/wsi: clarify the severity of lack of DRI3 v2</li>
|
||||
<li>radv: fix include order for installed headers v2</li>
|
||||
</ul>
|
||||
|
||||
<p>Arda Coskunses (2):</p>
|
||||
<ul>
|
||||
<li>vulkan/wsi/x11: don't crash on null visual</li>
|
||||
<li>vulkan/wsi/x11: don't crash on null wsi x11 connection</li>
|
||||
</ul>
|
||||
|
||||
<p>Bas Nieuwenhuizen (1):</p>
|
||||
<ul>
|
||||
<li>radv: Support loader interface version 3.</li>
|
||||
</ul>
|
||||
|
||||
<p>Chad Versace (10):</p>
|
||||
<ul>
|
||||
<li>egl: Check config's surface types in eglCreate*Surface()</li>
|
||||
<li>dri: Add __DRI_IMAGE_FORMAT_ARGB1555</li>
|
||||
<li>mesa/texformat: Handle GL_RGBA + GL_UNSIGNED_SHORT_5_5_5_1</li>
|
||||
<li>egl: Emit correct error when robust context creation fails</li>
|
||||
<li>anv: Handle vkGetPhysicalDeviceQueueFamilyProperties with count == 0</li>
|
||||
<li>mesa/shaderobj: Fix races on refcounts</li>
|
||||
<li>meta: Disable dithering during glGenerateMipmap</li>
|
||||
<li>vulkan: Add new cast macros for VkIcd types</li>
|
||||
<li>vulkan: Update vk_icd.h to interface version 3</li>
|
||||
<li>anv: Support loader interface version 3 (patch v2)</li>
|
||||
</ul>
|
||||
|
||||
<p>Christian König (1):</p>
|
||||
<ul>
|
||||
<li>vl/zscan: fix "Fix trivial sign compare warnings"</li>
|
||||
</ul>
|
||||
|
||||
<p>Chuck Atkins (1):</p>
|
||||
<ul>
|
||||
<li>glx: Add missing glproto dependency for gallium-xlib glx</li>
|
||||
</ul>
|
||||
|
||||
<p>Damien Grassart (1):</p>
|
||||
<ul>
|
||||
<li>anv: return count of queue families written</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (1):</p>
|
||||
<ul>
|
||||
<li>radv: flush smem for uniform buffer bit.</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (10):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 13.0.3</li>
|
||||
<li>cherry-ignore: add couple of intel_miptree_copy related patches</li>
|
||||
<li>cherry-ignore: add radv: Call nir_lower_constant_initializers."</li>
|
||||
<li>get-typod-pick-list.sh: add new script</li>
|
||||
<li>cherry-ignore: add "_mesa_ClampColor extension/version fix"</li>
|
||||
<li>cherry-ignore: add wayland race condition fix</li>
|
||||
<li>egl/wayland: use the destroy_window_callback for swrast</li>
|
||||
<li>automake: use shared llvm libs for make distcheck</li>
|
||||
<li>get-pick-list.sh: Require explicit "13.0" for nominating stable patches</li>
|
||||
<li>Update version to 13.0.4</li>
|
||||
</ul>
|
||||
|
||||
<p>Francisco Jerez (1):</p>
|
||||
<ul>
|
||||
<li>anv: Fix uniform and storage buffer offset alignment limits.</li>
|
||||
</ul>
|
||||
|
||||
<p>Fredrik Höglund (2):</p>
|
||||
<ul>
|
||||
<li>radv: fix dual source blending</li>
|
||||
<li>dri3: Fix MakeCurrent without a default framebuffer</li>
|
||||
</ul>
|
||||
|
||||
<p>Grazvydas Ignotas (1):</p>
|
||||
<ul>
|
||||
<li>mapi: update the asm code to support x32</li>
|
||||
</ul>
|
||||
|
||||
<p>Heiko Przybyl (1):</p>
|
||||
<ul>
|
||||
<li>r600/sb: Fix loop optimization related hangs on eg</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (1):</p>
|
||||
<ul>
|
||||
<li>nouveau: take extra push space into account for pushbuf_space calls</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (4):</p>
|
||||
<ul>
|
||||
<li>i965/generator/tex: Handle an immediate sampler with an indirect texture</li>
|
||||
<li>anv/formats: Use the real format for B4G4R4A4_UNORM_PACK16 on gen8</li>
|
||||
<li>nir/search: Only allow matching SSA values</li>
|
||||
<li>isl: Mark A4B4G4R4_UNORM as supported on gen8</li>
|
||||
</ul>
|
||||
|
||||
<p>Jonas Ådahl (1):</p>
|
||||
<ul>
|
||||
<li>egl/wayland: Cleanup private display connection when init fails</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (7):</p>
|
||||
<ul>
|
||||
<li>i965: Don't bail on vertex element processing if we need draw params.</li>
|
||||
<li>i965: Fix last slot calculations</li>
|
||||
<li>i965: Fix texturing in the vec4 TCS and GS backends.</li>
|
||||
<li>spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.</li>
|
||||
<li>i965: Make BLORP disable the NP Z PMA stall fix.</li>
|
||||
<li>glsl: Use ir_var_temporary when generating inline functions.</li>
|
||||
<li>i965: Properly flush in hsw_pause_transform_feedback().</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (4):</p>
|
||||
<ul>
|
||||
<li>vdpau: call texture_get_handle while the mutex is being held</li>
|
||||
<li>va: call texture_get_handle while the mutex is being held</li>
|
||||
<li>radeonsi: for the tess barrier, only use emit_waitcnt on SI and LLVM 3.9+</li>
|
||||
<li>radeonsi: don't forget to add HTILE to the buffer list for texturing</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (1):</p>
|
||||
<ul>
|
||||
<li>cso: Don't restore nr_samplers in cso_restore_fragment_samplers</li>
|
||||
</ul>
|
||||
|
||||
<p>Nanley Chery (3):</p>
|
||||
<ul>
|
||||
<li>anv/cmd_buffer: Fix arrayed depth/stencil attachments</li>
|
||||
<li>anv/cmd_buffer: Fix programmed HiZ qpitch</li>
|
||||
<li>anv/image: Disable HiZ for depth buffer arrays</li>
|
||||
</ul>
|
||||
|
||||
<p>Nayan Deshmukh (1):</p>
|
||||
<ul>
|
||||
<li>st/va: delay calling begin_frame until we have all parameters</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (1):</p>
|
||||
<ul>
|
||||
<li>freedreno: some fence cleanup</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (1):</p>
|
||||
<ul>
|
||||
<li>gallium/hud: add missing break in hud_cpufreq_graph_install()</li>
|
||||
</ul>
|
||||
|
||||
<p>Timothy Arceri (3):</p>
|
||||
<ul>
|
||||
<li>nir: Turn imov/fmov of undef into undef</li>
|
||||
<li>glsl: fix opt_minmax redundancy checks against baserange</li>
|
||||
<li>util: fix list_is_singular()</li>
|
||||
</ul>
|
||||
|
||||
<p>Zachary Michaels (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: Always leave poly_offset in a valid state</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
209
docs/relnotes/13.0.5.html
Normal file
209
docs/relnotes/13.0.5.html
Normal file
@@ -0,0 +1,209 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 13.0.5 Release Notes / February 20, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 13.0.5 is a bug fix release which fixes bugs found since the 13.0.4 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 13.0.5 implements the OpenGL 4.4 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.4. OpenGL
|
||||
4.4 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98421">Bug 98421</a> - src/loader/loader.c:111:40: error: unknown type name ‘drmDevicePtr’</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98526">Bug 98526</a> - glsl/tests/general-ir-test regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99532">Bug 99532</a> - Compute shader doesn't give right result under some circumstances</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99631">Bug 99631</a> - segfault with OSVRTrackerView and openscenegraph git master</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99633">Bug 99633</a> - rasterizer/core/clip.h:279:49: error: ‘const struct API_STATE’ has no member named ‘linkageCount’</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99692">Bug 99692</a> - [radv] Mostly broken on Hawaii PRO/CIK ASICs</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Bartosz Tomczyk (2):</p>
|
||||
<ul>
|
||||
<li>r600: Fix stack overflow</li>
|
||||
<li>r600/sb: Fix memory leak</li>
|
||||
</ul>
|
||||
|
||||
<p>Bruce Cherniak (1):</p>
|
||||
<ul>
|
||||
<li>swr: [rasterizer core] Remove dead code Clipper::ClipScalar()</li>
|
||||
</ul>
|
||||
|
||||
<p>Chad Versace (1):</p>
|
||||
<ul>
|
||||
<li>i965/mt: Disable HiZ when sharing depth buffer externally (v2)</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (3):</p>
|
||||
<ul>
|
||||
<li>radv: change base aligmment for allocated memory.</li>
|
||||
<li>radv: fix cik macroModeIndex.</li>
|
||||
<li>radv: adopt some init config workarounds from radeonsi.</li>
|
||||
</ul>
|
||||
|
||||
<p>Derek Foreman (1):</p>
|
||||
<ul>
|
||||
<li>egl/dri2: add image_loader_extension back into loader extensions for wayland</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (26):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 13.0.4</li>
|
||||
<li>configure.ac: list radeon in --with-vulkan-drivers help string</li>
|
||||
<li>i965: automake: correctly set MKDIR_GEN</li>
|
||||
<li>freedreno: automake: correctly set MKDIR_GEN</li>
|
||||
<li>i965: automake: include builddir prior to srcdir</li>
|
||||
<li>i915: automake: include builddir prior to srcdir</li>
|
||||
<li>egl: automake: include builddir prior to srcdir</li>
|
||||
<li>clover: automake: include builddir prior to srcdir</li>
|
||||
<li>st/dri: automake: include builddir prior to srcdir</li>
|
||||
<li>d3dadapter9: automake: include builddir prior to srcdir</li>
|
||||
<li>glx: automake: include builddir prior to srcdir</li>
|
||||
<li>glx/apple: automake: include builddir prior to srcdir</li>
|
||||
<li>glx/windows: automake: include builddir prior to srcdir</li>
|
||||
<li>loader: automake: include builddir prior to srcdir</li>
|
||||
<li>mapi: automake: include builddir prior to srcdir</li>
|
||||
<li>radeon, r200: automake: include builddir prior to srcdir</li>
|
||||
<li>dri/swrast: automake: include builddir prior to srcdir</li>
|
||||
<li>dri/osmesa: automake: include builddir prior to srcdir</li>
|
||||
<li>mesa/tests: automake: include builddir prior to srcdir</li>
|
||||
<li>bin/get-extra-pick-list: use git merge-base to get the branchpoint</li>
|
||||
<li>bin/get-extra-pick-list: rework to use already_picked list</li>
|
||||
<li>bin/get-typod-pick-list.sh: limit `git grep ...' to only as needed</li>
|
||||
<li>bin/get-pick-list.sh: limit `git grep ...' only as needed</li>
|
||||
<li>bin/get-pick-list.sh: remove ancient way of nominating patches</li>
|
||||
<li>bin/get-fixes-pick-list.sh: add new script</li>
|
||||
<li>Update version to 13.0.5</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (1):</p>
|
||||
<ul>
|
||||
<li>vc4: Avoid emitting small immediates for UBO indirect load address guards.</li>
|
||||
</ul>
|
||||
|
||||
<p>Hans de Goede (1):</p>
|
||||
<ul>
|
||||
<li>glx/glvnd: Fix GLXdispatchIndex sorting</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (11):</p>
|
||||
<ul>
|
||||
<li>linker: Slight code rearrange to prevent duplication in the next commit</li>
|
||||
<li>linker: Accurately track gl_uniform_block::stageref</li>
|
||||
<li>glsl: Split process_block_array into two functions</li>
|
||||
<li>glsl: Fix wonkey indentation left from previous commit</li>
|
||||
<li>glsl: Track the linearized array index for each UBO instance array element</li>
|
||||
<li>glsl: Use simpler visitor to determine which UBO and SSBO blocks are used</li>
|
||||
<li>glsl: Add tracking for elements of an array-of-arrays that have been accessed</li>
|
||||
<li>glsl: Add structures to track accessed elements of a single array</li>
|
||||
<li>glsl: Mark a set of array elements as accessed using a list of array_deref_range</li>
|
||||
<li>glsl: Walk a list of ir_dereference_array to mark array elements as accessed</li>
|
||||
<li>linker: Accurately mark a uniform block instance array element as used in a stage</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (3):</p>
|
||||
<ul>
|
||||
<li>vbo: process buffer binding state changes on draw when recording</li>
|
||||
<li>st/mesa: MAX_VARYING is the max supported number of patch varyings, not min</li>
|
||||
<li>nvc0: disable linked tsc mode in compute launch descriptor</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (11):</p>
|
||||
<ul>
|
||||
<li>nir/search: Use the correct bit size for integer comparisons</li>
|
||||
<li>i965/blorp: Use the correct ISL format for combined depth/stencil</li>
|
||||
<li>intel/blorp: Handle clearing of A4B4G4R4 on all platforms</li>
|
||||
<li>isl/formats: Only advertise sampling for A4B4G4R4 on Broadwell</li>
|
||||
<li>anv: Flush render cache before STATE_BASE_ADDRESS on gen7</li>
|
||||
<li>anv: Improve flushing around STATE_BASE_ADDRESS</li>
|
||||
<li>vulkan/wsi/wayland: Handle VK_INCOMPLETE for GetFormats</li>
|
||||
<li>vulkan/wsi/wayland: Handle VK_INCOMPLETE for GetPresentModes</li>
|
||||
<li>vulkan/wsi: Lower the maximum image sizes</li>
|
||||
<li>i965/sampler_state: Pass texObj into update_sampler_state</li>
|
||||
<li>i965/sampler_state: Set the "Base Mip Level" field on Sandy Bridge</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (1):</p>
|
||||
<ul>
|
||||
<li>i965: Unbind deleted shaders from brw_context, fixing malloc heisenbug.</li>
|
||||
</ul>
|
||||
|
||||
<p>Lionel Landwerlin (5):</p>
|
||||
<ul>
|
||||
<li>anv: don't require render target isl bit for depth/stencil surfaces</li>
|
||||
<li>anv: set command buffer to NULL when allocations fail</li>
|
||||
<li>anv: fix descriptor pool internal size allocation</li>
|
||||
<li>spirv: handle OpUndef as part of the variable parsing pass</li>
|
||||
<li>spirv: handle undefined components for OpVectorShuffle</li>
|
||||
</ul>
|
||||
|
||||
<p>Marc-André Lureau (1):</p>
|
||||
<ul>
|
||||
<li>tgsi-dump: dump label if instruction has one</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (2):</p>
|
||||
<ul>
|
||||
<li>radeonsi: always set the TCL1_ACTION_ENA when invalidating L2</li>
|
||||
<li>gallium/radeon: fix performance of buffer readbacks</li>
|
||||
</ul>
|
||||
|
||||
<p>Topi Pohjolainen (2):</p>
|
||||
<ul>
|
||||
<li>i965: Make depth clear flushing more explicit</li>
|
||||
<li>i965/gen6: Issue direct depth stall and flush after depth clear</li>
|
||||
</ul>
|
||||
|
||||
<p>Vinson Lee (2):</p>
|
||||
<ul>
|
||||
<li>scons: Require libdrm >= 2.4.66 for DRM.</li>
|
||||
<li>util: Fix Clang trivial destructor check.</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -1121,6 +1121,7 @@ struct __DRIdri2ExtensionRec {
|
||||
#define __DRI_IMAGE_FORMAT_XRGB2101010 0x1009
|
||||
#define __DRI_IMAGE_FORMAT_ARGB2101010 0x100a
|
||||
#define __DRI_IMAGE_FORMAT_SARGB8 0x100b
|
||||
#define __DRI_IMAGE_FORMAT_ARGB1555 0x100c
|
||||
|
||||
#define __DRI_IMAGE_USE_SHARE 0x0001
|
||||
#define __DRI_IMAGE_USE_SCANOUT 0x0002
|
||||
@@ -1148,6 +1149,7 @@ struct __DRIdri2ExtensionRec {
|
||||
|
||||
#define __DRI_IMAGE_FOURCC_R8 0x20203852
|
||||
#define __DRI_IMAGE_FOURCC_GR88 0x38385247
|
||||
#define __DRI_IMAGE_FOURCC_ARGB1555 0x35315241
|
||||
#define __DRI_IMAGE_FOURCC_RGB565 0x36314752
|
||||
#define __DRI_IMAGE_FOURCC_ARGB8888 0x34325241
|
||||
#define __DRI_IMAGE_FOURCC_XRGB8888 0x34325258
|
||||
|
@@ -1,28 +1,56 @@
|
||||
//
|
||||
// File: vk_icd.h
|
||||
//
|
||||
/*
|
||||
* Copyright (c) 2015-2016 The Khronos Group Inc.
|
||||
* Copyright (c) 2015-2016 Valve Corporation
|
||||
* Copyright (c) 2015-2016 LunarG, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef VKICD_H
|
||||
#define VKICD_H
|
||||
|
||||
#include "vk_platform.h"
|
||||
#include "vulkan.h"
|
||||
|
||||
/*
|
||||
* Loader-ICD version negotiation API
|
||||
*/
|
||||
#define CURRENT_LOADER_ICD_INTERFACE_VERSION 3
|
||||
#define MIN_SUPPORTED_LOADER_ICD_INTERFACE_VERSION 0
|
||||
typedef VkResult (VKAPI_PTR *PFN_vkNegotiateLoaderICDInterfaceVersion)(uint32_t *pVersion);
|
||||
/*
|
||||
* The ICD must reserve space for a pointer for the loader's dispatch
|
||||
* table, at the start of <each object>.
|
||||
* The ICD must initialize this variable using the SET_LOADER_MAGIC_VALUE macro.
|
||||
*/
|
||||
|
||||
#define ICD_LOADER_MAGIC 0x01CDC0DE
|
||||
#define ICD_LOADER_MAGIC 0x01CDC0DE
|
||||
|
||||
typedef union _VK_LOADER_DATA {
|
||||
uintptr_t loaderMagic;
|
||||
void *loaderData;
|
||||
typedef union {
|
||||
uintptr_t loaderMagic;
|
||||
void *loaderData;
|
||||
} VK_LOADER_DATA;
|
||||
|
||||
static inline void set_loader_magic_value(void* pNewObject) {
|
||||
VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
|
||||
static inline void set_loader_magic_value(void *pNewObject) {
|
||||
VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
|
||||
loader_info->loaderMagic = ICD_LOADER_MAGIC;
|
||||
}
|
||||
|
||||
static inline bool valid_loader_magic_value(void* pNewObject) {
|
||||
const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *) pNewObject;
|
||||
static inline bool valid_loader_magic_value(void *pNewObject) {
|
||||
const VK_LOADER_DATA *loader_info = (VK_LOADER_DATA *)pNewObject;
|
||||
return (loader_info->loaderMagic & 0xffffffff) == ICD_LOADER_MAGIC;
|
||||
}
|
||||
|
||||
@@ -30,56 +58,74 @@ static inline bool valid_loader_magic_value(void* pNewObject) {
|
||||
* Windows and Linux ICDs will treat VkSurfaceKHR as a pointer to a struct that
|
||||
* contains the platform-specific connection and surface information.
|
||||
*/
|
||||
typedef enum _VkIcdWsiPlatform {
|
||||
typedef enum {
|
||||
VK_ICD_WSI_PLATFORM_MIR,
|
||||
VK_ICD_WSI_PLATFORM_WAYLAND,
|
||||
VK_ICD_WSI_PLATFORM_WIN32,
|
||||
VK_ICD_WSI_PLATFORM_XCB,
|
||||
VK_ICD_WSI_PLATFORM_XLIB,
|
||||
VK_ICD_WSI_PLATFORM_DISPLAY
|
||||
} VkIcdWsiPlatform;
|
||||
|
||||
typedef struct _VkIcdSurfaceBase {
|
||||
VkIcdWsiPlatform platform;
|
||||
typedef struct {
|
||||
VkIcdWsiPlatform platform;
|
||||
} VkIcdSurfaceBase;
|
||||
|
||||
#ifdef VK_USE_PLATFORM_MIR_KHR
|
||||
typedef struct _VkIcdSurfaceMir {
|
||||
VkIcdSurfaceBase base;
|
||||
MirConnection* connection;
|
||||
MirSurface* mirSurface;
|
||||
typedef struct {
|
||||
VkIcdSurfaceBase base;
|
||||
MirConnection *connection;
|
||||
MirSurface *mirSurface;
|
||||
} VkIcdSurfaceMir;
|
||||
#endif // VK_USE_PLATFORM_MIR_KHR
|
||||
|
||||
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
|
||||
typedef struct _VkIcdSurfaceWayland {
|
||||
VkIcdSurfaceBase base;
|
||||
struct wl_display* display;
|
||||
struct wl_surface* surface;
|
||||
typedef struct {
|
||||
VkIcdSurfaceBase base;
|
||||
struct wl_display *display;
|
||||
struct wl_surface *surface;
|
||||
} VkIcdSurfaceWayland;
|
||||
#endif // VK_USE_PLATFORM_WAYLAND_KHR
|
||||
|
||||
#ifdef VK_USE_PLATFORM_WIN32_KHR
|
||||
typedef struct _VkIcdSurfaceWin32 {
|
||||
VkIcdSurfaceBase base;
|
||||
HINSTANCE hinstance;
|
||||
HWND hwnd;
|
||||
typedef struct {
|
||||
VkIcdSurfaceBase base;
|
||||
HINSTANCE hinstance;
|
||||
HWND hwnd;
|
||||
} VkIcdSurfaceWin32;
|
||||
#endif // VK_USE_PLATFORM_WIN32_KHR
|
||||
|
||||
#ifdef VK_USE_PLATFORM_XCB_KHR
|
||||
typedef struct _VkIcdSurfaceXcb {
|
||||
VkIcdSurfaceBase base;
|
||||
xcb_connection_t* connection;
|
||||
xcb_window_t window;
|
||||
typedef struct {
|
||||
VkIcdSurfaceBase base;
|
||||
xcb_connection_t *connection;
|
||||
xcb_window_t window;
|
||||
} VkIcdSurfaceXcb;
|
||||
#endif // VK_USE_PLATFORM_XCB_KHR
|
||||
|
||||
#ifdef VK_USE_PLATFORM_XLIB_KHR
|
||||
typedef struct _VkIcdSurfaceXlib {
|
||||
VkIcdSurfaceBase base;
|
||||
Display* dpy;
|
||||
Window window;
|
||||
typedef struct {
|
||||
VkIcdSurfaceBase base;
|
||||
Display *dpy;
|
||||
Window window;
|
||||
} VkIcdSurfaceXlib;
|
||||
#endif // VK_USE_PLATFORM_XLIB_KHR
|
||||
|
||||
#ifdef VK_USE_PLATFORM_ANDROID_KHR
|
||||
typedef struct {
|
||||
ANativeWindow* window;
|
||||
} VkIcdSurfaceAndroid;
|
||||
#endif //VK_USE_PLATFORM_ANDROID_KHR
|
||||
|
||||
typedef struct {
|
||||
VkIcdSurfaceBase base;
|
||||
VkDisplayModeKHR displayMode;
|
||||
uint32_t planeIndex;
|
||||
uint32_t planeStackIndex;
|
||||
VkSurfaceTransformFlagBitsKHR transform;
|
||||
float globalAlpha;
|
||||
VkDisplayPlaneAlphaFlagBitsKHR alphaMode;
|
||||
VkExtent2D imageExtent;
|
||||
} VkIcdSurfaceDisplay;
|
||||
|
||||
#endif // VKICD_H
|
||||
|
@@ -651,7 +651,7 @@ def generate(env):
|
||||
env.PkgCheckModules('X11', ['x11', 'xext', 'xdamage', 'xfixes', 'glproto >= 1.4.13'])
|
||||
env.PkgCheckModules('XCB', ['x11-xcb', 'xcb-glx >= 1.8.1', 'xcb-dri2 >= 1.8'])
|
||||
env.PkgCheckModules('XF86VIDMODE', ['xxf86vm'])
|
||||
env.PkgCheckModules('DRM', ['libdrm >= 2.4.38'])
|
||||
env.PkgCheckModules('DRM', ['libdrm >= 2.4.66'])
|
||||
|
||||
if env['x11']:
|
||||
env.Append(CPPPATH = env['X11_CPPPATH'])
|
||||
|
@@ -3946,7 +3946,7 @@ static void
|
||||
handle_shader_output_decl(struct nir_to_llvm_context *ctx,
|
||||
struct nir_variable *variable)
|
||||
{
|
||||
int idx = variable->data.location;
|
||||
int idx = variable->data.location + variable->data.index;
|
||||
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
|
||||
|
||||
variable->data.driver_location = idx * 4;
|
||||
@@ -3976,7 +3976,7 @@ handle_shader_output_decl(struct nir_to_llvm_context *ctx,
|
||||
si_build_alloca_undef(ctx, ctx->f32, "");
|
||||
}
|
||||
}
|
||||
ctx->output_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
|
||||
ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -32,9 +32,6 @@ lib_LTLIBRARIES = libvulkan_radeon.la
|
||||
# The gallium includes are for the util/u_math.h include from main/macros.h
|
||||
|
||||
AM_CPPFLAGS = \
|
||||
$(AMDGPU_CFLAGS) \
|
||||
$(VALGRIND_CFLAGS) \
|
||||
$(DEFINES) \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_builddir)/src \
|
||||
-I$(top_srcdir)/src \
|
||||
@@ -48,7 +45,10 @@ AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/src/mesa \
|
||||
-I$(top_srcdir)/src/mesa/drivers/dri/common \
|
||||
-I$(top_srcdir)/src/gallium/auxiliary \
|
||||
-I$(top_srcdir)/src/gallium/include
|
||||
-I$(top_srcdir)/src/gallium/include \
|
||||
$(AMDGPU_CFLAGS) \
|
||||
$(VALGRIND_CFLAGS) \
|
||||
$(DEFINES)
|
||||
|
||||
AM_CFLAGS = \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
|
@@ -2283,9 +2283,11 @@ void radv_CmdPipelineBarrier(
|
||||
case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
|
||||
case VK_ACCESS_INDEX_READ_BIT:
|
||||
case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
|
||||
case VK_ACCESS_UNIFORM_READ_BIT:
|
||||
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1;
|
||||
break;
|
||||
case VK_ACCESS_UNIFORM_READ_BIT:
|
||||
flush_bits |= RADV_CMD_FLAG_INV_VMEM_L1 | RADV_CMD_FLAG_INV_SMEM_L1;
|
||||
break;
|
||||
case VK_ACCESS_SHADER_READ_BIT:
|
||||
flush_bits |= RADV_CMD_FLAG_INV_GLOBAL_L2;
|
||||
break;
|
||||
|
@@ -866,7 +866,7 @@ VkResult radv_AllocateMemory(
|
||||
flags |= RADEON_FLAG_NO_CPU_ACCESS;
|
||||
else
|
||||
flags |= RADEON_FLAG_CPU_ACCESS;
|
||||
mem->bo = device->ws->buffer_create(device->ws, alloc_size, 32768,
|
||||
mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536,
|
||||
domain, flags);
|
||||
|
||||
if (!mem->bo) {
|
||||
@@ -1823,3 +1823,48 @@ void radv_DestroySampler(
|
||||
return;
|
||||
vk_free2(&device->alloc, pAllocator, sampler);
|
||||
}
|
||||
|
||||
|
||||
/* vk_icd.h does not declare this function, so we declare it here to
|
||||
* suppress Wmissing-prototypes.
|
||||
*/
|
||||
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
|
||||
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
|
||||
|
||||
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
|
||||
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
|
||||
{
|
||||
/* For the full details on loader interface versioning, see
|
||||
* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
|
||||
* What follows is a condensed summary, to help you navigate the large and
|
||||
* confusing official doc.
|
||||
*
|
||||
* - Loader interface v0 is incompatible with later versions. We don't
|
||||
* support it.
|
||||
*
|
||||
* - In loader interface v1:
|
||||
* - The first ICD entrypoint called by the loader is
|
||||
* vk_icdGetInstanceProcAddr(). The ICD must statically expose this
|
||||
* entrypoint.
|
||||
* - The ICD must statically expose no other Vulkan symbol unless it is
|
||||
* linked with -Bsymbolic.
|
||||
* - Each dispatchable Vulkan handle created by the ICD must be
|
||||
* a pointer to a struct whose first member is VK_LOADER_DATA. The
|
||||
* ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
|
||||
* - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
|
||||
* vkDestroySurfaceKHR(). The ICD must be capable of working with
|
||||
* such loader-managed surfaces.
|
||||
*
|
||||
* - Loader interface v2 differs from v1 in:
|
||||
* - The first ICD entrypoint called by the loader is
|
||||
* vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
|
||||
* statically expose this entrypoint.
|
||||
*
|
||||
* - Loader interface v3 differs from v2 in:
|
||||
* - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
|
||||
* vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
|
||||
* because the loader no longer does so.
|
||||
*/
|
||||
*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
@@ -75,7 +75,7 @@ void radv_DestroySurfaceKHR(
|
||||
const VkAllocationCallbacks* pAllocator)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_instance, instance, _instance);
|
||||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
|
||||
ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
|
||||
|
||||
vk_free2(&instance->alloc, pAllocator, surface);
|
||||
}
|
||||
@@ -87,7 +87,7 @@ VkResult radv_GetPhysicalDeviceSurfaceSupportKHR(
|
||||
VkBool32* pSupported)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
|
||||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
|
||||
ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
|
||||
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
|
||||
|
||||
return iface->get_support(surface, &device->wsi_device,
|
||||
@@ -101,7 +101,7 @@ VkResult radv_GetPhysicalDeviceSurfaceCapabilitiesKHR(
|
||||
VkSurfaceCapabilitiesKHR* pSurfaceCapabilities)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
|
||||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
|
||||
ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
|
||||
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
|
||||
|
||||
return iface->get_capabilities(surface, pSurfaceCapabilities);
|
||||
@@ -114,7 +114,7 @@ VkResult radv_GetPhysicalDeviceSurfaceFormatsKHR(
|
||||
VkSurfaceFormatKHR* pSurfaceFormats)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
|
||||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
|
||||
ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
|
||||
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
|
||||
|
||||
return iface->get_formats(surface, &device->wsi_device, pSurfaceFormatCount,
|
||||
@@ -128,7 +128,7 @@ VkResult radv_GetPhysicalDeviceSurfacePresentModesKHR(
|
||||
VkPresentModeKHR* pPresentModes)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
|
||||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, _surface);
|
||||
ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, _surface);
|
||||
struct wsi_interface *iface = device->wsi_device.wsi[surface->platform];
|
||||
|
||||
return iface->get_present_modes(surface, pPresentModeCount,
|
||||
@@ -249,7 +249,7 @@ VkResult radv_CreateSwapchainKHR(
|
||||
VkSwapchainKHR* pSwapchain)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(_VkIcdSurfaceBase, surface, pCreateInfo->surface);
|
||||
ICD_FROM_HANDLE(VkIcdSurfaceBase, surface, pCreateInfo->surface);
|
||||
struct wsi_interface *iface =
|
||||
device->instance->physicalDevice.wsi_device.wsi[surface->platform];
|
||||
struct wsi_swapchain *swapchain;
|
||||
|
@@ -371,6 +371,15 @@ void si_init_config(struct radv_physical_device *physical_device,
|
||||
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
|
||||
|
||||
if (physical_device->rad_info.chip_class >= CIK) {
|
||||
/* If this is 0, Bonaire can hang even if GS isn't being used.
|
||||
* Other chips are unaffected. These are suboptimal values,
|
||||
* but we don't use on-chip GS.
|
||||
*/
|
||||
radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
|
||||
S_028A44_ES_VERTS_PER_SUBGRP(64) |
|
||||
S_028A44_GS_PRIMS_PER_SUBGRP(4));
|
||||
|
||||
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
|
||||
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
|
||||
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
|
||||
radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
|
||||
@@ -383,7 +392,6 @@ void si_init_config(struct radv_physical_device *physical_device,
|
||||
*
|
||||
* LATE_ALLOC_VS = 2 is the highest safe number.
|
||||
*/
|
||||
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
|
||||
radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xffff));
|
||||
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(2));
|
||||
} else {
|
||||
@@ -392,7 +400,6 @@ void si_init_config(struct radv_physical_device *physical_device,
|
||||
* - VS can't execute on CU0.
|
||||
* - If HS writes outputs to LDS, LS can't execute on CU0.
|
||||
*/
|
||||
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xfffe));
|
||||
radeon_set_sh_reg(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, S_00B118_CU_EN(0xfffe));
|
||||
radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(31));
|
||||
}
|
||||
|
@@ -274,6 +274,19 @@ static void radv_set_micro_tile_mode(struct radeon_surf *surf,
|
||||
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
|
||||
}
|
||||
|
||||
static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
|
||||
{
|
||||
unsigned index, tileb;
|
||||
|
||||
tileb = 8 * 8 * surf->bpe;
|
||||
tileb = MIN2(surf->tile_split, tileb);
|
||||
|
||||
for (index = 0; tileb > 64; index++)
|
||||
tileb >>= 1;
|
||||
|
||||
assert(index < 16);
|
||||
return index;
|
||||
}
|
||||
|
||||
static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
|
||||
struct radeon_surf *surf)
|
||||
@@ -435,6 +448,7 @@ static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
|
||||
AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
|
||||
else
|
||||
AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
|
||||
AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -69,6 +69,7 @@ glsl_tests_cache_test_LDADD = \
|
||||
$(PTHREAD_LIBS)
|
||||
|
||||
glsl_tests_general_ir_test_SOURCES = \
|
||||
glsl/tests/array_refcount_test.cpp \
|
||||
glsl/tests/builtin_variable_test.cpp \
|
||||
glsl/tests/invalidate_locations_test.cpp \
|
||||
glsl/tests/general_ir_test.cpp \
|
||||
|
@@ -28,6 +28,8 @@ LIBGLSL_FILES = \
|
||||
glsl/glsl_to_nir.cpp \
|
||||
glsl/glsl_to_nir.h \
|
||||
glsl/hir_field_selection.cpp \
|
||||
glsl/ir_array_refcount.cpp \
|
||||
glsl/ir_array_refcount.h \
|
||||
glsl/ir_basic_block.cpp \
|
||||
glsl/ir_basic_block.h \
|
||||
glsl/ir_builder.cpp \
|
||||
|
254
src/compiler/glsl/ir_array_refcount.cpp
Normal file
254
src/compiler/glsl/ir_array_refcount.cpp
Normal file
@@ -0,0 +1,254 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file ir_array_refcount.cpp
|
||||
*
|
||||
* Provides a visitor which produces a list of variables referenced.
|
||||
*/
|
||||
|
||||
#include "ir.h"
|
||||
#include "ir_visitor.h"
|
||||
#include "ir_array_refcount.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "util/hash_table.h"
|
||||
|
||||
/**
 * Build an empty visitor: no array dereference seen yet, no scratch buffer
 * for array_deref_range records, a fresh ralloc context for that buffer, and
 * an empty pointer-keyed hash table for the per-variable entries.
 */
ir_array_refcount_visitor::ir_array_refcount_visitor()
   : last_array_deref(NULL), derefs(NULL), num_derefs(0), derefs_size(0)
{
   mem_ctx = ralloc_context(NULL);
   ht = _mesa_hash_table_create(NULL,
                                _mesa_hash_pointer,
                                _mesa_key_pointer_equal);
}
|
||||
|
||||
static void
|
||||
free_entry(struct hash_entry *entry)
|
||||
{
|
||||
ir_array_refcount_entry *ivre = (ir_array_refcount_entry *) entry->data;
|
||||
delete ivre;
|
||||
}
|
||||
|
||||
/**
 * Release the visitor's resources.
 */
ir_array_refcount_visitor::~ir_array_refcount_visitor()
{
   /* Frees everything allocated out of mem_ctx, i.e. the derefs buffer. */
   ralloc_free(mem_ctx);

   /* The entries were created with new; free_entry deletes each of them
    * while the table itself is torn down.
    */
   _mesa_hash_table_destroy(ht, free_entry);
}
|
||||
|
||||
/**
 * Create the tracking entry for \c var.
 *
 * Allocates a zeroed bitset with one bit per array(s-of-arrays) element (at
 * least one bit) and records how many array levels the variable's type has.
 */
ir_array_refcount_entry::ir_array_refcount_entry(ir_variable *var)
   : var(var), is_referenced(false)
{
   num_bits = MAX2(1, var->type->arrays_of_arrays_size());

   const unsigned num_words = BITSET_WORDS(num_bits);
   bits = new BITSET_WORD[num_words];
   memset(bits, 0, num_words * sizeof(bits[0]));

   /* Count the "depth" of the arrays-of-arrays by peeling one array level
    * off the type at a time.
    */
   array_depth = 0;
   const glsl_type *level = var->type;
   while (level->is_array()) {
      array_depth++;
      level = level->fields.array;
   }
}
|
||||
|
||||
|
||||
/**
 * Free the bitset allocated with new[] in the constructor.
 */
ir_array_refcount_entry::~ir_array_refcount_entry()
{
   delete[] bits;
}
|
||||
|
||||
|
||||
void
|
||||
ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count)
|
||||
{
|
||||
if (count != array_depth)
|
||||
return;
|
||||
|
||||
mark_array_elements_referenced(dr, count, 1, 0);
|
||||
}
|
||||
|
||||
void
|
||||
ir_array_refcount_entry::mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count,
|
||||
unsigned scale,
|
||||
unsigned linearized_index)
|
||||
{
|
||||
/* Walk through the list of array dereferences in least- to
|
||||
* most-significant order. Along the way, accumulate the current
|
||||
* linearized offset and the scale factor for each array-of-.
|
||||
*/
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
if (dr[i].index < dr[i].size) {
|
||||
linearized_index += dr[i].index * scale;
|
||||
scale *= dr[i].size;
|
||||
} else {
|
||||
/* For each element in the current array, update the count and
|
||||
* offset, then recurse to process the remaining arrays.
|
||||
*
|
||||
* There is some inefficency here if the last element in the
|
||||
* array_deref_range list specifies the entire array. In that case,
|
||||
* the loop will make recursive calls with count == 0. In the call,
|
||||
* all that will happen is the bit will be set.
|
||||
*/
|
||||
for (unsigned j = 0; j < dr[i].size; j++) {
|
||||
mark_array_elements_referenced(&dr[i + 1],
|
||||
count - (i + 1),
|
||||
scale * dr[i].size,
|
||||
linearized_index + (j * scale));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
BITSET_SET(bits, linearized_index);
|
||||
}
|
||||
|
||||
/**
 * Return the tracking entry for \c var, creating one and inserting it into
 * the hash table if the variable has not been seen before.
 */
ir_array_refcount_entry *
ir_array_refcount_visitor::get_variable_entry(ir_variable *var)
{
   assert(var);

   struct hash_entry *const found = _mesa_hash_table_search(ht, var);

   if (found == NULL) {
      ir_array_refcount_entry *const created =
         new ir_array_refcount_entry(var);

      _mesa_hash_table_insert(ht, var, created);
      return created;
   }

   return (ir_array_refcount_entry *) found->data;
}
|
||||
|
||||
|
||||
/**
 * Hand out the next unused array_deref_range slot of the scratch buffer.
 *
 * The buffer is grown by 4096 bytes when the next slot would not fit.
 *
 * \return Pointer to the slot, or NULL if the buffer could not be grown.
 */
array_deref_range *
ir_array_refcount_visitor::get_array_deref()
{
   if ((num_derefs + 1) * sizeof(array_deref_range) > derefs_size) {
      const unsigned grown_size = derefs_size + 4096;
      void *const grown = reralloc_size(mem_ctx, derefs, grown_size);

      /* The size and pointer are only updated once the reallocation has
       * succeeded.
       */
      if (grown == NULL)
         return NULL;

      derefs = (array_deref_range *) grown;
      derefs_size = grown_size;
   }

   return &derefs[num_derefs++];
}
|
||||
|
||||
/**
 * Record which elements of an array variable an array dereference touches.
 *
 * The chain of nested ir_dereference_array nodes is flattened into a list of
 * array_deref_range records (innermost-visited first) and handed to the
 * variable's ir_array_refcount_entry.
 *
 * \return visit_stop if a scratch record or the variable entry cannot be
 *         obtained, visit_continue otherwise.
 */
ir_visitor_status
ir_array_refcount_visitor::visit_enter(ir_dereference_array *ir)
{
   /* It could also be a vector or a matrix.  Individual elements of vectors
    * and matrices are not tracked, so bail.
    */
   if (!ir->array->type->is_array())
      return visit_continue;

   /* If this array dereference is a child of an array dereference that was
    * already visited, just continue on.  Otherwise, for an arrays-of-arrays
    * dereference like x[1][2][3][4], we'd process the [1][2][3][4] sequence,
    * the [1][2][3] sequence, the [1][2] sequence, and the [1] sequence.  This
    * ensures that we only process the full sequence.
    */
   if (last_array_deref && last_array_deref->array == ir) {
      last_array_deref = ir;
      return visit_continue;
   }

   last_array_deref = ir;

   /* Start a fresh record list for this dereference chain. */
   num_derefs = 0;

   ir_rvalue *rv = ir;
   while (rv->ir_type == ir_type_dereference_array) {
      ir_dereference_array *const deref = rv->as_dereference_array();

      assert(deref != NULL);
      assert(deref->array->type->is_array());

      ir_rvalue *const array = deref->array;
      const ir_constant *const idx = deref->array_index->as_constant();
      array_deref_range *const dr = get_array_deref();

      /* get_array_deref() returns NULL when its buffer cannot be grown.
       * Stop the walk instead of writing through a NULL pointer.
       */
      if (dr == NULL)
         return visit_stop;

      dr->size = array->type->array_size();

      if (idx != NULL) {
         dr->index = idx->get_int_component(0);
      } else {
         /* An unsized array can occur at the end of an SSBO.  We can't track
          * accesses to such an array, so bail.
          */
         if (array->type->array_size() == 0)
            return visit_continue;

         /* index == size is the marker for "the entire array is accessed"
          * (non-constant index).
          */
         dr->index = dr->size;
      }

      rv = array;
   }

   ir_dereference_variable *const var_deref = rv->as_dereference_variable();

   /* If the array being dereferenced is not a variable, bail.  At the very
    * least, ir_constant and ir_dereference_record are possible.
    */
   if (var_deref == NULL)
      return visit_continue;

   ir_array_refcount_entry *const entry =
      this->get_variable_entry(var_deref->var);

   if (entry == NULL)
      return visit_stop;

   entry->mark_array_elements_referenced(derefs, num_derefs);

   return visit_continue;
}
|
||||
|
||||
|
||||
/**
 * Flag the variable behind a variable dereference as referenced, creating
 * its tracking entry on first use.
 */
ir_visitor_status
ir_array_refcount_visitor::visit(ir_dereference_variable *ir)
{
   get_variable_entry(ir->variable_referenced())->is_referenced = true;

   return visit_continue;
}
|
||||
|
||||
|
||||
/**
 * Visit a function signature by walking only its body.
 */
ir_visitor_status
ir_array_refcount_visitor::visit_enter(ir_function_signature *ir)
{
   /* The function parameters must not be descended into (and dead-code
    * eliminated), so only the body is traversed here and the result tells
    * the caller to carry on with the parent.
    */
   visit_list_elements(this, &ir->body);

   return visit_continue_with_parent;
}
|
183
src/compiler/glsl/ir_array_refcount.h
Normal file
183
src/compiler/glsl/ir_array_refcount.h
Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file ir_array_refcount.h
|
||||
*
|
||||
* Provides a visitor which produces a list of variables referenced.
|
||||
*/
|
||||
|
||||
#include "ir.h"
|
||||
#include "ir_visitor.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "util/bitset.h"
|
||||
|
||||
/**
|
||||
* Describes an access of an array element or an access of the whole array
|
||||
*/
|
||||
struct array_deref_range {
   /**
    * Index that was accessed.
    *
    * Every valid index is smaller than \c size.  A value equal to \c size
    * is the marker for "the entire array was accessed", e.g. because the
    * index was not a constant.
    */
   unsigned index;

   /** Number of elements in the array; used when computing offsets. */
   unsigned size;
};
|
||||
|
||||
class ir_array_refcount_entry
|
||||
{
|
||||
public:
|
||||
ir_array_refcount_entry(ir_variable *var);
|
||||
~ir_array_refcount_entry();
|
||||
|
||||
ir_variable *var; /* The key: the variable's pointer. */
|
||||
|
||||
/** Has the variable been referenced? */
|
||||
bool is_referenced;
|
||||
|
||||
/**
|
||||
* Mark a set of array elements as accessed.
|
||||
*
|
||||
* If every \c array_deref_range is for a single index, only a single
|
||||
* element will be marked. If any \c array_deref_range is for an entire
|
||||
* array-of-, then multiple elements will be marked.
|
||||
*
|
||||
* Items in the \c array_deref_range list appear in least- to
|
||||
* most-significant order. This is the \b opposite order the indices
|
||||
* appear in the GLSL shader text. An array access like
|
||||
*
|
||||
* x = y[1][i][3];
|
||||
*
|
||||
* would appear as
|
||||
*
|
||||
* { { 3, n }, { m, m }, { 1, p } }
|
||||
*
|
||||
* where n, m, and p are the sizes of the arrays-of-arrays.
|
||||
*
|
||||
* The set of marked array elements can later be queried by
|
||||
* \c ::is_linearized_index_referenced.
|
||||
*
|
||||
* \param dr List of array_deref_range elements to be processed.
|
||||
* \param count Number of array_deref_range elements to be processed.
|
||||
*/
|
||||
void mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count);
|
||||
|
||||
/** Has a linearized array index been referenced? */
|
||||
bool is_linearized_index_referenced(unsigned linearized_index) const
|
||||
{
|
||||
assert(bits != 0);
|
||||
assert(linearized_index <= num_bits);
|
||||
|
||||
return BITSET_TEST(bits, linearized_index);
|
||||
}
|
||||
|
||||
private:
|
||||
/** Set of bit-flags to note which array elements have been accessed. */
|
||||
BITSET_WORD *bits;
|
||||
|
||||
/**
|
||||
* Total number of bits referenced by \c bits.
|
||||
*
|
||||
* Also the total number of array(s-of-arrays) elements of \c var.
|
||||
*/
|
||||
unsigned num_bits;
|
||||
|
||||
/** Count of nested arrays in the type. */
|
||||
unsigned array_depth;
|
||||
|
||||
/**
|
||||
* Recursive part of the public mark_array_elements_referenced method.
|
||||
*
|
||||
* The recursion occurs when an entire array-of- is accessed. See the
|
||||
* implementation for more details.
|
||||
*
|
||||
* \param dr List of array_deref_range elements to be
|
||||
* processed.
|
||||
* \param count Number of array_deref_range elements to be
|
||||
* processed.
|
||||
* \param scale Current offset scale.
|
||||
* \param linearized_index Current accumulated linearized array index.
|
||||
*/
|
||||
void mark_array_elements_referenced(const array_deref_range *dr,
|
||||
unsigned count,
|
||||
unsigned scale,
|
||||
unsigned linearized_index);
|
||||
|
||||
friend class array_refcount_test;
|
||||
};
|
||||
|
||||
class ir_array_refcount_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
ir_array_refcount_visitor(void);
|
||||
~ir_array_refcount_visitor(void);
|
||||
|
||||
virtual ir_visitor_status visit(ir_dereference_variable *);
|
||||
|
||||
virtual ir_visitor_status visit_enter(ir_function_signature *);
|
||||
virtual ir_visitor_status visit_enter(ir_dereference_array *);
|
||||
|
||||
/**
|
||||
* Find variable in the hash table, and insert it if not present
|
||||
*/
|
||||
ir_array_refcount_entry *get_variable_entry(ir_variable *var);
|
||||
|
||||
/**
|
||||
* Hash table mapping ir_variable to ir_array_refcount_entry.
|
||||
*/
|
||||
struct hash_table *ht;
|
||||
|
||||
void *mem_ctx;
|
||||
|
||||
private:
|
||||
/** Get an array_deref_range element from private tracking. */
|
||||
array_deref_range *get_array_deref();
|
||||
|
||||
/**
|
||||
* Last ir_dereference_array that was visited
|
||||
*
|
||||
* Used to prevent some redundant calculations.
|
||||
*
|
||||
* \sa ::visit_enter(ir_dereference_array *)
|
||||
*/
|
||||
ir_dereference_array *last_array_deref;
|
||||
|
||||
/**
|
||||
* \name array_deref_range tracking
|
||||
*/
|
||||
/*@{*/
|
||||
/** Currently allocated block of derefs. */
|
||||
array_deref_range *derefs;
|
||||
|
||||
/** Number of derefs used in current processing. */
|
||||
unsigned num_derefs;
|
||||
|
||||
/** Size of the derefs buffer in bytes. */
|
||||
unsigned derefs_size;
|
||||
/*@}*/
|
||||
};
|
@@ -214,67 +214,98 @@ struct block {
|
||||
bool has_instance_name;
|
||||
};
|
||||
|
||||
static void process_block_array_leaf(char **name, gl_uniform_block *blocks,
|
||||
ubo_visitor *parcel,
|
||||
gl_uniform_buffer_variable *variables,
|
||||
const struct link_uniform_block_active *const b,
|
||||
unsigned *block_index,
|
||||
unsigned *binding_offset,
|
||||
unsigned linearized_index,
|
||||
struct gl_context *ctx,
|
||||
struct gl_shader_program *prog);
|
||||
|
||||
/**
|
||||
*
|
||||
* \param first_index Value of \c block_index for the first element of the
|
||||
* array.
|
||||
*/
|
||||
static void
|
||||
process_block_array(struct uniform_block_array_elements *ub_array, char **name,
|
||||
size_t name_length, gl_uniform_block *blocks,
|
||||
ubo_visitor *parcel, gl_uniform_buffer_variable *variables,
|
||||
const struct link_uniform_block_active *const b,
|
||||
unsigned *block_index, unsigned *binding_offset,
|
||||
struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
struct gl_context *ctx, struct gl_shader_program *prog,
|
||||
unsigned first_index)
|
||||
{
|
||||
if (ub_array) {
|
||||
for (unsigned j = 0; j < ub_array->num_array_elements; j++) {
|
||||
size_t new_length = name_length;
|
||||
for (unsigned j = 0; j < ub_array->num_array_elements; j++) {
|
||||
size_t new_length = name_length;
|
||||
|
||||
/* Append the subscript to the current variable name */
|
||||
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]",
|
||||
ub_array->array_elements[j]);
|
||||
/* Append the subscript to the current variable name */
|
||||
ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]",
|
||||
ub_array->array_elements[j]);
|
||||
|
||||
if (ub_array->array) {
|
||||
process_block_array(ub_array->array, name, new_length, blocks,
|
||||
parcel, variables, b, block_index,
|
||||
binding_offset, ctx, prog);
|
||||
binding_offset, ctx, prog, first_index);
|
||||
} else {
|
||||
process_block_array_leaf(name, blocks,
|
||||
parcel, variables, b, block_index,
|
||||
binding_offset, *block_index - first_index,
|
||||
ctx, prog);
|
||||
}
|
||||
} else {
|
||||
unsigned i = *block_index;
|
||||
const glsl_type *type = b->type->without_array();
|
||||
|
||||
blocks[i].Name = ralloc_strdup(blocks, *name);
|
||||
blocks[i].Uniforms = &variables[(*parcel).index];
|
||||
|
||||
/* The GL_ARB_shading_language_420pack spec says:
|
||||
*
|
||||
* "If the binding identifier is used with a uniform block
|
||||
* instanced as an array then the first element of the array
|
||||
* takes the specified block binding and each subsequent
|
||||
* element takes the next consecutive uniform block binding
|
||||
* point."
|
||||
*/
|
||||
blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0;
|
||||
|
||||
blocks[i].UniformBufferSize = 0;
|
||||
blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
|
||||
|
||||
parcel->process(type, blocks[i].Name);
|
||||
|
||||
blocks[i].UniformBufferSize = parcel->buffer_size;
|
||||
|
||||
/* Check SSBO size is lower than maximum supported size for SSBO */
|
||||
if (b->is_shader_storage &&
|
||||
parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
|
||||
linker_error(prog, "shader storage block `%s' has size %d, "
|
||||
"which is larger than than the maximum allowed (%d)",
|
||||
b->type->name,
|
||||
parcel->buffer_size,
|
||||
ctx->Const.MaxShaderStorageBlockSize);
|
||||
}
|
||||
blocks[i].NumUniforms =
|
||||
(unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
|
||||
|
||||
*block_index = *block_index + 1;
|
||||
*binding_offset = *binding_offset + 1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
process_block_array_leaf(char **name,
|
||||
gl_uniform_block *blocks,
|
||||
ubo_visitor *parcel, gl_uniform_buffer_variable *variables,
|
||||
const struct link_uniform_block_active *const b,
|
||||
unsigned *block_index, unsigned *binding_offset,
|
||||
unsigned linearized_index,
|
||||
struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
{
|
||||
unsigned i = *block_index;
|
||||
const glsl_type *type = b->type->without_array();
|
||||
|
||||
blocks[i].Name = ralloc_strdup(blocks, *name);
|
||||
blocks[i].Uniforms = &variables[(*parcel).index];
|
||||
|
||||
/* The GL_ARB_shading_language_420pack spec says:
|
||||
*
|
||||
* "If the binding identifier is used with a uniform block instanced as
|
||||
* an array then the first element of the array takes the specified
|
||||
* block binding and each subsequent element takes the next consecutive
|
||||
* uniform block binding point."
|
||||
*/
|
||||
blocks[i].Binding = (b->has_binding) ? b->binding + *binding_offset : 0;
|
||||
|
||||
blocks[i].UniformBufferSize = 0;
|
||||
blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
|
||||
blocks[i].linearized_array_index = linearized_index;
|
||||
|
||||
parcel->process(type, blocks[i].Name);
|
||||
|
||||
blocks[i].UniformBufferSize = parcel->buffer_size;
|
||||
|
||||
/* Check SSBO size is lower than maximum supported size for SSBO */
|
||||
if (b->is_shader_storage &&
|
||||
parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
|
||||
linker_error(prog, "shader storage block `%s' has size %d, "
|
||||
"which is larger than than the maximum allowed (%d)",
|
||||
b->type->name,
|
||||
parcel->buffer_size,
|
||||
ctx->Const.MaxShaderStorageBlockSize);
|
||||
}
|
||||
blocks[i].NumUniforms =
|
||||
(unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
|
||||
|
||||
*block_index = *block_index + 1;
|
||||
*binding_offset = *binding_offset + 1;
|
||||
}
|
||||
|
||||
/* This function resizes the array types of the block so that later we can use
|
||||
* this new size to correctly calculate the offset for indirect indexing.
|
||||
*/
|
||||
@@ -351,7 +382,8 @@ create_buffer_blocks(void *mem_ctx, struct gl_context *ctx,
|
||||
|
||||
assert(b->has_instance_name);
|
||||
process_block_array(b->array, &name, name_length, blocks, &parcel,
|
||||
variables, b, &i, &binding_offset, ctx, prog);
|
||||
variables, b, &i, &binding_offset, ctx, prog,
|
||||
i);
|
||||
ralloc_free(name);
|
||||
} else {
|
||||
blocks[i].Name = ralloc_strdup(blocks, block_type->name);
|
||||
|
@@ -28,6 +28,7 @@
|
||||
#include "glsl_symbol_table.h"
|
||||
#include "program.h"
|
||||
#include "util/string_to_uint_map.h"
|
||||
#include "ir_array_refcount.h"
|
||||
|
||||
/**
|
||||
* \file link_uniforms.cpp
|
||||
@@ -882,6 +883,15 @@ public:
|
||||
unsigned shader_shadow_samplers;
|
||||
};
|
||||
|
||||
static bool
|
||||
variable_is_referenced(ir_array_refcount_visitor &v, ir_variable *var)
|
||||
{
|
||||
ir_array_refcount_entry *const entry = v.get_variable_entry(var);
|
||||
|
||||
return entry->is_referenced;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Walks the IR and update the references to uniform blocks in the
|
||||
* ir_variables to point at linked shader's list (previously, they
|
||||
@@ -889,8 +899,13 @@ public:
|
||||
* shaders).
|
||||
*/
|
||||
static void
|
||||
link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
|
||||
link_update_uniform_buffer_variables(struct gl_linked_shader *shader,
|
||||
unsigned stage)
|
||||
{
|
||||
ir_array_refcount_visitor v;
|
||||
|
||||
v.run(shader->ir);
|
||||
|
||||
foreach_in_list(ir_instruction, node, shader->ir) {
|
||||
ir_variable *const var = node->as_variable();
|
||||
|
||||
@@ -900,7 +915,48 @@ link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
|
||||
assert(var->data.mode == ir_var_uniform ||
|
||||
var->data.mode == ir_var_shader_storage);
|
||||
|
||||
unsigned num_blocks = var->data.mode == ir_var_uniform ?
|
||||
shader->NumUniformBlocks : shader->NumShaderStorageBlocks;
|
||||
struct gl_uniform_block **blks = var->data.mode == ir_var_uniform ?
|
||||
shader->UniformBlocks : shader->ShaderStorageBlocks;
|
||||
|
||||
if (var->is_interface_instance()) {
|
||||
const ir_array_refcount_entry *const entry = v.get_variable_entry(var);
|
||||
|
||||
if (entry->is_referenced) {
|
||||
/* Since this is an interface instance, the instance type will be
|
||||
* same as the array-stripped variable type. If the variable type
|
||||
* is an array, then the block names will be suffixed with [0]
|
||||
* through [n-1]. Unlike for non-interface instances, there will
|
||||
* not be structure types here, so the only name sentinel that we
|
||||
* have to worry about is [.
|
||||
*/
|
||||
assert(var->type->without_array() == var->get_interface_type());
|
||||
const char sentinel = var->type->is_array() ? '[' : '\0';
|
||||
|
||||
const ptrdiff_t len = strlen(var->get_interface_type()->name);
|
||||
for (unsigned i = 0; i < num_blocks; i++) {
|
||||
const char *const begin = blks[i]->Name;
|
||||
const char *const end = strchr(begin, sentinel);
|
||||
|
||||
if (end == NULL)
|
||||
continue;
|
||||
|
||||
if (len != (end - begin))
|
||||
continue;
|
||||
|
||||
/* Even when a match is found, do not "break" here. This could
|
||||
* be an array of instances, and all elements of the array need
|
||||
* to be marked as referenced.
|
||||
*/
|
||||
if (strncmp(begin, var->get_interface_type()->name, len) == 0 &&
|
||||
(!var->type->is_array() ||
|
||||
entry->is_linearized_index_referenced(blks[i]->linearized_array_index))) {
|
||||
blks[i]->stageref |= 1U << stage;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var->data.location = 0;
|
||||
continue;
|
||||
}
|
||||
@@ -915,11 +971,6 @@ link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
|
||||
sentinel = '[';
|
||||
}
|
||||
|
||||
unsigned num_blocks = var->data.mode == ir_var_uniform ?
|
||||
shader->NumUniformBlocks : shader->NumShaderStorageBlocks;
|
||||
struct gl_uniform_block **blks = var->data.mode == ir_var_uniform ?
|
||||
shader->UniformBlocks : shader->ShaderStorageBlocks;
|
||||
|
||||
const unsigned l = strlen(var->name);
|
||||
for (unsigned i = 0; i < num_blocks; i++) {
|
||||
for (unsigned j = 0; j < blks[i]->NumUniforms; j++) {
|
||||
@@ -933,14 +984,17 @@ link_update_uniform_buffer_variables(struct gl_linked_shader *shader)
|
||||
if ((ptrdiff_t) l != (end - begin))
|
||||
continue;
|
||||
|
||||
if (strncmp(var->name, begin, l) == 0) {
|
||||
found = true;
|
||||
var->data.location = j;
|
||||
break;
|
||||
}
|
||||
} else if (!strcmp(var->name, blks[i]->Uniforms[j].Name)) {
|
||||
found = true;
|
||||
found = strncmp(var->name, begin, l) == 0;
|
||||
} else {
|
||||
found = strcmp(var->name, blks[i]->Uniforms[j].Name) == 0;
|
||||
}
|
||||
|
||||
if (found) {
|
||||
var->data.location = j;
|
||||
|
||||
if (variable_is_referenced(v, var))
|
||||
blks[i]->stageref |= 1U << stage;
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1262,7 +1316,7 @@ link_assign_uniform_locations(struct gl_shader_program *prog,
|
||||
memset(sh->SamplerUnits, 0, sizeof(sh->SamplerUnits));
|
||||
memset(sh->ImageUnits, 0, sizeof(sh->ImageUnits));
|
||||
|
||||
link_update_uniform_buffer_variables(sh);
|
||||
link_update_uniform_buffer_variables(sh, i);
|
||||
|
||||
/* Reset various per-shader target counts.
|
||||
*/
|
||||
|
@@ -1183,11 +1183,10 @@ interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog,
|
||||
if (stage_index != -1) {
|
||||
struct gl_linked_shader *sh = prog->_LinkedShaders[i];
|
||||
|
||||
blks[j].stageref |= (1 << i);
|
||||
|
||||
struct gl_uniform_block **sh_blks = validate_ssbo ?
|
||||
sh->ShaderStorageBlocks : sh->UniformBlocks;
|
||||
|
||||
blks[j].stageref |= sh_blks[stage_index]->stageref;
|
||||
sh_blks[stage_index] = &blks[j];
|
||||
}
|
||||
}
|
||||
|
@@ -128,7 +128,7 @@ ir_call::generate_inline(ir_instruction *next_ir)
|
||||
parameters[i] = NULL;
|
||||
} else {
|
||||
parameters[i] = sig_param->clone(ctx, ht);
|
||||
parameters[i]->data.mode = ir_var_auto;
|
||||
parameters[i]->data.mode = ir_var_temporary;
|
||||
|
||||
/* Remove the read-only decoration because we're going to write
|
||||
* directly to this variable. If the cloned variable is left
|
||||
|
@@ -355,7 +355,7 @@ ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
|
||||
*/
|
||||
if (!is_redundant && limits[i].low && baserange.high) {
|
||||
cr = compare_components(limits[i].low, baserange.high);
|
||||
if (cr >= EQUAL && cr != MIXED)
|
||||
if (cr > EQUAL && cr != MIXED)
|
||||
is_redundant = true;
|
||||
}
|
||||
} else {
|
||||
@@ -373,7 +373,7 @@ ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
|
||||
*/
|
||||
if (!is_redundant && limits[i].high && baserange.low) {
|
||||
cr = compare_components(limits[i].high, baserange.low);
|
||||
if (cr <= EQUAL)
|
||||
if (cr < EQUAL)
|
||||
is_redundant = true;
|
||||
}
|
||||
}
|
||||
|
717
src/compiler/glsl/tests/array_refcount_test.cpp
Normal file
717
src/compiler/glsl/tests/array_refcount_test.cpp
Normal file
@@ -0,0 +1,717 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#include <gtest/gtest.h>
|
||||
#include "ir.h"
|
||||
#include "ir_array_refcount.h"
|
||||
#include "ir_builder.h"
|
||||
#include "util/hash_table.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
class array_refcount_test : public ::testing::Test {
|
||||
public:
|
||||
virtual void SetUp();
|
||||
virtual void TearDown();
|
||||
|
||||
exec_list instructions;
|
||||
ir_factory *body;
|
||||
void *mem_ctx;
|
||||
|
||||
/**
|
||||
* glsl_type for a vec4[3][4][5].
|
||||
*
|
||||
* The exceptionally verbose name is picked because it matches the syntax
|
||||
* of http://cdecl.org/.
|
||||
*/
|
||||
const glsl_type *array_3_of_array_4_of_array_5_of_vec4;
|
||||
|
||||
/**
|
||||
* glsl_type for a int[3].
|
||||
*
|
||||
* The exceptionally verbose name is picked because it matches the syntax
|
||||
* of http://cdecl.org/.
|
||||
*/
|
||||
const glsl_type *array_3_of_int;
|
||||
|
||||
/**
|
||||
* Wrapper to access private member "bits" of ir_array_refcount_entry
|
||||
*
|
||||
* The test class is a friend to ir_array_refcount_entry, but the
|
||||
* individual tests are not part of the class. Since the friendliness of
|
||||
* the test class does not extend to the tests, provide a wrapper.
|
||||
*/
|
||||
const BITSET_WORD *get_bits(const ir_array_refcount_entry &entry)
|
||||
{
|
||||
return entry.bits;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper to access private member "num_bits" of ir_array_refcount_entry
|
||||
*
|
||||
* The test class is a friend to ir_array_refcount_entry, but the
|
||||
* individual tests are not part of the class. Since the friendliness of
|
||||
* the test class does not extend to the tests, provide a wrapper.
|
||||
*/
|
||||
unsigned get_num_bits(const ir_array_refcount_entry &entry)
|
||||
{
|
||||
return entry.num_bits;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wrapper to access private member "array_depth" of ir_array_refcount_entry
|
||||
*
|
||||
* The test class is a friend to ir_array_refcount_entry, but the
|
||||
* individual tests are not part of the class. Since the friendliness of
|
||||
* the test class does not extend to the tests, provide a wrapper.
|
||||
*/
|
||||
unsigned get_array_depth(const ir_array_refcount_entry &entry)
|
||||
{
|
||||
return entry.array_depth;
|
||||
}
|
||||
};
|
||||
|
||||
void
|
||||
array_refcount_test::SetUp()
|
||||
{
|
||||
mem_ctx = ralloc_context(NULL);
|
||||
|
||||
instructions.make_empty();
|
||||
body = new ir_factory(&instructions, mem_ctx);
|
||||
|
||||
/* The type of vec4 x[3][4][5]; */
|
||||
const glsl_type *const array_5_of_vec4 =
|
||||
glsl_type::get_array_instance(glsl_type::vec4_type, 5);
|
||||
const glsl_type *const array_4_of_array_5_of_vec4 =
|
||||
glsl_type::get_array_instance(array_5_of_vec4, 4);
|
||||
array_3_of_array_4_of_array_5_of_vec4 =
|
||||
glsl_type::get_array_instance(array_4_of_array_5_of_vec4, 3);
|
||||
|
||||
array_3_of_int = glsl_type::get_array_instance(glsl_type::int_type, 3);
|
||||
}
|
||||
|
||||
void
|
||||
array_refcount_test::TearDown()
|
||||
{
|
||||
delete body;
|
||||
body = NULL;
|
||||
|
||||
ralloc_free(mem_ctx);
|
||||
mem_ctx = NULL;
|
||||
}
|
||||
|
||||
/* Wrap "array[index]" in an operand.  The new ir_dereference_array is
 * allocated out of the ralloc parent of the array value.
 */
static operand
deref_array(operand array, operand index)
{
   void *const parent_ctx = ralloc_parent(array.val);
   ir_rvalue *const element =
      new(parent_ctx) ir_dereference_array(array.val, index.val);

   return operand(element);
}
|
||||
|
||||
/* Wrap "s.field" in an operand.  The new ir_dereference_record is allocated
 * out of the ralloc parent of the structure value.
 */
static operand
deref_struct(operand s, const char *field)
{
   void *const parent_ctx = ralloc_parent(s.val);
   ir_rvalue *const member =
      new(parent_ctx) ir_dereference_record(s.val, field);

   return operand(member);
}
|
||||
|
||||
/**
|
||||
* Verify that only the specified set of ir_variables exists in the hash table
|
||||
*/
|
||||
static void
|
||||
validate_variables_in_hash_table(struct hash_table *ht,
|
||||
unsigned count,
|
||||
...)
|
||||
{
|
||||
ir_variable **vars = new ir_variable *[count];
|
||||
va_list args;
|
||||
|
||||
/* Make a copy of the list of expected ir_variables. The copied list can
|
||||
* be modified during the checking.
|
||||
*/
|
||||
va_start(args, count);
|
||||
|
||||
for (unsigned i = 0; i < count; i++)
|
||||
vars[i] = va_arg(args, ir_variable *);
|
||||
|
||||
va_end(args);
|
||||
|
||||
struct hash_entry *entry;
|
||||
hash_table_foreach(ht, entry) {
|
||||
const ir_instruction *const ir = (ir_instruction *) entry->key;
|
||||
const ir_variable *const v = ir->as_variable();
|
||||
|
||||
if (v == NULL) {
|
||||
ADD_FAILURE() << "Invalid junk in hash table: ir_type = "
|
||||
<< ir->ir_type << ", address = "
|
||||
<< (void *) ir;
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned i;
|
||||
for (i = 0; i < count; i++) {
|
||||
if (vars[i] == NULL)
|
||||
continue;
|
||||
|
||||
if (vars[i] == v)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == count) {
|
||||
ADD_FAILURE() << "Invalid variable in hash table: \""
|
||||
<< v->name << "\"";
|
||||
} else {
|
||||
/* As each variable is encountered, remove it from the set. Don't
|
||||
* bother compacting the set because we don't care about
|
||||
* performance here.
|
||||
*/
|
||||
vars[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check that there's nothing left in the set. */
|
||||
for (unsigned i = 0; i < count; i++) {
|
||||
if (vars[i] != NULL) {
|
||||
ADD_FAILURE() << "Variable was not in the hash table: \""
|
||||
<< vars[i]->name << "\"";
|
||||
}
|
||||
}
|
||||
|
||||
delete [] vars;
|
||||
}
|
||||
|
||||
TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_scalar)
{
   /* A fresh entry for an int: one bit, depth zero, nothing referenced. */
   ir_variable *const scalar =
      new(mem_ctx) ir_variable(glsl_type::int_type, "a", ir_var_auto);
   ir_array_refcount_entry entry(scalar);

   ASSERT_NE((void *)0, get_bits(entry));

   EXPECT_EQ(1, get_num_bits(entry));
   EXPECT_EQ(0, get_array_depth(entry));
   EXPECT_FALSE(entry.is_referenced);
   EXPECT_FALSE(entry.is_linearized_index_referenced(0));
}
|
||||
|
||||
TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_vector)
{
   /* A fresh entry for a vec4: one bit, depth zero, nothing referenced. */
   ir_variable *const vector =
      new(mem_ctx) ir_variable(glsl_type::vec4_type, "a", ir_var_auto);
   ir_array_refcount_entry entry(vector);

   ASSERT_NE((void *)0, get_bits(entry));

   EXPECT_EQ(1, get_num_bits(entry));
   EXPECT_EQ(0, get_array_depth(entry));
   EXPECT_FALSE(entry.is_referenced);
   EXPECT_FALSE(entry.is_linearized_index_referenced(0));
}
|
||||
|
||||
TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_matrix)
{
   /* A fresh entry for a mat4: one bit, depth zero, nothing referenced. */
   ir_variable *const matrix =
      new(mem_ctx) ir_variable(glsl_type::mat4_type, "a", ir_var_auto);
   ir_array_refcount_entry entry(matrix);

   ASSERT_NE((void *)0, get_bits(entry));

   EXPECT_EQ(1, get_num_bits(entry));
   EXPECT_EQ(0, get_array_depth(entry));
   EXPECT_FALSE(entry.is_referenced);
   EXPECT_FALSE(entry.is_linearized_index_referenced(0));
}
|
||||
|
||||
TEST_F(array_refcount_test, ir_array_refcount_entry_initial_state_for_array)
{
   /* A fresh entry for vec4[3][4][5]: one bit per element, depth three, and
    * no element referenced yet.
    */
   ir_variable *const array =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "a",
                               ir_var_auto);
   const unsigned element_count = array->type->arrays_of_arrays_size();

   ir_array_refcount_entry entry(array);

   ASSERT_NE((void *)0, get_bits(entry));

   EXPECT_EQ(element_count, get_num_bits(entry));
   EXPECT_EQ(3, get_array_depth(entry));
   EXPECT_FALSE(entry.is_referenced);

   for (unsigned idx = 0; idx < element_count; idx++) {
      EXPECT_FALSE(entry.is_linearized_index_referenced(idx))
         << "index = " << idx;
   }
}
|
||||
|
||||
TEST_F(array_refcount_test, mark_array_elements_referenced_simple)
{
   ir_variable *const array =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "a",
                               ir_var_auto);
   const unsigned element_count = array->type->arrays_of_arrays_size();

   ir_array_refcount_entry entry(array);

   /* One specific element: index 0 of 5, index 1 of 4, index 2 of 3. */
   static const array_deref_range ranges[] = {
      { 0, 5 }, { 1, 4 }, { 2, 3 }
   };
   const unsigned only_marked = 0 + (1 * 5) + (2 * 4 * 5);

   entry.mark_array_elements_referenced(ranges, 3);

   /* Exactly that one linearized index may be flagged. */
   for (unsigned idx = 0; idx < element_count; idx++) {
      EXPECT_EQ(idx == only_marked,
                entry.is_linearized_index_referenced(idx));
   }
}
|
||||
|
||||
TEST_F(array_refcount_test, mark_array_elements_referenced_whole_first_array)
{
   ir_variable *const array =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "a",
                               ir_var_auto);

   ir_array_refcount_entry entry(array);

   /* The last range has index == size (3 of 3); the expectations below
    * require every value of the outermost subscript to be flagged.
    */
   static const array_deref_range ranges[] = {
      { 0, 5 }, { 1, 4 }, { 3, 3 }
   };

   entry.mark_array_elements_referenced(ranges, 3);

   for (unsigned outer = 0; outer < 3; outer++) {
      for (unsigned mid = 0; mid < 4; mid++) {
         for (unsigned inner = 0; inner < 5; inner++) {
            const unsigned flat = inner + (mid * 5) + (outer * 4 * 5);
            const bool expect_marked = (mid == 1) && (inner == 0);

            EXPECT_EQ(expect_marked,
                      entry.is_linearized_index_referenced(flat));
         }
      }
   }
}
|
||||
|
||||
TEST_F(array_refcount_test, mark_array_elements_referenced_whole_second_array)
{
   ir_variable *const array =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "a",
                               ir_var_auto);

   ir_array_refcount_entry entry(array);

   /* The middle range has index == size (4 of 4); the expectations below
    * require every value of the middle subscript to be flagged.
    */
   static const array_deref_range ranges[] = {
      { 0, 5 }, { 4, 4 }, { 1, 3 }
   };

   entry.mark_array_elements_referenced(ranges, 3);

   for (unsigned outer = 0; outer < 3; outer++) {
      for (unsigned mid = 0; mid < 4; mid++) {
         for (unsigned inner = 0; inner < 5; inner++) {
            const unsigned flat = inner + (mid * 5) + (outer * 4 * 5);
            const bool expect_marked = (outer == 1) && (inner == 0);

            EXPECT_EQ(expect_marked,
                      entry.is_linearized_index_referenced(flat));
         }
      }
   }
}
|
||||
|
||||
TEST_F(array_refcount_test, mark_array_elements_referenced_whole_third_array)
{
   ir_variable *const array =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "a",
                               ir_var_auto);

   ir_array_refcount_entry entry(array);

   /* The first range has index == size (5 of 5); the expectations below
    * require every value of the innermost subscript to be flagged.
    */
   static const array_deref_range ranges[] = {
      { 5, 5 }, { 2, 4 }, { 1, 3 }
   };

   entry.mark_array_elements_referenced(ranges, 3);

   for (unsigned outer = 0; outer < 3; outer++) {
      for (unsigned mid = 0; mid < 4; mid++) {
         for (unsigned inner = 0; inner < 5; inner++) {
            const unsigned flat = inner + (mid * 5) + (outer * 4 * 5);
            const bool expect_marked = (outer == 1) && (mid == 2);

            EXPECT_EQ(expect_marked,
                      entry.is_linearized_index_referenced(flat));
         }
      }
   }
}
|
||||
|
||||
TEST_F(array_refcount_test, mark_array_elements_referenced_whole_first_and_third_arrays)
{
   ir_variable *const array =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "a",
                               ir_var_auto);

   ir_array_refcount_entry entry(array);

   /* Both the first and the last range have index == size; only the middle
    * subscript (3 of 4) selects a single value.
    */
   static const array_deref_range ranges[] = {
      { 5, 5 }, { 3, 4 }, { 3, 3 }
   };

   entry.mark_array_elements_referenced(ranges, 3);

   for (unsigned outer = 0; outer < 3; outer++) {
      for (unsigned mid = 0; mid < 4; mid++) {
         for (unsigned inner = 0; inner < 5; inner++) {
            const unsigned flat = inner + (mid * 5) + (outer * 4 * 5);
            const bool expect_marked = (mid == 3);

            EXPECT_EQ(expect_marked,
                      entry.is_linearized_index_referenced(flat));
         }
      }
   }
}
|
||||
|
||||
TEST_F(array_refcount_test, do_not_process_vector_indexing)
{
   /* Indexing a vector looks much like indexing an array, but the visitor
    * is not supposed to record per-element accesses for vector or matrix
    * types.
    */
   ir_variable *var_a =
      new(mem_ctx) ir_variable(glsl_type::float_type, "a", ir_var_auto);
   ir_variable *var_b =
      new(mem_ctx) ir_variable(glsl_type::int_type, "b", ir_var_auto);
   ir_variable *var_c =
      new(mem_ctx) ir_variable(glsl_type::vec4_type, "c", ir_var_auto);

   /* a = c[b] */
   body->emit(assign(var_a, deref_array(var_c, var_b)));

   ir_array_refcount_visitor v;

   visit_list_elements(&v, &instructions);

   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
   ir_array_refcount_entry *entry_c = v.get_variable_entry(var_c);

   /* Every variable in the assignment counts as referenced... */
   EXPECT_TRUE(entry_a->is_referenced);
   EXPECT_TRUE(entry_b->is_referenced);
   EXPECT_TRUE(entry_c->is_referenced);

   /* ...but the single bit of the non-array variable (num_bits is 1 for
    * such types, as earlier tests check) stays clear.
    */
   ASSERT_EQ(1, get_num_bits(*entry_c));
   EXPECT_FALSE(entry_c->is_linearized_index_referenced(0));
}
|
||||
|
||||
TEST_F(array_refcount_test, do_not_process_matrix_indexing)
{
   /* Indexing a matrix looks much like indexing an array, but the visitor
    * is not supposed to record per-element accesses for vector or matrix
    * types.
    */
   ir_variable *var_a =
      new(mem_ctx) ir_variable(glsl_type::vec4_type, "a", ir_var_auto);
   ir_variable *var_b =
      new(mem_ctx) ir_variable(glsl_type::int_type, "b", ir_var_auto);
   ir_variable *var_c =
      new(mem_ctx) ir_variable(glsl_type::mat4_type, "c", ir_var_auto);

   /* a = c[b] */
   body->emit(assign(var_a, deref_array(var_c, var_b)));

   ir_array_refcount_visitor v;

   visit_list_elements(&v, &instructions);

   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
   ir_array_refcount_entry *entry_c = v.get_variable_entry(var_c);

   /* Every variable in the assignment counts as referenced... */
   EXPECT_TRUE(entry_a->is_referenced);
   EXPECT_TRUE(entry_b->is_referenced);
   EXPECT_TRUE(entry_c->is_referenced);

   /* ...but the single bit of the non-array variable (num_bits is 1 for
    * such types, as earlier tests check) stays clear.
    */
   ASSERT_EQ(1, get_num_bits(*entry_c));
   EXPECT_FALSE(entry_c->is_linearized_index_referenced(0));
}
|
||||
|
||||
TEST_F(array_refcount_test, do_not_process_array_inside_structure)
{
   /* An array may live inside a structure.  Accesses to the elements of
    * such an array are not supposed to be tracked individually.
    */
   const glsl_struct_field fields[] = {
      glsl_struct_field(array_3_of_int, "i"),
   };
   const glsl_type *const record_of_array_3_of_int =
      glsl_type::get_record_instance(fields, ARRAY_SIZE(fields), "S");

   ir_variable *var_a =
      new(mem_ctx) ir_variable(glsl_type::int_type, "a", ir_var_auto);
   ir_variable *var_b =
      new(mem_ctx) ir_variable(record_of_array_3_of_int, "b", ir_var_auto);

   /* a = b.i[2] */
   operand b_i = deref_struct(var_b, "i");
   operand b_i_2 = deref_array(b_i, body->constant(int(2)));
   body->emit(assign(var_a, b_i_2));

   ir_array_refcount_visitor v;

   visit_list_elements(&v, &instructions);

   ir_array_refcount_entry *entry_a = v.get_variable_entry(var_a);
   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);

   EXPECT_TRUE(entry_a->is_referenced);
   EXPECT_TRUE(entry_b->is_referenced);

   /* The structure variable has a single bit, and it must stay clear. */
   ASSERT_EQ(1, get_num_bits(*entry_b));
   EXPECT_FALSE(entry_b->is_linearized_index_referenced(0));

   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
}
|
||||
|
||||
TEST_F(array_refcount_test, visit_simple_indexing)
{
   ir_variable *var_a =
      new(mem_ctx) ir_variable(glsl_type::vec4_type, "a", ir_var_auto);
   ir_variable *var_b =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "b",
                               ir_var_auto);

   /* a = b[2][1][0] */
   operand b_2 = deref_array(var_b, body->constant(int(2)));
   operand b_2_1 = deref_array(b_2, body->constant(int(1)));
   operand b_2_1_0 = deref_array(b_2_1, body->constant(int(0)));
   body->emit(assign(var_a, b_2_1_0));

   ir_array_refcount_visitor v;

   visit_list_elements(&v, &instructions);

   /* Only the element addressed by the three constant subscripts may be
    * flagged.
    */
   const unsigned only_marked = 0 + (1 * 5) + (2 * 4 * 5);
   ir_array_refcount_entry *entry_b = v.get_variable_entry(var_b);
   const unsigned element_count = var_b->type->arrays_of_arrays_size();

   for (unsigned idx = 0; idx < element_count; idx++) {
      EXPECT_EQ(idx == only_marked,
                entry_b->is_linearized_index_referenced(idx))
         << "i = " << idx;
   }

   validate_variables_in_hash_table(v.ht, 2, var_a, var_b);
}
|
||||
|
||||
/**
 * Index the middle level of a three-level array-of-arrays with a variable.
 *
 * Emits "a = b[2][i][1]".  The outer and inner indices are constants while
 * the middle index is the variable i, so the test expects every element of
 * the form b[2][*][1] -- and no other element -- to be reported as
 * referenced.
 */
TEST_F(array_refcount_test, visit_whole_second_array_indexing)
{
   ir_variable *var_a = new(mem_ctx) ir_variable(glsl_type::vec4_type,
                                                 "a",
                                                 ir_var_auto);
   ir_variable *var_b = new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                                                 "b",
                                                 ir_var_auto);
   ir_variable *var_i = new(mem_ctx) ir_variable(glsl_type::int_type,
                                                 "i",
                                                 ir_var_auto);

   /* a = b[2][i][1] */
   body->emit(assign(var_a,
                     deref_array(
                        deref_array(
                           deref_array(var_b, body->constant(int(2))),
                           var_i),
                        body->constant(int(1)))));

   ir_array_refcount_visitor v;

   visit_list_elements(&v, &instructions);

   ir_array_refcount_entry *const entry_b = v.get_variable_entry(var_b);
   for (unsigned i = 0; i < 3; i++) {
      for (unsigned j = 0; j < 4; j++) {
         for (unsigned k = 0; k < 5; k++) {
            const bool accessed = (i == 2) && (k == 1);
            const unsigned linearized_index = k + (j * 5) + (i * 4 * 5);

            /* Report all three indices and the linear index on failure so
             * the offending element can be identified; printing only the
             * outermost index is ambiguous inside the nested loops.
             */
            EXPECT_EQ(accessed,
                      entry_b->is_linearized_index_referenced(linearized_index)) <<
               "array b[" << i << "][" << j << "][" << k << "], " <<
               "linear index = " << linearized_index;
         }
      }
   }

   validate_variables_in_hash_table(v.ht, 3, var_a, var_b, var_i);
}
|
||||
|
||||
/**
 * Use an element of one array as the index into another array.
 *
 * Emits "a = b[2][3][c[i]]".  The two outer indices of b are constants and
 * the innermost index is read from c at the variable position i.  The test
 * expects every b[2][3][*] element, and no other element of b, to be
 * reported as referenced, and every element of c to be reported as
 * referenced.
 */
TEST_F(array_refcount_test, visit_array_indexing_an_array)
{
   ir_variable *dest =
      new(mem_ctx) ir_variable(glsl_type::vec4_type, "a", ir_var_auto);
   ir_variable *outer_array =
      new(mem_ctx) ir_variable(array_3_of_array_4_of_array_5_of_vec4,
                               "b",
                               ir_var_auto);
   ir_variable *index_array =
      new(mem_ctx) ir_variable(array_3_of_int, "c", ir_var_auto);
   ir_variable *index_var =
      new(mem_ctx) ir_variable(glsl_type::int_type, "i", ir_var_auto);

   /* a = b[2][3][c[i]] */
   body->emit(
      assign(dest,
             deref_array(
                deref_array(
                   deref_array(outer_array, body->constant(int(2))),
                   body->constant(int(3))),
                deref_array(index_array, index_var))));

   ir_array_refcount_visitor v;

   visit_list_elements(&v, &instructions);

   /* Check b: walk every (x, y, z) position and compare against the
    * expectation that only the x == 2, y == 3 row is referenced.
    */
   ir_array_refcount_entry *const b_entry = v.get_variable_entry(outer_array);

   for (unsigned x = 0; x < 3; x++) {
      for (unsigned y = 0; y < 4; y++) {
         for (unsigned z = 0; z < 5; z++) {
            const unsigned linearized_index = z + (y * 5) + (x * 4 * 5);
            const bool accessed = (x == 2) && (y == 3);

            EXPECT_EQ(accessed,
                      b_entry->is_linearized_index_referenced(linearized_index))
               << "array b[" << x << "][" << y << "][" << z << "], "
               << "linear index = " << linearized_index;
         }
      }
   }

   /* Check c: every element is expected to be referenced. */
   ir_array_refcount_entry *const c_entry = v.get_variable_entry(index_array);

   for (unsigned n = 0; n < index_array->type->array_size(); n++) {
      EXPECT_EQ(true, c_entry->is_linearized_index_referenced(n))
         << "array c, i = " << n;
   }

   validate_variables_in_hash_table(v.ht, 4, dest, outer_array, index_array,
                                    index_var);
}
|
||||
|
||||
/**
 * Index an array with values read from that same array.
 *
 * Builds the expression
 *
 *    a = b[ b[0][0][0] ][ b[ b[0][1][0] ][ b[1][0][0] ][1] ][2]
 *
 * for "int b[2][2][3]" and checks which elements of b are reported as
 * referenced.
 */
TEST_F(array_refcount_test, visit_array_indexing_with_itself)
{
   /* int[2][2][3], built up from the fixture's int[3] type. */
   const glsl_type *const int_2x3 =
      glsl_type::get_array_instance(array_3_of_int, 2);
   const glsl_type *const int_2x2x3 =
      glsl_type::get_array_instance(int_2x3, 2);

   ir_variable *dest =
      new(mem_ctx) ir_variable(glsl_type::int_type, "a", ir_var_auto);
   ir_variable *self =
      new(mem_ctx) ir_variable(int_2x2x3, "b", ir_var_auto);

   /* Expected outcome for the expression above:
    *
    *  - b[0][0][0], b[0][1][0] and b[1][0][0] are read directly with
    *    constant indices.
    *  - The inner "[...][...][1]" read and the outer "[...][...][2]" read
    *    both use non-constant leading indices, so all of b[*][*][1] and
    *    b[*][*][2] count as accessed.
    *  - That leaves b[1][1][0] as the only element that is not accessed.
    */
   operand elem_000 =
      deref_array(
         deref_array(
            deref_array(self, body->constant(int(0))),
            body->constant(int(0))),
         body->constant(int(0)));

   operand elem_010 =
      deref_array(
         deref_array(
            deref_array(self, body->constant(int(0))),
            body->constant(int(1))),
         body->constant(int(0)));

   operand elem_100 =
      deref_array(
         deref_array(
            deref_array(self, body->constant(int(1))),
            body->constant(int(0))),
         body->constant(int(0)));

   /* b[ b[0][1][0] ][ b[1][0][0] ][1] */
   operand middle_index =
      deref_array(
         deref_array(
            deref_array(self, elem_010),
            elem_100),
         body->constant(int(1)));

   body->emit(
      assign(dest,
             deref_array(
                deref_array(
                   deref_array(self, elem_000),
                   middle_index),
                body->constant(int(2)))));

   ir_array_refcount_visitor v;

   visit_list_elements(&v, &instructions);

   ir_array_refcount_entry *const entry = v.get_variable_entry(self);

   for (unsigned x = 0; x < 2; x++) {
      for (unsigned y = 0; y < 2; y++) {
         for (unsigned z = 0; z < 3; z++) {
            const unsigned linearized_index = z + (y * 3) + (x * 2 * 3);

            /* Everything except b[1][1][0] is expected to be referenced. */
            const bool accessed = (x != 1) || (y != 1) || (z != 0);

            EXPECT_EQ(accessed,
                      entry->is_linearized_index_referenced(linearized_index))
               << "array b[" << x << "][" << y << "][" << z << "], "
               << "linear index = " << linearized_index;
         }
      }
   }

   validate_variables_in_hash_table(v.ht, 2, dest, self);
}
|
@@ -79,22 +79,22 @@ opt_undef_vecN(nir_builder *b, nir_alu_instr *alu)
|
||||
{
|
||||
if (alu->op != nir_op_vec2 &&
|
||||
alu->op != nir_op_vec3 &&
|
||||
alu->op != nir_op_vec4)
|
||||
alu->op != nir_op_vec4 &&
|
||||
alu->op != nir_op_fmov &&
|
||||
alu->op != nir_op_imov)
|
||||
return false;
|
||||
|
||||
assert(alu->dest.dest.is_ssa);
|
||||
|
||||
unsigned num_components = nir_op_infos[alu->op].num_inputs;
|
||||
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
|
||||
if (!alu->src[i].src.is_ssa ||
|
||||
alu->src[i].src.ssa->parent_instr->type != nir_instr_type_ssa_undef)
|
||||
return false;
|
||||
}
|
||||
|
||||
b->cursor = nir_before_instr(&alu->instr);
|
||||
nir_ssa_def *undef =
|
||||
nir_ssa_undef(b, num_components, nir_dest_bit_size(alu->dest.dest));
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, alu->dest.dest.ssa.num_components,
|
||||
nir_dest_bit_size(alu->dest.dest));
|
||||
nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(undef));
|
||||
|
||||
return true;
|
||||
|
@@ -98,6 +98,16 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
||||
{
|
||||
uint8_t new_swizzle[4];
|
||||
|
||||
/* Searching only works on SSA values because, if it's not SSA, we can't
|
||||
* know if the value changed between one instance of that value in the
|
||||
* expression and another. Also, the replace operation will place reads of
|
||||
* that value right before the last instruction in the expression we're
|
||||
* replacing so those reads will happen after the original reads and may
|
||||
* not be valid if they're register reads.
|
||||
*/
|
||||
if (!instr->src[src].src.is_ssa)
|
||||
return false;
|
||||
|
||||
/* If the source is an explicitly sized source, then we need to reset
|
||||
* both the number of components and the swizzle.
|
||||
*/
|
||||
@@ -116,9 +126,6 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
||||
|
||||
switch (value->type) {
|
||||
case nir_search_value_expression:
|
||||
if (!instr->src[src].src.is_ssa)
|
||||
return false;
|
||||
|
||||
if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu)
|
||||
return false;
|
||||
|
||||
@@ -131,8 +138,7 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
||||
assert(var->variable < NIR_SEARCH_MAX_VARIABLES);
|
||||
|
||||
if (state->variables_seen & (1 << var->variable)) {
|
||||
if (!nir_srcs_equal(state->variables[var->variable].src,
|
||||
instr->src[src].src))
|
||||
if (state->variables[var->variable].src.ssa != instr->src[src].src.ssa)
|
||||
return false;
|
||||
|
||||
assert(!instr->src[src].abs && !instr->src[src].negate);
|
||||
@@ -204,43 +210,27 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
|
||||
return true;
|
||||
|
||||
case nir_type_int:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
int64_t val;
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
val = load->value.i32[new_swizzle[i]];
|
||||
break;
|
||||
case 64:
|
||||
val = load->value.i64[new_swizzle[i]];
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
|
||||
if (val != const_val->data.i)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
case nir_type_uint:
|
||||
case nir_type_bool32:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
uint64_t val;
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
val = load->value.u32[new_swizzle[i]];
|
||||
break;
|
||||
case 64:
|
||||
val = load->value.u64[new_swizzle[i]];
|
||||
break;
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
switch (load->def.bit_size) {
|
||||
case 32:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
if (load->value.u32[new_swizzle[i]] !=
|
||||
(uint32_t)const_val->data.u)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
if (val != const_val->data.u)
|
||||
return false;
|
||||
case 64:
|
||||
for (unsigned i = 0; i < num_components; ++i) {
|
||||
if (load->value.u64[new_swizzle[i]] != const_val->data.u)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
||||
default:
|
||||
unreachable("unknown bit size");
|
||||
}
|
||||
return true;
|
||||
|
||||
default:
|
||||
unreachable("Invalid alu source type");
|
||||
|
@@ -1055,16 +1055,30 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
|
||||
SpvOp opcode = get_specialization(b, val, w[3]);
|
||||
switch (opcode) {
|
||||
case SpvOpVectorShuffle: {
|
||||
struct vtn_value *v0 = vtn_value(b, w[4], vtn_value_type_constant);
|
||||
struct vtn_value *v1 = vtn_value(b, w[5], vtn_value_type_constant);
|
||||
unsigned len0 = glsl_get_vector_elements(v0->const_type);
|
||||
unsigned len1 = glsl_get_vector_elements(v1->const_type);
|
||||
struct vtn_value *v0 = &b->values[w[4]];
|
||||
struct vtn_value *v1 = &b->values[w[5]];
|
||||
|
||||
assert(v0->value_type == vtn_value_type_constant ||
|
||||
v0->value_type == vtn_value_type_undef);
|
||||
assert(v1->value_type == vtn_value_type_constant ||
|
||||
v1->value_type == vtn_value_type_undef);
|
||||
|
||||
unsigned len0 = v0->value_type == vtn_value_type_constant ?
|
||||
glsl_get_vector_elements(v0->const_type) :
|
||||
glsl_get_vector_elements(v0->type->type);
|
||||
unsigned len1 = v1->value_type == vtn_value_type_constant ?
|
||||
glsl_get_vector_elements(v1->const_type) :
|
||||
glsl_get_vector_elements(v1->type->type);
|
||||
|
||||
uint32_t u[8];
|
||||
for (unsigned i = 0; i < len0; i++)
|
||||
u[i] = v0->constant->value.u[i];
|
||||
for (unsigned i = 0; i < len1; i++)
|
||||
u[len0 + i] = v1->constant->value.u[i];
|
||||
if (v0->value_type == vtn_value_type_constant) {
|
||||
for (unsigned i = 0; i < len0; i++)
|
||||
u[i] = v0->constant->value.u[i];
|
||||
}
|
||||
if (v1->value_type == vtn_value_type_constant) {
|
||||
for (unsigned i = 0; i < len1; i++)
|
||||
u[len0 + i] = v1->constant->value.u[i];
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < count - 6; i++) {
|
||||
uint32_t comp = w[i + 6];
|
||||
@@ -2707,6 +2721,7 @@ vtn_handle_variable_or_type_instruction(struct vtn_builder *b, SpvOp opcode,
|
||||
vtn_handle_constant(b, opcode, w, count);
|
||||
break;
|
||||
|
||||
case SpvOpUndef:
|
||||
case SpvOpVariable:
|
||||
vtn_handle_variables(b, opcode, w, count);
|
||||
break;
|
||||
|
@@ -527,12 +527,13 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
|
||||
nir_variable *phi_var = phi_entry->data;
|
||||
|
||||
for (unsigned i = 3; i < count; i += 2) {
|
||||
struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
|
||||
struct vtn_block *pred =
|
||||
vtn_value(b, w[i + 1], vtn_value_type_block)->block;
|
||||
|
||||
b->nb.cursor = nir_after_instr(&pred->end_nop->instr);
|
||||
|
||||
struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
|
||||
|
||||
vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
|
||||
}
|
||||
|
||||
|
@@ -1158,6 +1158,12 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
|
||||
const uint32_t *w, unsigned count)
|
||||
{
|
||||
switch (opcode) {
|
||||
case SpvOpUndef: {
|
||||
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_undef);
|
||||
val->type = vtn_value(b, w[1], vtn_value_type_type)->type;
|
||||
break;
|
||||
}
|
||||
|
||||
case SpvOpVariable: {
|
||||
struct vtn_variable *var = rzalloc(b, struct vtn_variable);
|
||||
var->type = vtn_value(b, w[1], vtn_value_type_type)->type;
|
||||
|
@@ -95,8 +95,8 @@ AM_CFLAGS += \
|
||||
-I$(top_srcdir)/src/egl/drivers/dri2 \
|
||||
-I$(top_srcdir)/src/gbm/backends/dri \
|
||||
-I$(top_srcdir)/src/egl/wayland/wayland-egl \
|
||||
-I$(top_srcdir)/src/egl/wayland/wayland-drm \
|
||||
-I$(top_builddir)/src/egl/wayland/wayland-drm \
|
||||
-I$(top_srcdir)/src/egl/wayland/wayland-drm \
|
||||
-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
|
||||
-D_EGL_BUILT_IN_DRIVER_DRI2
|
||||
|
||||
|
@@ -1081,6 +1081,7 @@ static struct dri2_egl_display_vtbl dri2_wl_display_vtbl = {
|
||||
|
||||
static const __DRIextension *dri2_loader_extensions[] = {
|
||||
&dri2_loader_extension.base,
|
||||
&image_loader_extension.base,
|
||||
&image_lookup_extension.base,
|
||||
&use_invalidate.base,
|
||||
NULL,
|
||||
@@ -1283,6 +1284,8 @@ dri2_initialize_wayland_drm(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
cleanup_registry:
|
||||
wl_registry_destroy(dri2_dpy->wl_registry);
|
||||
wl_event_queue_destroy(dri2_dpy->wl_queue);
|
||||
if (disp->PlatformDisplay == NULL)
|
||||
wl_display_disconnect(dri2_dpy->wl_dpy);
|
||||
cleanup_dpy:
|
||||
free(dri2_dpy);
|
||||
disp->DriverData = NULL;
|
||||
@@ -1742,6 +1745,8 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
|
||||
dri2_surf->format = WL_SHM_FORMAT_ARGB8888;
|
||||
|
||||
dri2_surf->wl_win = window;
|
||||
dri2_surf->wl_win->private = dri2_surf;
|
||||
dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
|
||||
|
||||
dri2_surf->base.Width = -1;
|
||||
dri2_surf->base.Height = -1;
|
||||
@@ -1924,6 +1929,8 @@ dri2_initialize_wayland_swrast(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
cleanup_registry:
|
||||
wl_registry_destroy(dri2_dpy->wl_registry);
|
||||
wl_event_queue_destroy(dri2_dpy->wl_queue);
|
||||
if (disp->PlatformDisplay == NULL)
|
||||
wl_display_disconnect(dri2_dpy->wl_dpy);
|
||||
cleanup_dpy:
|
||||
free(dri2_dpy);
|
||||
disp->DriverData = NULL;
|
||||
|
@@ -868,6 +868,9 @@ _eglCreateWindowSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
|
||||
|
||||
_EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
|
||||
|
||||
if ((conf->SurfaceType & EGL_WINDOW_BIT) == 0)
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
|
||||
|
||||
surf = drv->API.CreateWindowSurface(drv, disp, conf, native_window,
|
||||
attrib_list);
|
||||
ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
|
||||
@@ -986,6 +989,10 @@ _eglCreatePixmapSurfaceCommon(_EGLDisplay *disp, EGLConfig config,
|
||||
#endif
|
||||
|
||||
_EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
|
||||
|
||||
if ((conf->SurfaceType & EGL_PIXMAP_BIT) == 0)
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
|
||||
|
||||
surf = drv->API.CreatePixmapSurface(drv, disp, conf, native_pixmap,
|
||||
attrib_list);
|
||||
ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
|
||||
@@ -1056,6 +1063,9 @@ eglCreatePbufferSurface(EGLDisplay dpy, EGLConfig config,
|
||||
_EGL_FUNC_START(disp, EGL_OBJECT_DISPLAY_KHR, NULL, EGL_NO_SURFACE);
|
||||
_EGL_CHECK_CONFIG(disp, conf, EGL_NO_SURFACE, drv);
|
||||
|
||||
if ((conf->SurfaceType & EGL_PBUFFER_BIT) == 0)
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_NO_SURFACE);
|
||||
|
||||
surf = drv->API.CreatePbufferSurface(drv, disp, conf, attrib_list);
|
||||
ret = (surf) ? _eglLinkSurface(surf) : EGL_NO_SURFACE;
|
||||
|
||||
|
@@ -184,19 +184,33 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
|
||||
break;
|
||||
}
|
||||
|
||||
/* The EGL_KHR_create_context_spec says:
|
||||
*
|
||||
* "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
|
||||
* EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
|
||||
* access> will be created. Robust buffer access is defined in the
|
||||
* GL_ARB_robustness extension specification, and the resulting
|
||||
* context must also support either the GL_ARB_robustness
|
||||
* extension, or a version of OpenGL incorporating equivalent
|
||||
* functionality. This bit is supported for OpenGL contexts.
|
||||
*/
|
||||
if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
|
||||
(api != EGL_OPENGL_API ||
|
||||
!dpy->Extensions.EXT_create_context_robustness)) {
|
||||
api != EGL_OPENGL_API) {
|
||||
/* The EGL_KHR_create_context spec says:
|
||||
*
|
||||
* 10) Which error should be generated if robust buffer access
|
||||
* or reset notifications are requested under OpenGL ES?
|
||||
*
|
||||
* As per Issue 6, this extension does not support creating
|
||||
* robust contexts for OpenGL ES. This is only supported via
|
||||
* the EGL_EXT_create_context_robustness extension.
|
||||
*
|
||||
* Attempting to use this extension to create robust OpenGL
|
||||
* ES context will generate an EGL_BAD_ATTRIBUTE error. This
|
||||
* specific error is generated because this extension does
|
||||
* not define the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR
|
||||
* and EGL_CONTEXT_OPENGL_RESET_NOTIFICATION_STRATEGY_KHR
|
||||
* bits for OpenGL ES contexts. Thus, use of these bits fall
|
||||
* under condition described by: "If an attribute is
|
||||
* specified that is not meaningful for the client API
|
||||
* type.." in the above specification.
|
||||
*
|
||||
* The spec requires that we emit the error even if the display
|
||||
* supports EGL_EXT_create_context_robustness. To create a robust
|
||||
* GLES context, the *attribute*
|
||||
* EGL_CONTEXT_OPENGL_ROBUST_ACCESS_EXT must be used, not the
|
||||
* *flag* EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR.
|
||||
*/
|
||||
err = EGL_BAD_ATTRIBUTE;
|
||||
break;
|
||||
}
|
||||
|
@@ -1275,7 +1275,6 @@ cso_restore_fragment_samplers(struct cso_context *ctx)
|
||||
{
|
||||
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
|
||||
|
||||
info->nr_samplers = ctx->nr_fragment_samplers_saved;
|
||||
memcpy(info->samplers, ctx->fragment_samplers_saved,
|
||||
sizeof(info->samplers));
|
||||
cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
|
||||
|
@@ -149,6 +149,7 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
|
||||
break;
|
||||
case CPUFREQ_MAXIMUM:
|
||||
snprintf(gr->name, sizeof(gr->name), "%s-Max", cfi->name);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
|
@@ -672,17 +672,19 @@ iter_instruction(
|
||||
}
|
||||
}
|
||||
|
||||
switch (inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_IF:
|
||||
case TGSI_OPCODE_UIF:
|
||||
case TGSI_OPCODE_ELSE:
|
||||
case TGSI_OPCODE_BGNLOOP:
|
||||
case TGSI_OPCODE_ENDLOOP:
|
||||
case TGSI_OPCODE_CAL:
|
||||
case TGSI_OPCODE_BGNSUB:
|
||||
TXT( " :" );
|
||||
UID( inst->Label.Label );
|
||||
break;
|
||||
if (inst->Instruction.Label) {
|
||||
switch (inst->Instruction.Opcode) {
|
||||
case TGSI_OPCODE_IF:
|
||||
case TGSI_OPCODE_UIF:
|
||||
case TGSI_OPCODE_ELSE:
|
||||
case TGSI_OPCODE_BGNLOOP:
|
||||
case TGSI_OPCODE_ENDLOOP:
|
||||
case TGSI_OPCODE_CAL:
|
||||
case TGSI_OPCODE_BGNSUB:
|
||||
TXT( " :" );
|
||||
UID( inst->Label.Label );
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* update indentation */
|
||||
|
@@ -152,7 +152,7 @@ create_vert_shader(struct vl_zscan *zscan)
|
||||
for (i = 0; i < zscan->num_channels; ++i) {
|
||||
ureg_ADD(shader, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y),
|
||||
ureg_imm1f(shader, 1.0f / (zscan->blocks_per_line * VL_BLOCK_WIDTH)
|
||||
* (i - (signed)zscan->num_channels / 2)));
|
||||
* ((signed)i - (signed)zscan->num_channels / 2)));
|
||||
|
||||
ureg_MAD(shader, ureg_writemask(o_vtex[i], TGSI_WRITEMASK_X), vrect,
|
||||
ureg_imm1f(shader, 1.0f / zscan->blocks_per_line), ureg_src(tmp));
|
||||
|
@@ -9,6 +9,7 @@ AM_CFLAGS = \
|
||||
$(GALLIUM_DRIVER_CFLAGS) \
|
||||
$(FREEDRENO_CFLAGS)
|
||||
|
||||
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
|
||||
ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
|
||||
$(MKDIR_GEN)
|
||||
$(AM_V_GEN) PYTHONPATH=$(top_srcdir)/src/compiler/nir $(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/ir3/ir3_nir_trig.py > $@ || ($(RM) $@; false)
|
||||
|
@@ -234,7 +234,6 @@ batch_flush_func(void *job, int id)
|
||||
|
||||
fd_gmem_render_tiles(batch);
|
||||
batch_reset_resources(batch);
|
||||
batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -275,7 +274,6 @@ batch_flush(struct fd_batch *batch)
|
||||
} else {
|
||||
fd_gmem_render_tiles(batch);
|
||||
batch_reset_resources(batch);
|
||||
batch->ctx->last_fence = fd_ringbuffer_timestamp(batch->gmem);
|
||||
}
|
||||
|
||||
debug_assert(batch->reference.count > 0);
|
||||
|
@@ -124,7 +124,7 @@ fd_bc_fini(struct fd_batch_cache *cache)
|
||||
_mesa_hash_table_destroy(cache->ht, NULL);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
void
|
||||
fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
|
||||
{
|
||||
struct hash_entry *entry;
|
||||
@@ -150,8 +150,6 @@ fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx)
|
||||
fd_batch_sync(last_batch);
|
||||
fd_batch_reference(&last_batch, NULL);
|
||||
}
|
||||
|
||||
return ctx->last_fence;
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -62,7 +62,7 @@ struct fd_batch_cache {
|
||||
void fd_bc_init(struct fd_batch_cache *cache);
|
||||
void fd_bc_fini(struct fd_batch_cache *cache);
|
||||
|
||||
uint32_t fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx);
|
||||
void fd_bc_flush(struct fd_batch_cache *cache, struct fd_context *ctx);
|
||||
|
||||
void fd_bc_invalidate_context(struct fd_context *ctx);
|
||||
void fd_bc_invalidate_batch(struct fd_batch *batch, bool destroy);
|
||||
|
@@ -43,22 +43,15 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
|
||||
unsigned flags)
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
uint32_t timestamp;
|
||||
|
||||
if (!ctx->screen->reorder) {
|
||||
struct fd_batch *batch = NULL;
|
||||
fd_batch_reference(&batch, ctx->batch);
|
||||
fd_batch_flush(batch, true);
|
||||
timestamp = fd_ringbuffer_timestamp(batch->gmem);
|
||||
fd_batch_reference(&batch, NULL);
|
||||
fd_batch_flush(ctx->batch, true);
|
||||
} else {
|
||||
timestamp = fd_bc_flush(&ctx->screen->batch_cache, ctx);
|
||||
fd_bc_flush(&ctx->screen->batch_cache, ctx);
|
||||
}
|
||||
|
||||
if (fence) {
|
||||
fd_screen_fence_ref(pctx->screen, fence, NULL);
|
||||
*fence = fd_fence_create(pctx, timestamp);
|
||||
}
|
||||
if (fence)
|
||||
fd_fence_ref(pctx->screen, fence, ctx->last_fence);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -109,6 +102,8 @@ fd_context_destroy(struct pipe_context *pctx)
|
||||
fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
|
||||
fd_bc_invalidate_context(ctx);
|
||||
|
||||
fd_fence_ref(pctx->screen, &ctx->last_fence, NULL);
|
||||
|
||||
fd_prog_fini(pctx);
|
||||
fd_hw_query_fini(pctx);
|
||||
|
||||
|
@@ -164,7 +164,7 @@ struct fd_context {
|
||||
*/
|
||||
struct fd_batch *batch;
|
||||
|
||||
uint32_t last_fence;
|
||||
struct pipe_fence_handle *last_fence;
|
||||
|
||||
/* Are we in process of shadowing a resource? Used to detect recursion
|
||||
* in transfer_map, and skip unneeded synchronization.
|
||||
|
@@ -40,7 +40,7 @@ struct pipe_fence_handle {
|
||||
};
|
||||
|
||||
void
|
||||
fd_screen_fence_ref(struct pipe_screen *pscreen,
|
||||
fd_fence_ref(struct pipe_screen *pscreen,
|
||||
struct pipe_fence_handle **ptr,
|
||||
struct pipe_fence_handle *pfence)
|
||||
{
|
||||
@@ -50,7 +50,7 @@ fd_screen_fence_ref(struct pipe_screen *pscreen,
|
||||
*ptr = pfence;
|
||||
}
|
||||
|
||||
boolean fd_screen_fence_finish(struct pipe_screen *screen,
|
||||
boolean fd_fence_finish(struct pipe_screen *pscreen,
|
||||
struct pipe_context *ctx,
|
||||
struct pipe_fence_handle *fence,
|
||||
uint64_t timeout)
|
||||
@@ -61,11 +61,10 @@ boolean fd_screen_fence_finish(struct pipe_screen *screen,
|
||||
return true;
|
||||
}
|
||||
|
||||
struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
|
||||
struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx,
|
||||
uint32_t timestamp)
|
||||
{
|
||||
struct pipe_fence_handle *fence;
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
|
||||
fence = CALLOC_STRUCT(pipe_fence_handle);
|
||||
if (!fence)
|
||||
|
@@ -31,14 +31,16 @@
|
||||
|
||||
#include "pipe/p_context.h"
|
||||
|
||||
void fd_screen_fence_ref(struct pipe_screen *pscreen,
|
||||
void fd_fence_ref(struct pipe_screen *pscreen,
|
||||
struct pipe_fence_handle **ptr,
|
||||
struct pipe_fence_handle *pfence);
|
||||
boolean fd_screen_fence_finish(struct pipe_screen *screen,
|
||||
boolean fd_fence_finish(struct pipe_screen *screen,
|
||||
struct pipe_context *ctx,
|
||||
struct pipe_fence_handle *pfence,
|
||||
uint64_t timeout);
|
||||
struct pipe_fence_handle * fd_fence_create(struct pipe_context *pctx,
|
||||
|
||||
struct fd_context;
|
||||
struct pipe_fence_handle * fd_fence_create(struct fd_context *ctx,
|
||||
uint32_t timestamp);
|
||||
|
||||
#endif /* FREEDRENO_FENCE_H_ */
|
||||
|
@@ -34,6 +34,7 @@
|
||||
|
||||
#include "freedreno_gmem.h"
|
||||
#include "freedreno_context.h"
|
||||
#include "freedreno_fence.h"
|
||||
#include "freedreno_resource.h"
|
||||
#include "freedreno_query_hw.h"
|
||||
#include "freedreno_util.h"
|
||||
@@ -394,6 +395,9 @@ fd_gmem_render_tiles(struct fd_batch *batch)
|
||||
}
|
||||
|
||||
fd_ringbuffer_flush(batch->gmem);
|
||||
|
||||
fd_fence_ref(&ctx->screen->base, &ctx->last_fence, NULL);
|
||||
ctx->last_fence = fd_fence_create(ctx, fd_ringbuffer_timestamp(batch->gmem));
|
||||
}
|
||||
|
||||
/* tile needs restore if it isn't completely contained within the
|
||||
|
@@ -696,8 +696,8 @@ fd_screen_create(struct fd_device *dev)
|
||||
|
||||
pscreen->get_timestamp = fd_screen_get_timestamp;
|
||||
|
||||
pscreen->fence_reference = fd_screen_fence_ref;
|
||||
pscreen->fence_finish = fd_screen_fence_finish;
|
||||
pscreen->fence_reference = fd_fence_ref;
|
||||
pscreen->fence_finish = fd_fence_finish;
|
||||
|
||||
slab_create_parent(&screen->transfer_pool, sizeof(struct fd_transfer), 16);
|
||||
|
||||
|
@@ -73,7 +73,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) {
|
||||
if (!dec->cmds)
|
||||
return;
|
||||
|
||||
nouveau_pushbuf_space(push, 8, 2, 0);
|
||||
nouveau_pushbuf_space(push, 16, 2, 0);
|
||||
nouveau_bufctx_reset(dec->bufctx, NV31_VIDEO_BIND_CMD);
|
||||
|
||||
#define BCTX_ARGS dec->bufctx, NV31_VIDEO_BIND_CMD, NOUVEAU_BO_RD
|
||||
|
@@ -128,7 +128,7 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
|
||||
|
||||
refn.bo = mt->base.bo;
|
||||
refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
|
||||
if (nouveau_pushbuf_space(push, 16, 1, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 1, 0) ||
|
||||
nouveau_pushbuf_refn (push, &refn, 1))
|
||||
return;
|
||||
|
||||
|
@@ -431,7 +431,7 @@ nv30_transfer_rect_sifm(XFER_ARGS)
|
||||
si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
|
||||
}
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32, 6, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 64, 6, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -516,7 +516,7 @@ nv30_transfer_rect_m2mf(XFER_ARGS)
|
||||
while (h) {
|
||||
unsigned lines = (h > 2047) ? 2047 : h;
|
||||
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -708,7 +708,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
|
||||
lines = (pages > 2047) ? 2047 : pages;
|
||||
pages -= lines;
|
||||
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -732,7 +732,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
|
||||
}
|
||||
|
||||
if (size) {
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
|
@@ -295,7 +295,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
|
||||
PUSH_DATAf(push, color->f[2]);
|
||||
PUSH_DATAf(push, color->f[3]);
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
|
||||
@@ -394,7 +394,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
|
||||
mode |= NV50_3D_CLEAR_BUFFERS_S;
|
||||
}
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
|
||||
@@ -752,7 +752,7 @@ nv50_clear_buffer(struct pipe_context *pipe,
|
||||
PUSH_DATAf(push, color.f[2]);
|
||||
PUSH_DATAf(push, color.f[3]);
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
|
@@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (push, prim);
|
||||
|
||||
nouveau_pushbuf_space(push, 8, 0, 1);
|
||||
nouveau_pushbuf_space(push, 16, 0, 1);
|
||||
PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
|
||||
|
||||
switch (index_size) {
|
||||
|
@@ -273,7 +273,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
|
||||
|
||||
/* So lets test if the fence is working? */
|
||||
nouveau_pushbuf_space(push[0], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[0], 16, 1, 0);
|
||||
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[0], SUBC_BSP(0x240), 3);
|
||||
PUSH_DATAh(push[0], dec->fence_bo->offset);
|
||||
@@ -284,7 +284,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[0], 0);
|
||||
PUSH_KICK (push[0]);
|
||||
|
||||
nouveau_pushbuf_space(push[1], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[1], 16, 1, 0);
|
||||
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[1], SUBC_VP(0x240), 3);
|
||||
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
|
||||
@@ -295,7 +295,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[1], 0);
|
||||
PUSH_KICK (push[1]);
|
||||
|
||||
nouveau_pushbuf_space(push[2], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[2], 16, 1, 0);
|
||||
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[2], SUBC_PPP(0x240), 3);
|
||||
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
|
||||
|
@@ -47,7 +47,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
int ret;
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
unsigned fence_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -61,10 +60,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!dec->bitplane_bo)
|
||||
num_refs--;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
|
||||
for (i = 0; i < num_buffers; i++)
|
||||
bsp_size += num_bytes[i];
|
||||
@@ -112,7 +107,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
|
||||
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
|
||||
|
||||
nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32, num_refs, 0);
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
bsp_addr = bsp_bo->offset >> 8;
|
||||
|
@@ -93,13 +93,8 @@ nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_pushbuf *push = dec->pushbuf[2];
|
||||
unsigned ppp_caps = 0x10;
|
||||
unsigned fence_extra = 0;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
|
||||
nouveau_pushbuf_space(push, 32, 4, 0);
|
||||
|
||||
switch (codec) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG12: {
|
||||
|
@@ -76,7 +76,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
u32 fence_extra = 0, codec_extra = 0;
|
||||
u32 codec_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -88,10 +88,6 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
};
|
||||
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
|
||||
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
|
||||
codec_extra += 2;
|
||||
@@ -115,8 +111,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
|
||||
nv98_decoder_kick_ref(dec, target);
|
||||
|
||||
nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
|
||||
6 + codec_extra + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
|
||||
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
|
@@ -403,7 +403,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
|
||||
if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
|
||||
nvc0_hw_query_fifo_wait(nvc0, q);
|
||||
|
||||
nouveau_pushbuf_space(push, 16, 2, 0);
|
||||
nouveau_pushbuf_space(push, 32, 2, 0);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
|
||||
|
@@ -799,7 +799,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
|
||||
}
|
||||
|
||||
while (num_instances--) {
|
||||
nouveau_pushbuf_space(push, 9, 0, 1);
|
||||
nouveau_pushbuf_space(push, 16, 0, 1);
|
||||
BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (push, mode);
|
||||
BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
|
||||
|
@@ -297,7 +297,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
|
||||
|
||||
/* So lets test if the fence is working? */
|
||||
nouveau_pushbuf_space(push[0], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[0], 16, 1, 0);
|
||||
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
|
||||
PUSH_DATAh(push[0], dec->fence_bo->offset);
|
||||
@@ -308,7 +308,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[0], 0);
|
||||
PUSH_KICK (push[0]);
|
||||
|
||||
nouveau_pushbuf_space(push[1], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[1], 16, 1, 0);
|
||||
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
|
||||
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
|
||||
@@ -319,7 +319,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[1], 0);
|
||||
PUSH_KICK (push[1]);
|
||||
|
||||
nouveau_pushbuf_space(push[2], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[2], 16, 1, 0);
|
||||
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
|
||||
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
|
||||
|
@@ -143,7 +143,6 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
uint32_t caps;
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
unsigned fence_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -157,15 +156,11 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!dec->bitplane_bo)
|
||||
num_refs--;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
caps = nouveau_vp3_bsp_end(dec, desc);
|
||||
|
||||
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
|
||||
|
||||
nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32, num_refs, 0);
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
bsp_addr = bsp_bo->offset >> 8;
|
||||
|
@@ -93,13 +93,8 @@ nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_pushbuf *push = dec->pushbuf[2];
|
||||
unsigned ppp_caps = 0x10;
|
||||
unsigned fence_extra = 0;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
|
||||
nouveau_pushbuf_space(push, 32, 4, 0);
|
||||
|
||||
switch (codec) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG12: {
|
||||
|
@@ -76,7 +76,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
u32 fence_extra = 0, codec_extra = 0;
|
||||
u32 codec_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -88,10 +88,6 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
};
|
||||
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
|
||||
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
|
||||
codec_extra += 2;
|
||||
@@ -115,8 +111,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
|
||||
nvc0_decoder_kick_ref(dec, target);
|
||||
|
||||
nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
|
||||
6 + codec_extra + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
|
||||
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
|
@@ -816,6 +816,7 @@ nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
|
||||
debug_printf("barrier count: %u\n", desc->bar_alloc);
|
||||
debug_printf("$r count: %u\n", desc->gpr_alloc);
|
||||
debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
|
||||
debug_printf("linked tsc: %d\n", desc->linked_tsc);
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
uint64_t address;
|
||||
|
@@ -8,7 +8,10 @@ struct nve4_cp_launch_desc
|
||||
{
|
||||
u32 unk0[8];
|
||||
u32 entry;
|
||||
u32 unk9[3];
|
||||
u32 unk9[2];
|
||||
u32 unk11_0 : 30;
|
||||
u32 linked_tsc : 1;
|
||||
u32 unk11_31 : 1;
|
||||
u32 griddim_x : 31;
|
||||
u32 unk12 : 1;
|
||||
u16 griddim_y;
|
||||
@@ -48,7 +51,7 @@ nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
|
||||
memset(desc, 0, sizeof(*desc));
|
||||
|
||||
desc->unk0[7] = 0xbc000000;
|
||||
desc->unk9[2] = 0x44014000;
|
||||
desc->unk11_0 = 0x04014000;
|
||||
desc->unk47_20 = 0x300;
|
||||
}
|
||||
|
||||
|
@@ -2924,7 +2924,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
struct pipe_stream_output_info so = pipeshader->selector->so;
|
||||
struct tgsi_full_immediate *immediate;
|
||||
struct r600_shader_ctx ctx;
|
||||
struct r600_bytecode_output output[32];
|
||||
struct r600_bytecode_output output[ARRAY_SIZE(shader->output)];
|
||||
unsigned output_done, noutput;
|
||||
unsigned opcode;
|
||||
int i, j, k, r = 0;
|
||||
|
@@ -30,6 +30,18 @@
|
||||
|
||||
namespace r600_sb {
|
||||
|
||||
int dce_cleanup::run() {
|
||||
int r;
|
||||
|
||||
// Run cleanup for as long as there are unused nodes.
|
||||
do {
|
||||
nodes_changed = false;
|
||||
r = vpass::run();
|
||||
} while (r == 0 && nodes_changed);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
bool dce_cleanup::visit(node& n, bool enter) {
|
||||
if (enter) {
|
||||
} else {
|
||||
@@ -110,7 +122,18 @@ bool dce_cleanup::visit(region_node& n, bool enter) {
|
||||
void dce_cleanup::cleanup_dst(node& n) {
|
||||
if (!cleanup_dst_vec(n.dst) && remove_unused &&
|
||||
!n.dst.empty() && !(n.flags & NF_DONT_KILL) && n.parent)
|
||||
{
|
||||
// Delete use references to the removed node from the src values.
|
||||
for (vvec::iterator I = n.src.begin(), E = n.src.end(); I != E; ++I) {
|
||||
value* v = *I;
|
||||
if (v && v->def && v->uses.size())
|
||||
{
|
||||
v->remove_use(&n);
|
||||
}
|
||||
}
|
||||
n.remove();
|
||||
nodes_changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
bool dce_cleanup::visit(container_node& n, bool enter) {
|
||||
@@ -130,7 +153,7 @@ bool dce_cleanup::cleanup_dst_vec(vvec& vv) {
|
||||
if (v->gvn_source && v->gvn_source->is_dead())
|
||||
v->gvn_source = NULL;
|
||||
|
||||
if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses))
|
||||
if (v->is_dead() || (remove_unused && !v->is_rel() && !v->uses.size()))
|
||||
v = NULL;
|
||||
else
|
||||
alive = true;
|
||||
|
@@ -199,10 +199,9 @@ void gcm::td_release_val(value *v) {
|
||||
sblog << "\n";
|
||||
);
|
||||
|
||||
use_info *u = v->uses;
|
||||
while (u) {
|
||||
for (uselist::iterator I = v->uses.begin(), E = v->uses.end(); I != E; ++I) {
|
||||
use_info *u = *I;
|
||||
if (u->op->parent != &pending) {
|
||||
u = u->next;
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -212,6 +211,7 @@ void gcm::td_release_val(value *v) {
|
||||
sblog << "\n";
|
||||
);
|
||||
|
||||
assert(uses[u->op] > 0);
|
||||
if (--uses[u->op] == 0) {
|
||||
GCM_DUMP(
|
||||
sblog << "td released : ";
|
||||
@@ -222,7 +222,6 @@ void gcm::td_release_val(value *v) {
|
||||
pending.remove_node(u->op);
|
||||
ready.push_back(u->op);
|
||||
}
|
||||
u = u->next;
|
||||
}
|
||||
|
||||
}
|
||||
|
@@ -255,7 +255,7 @@ void container_node::expand() {
|
||||
void node::remove() {parent->remove_node(this);
|
||||
}
|
||||
|
||||
value_hash node::hash_src() {
|
||||
value_hash node::hash_src() const {
|
||||
|
||||
value_hash h = 12345;
|
||||
|
||||
@@ -269,7 +269,7 @@ value_hash node::hash_src() {
|
||||
}
|
||||
|
||||
|
||||
value_hash node::hash() {
|
||||
value_hash node::hash() const {
|
||||
|
||||
if (parent && parent->subtype == NST_LOOP_PHI_CONTAINER)
|
||||
return 47451;
|
||||
|
@@ -446,15 +446,16 @@ enum use_kind {
|
||||
};
|
||||
|
||||
struct use_info {
|
||||
use_info *next;
|
||||
node *op;
|
||||
use_kind kind;
|
||||
int arg;
|
||||
|
||||
use_info(node *n, use_kind kind, int arg, use_info* next)
|
||||
: next(next), op(n), kind(kind), arg(arg) {}
|
||||
use_info(node *n, use_kind kind, int arg)
|
||||
: op(n), kind(kind), arg(arg) {}
|
||||
};
|
||||
|
||||
typedef std::list< use_info * > uselist;
|
||||
|
||||
enum constraint_kind {
|
||||
CK_SAME_REG,
|
||||
CK_PACKED_BS,
|
||||
@@ -498,7 +499,7 @@ public:
|
||||
value_hash ghash;
|
||||
|
||||
node *def, *adef;
|
||||
use_info *uses;
|
||||
uselist uses;
|
||||
|
||||
ra_constraint *constraint;
|
||||
ra_chunk *chunk;
|
||||
@@ -585,6 +586,7 @@ public:
|
||||
}
|
||||
|
||||
void add_use(node *n, use_kind kind, int arg);
|
||||
void remove_use(const node *n);
|
||||
|
||||
value_hash hash();
|
||||
value_hash rel_hash();
|
||||
@@ -790,8 +792,8 @@ public:
|
||||
void replace_with(node *n);
|
||||
void remove();
|
||||
|
||||
virtual value_hash hash();
|
||||
value_hash hash_src();
|
||||
virtual value_hash hash() const;
|
||||
value_hash hash_src() const;
|
||||
|
||||
virtual bool fold_dispatch(expr_handler *ex);
|
||||
|
||||
|
@@ -124,7 +124,9 @@ class dce_cleanup : public vpass {
|
||||
public:
|
||||
|
||||
dce_cleanup(shader &s) : vpass(s),
|
||||
remove_unused(s.dce_flags & DF_REMOVE_UNUSED) {}
|
||||
remove_unused(s.dce_flags & DF_REMOVE_UNUSED), nodes_changed(false) {}
|
||||
|
||||
virtual int run();
|
||||
|
||||
virtual bool visit(node &n, bool enter);
|
||||
virtual bool visit(alu_group_node &n, bool enter);
|
||||
@@ -140,6 +142,8 @@ private:
|
||||
void cleanup_dst(node &n);
|
||||
bool cleanup_dst_vec(vvec &vv);
|
||||
|
||||
// Did we alter/remove nodes during a single pass?
|
||||
bool nodes_changed;
|
||||
};
|
||||
|
||||
|
||||
|
@@ -220,17 +220,34 @@ void value::add_use(node* n, use_kind kind, int arg) {
|
||||
dump::dump_op(n);
|
||||
sblog << " kind " << kind << " arg " << arg << "\n";
|
||||
}
|
||||
uses = new use_info(n, kind, arg, uses);
|
||||
uses.push_back(new use_info(n, kind, arg));
|
||||
}
|
||||
|
||||
struct use_node_comp {
|
||||
explicit use_node_comp(const node *n) : n(n) {}
|
||||
bool operator() (const use_info *u) {
|
||||
return u->op->hash() == n->hash();
|
||||
}
|
||||
|
||||
private:
|
||||
const node *n;
|
||||
};
|
||||
|
||||
void value::remove_use(const node *n) {
|
||||
uselist::iterator it =
|
||||
std::find_if(uses.begin(), uses.end(), use_node_comp(n));
|
||||
|
||||
if (it != uses.end())
|
||||
{
|
||||
// TODO assert((*it)->kind == kind) ?
|
||||
// TODO assert((*it)->arg == arg) ?
|
||||
delete *it;
|
||||
uses.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
unsigned value::use_count() {
|
||||
use_info *u = uses;
|
||||
unsigned c = 0;
|
||||
while (u) {
|
||||
++c;
|
||||
u = u->next;
|
||||
}
|
||||
return c;
|
||||
return uses.size();
|
||||
}
|
||||
|
||||
bool value::is_global() {
|
||||
@@ -274,13 +291,12 @@ bool value::is_prealloc() {
|
||||
}
|
||||
|
||||
void value::delete_uses() {
|
||||
use_info *u, *c = uses;
|
||||
while (c) {
|
||||
u = c->next;
|
||||
delete c;
|
||||
c = u;
|
||||
for (uselist::iterator it = uses.begin(); it != uses.end(); ++it)
|
||||
{
|
||||
delete *it;
|
||||
}
|
||||
uses = NULL;
|
||||
|
||||
uses.clear();
|
||||
}
|
||||
|
||||
void ra_constraint::update_values() {
|
||||
@@ -468,7 +484,7 @@ bool r600_sb::sb_value_set::add_vec(vvec& vv) {
|
||||
bool r600_sb::sb_value_set::contains(value* v) {
|
||||
unsigned b = v->uid - 1;
|
||||
if (b < bs.size())
|
||||
return bs.get(v->uid - 1);
|
||||
return bs.get(b);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
@@ -377,11 +377,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||||
}
|
||||
}
|
||||
/* Using a staging buffer in GTT for larger reads is much faster. */
|
||||
/* Use a staging buffer in cached GTT for reads. */
|
||||
else if ((usage & PIPE_TRANSFER_READ) &&
|
||||
!(usage & (PIPE_TRANSFER_WRITE |
|
||||
PIPE_TRANSFER_PERSISTENT)) &&
|
||||
rbuffer->domains & RADEON_DOMAIN_VRAM &&
|
||||
!(usage & PIPE_TRANSFER_PERSISTENT) &&
|
||||
(rbuffer->domains & RADEON_DOMAIN_VRAM ||
|
||||
rbuffer->flags & RADEON_FLAG_GTT_WC) &&
|
||||
r600_can_dma_copy_buffer(rctx, 0, box->x, box->width)) {
|
||||
struct r600_resource *staging;
|
||||
|
||||
@@ -390,11 +390,12 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||
box->width + (box->x % R600_MAP_BUFFER_ALIGNMENT));
|
||||
if (staging) {
|
||||
/* Copy the VRAM buffer to the staging buffer. */
|
||||
ctx->resource_copy_region(ctx, &staging->b.b, 0,
|
||||
box->x % R600_MAP_BUFFER_ALIGNMENT,
|
||||
0, 0, resource, level, box);
|
||||
rctx->dma_copy(ctx, &staging->b.b, 0,
|
||||
box->x % R600_MAP_BUFFER_ALIGNMENT,
|
||||
0, 0, resource, 0, box);
|
||||
|
||||
data = r600_buffer_map_sync_with_rings(rctx, staging, PIPE_TRANSFER_READ);
|
||||
data = r600_buffer_map_sync_with_rings(rctx, staging,
|
||||
usage & ~PIPE_TRANSFER_UNSYNCHRONIZED);
|
||||
if (!data) {
|
||||
r600_resource_reference(&staging, NULL);
|
||||
return NULL;
|
||||
|
@@ -320,14 +320,21 @@ static void si_sampler_view_add_buffer(struct si_context *sctx,
|
||||
if (resource->target == PIPE_BUFFER)
|
||||
return;
|
||||
|
||||
/* Now add separate DCC if it's present. */
|
||||
/* Now add separate DCC or HTILE. */
|
||||
rtex = (struct r600_texture*)resource;
|
||||
if (!rtex->dcc_separate_buffer)
|
||||
return;
|
||||
if (rtex->dcc_separate_buffer) {
|
||||
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
|
||||
rtex->dcc_separate_buffer, usage,
|
||||
RADEON_PRIO_DCC, check_mem);
|
||||
}
|
||||
|
||||
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
|
||||
rtex->dcc_separate_buffer, usage,
|
||||
RADEON_PRIO_DCC, check_mem);
|
||||
if (rtex->htile_buffer &&
|
||||
rtex->tc_compatible_htile &&
|
||||
!is_stencil_sampler) {
|
||||
radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
|
||||
rtex->htile_buffer, usage,
|
||||
RADEON_PRIO_HTILE, check_mem);
|
||||
}
|
||||
}
|
||||
|
||||
static void si_sampler_views_begin_new_cs(struct si_context *sctx,
|
||||
|
@@ -5396,10 +5396,13 @@ static void si_llvm_emit_barrier(const struct lp_build_tgsi_action *action,
|
||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
struct gallivm_state *gallivm = bld_base->base.gallivm;
|
||||
|
||||
/* The real barrier instruction isn’t needed, because an entire patch
|
||||
/* SI only (thanks to a hw bug workaround):
|
||||
* The real barrier instruction isn’t needed, because an entire patch
|
||||
* always fits into a single wave.
|
||||
*/
|
||||
if (ctx->type == PIPE_SHADER_TESS_CTRL) {
|
||||
if (HAVE_LLVM >= 0x0309 &&
|
||||
ctx->screen->b.chip_class == SI &&
|
||||
ctx->type == PIPE_SHADER_TESS_CTRL) {
|
||||
emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
|
||||
return;
|
||||
}
|
||||
|
@@ -698,8 +698,10 @@ static void si_update_poly_offset_state(struct si_context *sctx)
|
||||
{
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
|
||||
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf)
|
||||
if (!rs || !rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) {
|
||||
si_pm4_bind_state(sctx, poly_offset, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Use the user format, not db_render_format, so that the polygon
|
||||
* offset behaves as expected by applications.
|
||||
|
@@ -847,11 +847,12 @@ void si_emit_cache_flush(struct si_context *sctx)
|
||||
if (rctx->flags & SI_CONTEXT_INV_GLOBAL_L2 ||
|
||||
(rctx->chip_class <= CIK &&
|
||||
(rctx->flags & SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
|
||||
/* Invalidate L1 & L2. (L1 is always invalidated)
|
||||
/* Invalidate L1 & L2. (L1 is always invalidated on SI)
|
||||
* WB must be set on VI+ when TC_ACTION is set.
|
||||
*/
|
||||
si_emit_surface_sync(rctx, cp_coher_cntl |
|
||||
S_0085F0_TC_ACTION_ENA(1) |
|
||||
S_0085F0_TCL1_ACTION_ENA(1) |
|
||||
S_0301F0_TC_WB_ACTION_ENA(rctx->chip_class >= VI));
|
||||
cp_coher_cntl = 0;
|
||||
} else {
|
||||
|
@@ -262,45 +262,6 @@ public:
|
||||
return _simd_movemask_ps(vClipCullMask);
|
||||
}
|
||||
|
||||
// clip a single primitive
|
||||
int ClipScalar(PA_STATE& pa, uint32_t primIndex, float* pOutPos, float* pOutAttribs)
|
||||
{
|
||||
OSALIGNSIMD(float) inVerts[3 * 4];
|
||||
OSALIGNSIMD(float) inAttribs[3 * KNOB_NUM_ATTRIBUTES * 4];
|
||||
|
||||
// transpose primitive position
|
||||
__m128 verts[3];
|
||||
pa.AssembleSingle(VERTEX_POSITION_SLOT, primIndex, verts);
|
||||
_mm_store_ps(&inVerts[0], verts[0]);
|
||||
_mm_store_ps(&inVerts[4], verts[1]);
|
||||
_mm_store_ps(&inVerts[8], verts[2]);
|
||||
|
||||
// transpose attribs
|
||||
uint32_t numScalarAttribs = this->state.linkageCount * 4;
|
||||
|
||||
int idx = 0;
|
||||
DWORD slot = 0;
|
||||
uint32_t mapIdx = 0;
|
||||
uint32_t tmpLinkage = uint32_t(this->state.linkageMask);
|
||||
while (_BitScanForward(&slot, tmpLinkage))
|
||||
{
|
||||
tmpLinkage &= ~(1 << slot);
|
||||
// Compute absolute attrib slot in vertex array
|
||||
uint32_t inputSlot = VERTEX_ATTRIB_START_SLOT + this->state.linkageMap[mapIdx++];
|
||||
__m128 attrib[3]; // triangle attribs (always 4 wide)
|
||||
pa.AssembleSingle(inputSlot, primIndex, attrib);
|
||||
_mm_store_ps(&inAttribs[idx], attrib[0]);
|
||||
_mm_store_ps(&inAttribs[idx + numScalarAttribs], attrib[1]);
|
||||
_mm_store_ps(&inAttribs[idx + numScalarAttribs * 2], attrib[2]);
|
||||
idx += 4;
|
||||
}
|
||||
|
||||
int numVerts;
|
||||
Clip(inVerts, inAttribs, numScalarAttribs, pOutPos, &numVerts, pOutAttribs);
|
||||
|
||||
return numVerts;
|
||||
}
|
||||
|
||||
// clip SIMD primitives
|
||||
void ClipSimd(const simdscalar& vPrimMask, const simdscalar& vClipMask, PA_STATE& pa, const simdscalari& vPrimId, const simdscalari& vViewportIdx)
|
||||
{
|
||||
|
@@ -52,6 +52,17 @@ qir_opt_small_immediates(struct vc4_compile *c)
|
||||
if (uses_small_imm)
|
||||
continue;
|
||||
|
||||
/* Don't propagate small immediates into the top-end bounds
|
||||
* checking for indirect UBO loads. The kernel doesn't parse
|
||||
* small immediates and rejects the shader in this case. UBO
|
||||
* loads are much more expensive than the uniform load, and
|
||||
* indirect UBO regions are usually much larger than a small
|
||||
* immediate, so it's not worth updating the kernel to allow
|
||||
* optimizing it.
|
||||
*/
|
||||
if (inst->op == QOP_MIN_NOIMM)
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
|
||||
struct qreg src = qir_follow_movs(c, inst->src[i]);
|
||||
|
||||
|
@@ -102,9 +102,9 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
|
||||
|
||||
/* Clamp to [0, array size). Note that MIN/MAX are signed. */
|
||||
indirect_offset = qir_MAX(c, indirect_offset, qir_uniform_ui(c, 0));
|
||||
indirect_offset = qir_MIN(c, indirect_offset,
|
||||
qir_uniform_ui(c, (range->dst_offset +
|
||||
range->size - 4)));
|
||||
indirect_offset = qir_MIN_NOIMM(c, indirect_offset,
|
||||
qir_uniform_ui(c, (range->dst_offset +
|
||||
range->size - 4)));
|
||||
|
||||
qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
|
||||
c->num_texture_samples++;
|
||||
@@ -322,7 +322,7 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
|
||||
|
||||
/* Perform the clamping required by kernel validation. */
|
||||
addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
|
||||
addr = qir_MIN(c, addr, qir_uniform_ui(c, size - 4));
|
||||
addr = qir_MIN_NOIMM(c, addr, qir_uniform_ui(c, size - 4));
|
||||
|
||||
qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
|
||||
|
||||
|
@@ -58,6 +58,7 @@ static const struct qir_op_info qir_op_info[] = {
|
||||
[QOP_ASR] = { "asr", 1, 2 },
|
||||
[QOP_SHL] = { "shl", 1, 2 },
|
||||
[QOP_MIN] = { "min", 1, 2 },
|
||||
[QOP_MIN_NOIMM] = { "min_noimm", 1, 2 },
|
||||
[QOP_MAX] = { "max", 1, 2 },
|
||||
[QOP_AND] = { "and", 1, 2 },
|
||||
[QOP_OR] = { "or", 1, 2 },
|
||||
|
@@ -111,6 +111,7 @@ enum qop {
|
||||
QOP_SHR,
|
||||
QOP_ASR,
|
||||
QOP_MIN,
|
||||
QOP_MIN_NOIMM,
|
||||
QOP_MAX,
|
||||
QOP_AND,
|
||||
QOP_OR,
|
||||
@@ -709,6 +710,7 @@ QIR_ALU2(SHL)
|
||||
QIR_ALU2(SHR)
|
||||
QIR_ALU2(ASR)
|
||||
QIR_ALU2(MIN)
|
||||
QIR_ALU2(MIN_NOIMM)
|
||||
QIR_ALU2(MAX)
|
||||
QIR_ALU2(AND)
|
||||
QIR_ALU2(OR)
|
||||
|
@@ -285,6 +285,8 @@ vc4_generate_code_block(struct vc4_compile *c,
|
||||
[QOP_MOV] = { QPU_A_OR },
|
||||
[QOP_FMOV] = { QPU_A_FMAX },
|
||||
[QOP_MMOV] = { QPU_M_V8MIN },
|
||||
|
||||
[QOP_MIN_NOIMM] = { QPU_A_MIN },
|
||||
};
|
||||
|
||||
uint64_t unpack = 0;
|
||||
|
@@ -2,12 +2,12 @@ include Makefile.sources
|
||||
|
||||
AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_builddir)/src \
|
||||
-I$(top_srcdir)/src \
|
||||
-I$(top_srcdir)/src/gallium/include \
|
||||
-I$(top_srcdir)/src/gallium/drivers \
|
||||
-I$(top_srcdir)/src/gallium/auxiliary \
|
||||
-I$(top_srcdir)/src/gallium/winsys \
|
||||
-I$(top_builddir)/src \
|
||||
-I$(srcdir)
|
||||
|
||||
if HAVE_CLOVER_ICD
|
||||
|
@@ -28,8 +28,8 @@ AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/src/mapi \
|
||||
-I$(top_srcdir)/src/mesa \
|
||||
-I$(top_srcdir)/src/mesa/drivers/dri/common \
|
||||
-I$(top_builddir)/src/mesa/drivers/dri/common \
|
||||
-I$(top_srcdir)/src/mesa/drivers/dri/common \
|
||||
$(GALLIUM_CFLAGS) \
|
||||
$(LIBDRM_CFLAGS) \
|
||||
$(VISIBILITY_CFLAGS)
|
||||
|
@@ -25,6 +25,7 @@ include $(top_srcdir)/src/gallium/Automake.inc
|
||||
|
||||
AM_CFLAGS = \
|
||||
$(GALLIUM_CFLAGS) \
|
||||
$(GLPROTO_CFLAGS) \
|
||||
$(X11_INCLUDES)
|
||||
AM_CPPFLAGS = \
|
||||
-I$(top_srcdir)/include \
|
||||
|
@@ -297,15 +297,18 @@ vlVaAcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id,
|
||||
|
||||
pipe_mutex_lock(drv->mutex);
|
||||
drv->pipe->flush(drv->pipe, NULL, 0);
|
||||
pipe_mutex_unlock(drv->mutex);
|
||||
|
||||
memset(&whandle, 0, sizeof(whandle));
|
||||
whandle.type = DRM_API_HANDLE_TYPE_FD;
|
||||
|
||||
if (!screen->resource_get_handle(screen, drv->pipe,
|
||||
buf->derived_surface.resource,
|
||||
&whandle, PIPE_HANDLE_USAGE_READ_WRITE))
|
||||
&whandle, PIPE_HANDLE_USAGE_READ_WRITE)) {
|
||||
pipe_mutex_unlock(drv->mutex);
|
||||
return VA_STATUS_ERROR_INVALID_BUFFER;
|
||||
}
|
||||
|
||||
pipe_mutex_unlock(drv->mutex);
|
||||
|
||||
buf_info->handle = (intptr_t)whandle.handle;
|
||||
break;
|
||||
|
@@ -81,7 +81,7 @@ vlVaBeginPicture(VADriverContextP ctx, VAContextID context_id, VASurfaceID rende
|
||||
}
|
||||
|
||||
if (context->decoder->entrypoint != PIPE_VIDEO_ENTRYPOINT_ENCODE)
|
||||
context->decoder->begin_frame(context->decoder, context->target, &context->desc.base);
|
||||
context->needs_begin_frame = true;
|
||||
|
||||
return VA_STATUS_SUCCESS;
|
||||
}
|
||||
@@ -179,8 +179,7 @@ handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *
|
||||
if (!context->decoder)
|
||||
return VA_STATUS_ERROR_ALLOCATION_FAILED;
|
||||
|
||||
context->decoder->begin_frame(context->decoder, context->target,
|
||||
&context->desc.base);
|
||||
context->needs_begin_frame = true;
|
||||
}
|
||||
|
||||
return vaStatus;
|
||||
@@ -310,6 +309,12 @@ handleVASliceDataBufferType(vlVaContext *context, vlVaBuffer *buf)
|
||||
buffers[num_buffers] = buf->data;
|
||||
sizes[num_buffers] = buf->size;
|
||||
++num_buffers;
|
||||
|
||||
if (context->needs_begin_frame) {
|
||||
context->decoder->begin_frame(context->decoder, context->target,
|
||||
&context->desc.base);
|
||||
context->needs_begin_frame = false;
|
||||
}
|
||||
context->decoder->decode_bitstream(context->decoder, context->target, &context->desc.base,
|
||||
num_buffers, (const void * const*)buffers, sizes);
|
||||
}
|
||||
|
@@ -244,6 +244,7 @@ typedef struct {
|
||||
struct vl_deint_filter *deint;
|
||||
struct vlVaBuffer *coded_buf;
|
||||
int target_id;
|
||||
bool needs_begin_frame;
|
||||
} vlVaContext;
|
||||
|
||||
typedef struct {
|
||||
|
@@ -790,7 +790,6 @@ VdpStatus vlVdpOutputSurfaceDMABuf(VdpOutputSurface surface,
|
||||
pipe_mutex_lock(vlsurface->device->mutex);
|
||||
vlVdpResolveDelayedRendering(vlsurface->device, NULL, NULL);
|
||||
vlsurface->device->context->flush(vlsurface->device->context, NULL, 0);
|
||||
pipe_mutex_unlock(vlsurface->device->mutex);
|
||||
|
||||
memset(&whandle, 0, sizeof(struct winsys_handle));
|
||||
whandle.type = DRM_API_HANDLE_TYPE_FD;
|
||||
@@ -798,8 +797,12 @@ VdpStatus vlVdpOutputSurfaceDMABuf(VdpOutputSurface surface,
|
||||
pscreen = vlsurface->surface->texture->screen;
|
||||
if (!pscreen->resource_get_handle(pscreen, vlsurface->device->context,
|
||||
vlsurface->surface->texture, &whandle,
|
||||
PIPE_HANDLE_USAGE_READ_WRITE))
|
||||
PIPE_HANDLE_USAGE_READ_WRITE)) {
|
||||
pipe_mutex_unlock(vlsurface->device->mutex);
|
||||
return VDP_STATUS_NO_IMPLEMENTATION;
|
||||
}
|
||||
|
||||
pipe_mutex_unlock(vlsurface->device->mutex);
|
||||
|
||||
result->handle = whandle.handle;
|
||||
result->width = vlsurface->surface->width;
|
||||
|
@@ -460,10 +460,10 @@ VdpStatus vlVdpVideoSurfaceDMABuf(VdpVideoSurface surface,
|
||||
}
|
||||
|
||||
surf = p_surf->video_buffer->get_surfaces(p_surf->video_buffer)[plane];
|
||||
pipe_mutex_unlock(p_surf->device->mutex);
|
||||
|
||||
if (!surf)
|
||||
if (!surf) {
|
||||
pipe_mutex_unlock(p_surf->device->mutex);
|
||||
return VDP_STATUS_RESOURCES;
|
||||
}
|
||||
|
||||
memset(&whandle, 0, sizeof(struct winsys_handle));
|
||||
whandle.type = DRM_API_HANDLE_TYPE_FD;
|
||||
@@ -472,8 +472,12 @@ VdpStatus vlVdpVideoSurfaceDMABuf(VdpVideoSurface surface,
|
||||
pscreen = surf->texture->screen;
|
||||
if (!pscreen->resource_get_handle(pscreen, p_surf->device->context,
|
||||
surf->texture, &whandle,
|
||||
PIPE_HANDLE_USAGE_READ_WRITE))
|
||||
PIPE_HANDLE_USAGE_READ_WRITE)) {
|
||||
pipe_mutex_unlock(p_surf->device->mutex);
|
||||
return VDP_STATUS_NO_IMPLEMENTATION;
|
||||
}
|
||||
|
||||
pipe_mutex_unlock(p_surf->device->mutex);
|
||||
|
||||
result->handle = whandle.handle;
|
||||
result->width = surf->width;
|
||||
|
@@ -27,8 +27,8 @@ AM_CFLAGS = \
|
||||
-I$(top_srcdir)/src/loader \
|
||||
-I$(top_srcdir)/src/mapi/ \
|
||||
-I$(top_srcdir)/src/mesa/ \
|
||||
-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
|
||||
-I$(top_builddir)/src/mesa/drivers/dri/common/ \
|
||||
-I$(top_srcdir)/src/mesa/drivers/dri/common/ \
|
||||
-I$(top_srcdir)/src/gallium/winsys \
|
||||
-I$(top_srcdir)/src/gallium/state_trackers/nine \
|
||||
$(GALLIUM_TARGET_CFLAGS) \
|
||||
|
@@ -37,10 +37,10 @@ AM_CFLAGS = \
|
||||
-I$(top_srcdir)/include/GL/internal \
|
||||
-I$(top_srcdir)/src \
|
||||
-I$(top_srcdir)/src/loader \
|
||||
-I$(top_srcdir)/src/mapi \
|
||||
-I$(top_srcdir)/src/mapi/glapi \
|
||||
-I$(top_builddir)/src/mapi \
|
||||
-I$(top_srcdir)/src/mapi \
|
||||
-I$(top_builddir)/src/mapi/glapi \
|
||||
-I$(top_srcdir)/src/mapi/glapi \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(SHARED_GLAPI_CFLAGS) \
|
||||
$(EXTRA_DEFINES_XF86VIDMODE) \
|
||||
|
@@ -6,11 +6,11 @@ AM_CFLAGS = \
|
||||
-I$(top_srcdir)/src \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/src/glx \
|
||||
-I$(top_srcdir)/src/mesa \
|
||||
-I$(top_builddir)/src/mesa \
|
||||
-I$(top_srcdir)/src/mesa \
|
||||
-I$(top_srcdir)/src/mapi \
|
||||
-I$(top_srcdir)/src/mapi/glapi \
|
||||
-I$(top_builddir)/src/mapi/glapi \
|
||||
-I$(top_srcdir)/src/mapi/glapi \
|
||||
$(VISIBILITY_CFLAGS) \
|
||||
$(SHARED_GLAPI_CFLAGS) \
|
||||
$(DEFINES) \
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user