Compare commits
143 Commits
23.0
...
mesa-17.1.
Author | SHA1 | Date | |
---|---|---|---|
|
ca0a148a4d | ||
|
0eaf422957 | ||
|
4da22e2b68 | ||
|
88309a985a | ||
|
3922a43bf2 | ||
|
832f6b4543 | ||
|
b6f92084bd | ||
|
50cd4e37d9 | ||
|
6e977a358f | ||
|
e0df523795 | ||
|
6199b3d485 | ||
|
6c5bcc6473 | ||
|
15338b0d19 | ||
|
700dcb9ab4 | ||
|
81bdf59610 | ||
|
68e64d92bc | ||
|
364048cf42 | ||
|
72a8fd8d50 | ||
|
5cace16ac6 | ||
|
d6439cb297 | ||
|
0e4c34b347 | ||
|
f56fff79e7 | ||
|
70cbcb2d39 | ||
|
5755f40874 | ||
|
085efa0261 | ||
|
2401051b16 | ||
|
4b14ad64d0 | ||
|
bcd09ef32e | ||
|
6123a076d0 | ||
|
691d42700b | ||
|
f36cd7fc57 | ||
|
c006ced2c9 | ||
|
5c9b59de8b | ||
|
b7d7458b9a | ||
|
e39c07dbdf | ||
|
3b9b7a1342 | ||
|
054a27c508 | ||
|
1b3704c22d | ||
|
ad3b5b7f5f | ||
|
35ec26bb00 | ||
|
424bf46f27 | ||
|
806f802e7b | ||
|
15a38605fc | ||
|
0831cc7c2f | ||
|
43678114c7 | ||
|
072b1f5270 | ||
|
bd79ce4356 | ||
|
0640bae86c | ||
|
9b808c5748 | ||
|
9105e36765 | ||
|
e1678159b1 | ||
|
da13cc7e4b | ||
|
3db01cd4e7 | ||
|
396f9ae52f | ||
|
0eaab97f21 | ||
|
b0394dfe2f | ||
|
639481e340 | ||
|
ee0254a12f | ||
|
5b7cc779d2 | ||
|
ae9a2c1fc4 | ||
|
64b98a1e72 | ||
|
7bbece985e | ||
|
929ae9581c | ||
|
d4c08bc8c1 | ||
|
cd284ce928 | ||
|
e972294b8e | ||
|
5ef17d6854 | ||
|
1a23aff6b7 | ||
|
9a226fa669 | ||
|
41dfe1f275 | ||
|
ba6cb5d97a | ||
|
7e4b3aec9f | ||
|
124e7b3bc8 | ||
|
5c43c3fc73 | ||
|
0bd957be11 | ||
|
4ad2c57c26 | ||
|
14bbc51e6d | ||
|
caa6baa688 | ||
|
56922e8f33 | ||
|
6c96e750f1 | ||
|
ca357be5aa | ||
|
e702379663 | ||
|
ff27a47807 | ||
|
63d75fbfe3 | ||
|
cabca7185b | ||
|
8020ce02fc | ||
|
7b4b055a24 | ||
|
72e52fa7c8 | ||
|
6ca6d53e1c | ||
|
5f88ebaf5c | ||
|
f068a360cd | ||
|
e7cafd09ba | ||
|
f76068b879 | ||
|
ef6da453f0 | ||
|
5b4bff2ddb | ||
|
667cb4bc9e | ||
|
3f0b544745 | ||
|
27d4beb2e1 | ||
|
5e8e015db3 | ||
|
135615caa0 | ||
|
2acd78cfab | ||
|
642228ceaf | ||
|
12e7ec2c05 | ||
|
f2d91f2065 | ||
|
9a97d9081d | ||
|
a720963140 | ||
|
3f0740e87c | ||
|
2609ac2b5a | ||
|
3597829605 | ||
|
505e7cd232 | ||
|
791f0fb429 | ||
|
f1fe2b30b1 | ||
|
ee36cbe219 | ||
|
2cd07c39cc | ||
|
76f046add3 | ||
|
328afc7e86 | ||
|
77345993ec | ||
|
f2673a0f40 | ||
|
b38423210e | ||
|
ba6fd491a1 | ||
|
36f6fc59cb | ||
|
2bf79cb2f1 | ||
|
fb6379697b | ||
|
0948e113d2 | ||
|
f61c453cfc | ||
|
612fc14aab | ||
|
8aa9aa6a5f | ||
|
10ff4b49dc | ||
|
3d40db7892 | ||
|
99da9dfd95 | ||
|
fcbb263f8c | ||
|
29fa5b6e1c | ||
|
4e7e903bb3 | ||
|
26949e872b | ||
|
2cc119c35a | ||
|
6abdbd8b10 | ||
|
24c05c57e4 | ||
|
7ae90b4f65 | ||
|
0f2ac6ded8 | ||
|
bea2c4b88f | ||
|
ed846b4c78 | ||
|
8c69adf9a9 | ||
|
47dd2544e1 |
347
.travis.yml
347
.travis.yml
@@ -1,24 +1,11 @@
|
||||
language: c
|
||||
|
||||
sudo: required
|
||||
sudo: false
|
||||
dist: trusty
|
||||
|
||||
cache:
|
||||
directories:
|
||||
- $HOME/.ccache
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- libdrm-dev
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libxcb-dri2-0-dev
|
||||
- libx11-xcb-dev
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
- libelf-dev
|
||||
- scons
|
||||
apt: true
|
||||
ccache: true
|
||||
|
||||
env:
|
||||
global:
|
||||
@@ -32,17 +19,260 @@ env:
|
||||
- XCBPROTO_VERSION=xcb-proto-1.11
|
||||
- LIBXCB_VERSION=libxcb-1.11
|
||||
- LIBXSHMFENCE_VERSION=libxshmfence-1.2
|
||||
- LLVM_VERSION=3.9
|
||||
- LLVM_PACKAGE="llvm-${LLVM_VERSION} llvm-${LLVM_VERSION}-dev"
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- LIBTXC_DXTN_VERSION=libtxc_dxtn-1.0.1
|
||||
- LIBVDPAU_VERSION=libvdpau-1.1
|
||||
- LIBVA_VERSION=libva-1.6.2
|
||||
- LIBWAYLAND_VERSION=wayland-1.11.1
|
||||
- PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig
|
||||
- MAKEFLAGS=-j2
|
||||
matrix:
|
||||
- BUILD=make
|
||||
- BUILD=scons
|
||||
- LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH"
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- env:
|
||||
- LABEL="make loaders/classic DRI"
|
||||
- BUILD=make
|
||||
- MAKEFLAGS="-j4"
|
||||
- MAKE_CHECK_COMMAND="make check"
|
||||
- DRI_LOADERS="--enable-glx --enable-gbm --enable-egl --with-platforms=x11,drm,surfaceless,wayland --enable-osmesa"
|
||||
- DRI_DRIVERS="i915,i965,radeon,r200,swrast,nouveau"
|
||||
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
|
||||
- GALLIUM_DRIVERS=""
|
||||
- VULKAN_DRIVERS=""
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- env:
|
||||
# NOTE: Building SWR is 2x (yes two) times slower than all the other
|
||||
# gallium drivers combined.
|
||||
# Start this early so that it doesn't hinder the run time.
|
||||
- LABEL="make Gallium Drivers SWR"
|
||||
- BUILD=make
|
||||
- MAKEFLAGS="-j4"
|
||||
- MAKE_CHECK_COMMAND="true"
|
||||
- LLVM_VERSION=3.9
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- OVERRIDE_CC="gcc-5"
|
||||
- OVERRIDE_CXX="g++-5"
|
||||
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
|
||||
- DRI_DRIVERS=""
|
||||
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
|
||||
- GALLIUM_DRIVERS="swr"
|
||||
- VULKAN_DRIVERS=""
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-trusty-3.9
|
||||
packages:
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
# From sources above
|
||||
- g++-5
|
||||
- llvm-3.9-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
- env:
|
||||
- LABEL="make Gallium Drivers Other"
|
||||
- BUILD=make
|
||||
- MAKEFLAGS="-j4"
|
||||
- MAKE_CHECK_COMMAND="true"
|
||||
- LLVM_VERSION=3.9
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
|
||||
- DRI_DRIVERS=""
|
||||
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
|
||||
- GALLIUM_DRIVERS="i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
|
||||
- VULKAN_DRIVERS=""
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-trusty-3.9
|
||||
packages:
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
# From sources above
|
||||
- llvm-3.9-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
- env:
|
||||
# NOTE: Analogous to SWR above, building Clover is quite slow.
|
||||
- LABEL="make Gallium ST Clover"
|
||||
- BUILD=make
|
||||
- MAKEFLAGS="-j4"
|
||||
- MAKE_CHECK_COMMAND="true"
|
||||
- LLVM_VERSION=3.6
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
- OVERRIDE_CC=gcc-4.7
|
||||
- OVERRIDE_CXX=g++-4.7
|
||||
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
|
||||
- DRI_DRIVERS=""
|
||||
- GALLIUM_ST="--disable-dri --enable-opencl --enable-opencl-icd --enable-llvm --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
|
||||
# i915 most likely doesn't work with OpenCL.
|
||||
# Regardless - we're doing a quick build test here.
|
||||
- GALLIUM_DRIVERS="i915"
|
||||
- VULKAN_DRIVERS=""
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-trusty-3.6
|
||||
packages:
|
||||
- libclc-dev
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
- g++-4.7
|
||||
# From sources above
|
||||
- llvm-3.6-dev
|
||||
- clang-3.6
|
||||
- libclang-3.6-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
- env:
|
||||
- LABEL="make Gallium ST Other"
|
||||
- BUILD=make
|
||||
- MAKEFLAGS="-j4"
|
||||
- MAKE_CHECK_COMMAND="true"
|
||||
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
|
||||
- DRI_DRIVERS=""
|
||||
- GALLIUM_ST="--enable-dri --disable-opencl --enable-xa --enable-nine --enable-xvmc --enable-vdpau --enable-va --enable-omx --enable-gallium-osmesa"
|
||||
# We need swrast for osmesa and nine.
|
||||
# i915 most likely doesn't work with most ST.
|
||||
# Regardless - we're doing a quick build test here.
|
||||
- GALLIUM_DRIVERS="i915,swrast"
|
||||
- VULKAN_DRIVERS=""
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
# Nine requires gcc 4.6... which is the one we have right ?
|
||||
- libxvmc-dev
|
||||
# Build locally, for now.
|
||||
#- libvdpau-dev
|
||||
#- libva-dev
|
||||
- libomxil-bellagio-dev
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
- env:
|
||||
- LABEL="make Vulkan"
|
||||
- BUILD=make
|
||||
- MAKEFLAGS="-j4"
|
||||
- MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
|
||||
- LLVM_VERSION=3.9
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
# XXX: we want to test the WSI, but those are enabled via the EGL toggles
|
||||
# XXX: Platform X11 dependencies are checked when --enable-glx is set
|
||||
- DRI_LOADERS="--enable-glx --disable-gbm --enable-egl --with-platforms=x11,wayland"
|
||||
- DRI_DRIVERS=""
|
||||
# XXX: enable DRI for EGL above
|
||||
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
|
||||
- GALLIUM_DRIVERS=""
|
||||
- VULKAN_DRIVERS="intel,radeon"
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-trusty-3.9
|
||||
packages:
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
# From sources above
|
||||
- llvm-3.9-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
- env:
|
||||
- LABEL="scons"
|
||||
- BUILD=scons
|
||||
- SCONSFLAGS="-j4"
|
||||
# Explicitly disable.
|
||||
- SCONS_TARGET="llvm=0"
|
||||
# Keep it symmetrical to the make build.
|
||||
- SCONS_CHECK_COMMAND="scons llvm=0 check"
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- scons
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
- env:
|
||||
- LABEL="scons LLVM"
|
||||
- BUILD=scons
|
||||
- SCONSFLAGS="-j4"
|
||||
- SCONS_TARGET="llvm=1"
|
||||
# Keep it symmetrical to the make build.
|
||||
- SCONS_CHECK_COMMAND="scons llvm=1 check"
|
||||
- LLVM_VERSION=3.3
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- scons
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
- llvm-3.3-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
- env:
|
||||
- LABEL="scons SWR"
|
||||
- BUILD=scons
|
||||
- SCONSFLAGS="-j4"
|
||||
- SCONS_TARGET="swr=1"
|
||||
- LLVM_VERSION=3.9
|
||||
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
|
||||
# Keep it symmetrical to the make build. There's no actual SWR, yet.
|
||||
- SCONS_CHECK_COMMAND="true"
|
||||
- OVERRIDE_CC="gcc-5"
|
||||
- OVERRIDE_CXX="g++-5"
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- ubuntu-toolchain-r-test
|
||||
- llvm-toolchain-trusty-3.9
|
||||
packages:
|
||||
- scons
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
# From sources above
|
||||
- g++-5
|
||||
- llvm-3.9-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- x11proto-xf86vidmode-dev
|
||||
- libexpat1-dev
|
||||
- libx11-xcb-dev
|
||||
- libelf-dev
|
||||
|
||||
install:
|
||||
- export PATH="/usr/lib/ccache:$PATH"
|
||||
- pip install --user mako
|
||||
|
||||
# Since libdrm gets updated in configure.ac regularly, try to pick up the
|
||||
@@ -90,25 +320,64 @@ install:
|
||||
- tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
|
||||
- (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
# Install LLVM directly via apt-get (not Travis-CI's apt addon)
|
||||
# See https://github.com/travis-ci/apt-source-whitelist/pull/205#issuecomment-216054237
|
||||
# libtxc-dxtn uses the patented S3 Texture Compression
|
||||
# algorithm. Therefore, we don't want to use this library but it is
|
||||
# still possible through setting the USE_TXC_DXTN variable to yes in
|
||||
# the travis web UI.
|
||||
#
|
||||
# According to Wikipedia, the patent expires on October 2, 2017:
|
||||
# https://en.wikipedia.org/wiki/S3_Texture_Compression#Patent
|
||||
- if test "x$USE_TXC_DXTN" = xyes; then
|
||||
wget https://people.freedesktop.org/~cbrill/libtxc_dxtn/$LIBTXC_DXTN_VERSION.tar.bz2;
|
||||
tar -jxvf $LIBTXC_DXTN_VERSION.tar.bz2;
|
||||
(cd $LIBTXC_DXTN_VERSION && ./configure --prefix=$HOME/prefix && make install);
|
||||
fi
|
||||
|
||||
- wget -nv -O - http://llvm.org/apt/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
- sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty-3.9 main'
|
||||
- sudo apt-add-repository -y 'deb http://llvm.org/apt/trusty llvm-toolchain-trusty main'
|
||||
- sudo apt-get update -qq
|
||||
- sudo apt-get install -qq -y $LLVM_PACKAGE
|
||||
- wget http://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
|
||||
- tar -jxvf $LIBVDPAU_VERSION.tar.bz2
|
||||
- (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
- wget http://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
|
||||
- tar -jxvf $LIBVA_VERSION.tar.bz2
|
||||
- (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)
|
||||
|
||||
- wget http://wayland.freedesktop.org/releases/$LIBWAYLAND_VERSION.tar.xz
|
||||
- tar -axvf $LIBWAYLAND_VERSION.tar.xz
|
||||
- (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)
|
||||
|
||||
# Generate the header since one is missing on the Travis instance
|
||||
- mkdir -p linux
|
||||
- printf "%s\n" \
|
||||
"#ifndef _LINUX_MEMFD_H" \
|
||||
"#define _LINUX_MEMFD_H" \
|
||||
"" \
|
||||
"#define __NR_memfd_create 319" \
|
||||
"#define SYS_memfd_create __NR_memfd_create" \
|
||||
"" \
|
||||
"#define MFD_CLOEXEC 0x0001U" \
|
||||
"#define MFD_ALLOW_SEALING 0x0002U" \
|
||||
"" \
|
||||
"#endif /* _LINUX_MEMFD_H */" > linux/memfd.h
|
||||
|
||||
script:
|
||||
- if test "x$BUILD" = xmake; then
|
||||
test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
|
||||
test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
|
||||
export CC="$CC -isystem`pwd`";
|
||||
|
||||
./autogen.sh --enable-debug
|
||||
--with-platforms=x11,drm
|
||||
--with-dri-drivers=i915,i965,radeon,r200,swrast,nouveau
|
||||
--with-gallium-drivers=i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx
|
||||
--with-vulkan-drivers=radeon
|
||||
$DRI_LOADERS
|
||||
--with-dri-drivers=$DRI_DRIVERS
|
||||
$GALLIUM_ST
|
||||
--with-gallium-drivers=$GALLIUM_DRIVERS
|
||||
--with-vulkan-drivers=$VULKAN_DRIVERS
|
||||
--disable-llvm-shared-libs
|
||||
;
|
||||
make && make check;
|
||||
elif test x$BUILD = xscons; then
|
||||
scons llvm=1 && scons llvm=1 check;
|
||||
&&
|
||||
make && eval $MAKE_CHECK_COMMAND;
|
||||
fi
|
||||
|
||||
- if test "x$BUILD" = xscons; then
|
||||
test -n "$OVERRIDE_CC" && export CC="$OVERRIDE_CC";
|
||||
test -n "$OVERRIDE_CXX" && export CXX="$OVERRIDE_CXX";
|
||||
scons $SCONS_TARGET && eval $SCONS_CHECK_COMMAND;
|
||||
fi
|
||||
|
@@ -116,7 +116,3 @@ endif
|
||||
|
||||
# Quiet down the build system and remove any .h files from the sources
|
||||
LOCAL_SRC_FILES := $(patsubst %.h, , $(LOCAL_SRC_FILES))
|
||||
|
||||
ifneq ($(LOCAL_IS_HOST_MODULE),true)
|
||||
LOCAL_SHARED_LIBRARIES += libz
|
||||
endif
|
||||
|
@@ -30,7 +30,15 @@ do
|
||||
if grep -q ^$candidate already_picked ; then
|
||||
continue
|
||||
fi
|
||||
echo Commit $candidate references $sha
|
||||
# Or if it isn't in the ignore list.
|
||||
if [ -f bin/.cherry-ignore ] ; then
|
||||
if grep -q ^$candidate bin/.cherry-ignore ; then
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
printf "Commit \"%s\" references %s\n" \
|
||||
"`git log -n1 --pretty=oneline $candidate`" \
|
||||
"$sha"
|
||||
done
|
||||
done
|
||||
|
||||
|
@@ -24,35 +24,52 @@ git log --reverse --grep="cherry picked from commit" $latest_branchpoint..HEAD |
|
||||
git log --reverse --pretty=%H -i --grep="fixes:" $latest_branchpoint..origin/master |\
|
||||
while read sha
|
||||
do
|
||||
# For each one try to extract the tag
|
||||
fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
|
||||
if [ "x$fixes_count" != x1 ] ; then
|
||||
echo WARNING: Commit $sha has more than one Fixes tag
|
||||
# Check to see whether the patch is on the ignore list ...
|
||||
if [ -f bin/.cherry-ignore ] ; then
|
||||
if grep -q ^$sha bin/.cherry-ignore ; then
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
fixes=`git show $sha | grep -i "fixes:" | head -n 1`
|
||||
# The following sed/cut combination is borrowed from GregKH
|
||||
id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`
|
||||
|
||||
# Bail out if we cannot find suitable id.
|
||||
# Any specific validation the $id is valid and not some junk, is
|
||||
# implied with the follow up code
|
||||
if [ "x$id" = x ] ; then
|
||||
# Skip if it has been already cherry-picked.
|
||||
if grep -q ^$sha already_picked ; then
|
||||
continue
|
||||
fi
|
||||
|
||||
# Check if the offending commit is in branch.
|
||||
# For each one try to extract the tag
|
||||
fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
|
||||
warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0`
|
||||
while [ $fixes_count -gt 0 ] ; do
|
||||
fixes=`git show $sha | grep -i "fixes:" | tail -n $fixes_count`
|
||||
fixes_count=$(($fixes_count-1))
|
||||
# The following sed/cut combination is borrowed from GregKH
|
||||
id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`
|
||||
|
||||
# Be that cherry-picked ...
|
||||
# ... or landed before the branchpoint.
|
||||
if grep -q ^$id already_picked ||
|
||||
grep -q ^$id already_landed ; then
|
||||
|
||||
# Finally nominate the fix if it hasn't landed yet.
|
||||
if grep -q ^$sha already_picked ; then
|
||||
# Bail out if we cannot find suitable id.
|
||||
# Any specific validation the $id is valid and not some junk, is
|
||||
# implied with the follow up code
|
||||
if [ "x$id" = x ] ; then
|
||||
continue
|
||||
fi
|
||||
|
||||
echo Commit $sha fixes $id
|
||||
# Check if the offending commit is in branch.
|
||||
|
||||
# Be that cherry-picked ...
|
||||
# ... or landed before the branchpoint.
|
||||
if grep -q ^$id already_picked ||
|
||||
grep -q ^$id already_landed ; then
|
||||
|
||||
printf "Commit \"%s\" fixes %s\n" \
|
||||
"`git log -n1 --pretty=oneline $sha`" \
|
||||
"$id"
|
||||
warn=$(($warn-1))
|
||||
fi
|
||||
|
||||
done
|
||||
|
||||
if [ $warn -gt 0 ] ; then
|
||||
printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
|
||||
"`git log -n1 --pretty=oneline $sha`"
|
||||
fi
|
||||
|
||||
done
|
||||
|
22
configure.ac
22
configure.ac
@@ -724,7 +724,7 @@ dnl Arch/platform-specific settings
|
||||
dnl
|
||||
AC_ARG_ENABLE([asm],
|
||||
[AS_HELP_STRING([--disable-asm],
|
||||
[disable assembly usage @<:@default=enabled on supported plaforms@:>@])],
|
||||
[disable assembly usage @<:@default=enabled on supported platforms@:>@])],
|
||||
[enable_asm="$enableval"],
|
||||
[enable_asm=yes]
|
||||
)
|
||||
@@ -1079,14 +1079,9 @@ fi
|
||||
|
||||
if test "x$LIBUNWIND" = "xyes"; then
|
||||
PKG_CHECK_MODULES(LIBUNWIND, libunwind)
|
||||
if test "x$HAVE_LIBUNWIND" != "xyes"; then
|
||||
AC_MSG_ERROR([libunwind requested but not installed.])
|
||||
fi
|
||||
AC_DEFINE(HAVE_LIBUNWIND, 1, [Have libunwind support])
|
||||
fi
|
||||
|
||||
AM_CONDITIONAL(HAVE_LIBUNWIND, [test "x$LIBUNWIND" = xyes])
|
||||
|
||||
|
||||
dnl Options for APIs
|
||||
AC_ARG_ENABLE([opengl],
|
||||
@@ -1367,7 +1362,7 @@ if test "x$enable_libglvnd" = xyes ; then
|
||||
esac
|
||||
|
||||
PKG_CHECK_MODULES([GLVND], libglvnd >= 0.2.0)
|
||||
PKG_CHECK_VAR(LIBGLVND_DATADIR, libglvnd, datadir)
|
||||
LIBGLVND_DATADIR=`$PKG_CONFIG --variable=datadir libglvnd`
|
||||
AC_SUBST([LIBGLVND_DATADIR])
|
||||
|
||||
DEFINES="${DEFINES} -DUSE_LIBGLVND=1"
|
||||
@@ -2146,12 +2141,11 @@ dnl DEPRECATED: EGL Platforms configuration
|
||||
dnl
|
||||
AC_ARG_WITH([egl-platforms],
|
||||
[AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
|
||||
[DEPRECATED: use --with-plaforms instead@<:@default=auto@:>@])],
|
||||
[DEPRECATED: use --with-platforms instead@<:@default=auto@:>@])],
|
||||
[with_egl_platforms="$withval"],
|
||||
[with_egl_platforms=auto])
|
||||
|
||||
if test "x$with_egl_platforms" = xauto; then
|
||||
AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-plaforms instead.])
|
||||
if test "x$enable_egl" = xyes; then
|
||||
if test "x$enable_gbm" = xyes; then
|
||||
with_egl_platforms="x11,drm"
|
||||
@@ -2161,6 +2155,8 @@ if test "x$with_egl_platforms" = xauto; then
|
||||
else
|
||||
with_egl_platforms=""
|
||||
fi
|
||||
else
|
||||
AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-platforms instead.])
|
||||
fi
|
||||
|
||||
dnl
|
||||
@@ -2465,7 +2461,9 @@ if test -n "$with_gallium_drivers"; then
|
||||
xvirgl)
|
||||
HAVE_GALLIUM_VIRGL=yes
|
||||
require_libdrm "virgl"
|
||||
require_basic_egl "virgl"
|
||||
if test "x$enable_egl" = xyes; then
|
||||
require_basic_egl "virgl"
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([Unknown Gallium driver: $driver])
|
||||
@@ -2474,6 +2472,10 @@ if test -n "$with_gallium_drivers"; then
|
||||
done
|
||||
fi
|
||||
|
||||
# XXX: Keep in sync with LLVM_REQUIRED_SWR
|
||||
AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x3.9.0 -a \
|
||||
"x$LLVM_VERSION" != x3.9.1)
|
||||
|
||||
if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
|
||||
llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
|
||||
llvm_add_default_components "gallium"
|
||||
|
@@ -14,7 +14,7 @@
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 17.1.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 17.1.0 Release Notes / May 10, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 17.1.0 is a new development release.
|
||||
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD.
|
||||
c388069581a72853161657ac365f2c083afabd7cffd53f80513dacfa1cfa58a8 mesa-17.1.0.tar.gz
|
||||
cf234a6ed4764673886b6661553b54675776ef0898f774716173cec890ac3b17 mesa-17.1.0.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
@@ -63,6 +64,147 @@ Note: some of the new features are only available with certain drivers.
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68504">Bug 68504</a> - 9.2-rc1 workaround for clover build failure on ppc/altivec: cannot convert 'bool' to '__vector(4) __bool int' in return</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84325">Bug 84325</a> - X.Org segfaults when starting DE on an Intel+Radeon laptop, caused by libpciaccess cleanup, patch attached</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93089">Bug 93089</a> - mesa fails to check for gcc atomic primitives before using them</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95460">Bug 95460</a> - Please add more drivers (freedreno, virgl) to features.txt status document</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96743">Bug 96743</a> - [BYT, HSW, SKL, BXT, KBL] GPU hangs with GfxBench 4.0 CarChase</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97102">Bug 97102</a> - [dri][swr] stack overflow / infinite loop with GALLIUM_DRIVER=swr</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97338">Bug 97338</a> - Black squares in the Spec Ops: The Line chapter select screen</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97967">Bug 97967</a> - glsl/tests/cache-test regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97988">Bug 97988</a> - [radeonsi] playing back videos with VDPAU exhibits deinterlacing/anti-aliasing issues not visible with VA-API</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with "Fatal error: Cannot set display mode."</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98502">Bug 98502</a> - Delay when starting firefox, thunderbird or chromium and dmesg spam</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98869">Bug 98869</a> - Electronic Super Joy graphic artefacts (regression,bisected)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99010">Bug 99010</a> - --disable-gallium-llvm no longer recognized</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99246">Bug 99246</a> - [d3dadapter+radeonsi & bisect] EVE-Online : hang on wormhole sight</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99265">Bug 99265</a> - i965: Piglit egl_khr_gl_renderbuffer_image-clear-shared-image fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99339">Bug 99339</a> - Blender line rendering broken after removing XY clipping of lines</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99401">Bug 99401</a> - [g33] regression: piglit.spec.!opengl 1_0.gl-1_0-beginend-coverage</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99456">Bug 99456</a> - Firefox crashing when opening about:support with WebGL2 enabled</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99465">Bug 99465</a> - vtn_vector_construct writing out of bounds when given multiple non-zero length sources</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99484">Bug 99484</a> - Crusader Kings 2 - Loading bars, siege bars, morale bars, etc. do not render correctly</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99532">Bug 99532</a> - Compute shader doesn't give right result under some circumstances</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99542">Bug 99542</a> - vdpau logging errors since gallium/radeon: adjust the rule for using the LINEAR_ALIGNED layout</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99631">Bug 99631</a> - segfault with OSVRTrackerView and openscenegraph git master</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99633">Bug 99633</a> - rasterizer/core/clip.h:279:49: error: ‘const struct API_STATE’ has no member named ‘linkageCount’</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99660">Bug 99660</a> - Not all of the int64 conversion opcodes got implemented</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99677">Bug 99677</a> - heap-use-after-free in glsl</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99692">Bug 99692</a> - [radv] Mostly broken on Hawaii PRO/CIK ASICs</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99701">Bug 99701</a> - loader.c:353:8: error: implicit declaration of function 'geteuid' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99715">Bug 99715</a> - Don't print: "Note: Buggy applications may crash, if they do please report to vendor"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99789">Bug 99789</a> - Memory leak on failure to create an ir_constant in calculate_iterations in loop_controls.cpp</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99817">Bug 99817</a> - [softpipe] piglit glsl-fs-tan-1 regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99842">Bug 99842</a> - GL_ARB_transform_feedback2 on i965 gen6</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99850">Bug 99850</a> - Tessellation bug on Carrizo</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99918">Bug 99918</a> - disk_cache.h:57:20: error: no member named 'st_mtim' in 'struct stat'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99953">Bug 99953</a> - device9.c:122:49: error: ‘PIPE_CAP_USER_INDEX_BUFFERS’ undeclared (first use in this function)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99955">Bug 99955</a> - [r600g] GPU load always displayed at 100% with GALLIUM_HUD=GPU-load</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100026">Bug 100026</a> - piglit.spec.arb_shader_subroutine.compiler.direct-call_vert regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100049">Bug 100049</a> - "ralloc: Make sure ralloc() allocations match malloc()'s alignment." causes seg fault in 32bit build</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100060">Bug 100060</a> - wsi/wsi_common_wayland.c:25:41: fatal error: wayland-drm-client-protocol.h: No such file or directory</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100061">Bug 100061</a> - LODQ instruction generated with invalid dst mask</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100068">Bug 100068</a> - LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.buffer.load.format</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100088">Bug 100088</a> - piglit.spec.arb_get_texture_sub_image.arb_get_texture_sub_image regressions</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100091">Bug 100091</a> - Failure to create folder for on-disk shader cache</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100133">Bug 100133</a> - swr_context.cpp:336:44: error: invalid conversion from ‘uint {aka unsigned int}’ to ‘pipe_render_cond_flag’ [-fpermissive]</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100154">Bug 100154</a> - test_eu_compact regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100180">Bug 100180</a> - Build failure in GNOME Continuous</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100182">Bug 100182</a> - Flickering in The Talos Principle on Sky Lake GT4.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100201">Bug 100201</a> - Windows scons build with MSVC toolchain and LLVM 4.0 fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100223">Bug 100223</a> - marshal_generated.c:38:10: fatal error: 'X11/Xlib-xcb.h' file not found</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100236">Bug 100236</a> - Undefined symbols for architecture x86_64: "typeinfo for llvm::RTDyldMemoryManager"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100259">Bug 100259</a> - [EGL] [GBM] undefined reference to `gbm_bo_create_with_modifiers'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100288">Bug 100288</a> - clover unable to run OpenCL kernels since 03127bb radeonsi: compile all TGSI compute shaders asynchronously</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100303">Bug 100303</a> - Adding a single, meaningless if-else to a shader source leads to different image</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100391">Bug 100391</a> - SachaWillems deferredmultisampling asserts</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100452">Bug 100452</a> - push_constants host memory leak when resetting command buffer</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100531">Bug 100531</a> - [regression] Broken graphics in several games</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100562">Bug 100562</a> - u_debug_stack.c:59: undefined reference to `_Ux86_64_getcontext'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100569">Bug 100569</a> - core/resource.cpp:36:33: error: non-constant-expression cannot be narrowed from type 'int' to 'int16_t' (aka 'short') in initializer list [-Wc++11-narrowing]</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100574">Bug 100574</a> - anv_device.c:189: undefined reference to `anv_gem_supports_48b_addresses'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100582">Bug 100582</a> - [GEN8+] piglit.spec.arb_stencil_texturing.glblitframebuffer corrupts state.gl_texture* assertions</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100600">Bug 100600</a> - anv_device.c:1337: undefined reference to `anv_gem_busy'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100620">Bug 100620</a> - [SKL] 48-bit addresses break DOOM</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100663">Bug 100663</a> - commit 61e47d92c5196 breaks RS780</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100690">Bug 100690</a> - [Regression, bisected] TotalWar: Warhammer corrupted graphics</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100892">Bug 100892</a> - Polaris 12: winsys init bad switch (missing break) initializing addrlib</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
187
docs/relnotes/17.1.1.html
Normal file
187
docs/relnotes/17.1.1.html
Normal file
@@ -0,0 +1,187 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 17.1.1 Release Notes / May 25, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 17.1.1 is a bug fix release which fixes bugs found since the 17.1.0 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 17.1.1 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Alex Deucher (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: add new vega10 pci ids</li>
|
||||
</ul>
|
||||
|
||||
<p>Andres Gomez (2):</p>
|
||||
<ul>
|
||||
<li>bin/get-fixes-pick-list.sh: don't warn if more than one, go over them</li>
|
||||
<li>bin/get-fixes-pick-list.sh: bring back the warning</li>
|
||||
</ul>
|
||||
|
||||
<p>Bruce Cherniak (1):</p>
|
||||
<ul>
|
||||
<li>swr: move msaa resolve to generalized StoreTile</li>
|
||||
</ul>
|
||||
|
||||
<p>Chad Versace (1):</p>
|
||||
<ul>
|
||||
<li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
|
||||
</ul>
|
||||
|
||||
<p>Chih-Wei Huang (1):</p>
|
||||
<ul>
|
||||
<li>Android: correct libz dependency</li>
|
||||
</ul>
|
||||
|
||||
<p>Daniel Stone (1):</p>
|
||||
<ul>
|
||||
<li>gbm/dri: Fix sign-extension in modifier query</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (6):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 17.1.0</li>
|
||||
<li>radeon: automake: remove unneeded elf Cflags/Libs</li>
|
||||
<li>configure: remove unneeded bits around libunwind handling</li>
|
||||
<li>egl: add g_egldispatchstubs.h to the release tarball</li>
|
||||
<li>automake: add SWR LLVM gen_builder.hpp workaround</li>
|
||||
<li>Update version to 17.1.1</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (2):</p>
|
||||
<ul>
|
||||
<li>renderonly: Initialize fields of struct winsys_handle.</li>
|
||||
<li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
|
||||
</ul>
|
||||
|
||||
<p>Grazvydas Ignotas (2):</p>
|
||||
<ul>
|
||||
<li>anv: fix possible stack corruption</li>
|
||||
<li>anv: don't leak DRM devices</li>
|
||||
</ul>
|
||||
|
||||
<p>Hans de Goede (1):</p>
|
||||
<ul>
|
||||
<li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (1):</p>
|
||||
<ul>
|
||||
<li>nvc0/ir: SHLADD's middle source must be an immediate</li>
|
||||
</ul>
|
||||
|
||||
<p>Johnson Lin (1):</p>
|
||||
<ul>
|
||||
<li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
|
||||
</ul>
|
||||
|
||||
<p>Juan A. Suarez Romero (2):</p>
|
||||
<ul>
|
||||
<li>bin/get-{extra,fixes}-pick-list.sh: add support for ignore list</li>
|
||||
<li>bin/get-{extra,fixes}-pick-list.sh: improve output</li>
|
||||
</ul>
|
||||
|
||||
<p>Lucas Stach (2):</p>
|
||||
<ul>
|
||||
<li>etnaviv: stop oversizing buffer resources</li>
|
||||
<li>etnaviv: allow R/B swapped surfaces to be cleared</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (2):</p>
|
||||
<ul>
|
||||
<li>amd/addrlib: import Raven support</li>
|
||||
<li>radeonsi/gfx9: add support for Raven</li>
|
||||
</ul>
|
||||
|
||||
<p>Nanley Chery (2):</p>
|
||||
<ul>
|
||||
<li>anv/formats: Update the three-channel BC1 mappings</li>
|
||||
<li>i965/formats: Update the three-channel DXT1 mappings</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (5):</p>
|
||||
<ul>
|
||||
<li>radeonsi: mark fast-cleared textures as compressed when dirtying</li>
|
||||
<li>radeonsi: fix primitive ID in fragment shader when using tessellation</li>
|
||||
<li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
|
||||
<li>radeonsi: fix gl_PrimitiveIDIn in geometry shader when using tessellation</li>
|
||||
<li>st/mesa: remove an incorrect assertion</li>
|
||||
</ul>
|
||||
|
||||
<p>Pohjolainen, Topi (1):</p>
|
||||
<ul>
|
||||
<li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (2):</p>
|
||||
<ul>
|
||||
<li>mesa/st: fix yuv EGLImage's</li>
|
||||
<li>freedreno: fix crash when flush() but no rendering</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Herring (1):</p>
|
||||
<ul>
|
||||
<li>virgl: fix virgl_bo_transfer_{put, get} box struct copy</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Iglesias Gonsálvez (3):</p>
|
||||
<ul>
|
||||
<li>i965/vec4/gs: restore the uniform values which was overwritten by failed vec4_gs_visitor execution</li>
|
||||
<li>i965/vec4: fix swizzle and writemask when loading an uniform with constant offset</li>
|
||||
<li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
|
||||
</ul>
|
||||
|
||||
<p>Tom Stellard (1):</p>
|
||||
<ul>
|
||||
<li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -219,6 +219,10 @@ CHIPSET(0x6860, VEGA10_, VEGA10)
|
||||
CHIPSET(0x6861, VEGA10_, VEGA10)
|
||||
CHIPSET(0x6862, VEGA10_, VEGA10)
|
||||
CHIPSET(0x6863, VEGA10_, VEGA10)
|
||||
CHIPSET(0x6864, VEGA10_, VEGA10)
|
||||
CHIPSET(0x6867, VEGA10_, VEGA10)
|
||||
CHIPSET(0x6868, VEGA10_, VEGA10)
|
||||
CHIPSET(0x687F, VEGA10_, VEGA10)
|
||||
CHIPSET(0x686C, VEGA10_, VEGA10)
|
||||
|
||||
CHIPSET(0x15DD, RAVEN_, RAVEN)
|
||||
|
@@ -103,8 +103,26 @@ def generate(env):
|
||||
'HAVE_STDINT_H',
|
||||
])
|
||||
env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
|
||||
# LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
|
||||
if llvm_version >= distutils.version.LooseVersion('3.9'):
|
||||
# LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
|
||||
if llvm_version >= distutils.version.LooseVersion('4.0'):
|
||||
env.Prepend(LIBS = [
|
||||
'LLVMX86Disassembler', 'LLVMX86AsmParser',
|
||||
'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
|
||||
'LLVMDebugInfoCodeView', 'LLVMCodeGen',
|
||||
'LLVMScalarOpts', 'LLVMInstCombine',
|
||||
'LLVMTransformUtils',
|
||||
'LLVMBitWriter', 'LLVMX86Desc',
|
||||
'LLVMMCDisassembler', 'LLVMX86Info',
|
||||
'LLVMX86AsmPrinter', 'LLVMX86Utils',
|
||||
'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
|
||||
'LLVMAnalysis', 'LLVMProfileData',
|
||||
'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
|
||||
'LLVMBitReader', 'LLVMMC', 'LLVMCore',
|
||||
'LLVMSupport',
|
||||
'LLVMIRReader', 'LLVMAsmParser',
|
||||
'LLVMDemangle', 'LLVMGlobalISel', 'LLVMDebugInfoMSF',
|
||||
])
|
||||
elif llvm_version >= distutils.version.LooseVersion('3.9'):
|
||||
env.Prepend(LIBS = [
|
||||
'LLVMX86Disassembler', 'LLVMX86AsmParser',
|
||||
'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
|
||||
|
@@ -65,6 +65,8 @@ common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
|
||||
endif
|
||||
endif
|
||||
|
||||
common_libamd_common_la_LIBADD = $(LIBELF_LIBS)
|
||||
|
||||
common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h
|
||||
$(AM_V_at)$(MKDIR_P) $(@D)
|
||||
$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h > $@
|
||||
|
@@ -1193,6 +1193,20 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
|
||||
m_settings.depthPipeXorDisable = 1;
|
||||
break;
|
||||
|
||||
case FAMILY_RV:
|
||||
m_settings.isArcticIsland = 1;
|
||||
m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
|
||||
|
||||
if (m_settings.isRaven)
|
||||
{
|
||||
m_settings.isDcn1 = 1;
|
||||
}
|
||||
|
||||
m_settings.metaBaseAlignFix = 1;
|
||||
|
||||
m_settings.depthPipeXorDisable = 1;
|
||||
break;
|
||||
|
||||
default:
|
||||
ADDR_ASSERT(!"This should be a Fusion");
|
||||
break;
|
||||
@@ -2734,6 +2748,35 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if (m_settings.isDcn1)
|
||||
{
|
||||
switch (swizzleMode)
|
||||
{
|
||||
case ADDR_SW_4KB_D:
|
||||
case ADDR_SW_64KB_D:
|
||||
case ADDR_SW_VAR_D:
|
||||
case ADDR_SW_64KB_D_T:
|
||||
case ADDR_SW_4KB_D_X:
|
||||
case ADDR_SW_64KB_D_X:
|
||||
case ADDR_SW_VAR_D_X:
|
||||
support = (pIn->bpp == 64);
|
||||
break;
|
||||
|
||||
case ADDR_SW_LINEAR:
|
||||
case ADDR_SW_4KB_S:
|
||||
case ADDR_SW_64KB_S:
|
||||
case ADDR_SW_VAR_S:
|
||||
case ADDR_SW_64KB_S_T:
|
||||
case ADDR_SW_4KB_S_X:
|
||||
case ADDR_SW_64KB_S_X:
|
||||
case ADDR_SW_VAR_S_X:
|
||||
support = (pIn->bpp <= 64);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ADDR_NOT_IMPLEMENTED();
|
||||
@@ -3195,6 +3238,20 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
|
||||
// DCE12 does not support display surface to be _T swizzle mode
|
||||
prtXor = FALSE;
|
||||
}
|
||||
else if (m_settings.isDcn1)
|
||||
{
|
||||
// _R is not supported by Dcn1
|
||||
if (pIn->bpp == 64)
|
||||
{
|
||||
swType = ADDR_SW_D;
|
||||
}
|
||||
else
|
||||
{
|
||||
swType = ADDR_SW_S;
|
||||
}
|
||||
|
||||
blockSet.micro = FALSE;
|
||||
}
|
||||
else
|
||||
{
|
||||
ADDR_NOT_IMPLEMENTED();
|
||||
|
@@ -54,11 +54,13 @@ struct Gfx9ChipSettings
|
||||
// Asic/Generation name
|
||||
UINT_32 isArcticIsland : 1;
|
||||
UINT_32 isVega10 : 1;
|
||||
UINT_32 reserved0 : 30;
|
||||
UINT_32 isRaven : 1;
|
||||
UINT_32 reserved0 : 29;
|
||||
|
||||
// Display engine IP version name
|
||||
UINT_32 isDce12 : 1;
|
||||
UINT_32 reserved1 : 31;
|
||||
UINT_32 isDcn1 : 1;
|
||||
UINT_32 reserved1 : 29;
|
||||
|
||||
// Misc configuration bits
|
||||
UINT_32 metaBaseAlignFix : 1;
|
||||
@@ -201,7 +203,7 @@ protected:
|
||||
|
||||
if (IsXor(swizzleMode))
|
||||
{
|
||||
if (m_settings.isVega10)
|
||||
if (m_settings.isVega10 || m_settings.isRaven)
|
||||
{
|
||||
baseAlign = GetBlockSize(swizzleMode);
|
||||
}
|
||||
|
@@ -114,6 +114,7 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
|
||||
case CHIP_POLARIS10:
|
||||
return "polaris10";
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
return "polaris11";
|
||||
#endif
|
||||
default:
|
||||
|
@@ -1230,6 +1230,33 @@ static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
|
||||
return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
|
||||
}
|
||||
|
||||
static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
|
||||
LLVMValueRef src0)
|
||||
{
|
||||
LLVMValueRef result;
|
||||
LLVMValueRef cond;
|
||||
|
||||
src0 = to_float(ctx, src0);
|
||||
result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
|
||||
|
||||
/* TODO SI/CIK options here */
|
||||
if (ctx->options->chip_class >= VI) {
|
||||
LLVMValueRef args[2];
|
||||
/* Check if the result is a denormal - and flush to 0 if so. */
|
||||
args[0] = result;
|
||||
args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
|
||||
cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
|
||||
}
|
||||
|
||||
/* need to convert back up to f32 */
|
||||
result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
|
||||
|
||||
if (ctx->options->chip_class >= VI)
|
||||
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
|
||||
LLVMValueRef src0, LLVMValueRef src1)
|
||||
{
|
||||
@@ -1626,10 +1653,18 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
|
||||
case nir_op_fmax:
|
||||
result = emit_intrin_2f_param(ctx, "llvm.maxnum",
|
||||
to_float_type(ctx, def_type), src[0], src[1]);
|
||||
if (instr->dest.dest.ssa.bit_size == 32)
|
||||
result = emit_intrin_1f_param(ctx, "llvm.canonicalize",
|
||||
to_float_type(ctx, def_type),
|
||||
result);
|
||||
break;
|
||||
case nir_op_fmin:
|
||||
result = emit_intrin_2f_param(ctx, "llvm.minnum",
|
||||
to_float_type(ctx, def_type), src[0], src[1]);
|
||||
if (instr->dest.dest.ssa.bit_size == 32)
|
||||
result = emit_intrin_1f_param(ctx, "llvm.canonicalize",
|
||||
to_float_type(ctx, def_type),
|
||||
result);
|
||||
break;
|
||||
case nir_op_ffma:
|
||||
result = emit_intrin_3f_param(ctx, "llvm.fma",
|
||||
@@ -1717,10 +1752,7 @@ static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
|
||||
result = emit_b2f(ctx, src[0]);
|
||||
break;
|
||||
case nir_op_fquantize2f16:
|
||||
src[0] = to_float(ctx, src[0]);
|
||||
result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
|
||||
/* need to convert back up to f32 */
|
||||
result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
|
||||
result = emit_f2f16(ctx, src[0]);
|
||||
break;
|
||||
case nir_op_umul_high:
|
||||
result = emit_umul_high(ctx, src[0], src[1]);
|
||||
|
@@ -93,6 +93,7 @@ enum radeon_family {
|
||||
CHIP_POLARIS11,
|
||||
CHIP_POLARIS12,
|
||||
CHIP_VEGA10,
|
||||
CHIP_RAVEN,
|
||||
CHIP_LAST,
|
||||
};
|
||||
|
||||
|
@@ -49,6 +49,7 @@ enum {
|
||||
FAMILY_CZ,
|
||||
FAMILY_PI,
|
||||
FAMILY_AI,
|
||||
FAMILY_RV,
|
||||
FAMILY_LAST,
|
||||
};
|
||||
|
||||
@@ -185,4 +186,13 @@ enum {
|
||||
#define ASICREV_IS_VEGA10_P(eChipRev) \
|
||||
((eChipRev) >= AI_VEGA10_P_A0 && (eChipRev) < AI_UNKNOWN)
|
||||
|
||||
/* RV specific rev IDs */
|
||||
enum {
|
||||
RAVEN_A0 = 0x01,
|
||||
RAVEN_UNKNOWN = 0xFF
|
||||
};
|
||||
|
||||
#define ASICREV_IS_RAVEN(eChipRev) \
|
||||
((eChipRev) >= RAVEN_A0 && (eChipRev) < RAVEN_UNKNOWN)
|
||||
|
||||
#endif /* AMDGPU_ID_H */
|
||||
|
@@ -4074,6 +4074,10 @@
|
||||
#define S_028060_PUNCHOUT_MODE(x) (((unsigned)(x) & 0x03) << 0)
|
||||
#define G_028060_PUNCHOUT_MODE(x) (((x) >> 0) & 0x03)
|
||||
#define C_028060_PUNCHOUT_MODE 0xFFFFFFFC
|
||||
#define V_028060_AUTO 0
|
||||
#define V_028060_FORCE_ON 1
|
||||
#define V_028060_FORCE_OFF 2
|
||||
#define V_028060_RESERVED 3
|
||||
#define S_028060_POPS_DRAIN_PS_ON_OVERLAP(x) (((unsigned)(x) & 0x1) << 2)
|
||||
#define G_028060_POPS_DRAIN_PS_ON_OVERLAP(x) (((x) >> 2) & 0x1)
|
||||
#define C_028060_POPS_DRAIN_PS_ON_OVERLAP 0xFFFFFFFB
|
||||
|
@@ -9094,5 +9094,18 @@
|
||||
#define CIK_SDMA_PACKET_SRBM_WRITE 0xe
|
||||
#define CIK_SDMA_COPY_MAX_SIZE 0x3fffe0
|
||||
|
||||
enum amd_cmp_class_flags {
|
||||
S_NAN = 1 << 0, // Signaling NaN
|
||||
Q_NAN = 1 << 1, // Quiet NaN
|
||||
N_INFINITY = 1 << 2, // Negative infinity
|
||||
N_NORMAL = 1 << 3, // Negative normal
|
||||
N_SUBNORMAL = 1 << 4, // Negative subnormal
|
||||
N_ZERO = 1 << 5, // Negative zero
|
||||
P_ZERO = 1 << 6, // Positive zero
|
||||
P_SUBNORMAL = 1 << 7, // Positive subnormal
|
||||
P_NORMAL = 1 << 8, // Positive normal
|
||||
P_INFINITY = 1 << 9 // Positive infinity
|
||||
};
|
||||
|
||||
#endif /* _SID_H */
|
||||
|
||||
|
@@ -1186,6 +1186,15 @@ radv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
|
||||
|
||||
if (G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.pipeline->graphics.raster.pa_cl_clip_cntl))
|
||||
return;
|
||||
|
||||
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
|
||||
radv_emit_viewport(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
|
||||
radv_emit_scissor(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
|
||||
unsigned width = cmd_buffer->state.dynamic.line_width * 8;
|
||||
radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
|
||||
@@ -1474,12 +1483,6 @@ radv_cmd_buffer_flush_state(struct radv_cmd_buffer *cmd_buffer,
|
||||
if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_RENDER_TARGETS)
|
||||
radv_emit_framebuffer_state(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
|
||||
radv_emit_viewport(cmd_buffer);
|
||||
|
||||
if (cmd_buffer->state.dirty & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
|
||||
radv_emit_scissor(cmd_buffer);
|
||||
|
||||
ia_multi_vgt_param = si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, draw_vertex_count);
|
||||
if (cmd_buffer->state.last_ia_multi_vgt_param != ia_multi_vgt_param) {
|
||||
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK)
|
||||
@@ -1900,6 +1903,8 @@ void radv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
|
||||
{
|
||||
struct radeon_winsys *ws = cmd_buffer->device->ws;
|
||||
|
||||
assert(!(set->layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
|
||||
|
||||
cmd_buffer->state.descriptors[idx] = set;
|
||||
cmd_buffer->state.descriptors_dirty |= (1 << idx);
|
||||
if (!set)
|
||||
|
@@ -401,7 +401,7 @@ radv_enumerate_devices(struct radv_instance *instance)
|
||||
|
||||
instance->physicalDeviceCount = 0;
|
||||
|
||||
max_devices = drmGetDevices2(0, devices, sizeof(devices));
|
||||
max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
|
||||
if (max_devices < 1)
|
||||
return VK_ERROR_INCOMPATIBLE_DRIVER;
|
||||
|
||||
@@ -417,9 +417,11 @@ radv_enumerate_devices(struct radv_instance *instance)
|
||||
if (result == VK_SUCCESS)
|
||||
++instance->physicalDeviceCount;
|
||||
else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
|
||||
return result;
|
||||
break;
|
||||
}
|
||||
}
|
||||
drmFreeDevices(devices, max_devices);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -915,6 +917,7 @@ radv_device_init_gs_info(struct radv_device *device)
|
||||
case CHIP_FIJI:
|
||||
case CHIP_POLARIS10:
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
device->gs_table_depth = 32;
|
||||
return;
|
||||
default:
|
||||
@@ -1046,6 +1049,22 @@ VkResult radv_CreateDevice(
|
||||
break;
|
||||
}
|
||||
device->ws->cs_finalize(device->flush_cs[family]);
|
||||
|
||||
device->flush_shader_cs[family] = device->ws->cs_create(device->ws, family);
|
||||
switch (family) {
|
||||
case RADV_QUEUE_GENERAL:
|
||||
case RADV_QUEUE_COMPUTE:
|
||||
si_cs_emit_cache_flush(device->flush_shader_cs[family],
|
||||
device->physical_device->rad_info.chip_class,
|
||||
family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
|
||||
family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
|
||||
RADV_CMD_FLAG_INV_ICACHE |
|
||||
RADV_CMD_FLAG_INV_SMEM_L1 |
|
||||
RADV_CMD_FLAG_INV_VMEM_L1 |
|
||||
RADV_CMD_FLAG_INV_GLOBAL_L2);
|
||||
break;
|
||||
}
|
||||
device->ws->cs_finalize(device->flush_shader_cs[family]);
|
||||
}
|
||||
|
||||
if (getenv("RADV_TRACE_FILE")) {
|
||||
@@ -1121,6 +1140,8 @@ void radv_DestroyDevice(
|
||||
device->ws->cs_destroy(device->empty_cs[i]);
|
||||
if (device->flush_cs[i])
|
||||
device->ws->cs_destroy(device->flush_cs[i]);
|
||||
if (device->flush_shader_cs[i])
|
||||
device->ws->cs_destroy(device->flush_shader_cs[i]);
|
||||
}
|
||||
radv_device_finish_meta(device);
|
||||
|
||||
@@ -1822,7 +1843,7 @@ VkResult radv_QueueSubmit(
|
||||
|
||||
for (uint32_t i = 0; i < submitCount; i++) {
|
||||
struct radeon_winsys_cs **cs_array;
|
||||
bool do_flush = !i;
|
||||
bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
|
||||
bool can_patch = !do_flush;
|
||||
uint32_t advance;
|
||||
|
||||
@@ -1849,7 +1870,9 @@ VkResult radv_QueueSubmit(
|
||||
(pSubmits[i].commandBufferCount + do_flush));
|
||||
|
||||
if(do_flush)
|
||||
cs_array[0] = queue->device->flush_cs[queue->queue_family_index];
|
||||
cs_array[0] = pSubmits[i].waitSemaphoreCount ?
|
||||
queue->device->flush_shader_cs[queue->queue_family_index] :
|
||||
queue->device->flush_cs[queue->queue_family_index];
|
||||
|
||||
for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
|
||||
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
|
||||
|
@@ -597,13 +597,13 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
|
||||
tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
|
||||
}
|
||||
}
|
||||
if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
|
||||
if (tiled && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
|
||||
tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
|
||||
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
|
||||
}
|
||||
}
|
||||
|
||||
if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
|
||||
if (linear && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
|
||||
linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
|
||||
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
|
||||
}
|
||||
|
@@ -52,7 +52,9 @@ radv_meta_restore(const struct radv_meta_saved_state *state,
|
||||
struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
cmd_buffer->state.pipeline = state->old_pipeline;
|
||||
radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
|
||||
|
||||
cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
|
||||
cmd_buffer->state.descriptors_dirty |= (1u << 0);
|
||||
memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
|
||||
sizeof(state->old_vertex_bindings));
|
||||
|
||||
@@ -110,7 +112,9 @@ radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
|
||||
{
|
||||
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
|
||||
radv_pipeline_to_handle(state->old_pipeline));
|
||||
radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
|
||||
|
||||
cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
|
||||
cmd_buffer->state.descriptors_dirty |= (1u << 0);
|
||||
|
||||
if (push_constant_size) {
|
||||
memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size);
|
||||
|
@@ -693,6 +693,9 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
|
||||
VK_IMAGE_ASPECT_STENCIL_BIT));
|
||||
assert(pass_att != VK_ATTACHMENT_UNUSED);
|
||||
|
||||
if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
|
||||
clear_value.depth = 1.0f;
|
||||
|
||||
const struct depthstencil_clear_vattrs vertex_data[3] = {
|
||||
{
|
||||
.position = {
|
||||
|
@@ -495,7 +495,7 @@ struct radv_device {
|
||||
int queue_count[RADV_MAX_QUEUE_FAMILIES];
|
||||
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
|
||||
struct radeon_winsys_cs *flush_cs[RADV_MAX_QUEUE_FAMILIES];
|
||||
|
||||
struct radeon_winsys_cs *flush_shader_cs[RADV_MAX_QUEUE_FAMILIES];
|
||||
uint64_t debug_flags;
|
||||
|
||||
bool llvm_supports_spill;
|
||||
|
@@ -460,16 +460,20 @@ VkResult radv_QueuePresentKHR(
|
||||
RADV_FROM_HANDLE(wsi_swapchain, swapchain, pPresentInfo->pSwapchains[i]);
|
||||
struct radeon_winsys_cs *cs;
|
||||
const VkPresentRegionKHR *region = NULL;
|
||||
VkResult item_result;
|
||||
|
||||
assert(radv_device_from_handle(swapchain->device) == queue->device);
|
||||
if (swapchain->fences[0] == VK_NULL_HANDLE) {
|
||||
result = radv_CreateFence(radv_device_to_handle(queue->device),
|
||||
item_result = radv_CreateFence(radv_device_to_handle(queue->device),
|
||||
&(VkFenceCreateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
|
||||
.flags = 0,
|
||||
}, &swapchain->alloc, &swapchain->fences[0]);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
if (pPresentInfo->pResults != NULL)
|
||||
pPresentInfo->pResults[i] = item_result;
|
||||
result = result == VK_SUCCESS ? item_result : result;
|
||||
if (item_result != VK_SUCCESS)
|
||||
continue;
|
||||
} else {
|
||||
radv_ResetFences(radv_device_to_handle(queue->device),
|
||||
1, &swapchain->fences[0]);
|
||||
@@ -493,12 +497,15 @@ VkResult radv_QueuePresentKHR(
|
||||
if (regions && regions->pRegions)
|
||||
region = &regions->pRegions[i];
|
||||
|
||||
result = swapchain->queue_present(swapchain,
|
||||
item_result = swapchain->queue_present(swapchain,
|
||||
pPresentInfo->pImageIndices[i],
|
||||
region);
|
||||
/* TODO: What if one of them returns OUT_OF_DATE? */
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
if (pPresentInfo->pResults != NULL)
|
||||
pPresentInfo->pResults[i] = item_result;
|
||||
result = result == VK_SUCCESS ? item_result : result;
|
||||
if (item_result != VK_SUCCESS)
|
||||
continue;
|
||||
|
||||
VkFence last = swapchain->fences[2];
|
||||
swapchain->fences[2] = swapchain->fences[1];
|
||||
|
@@ -297,6 +297,7 @@ si_emit_config(struct radv_physical_device *physical_device,
|
||||
raster_config_1 = 0x0000002a;
|
||||
break;
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
raster_config = 0x16000012;
|
||||
raster_config_1 = 0x00000000;
|
||||
break;
|
||||
@@ -671,7 +672,8 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
|
||||
if (family == CHIP_TONGA ||
|
||||
family == CHIP_FIJI ||
|
||||
family == CHIP_POLARIS10 ||
|
||||
family == CHIP_POLARIS11)
|
||||
family == CHIP_POLARIS11 ||
|
||||
family == CHIP_POLARIS12)
|
||||
partial_vs_wave = true;
|
||||
} else {
|
||||
partial_vs_wave = true;
|
||||
|
@@ -107,6 +107,7 @@ get_chip_name(enum radeon_family family)
|
||||
case CHIP_FIJI: return "AMD RADV FIJI";
|
||||
case CHIP_POLARIS10: return "AMD RADV POLARIS10";
|
||||
case CHIP_POLARIS11: return "AMD RADV POLARIS11";
|
||||
case CHIP_POLARIS12: return "AMD RADV POLARIS12";
|
||||
case CHIP_STONEY: return "AMD RADV STONEY";
|
||||
default: return "AMD RADV unknown";
|
||||
}
|
||||
@@ -271,6 +272,10 @@ do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
|
||||
ws->family = FAMILY_VI;
|
||||
ws->rev_id = VI_POLARIS11_M_A0;
|
||||
break;
|
||||
case CHIP_POLARIS12:
|
||||
ws->family = FAMILY_VI;
|
||||
ws->rev_id = VI_POLARIS12_V_A0;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "amdgpu: Unknown family.\n");
|
||||
goto fail;
|
||||
|
@@ -96,7 +96,9 @@ lower_bitmap(nir_shader *shader, nir_builder *b,
|
||||
tex->texture_index = options->sampler;
|
||||
tex->dest_type = nir_type_float;
|
||||
tex->src[0].src_type = nir_tex_src_coord;
|
||||
tex->src[0].src = nir_src_for_ssa(texcoord);
|
||||
tex->src[0].src =
|
||||
nir_src_for_ssa(nir_channels(b, texcoord,
|
||||
(1 << tex->coord_components) - 1));
|
||||
|
||||
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
|
@@ -135,7 +135,9 @@ lower_color(lower_drawpixels_state *state, nir_intrinsic_instr *intr)
|
||||
tex->texture_index = state->options->drawpix_sampler;
|
||||
tex->dest_type = nir_type_float;
|
||||
tex->src[0].src_type = nir_tex_src_coord;
|
||||
tex->src[0].src = nir_src_for_ssa(texcoord);
|
||||
tex->src[0].src =
|
||||
nir_src_for_ssa(nir_channels(b, texcoord,
|
||||
(1 << tex->coord_components) - 1));
|
||||
|
||||
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
|
@@ -244,9 +244,9 @@ convert_yuv_to_rgb(nir_builder *b, nir_tex_instr *tex,
|
||||
nir_ssa_def *yuv =
|
||||
nir_vec4(b,
|
||||
nir_fmul(b, nir_imm_float(b, 1.16438356f),
|
||||
nir_fadd(b, y, nir_imm_float(b, -0.0625f))),
|
||||
nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -0.5f)), 0),
|
||||
nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -0.5f)), 0),
|
||||
nir_fadd(b, y, nir_imm_float(b, -16.0f / 255.0f))),
|
||||
nir_channel(b, nir_fadd(b, u, nir_imm_float(b, -128.0f / 255.0f)), 0),
|
||||
nir_channel(b, nir_fadd(b, v, nir_imm_float(b, -128.0f / 255.0f)), 0),
|
||||
nir_imm_float(b, 0.0));
|
||||
|
||||
nir_ssa_def *red = nir_fdot4(b, yuv, nir_build_imm(b, 4, 32, m[0]));
|
||||
|
@@ -138,7 +138,8 @@ libEGL_mesa_la_SOURCES = \
|
||||
main/eglglvnd.c \
|
||||
main/egldispatchstubs.h \
|
||||
main/egldispatchstubs.c \
|
||||
g_egldispatchstubs.c
|
||||
g_egldispatchstubs.c \
|
||||
g_egldispatchstubs.h
|
||||
libEGL_mesa_la_LIBADD = libEGL_common.la
|
||||
libEGL_mesa_la_LDFLAGS = \
|
||||
-no-undefined \
|
||||
|
@@ -264,10 +264,15 @@ droid_window_enqueue_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_sur
|
||||
}
|
||||
|
||||
static void
|
||||
droid_window_cancel_buffer(_EGLDisplay *disp, struct dri2_egl_surface *dri2_surf)
|
||||
droid_window_cancel_buffer(struct dri2_egl_surface *dri2_surf)
|
||||
{
|
||||
/* no cancel buffer? */
|
||||
droid_window_enqueue_buffer(disp, dri2_surf);
|
||||
int ret;
|
||||
|
||||
ret = dri2_surf->window->cancelBuffer(dri2_surf->window, dri2_surf->buffer, -1);
|
||||
if (ret < 0) {
|
||||
_eglLog(_EGL_WARNING, "ANativeWindow::cancelBuffer failed");
|
||||
dri2_surf->base.Lost = EGL_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
static __DRIbuffer *
|
||||
@@ -399,7 +404,7 @@ droid_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
|
||||
|
||||
if (dri2_surf->base.Type == EGL_WINDOW_BIT) {
|
||||
if (dri2_surf->buffer)
|
||||
droid_window_cancel_buffer(disp, dri2_surf);
|
||||
droid_window_cancel_buffer(dri2_surf);
|
||||
|
||||
dri2_surf->window->common.decRef(&dri2_surf->window->common);
|
||||
}
|
||||
@@ -426,12 +431,16 @@ droid_destroy_surface(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf)
|
||||
static int
|
||||
update_buffers(struct dri2_egl_surface *dri2_surf)
|
||||
{
|
||||
if (dri2_surf->base.Lost)
|
||||
return -1;
|
||||
|
||||
if (dri2_surf->base.Type != EGL_WINDOW_BIT)
|
||||
return 0;
|
||||
|
||||
/* try to dequeue the next back buffer */
|
||||
if (!dri2_surf->buffer && !droid_window_dequeue_buffer(dri2_surf)) {
|
||||
_eglLog(_EGL_WARNING, "Could not dequeue buffer from native window");
|
||||
dri2_surf->base.Lost = EGL_TRUE;
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -631,6 +640,12 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
|
||||
|
||||
dri2_flush_drawable_for_swapbuffers(disp, draw);
|
||||
|
||||
/* dri2_surf->buffer can be null even when no error has occurred. For
|
||||
* example, if the user has called no GL rendering commands since the
|
||||
* previous eglSwapBuffers, then the driver may have not triggered
|
||||
* a callback to ANativeWindow::dequeueBuffer, in which case
|
||||
* dri2_surf->buffer remains null.
|
||||
*/
|
||||
if (dri2_surf->buffer)
|
||||
droid_window_enqueue_buffer(disp, dri2_surf);
|
||||
|
||||
|
@@ -689,12 +689,12 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
fd = loader_open_device(buf);
|
||||
if (fd < 0)
|
||||
fd = loader_open_device("/dev/dri/card0");
|
||||
dri2_dpy->own_device = 1;
|
||||
gbm = gbm_create_device(fd);
|
||||
if (gbm == NULL) {
|
||||
err = "DRI2: failed to create gbm device";
|
||||
goto cleanup;
|
||||
}
|
||||
dri2_dpy->own_device = 1;
|
||||
} else {
|
||||
fd = fcntl(gbm_device_get_fd(gbm), F_DUPFD_CLOEXEC, 3);
|
||||
if (fd < 0) {
|
||||
|
@@ -828,6 +828,14 @@ eglMakeCurrent(EGLDisplay dpy, EGLSurface draw, EGLSurface read,
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_MATCH, EGL_FALSE);
|
||||
}
|
||||
|
||||
/* If a native window underlying either draw or read is no longer valid,
|
||||
* an EGL_BAD_NATIVE_WINDOW error is generated.
|
||||
*/
|
||||
if (draw_surf && draw_surf->Lost)
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_FALSE);
|
||||
if (read_surf && read_surf->Lost)
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_FALSE);
|
||||
|
||||
ret = drv->API.MakeCurrent(drv, disp, draw_surf, read_surf, context);
|
||||
|
||||
RETURN_EGL_EVAL(disp, ret);
|
||||
@@ -1215,6 +1223,15 @@ eglSwapBuffers(EGLDisplay dpy, EGLSurface surface)
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_SURFACE, EGL_FALSE);
|
||||
#endif
|
||||
|
||||
/* From the EGL 1.5 spec:
|
||||
*
|
||||
* If eglSwapBuffers is called and the native window associated with
|
||||
* surface is no longer valid, an EGL_BAD_NATIVE_WINDOW error is
|
||||
* generated.
|
||||
*/
|
||||
if (surf->Lost)
|
||||
RETURN_EGL_ERROR(disp, EGL_BAD_NATIVE_WINDOW, EGL_FALSE);
|
||||
|
||||
ret = drv->API.SwapBuffers(drv, disp, surf);
|
||||
|
||||
RETURN_EGL_EVAL(disp, ret);
|
||||
|
@@ -295,6 +295,7 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
|
||||
_eglInitResource(&surf->Resource, sizeof(*surf), dpy);
|
||||
surf->Type = type;
|
||||
surf->Config = conf;
|
||||
surf->Lost = EGL_FALSE;
|
||||
|
||||
surf->Width = 0;
|
||||
surf->Height = 0;
|
||||
|
@@ -56,6 +56,11 @@ struct _egl_surface
|
||||
|
||||
EGLint Type; /* one of EGL_WINDOW_BIT, EGL_PIXMAP_BIT or EGL_PBUFFER_BIT */
|
||||
|
||||
/* The native surface is lost. The EGL spec requires certain functions
|
||||
* to generate EGL_BAD_NATIVE_WINDOW when given this surface.
|
||||
*/
|
||||
EGLBoolean Lost;
|
||||
|
||||
/* attributes set by attribute list */
|
||||
EGLint Width, Height;
|
||||
EGLenum TextureFormat;
|
||||
|
@@ -125,19 +125,6 @@ create_pass_manager(struct gallivm_state *gallivm)
|
||||
LLVMAddTargetData(gallivm->target, gallivm->passmgr);
|
||||
#endif
|
||||
|
||||
/* Setting the module's DataLayout to an empty string will cause the
|
||||
* ExecutionEngine to copy the DataLayout string from its target
|
||||
* machine to the module. As of LLVM 3.8 the module and the execution
|
||||
* engine are required to have the same DataLayout.
|
||||
*
|
||||
* TODO: This is just a temporary work-around. The correct solution is
|
||||
* for gallivm_init_state() to create a TargetMachine and pull the
|
||||
* DataLayout from there. Currently, the TargetMachine used by llvmpipe
|
||||
* is being implicitly created by the EngineBuilder in
|
||||
* lp_build_create_jit_compiler_for_module()
|
||||
*/
|
||||
|
||||
#if HAVE_LLVM < 0x0308
|
||||
{
|
||||
char *td_str;
|
||||
// New ones from the Module.
|
||||
@@ -145,9 +132,6 @@ create_pass_manager(struct gallivm_state *gallivm)
|
||||
LLVMSetDataLayout(gallivm->module, td_str);
|
||||
free(td_str);
|
||||
}
|
||||
#else
|
||||
LLVMSetDataLayout(gallivm->module, "");
|
||||
#endif
|
||||
|
||||
if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) {
|
||||
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
|
||||
@@ -628,6 +612,24 @@ gallivm_compile_module(struct gallivm_state *gallivm)
|
||||
}
|
||||
|
||||
if (use_mcjit) {
|
||||
/* Setting the module's DataLayout to an empty string will cause the
|
||||
* ExecutionEngine to copy the DataLayout string from its target
|
||||
* machine to the module. As of LLVM 3.8 the module and the execution
|
||||
* engine are required to have the same DataLayout.
|
||||
*
|
||||
* We must make sure we do this after running the optimization passes,
|
||||
* because those passes need a correct datalayout string. For example,
|
||||
* if those optimization passes see an empty datalayout, they will assume
|
||||
* this is a little endian target and will do optimizations that break big
|
||||
* endian machines.
|
||||
*
|
||||
* TODO: This is just a temporary work-around. The correct solution is
|
||||
* for gallivm_init_state() to create a TargetMachine and pull the
|
||||
* DataLayout from there. Currently, the TargetMachine used by llvmpipe
|
||||
* is being implicitly created by the EngineBuilder in
|
||||
* lp_build_create_jit_compiler_for_module()
|
||||
*/
|
||||
LLVMSetDataLayout(gallivm->module, "");
|
||||
assert(!gallivm->engine);
|
||||
if (!init_gallivm_engine(gallivm)) {
|
||||
assert(0);
|
||||
|
@@ -64,13 +64,13 @@ static const struct pipe_loader_ops pipe_loader_drm_ops;
|
||||
|
||||
#ifdef GALLIUM_STATIC_TARGETS
|
||||
static const struct drm_conf_ret throttle_ret = {
|
||||
DRM_CONF_INT,
|
||||
{2},
|
||||
.type = DRM_CONF_INT,
|
||||
.val.val_int = 2,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
DRM_CONF_BOOL,
|
||||
{true},
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static inline const struct drm_conf_ret *
|
||||
|
@@ -29,11 +29,11 @@
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <xf86drm.h>
|
||||
|
||||
#include "state_tracker/drm_driver.h"
|
||||
#include "pipe/p_screen.h"
|
||||
#include "util/u_inlines.h"
|
||||
#include "util/u_memory.h"
|
||||
|
||||
struct renderonly *
|
||||
@@ -65,8 +65,16 @@ renderonly_scanout_for_prime(struct pipe_resource *rsc, struct renderonly *ro)
|
||||
}
|
||||
|
||||
void
|
||||
renderonly_scanout_destroy(struct renderonly_scanout *scanout)
|
||||
renderonly_scanout_destroy(struct renderonly_scanout *scanout,
|
||||
struct renderonly *ro)
|
||||
{
|
||||
struct drm_mode_destroy_dumb destroy_dumb = { };
|
||||
|
||||
pipe_resource_reference(&scanout->prime, NULL);
|
||||
if (ro->kms_fd != -1) {
|
||||
destroy_dumb.handle = scanout->handle;
|
||||
drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);
|
||||
}
|
||||
FREE(scanout);
|
||||
}
|
||||
|
||||
@@ -90,7 +98,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
|
||||
return NULL;
|
||||
|
||||
/* create dumb buffer at scanout GPU */
|
||||
err = ioctl(ro->kms_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb);
|
||||
err = drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb);
|
||||
if (err < 0) {
|
||||
fprintf(stderr, "DRM_IOCTL_MODE_CREATE_DUMB failed: %s\n",
|
||||
strerror(errno));
|
||||
@@ -109,6 +117,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
|
||||
}
|
||||
|
||||
/* import dumb buffer */
|
||||
memset(&handle, 0, sizeof(handle));
|
||||
handle.type = DRM_API_HANDLE_TYPE_FD;
|
||||
handle.handle = prime_fd;
|
||||
handle.stride = create_dumb.pitch;
|
||||
@@ -116,6 +125,8 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
|
||||
scanout->prime = screen->resource_from_handle(screen, rsc,
|
||||
&handle, PIPE_HANDLE_USAGE_READ_WRITE);
|
||||
|
||||
close(prime_fd);
|
||||
|
||||
if (!scanout->prime) {
|
||||
fprintf(stderr, "failed to create resource_from_handle: %s\n", strerror(errno));
|
||||
goto free_dumb;
|
||||
@@ -125,7 +136,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
|
||||
|
||||
free_dumb:
|
||||
destroy_dumb.handle = scanout->handle;
|
||||
ioctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);
|
||||
drmIoctl(ro->kms_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb);
|
||||
|
||||
free_scanout:
|
||||
FREE(scanout);
|
||||
|
@@ -77,7 +77,8 @@ struct renderonly_scanout *
|
||||
renderonly_scanout_for_prime(struct pipe_resource *rsc, struct renderonly *ro);
|
||||
|
||||
void
|
||||
renderonly_scanout_destroy(struct renderonly_scanout *scanout);
|
||||
renderonly_scanout_destroy(struct renderonly_scanout *scanout,
|
||||
struct renderonly *ro);
|
||||
|
||||
static inline boolean
|
||||
renderonly_get_handle(struct renderonly_scanout *scanout,
|
||||
|
@@ -103,6 +103,8 @@ static struct etna_format formats[PIPE_FORMAT_COUNT] = {
|
||||
_T(B4G4R4A4_UNORM, A4R4G4B4, A4R4G4B4),
|
||||
_T(B4G4R4X4_UNORM, X4R4G4B4, X4R4G4B4),
|
||||
|
||||
_T(L8A8_UNORM, A8L8, NONE),
|
||||
|
||||
_T(Z16_UNORM, D16, A4R4G4B4),
|
||||
_T(B5G6R5_UNORM, R5G6B5, R5G6B5),
|
||||
_T(B5G5R5A1_UNORM, A1R5G5B5, A1R5G5B5),
|
||||
|
@@ -180,7 +180,7 @@ etna_resource_alloc(struct pipe_screen *pscreen, unsigned layout,
|
||||
&paddingY, &halign);
|
||||
assert(paddingX && paddingY);
|
||||
|
||||
if (templat->bind != PIPE_BUFFER) {
|
||||
if (templat->target != PIPE_BUFFER) {
|
||||
unsigned min_paddingY = 4 * screen->specs.pixel_pipes;
|
||||
if (paddingY < min_paddingY)
|
||||
paddingY = min_paddingY;
|
||||
@@ -308,7 +308,7 @@ etna_resource_destroy(struct pipe_screen *pscreen, struct pipe_resource *prsc)
|
||||
etna_bo_del(rsc->ts_bo);
|
||||
|
||||
if (rsc->scanout)
|
||||
renderonly_scanout_destroy(rsc->scanout);
|
||||
renderonly_scanout_destroy(rsc->scanout, etna_screen(pscreen)->ro);
|
||||
|
||||
list_delinit(&rsc->list);
|
||||
|
||||
|
@@ -416,6 +416,8 @@ translate_clear_color(enum pipe_format format,
|
||||
switch (format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_R8G8B8X8_UNORM:
|
||||
clear_value = etna_cfloat_to_uintN(color->f[2], 8) |
|
||||
(etna_cfloat_to_uintN(color->f[1], 8) << 8) |
|
||||
(etna_cfloat_to_uintN(color->f[0], 8) << 16) |
|
||||
|
@@ -149,6 +149,9 @@ use_hw_binning(struct fd_batch *batch)
|
||||
if (gmem->minx || gmem->miny)
|
||||
return false;
|
||||
|
||||
if ((gmem->maxpw * gmem->maxph) > 32)
|
||||
return false;
|
||||
|
||||
return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
|
||||
}
|
||||
|
||||
|
@@ -45,6 +45,12 @@ fd_context_flush(struct pipe_context *pctx, struct pipe_fence_handle **fence,
|
||||
{
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
|
||||
if (!ctx->batch) {
|
||||
if (fence)
|
||||
*fence = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
if (flags & PIPE_FLUSH_FENCE_FD)
|
||||
ctx->batch->needs_out_fence_fd = true;
|
||||
|
||||
@@ -112,7 +118,7 @@ fd_context_destroy(struct pipe_context *pctx)
|
||||
|
||||
DBG("");
|
||||
|
||||
if (ctx->screen->reorder)
|
||||
if (ctx->screen->reorder && util_queue_is_initialized(&ctx->flush_queue))
|
||||
util_queue_destroy(&ctx->flush_queue);
|
||||
|
||||
fd_batch_reference(&ctx->batch, NULL); /* unref current batch */
|
||||
|
@@ -219,6 +219,9 @@ calculate_tiles(struct fd_batch *batch)
|
||||
div_round_up(nbins_x, tpp_x)) > 8)
|
||||
tpp_x += 1;
|
||||
|
||||
gmem->maxpw = tpp_x;
|
||||
gmem->maxph = tpp_y;
|
||||
|
||||
/* configure pipes: */
|
||||
xoff = yoff = 0;
|
||||
for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
|
||||
|
@@ -57,6 +57,7 @@ struct fd_gmem_stateobj {
|
||||
uint16_t bin_w, nbins_x;
|
||||
uint16_t minx, miny;
|
||||
uint16_t width, height;
|
||||
uint16_t maxpw, maxph; /* maximum pipe width/height */
|
||||
};
|
||||
|
||||
struct fd_batch;
|
||||
|
@@ -642,6 +642,8 @@ NVC0LegalizePostRA::replaceZero(Instruction *i)
|
||||
for (int s = 0; i->srcExists(s); ++s) {
|
||||
if (s == 2 && i->op == OP_SUCLAMP)
|
||||
continue;
|
||||
if (s == 1 && i->op == OP_SHLADD)
|
||||
continue;
|
||||
ImmediateValue *imm = i->getSrc(s)->asImm();
|
||||
if (imm) {
|
||||
if (i->op == OP_SELP && s == 2) {
|
||||
|
@@ -16,13 +16,11 @@ libradeon_la_SOURCES = \
|
||||
if HAVE_GALLIUM_LLVM
|
||||
|
||||
AM_CFLAGS += \
|
||||
$(LLVM_CFLAGS) \
|
||||
$(LIBELF_CFLAGS)
|
||||
$(LLVM_CFLAGS)
|
||||
|
||||
libradeon_la_LIBADD = \
|
||||
$(CLOCK_LIB) \
|
||||
$(LLVM_LIBS) \
|
||||
$(LIBELF_LIBS)
|
||||
$(LLVM_LIBS)
|
||||
|
||||
libradeon_la_LDFLAGS = \
|
||||
$(LLVM_LDFLAGS)
|
||||
|
@@ -833,6 +833,7 @@ static const char* r600_get_chip_name(struct r600_common_screen *rscreen)
|
||||
case CHIP_POLARIS12: return "AMD POLARIS12";
|
||||
case CHIP_STONEY: return "AMD STONEY";
|
||||
case CHIP_VEGA10: return "AMD VEGA10";
|
||||
case CHIP_RAVEN: return "AMD RAVEN";
|
||||
default: return "AMD unknown";
|
||||
}
|
||||
}
|
||||
@@ -1006,6 +1007,7 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
||||
case CHIP_POLARIS12: /* same as polaris11 */
|
||||
return HAVE_LLVM >= 0x0309 ? "polaris11" : "carrizo";
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_RAVEN:
|
||||
return "gfx900";
|
||||
default:
|
||||
return "";
|
||||
|
@@ -2720,8 +2720,15 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
||||
|
||||
vi_dcc_clear_level(rctx, tex, 0, reset_value);
|
||||
|
||||
if (clear_words_needed)
|
||||
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
|
||||
unsigned level_bit = 1 << fb->cbufs[i]->u.tex.level;
|
||||
if (clear_words_needed) {
|
||||
bool need_compressed_update = !tex->dirty_level_mask;
|
||||
|
||||
tex->dirty_level_mask |= level_bit;
|
||||
|
||||
if (need_compressed_update)
|
||||
p_atomic_inc(&rctx->screen->compressed_colortex_counter);
|
||||
}
|
||||
tex->separate_dcc_dirty = true;
|
||||
} else {
|
||||
/* 128-bit formats are unsupported */
|
||||
@@ -2744,7 +2751,12 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
|
||||
tex->cmask.offset, tex->cmask.size, 0,
|
||||
R600_COHERENCY_CB_META);
|
||||
|
||||
bool need_compressed_update = !tex->dirty_level_mask;
|
||||
|
||||
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
|
||||
|
||||
if (need_compressed_update)
|
||||
p_atomic_inc(&rctx->screen->compressed_colortex_counter);
|
||||
}
|
||||
|
||||
/* We can change the micro tile mode before a full clear. */
|
||||
|
@@ -765,7 +765,7 @@ static void si_set_shader_image(struct si_context *ctx,
|
||||
static const unsigned char swizzle[4] = { 0, 1, 2, 3 };
|
||||
struct r600_texture *tex = (struct r600_texture *)res;
|
||||
unsigned level = view->u.tex.level;
|
||||
unsigned width, height, depth;
|
||||
unsigned width, height, depth, hw_level;
|
||||
bool uses_dcc = vi_dcc_enabled(tex, level);
|
||||
|
||||
assert(!tex->is_depth);
|
||||
@@ -794,20 +794,31 @@ static void si_set_shader_image(struct si_context *ctx,
|
||||
p_atomic_read(&tex->framebuffers_bound))
|
||||
ctx->need_check_render_feedback = true;
|
||||
|
||||
/* Always force the base level to the selected level.
|
||||
*
|
||||
* This is required for 3D textures, where otherwise
|
||||
* selecting a single slice for non-layered bindings
|
||||
* fails. It doesn't hurt the other targets.
|
||||
*/
|
||||
width = u_minify(res->b.b.width0, level);
|
||||
height = u_minify(res->b.b.height0, level);
|
||||
depth = u_minify(res->b.b.depth0, level);
|
||||
if (ctx->b.chip_class >= GFX9) {
|
||||
/* Always set the base address. The swizzle modes don't
|
||||
* allow setting mipmap level offsets as the base.
|
||||
*/
|
||||
width = res->b.b.width0;
|
||||
height = res->b.b.height0;
|
||||
depth = res->b.b.depth0;
|
||||
hw_level = level;
|
||||
} else {
|
||||
/* Always force the base level to the selected level.
|
||||
*
|
||||
* This is required for 3D textures, where otherwise
|
||||
* selecting a single slice for non-layered bindings
|
||||
* fails. It doesn't hurt the other targets.
|
||||
*/
|
||||
width = u_minify(res->b.b.width0, level);
|
||||
height = u_minify(res->b.b.height0, level);
|
||||
depth = u_minify(res->b.b.depth0, level);
|
||||
hw_level = 0;
|
||||
}
|
||||
|
||||
si_make_texture_descriptor(screen, tex,
|
||||
false, res->b.b.target,
|
||||
view->format, swizzle,
|
||||
0, 0,
|
||||
hw_level, hw_level,
|
||||
view->u.tex.first_layer,
|
||||
view->u.tex.last_layer,
|
||||
width, height, depth,
|
||||
|
@@ -759,6 +759,7 @@ static bool si_init_gs_info(struct si_screen *sscreen)
|
||||
case CHIP_POLARIS11:
|
||||
case CHIP_POLARIS12:
|
||||
case CHIP_VEGA10:
|
||||
case CHIP_RAVEN:
|
||||
sscreen->gs_table_depth = 32;
|
||||
return true;
|
||||
default:
|
||||
@@ -897,7 +898,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
||||
|
||||
sscreen->has_msaa_sample_loc_bug = (sscreen->b.family >= CHIP_POLARIS10 &&
|
||||
sscreen->b.family <= CHIP_POLARIS12) ||
|
||||
sscreen->b.family == CHIP_VEGA10;
|
||||
sscreen->b.family == CHIP_VEGA10 ||
|
||||
sscreen->b.family == CHIP_RAVEN;
|
||||
|
||||
sscreen->b.has_cp_dma = true;
|
||||
sscreen->b.has_streamout = true;
|
||||
@@ -911,7 +913,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
|
||||
|
||||
sscreen->b.rbplus_allowed =
|
||||
!(sscreen->b.debug_flags & DBG_NO_RB_PLUS) &&
|
||||
sscreen->b.family == CHIP_STONEY;
|
||||
(sscreen->b.family == CHIP_STONEY ||
|
||||
sscreen->b.family == CHIP_RAVEN);
|
||||
}
|
||||
|
||||
(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
|
||||
|
@@ -2344,8 +2344,8 @@ handle_semantic:
|
||||
shader->selector->info.writes_layer) {
|
||||
pos_args[1].enabled_channels = shader->selector->info.writes_psize |
|
||||
(shader->selector->info.writes_edgeflag << 1) |
|
||||
(shader->selector->info.writes_layer << 2) |
|
||||
(shader->selector->info.writes_viewport_index << 3);
|
||||
(shader->selector->info.writes_layer << 2);
|
||||
|
||||
pos_args[1].valid_mask = 0; /* EXEC mask */
|
||||
pos_args[1].done = 0; /* last export? */
|
||||
pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
|
||||
@@ -2374,11 +2374,34 @@ handle_semantic:
|
||||
ctx->f32, "");
|
||||
}
|
||||
|
||||
if (shader->selector->info.writes_layer)
|
||||
pos_args[1].out[2] = layer_value;
|
||||
if (ctx->screen->b.chip_class >= GFX9) {
|
||||
/* GFX9 has the layer in out.z[10:0] and the viewport
|
||||
* index in out.z[19:16].
|
||||
*/
|
||||
if (shader->selector->info.writes_layer)
|
||||
pos_args[1].out[2] = layer_value;
|
||||
|
||||
if (shader->selector->info.writes_viewport_index)
|
||||
pos_args[1].out[3] = viewport_index_value;
|
||||
if (shader->selector->info.writes_viewport_index) {
|
||||
LLVMValueRef v = viewport_index_value;
|
||||
|
||||
v = bitcast(bld_base, TGSI_TYPE_UNSIGNED, v);
|
||||
v = LLVMBuildShl(ctx->gallivm.builder, v,
|
||||
LLVMConstInt(ctx->i32, 16, 0), "");
|
||||
v = LLVMBuildOr(ctx->gallivm.builder, v,
|
||||
bitcast(bld_base, TGSI_TYPE_UNSIGNED,
|
||||
pos_args[1].out[2]), "");
|
||||
pos_args[1].out[2] = bitcast(bld_base, TGSI_TYPE_FLOAT, v);
|
||||
pos_args[1].enabled_channels |= 1 << 2;
|
||||
}
|
||||
} else {
|
||||
if (shader->selector->info.writes_layer)
|
||||
pos_args[1].out[2] = layer_value;
|
||||
|
||||
if (shader->selector->info.writes_viewport_index) {
|
||||
pos_args[1].out[3] = viewport_index_value;
|
||||
pos_args[1].enabled_channels |= 1 << 3;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
@@ -3400,7 +3423,7 @@ image_fetch_rsrc(
|
||||
static LLVMValueRef image_fetch_coords(
|
||||
struct lp_build_tgsi_context *bld_base,
|
||||
const struct tgsi_full_instruction *inst,
|
||||
unsigned src)
|
||||
unsigned src, LLVMValueRef desc)
|
||||
{
|
||||
struct si_shader_context *ctx = si_shader_context(bld_base);
|
||||
struct gallivm_state *gallivm = &ctx->gallivm;
|
||||
@@ -3417,14 +3440,30 @@ static LLVMValueRef image_fetch_coords(
|
||||
coords[chan] = tmp;
|
||||
}
|
||||
|
||||
/* 1D textures are allocated and used as 2D on GFX9. */
|
||||
if (ctx->screen->b.chip_class >= GFX9) {
|
||||
/* 1D textures are allocated and used as 2D on GFX9. */
|
||||
if (target == TGSI_TEXTURE_1D) {
|
||||
coords[1] = ctx->i32_0;
|
||||
num_coords++;
|
||||
} else if (target == TGSI_TEXTURE_1D_ARRAY) {
|
||||
coords[2] = coords[1];
|
||||
coords[1] = ctx->i32_0;
|
||||
num_coords++;
|
||||
} else if (target == TGSI_TEXTURE_2D) {
|
||||
/* The hw can't bind a slice of a 3D image as a 2D
|
||||
* image, because it ignores BASE_ARRAY if the target
|
||||
* is 3D. The workaround is to read BASE_ARRAY and set
|
||||
* it as the 3rd address operand for all 2D images.
|
||||
*/
|
||||
LLVMValueRef first_layer, const5, mask;
|
||||
|
||||
const5 = LLVMConstInt(ctx->i32, 5, 0);
|
||||
mask = LLVMConstInt(ctx->i32, S_008F24_BASE_ARRAY(~0), 0);
|
||||
first_layer = LLVMBuildExtractElement(builder, desc, const5, "");
|
||||
first_layer = LLVMBuildAnd(builder, first_layer, mask, "");
|
||||
|
||||
coords[2] = first_layer;
|
||||
num_coords++;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3539,7 +3578,7 @@ static void load_fetch_args(
|
||||
LLVMValueRef coords;
|
||||
|
||||
image_fetch_rsrc(bld_base, &inst->Src[0], false, target, &rsrc);
|
||||
coords = image_fetch_coords(bld_base, inst, 1);
|
||||
coords = image_fetch_coords(bld_base, inst, 1, rsrc);
|
||||
|
||||
if (target == TGSI_TEXTURE_BUFFER) {
|
||||
buffer_append_args(ctx, emit_data, rsrc, coords,
|
||||
@@ -3814,16 +3853,15 @@ static void store_fetch_args(
|
||||
*/
|
||||
bool force_glc = ctx->screen->b.chip_class == SI;
|
||||
|
||||
coords = image_fetch_coords(bld_base, inst, 0);
|
||||
image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
|
||||
coords = image_fetch_coords(bld_base, inst, 0, rsrc);
|
||||
|
||||
if (target == TGSI_TEXTURE_BUFFER) {
|
||||
image_fetch_rsrc(bld_base, &memory, true, target, &rsrc);
|
||||
buffer_append_args(ctx, emit_data, rsrc, coords,
|
||||
ctx->i32_0, false, force_glc);
|
||||
} else {
|
||||
emit_data->args[1] = coords;
|
||||
image_fetch_rsrc(bld_base, &memory, true, target,
|
||||
&emit_data->args[2]);
|
||||
emit_data->args[2] = rsrc;
|
||||
emit_data->args[3] = LLVMConstInt(ctx->i32, 15, 0); /* dmask */
|
||||
emit_data->arg_count = 4;
|
||||
|
||||
@@ -4027,7 +4065,7 @@ static void atomic_fetch_args(
|
||||
LLVMValueRef coords;
|
||||
|
||||
image_fetch_rsrc(bld_base, &inst->Src[0], true, target, &rsrc);
|
||||
coords = image_fetch_coords(bld_base, inst, 1);
|
||||
coords = image_fetch_coords(bld_base, inst, 1, rsrc);
|
||||
|
||||
if (target == TGSI_TEXTURE_BUFFER) {
|
||||
buffer_append_args(ctx, emit_data, rsrc, coords,
|
||||
|
@@ -2976,7 +2976,40 @@ si_make_buffer_descriptor(struct si_screen *screen, struct r600_resource *buf,
|
||||
num_records = size / stride;
|
||||
num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride);
|
||||
|
||||
if (screen->b.chip_class == VI)
|
||||
/* The NUM_RECORDS field has a different meaning depending on the chip,
|
||||
* instruction type, STRIDE, and SWIZZLE_ENABLE.
|
||||
*
|
||||
* SI-CIK:
|
||||
* - If STRIDE == 0, it's in byte units.
|
||||
* - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN.
|
||||
*
|
||||
* VI:
|
||||
* - For SMEM and STRIDE == 0, it's in byte units.
|
||||
* - For SMEM and STRIDE != 0, it's in units of STRIDE.
|
||||
* - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units.
|
||||
* - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE.
|
||||
* NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_-
|
||||
* ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when
|
||||
* using SMEM. This can be done in the shader by clearing STRIDE with s_and.
|
||||
* That way the same descriptor can be used by both SMEM and VMEM.
|
||||
*
|
||||
* GFX9:
|
||||
* - For SMEM and STRIDE == 0, it's in byte units.
|
||||
* - For SMEM and STRIDE != 0, it's in units of STRIDE.
|
||||
* - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units.
|
||||
* - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE.
|
||||
*/
|
||||
if (screen->b.chip_class >= GFX9)
|
||||
/* When vindex == 0, LLVM sets IDXEN = 0, thus changing units
|
||||
* from STRIDE to bytes. This works around it by setting
|
||||
* NUM_RECORDS to at least the size of one element, so that
|
||||
* the first element is readable when IDXEN == 0.
|
||||
*
|
||||
* TODO: Fix this in LLVM, but do we need a new intrinsic where
|
||||
* IDXEN is enforced?
|
||||
*/
|
||||
num_records = num_records ? MAX2(num_records, stride) : 0;
|
||||
else if (screen->b.chip_class == VI)
|
||||
num_records *= stride;
|
||||
|
||||
state[4] = 0;
|
||||
@@ -3156,7 +3189,8 @@ si_make_texture_descriptor(struct si_screen *screen,
|
||||
if (!sampler &&
|
||||
(res->target == PIPE_TEXTURE_CUBE ||
|
||||
res->target == PIPE_TEXTURE_CUBE_ARRAY ||
|
||||
res->target == PIPE_TEXTURE_3D)) {
|
||||
(screen->b.chip_class <= VI &&
|
||||
res->target == PIPE_TEXTURE_3D))) {
|
||||
/* For the purpose of shader images, treat cube maps and 3D
|
||||
* textures as 2D arrays. For 3D textures, the address
|
||||
* calculations for mipmaps are different, so we rely on the
|
||||
@@ -4527,15 +4561,33 @@ static void si_init_config(struct si_context *sctx)
|
||||
RADEON_PRIO_BORDER_COLORS);
|
||||
|
||||
if (sctx->b.chip_class >= GFX9) {
|
||||
si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 0);
|
||||
unsigned num_se = sscreen->b.info.max_se;
|
||||
unsigned pc_lines = 0;
|
||||
|
||||
switch (sctx->b.family) {
|
||||
case CHIP_VEGA10:
|
||||
pc_lines = 4096;
|
||||
break;
|
||||
case CHIP_RAVEN:
|
||||
pc_lines = 1024;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL,
|
||||
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
|
||||
si_pm4_set_reg(pm4, R_028064_DB_RENDER_FILTER, 0);
|
||||
/* TODO: We can use this to disable RBs for rendering to GART: */
|
||||
si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
|
||||
si_pm4_set_reg(pm4, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
|
||||
/* TODO: Enable the binner: */
|
||||
si_pm4_set_reg(pm4, R_028C44_PA_SC_BINNER_CNTL_0,
|
||||
S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC));
|
||||
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 0);
|
||||
S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
|
||||
S_028C44_DISABLE_START_OF_PRIM(1));
|
||||
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
|
||||
S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
|
||||
S_028C48_MAX_PRIM_PER_BATCH(1023));
|
||||
si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
|
||||
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
|
||||
si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0);
|
||||
|
@@ -174,6 +174,20 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||
if (sctx->b.chip_class == SI) {
|
||||
unsigned one_wave = 64 / MAX2(num_tcs_input_cp, num_tcs_output_cp);
|
||||
*num_patches = MIN2(*num_patches, one_wave);
|
||||
|
||||
if (sctx->screen->b.info.max_se == 1) {
|
||||
/* The VGT HS block increments the patch ID unconditionally
|
||||
* within a single threadgroup. This results in incorrect
|
||||
* patch IDs when instanced draws are used.
|
||||
*
|
||||
* The intended solution is to restrict threadgroups to
|
||||
* a single instance by setting SWITCH_ON_EOI, which
|
||||
* should cause IA to split instances up. However, this
|
||||
* doesn't work correctly on SI when there is no other
|
||||
* SE to switch to.
|
||||
*/
|
||||
*num_patches = 1;
|
||||
}
|
||||
}
|
||||
|
||||
sctx->last_num_patches = *num_patches;
|
||||
@@ -291,13 +305,15 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
|
||||
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
|
||||
if (sscreen->has_distributed_tess) {
|
||||
if (key->u.uses_gs) {
|
||||
partial_es_wave = true;
|
||||
if (sscreen->b.chip_class <= VI)
|
||||
partial_es_wave = true;
|
||||
|
||||
/* GPU hang workaround. */
|
||||
if (sscreen->b.family == CHIP_TONGA ||
|
||||
sscreen->b.family == CHIP_FIJI ||
|
||||
sscreen->b.family == CHIP_POLARIS10 ||
|
||||
sscreen->b.family == CHIP_POLARIS11)
|
||||
sscreen->b.family == CHIP_POLARIS11 ||
|
||||
sscreen->b.family == CHIP_POLARIS12)
|
||||
partial_vs_wave = true;
|
||||
} else {
|
||||
partial_vs_wave = true;
|
||||
@@ -371,7 +387,7 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
|
||||
}
|
||||
|
||||
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
|
||||
if (ia_switch_on_eoi)
|
||||
if (sscreen->b.chip_class <= VI && ia_switch_on_eoi)
|
||||
partial_es_wave = true;
|
||||
|
||||
return S_028AA8_SWITCH_ON_EOP(ia_switch_on_eop) |
|
||||
@@ -379,7 +395,8 @@ si_get_init_multi_vgt_param(struct si_screen *sscreen,
|
||||
S_028AA8_PARTIAL_VS_WAVE_ON(partial_vs_wave) |
|
||||
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
|
||||
S_028AA8_WD_SWITCH_ON_EOP(sscreen->b.chip_class >= CIK ? wd_switch_on_eop : 0) |
|
||||
S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class >= VI ?
|
||||
/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
|
||||
S_028AA8_MAX_PRIMGRP_IN_WAVE(sscreen->b.chip_class == VI ?
|
||||
max_primgroup_in_wave : 0) |
|
||||
S_030960_EN_INST_OPT_BASIC(sscreen->b.chip_class >= GFX9) |
|
||||
S_030960_EN_INST_OPT_ADV(sscreen->b.chip_class >= GFX9);
|
||||
|
@@ -655,8 +655,17 @@ static void si_shader_vs(struct si_screen *sscreen, struct si_shader *shader,
|
||||
* not sent again.
|
||||
*/
|
||||
if (!gs) {
|
||||
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE,
|
||||
S_028A40_MODE(enable_prim_id ? V_028A40_GS_SCENARIO_A : 0));
|
||||
unsigned mode = 0;
|
||||
|
||||
/* PrimID needs GS scenario A.
|
||||
* GFX9 also needs it when ViewportIndex is enabled.
|
||||
*/
|
||||
if (enable_prim_id ||
|
||||
(sscreen->b.chip_class >= GFX9 &&
|
||||
shader->selector->info.writes_viewport_index))
|
||||
mode = V_028A40_GS_SCENARIO_A;
|
||||
|
||||
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, S_028A40_MODE(mode));
|
||||
si_pm4_set_reg(pm4, R_028A84_VGT_PRIMITIVEID_EN, enable_prim_id);
|
||||
} else {
|
||||
si_pm4_set_reg(pm4, R_028A40_VGT_GS_MODE, si_vgt_gs_mode(gs));
|
||||
@@ -1804,6 +1813,19 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
|
||||
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
||||
}
|
||||
|
||||
static void si_update_tcs_tes_uses_prim_id(struct si_context *sctx)
|
||||
{
|
||||
sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id =
|
||||
(sctx->tes_shader.cso &&
|
||||
sctx->tes_shader.cso->info.uses_primid) ||
|
||||
(sctx->tcs_shader.cso &&
|
||||
sctx->tcs_shader.cso->info.uses_primid) ||
|
||||
(sctx->gs_shader.cso &&
|
||||
sctx->gs_shader.cso->info.uses_primid) ||
|
||||
(sctx->ps_shader.cso && !sctx->gs_shader.cso &&
|
||||
sctx->ps_shader.cso->info.uses_primid);
|
||||
}
|
||||
|
||||
static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
@@ -1820,20 +1842,14 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
|
||||
si_mark_atom_dirty(sctx, &sctx->clip_regs);
|
||||
sctx->last_rast_prim = -1; /* reset this so that it gets updated */
|
||||
|
||||
if (enable_changed)
|
||||
if (enable_changed) {
|
||||
si_shader_change_notify(sctx);
|
||||
if (sctx->ia_multi_vgt_param_key.u.uses_tess)
|
||||
si_update_tcs_tes_uses_prim_id(sctx);
|
||||
}
|
||||
r600_update_vs_writes_viewport_index(&sctx->b, si_get_vs_info(sctx));
|
||||
}
|
||||
|
||||
static void si_update_tcs_tes_uses_prim_id(struct si_context *sctx)
|
||||
{
|
||||
sctx->ia_multi_vgt_param_key.u.tcs_tes_uses_prim_id =
|
||||
(sctx->tes_shader.cso &&
|
||||
sctx->tes_shader.cso->info.uses_primid) ||
|
||||
(sctx->tcs_shader.cso &&
|
||||
sctx->tcs_shader.cso->info.uses_primid);
|
||||
}
|
||||
|
||||
static void si_bind_tcs_shader(struct pipe_context *ctx, void *state)
|
||||
{
|
||||
struct si_context *sctx = (struct si_context *)ctx;
|
||||
@@ -1888,6 +1904,8 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
|
||||
sctx->ps_shader.cso = sel;
|
||||
sctx->ps_shader.current = sel ? sel->first_variant : NULL;
|
||||
sctx->do_update_shaders = true;
|
||||
if (sel && sctx->ia_multi_vgt_param_key.u.uses_tess)
|
||||
si_update_tcs_tes_uses_prim_id(sctx);
|
||||
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
|
||||
}
|
||||
|
||||
@@ -2115,7 +2133,10 @@ static bool si_update_gs_ring_buffers(struct si_context *sctx)
|
||||
unsigned num_se = sctx->screen->b.info.max_se;
|
||||
unsigned wave_size = 64;
|
||||
unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */
|
||||
unsigned gs_vertex_reuse = 16 * num_se; /* GS_VERTEX_REUSE register (per SE) */
|
||||
/* On SI-CI, the value comes from VGT_GS_VERTEX_REUSE = 16.
|
||||
* On VI+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
|
||||
*/
|
||||
unsigned gs_vertex_reuse = (sctx->b.chip_class >= VI ? 32 : 16) * num_se;
|
||||
unsigned alignment = 256 * num_se;
|
||||
/* The maximum size is 63.999 MB per SE. */
|
||||
unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se;
|
||||
@@ -2542,6 +2563,9 @@ static void si_update_vgt_shader_config(struct si_context *sctx)
|
||||
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
|
||||
}
|
||||
|
||||
if (sctx->b.chip_class >= GFX9)
|
||||
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
|
||||
|
||||
si_pm4_set_reg(*pm4, R_028B54_VGT_SHADER_STAGES_EN, stages);
|
||||
}
|
||||
si_pm4_bind_state(sctx, vgt_shader_config, *pm4);
|
||||
|
@@ -56,6 +56,7 @@ BUILT_SOURCES = \
|
||||
rasterizer/codegen/gen_knobs.cpp \
|
||||
rasterizer/codegen/gen_knobs.h \
|
||||
rasterizer/jitter/gen_state_llvm.h \
|
||||
rasterizer/jitter/gen_builder.hpp \
|
||||
rasterizer/jitter/gen_builder_x86.hpp \
|
||||
rasterizer/archrast/gen_ar_event.hpp \
|
||||
rasterizer/archrast/gen_ar_event.cpp \
|
||||
@@ -168,20 +169,6 @@ COMMON_LDFLAGS = \
|
||||
$(LLVM_LDFLAGS)
|
||||
|
||||
|
||||
# XXX: As we cannot use BUILT_SOURCES (the files will end up in the dist
|
||||
# tarball) just annotate the dependency directly.
|
||||
# As the single direct user of gen_builder.hpp is a header (builder.h) trace all
|
||||
# the translusive users (one that use the latter header).
|
||||
rasterizer/jitter/blend_jit.cpp: rasterizer/jitter/gen_builder.hpp
|
||||
rasterizer/jitter/builder.cpp: rasterizer/jitter/gen_builder.hpp
|
||||
rasterizer/jitter/builder_misc.cpp: rasterizer/jitter/gen_builder.hpp
|
||||
rasterizer/jitter/fetch_jit.cpp: rasterizer/jitter/gen_builder.hpp
|
||||
rasterizer/jitter/streamout_jit.cpp: rasterizer/jitter/gen_builder.hpp
|
||||
swr_shader.cpp: rasterizer/jitter/gen_builder.hpp
|
||||
|
||||
CLEANFILES = \
|
||||
rasterizer/jitter/gen_builder.hpp
|
||||
|
||||
lib_LTLIBRARIES = libswrAVX.la libswrAVX2.la
|
||||
|
||||
libswrAVX_la_CXXFLAGS = \
|
||||
@@ -192,14 +179,6 @@ libswrAVX_la_CXXFLAGS = \
|
||||
libswrAVX_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
|
||||
# XXX: Don't ship these generated sources for now, since they are specific
|
||||
# to the LLVM version they are generated from. Thus a release tarball
|
||||
# containing the said files, generated against eg. LLVM 3.8 will fail to build
|
||||
# on systems with other versions of LLVM eg. 3.7 or 3.6.
|
||||
# Move these back to BUILT_SOURCES once that is resolved.
|
||||
nodist_libswrAVX_la_SOURCES = \
|
||||
rasterizer/jitter/gen_builder.hpp
|
||||
|
||||
libswrAVX_la_LIBADD = \
|
||||
$(COMMON_LIBADD)
|
||||
|
||||
@@ -214,14 +193,6 @@ libswrAVX2_la_CXXFLAGS = \
|
||||
libswrAVX2_la_SOURCES = \
|
||||
$(COMMON_SOURCES)
|
||||
|
||||
# XXX: Don't ship these generated sources for now, since they are specific
|
||||
# to the LLVM version they are generated from. Thus a release tarball
|
||||
# containing the said files, generated against eg. LLVM 3.8 will fail to build
|
||||
# on systems with other versions of LLVM eg. 3.7 or 3.6.
|
||||
# Move these back to BUILT_SOURCES once that is resolved.
|
||||
nodist_libswrAVX2_la_SOURCES = \
|
||||
rasterizer/jitter/gen_builder.hpp
|
||||
|
||||
libswrAVX2_la_LIBADD = \
|
||||
$(COMMON_LIBADD)
|
||||
|
||||
@@ -230,6 +201,16 @@ libswrAVX2_la_LDFLAGS = \
|
||||
|
||||
include $(top_srcdir)/install-gallium-links.mk
|
||||
|
||||
# Generated gen_builder.hpp is not backwards compatible. So ship only one
|
||||
# created with the oldest supported version of LLVM.
|
||||
dist-hook:
|
||||
if SWR_INVALID_LLVM_VERSION
|
||||
@echo "*******************************************************"
|
||||
@echo "LLVM 3.9.0 or LLVM 3.9.1 required to create the tarball"
|
||||
@echo "*******************************************************"
|
||||
@test
|
||||
endif
|
||||
|
||||
EXTRA_DIST = \
|
||||
SConscript \
|
||||
rasterizer/archrast/events.proto \
|
||||
|
@@ -1133,6 +1133,64 @@ struct StoreRasterTile
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
/// @brief Resolves an 8x8 raster tile to the resolve destination surface.
|
||||
/// @param pSrc - Pointer to raster tile.
|
||||
/// @param pDstSurface - Destination surface state
|
||||
/// @param x, y - Coordinates to raster tile.
|
||||
/// @param sampleOffset - Offset between adjacent multisamples
|
||||
INLINE static void Resolve(
|
||||
uint8_t *pSrc,
|
||||
SWR_SURFACE_STATE* pDstSurface,
|
||||
uint32_t x, uint32_t y, uint32_t sampleOffset, uint32_t renderTargetArrayIndex) // (x, y) pixel coordinate to start of raster tile.
|
||||
{
|
||||
uint32_t lodWidth = std::max(pDstSurface->width >> pDstSurface->lod, 1U);
|
||||
uint32_t lodHeight = std::max(pDstSurface->height >> pDstSurface->lod, 1U);
|
||||
|
||||
float oneOverNumSamples = 1.0f / pDstSurface->numSamples;
|
||||
|
||||
// For each raster tile pixel (rx, ry)
|
||||
for (uint32_t ry = 0; ry < KNOB_TILE_Y_DIM; ++ry)
|
||||
{
|
||||
for (uint32_t rx = 0; rx < KNOB_TILE_X_DIM; ++rx)
|
||||
{
|
||||
// Perform bounds checking.
|
||||
if (((x + rx) < lodWidth) &&
|
||||
((y + ry) < lodHeight))
|
||||
{
|
||||
// Sum across samples
|
||||
float resolveColor[4] = {0};
|
||||
for (uint32_t sampleNum = 0; sampleNum < pDstSurface->numSamples; sampleNum++)
|
||||
{
|
||||
float sampleColor[4] = {0};
|
||||
uint8_t *pSampleSrc = pSrc + sampleOffset * sampleNum;
|
||||
GetSwizzledSrcColor(pSampleSrc, rx, ry, sampleColor);
|
||||
resolveColor[0] += sampleColor[0];
|
||||
resolveColor[1] += sampleColor[1];
|
||||
resolveColor[2] += sampleColor[2];
|
||||
resolveColor[3] += sampleColor[3];
|
||||
}
|
||||
|
||||
// Divide by numSamples to average
|
||||
resolveColor[0] *= oneOverNumSamples;
|
||||
resolveColor[1] *= oneOverNumSamples;
|
||||
resolveColor[2] *= oneOverNumSamples;
|
||||
resolveColor[3] *= oneOverNumSamples;
|
||||
|
||||
// Use the resolve surface state
|
||||
SWR_SURFACE_STATE* pResolveSurface = (SWR_SURFACE_STATE*)pDstSurface->pAuxBaseAddress;
|
||||
uint8_t *pDst = (uint8_t*)ComputeSurfaceAddress<false, false>((x + rx), (y + ry),
|
||||
pResolveSurface->arrayIndex + renderTargetArrayIndex, pResolveSurface->arrayIndex + renderTargetArrayIndex,
|
||||
0, pResolveSurface->lod, pResolveSurface);
|
||||
{
|
||||
ConvertPixelFromFloat<DstFormat>(pDst, resolveColor);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename TTraits, SWR_FORMAT SrcFormat, SWR_FORMAT DstFormat>
|
||||
@@ -2316,6 +2374,9 @@ struct StoreMacroTile
|
||||
pfnStore[sampleNum] = (bForceGeneric || KNOB_USE_GENERIC_STORETILE) ? StoreRasterTile<TTraits, SrcFormat, DstFormat>::Store : OptStoreRasterTile<TTraits, SrcFormat, DstFormat>::Store;
|
||||
}
|
||||
|
||||
// Save original for pSrcHotTile resolve.
|
||||
uint8_t *pResolveSrcHotTile = pSrcHotTile;
|
||||
|
||||
// Store each raster tile from the hot tile to the destination surface.
|
||||
for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
|
||||
{
|
||||
@@ -2328,6 +2389,20 @@ struct StoreMacroTile
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (pDstSurface->pAuxBaseAddress)
|
||||
{
|
||||
uint32_t sampleOffset = KNOB_TILE_X_DIM * KNOB_TILE_Y_DIM * (FormatTraits<SrcFormat>::bpp / 8);
|
||||
// Store each raster tile from the hot tile to the destination surface.
|
||||
for(uint32_t row = 0; row < KNOB_MACROTILE_Y_DIM; row += KNOB_TILE_Y_DIM)
|
||||
{
|
||||
for(uint32_t col = 0; col < KNOB_MACROTILE_X_DIM; col += KNOB_TILE_X_DIM)
|
||||
{
|
||||
StoreRasterTile<TTraits, SrcFormat, DstFormat>::Resolve(pResolveSrcHotTile, pDstSurface, (x + col), (y + row), sampleOffset, renderTargetArrayIndex);
|
||||
pResolveSrcHotTile += sampleOffset * pDstSurface->numSamples;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
@@ -267,65 +267,6 @@ swr_resource_copy(struct pipe_context *pipe,
|
||||
}
|
||||
|
||||
|
||||
/* XXX: This resolve is incomplete and suboptimal. It will be removed once the
|
||||
* pipelined resolve blit works. */
|
||||
void
|
||||
swr_do_msaa_resolve(struct pipe_resource *src_resource,
|
||||
struct pipe_resource *dst_resource)
|
||||
{
|
||||
/* This is a pretty dumb inline resolve. It only supports 8-bit formats
|
||||
* (ex RGBA8/BGRA8) - which are most common display formats anyway.
|
||||
*/
|
||||
|
||||
/* quick check for 8-bit and number of components */
|
||||
uint8_t bits_per_component =
|
||||
util_format_get_component_bits(src_resource->format,
|
||||
UTIL_FORMAT_COLORSPACE_RGB, 0);
|
||||
|
||||
/* Unsupported resolve format */
|
||||
assert(src_resource->format == dst_resource->format);
|
||||
assert(bits_per_component == 8);
|
||||
if ((src_resource->format != dst_resource->format) ||
|
||||
(bits_per_component != 8)) {
|
||||
return;
|
||||
}
|
||||
|
||||
uint8_t src_num_comps = util_format_get_nr_components(src_resource->format);
|
||||
|
||||
SWR_SURFACE_STATE *src_surface = &swr_resource(src_resource)->swr;
|
||||
SWR_SURFACE_STATE *dst_surface = &swr_resource(dst_resource)->swr;
|
||||
|
||||
uint32_t *src, *dst, offset;
|
||||
uint32_t num_samples = src_surface->numSamples;
|
||||
float recip_num_samples = 1.0f / num_samples;
|
||||
for (uint32_t y = 0; y < src_surface->height; y++) {
|
||||
for (uint32_t x = 0; x < src_surface->width; x++) {
|
||||
float r = 0.0f;
|
||||
float g = 0.0f;
|
||||
float b = 0.0f;
|
||||
float a = 0.0f;
|
||||
for (uint32_t sampleNum = 0; sampleNum < num_samples; sampleNum++) {
|
||||
offset = ComputeSurfaceOffset<false>(x, y, 0, 0, sampleNum, 0, src_surface);
|
||||
src = (uint32_t *) src_surface->pBaseAddress + offset/src_num_comps;
|
||||
const uint32_t sample = *src;
|
||||
r += (float)((sample >> 24) & 0xff) / 255.0f * recip_num_samples;
|
||||
g += (float)((sample >> 16) & 0xff) / 255.0f * recip_num_samples;
|
||||
b += (float)((sample >> 8) & 0xff) / 255.0f * recip_num_samples;
|
||||
a += (float)((sample ) & 0xff) / 255.0f * recip_num_samples;
|
||||
}
|
||||
uint32_t result = 0;
|
||||
result = ((uint8_t)(r * 255.0f) & 0xff) << 24;
|
||||
result |= ((uint8_t)(g * 255.0f) & 0xff) << 16;
|
||||
result |= ((uint8_t)(b * 255.0f) & 0xff) << 8;
|
||||
result |= ((uint8_t)(a * 255.0f) & 0xff);
|
||||
offset = ComputeSurfaceOffset<false>(x, y, 0, 0, 0, 0, src_surface);
|
||||
dst = (uint32_t *) dst_surface->pBaseAddress + offset/src_num_comps;
|
||||
*dst = result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
|
||||
{
|
||||
@@ -342,28 +283,14 @@ swr_blit(struct pipe_context *pipe, const struct pipe_blit_info *blit_info)
|
||||
debug_printf("swr_blit: color resolve : %d -> %d\n",
|
||||
info.src.resource->nr_samples, info.dst.resource->nr_samples);
|
||||
|
||||
/* Because the resolve is being done inline (not pipelined),
|
||||
* resources need to be stored out of hottiles and the pipeline empty.
|
||||
*
|
||||
* Resources are marked unused following fence finish because all
|
||||
* pipeline operations are complete. Validation of the blit will mark
|
||||
* them are read/write again.
|
||||
*/
|
||||
/* Resolve is done as part of the surface store. */
|
||||
swr_store_dirty_resource(pipe, info.src.resource, SWR_TILE_RESOLVED);
|
||||
swr_store_dirty_resource(pipe, info.dst.resource, SWR_TILE_RESOLVED);
|
||||
swr_fence_finish(pipe->screen, NULL, swr_screen(pipe->screen)->flush_fence, 0);
|
||||
swr_resource_unused(info.src.resource);
|
||||
swr_resource_unused(info.dst.resource);
|
||||
|
||||
struct pipe_resource *src_resource = info.src.resource;
|
||||
struct pipe_resource *resolve_target =
|
||||
swr_resource(src_resource)->resolve_target;
|
||||
|
||||
/* Inline resolve samples into resolve target resource, then continue
|
||||
* the blit. */
|
||||
swr_do_msaa_resolve(src_resource, resolve_target);
|
||||
|
||||
/* The resolve target becomes the new source for the blit. */
|
||||
/* The resolve target becomes the new source for the blit. */
|
||||
info.src.resource = resolve_target;
|
||||
}
|
||||
|
||||
|
@@ -891,6 +891,10 @@ swr_create_resolve_resource(struct pipe_screen *_screen,
|
||||
|
||||
/* Attach it to the multisample resource */
|
||||
msaa_res->resolve_target = alt;
|
||||
|
||||
/* Hang resolve surface state off the multisample surface state to so
|
||||
* StoreTiles knows where to resolve the surface. */
|
||||
msaa_res->swr.pAuxBaseAddress = (uint8_t *)&swr_resource(alt)->swr;
|
||||
}
|
||||
|
||||
return true; /* success */
|
||||
@@ -1009,14 +1013,10 @@ swr_flush_frontbuffer(struct pipe_screen *p_screen,
|
||||
SwrEndFrame(swr_context(pipe)->swrContext);
|
||||
}
|
||||
|
||||
/* Multisample surfaces need to be resolved before present */
|
||||
/* Multisample resolved into resolve_target at flush with store_resource */
|
||||
if (pipe && spr->swr.numSamples > 1) {
|
||||
struct pipe_resource *resolve_target = spr->resolve_target;
|
||||
|
||||
/* Do an inline surface resolve into the resolve target resource
|
||||
* XXX: This works, just not optimal. Work on using a pipelined blit. */
|
||||
swr_do_msaa_resolve(resource, resolve_target);
|
||||
|
||||
/* Once resolved, copy into display target */
|
||||
SWR_SURFACE_STATE *resolve = &swr_resource(resolve_target)->swr;
|
||||
|
||||
|
@@ -165,7 +165,8 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
|
||||
prsc->width0 == box->width &&
|
||||
prsc->height0 == box->height &&
|
||||
prsc->depth0 == box->depth &&
|
||||
prsc->array_size == 1) {
|
||||
prsc->array_size == 1 &&
|
||||
rsc->bo->private) {
|
||||
usage |= PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE;
|
||||
}
|
||||
|
||||
|
@@ -61,7 +61,7 @@ static void
|
||||
vc4_load_utile(void *cpu, void *gpu, uint32_t cpu_stride, uint32_t cpp)
|
||||
{
|
||||
uint32_t gpu_stride = vc4_utile_stride(cpp);
|
||||
#if defined(VC4_BUILD_NEON) && defined(__ARM_ARCH)
|
||||
#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
|
||||
if (gpu_stride == 8) {
|
||||
__asm__ volatile (
|
||||
/* Load from the GPU in one shot, no interleave, to
|
||||
@@ -118,7 +118,7 @@ vc4_store_utile(void *gpu, void *cpu, uint32_t cpu_stride, uint32_t cpp)
|
||||
{
|
||||
uint32_t gpu_stride = vc4_utile_stride(cpp);
|
||||
|
||||
#if defined(VC4_BUILD_NEON) && defined(__ARM_ARCH)
|
||||
#if defined(VC4_BUILD_NEON) && defined(PIPE_ARCH_ARM)
|
||||
if (gpu_stride == 8) {
|
||||
__asm__ volatile (
|
||||
/* Load each 8-byte line from cpu-side source,
|
||||
|
@@ -49,7 +49,7 @@ namespace clover {
|
||||
build_module_library(const ::llvm::Module &mod,
|
||||
enum module::section::type section_type);
|
||||
|
||||
std::unique_ptr<::llvm::Module>
|
||||
std::unique_ptr< ::llvm::Module>
|
||||
parse_module_library(const module &m, ::llvm::LLVMContext &ctx,
|
||||
std::string &r_log);
|
||||
|
||||
|
@@ -94,7 +94,7 @@ clover::llvm::build_module_library(const ::llvm::Module &mod,
|
||||
return m;
|
||||
}
|
||||
|
||||
std::unique_ptr<::llvm::Module>
|
||||
std::unique_ptr< ::llvm::Module>
|
||||
clover::llvm::parse_module_library(const module &m, ::llvm::LLVMContext &ctx,
|
||||
std::string &r_log) {
|
||||
auto mod = ::llvm::parseBitcodeFile(::llvm::MemoryBufferRef(
|
||||
@@ -104,5 +104,5 @@ clover::llvm::parse_module_library(const module &m, ::llvm::LLVMContext &ctx,
|
||||
fail(r_log, error(CL_INVALID_PROGRAM), s);
|
||||
});
|
||||
|
||||
return std::unique_ptr<::llvm::Module>(std::move(*mod));
|
||||
return std::unique_ptr< ::llvm::Module>(std::move(*mod));
|
||||
}
|
||||
|
@@ -121,12 +121,12 @@ namespace {
|
||||
} else {
|
||||
// Other types.
|
||||
const auto actual_type =
|
||||
isa<::llvm::PointerType>(arg_type) && arg.hasByValAttr() ?
|
||||
cast<::llvm::PointerType>(arg_type)->getElementType() : arg_type;
|
||||
isa< ::llvm::PointerType>(arg_type) && arg.hasByValAttr() ?
|
||||
cast< ::llvm::PointerType>(arg_type)->getElementType() : arg_type;
|
||||
|
||||
if (actual_type->isPointerTy()) {
|
||||
const unsigned address_space =
|
||||
cast<::llvm::PointerType>(actual_type)->getAddressSpace();
|
||||
cast< ::llvm::PointerType>(actual_type)->getAddressSpace();
|
||||
|
||||
if (address_space == address_spaces[clang::LangAS::opencl_local
|
||||
- compat::lang_as_offset]) {
|
||||
|
@@ -156,7 +156,7 @@ clover::llvm::print_module_native(const ::llvm::Module &mod,
|
||||
const target &target) {
|
||||
std::string log;
|
||||
try {
|
||||
std::unique_ptr<::llvm::Module> cmod { CloneModule(&mod) };
|
||||
std::unique_ptr< ::llvm::Module> cmod { CloneModule(&mod) };
|
||||
return as_string(emit_code(*cmod, target,
|
||||
TargetMachine::CGFT_AssemblyFile, log));
|
||||
} catch (...) {
|
||||
|
@@ -132,18 +132,18 @@ namespace clover {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline std::unique_ptr<::llvm::Linker>
|
||||
inline std::unique_ptr< ::llvm::Linker>
|
||||
create_linker(::llvm::Module &mod) {
|
||||
#if HAVE_LLVM >= 0x0308
|
||||
return std::unique_ptr<::llvm::Linker>(new ::llvm::Linker(mod));
|
||||
return std::unique_ptr< ::llvm::Linker>(new ::llvm::Linker(mod));
|
||||
#else
|
||||
return std::unique_ptr<::llvm::Linker>(new ::llvm::Linker(&mod));
|
||||
return std::unique_ptr< ::llvm::Linker>(new ::llvm::Linker(&mod));
|
||||
#endif
|
||||
}
|
||||
|
||||
inline bool
|
||||
link_in_module(::llvm::Linker &linker,
|
||||
std::unique_ptr<::llvm::Module> mod) {
|
||||
std::unique_ptr< ::llvm::Module> mod) {
|
||||
#if HAVE_LLVM >= 0x0308
|
||||
return linker.linkInModule(std::move(mod));
|
||||
#else
|
||||
|
@@ -51,7 +51,7 @@ namespace clover {
|
||||
is_kernel_node_for(const ::llvm::Function &f) {
|
||||
return [&](const ::llvm::MDNode *n) {
|
||||
using ::llvm::mdconst::dyn_extract;
|
||||
return &f == dyn_extract<::llvm::Function>(n->getOperand(0));
|
||||
return &f == dyn_extract< ::llvm::Function>(n->getOperand(0));
|
||||
};
|
||||
}
|
||||
|
||||
@@ -65,7 +65,7 @@ namespace clover {
|
||||
#endif
|
||||
}
|
||||
|
||||
inline iterator_range<::llvm::MDNode::op_iterator>
|
||||
inline iterator_range< ::llvm::MDNode::op_iterator>
|
||||
get_kernel_metadata_operands(const ::llvm::Function &f,
|
||||
const std::string &name) {
|
||||
#if HAVE_LLVM >= 0x0309
|
||||
@@ -79,11 +79,11 @@ namespace clover {
|
||||
const auto kernel_node = find(is_kernel_node_for(f),
|
||||
get_kernel_nodes(*f.getParent()));
|
||||
|
||||
const auto data_node = cast<::llvm::MDNode>(
|
||||
const auto data_node = cast< ::llvm::MDNode>(
|
||||
find([&](const ::llvm::MDOperand &op) {
|
||||
if (auto m = dyn_cast<::llvm::MDNode>(op))
|
||||
if (auto m = dyn_cast< ::llvm::MDNode>(op))
|
||||
if (m->getNumOperands())
|
||||
if (auto m_name = dyn_cast<::llvm::MDString>(
|
||||
if (auto m_name = dyn_cast< ::llvm::MDString>(
|
||||
m->getOperand(0).get()))
|
||||
return m_name->getString() == name;
|
||||
|
||||
@@ -106,7 +106,7 @@ namespace clover {
|
||||
get_argument_metadata(const ::llvm::Function &f,
|
||||
const ::llvm::Argument &arg,
|
||||
const std::string &name) {
|
||||
return ::llvm::cast<::llvm::MDString>(
|
||||
return ::llvm::cast< ::llvm::MDString>(
|
||||
detail::get_kernel_metadata_operands(f, name)[arg.getArgNo()])
|
||||
->getString();
|
||||
}
|
||||
|
@@ -35,7 +35,8 @@ LOCAL_CFLAGS :=
|
||||
LOCAL_SHARED_LIBRARIES := \
|
||||
libdl \
|
||||
libglapi \
|
||||
libexpat
|
||||
libexpat \
|
||||
libz
|
||||
|
||||
ifneq ($(filter freedreno,$(MESA_GPU_DRIVERS)),)
|
||||
LOCAL_CFLAGS += -DGALLIUM_FREEDRENO
|
||||
|
@@ -1,10 +1,8 @@
|
||||
include $(top_srcdir)/src/gallium/Automake.inc
|
||||
|
||||
if HAVE_ANDROID
|
||||
if HAVE_SHARED_GLAPI
|
||||
SHARED_GLAPI_LIB = $(top_builddir)/src/mapi/shared-glapi/libglapi.la
|
||||
endif
|
||||
endif
|
||||
|
||||
AM_CFLAGS = \
|
||||
-I$(top_srcdir)/src/mapi \
|
||||
@@ -31,8 +29,10 @@ gallium_dri_la_LDFLAGS = \
|
||||
-shared \
|
||||
-shrext .so \
|
||||
-module \
|
||||
-no-undefined \
|
||||
-avoid-version \
|
||||
$(GC_SECTIONS)
|
||||
$(GC_SECTIONS) \
|
||||
$(LD_NO_UNDEFINED)
|
||||
|
||||
if HAVE_LD_VERSION_SCRIPT
|
||||
gallium_dri_la_LDFLAGS += \
|
||||
|
@@ -30,7 +30,7 @@ static const struct drm_conf_ret throttle_ret = {
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
|
@@ -24,7 +24,7 @@ static const struct drm_conf_ret throttle_ret = {
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
|
@@ -24,7 +24,7 @@ static const struct drm_conf_ret throttle_ret = {
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
|
@@ -20,7 +20,7 @@ static const struct drm_conf_ret throttle_ret = {
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
|
@@ -20,7 +20,7 @@ static const struct drm_conf_ret throttle_ret = {
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
|
@@ -26,7 +26,7 @@ static const struct drm_conf_ret throttle_ret = {
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
|
@@ -30,7 +30,7 @@ static const struct drm_conf_ret throttle_ret = {
|
||||
|
||||
static const struct drm_conf_ret share_fd_ret = {
|
||||
.type = DRM_CONF_BOOL,
|
||||
.val.val_int = true,
|
||||
.val.val_bool = true,
|
||||
};
|
||||
|
||||
static const struct drm_conf_ret *drm_configuration(enum drm_conf conf)
|
||||
|
@@ -310,10 +310,15 @@ static bool do_winsys_init(struct amdgpu_winsys *ws, int fd)
|
||||
case CHIP_POLARIS12:
|
||||
ws->family = FAMILY_VI;
|
||||
ws->rev_id = VI_POLARIS12_V_A0;
|
||||
break;
|
||||
case CHIP_VEGA10:
|
||||
ws->family = FAMILY_AI;
|
||||
ws->rev_id = AI_VEGA10_P_A0;
|
||||
break;
|
||||
case CHIP_RAVEN:
|
||||
ws->family = FAMILY_RV;
|
||||
ws->rev_id = RAVEN_A0;
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "amdgpu: Unknown family.\n");
|
||||
goto fail;
|
||||
|
@@ -200,7 +200,7 @@ dri_sw_displaytarget_display(struct sw_winsys *ws,
|
||||
|
||||
if (box) {
|
||||
void *data;
|
||||
data = dri_sw_dt->data + (dri_sw_dt->stride * box->y) + box->x * blsize;
|
||||
data = (char *)dri_sw_dt->data + (dri_sw_dt->stride * box->y) + box->x * blsize;
|
||||
dri_sw_ws->lf->put_image2(dri_drawable, data,
|
||||
box->x, box->y, box->width, box->height, dri_sw_dt->stride);
|
||||
} else {
|
||||
|
@@ -258,7 +258,12 @@ virgl_bo_transfer_put(struct virgl_winsys *vws,
|
||||
|
||||
memset(&tohostcmd, 0, sizeof(tohostcmd));
|
||||
tohostcmd.bo_handle = res->bo_handle;
|
||||
tohostcmd.box = *(struct drm_virtgpu_3d_box *)box;
|
||||
tohostcmd.box.x = box->x;
|
||||
tohostcmd.box.y = box->y;
|
||||
tohostcmd.box.z = box->z;
|
||||
tohostcmd.box.w = box->width;
|
||||
tohostcmd.box.h = box->height;
|
||||
tohostcmd.box.d = box->depth;
|
||||
tohostcmd.offset = buf_offset;
|
||||
tohostcmd.level = level;
|
||||
// tohostcmd.stride = stride;
|
||||
@@ -282,7 +287,12 @@ virgl_bo_transfer_get(struct virgl_winsys *vws,
|
||||
fromhostcmd.offset = buf_offset;
|
||||
// fromhostcmd.stride = stride;
|
||||
// fromhostcmd.layer_stride = layer_stride;
|
||||
fromhostcmd.box = *(struct drm_virtgpu_3d_box *)box;
|
||||
fromhostcmd.box.x = box->x;
|
||||
fromhostcmd.box.y = box->y;
|
||||
fromhostcmd.box.z = box->z;
|
||||
fromhostcmd.box.w = box->width;
|
||||
fromhostcmd.box.h = box->height;
|
||||
fromhostcmd.box.d = box->depth;
|
||||
return drmIoctl(vdws->fd, DRM_IOCTL_VIRTGPU_TRANSFER_FROM_HOST, &fromhostcmd);
|
||||
}
|
||||
|
||||
|
@@ -772,7 +772,7 @@ gbm_dri_bo_get_modifier(struct gbm_bo *_bo)
|
||||
&mod))
|
||||
return DRM_FORMAT_MOD_INVALID;
|
||||
|
||||
ret |= mod;
|
||||
ret |= (uint64_t)(mod & 0xffffffff);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@@ -4,6 +4,7 @@
|
||||
*/
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "glxclient.h"
|
||||
#include "glxglvnd.h"
|
||||
#include "glxglvnddispatchfuncs.h"
|
||||
#include "g_glxglvnddispatchindices.h"
|
||||
@@ -50,6 +51,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
|
||||
__ATTRIB(GetCurrentDisplayEXT),
|
||||
// glXGetCurrentDrawable implemented by libglvnd
|
||||
// glXGetCurrentReadDrawable implemented by libglvnd
|
||||
__ATTRIB(GetDriverConfig),
|
||||
// glXGetFBConfigAttrib implemented by libglvnd
|
||||
__ATTRIB(GetFBConfigAttribSGIX),
|
||||
__ATTRIB(GetFBConfigFromVisualSGIX),
|
||||
@@ -334,6 +336,17 @@ static Display *dispatch_GetCurrentDisplayEXT(void)
|
||||
|
||||
|
||||
|
||||
static const char *dispatch_GetDriverConfig(const char *driverName)
|
||||
{
|
||||
/*
|
||||
* The options are constant for a given driverName, so we do not need
|
||||
* a context (and apps expect to be able to call this without one).
|
||||
*/
|
||||
return glXGetDriverConfig(driverName);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int dispatch_GetFBConfigAttribSGIX(Display *dpy, GLXFBConfigSGIX config,
|
||||
int attribute, int *value_return)
|
||||
{
|
||||
@@ -939,6 +952,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = {
|
||||
__ATTRIB(DestroyGLXPbufferSGIX),
|
||||
__ATTRIB(GetContextIDEXT),
|
||||
__ATTRIB(GetCurrentDisplayEXT),
|
||||
__ATTRIB(GetDriverConfig),
|
||||
__ATTRIB(GetFBConfigAttribSGIX),
|
||||
__ATTRIB(GetFBConfigFromVisualSGIX),
|
||||
__ATTRIB(GetMscRateOML),
|
||||
|
@@ -39,6 +39,7 @@ typedef enum __GLXdispatchIndex {
|
||||
DI_GetCurrentDisplayEXT,
|
||||
// GetCurrentDrawable implemented by libglvnd
|
||||
// GetCurrentReadDrawable implemented by libglvnd
|
||||
DI_GetDriverConfig,
|
||||
// GetFBConfigAttrib implemented by libglvnd
|
||||
DI_GetFBConfigAttribSGIX,
|
||||
DI_GetFBConfigFromVisualSGIX,
|
||||
|
@@ -2651,7 +2651,7 @@ _GLX_PUBLIC void (*glXGetProcAddressARB(const GLubyte * procName)) (void)
|
||||
f = (gl_function) get_glx_proc_address((const char *) procName);
|
||||
if ((f == NULL) && (procName[0] == 'g') && (procName[1] == 'l')
|
||||
&& (procName[2] != 'X')) {
|
||||
#ifdef GLX_SHARED_GLAPI
|
||||
#ifdef GLX_INDIRECT_RENDERING
|
||||
f = (gl_function) __indirect_get_proc_address((const char *) procName);
|
||||
#endif
|
||||
if (!f)
|
||||
|
@@ -231,7 +231,7 @@ LOCAL_WHOLE_STATIC_LIBRARIES := \
|
||||
libmesa_intel_compiler \
|
||||
libmesa_anv_entrypoints
|
||||
|
||||
LOCAL_SHARED_LIBRARIES := libdrm
|
||||
LOCAL_SHARED_LIBRARIES := libdrm libz
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_SHARED_LIBRARY)
|
||||
|
@@ -822,12 +822,11 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
for (unsigned int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF)
|
||||
spill_costs[inst->src[i].nr] += block_scale;
|
||||
spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale;
|
||||
}
|
||||
|
||||
if (inst->dst.file == VGRF)
|
||||
spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE)
|
||||
* block_scale;
|
||||
spill_costs[inst->dst.nr] += regs_written(inst) * block_scale;
|
||||
|
||||
switch (inst->opcode) {
|
||||
|
||||
|
@@ -583,16 +583,46 @@ vec4_visitor::split_uniform_registers()
|
||||
}
|
||||
}
|
||||
|
||||
/* This function returns the register number where we placed the uniform */
|
||||
static int
|
||||
set_push_constant_loc(const int nr_uniforms, int *new_uniform_count,
|
||||
const int src, const int size, const int channel_size,
|
||||
int *new_loc, int *new_chan,
|
||||
int *new_chans_used)
|
||||
{
|
||||
int dst;
|
||||
/* Find the lowest place we can slot this uniform in. */
|
||||
for (dst = 0; dst < nr_uniforms; dst++) {
|
||||
if (ALIGN(new_chans_used[dst], channel_size) + size <= 4)
|
||||
break;
|
||||
}
|
||||
|
||||
assert(dst < nr_uniforms);
|
||||
|
||||
new_loc[src] = dst;
|
||||
new_chan[src] = ALIGN(new_chans_used[dst], channel_size);
|
||||
new_chans_used[dst] = ALIGN(new_chans_used[dst], channel_size) + size;
|
||||
|
||||
*new_uniform_count = MAX2(*new_uniform_count, dst + 1);
|
||||
return dst;
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::pack_uniform_registers()
|
||||
{
|
||||
uint8_t chans_used[this->uniforms];
|
||||
int new_loc[this->uniforms];
|
||||
int new_chan[this->uniforms];
|
||||
bool is_aligned_to_dvec4[this->uniforms];
|
||||
int new_chans_used[this->uniforms];
|
||||
int channel_sizes[this->uniforms];
|
||||
|
||||
memset(chans_used, 0, sizeof(chans_used));
|
||||
memset(new_loc, 0, sizeof(new_loc));
|
||||
memset(new_chan, 0, sizeof(new_chan));
|
||||
memset(new_chans_used, 0, sizeof(new_chans_used));
|
||||
memset(is_aligned_to_dvec4, 0, sizeof(is_aligned_to_dvec4));
|
||||
memset(channel_sizes, 0, sizeof(channel_sizes));
|
||||
|
||||
/* Find which uniform vectors are actually used by the program. We
|
||||
* expect unused vector elements when we've moved array access out
|
||||
@@ -622,7 +652,7 @@ vec4_visitor::pack_uniform_registers()
|
||||
continue;
|
||||
|
||||
assert(type_sz(inst->src[i].type) % 4 == 0);
|
||||
unsigned channel_size = type_sz(inst->src[i].type) / 4;
|
||||
int channel_size = type_sz(inst->src[i].type) / 4;
|
||||
|
||||
int reg = inst->src[i].nr;
|
||||
for (int c = 0; c < 4; c++) {
|
||||
@@ -631,10 +661,15 @@ vec4_visitor::pack_uniform_registers()
|
||||
|
||||
unsigned channel = BRW_GET_SWZ(inst->src[i].swizzle, c) + 1;
|
||||
unsigned used = MAX2(chans_used[reg], channel * channel_size);
|
||||
if (used <= 4)
|
||||
if (used <= 4) {
|
||||
chans_used[reg] = used;
|
||||
else
|
||||
channel_sizes[reg] = MAX2(channel_sizes[reg], channel_size);
|
||||
} else {
|
||||
is_aligned_to_dvec4[reg] = true;
|
||||
is_aligned_to_dvec4[reg + 1] = true;
|
||||
chans_used[reg + 1] = used - 4;
|
||||
channel_sizes[reg + 1] = MAX2(channel_sizes[reg + 1], channel_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -659,42 +694,60 @@ vec4_visitor::pack_uniform_registers()
|
||||
|
||||
int new_uniform_count = 0;
|
||||
|
||||
/* As the uniforms are going to be reordered, take the data from a temporary
|
||||
* copy of the original param[].
|
||||
*/
|
||||
gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
|
||||
stage_prog_data->nr_params);
|
||||
memcpy(param, stage_prog_data->param,
|
||||
sizeof(gl_constant_value*) * stage_prog_data->nr_params);
|
||||
|
||||
/* Now, figure out a packing of the live uniform vectors into our
|
||||
* push constants.
|
||||
* push constants. Start with dvec{3,4} because they are aligned to
|
||||
* dvec4 size (2 vec4).
|
||||
*/
|
||||
for (int src = 0; src < uniforms; src++) {
|
||||
int size = chans_used[src];
|
||||
|
||||
if (size == 0)
|
||||
if (size == 0 || !is_aligned_to_dvec4[src])
|
||||
continue;
|
||||
|
||||
int dst;
|
||||
/* Find the lowest place we can slot this uniform in. */
|
||||
for (dst = 0; dst < src; dst++) {
|
||||
if (chans_used[dst] + size <= 4)
|
||||
break;
|
||||
/* dvec3 are aligned to dvec4 size, apply the alignment of the size
|
||||
* to 4 to avoid moving last component of a dvec3 to the available
|
||||
* location at the end of a previous dvec3. These available locations
|
||||
* could be filled by smaller variables in next loop.
|
||||
*/
|
||||
size = ALIGN(size, 4);
|
||||
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
|
||||
src, size, channel_sizes[src],
|
||||
new_loc, new_chan,
|
||||
new_chans_used);
|
||||
/* Move the references to the data */
|
||||
for (int j = 0; j < size; j++) {
|
||||
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
||||
param[src * 4 + j];
|
||||
}
|
||||
|
||||
if (src == dst) {
|
||||
new_loc[src] = dst;
|
||||
new_chan[src] = 0;
|
||||
} else {
|
||||
new_loc[src] = dst;
|
||||
new_chan[src] = chans_used[dst];
|
||||
|
||||
/* Move the references to the data */
|
||||
for (int j = 0; j < size; j++) {
|
||||
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
||||
stage_prog_data->param[src * 4 + j];
|
||||
}
|
||||
|
||||
chans_used[dst] += size;
|
||||
chans_used[src] = 0;
|
||||
}
|
||||
|
||||
new_uniform_count = MAX2(new_uniform_count, dst + 1);
|
||||
}
|
||||
|
||||
/* Continue with the rest of data, which is aligned to vec4. */
|
||||
for (int src = 0; src < uniforms; src++) {
|
||||
int size = chans_used[src];
|
||||
|
||||
if (size == 0 || is_aligned_to_dvec4[src])
|
||||
continue;
|
||||
|
||||
int dst = set_push_constant_loc(uniforms, &new_uniform_count,
|
||||
src, size, channel_sizes[src],
|
||||
new_loc, new_chan,
|
||||
new_chans_used);
|
||||
/* Move the references to the data */
|
||||
for (int j = 0; j < size; j++) {
|
||||
stage_prog_data->param[dst * 4 + new_chan[src] + j] =
|
||||
param[src * 4 + j];
|
||||
}
|
||||
}
|
||||
|
||||
ralloc_free(param);
|
||||
this->uniforms = new_uniform_count;
|
||||
|
||||
/* Now, update the instructions for our repacked uniforms. */
|
||||
@@ -705,9 +758,9 @@ vec4_visitor::pack_uniform_registers()
|
||||
if (inst->src[i].file != UNIFORM)
|
||||
continue;
|
||||
|
||||
int chan = new_chan[src] / channel_sizes[src];
|
||||
inst->src[i].nr = new_loc[src];
|
||||
inst->src[i].swizzle += BRW_SWIZZLE4(new_chan[src], new_chan[src],
|
||||
new_chan[src], new_chan[src]);
|
||||
inst->src[i].swizzle += BRW_SWIZZLE4(chan, chan, chan, chan);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1071,6 +1124,13 @@ vec4_instruction::can_reswizzle(const struct gen_device_info *devinfo,
|
||||
if (devinfo->gen == 6 && is_math() && swizzle != BRW_SWIZZLE_XYZW)
|
||||
return false;
|
||||
|
||||
/* We can't swizzle implicit accumulator access. We'd have to
|
||||
* reswizzle the producer of the accumulator value in addition
|
||||
* to the consumer (i.e. both MUL and MACH). Just skip this.
|
||||
*/
|
||||
if (reads_accumulator_implicitly())
|
||||
return false;
|
||||
|
||||
if (!can_do_writemask(devinfo) && dst_writemask != WRITEMASK_XYZW)
|
||||
return false;
|
||||
|
||||
@@ -1941,6 +2001,24 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value)
|
||||
inst->mlen = 2;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_align1_df(vec4_instruction *inst)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case VEC4_OPCODE_DOUBLE_TO_F32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_D32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_U32:
|
||||
case VEC4_OPCODE_TO_DOUBLE:
|
||||
case VEC4_OPCODE_PICK_LOW_32BIT:
|
||||
case VEC4_OPCODE_PICK_HIGH_32BIT:
|
||||
case VEC4_OPCODE_SET_LOW_32BIT:
|
||||
case VEC4_OPCODE_SET_HIGH_32BIT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
vec4_visitor::convert_to_hw_regs()
|
||||
{
|
||||
@@ -1950,9 +2028,7 @@ vec4_visitor::convert_to_hw_regs()
|
||||
struct brw_reg reg;
|
||||
switch (src.file) {
|
||||
case VGRF: {
|
||||
const unsigned type_size = type_sz(src.type);
|
||||
const unsigned width = REG_SIZE / 2 / MAX2(4, type_size);
|
||||
reg = byte_offset(brw_vecn_grf(width, src.nr, 0), src.offset);
|
||||
reg = byte_offset(brw_vecn_grf(4, src.nr, 0), src.offset);
|
||||
reg.type = src.type;
|
||||
reg.abs = src.abs;
|
||||
reg.negate = src.negate;
|
||||
@@ -1960,12 +2036,11 @@ vec4_visitor::convert_to_hw_regs()
|
||||
}
|
||||
|
||||
case UNIFORM: {
|
||||
const unsigned width = REG_SIZE / 2 / MAX2(4, type_sz(src.type));
|
||||
reg = stride(byte_offset(brw_vec4_grf(
|
||||
prog_data->base.dispatch_grf_start_reg +
|
||||
src.nr / 2, src.nr % 2 * 4),
|
||||
src.offset),
|
||||
0, width, 1);
|
||||
0, 4, 1);
|
||||
reg.type = src.type;
|
||||
reg.abs = src.abs;
|
||||
reg.negate = src.negate;
|
||||
@@ -1998,6 +2073,20 @@ vec4_visitor::convert_to_hw_regs()
|
||||
|
||||
apply_logical_swizzle(&reg, inst, i);
|
||||
src = reg;
|
||||
|
||||
/* From IVB PRM, vol4, part3, "General Restrictions on Regioning
|
||||
* Parameters":
|
||||
*
|
||||
* "If ExecSize = Width and HorzStride ≠ 0, VertStride must be set
|
||||
* to Width * HorzStride."
|
||||
*
|
||||
* We can break this rule with DF sources on DF align1
|
||||
* instructions, because the exec_size would be 4 and width is 4.
|
||||
* As we know we are not accessing the next GRF, it is safe to
|
||||
* set vstride to the formula given by the rule itself.
|
||||
*/
|
||||
if (is_align1_df(inst) && (cvt(inst->exec_size) - 1) == src.width)
|
||||
src.vstride = src.width + src.hstride;
|
||||
}
|
||||
|
||||
if (inst->is_3src(devinfo)) {
|
||||
@@ -2255,24 +2344,6 @@ vec4_visitor::lower_simd_width()
|
||||
return progress;
|
||||
}
|
||||
|
||||
static bool
|
||||
is_align1_df(vec4_instruction *inst)
|
||||
{
|
||||
switch (inst->opcode) {
|
||||
case VEC4_OPCODE_DOUBLE_TO_F32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_D32:
|
||||
case VEC4_OPCODE_DOUBLE_TO_U32:
|
||||
case VEC4_OPCODE_TO_DOUBLE:
|
||||
case VEC4_OPCODE_PICK_LOW_32BIT:
|
||||
case VEC4_OPCODE_PICK_HIGH_32BIT:
|
||||
case VEC4_OPCODE_SET_LOW_32BIT:
|
||||
case VEC4_OPCODE_SET_HIGH_32BIT:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static brw_predicate
|
||||
scalarize_predicate(brw_predicate predicate, unsigned writemask)
|
||||
{
|
||||
@@ -2506,6 +2577,11 @@ vec4_visitor::apply_logical_swizzle(struct brw_reg *hw_reg,
|
||||
assert(brw_is_single_value_swizzle(reg.swizzle) ||
|
||||
is_supported_64bit_region(inst, arg));
|
||||
|
||||
/* Apply the region <2, 2, 1> for GRF or <0, 2, 1> for uniforms, as align16
|
||||
* HW can only do 32-bit swizzle channels.
|
||||
*/
|
||||
hw_reg->width = BRW_WIDTH_2;
|
||||
|
||||
if (is_supported_64bit_region(inst, arg) &&
|
||||
!is_gen7_supported_64bit_swizzle(inst, arg)) {
|
||||
/* Supported 64-bit swizzles are those such that their first two
|
||||
|
@@ -1980,8 +1980,6 @@ generate_code(struct brw_codegen *p,
|
||||
else
|
||||
spread_dst = stride(dst, 8, 4, 2);
|
||||
|
||||
src[0].vstride = BRW_VERTICAL_STRIDE_4;
|
||||
src[0].width = BRW_WIDTH_4;
|
||||
brw_MOV(p, spread_dst, src[0]);
|
||||
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
||||
@@ -2016,9 +2014,7 @@ generate_code(struct brw_codegen *p,
|
||||
src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
|
||||
if (inst->opcode == VEC4_OPCODE_PICK_HIGH_32BIT)
|
||||
src[0] = suboffset(src[0], 1);
|
||||
src[0].vstride = BRW_VERTICAL_STRIDE_8;
|
||||
src[0].width = BRW_WIDTH_4;
|
||||
src[0].hstride = BRW_HORIZONTAL_STRIDE_2;
|
||||
src[0] = spread(src[0], 2);
|
||||
brw_MOV(p, dst, src[0]);
|
||||
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
||||
@@ -2041,9 +2037,6 @@ generate_code(struct brw_codegen *p,
|
||||
dst.hstride = BRW_HORIZONTAL_STRIDE_2;
|
||||
|
||||
src[0] = retype(src[0], BRW_REGISTER_TYPE_UD);
|
||||
src[0].vstride = BRW_VERTICAL_STRIDE_4;
|
||||
src[0].width = BRW_WIDTH_4;
|
||||
src[0].hstride = BRW_HORIZONTAL_STRIDE_1;
|
||||
brw_MOV(p, dst, src[0]);
|
||||
|
||||
brw_set_default_access_mode(p, BRW_ALIGN_16);
|
||||
|
@@ -868,10 +868,36 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data,
|
||||
|
||||
vec4_gs_visitor v(compiler, log_data, &c, prog_data, shader,
|
||||
mem_ctx, true /* no_spills */, shader_time_index);
|
||||
|
||||
/* Backup 'nr_params' and 'param' as they can be modified by the
|
||||
* DUAL_OBJECT visitor. If it fails, we will run the fallback
|
||||
* (DUAL_INSTANCED or SINGLE mode) and we need to restore original
|
||||
* values.
|
||||
*/
|
||||
const unsigned param_count = prog_data->base.base.nr_params;
|
||||
gl_constant_value **param = ralloc_array(NULL, gl_constant_value*,
|
||||
param_count);
|
||||
memcpy(param, prog_data->base.base.param,
|
||||
sizeof(gl_constant_value*) * param_count);
|
||||
|
||||
if (v.run()) {
|
||||
/* Success! Backup is not needed */
|
||||
ralloc_free(param);
|
||||
return brw_vec4_generate_assembly(compiler, log_data, mem_ctx,
|
||||
shader, &prog_data->base, v.cfg,
|
||||
final_assembly_size);
|
||||
} else {
|
||||
/* These variables could be modified by the execution of the GS
|
||||
* visitor if it packed the uniforms in the push constant buffer.
|
||||
* As it failed, we need to restore them so we can start again with
|
||||
* DUAL_INSTANCED or SINGLE mode.
|
||||
*
|
||||
* FIXME: Could more variables be modified by this execution?
|
||||
*/
|
||||
memcpy(prog_data->base.base.param, param,
|
||||
sizeof(gl_constant_value*) * param_count);
|
||||
prog_data->base.base.nr_params = param_count;
|
||||
ralloc_free(param);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -852,7 +852,8 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
* The swizzle also works in the indirect case as the generator adds
|
||||
* the swizzle to the offset for us.
|
||||
*/
|
||||
unsigned shift = (nir_intrinsic_base(instr) % 16) / 4;
|
||||
const int type_size = type_sz(src.type);
|
||||
unsigned shift = (nir_intrinsic_base(instr) % 16) / type_size;
|
||||
assert(shift + instr->num_components <= 4);
|
||||
|
||||
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
|
||||
@@ -860,14 +861,20 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr)
|
||||
/* Offsets are in bytes but they should always be multiples of 4 */
|
||||
assert(const_offset->u32[0] % 4 == 0);
|
||||
|
||||
unsigned offset = const_offset->u32[0] + shift * 4;
|
||||
src.swizzle = brw_swizzle_for_size(instr->num_components);
|
||||
dest.writemask = brw_writemask_for_size(instr->num_components);
|
||||
unsigned offset = const_offset->u32[0] + shift * type_size;
|
||||
src.offset = ROUND_DOWN_TO(offset, 16);
|
||||
shift = (offset % 16) / 4;
|
||||
shift = (offset % 16) / type_size;
|
||||
assert(shift + instr->num_components <= 4);
|
||||
src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
|
||||
|
||||
emit(MOV(dest, src));
|
||||
} else {
|
||||
src.swizzle += BRW_SWIZZLE4(shift, shift, shift, shift);
|
||||
/* Uniform arrays are vec4 aligned, because of std140 alignment
|
||||
* rules.
|
||||
*/
|
||||
assert(shift == 0);
|
||||
|
||||
src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
|
||||
|
||||
|
@@ -352,30 +352,12 @@ gen7_choose_valign_el(const struct isl_device *dev,
|
||||
if (isl_surf_usage_is_stencil(info->usage)) {
|
||||
/* The Ivybridge PRM states that the stencil buffer's vertical alignment
|
||||
* is 8 [Ivybridge PRM, Volume 1, Part 1, Section 6.18.4.4 Alignment
|
||||
* Unit Size]. However, valign=8 is outside the set of valid values of
|
||||
* RENDER_SURFACE_STATE.SurfaceVerticalAlignment, which is VALIGN_2
|
||||
* (0x0) and VALIGN_4 (0x1).
|
||||
*
|
||||
* The PRM is generally confused about the width, height, and alignment
|
||||
* of the stencil buffer; and this confusion appears elsewhere. For
|
||||
* example, the following PRM text effectively converts the stencil
|
||||
* buffer's 8-pixel alignment to a 4-pixel alignment [Ivybridge PRM,
|
||||
* Volume 1, Part 1, Section
|
||||
* 6.18.4.2 Base Address and LOD Calculation]:
|
||||
*
|
||||
* For separate stencil buffer, the width must be mutiplied by 2 and
|
||||
* height divided by 2 as follows:
|
||||
*
|
||||
* w_L = 2*i*ceil(W_L/i)
|
||||
* h_L = 1/2*j*ceil(H_L/j)
|
||||
*
|
||||
* The root of the confusion is that, in W tiling, each pair of rows is
|
||||
* interleaved into one.
|
||||
*
|
||||
* FINISHME(chadv): Decide to set valign=4 or valign=8 after isl's API
|
||||
* is more polished.
|
||||
* Unit Size]. valign=8 is outside the set of valid values of
|
||||
* RENDER_SURFACE_STATE.SurfaceVerticalAlignment, but that's ok because
|
||||
* a stencil buffer will never be used directly for texturing or
|
||||
* rendering on gen7.
|
||||
*/
|
||||
require_valign4 = true;
|
||||
return 8;
|
||||
}
|
||||
|
||||
assert(!require_valign2 || !require_valign4);
|
||||
|
@@ -884,7 +884,7 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
|
||||
assert(new_bo.size == pow2_size);
|
||||
|
||||
new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
|
||||
if (new_bo.map == NULL) {
|
||||
if (new_bo.map == MAP_FAILED) {
|
||||
anv_gem_close(pool->device, new_bo.gem_handle);
|
||||
return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
|
||||
}
|
||||
@@ -993,6 +993,25 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
|
||||
|
||||
anv_bo_init_new(&bo->bo, device, size);
|
||||
|
||||
/* Even though the Scratch base pointers in 3DSTATE_*S are 64 bits, they
|
||||
* are still relative to the general state base address. When we emit
|
||||
* STATE_BASE_ADDRESS, we set general state base address to 0 and the size
|
||||
* to the maximum (1 page under 4GB). This allows us to just place the
|
||||
* scratch buffers anywhere we wish in the bottom 32 bits of address space
|
||||
* and just set the scratch base pointer in 3DSTATE_*S using a relocation.
|
||||
* However, in order to do so, we need to ensure that the kernel does not
|
||||
* place the scratch BO above the 32-bit boundary.
|
||||
*
|
||||
* NOTE: Technically, it can't go "anywhere" because the top page is off
|
||||
* limits. However, when EXEC_OBJECT_SUPPORTS_48B_ADDRESS is set, the
|
||||
* kernel allocates space using
|
||||
*
|
||||
* end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
|
||||
*
|
||||
* so nothing will ever touch the top page.
|
||||
*/
|
||||
bo->bo.flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
|
||||
|
||||
/* Set the exists last because it may be read by other threads */
|
||||
__sync_synchronize();
|
||||
bo->exists = true;
|
||||
|
@@ -1265,7 +1265,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
|
||||
cmd_buffer->last_ss_pool_center);
|
||||
VkResult result =
|
||||
anv_execbuf_add_bo(&execbuf, &ss_pool->bo, &cmd_buffer->surface_relocs,
|
||||
&cmd_buffer->pool->alloc);
|
||||
&device->alloc);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -1278,7 +1278,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
|
||||
cmd_buffer->last_ss_pool_center);
|
||||
|
||||
result = anv_execbuf_add_bo(&execbuf, &(*bbo)->bo, &(*bbo)->relocs,
|
||||
&cmd_buffer->pool->alloc);
|
||||
&device->alloc);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
}
|
||||
@@ -1387,7 +1387,7 @@ anv_cmd_buffer_execbuf(struct anv_device *device,
|
||||
|
||||
result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
|
||||
|
||||
anv_execbuf_finish(&execbuf, &cmd_buffer->pool->alloc);
|
||||
anv_execbuf_finish(&execbuf, &device->alloc);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@@ -701,10 +701,10 @@ void anv_CmdUpdateBuffer(
|
||||
struct anv_state tmp_data =
|
||||
anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);
|
||||
|
||||
anv_state_flush(cmd_buffer->device, tmp_data);
|
||||
|
||||
memcpy(tmp_data.map, pData, copy_size);
|
||||
|
||||
anv_state_flush(cmd_buffer->device, tmp_data);
|
||||
|
||||
int bs = 16;
|
||||
bs = gcd_pow2_u64(bs, dstOffset);
|
||||
bs = gcd_pow2_u64(bs, copy_size);
|
||||
@@ -1398,7 +1398,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
|
||||
* still hot in the cache.
|
||||
*/
|
||||
bool found_draw = false;
|
||||
bool self_dep = false;
|
||||
enum anv_subpass_usage usage = 0;
|
||||
for (uint32_t s = subpass_idx + 1; s < pass->subpass_count; s++) {
|
||||
usage |= pass->attachments[att].subpass_usage[s];
|
||||
@@ -1408,8 +1407,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
|
||||
* wait to resolve until then.
|
||||
*/
|
||||
found_draw = true;
|
||||
if (pass->attachments[att].subpass_usage[s] & ANV_SUBPASS_USAGE_INPUT)
|
||||
self_dep = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@@ -1468,14 +1465,6 @@ ccs_resolve_attachment(struct anv_cmd_buffer *cmd_buffer,
|
||||
* binding this surface to Sampler."
|
||||
*/
|
||||
resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL;
|
||||
} else if (cmd_buffer->device->info.gen == 8 && self_dep &&
|
||||
att_state->input_aux_usage == ISL_AUX_USAGE_CCS_D) {
|
||||
/* On Broadwell we still need to do resolves when there is a
|
||||
* self-dependency because HW could not see fast-clears and works
|
||||
* on the render cache as if there was regular non-fast-clear surface.
|
||||
* To avoid any inconsistency, we force the resolve.
|
||||
*/
|
||||
resolve_op = BLORP_FAST_CLEAR_OP_RESOLVE_FULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -452,7 +452,7 @@ anv_enumerate_devices(struct anv_instance *instance)
|
||||
|
||||
instance->physicalDeviceCount = 0;
|
||||
|
||||
max_devices = drmGetDevices2(0, devices, sizeof(devices));
|
||||
max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
|
||||
if (max_devices < 1)
|
||||
return VK_ERROR_INCOMPATIBLE_DRIVER;
|
||||
|
||||
@@ -468,6 +468,7 @@ anv_enumerate_devices(struct anv_instance *instance)
|
||||
break;
|
||||
}
|
||||
}
|
||||
drmFreeDevices(devices, max_devices);
|
||||
|
||||
if (result == VK_SUCCESS)
|
||||
instance->physicalDeviceCount = 1;
|
||||
|
@@ -179,8 +179,8 @@ static const struct anv_format anv_formats[] = {
|
||||
fmt(VK_FORMAT_D24_UNORM_S8_UINT, ISL_FORMAT_R24_UNORM_X8_TYPELESS),
|
||||
fmt(VK_FORMAT_D32_SFLOAT_S8_UINT, ISL_FORMAT_R32_FLOAT),
|
||||
|
||||
fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_DXT1_RGB),
|
||||
fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_DXT1_RGB_SRGB),
|
||||
swiz_fmt(VK_FORMAT_BC1_RGB_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM, RGB1),
|
||||
swiz_fmt(VK_FORMAT_BC1_RGB_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB, RGB1),
|
||||
fmt(VK_FORMAT_BC1_RGBA_UNORM_BLOCK, ISL_FORMAT_BC1_UNORM),
|
||||
fmt(VK_FORMAT_BC1_RGBA_SRGB_BLOCK, ISL_FORMAT_BC1_UNORM_SRGB),
|
||||
fmt(VK_FORMAT_BC2_UNORM_BLOCK, ISL_FORMAT_BC2_UNORM),
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include "anv_private.h"
|
||||
#include "util/debug.h"
|
||||
@@ -347,7 +348,7 @@ VkResult anv_BindImageMemory(
|
||||
if (image->aux_surface.isl.size > 0) {
|
||||
|
||||
/* The offset and size must be a multiple of 4K or else the
|
||||
* anv_gem_mmap call below will return NULL.
|
||||
* anv_gem_mmap call below will fail.
|
||||
*/
|
||||
assert((image->offset + image->aux_surface.offset) % 4096 == 0);
|
||||
assert(image->aux_surface.isl.size % 4096 == 0);
|
||||
@@ -363,10 +364,7 @@ VkResult anv_BindImageMemory(
|
||||
image->aux_surface.isl.size,
|
||||
device->info.has_llc ? 0 : I915_MMAP_WC);
|
||||
|
||||
/* If anv_gem_mmap returns NULL, it's likely that the kernel was
|
||||
* not able to find space on the host to create a proper mapping.
|
||||
*/
|
||||
if (map == NULL)
|
||||
if (map == MAP_FAILED)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
memset(map, 0, image->aux_surface.isl.size);
|
||||
|
@@ -291,27 +291,21 @@ color_attachment_compute_aux_usage(struct anv_device *device,
|
||||
att_state->input_aux_usage = ISL_AUX_USAGE_CCS_E;
|
||||
} else if (att_state->fast_clear) {
|
||||
att_state->aux_usage = ISL_AUX_USAGE_CCS_D;
|
||||
if (GEN_GEN >= 9 &&
|
||||
!isl_format_supports_ccs_e(&device->info, iview->isl.format)) {
|
||||
/* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
|
||||
*
|
||||
* "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
|
||||
* setting is only allowed if Surface Format supported for Fast
|
||||
* Clear. In addition, if the surface is bound to the sampling
|
||||
* engine, Surface Format must be supported for Render Target
|
||||
* Compression for surfaces bound to the sampling engine."
|
||||
*
|
||||
* In other words, we can't sample from a fast-cleared image if it
|
||||
* doesn't also support color compression.
|
||||
*/
|
||||
att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
|
||||
} else if (GEN_GEN >= 8) {
|
||||
/* Broadwell/Skylake can sample from fast-cleared images */
|
||||
/* From the Sky Lake PRM, RENDER_SURFACE_STATE::AuxiliarySurfaceMode:
|
||||
*
|
||||
* "If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D
|
||||
* setting is only allowed if Surface Format supported for Fast
|
||||
* Clear. In addition, if the surface is bound to the sampling
|
||||
* engine, Surface Format must be supported for Render Target
|
||||
* Compression for surfaces bound to the sampling engine."
|
||||
*
|
||||
* In other words, we can only sample from a fast-cleared image if it
|
||||
* also supports color compression.
|
||||
*/
|
||||
if (isl_format_supports_ccs_e(&device->info, iview->isl.format))
|
||||
att_state->input_aux_usage = ISL_AUX_USAGE_CCS_D;
|
||||
} else {
|
||||
/* Ivy Bridge and Haswell cannot */
|
||||
else
|
||||
att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
|
||||
}
|
||||
} else {
|
||||
att_state->aux_usage = ISL_AUX_USAGE_NONE;
|
||||
att_state->input_aux_usage = ISL_AUX_USAGE_NONE;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user