Compare commits
151 Commits
mesa-18.0...chadv/cros
SHA1:

131e871385
20578f81a6
bcfd78e448
78c125af39
ca19ee33d7
acaec6cdd9
8096b558a7
d3ce493b34
75a4802060
e1a49f974b
14f6275c92
e28233a527
49b0a140a7
b453f38a47
310d17fcf1
51e14bc3c0
d7c93b558a
6d07e443ba
5391de1262
b358e0e67f
b9e2f78d6e
c8949e2498
8116b9170b
c7deeb71a8
f5b9c2a6e3
39f875a6b7
9eed6bea6b
4e9e964de6
21be331401
0aaa27f291
cf0b26ec12
0c69db895f
9c1f010f34
5a3404d443
db682b8f0e
af9d4ce480
f4c534ef68
6ac5e851f1
5d8f270d10
dfe4dd48ec
36dbbf11a0
94922dbe4b
0b46c7b3b0
2d16b61bff
123798eb44
0e7aaaf5a5
ba01589c0c
e4504be6fc
6c724fb7c1
c860171c63
5092610f29
94610758a3
6aeef54644
7b744a494d
3e3956d6ae
647f40298a
48e7bc6833
785d9a4ed8
0beaf7ad3e
ac4437b20b
50265cd9ee
265d36c890
4fe662c58f
57223fb07a
0e879aad2f
e1331c9d61
0663ae0aa1
57b0ccd178
513c2263cb
df13588d21
20f70ae385
e52a9f18d6
468ea3cc45
d38ec24f53
dfe0217905
7d4007d58a
e776791432
62b68d05e7
65c18b02fc
8fae5eddd9
53f9131205
c38c60a63c
7c8cfe2d59
0cc7370733
a2a1b0e75e
5781c3d1db
d2414e64e4
298554541d
125c0529f3
022c5b22fe
0b8d38bd48
8172b9ff48
f2040fbe48
51c0cee267
517e34c62f
807e2539e5
8dda01ef5a
6569b33b6e
ad1990629e
ee48e3acb8
6a3421078a
365a48abdd
2123bd2805
6b0109cf39
b9280031a8
a83f7e119c
718f4251c5
f4376a0c2b
aafb56a148
a7cfec3be0
d40fa42292
04a17ec327
7b0ae96711
3bbf8d9042
a152cb7492
5d3e10fd27
cffa82327d
b3a1aa94d9
67dc551ba9
cb7ef0df00
8b3cb7c651
67ebde19d4
ec4bb693a0
4064fe59e7
38ec78049f
95ff232294
24caee8975
8bd5ec5b86
e85aaec148
97f96610c8
31b2144c83
b9e1ca16f8
90cceaa9dd
d5592e2fda
9af5379228
ddc2d28548
cd3feea745
d6c9a89d13
bc0a21e348
4b69ba3817
766589d89a
c727ea9370
4df414bbd2
316d762186
224fd17e1e
08085df313
4b4d929c27
3b6d232a5c
eee8dd7c33
10f5e0dce2
911ca587f8
.travis.yml (121 changes)

@@ -396,39 +396,9 @@ matrix:
- libexpat1-dev
- libx11-xcb-dev
- libelf-dev
- env:
- LABEL="macOS make"
- BUILD=make
- MAKEFLAGS="-j4"
- MAKE_CHECK_COMMAND="make check"
- DRI_LOADERS="--with-platforms=x11 --disable-egl"
os: osx

before_install:
- |
if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext
# Set PATH for homebrew pip3 installs
PATH="$HOME/Library/Python/3.6/bin:${PATH}"
# Set PKG_CONFIG_PATH for keg-only expat
PKG_CONFIG_PATH="/usr/local/opt/expat/lib/pkgconfig:${PKG_CONFIG_PATH}"
# Set PATH for keg-only gettext
PATH="/usr/local/opt/gettext/bin:${PATH}"

# Install xquartz for prereqs ...
XQUARTZ_VERSION="2.7.11"
wget -nv https://dl.bintray.com/xquartz/downloads/XQuartz-${XQUARTZ_VERSION}.dmg
hdiutil attach XQuartz-${XQUARTZ_VERSION}.dmg
sudo installer -pkg /Volumes/XQuartz-${XQUARTZ_VERSION}/XQuartz.pkg -target /
hdiutil detach /Volumes/XQuartz-${XQUARTZ_VERSION}
# ... and set paths
PATH="/opt/X11/bin:${PATH}"
PKG_CONFIG_PATH="/opt/X11/share/pkgconfig:/opt/X11/lib/pkgconfig:${PKG_CONFIG_PATH}"
ACLOCAL="aclocal -I /opt/X11/share/aclocal -I /usr/local/share/aclocal"
fi

install:
- pip2 install --user mako
- pip install --user mako

# Install the latest meson from pip, since the version in the ubuntu repos is
# often quite old.
@@ -449,64 +419,62 @@ install:
# Install dependencies where we require specific versions (or where
# disallowed by Travis CI's package whitelisting).

- |
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2
tar -jxvf $XORGMACROS_VERSION.tar.bz2
(cd $XORGMACROS_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2
- tar -jxvf $XORGMACROS_VERSION.tar.bz2
- (cd $XORGMACROS_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2
tar -jxvf $GLPROTO_VERSION.tar.bz2
(cd $GLPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2
- tar -jxvf $GLPROTO_VERSION.tar.bz2
- (cd $GLPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2
tar -jxvf $DRI2PROTO_VERSION.tar.bz2
(cd $DRI2PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2
- tar -jxvf $DRI2PROTO_VERSION.tar.bz2
- (cd $DRI2PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2
tar -jxvf $XCBPROTO_VERSION.tar.bz2
(cd $XCBPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2
- tar -jxvf $XCBPROTO_VERSION.tar.bz2
- (cd $XCBPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2
tar -jxvf $LIBXCB_VERSION.tar.bz2
(cd $LIBXCB_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2
- tar -jxvf $LIBXCB_VERSION.tar.bz2
- (cd $LIBXCB_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2
tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2
(cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2
- tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2
- (cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
tar -jxvf $LIBDRM_VERSION.tar.bz2
(cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install)
- wget http://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
- tar -jxvf $LIBDRM_VERSION.tar.bz2
- (cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install)

wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
(cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
- tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
- (cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget http://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
tar -jxvf $LIBVDPAU_VERSION.tar.bz2
(cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget http://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
- tar -jxvf $LIBVDPAU_VERSION.tar.bz2
- (cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)

wget http://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
tar -jxvf $LIBVA_VERSION.tar.bz2
(cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)
- wget http://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
- tar -jxvf $LIBVA_VERSION.tar.bz2
- (cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)

wget $WAYLAND_RELEASES/$LIBWAYLAND_VERSION.tar.xz
tar -axvf $LIBWAYLAND_VERSION.tar.xz
(cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)
- wget $WAYLAND_RELEASES/$LIBWAYLAND_VERSION.tar.xz
- tar -axvf $LIBWAYLAND_VERSION.tar.xz
- (cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)

wget $WAYLAND_RELEASES/$WAYLAND_PROTOCOLS_VERSION.tar.xz
tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz
(cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install)
- wget $WAYLAND_RELEASES/$WAYLAND_PROTOCOLS_VERSION.tar.xz
- tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz
- (cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install)

# Meson requires ninja >= 1.6, but trusty has 1.3.x
wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip
unzip ninja-linux.zip
mv ninja $HOME/prefix/bin/
# Meson requires ninja >= 1.6, but trusty has 1.3.x
- wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip;
- unzip ninja-linux.zip
- mv ninja $HOME/prefix/bin/

# Generate this header since one is missing on the Travis instance
mkdir -p linux
printf "%s\n" \
# Generate the header since one is missing on the Travis instance
- mkdir -p linux
- printf "%s\n" \
"#ifndef _LINUX_MEMFD_H" \
"#define _LINUX_MEMFD_H" \
"" \
@@ -517,7 +485,6 @@ install:
"#define MFD_ALLOW_SEALING 0x0002U" \
"" \
"#endif /* _LINUX_MEMFD_H */" > linux/memfd.h
fi

script:
- if test "x$BUILD" = xmake; then
@@ -1,6 +0,0 @@
# fixes: The following commits were applied without the "cherry-picked from" tag
50265cd9ee4caffee853700bdcd75b92eedc0e7b automake: anv: ship anv_extensions_gen.py in the tarball
ac4437b20b87c7285b89466f05b51518ae616873 automake: small cleanup after the meson.build inclusion

# stable: The KHX extension is disabled all together in the stable branches.
bee9270853c34aa8e4b3d19a125608ee67c87b86 radv: Don't expose VK_KHX_multiview on android.
configure.ac (17 changes)
@@ -685,19 +685,6 @@ AC_LINK_IFELSE(
LDFLAGS=$save_LDFLAGS
AM_CONDITIONAL(HAVE_LD_DYNAMIC_LIST, test "$have_ld_dynamic_list" = "yes")

dnl
dnl OSX linker does not support build-id
dnl
case "$host_os" in
darwin*)
LD_BUILD_ID=""
;;
*)
LD_BUILD_ID="-Wl,--build-id=sha1"
;;
esac
AC_SUBST([LD_BUILD_ID])

dnl
dnl compatibility symlinks
dnl
@@ -1283,10 +1270,10 @@ AC_ARG_ENABLE([xa],
[enable_xa=no])
AC_ARG_ENABLE([gbm],
[AS_HELP_STRING([--enable-gbm],
[enable gbm library @<:@default=yes except cygwin and macOS@:>@])],
[enable gbm library @<:@default=yes except cygwin@:>@])],
[enable_gbm="$enableval"],
[case "$host_os" in
cygwin* | darwin*)
cygwin*)
enable_gbm=no
;;
*)
@@ -58,13 +58,7 @@ if you'd like to nominate a patch in the next stable release.
<td>Final planned release for the 17.3 series</td>
</tr>
<tr>
<td rowspan="7">18.0</td>
<td>2018-01-19</td>
<td>18.0.0-rc1</td>
<td>Emil Velikov</td>
<td></td>
</tr>
<tr>
<td rowspan="6">18.0</td>
<td>2018-01-26</td>
<td>18.0.0-rc2</td>
<td>Emil Velikov</td>
docs/relnotes/18.1.0.html (new file, 64 lines)
@@ -0,0 +1,64 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>

<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>

<iframe src="../contents.html"></iframe>
<div class="content">

<h1>Mesa 18.1.0 Release Notes / TBD</h1>

<p>
Mesa 18.1.0 is a new development release. People who are concerned
with stability and reliability should stick with a previous release or
wait for Mesa 18.1.1.
</p>
<p>
Mesa 18.1.0 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>


<h2>SHA256 checksums</h2>
<pre>
TBD.
</pre>


<h2>New features</h2>

<p>
Note: some of the new features are only available with certain drivers.
</p>

<ul>
TBD
</ul>

<h2>Bug fixes</h2>

<ul>
TBD
</ul>

<h2>Changes</h2>

<ul>
TBD
</ul>

</div>
</body>
</html>
@@ -246,6 +246,10 @@ release.
Note: resending patch identical to one on mesa-dev@ or one that differs only
by the extra mesa-stable@ tag is <strong>not</strong> recommended.
</p>
<p>
If you are not the author of the original patch, please Cc: them in your
nomination request.
</p>

<h3 id="thetag">The stable tag</h3>
@@ -6,7 +6,7 @@ extern "C" {
#endif

/*
** Copyright (c) 2015-2017 The Khronos Group Inc.
** Copyright (c) 2015-2018 The Khronos Group Inc.
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
@@ -43,7 +43,7 @@ extern "C" {
#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
// Version of this file
#define VK_HEADER_VERSION 66
#define VK_HEADER_VERSION 68

#define VK_NULL_HANDLE 0
@@ -304,6 +304,8 @@ typedef enum VkStructureType {
VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV = 1000098000,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT = 1000099000,
VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT = 1000101000,
VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT = 1000101001,
VK_STRUCTURE_TYPE_HDR_METADATA_EXT = 1000105000,
VK_STRUCTURE_TYPE_SHARED_PRESENT_SURFACE_CAPABILITIES_KHR = 1000111000,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_FENCE_INFO_KHR = 1000112000,
@@ -5240,12 +5242,12 @@ typedef enum VkDebugReportObjectTypeEXT {
VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT = 30,
VK_DEBUG_REPORT_OBJECT_TYPE_OBJECT_TABLE_NVX_EXT = 31,
VK_DEBUG_REPORT_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX_EXT = 32,
VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT = 33,
VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT = 33,
VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR_EXT = 1000085000,
VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_KHR_EXT = 1000156000,
VK_DEBUG_REPORT_OBJECT_TYPE_BEGIN_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT,
VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT,
VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT - VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),
VK_DEBUG_REPORT_OBJECT_TYPE_END_RANGE_EXT = VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT,
VK_DEBUG_REPORT_OBJECT_TYPE_RANGE_SIZE_EXT = (VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT - VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT + 1),
VK_DEBUG_REPORT_OBJECT_TYPE_MAX_ENUM_EXT = 0x7FFFFFFF
} VkDebugReportObjectTypeEXT;

@@ -6532,6 +6534,47 @@ VKAPI_ATTR void VKAPI_CALL vkCmdSetDiscardRectangleEXT(
const VkRect2D* pDiscardRectangles);
#endif

#define VK_EXT_conservative_rasterization 1
#define VK_EXT_CONSERVATIVE_RASTERIZATION_SPEC_VERSION 1
#define VK_EXT_CONSERVATIVE_RASTERIZATION_EXTENSION_NAME "VK_EXT_conservative_rasterization"

typedef enum VkConservativeRasterizationModeEXT {
VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT = 0,
VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT = 1,
VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT = 2,
VK_CONSERVATIVE_RASTERIZATION_MODE_BEGIN_RANGE_EXT = VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT,
VK_CONSERVATIVE_RASTERIZATION_MODE_END_RANGE_EXT = VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT,
VK_CONSERVATIVE_RASTERIZATION_MODE_RANGE_SIZE_EXT = (VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT - VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT + 1),
VK_CONSERVATIVE_RASTERIZATION_MODE_MAX_ENUM_EXT = 0x7FFFFFFF
} VkConservativeRasterizationModeEXT;

typedef VkFlags VkPipelineRasterizationConservativeStateCreateFlagsEXT;

typedef struct VkPhysicalDeviceConservativeRasterizationPropertiesEXT {
VkStructureType sType;
void* pNext;
float primitiveOverestimationSize;
float maxExtraPrimitiveOverestimationSize;
float extraPrimitiveOverestimationSizeGranularity;
VkBool32 primitiveUnderestimation;
VkBool32 conservativePointAndLineRasterization;
VkBool32 degenerateTrianglesRasterized;
VkBool32 degenerateLinesRasterized;
VkBool32 fullyCoveredFragmentShaderInputVariable;
VkBool32 conservativeRasterizationPostDepthCoverage;
} VkPhysicalDeviceConservativeRasterizationPropertiesEXT;

typedef struct VkPipelineRasterizationConservativeStateCreateInfoEXT {
VkStructureType sType;
const void* pNext;
VkPipelineRasterizationConservativeStateCreateFlagsEXT flags;
VkConservativeRasterizationModeEXT conservativeRasterizationMode;
float extraPrimitiveOverestimationSize;
} VkPipelineRasterizationConservativeStateCreateInfoEXT;


#define VK_EXT_swapchain_colorspace 1
#define VK_EXT_SWAPCHAIN_COLOR_SPACE_SPEC_VERSION 3
#define VK_EXT_SWAPCHAIN_COLOR_SPACE_EXTENSION_NAME "VK_EXT_swapchain_colorspace"
@@ -6861,6 +6904,7 @@ VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkValidationCacheEXT)

#define VK_EXT_VALIDATION_CACHE_SPEC_VERSION 1
#define VK_EXT_VALIDATION_CACHE_EXTENSION_NAME "VK_EXT_validation_cache"
#define VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT

typedef enum VkValidationCacheHeaderVersionEXT {
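The VK_VERSION_* macros in the vulkan.h hunk above unpack a version packed into bit fields (10-bit minor at bit 12, 12-bit patch at bit 0). A minimal C sketch of the round trip, assuming the standard 22-bit shift for the major field, which this hunk does not show:

#include <stdint.h>
#include <stdio.h>

/* Same definitions as the hunk above; VK_VERSION_MAJOR is assumed from
 * the standard header, since only MINOR and PATCH appear in the diff. */
#define VK_VERSION_MAJOR(version) ((uint32_t)(version) >> 22)
#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)

int main(void)
{
    /* Pack 1.0.68 (68 matching the new VK_HEADER_VERSION) and unpack it. */
    uint32_t v = (1u << 22) | (0u << 12) | 68u;
    printf("%u.%u.%u\n", VK_VERSION_MAJOR(v), VK_VERSION_MINOR(v),
           VK_VERSION_PATCH(v));
    return 0;
}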
meson.build (34 changes)
@@ -57,10 +57,6 @@ dri_drivers_path = get_option('dri-drivers-path')
if dri_drivers_path == ''
dri_drivers_path = join_paths(get_option('libdir'), 'dri')
endif
dri_search_path = get_option('dri-search-path')
if dri_search_path == ''
dri_search_path = join_paths(get_option('prefix'), dri_drivers_path)
endif

with_gles1 = get_option('gles1')
with_gles2 = get_option('gles2')
@@ -357,15 +353,9 @@ endif
with_dri2 = (with_dri or with_any_vk) and with_dri_platform == 'drm'
with_dri3 = get_option('dri3')
if with_dri3 == 'auto'
if system_has_kms_drm and with_dri2
with_dri3 = true
else
with_dri3 = false
endif
elif with_dri3 == 'true'
with_dri3 = true
with_dri3 = system_has_kms_drm and with_dri2
else
with_dri3 = false
with_dri3 = with_dri3 == 'true'
endif

if with_any_vk and (with_platform_x11 and not with_dri3)
@@ -1010,23 +1000,15 @@ if with_gallium_opencl
# TODO: optional modules
endif

if with_amd_vk
_llvm_version = '>= 4.0.0'
elif with_gallium_opencl or with_gallium_swr or with_gallium_r600 or with_gallium_radeonsi
_llvm_version = '>= 3.9.0'
else
_llvm_version = '>= 3.3.0'
endif

_llvm = get_option('llvm')
if _llvm == 'auto'
dep_llvm = dependency(
'llvm', version : _llvm_version, modules : llvm_modules,
'llvm', version : '>= 3.9.0', modules : llvm_modules,
required : with_amd_vk or with_gallium_radeonsi or with_gallium_swr or with_gallium_opencl,
)
with_llvm = dep_llvm.found()
elif _llvm == 'true'
dep_llvm = dependency('llvm', version : _llvm_version, modules : llvm_modules)
dep_llvm = dependency('llvm', version : '>= 3.9.0', modules : llvm_modules)
with_llvm = true
else
dep_llvm = []
@@ -1045,7 +1027,7 @@ if with_llvm
_llvm_patch = _llvm_patch.split('g')[0]
endif
pre_args += [
'-DHAVE_LLVM=0x0@0@@1@@2@'.format(_llvm_version[0], _llvm_version[1], _llvm_patch),
'-DHAVE_LLVM=0x0@0@0@1@'.format(_llvm_version[0], _llvm_version[1]),
'-DMESA_LLVM_VERSION_PATCH=@0@'.format(_llvm_patch),
]
elif with_amd_vk or with_gallium_radeonsi or with_gallium_swr
@@ -1231,10 +1213,8 @@ inc_include = include_directories('include')

gl_priv_reqs = [
'x11', 'xext', 'xdamage >= 1.1', 'xfixes', 'x11-xcb', 'xcb',
'xcb-glx >= 1.8.1']
if dep_libdrm.found()
gl_priv_reqs += 'libdrm >= 2.4.75'
endif
'xcb-glx >= 1.8.1', 'libdrm >= 2.4.75',
]
if dep_xxf86vm != [] and dep_xxf86vm.found()
gl_priv_reqs += 'xxf86vm'
endif
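For reference, the '-DHAVE_LLVM=0x0@0@0@1@' define assembled in the meson.build hunk above encodes the LLVM version as one hex digit pair per component, so 3.9 yields 0x0309. A hypothetical stand-alone sketch of the same encoding in C (the build system itself does this with string formatting):

#include <stdio.h>

/* Hypothetical helper mirroring '0x0@0@0@1@'.format(major, minor):
 * e.g. 3.9 -> 0x0309, 5.0 -> 0x0500. Not part of the build system. */
static unsigned have_llvm_value(unsigned major, unsigned minor)
{
    return (major << 8) | minor;
}

int main(void)
{
    printf("0x%04x 0x%04x\n", have_llvm_value(3, 9), have_llvm_value(5, 0));
    return 0;
}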
@@ -41,13 +41,7 @@ option(
'dri-drivers-path',
type : 'string',
value : '',
description : 'Location to install dri drivers. Default: $libdir/dri.'
)
option(
'dri-search-path',
type : 'string',
value : '',
description : 'Locations to search for dri drivers, passed as colon separated list. Default: dri-drivers-path.'
description : 'Location of dri drivers. Default: $libdir/dri.'
)
option(
'gallium-drivers',
@@ -923,6 +923,43 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
AC_FUNC_ATTR_LEGACY);
}

static LLVMValueRef
ac_build_buffer_load_common(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
bool glc,
bool slc,
bool can_speculate,
bool use_format)
{
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
voffset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0)
};
unsigned func = CLAMP(num_channels, 1, 3) - 1;

LLVMTypeRef types[] = {ctx->f32, ctx->v2f32, ctx->v4f32};
const char *type_names[] = {"f32", "v2f32", "v4f32"};
char name[256];

if (use_format) {
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.format.%s",
type_names[func]);
} else {
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
type_names[func]);
}

return ac_build_intrinsic(ctx, name, types[func], args,
ARRAY_SIZE(args),
ac_get_load_intr_attribs(can_speculate));
}

LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
@@ -967,47 +1004,21 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
return ac_build_gather_values(ctx, result, num_channels);
}

unsigned func = CLAMP(num_channels, 1, 3) - 1;

LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
offset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0)
};

LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
ctx->v4f32};
const char *type_names[] = {"f32", "v2f32", "v4f32"};
char name[256];

snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
type_names[func]);

return ac_build_intrinsic(ctx, name, types[func], args,
ARRAY_SIZE(args),
ac_get_load_intr_attribs(can_speculate));
return ac_build_buffer_load_common(ctx, rsrc, vindex, offset,
num_channels, glc, slc,
can_speculate, false);
}

LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
bool can_speculate)
{
LLVMValueRef args [] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex,
voffset,
ctx->i1false, /* glc */
ctx->i1false, /* slc */
};

return ac_build_intrinsic(ctx,
"llvm.amdgcn.buffer.load.format.v4f32",
ctx->v4f32, args, ARRAY_SIZE(args),
ac_get_load_intr_attribs(can_speculate));
return ac_build_buffer_load_common(ctx, rsrc, vindex, voffset,
num_channels, false, false,
can_speculate, true);
}

/**
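A self-contained sketch of the intrinsic-name selection that the new ac_build_buffer_load_common() helper above centralizes; Mesa's CLAMP macro is replaced by an inline clamp so the snippet compiles on its own:

#include <stdio.h>

/* Stand-in for the name selection inside ac_build_buffer_load_common():
 * CLAMP(num_channels, 1, 3) - 1 indexes {f32, v2f32, v4f32}, and
 * use_format switches between the two amdgcn buffer-load families. */
static void intrinsic_name(char *buf, size_t len, unsigned num_channels,
                           int use_format)
{
    static const char *type_names[] = {"f32", "v2f32", "v4f32"};
    unsigned clamped = num_channels < 1 ? 1 : (num_channels > 3 ? 3 : num_channels);
    snprintf(buf, len, "llvm.amdgcn.buffer.load%s.%s",
             use_format ? ".format" : "", type_names[clamped - 1]);
}

int main(void)
{
    char name[64];
    intrinsic_name(name, sizeof(name), 4, 1);
    puts(name); /* llvm.amdgcn.buffer.load.format.v4f32 */
    return 0;
}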
@@ -214,6 +214,7 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
bool can_speculate);

LLVMValueRef
@@ -326,6 +326,7 @@ create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
if (args->array_params_mask & (1 << i)) {
LLVMValueRef P = LLVMGetParam(main_function, i);
ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL);
ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
ac_add_attr_dereferenceable(P, UINT64_MAX);
}
else {
@@ -555,10 +556,12 @@ static bool needs_view_index_sgpr(struct nir_to_llvm_context *ctx,
case MESA_SHADER_TESS_EVAL:
if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
return true;
break;
case MESA_SHADER_GEOMETRY:
case MESA_SHADER_TESS_CTRL:
if (ctx->shader_info->info.needs_multiview_view_index)
return true;
break;
default:
break;
}
@@ -1911,18 +1914,24 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
case nir_op_fmax:
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
if (instr->dest.dest.ssa.bit_size == 32)
if (ctx->ac.chip_class < GFX9 &&
instr->dest.dest.ssa.bit_size == 32) {
/* Only pre-GFX9 chips do not flush denorms. */
result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
ac_to_float_type(&ctx->ac, def_type),
result);
}
break;
case nir_op_fmin:
result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
if (instr->dest.dest.ssa.bit_size == 32)
if (ctx->ac.chip_class < GFX9 &&
instr->dest.dest.ssa.bit_size == 32) {
/* Only pre-GFX9 chips do not flush denorms. */
result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
ac_to_float_type(&ctx->ac, def_type),
result);
}
break;
case nir_op_ffma:
result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
@@ -2307,10 +2316,13 @@ static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
struct ac_image_args *args)
{
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);

return ac_build_buffer_load_format(&ctx->ac,
args->resource,
args->addr,
ctx->ac.i32_0,
util_last_bit(mask),
true);
}

@@ -4549,11 +4561,14 @@ static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
LLVMValueRef buffer_ptr, bool write)
{
struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
LLVMValueRef result;

if (write && ctx->stage == MESA_SHADER_FRAGMENT)
ctx->shader_info->fs.writes_memory = true;
LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);

return LLVMBuildLoad(ctx->builder, buffer_ptr, "");
result = LLVMBuildLoad(ctx->builder, buffer_ptr, "");
LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);

return result;
}

static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr)
@@ -4589,9 +4604,6 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,

assert(base_index < layout->binding_count);

if (write && ctx->stage == MESA_SHADER_FRAGMENT)
ctx->shader_info->fs.writes_memory = true;

switch (desc_type) {
case AC_DESC_IMAGE:
type = ctx->ac.v8i32;
@@ -5365,7 +5377,7 @@ handle_vs_input_decl(struct nir_to_llvm_context *ctx,
input = ac_build_buffer_load_format(&ctx->ac, t_list,
buffer_index,
ctx->ac.i32_0,
true);
4, true);

for (unsigned chan = 0; chan < 4; chan++) {
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
@@ -179,7 +179,6 @@ struct ac_shader_variant_info {
bool writes_stencil;
bool writes_sample_mask;
bool early_fragment_test;
bool writes_memory;
bool prim_id_input;
bool layer_input;
} fs;
@@ -31,7 +31,7 @@ static void mark_sampler_desc(const nir_variable *var,
}

static void
gather_intrinsic_info(const nir_intrinsic_instr *instr,
gather_intrinsic_info(const nir_shader *nir, const nir_intrinsic_instr *instr,
struct ac_shader_info *info)
{
switch (instr->intrinsic) {
@@ -104,15 +104,43 @@ gather_intrinsic_info(const nir_intrinsic_instr *instr,
dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
info->ps.uses_input_attachments = true;
mark_sampler_desc(instr->variables[0]->var, info);

if (nir_intrinsic_image_store ||
nir_intrinsic_image_atomic_add ||
nir_intrinsic_image_atomic_min ||
nir_intrinsic_image_atomic_max ||
nir_intrinsic_image_atomic_and ||
nir_intrinsic_image_atomic_or ||
nir_intrinsic_image_atomic_xor ||
nir_intrinsic_image_atomic_exchange ||
nir_intrinsic_image_atomic_comp_swap) {
if (nir->info.stage == MESA_SHADER_FRAGMENT)
info->ps.writes_memory = true;
}
break;
}
case nir_intrinsic_store_ssbo:
case nir_intrinsic_ssbo_atomic_add:
case nir_intrinsic_ssbo_atomic_imin:
case nir_intrinsic_ssbo_atomic_umin:
case nir_intrinsic_ssbo_atomic_imax:
case nir_intrinsic_ssbo_atomic_umax:
case nir_intrinsic_ssbo_atomic_and:
case nir_intrinsic_ssbo_atomic_or:
case nir_intrinsic_ssbo_atomic_xor:
case nir_intrinsic_ssbo_atomic_exchange:
case nir_intrinsic_ssbo_atomic_comp_swap:
if (nir->info.stage == MESA_SHADER_FRAGMENT)
info->ps.writes_memory = true;
break;
default:
break;
}
}

static void
gather_tex_info(const nir_tex_instr *instr, struct ac_shader_info *info)
gather_tex_info(const nir_shader *nir, const nir_tex_instr *instr,
struct ac_shader_info *info)
{
if (instr->sampler)
mark_sampler_desc(instr->sampler->var, info);
@@ -121,15 +149,16 @@ gather_tex_info(const nir_tex_instr *instr, struct ac_shader_info *info)
}

static void
gather_info_block(const nir_block *block, struct ac_shader_info *info)
gather_info_block(const nir_shader *nir, const nir_block *block,
struct ac_shader_info *info)
{
nir_foreach_instr(instr, block) {
switch (instr->type) {
case nir_instr_type_intrinsic:
gather_intrinsic_info(nir_instr_as_intrinsic(instr), info);
gather_intrinsic_info(nir, nir_instr_as_intrinsic(instr), info);
break;
case nir_instr_type_tex:
gather_tex_info(nir_instr_as_tex(instr), info);
gather_tex_info(nir, nir_instr_as_tex(instr), info);
break;
default:
break;
@@ -165,6 +194,6 @@ ac_nir_shader_info_pass(const struct nir_shader *nir,
gather_info_input_decl(nir, variable, info);

nir_foreach_block(block, func->impl) {
gather_info_block(block, info);
gather_info_block(nir, block, info);
}
}
@@ -42,6 +42,7 @@ struct ac_shader_info {
bool force_persample;
bool needs_sample_positions;
bool uses_input_attachments;
bool writes_memory;
} ps;
struct {
bool uses_grid_size;
@@ -429,18 +429,26 @@ void radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
}

static void
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer)
radv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer,
enum radv_cmd_flush_bits flags)
{
if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
enum radv_cmd_flush_bits flags;
uint32_t *ptr = NULL;
uint64_t va = 0;

/* Force wait for graphics/compute engines to be idle. */
flags = RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
RADV_CMD_FLAG_CS_PARTIAL_FLUSH));

si_cs_emit_cache_flush(cmd_buffer->cs,
if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
va = radv_buffer_get_va(cmd_buffer->gfx9_fence_bo) +
cmd_buffer->gfx9_fence_offset;
ptr = &cmd_buffer->gfx9_fence_idx;
}

/* Force wait for graphics or compute engines to be idle. */
si_cs_emit_cache_flush(cmd_buffer->cs, false,
cmd_buffer->device->physical_device->rad_info.chip_class,
NULL, 0,
ptr, va,
radv_cmd_buffer_uses_mec(cmd_buffer),
flags);
}
@@ -3501,7 +3509,7 @@ radv_draw(struct radv_cmd_buffer *cmd_buffer,
}

assert(cmd_buffer->cs->cdw <= cdw_max);
radv_cmd_buffer_after_draw(cmd_buffer);
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
}

void radv_CmdDraw(
@@ -3821,7 +3829,7 @@ radv_dispatch(struct radv_cmd_buffer *cmd_buffer,
radv_emit_dispatch_packets(cmd_buffer, info);
}

radv_cmd_buffer_after_draw(cmd_buffer);
radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
}

void radv_CmdDispatch(
@@ -1771,6 +1771,7 @@ radv_get_preamble_cs(struct radv_queue *queue,

if (i == 0) {
si_cs_emit_cache_flush(cs,
false,
queue->device->physical_device->rad_info.chip_class,
NULL, 0,
queue->queue_family_index == RING_COMPUTE &&
@@ -1782,6 +1783,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
RADV_CMD_FLAG_INV_GLOBAL_L2);
} else if (i == 1) {
si_cs_emit_cache_flush(cs,
false,
queue->device->physical_device->rad_info.chip_class,
NULL, 0,
queue->queue_family_index == RING_COMPUTE &&
@@ -1994,32 +1996,6 @@ VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
return ret;
}

/* Signals fence as soon as all the work currently put on queue is done. */
static VkResult radv_signal_fence(struct radv_queue *queue,
struct radv_fence *fence)
{
int ret;
VkResult result;
struct radv_winsys_sem_info sem_info;

result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
radv_fence_to_handle(fence));
if (result != VK_SUCCESS)
return result;

ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index],
1, NULL, NULL, &sem_info,
false, fence->fence);
radv_free_sem_info(&sem_info);

/* TODO: find a better error */
if (ret)
return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);

return VK_SUCCESS;
}

VkResult radv_QueueSubmit(
VkQueue _queue,
uint32_t submitCount,
@@ -2148,7 +2124,18 @@ VkResult radv_QueueSubmit(

if (fence) {
if (!fence_emitted) {
radv_signal_fence(queue, fence);
struct radv_winsys_sem_info sem_info;

result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
_fence);
if (result != VK_SUCCESS)
return result;

ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
&queue->device->empty_cs[queue->queue_family_index],
1, NULL, NULL, &sem_info,
false, base_fence);
radv_free_sem_info(&sem_info);
}
fence->submitted = true;
}
@@ -2669,11 +2656,8 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,

}

if (fence) {
if (!fence_emitted) {
radv_signal_fence(queue, fence);
}
fence->submitted = true;
if (fence && !fence_emitted) {
fence->signalled = true;
}

return VK_SUCCESS;
@@ -81,7 +81,7 @@ EXTENSIONS = [
Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
Extension('VK_KHX_multiview', 1, False),
Extension('VK_KHX_multiview', 1, True),
Extension('VK_EXT_debug_report', 9, True),
Extension('VK_EXT_discard_rectangles', 1, True),
Extension('VK_EXT_external_memory_dma_buf', 1, True),
@@ -1069,55 +1069,10 @@ radv_image_view_init(struct radv_image_view *iview,
}

if (iview->vk_format != image->vk_format) {
unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
unsigned img_bh = vk_format_get_blockheight(image->vk_format);

iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

/* Comment ported from amdvlk -
 * If we have the following image:
 *              Uncompressed pixels   Compressed block sizes (4x4)
 *      mip0:       22 x 22                   6 x 6
 *      mip1:       11 x 11                   3 x 3
 *      mip2:        5 x  5                   2 x 2
 *      mip3:        2 x  2                   1 x 1
 *      mip4:        1 x  1                   1 x 1
 *
 * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and the HW is
 * calculating the degradation of the block sizes down the mip-chain as follows (straight-up
 * divide-by-two integer math):
 *      mip0:  6x6
 *      mip1:  3x3
 *      mip2:  1x1
 *      mip3:  1x1
 *
 * This means that mip2 will be missing texels.
 *
 * Fix this by calculating the base mip's width and height, then convert that, and round it
 * back up to get the level 0 size.
 * Clamp the converted size between the original values, and next power of two, which
 * means we don't oversize the image.
 */
if (device->physical_device->rad_info.chip_class >= GFX9 &&
vk_format_is_compressed(image->vk_format) &&
!vk_format_is_compressed(iview->vk_format)) {
unsigned rounded_img_w = util_next_power_of_two(iview->extent.width);
unsigned rounded_img_h = util_next_power_of_two(iview->extent.height);
unsigned lvl_width = radv_minify(image->info.width , range->baseMipLevel);
unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

lvl_width <<= range->baseMipLevel;
lvl_height <<= range->baseMipLevel;

iview->extent.width = CLAMP(lvl_width, iview->extent.width, rounded_img_w);
iview->extent.height = CLAMP(lvl_height, iview->extent.height, rounded_img_h);
}
iview->extent.width = round_up_u32(iview->extent.width * vk_format_get_blockwidth(iview->vk_format),
vk_format_get_blockwidth(image->vk_format));
iview->extent.height = round_up_u32(iview->extent.height * vk_format_get_blockheight(iview->vk_format),
vk_format_get_blockheight(image->vk_format));
}

iview->base_layer = range->baseArrayLayer;
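Working the ported comment's numbers through the new GFX9 clamping code above, with minimal stand-ins (assumed behavior, not Mesa's exact helpers) for radv_minify, round_up_u32, and util_next_power_of_two:

#include <stdio.h>

static unsigned round_up(unsigned a, unsigned b) { return (a + b - 1) / b; }
static unsigned minify(unsigned v, unsigned l) { v >>= l; return v ? v : 1; }
static unsigned next_pow2(unsigned v) { unsigned p = 1; while (p < v) p <<= 1; return p; }

int main(void)
{
    /* 22x22 BC-compressed image (4x4 blocks) viewed uncompressed,
     * baseMipLevel = 2, as in the comment's worked example. */
    unsigned base = 22, level = 2, img_bw = 4, view_bw = 1;
    unsigned extent = round_up(base * view_bw, img_bw);            /* 6 blocks */
    unsigned rounded = next_pow2(extent);                          /* 8 */
    unsigned lvl = round_up(minify(base, level) * view_bw, img_bw) << level; /* 2 << 2 = 8 */
    unsigned w = lvl < extent ? extent : (lvl > rounded ? rounded : lvl);    /* CLAMP -> 8 */
    /* Programming 8 as the base width makes the HW's divide-by-two
     * minification yield 2 at mip2, matching the 2x2 blocks needed. */
    printf("programmed base width: %u, HW minify at mip2: %u\n", w, minify(w, level));
    return 0;
}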
@@ -714,6 +714,9 @@ radv_pipeline_init_depth_stencil_state(struct radv_pipeline *pipeline,
S_028800_Z_WRITE_ENABLE(vkds->depthWriteEnable ? 1 : 0) |
S_028800_ZFUNC(vkds->depthCompareOp) |
S_028800_DEPTH_BOUNDS_ENABLE(vkds->depthBoundsTestEnable ? 1 : 0);

/* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
ds->db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(attachment->samples > 2);
}

if (has_stencil_attachment && vkds->stencilTestEnable) {
@@ -2494,7 +2497,7 @@ radv_pipeline_init(struct radv_pipeline *pipeline,

unsigned z_order;
pipeline->graphics.db_shader_control = 0;
if (ps->info.fs.early_fragment_test || !ps->info.fs.writes_memory)
if (ps->info.fs.early_fragment_test || !ps->info.info.ps.writes_memory)
z_order = V_02880C_EARLY_Z_THEN_LATE_Z;
else
z_order = V_02880C_LATE_Z;
@@ -2506,8 +2509,8 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
S_02880C_MASK_EXPORT_ENABLE(ps->info.fs.writes_sample_mask) |
S_02880C_Z_ORDER(z_order) |
S_02880C_DEPTH_BEFORE_SHADER(ps->info.fs.early_fragment_test) |
S_02880C_EXEC_ON_HIER_FAIL(ps->info.fs.writes_memory) |
S_02880C_EXEC_ON_NOOP(ps->info.fs.writes_memory);
S_02880C_EXEC_ON_HIER_FAIL(ps->info.info.ps.writes_memory) |
S_02880C_EXEC_ON_NOOP(ps->info.info.ps.writes_memory);

if (pipeline->device->physical_device->has_rbplus)
pipeline->graphics.db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1);
@@ -1021,6 +1021,7 @@ void si_emit_wait_fence(struct radeon_winsys_cs *cs,
uint64_t va, uint32_t ref,
uint32_t mask);
void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
bool predicated,
enum chip_class chip_class,
uint32_t *fence_ptr, uint64_t va,
bool is_mec,
@@ -917,6 +917,7 @@ si_emit_acquire_mem(struct radeon_winsys_cs *cs,

void
si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
bool predicated,
enum chip_class chip_class,
uint32_t *flush_cnt,
uint64_t flush_va,
@@ -947,7 +948,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
/* Necessary for DCC */
if (chip_class >= VI) {
si_cs_emit_write_event_eop(cs,
false,
predicated,
chip_class,
is_mec,
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
@@ -961,12 +962,12 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
}

if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}

if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}

@@ -979,7 +980,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
}

if (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}

@@ -1036,14 +1037,14 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
assert(flush_cnt);
uint32_t old_fence = (*flush_cnt)++;

si_cs_emit_write_event_eop(cs, false, chip_class, false, cb_db_event, tc_flags, 1,
si_cs_emit_write_event_eop(cs, predicated, chip_class, false, cb_db_event, tc_flags, 1,
flush_va, old_fence, *flush_cnt);
si_emit_wait_fence(cs, false, flush_va, *flush_cnt, 0xffffffff);
si_emit_wait_fence(cs, predicated, flush_va, *flush_cnt, 0xffffffff);
}

/* VGT state sync */
if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, predicated));
radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
}

@@ -1056,13 +1057,13 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
!is_mec) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, predicated));
radeon_emit(cs, 0);
}

if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
(chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
si_emit_acquire_mem(cs, is_mec, false, chip_class >= GFX9,
si_emit_acquire_mem(cs, is_mec, predicated, chip_class >= GFX9,
cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
@@ -1076,7 +1077,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
*
* WB doesn't work without NC.
*/
si_emit_acquire_mem(cs, is_mec, false,
si_emit_acquire_mem(cs, is_mec, predicated,
chip_class >= GFX9,
cp_coher_cntl |
S_0301F0_TC_WB_ACTION_ENA(1) |
@@ -1085,7 +1086,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
}
if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
si_emit_acquire_mem(cs, is_mec,
false, chip_class >= GFX9,
predicated, chip_class >= GFX9,
cp_coher_cntl |
S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
@@ -1096,7 +1097,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
* Therefore, it should be last. Done in PFP.
*/
if (cp_coher_cntl)
si_emit_acquire_mem(cs, is_mec, false, chip_class >= GFX9, cp_coher_cntl);
si_emit_acquire_mem(cs, is_mec, predicated, chip_class >= GFX9, cp_coher_cntl);
}

void
@@ -1126,6 +1127,7 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
ptr = &cmd_buffer->gfx9_fence_idx;
}
si_cs_emit_cache_flush(cmd_buffer->cs,
cmd_buffer->state.predicating,
cmd_buffer->device->physical_device->rad_info.chip_class,
ptr, va,
radv_cmd_buffer_uses_mec(cmd_buffer),
@@ -585,7 +585,6 @@ union packed_tex_data {
unsigned component:2;
unsigned has_texture_deref:1;
unsigned has_sampler_deref:1;
unsigned unused:10; /* Mark unused for valgrind. */
} u;
};
@@ -115,6 +115,7 @@ struct _egl_extensions
EGLBoolean KHR_config_attribs;
EGLBoolean KHR_context_flush_control;
EGLBoolean KHR_create_context;
EGLBoolean KHR_create_context_no_error;
EGLBoolean KHR_fence_sync;
EGLBoolean KHR_get_all_proc_addresses;
EGLBoolean KHR_gl_colorspace;
@@ -130,7 +131,6 @@ struct _egl_extensions
EGLBoolean KHR_reusable_sync;
EGLBoolean KHR_surfaceless_context;
EGLBoolean KHR_wait_sync;
EGLBoolean KHR_create_context_no_error;

EGLBoolean MESA_drm_image;
EGLBoolean MESA_image_dma_buf_export;
@@ -160,7 +160,7 @@ libegl = shared_library(
c_args : [
c_vis_args,
c_args_for_egl,
'-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
'-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_driver_dir),
'-D_EGL_BUILT_IN_DRIVER_DRI2',
'-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_@0@'.format(egl_native_platform.to_upper()),
],
@@ -33,7 +33,6 @@

#include "state_tracker/drm_driver.h"
#include "pipe/p_screen.h"
#include "util/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"

@@ -74,7 +73,7 @@ renderonly_create_kms_dumb_buffer_for_resource(struct pipe_resource *rsc,
struct drm_mode_create_dumb create_dumb = {
.width = rsc->width0,
.height = rsc->height0,
.bpp = util_format_get_blocksizebits(rsc->format),
.bpp = 32,
};
struct drm_mode_destroy_dumb destroy_dumb = { };
@@ -228,7 +228,7 @@ util_probe_rect_rgba_multi(struct pipe_context *ctx, struct pipe_resource *tex,
expected[e*4], expected[e*4+1],
expected[e*4+2], expected[e*4+3]);
printf("Got: %.3f, %.3f, %.3f, %.3f\n",
probe[0], probe[1], probe[2], probe[2]);
probe[0], probe[1], probe[2], probe[3]);
pass = false;
goto done;
}
@@ -592,6 +592,113 @@ test_sync_file_fences(struct pipe_context *ctx)
util_report_result(pass);
}

static void
test_texture_barrier(struct pipe_context *ctx, bool use_fbfetch)
{
struct cso_context *cso;
struct pipe_resource *cb;
void *fs, *vs;
struct pipe_sampler_view *view = NULL;
const char *text;

if (!ctx->screen->get_param(ctx->screen, PIPE_CAP_TEXTURE_BARRIER)) {
util_report_result_helper(SKIP, "%s: %s", __func__,
use_fbfetch ? "FBFETCH" : "sampler");
return;
}
if (use_fbfetch &&
!ctx->screen->get_param(ctx->screen, PIPE_CAP_TGSI_FS_FBFETCH)) {
util_report_result_helper(SKIP, "%s: %s", __func__,
use_fbfetch ? "FBFETCH" : "sampler");
return;
}

cso = cso_create_context(ctx, 0);
cb = util_create_texture2d(ctx->screen, 256, 256,
PIPE_FORMAT_R8G8B8A8_UNORM);
util_set_common_states_and_clear(cso, ctx, cb);

if (use_fbfetch) {
/* Fragment shader. */
text = "FRAG\n"
"DCL OUT[0], COLOR[0]\n"
"DCL TEMP[0]\n"
"IMM[0] FLT32 { 0.1, 0.2, 0.3, 0.4}\n"

"FBFETCH TEMP[0], OUT[0]\n"
"ADD OUT[0], TEMP[0], IMM[0]\n"
"END\n";
} else {
struct pipe_sampler_view templ = {{0}};
templ.format = cb->format;
templ.target = cb->target;
templ.swizzle_r = PIPE_SWIZZLE_X;
templ.swizzle_g = PIPE_SWIZZLE_Y;
templ.swizzle_b = PIPE_SWIZZLE_Z;
templ.swizzle_a = PIPE_SWIZZLE_W;
view = ctx->create_sampler_view(ctx, cb, &templ);
ctx->set_sampler_views(ctx, PIPE_SHADER_FRAGMENT, 0, 1, &view);

/* Fragment shader. */
text = "FRAG\n"
"DCL SV[0], POSITION\n"
"DCL SAMP[0]\n"
"DCL SVIEW[0], 2D, FLOAT\n"
"DCL OUT[0], COLOR[0]\n"
"DCL TEMP[0]\n"
"IMM[0] FLT32 { 0.1, 0.2, 0.3, 0.4}\n"
"IMM[1] INT32 { 0, 0, 0, 0}\n"

"F2I TEMP[0].xy, SV[0].xyyy\n"
"MOV TEMP[0].z, IMM[1].xxxx\n"
"TXF TEMP[0], TEMP[0].xyzz, SAMP[0], 2D\n"
"ADD OUT[0], TEMP[0], IMM[0]\n"
"END\n";
}

struct tgsi_token tokens[1000];
struct pipe_shader_state state;

if (!tgsi_text_translate(text, tokens, ARRAY_SIZE(tokens))) {
assert(0);
util_report_result_helper(FAIL, "%s: %s", __func__,
use_fbfetch ? "FBFETCH" : "sampler");
return;
}
pipe_shader_state_from_tgsi(&state, tokens);
#if 0
tgsi_dump(state.tokens, 0);
#endif

fs = ctx->create_fs_state(ctx, &state);
cso_set_fragment_shader_handle(cso, fs);

/* Vertex shader. */
vs = util_set_passthrough_vertex_shader(cso, ctx, false);

for (int i = 0; i < 2; i++) {
ctx->texture_barrier(ctx,
use_fbfetch ? PIPE_TEXTURE_BARRIER_FRAMEBUFFER :
PIPE_TEXTURE_BARRIER_SAMPLER);
util_draw_fullscreen_quad(cso);
}

/* Probe pixels. */
static const float expected[] = {0.3, 0.5, 0.7, 0.9};
bool pass = util_probe_rect_rgba(ctx, cb, 0, 0,
cb->width0, cb->height0, expected);

/* Cleanup. */
cso_destroy_context(cso);
ctx->delete_vs_state(ctx, vs);
ctx->delete_fs_state(ctx, fs);
pipe_sampler_view_reference(&view, NULL);
pipe_resource_reference(&cb, NULL);

util_report_result_helper(pass, "%s: %s", __func__,
use_fbfetch ? "FBFETCH" : "sampler");
}

/**
* Run all tests. This should be run with a clean context after
* context_create.
@@ -607,6 +714,8 @@ util_run_tests(struct pipe_screen *screen)
null_sampler_view(ctx, TGSI_TEXTURE_BUFFER);
util_test_constant_buffer(ctx, NULL);
test_sync_file_fences(ctx);
test_texture_barrier(ctx, false);
test_texture_barrier(ctx, true);

ctx->destroy(ctx);
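The probe values in test_texture_barrier() above follow from simple accumulation, assuming the shared helper clears the color buffer to {0.1, 0.1, 0.1, 0.1} (the clear color is not shown in this hunk): each of the two draws adds IMM[0] = {0.1, 0.2, 0.3, 0.4} to the previous contents.

#include <stdio.h>

int main(void)
{
    /* Assumed clear color, then two additive passes as in the test. */
    float c[4] = {0.1f, 0.1f, 0.1f, 0.1f};
    const float imm[4] = {0.1f, 0.2f, 0.3f, 0.4f};
    for (int pass = 0; pass < 2; pass++)
        for (int i = 0; i < 4; i++)
            c[i] += imm[i];
    printf("%.1f %.1f %.1f %.1f\n", c[0], c[1], c[2], c[3]); /* 0.3 0.5 0.7 0.9 */
    return 0;
}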
@@ -766,7 +766,7 @@ static void compute_emit_cs(struct r600_context *rctx,
} else {
uint32_t rat_mask;

rat_mask = evergreen_construct_rat_mask(rctx, &rctx->cb_misc_state, 0);
rat_mask = ((1ULL << (((unsigned)rctx->cb_misc_state.nr_image_rats + rctx->cb_misc_state.nr_buffer_rats) * 4)) - 1);
radeon_compute_set_context_reg(cs, R_028238_CB_TARGET_MASK,
rat_mask);
}
@@ -1998,31 +1998,13 @@ static void evergreen_emit_polygon_offset(struct r600_context *rctx, struct r600
			       pa_su_poly_offset_db_fmt_cntl);
}

uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a,
				      unsigned nr_cbufs)
{
	unsigned base_mask = 0;
	unsigned dirty_mask = a->image_rat_enabled_mask;
	while (dirty_mask) {
		unsigned idx = u_bit_scan(&dirty_mask);
		base_mask |= (0xf << (idx * 4));
	}
	unsigned offset = util_last_bit(a->image_rat_enabled_mask);
	dirty_mask = a->buffer_rat_enabled_mask;
	while (dirty_mask) {
		unsigned idx = u_bit_scan(&dirty_mask);
		base_mask |= (0xf << (idx + offset) * 4);
	}
	return base_mask << (nr_cbufs * 4);
}
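
The helper above packs 4 write-enable bits per RAT into CB_TARGET_MASK: image RATs first, buffer RATs immediately above them, and the whole field shifted past the bound color buffers. A minimal standalone sketch of the same math (plain C++; the counts are made-up example values, and __builtin_ctz/__builtin_clz stand in for u_bit_scan()/util_last_bit()):

    #include <cstdint>
    #include <cstdio>

    int main()
    {
        unsigned nr_cbufs = 2;                   /* two bound color buffers (assumed) */
        unsigned image_rat_enabled_mask = 0x3;   /* two image RATs (assumed) */
        unsigned buffer_rat_enabled_mask = 0x1;  /* one buffer RAT (assumed) */

        unsigned base_mask = 0;
        for (unsigned m = image_rat_enabled_mask; m;) {
            unsigned idx = __builtin_ctz(m);     /* stand-in for u_bit_scan() */
            m &= m - 1;
            base_mask |= 0xfu << (idx * 4);
        }
        /* util_last_bit(): position just past the highest image RAT */
        unsigned offset = 32 - __builtin_clz(image_rat_enabled_mask);
        for (unsigned m = buffer_rat_enabled_mask; m;) {
            unsigned idx = __builtin_ctz(m);
            m &= m - 1;
            base_mask |= 0xfu << ((idx + offset) * 4);
        }
        printf("CB_TARGET_MASK rat bits: 0x%x\n", base_mask << (nr_cbufs * 4));
        /* -> 0xfff00: three RATs (12 bits) above the two color buffers (8 bits) */
        return 0;
    }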

static void evergreen_emit_cb_misc_state(struct r600_context *rctx, struct r600_atom *atom)
{
	struct radeon_winsys_cs *cs = rctx->b.gfx.cs;
	struct r600_cb_misc_state *a = (struct r600_cb_misc_state*)atom;
	unsigned fb_colormask = (1ULL << ((unsigned)a->nr_cbufs * 4)) - 1;
	unsigned ps_colormask = (1ULL << ((unsigned)a->nr_ps_color_outputs * 4)) - 1;
	unsigned rat_colormask = evergreen_construct_rat_mask(rctx, a, a->nr_cbufs);
	unsigned rat_colormask = ((1ULL << ((unsigned)(a->nr_image_rats + a->nr_buffer_rats) * 4)) - 1) << (a->nr_cbufs * 4);
	radeon_set_context_reg_seq(cs, R_028238_CB_TARGET_MASK, 2);
	radeon_emit(cs, (a->blend_colormask & fb_colormask) | rat_colormask); /* R_028238_CB_TARGET_MASK */
	/* This must match the used export instructions exactly.
@@ -4050,9 +4032,8 @@ static void evergreen_set_shader_buffers(struct pipe_context *ctx,
	if (old_mask != istate->enabled_mask)
		r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);

	/* construct the target mask */
	if (rctx->cb_misc_state.buffer_rat_enabled_mask != istate->enabled_mask) {
		rctx->cb_misc_state.buffer_rat_enabled_mask = istate->enabled_mask;
	if (rctx->cb_misc_state.nr_buffer_rats != util_bitcount(istate->enabled_mask)) {
		rctx->cb_misc_state.nr_buffer_rats = util_bitcount(istate->enabled_mask);
		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
	}

@@ -4227,8 +4208,8 @@ static void evergreen_set_shader_images(struct pipe_context *ctx,
	if (old_mask != istate->enabled_mask)
		r600_mark_atom_dirty(rctx, &rctx->framebuffer.atom);

	if (rctx->cb_misc_state.image_rat_enabled_mask != istate->enabled_mask) {
		rctx->cb_misc_state.image_rat_enabled_mask = istate->enabled_mask;
	if (rctx->cb_misc_state.nr_image_rats != util_bitcount(istate->enabled_mask)) {
		rctx->cb_misc_state.nr_image_rats = util_bitcount(istate->enabled_mask);
		r600_mark_atom_dirty(rctx, &rctx->cb_misc_state.atom);
	}

@@ -152,8 +152,8 @@ struct r600_cb_misc_state {
	unsigned blend_colormask; /* 8*4 bits for 8 RGBA colorbuffers */
	unsigned nr_cbufs;
	unsigned nr_ps_color_outputs;
	unsigned image_rat_enabled_mask;
	unsigned buffer_rat_enabled_mask;
	unsigned nr_image_rats;
	unsigned nr_buffer_rats;
	bool multiwrite;
	bool dual_src_blend;
};
@@ -700,9 +700,6 @@ void evergreen_init_color_surface_rat(struct r600_context *rctx,
				      struct r600_surface *surf);
void evergreen_update_db_shader_control(struct r600_context * rctx);
bool evergreen_adjust_gprs(struct r600_context *rctx);

uint32_t evergreen_construct_rat_mask(struct r600_context *rctx, struct r600_cb_misc_state *a,
				      unsigned nr_cbufs);
/* r600_blit.c */
void r600_init_blit_functions(struct r600_context *rctx);
void r600_decompress_depth_textures(struct r600_context *rctx,
@@ -665,7 +665,6 @@ public:
		return false;

	switch (hw_chip) {
	case HW_CHIP_HEMLOCK:
	case HW_CHIP_CYPRESS:
	case HW_CHIP_JUNIPER:
		return false;

@@ -208,25 +208,8 @@ void bc_finalizer::finalize_if(region_node* r) {
	r->push_front(if_jump);
	r->push_back(if_pop);

	/* the depart/repeat 1 is actually part of the "else" code.
	 * if it's a depart for an outer loop region it will want to
	 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need
	 * to emit the else clause.
	 */
	bool has_else = n_if->next;

	if (repdep1->is_depart()) {
		depart_node *dep1 = static_cast<depart_node*>(repdep1);
		if (dep1->target != r && dep1->target->is_loop())
			has_else = true;
	}

	if (repdep1->is_repeat()) {
		repeat_node *rep1 = static_cast<repeat_node*>(repdep1);
		if (rep1->target != r && rep1->target->is_loop())
			has_else = true;
	}

	if (has_else) {
		cf_node *nelse = sh.create_cf(CF_OP_ELSE);
		n_if->insert_after(nelse);
@@ -610,7 +610,7 @@ void si_llvm_load_input_vs(

		input[i] = ac_build_buffer_load_format(&ctx->ac, t_list,
						       vertex_index, voffset,
						       true);
						       4, true);
	}

	/* Break up the vec4 into individual components */

@@ -1826,7 +1826,7 @@ static void build_tex_intrinsic(const struct lp_build_tgsi_action *action,
					emit_data->args[0],
					emit_data->args[2],
					emit_data->args[1],
					true);
					4, true);
		return;
	}

@@ -448,10 +448,10 @@ typedef enum {
                                SVGADX_DXFMT_MULTISAMPLE_8 )

typedef union {
   Bool b;
   SVGA3dBool b;
   uint32 u;
   int32 i;
   float f;
   int32 i;
   float f;
} SVGA3dDevCapResult;

#endif /* _SVGA3D_DEVCAPS_H_ */
@@ -134,7 +134,7 @@ svga_context_create(struct pipe_screen *screen, void *priv, unsigned flags)

   svga = CALLOC_STRUCT(svga_context);
   if (!svga)
      goto cleanup;
      goto done;

   LIST_INITHEAD(&svga->dirty_buffers);

@@ -140,6 +140,8 @@ JITTER_CXX_SOURCES := \
	rasterizer/jitter/builder.cpp \
	rasterizer/jitter/builder.h \
	rasterizer/jitter/builder_math.h \
	rasterizer/jitter/builder_mem.cpp \
	rasterizer/jitter/builder_mem.h \
	rasterizer/jitter/builder_misc.cpp \
	rasterizer/jitter/builder_misc.h \
	rasterizer/jitter/fetch_jit.cpp \

@@ -1,4 +1,4 @@
# Copyright © 2017-2018 Intel Corporation
# Copyright © 2017 Intel Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -68,6 +68,8 @@ files_swr_mesa = files(
  'rasterizer/jitter/builder.cpp',
  'rasterizer/jitter/builder.h',
  'rasterizer/jitter/builder_math.h',
  'rasterizer/jitter/builder_mem.cpp',
  'rasterizer/jitter/builder_mem.h',
  'rasterizer/jitter/builder_misc.cpp',
  'rasterizer/jitter/builder_misc.h',
  'rasterizer/jitter/fetch_jit.cpp',
@@ -149,22 +151,7 @@ files_swr_arch = files(
swr_context_files = files('swr_context.h')
swr_state_files = files('rasterizer/core/state.h')
swr_event_proto_files = files('rasterizer/archrast/events.proto')
swr_gen_backend_files = files('rasterizer/codegen/templates/gen_backend.cpp')
swr_gen_rasterizer_files = files('rasterizer/codegen/templates/gen_rasterizer.cpp')
swr_gen_header_init_files = files('rasterizer/codegen/templates/gen_header_init.hpp')

swr_gen_llvm_ir_macros_py = files('rasterizer/codegen/gen_llvm_ir_macros.py')
swr_gen_backends_py = files('rasterizer/codegen/gen_backends.py')

swr_gen_builder_depends = files(
  'rasterizer/codegen/templates/gen_builder.hpp',
  'rasterizer/codegen/gen_common.py'
)


subdir('rasterizer/jitter')
subdir('rasterizer/codegen')
subdir('rasterizer/core/backends')

swr_incs = include_directories(
  'rasterizer/codegen', 'rasterizer/core', 'rasterizer/jitter',
@@ -193,7 +180,7 @@ if with_swr_arches.contains('avx')
  swr_arch_defines += '-DHAVE_SWR_AVX'
  swr_arch_libs += shared_library(
    'swrAVX',
    [files_swr_common, files_swr_arch],
    files_swr_common,
    cpp_args : [swr_cpp_args, swr_avx_args, '-DKNOB_ARCH=KNOB_ARCH_AVX'],
    link_args : [ld_args_gc_sections],
    include_directories : [swr_incs],
@@ -225,7 +212,7 @@ if with_swr_arches.contains('avx2')
  swr_arch_defines += '-DHAVE_SWR_AVX2'
  swr_arch_libs += shared_library(
    'swrAVX2',
    [files_swr_common, files_swr_arch],
    files_swr_common,
    cpp_args : [swr_cpp_args, swr_avx2_args, '-DKNOB_ARCH=KNOB_ARCH_AVX2'],
    link_args : [ld_args_gc_sections],
    include_directories : [swr_incs],
@@ -249,7 +236,7 @@ if with_swr_arches.contains('knl')
  swr_arch_defines += '-DHAVE_SWR_KNL'
  swr_arch_libs += shared_library(
    'swrKNL',
    [files_swr_common, files_swr_arch],
    files_swr_common,
    cpp_args : [
      swr_cpp_args, swr_knl_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512',
      '-DKNOB_ARCH_KNIGHTS',
@@ -276,7 +263,7 @@ if with_swr_arches.contains('skx')
  swr_arch_defines += '-DHAVE_SWR_SKX'
  swr_arch_libs += shared_library(
    'swrSKX',
    [files_swr_common, files_swr_arch],
    files_swr_common,
    cpp_args : [swr_cpp_args, swr_skx_args, '-DKNOB_ARCH=KNOB_ARCH_AVX512'],
    link_args : [ld_args_gc_sections],
    include_directories : [swr_incs],
@@ -1,4 +1,4 @@
# Copyright (C) 2014-2016 Intel Corporation.   All Rights Reserved.
# Copyright (C) 2014-2018 Intel Corporation.   All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),

@@ -1,4 +1,4 @@
# Copyright © 2017-2018 Intel Corporation
# Copyright © 2017 Intel Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
@@ -40,6 +40,40 @@ gen_knobs_h = custom_target(
  ),
)

gen_builder_hpp = custom_target(
  'gen_builder.hpp',
  input : [
    'gen_llvm_ir_macros.py',
    join_paths(
      dep_llvm.get_configtool_variable('includedir'), 'llvm', 'IR',
      'IRBuilder.h'
    )
  ],
  output : 'gen_builder.hpp',
  command : [
    prog_python2, '@INPUT0@', '--input', '@INPUT1@', '--output', '@OUTPUT@',
    '--gen_h', '--output-dir', meson.current_build_dir()
  ],
  depend_files : files(
    'templates/gen_builder.hpp',
    'gen_common.py',
  ),
  build_by_default : true,
)

gen_builder_x86_hpp = custom_target(
  'gen_builder_x86.hpp',
  input : 'gen_llvm_ir_macros.py',
  output : 'gen_builder_x86.hpp',
  command : [
    prog_python2, '@INPUT0@', '--gen_x86_h', '--output', '@OUTPUT@',
    '--output-dir', meson.current_build_dir()
  ],
  depend_files : files(
    'templates/gen_builder.hpp',
    'gen_common.py',
  ),
)

# The generators above this are needed individually, while the below generators
# are all inputs to the same lib, so they don't need unique names.
@@ -80,3 +114,45 @@ foreach x : [['gen_ar_event.hpp', '--gen_event_hpp'],
  )
endforeach

files_swr_common += custom_target(
  'gen_backend_pixel',
  input : 'gen_backends.py',
  output : [
    'gen_BackendPixelRate0.cpp', 'gen_BackendPixelRate1.cpp',
    'gen_BackendPixelRate2.cpp', 'gen_BackendPixelRate3.cpp',
    'gen_BackendPixelRate.hpp',
  ],
  command : [
    prog_python2, '@INPUT@',
    '--outdir', meson.current_build_dir(),
    '--dim', '5', '2', '3', '2', '2', '2',
    '--numfiles', '4',
    '--cpp', '--hpp',
  ],
  depend_files : files(
    'templates/gen_backend.cpp',
    'templates/gen_header_init.hpp',
  ),
)

files_swr_common += custom_target(
  'gen_backend_raster',
  input : 'gen_backends.py',
  output : [
    'gen_rasterizer0.cpp', 'gen_rasterizer1.cpp',
    'gen_rasterizer2.cpp', 'gen_rasterizer3.cpp',
    'gen_rasterizer.hpp',
  ],
  command : [
    prog_python2, '@INPUT@',
    '--outdir', meson.current_build_dir(),
    '--rast',
    '--dim', '5', '2', '2', '3', '5', '2',
    '--numfiles', '4',
    '--cpp', '--hpp',
  ],
  depend_files : files(
    'templates/gen_rasterizer.cpp',
    'templates/gen_header_init.hpp',
  ),
)
@@ -1,5 +1,5 @@
/******************************************************************************
* Copyright (C) 2015-2017 Intel Corporation.   All Rights Reserved.
* Copyright (C) 2015-2018 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),

@@ -1,5 +1,5 @@
/******************************************************************************
* Copyright (C) 2015-2017 Intel Corporation.   All Rights Reserved.
* Copyright (C) 2015-2018 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -23,6 +23,7 @@

#include "common/os.h"
#include <vector>
#include <array>
#include <sstream>

#if defined(_WIN32)
@@ -151,3 +152,160 @@ void SWR_API CreateDirectoryPath(const std::string& path)
    }
#endif // Unix
}

/// Execute Command (block until finished)
/// @returns process exit value
int SWR_API ExecCmd(
    const std::string& cmd,             ///< (In) Command line string
    const char* pOptEnvStrings,         ///< (Optional In) Environment block for new process
    std::string* pOptStdOut,            ///< (Optional Out) Standard Output text
    std::string* pOptStdErr,            ///< (Optional Out) Standard Error text
    const std::string* pOptStdIn)       ///< (Optional In) Standard Input text
{
    int rvalue = -1;

#if defined(_WIN32)
    struct WinPipe
    {
        HANDLE hRead;
        HANDLE hWrite;
    };
    std::array<WinPipe, 3> hPipes = {};

    SECURITY_ATTRIBUTES saAttr = { sizeof(SECURITY_ATTRIBUTES) };
    saAttr.bInheritHandle = TRUE; // Pipe handles are inherited by child process.
    saAttr.lpSecurityDescriptor = NULL;

    {
        bool bFail = false;
        for (WinPipe& p : hPipes)
        {
            if (!CreatePipe(&p.hRead, &p.hWrite, &saAttr, 0))
            {
                bFail = true;
            }
        }

        if (bFail)
        {
            for (WinPipe& p : hPipes)
            {
                CloseHandle(p.hRead);
                CloseHandle(p.hWrite);
            }
            return rvalue;
        }
    }

    STARTUPINFOA StartupInfo{};
    StartupInfo.cb = sizeof(STARTUPINFOA);
    StartupInfo.dwFlags = STARTF_USESTDHANDLES;
    StartupInfo.dwFlags |= STARTF_USESHOWWINDOW;
    StartupInfo.wShowWindow = SW_HIDE;
    if (pOptStdIn)
    {
        StartupInfo.hStdInput = hPipes[0].hRead;
    }
    StartupInfo.hStdOutput = hPipes[1].hWrite;
    StartupInfo.hStdError = hPipes[2].hWrite;
    PROCESS_INFORMATION procInfo{};

    // CreateProcess can modify the string
    std::string local_cmd = cmd;

    BOOL ProcessValue = CreateProcessA(
        NULL,
        (LPSTR)local_cmd.c_str(),
        NULL,
        NULL,
        TRUE,
        0,
        (LPVOID)pOptEnvStrings,
        NULL,
        &StartupInfo,
        &procInfo);

    if (ProcessValue && procInfo.hProcess)
    {
        auto ReadFromPipe = [](HANDLE hPipe, std::string* pOutStr)
        {
            char buf[1024];
            DWORD dwRead = 0;
            DWORD dwAvail = 0;
            while (true)
            {
                if (!::PeekNamedPipe(hPipe, NULL, 0, NULL, &dwAvail, NULL))
                {
                    break;
                }

                if (!dwAvail) // no data available, return
                {
                    break;
                }

                if (!::ReadFile(hPipe, buf, std::min<size_t>(sizeof(buf) - 1, size_t(dwAvail)), &dwRead, NULL) || !dwRead)
                {
                    // error, the child process might have ended
                    break;
                }

                buf[dwRead] = 0;
                if (pOutStr)
                {
                    (*pOutStr) += buf;
                }
            }
        };
        bool bProcessEnded = false;
        size_t bytesWritten = 0;
        do
        {
            if (pOptStdIn && (pOptStdIn->size() > bytesWritten))
            {
                DWORD bytesToWrite = static_cast<DWORD>(pOptStdIn->size()) - bytesWritten;
                if (!::WriteFile(
                    hPipes[0].hWrite,
                    pOptStdIn->data() + bytesWritten,
                    bytesToWrite, &bytesToWrite, nullptr))
                {
                    // Failed to write to pipe
                    break;
                }
                bytesWritten += bytesToWrite;
            }

            // Give some timeslice (50ms), so we won't waste 100% cpu.
            bProcessEnded = (WaitForSingleObject(procInfo.hProcess, 50) == WAIT_OBJECT_0);

            ReadFromPipe(hPipes[1].hRead, pOptStdOut);
            ReadFromPipe(hPipes[2].hRead, pOptStdErr);
        }
        while (!bProcessEnded);

        DWORD exitVal = 0;
        if (!GetExitCodeProcess(procInfo.hProcess, &exitVal))
        {
            exitVal = 1;
        }

        CloseHandle(procInfo.hProcess);
        CloseHandle(procInfo.hThread);

        rvalue = exitVal;
    }

    for (WinPipe& p : hPipes)
    {
        CloseHandle(p.hRead);
        CloseHandle(p.hWrite);
    }

#else

    // Non-Windows implementation

#endif

    return rvalue;
}
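
A short usage sketch for the new helper (a hypothetical call site, Windows path; the command string is made up, and on non-Windows builds the stub above simply returns -1):

    // Run a command, capture stdout/stderr, and feed it some stdin.
    std::string out, err;
    const std::string input = "hello\n";
    int rc = ExecCmd("cmd.exe /c findstr hello", nullptr, &out, &err, &input);
    if (rc != 0)
    {
        fprintf(stderr, "child failed (%d): %s\n", rc, err.c_str());
    }

The 50 ms WaitForSingleObject timeout in the loop is what lets the parent keep draining both pipes while the child runs, avoiding the classic deadlock where a child blocks on a full stdout pipe.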

@@ -280,4 +280,13 @@ typedef MEGABYTE GIGABYTE[1024];
void SWR_API SetCurrentThreadName(const char* pThreadName);
void SWR_API CreateDirectoryPath(const std::string& path);

/// Execute Command (block until finished)
/// @returns process exit value
int SWR_API ExecCmd(
    const std::string& cmd,                     ///< (In) Command line string
    const char* pOptEnvStrings = nullptr,       ///< (Optional In) Environment block for new process
    std::string* pOptStdOut = nullptr,          ///< (Optional Out) Standard Output text
    std::string* pOptStdErr = nullptr,          ///< (Optional Out) Standard Error text
    const std::string* pOptStdIn = nullptr);    ///< (Optional In) Standard Input text

#endif//__SWR_OS_H__

@@ -1,57 +0,0 @@
# Copyright © 2017-2018 Intel Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


files_swr_common += custom_target(
  'gen_backend_pixel',
  input : swr_gen_backends_py,
  output : [
    'gen_BackendPixelRate0.cpp', 'gen_BackendPixelRate1.cpp',
    'gen_BackendPixelRate2.cpp', 'gen_BackendPixelRate3.cpp',
    'gen_BackendPixelRate.hpp',
  ],
  command : [
    prog_python2, '@INPUT@',
    '--outdir', '@OUTDIR@',
    '--dim', '5', '2', '3', '2', '2', '2',
    '--numfiles', '4',
    '--cpp', '--hpp',
  ],
  depend_files : [ swr_gen_backend_files, swr_gen_header_init_files ],
)

files_swr_common += custom_target(
  'gen_backend_raster',
  input : swr_gen_backends_py,
  output : [
    'gen_rasterizer0.cpp', 'gen_rasterizer1.cpp',
    'gen_rasterizer2.cpp', 'gen_rasterizer3.cpp',
    'gen_rasterizer.hpp',
  ],
  command : [
    prog_python2, '@INPUT@',
    '--outdir', '@OUTDIR@',
    '--rast',
    '--dim', '5', '2', '2', '3', '5', '2',
    '--numfiles', '4',
    '--cpp', '--hpp',
  ],
  depend_files : [ swr_gen_rasterizer_files, swr_gen_header_init_files ],
)
@@ -1032,31 +1032,31 @@ static void GeometryShaderStage(
            simdscalari vPrimId = _simd_set1_epi32(pPrimitiveId[inputPrim]);

            // Gather data from the SVG if provided.
            simdscalari vViewportIdx = SIMD16::setzero_si();
            simdscalari vRtIdx = SIMD16::setzero_si();
            SIMD8::Vec4 svgAttrib[4];
            simdscalari vViewportIdx = SIMD::setzero_si();
            simdscalari vRtIdx = SIMD::setzero_si();
            SIMD::Vec4 svgAttrib[4];

            if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
            {
                tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
                gsPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
            }

            if (state.backendState.readViewportArrayIndex)
            {
                vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
                vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);

                // OOB VPAI indices => forced to zero.
                vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
                simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
                simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
                vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
                tessPa.viewportArrayActive = true;
                vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
                simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
                simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
                vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
                gsPa.viewportArrayActive = true;
            }
            if (state.backendState.readRenderTargetArrayIndex)
            {
                vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
                tessPa.rtArrayActive = true;
                vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
                gsPa.rtArrayActive = true;
            }

            pfnClipFunc(pDC, gsPa, workerId, attrib, GenMask(gsPa.NumPrims()), vPrimId, vViewportIdx, vRtIdx);
@@ -1437,9 +1437,9 @@ static void TessellationStages(
        }
#else
        // Gather data from the SVG if provided.
        simdscalari vViewportIdx = SIMD16::setzero_si();
        simdscalari vRtIdx = SIMD16::setzero_si();
        SIMD8::Vec4 svgAttrib[4];
        simdscalari vViewportIdx = SIMD::setzero_si();
        simdscalari vRtIdx = SIMD::setzero_si();
        SIMD::Vec4 svgAttrib[4];

        if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
        {
@@ -1448,18 +1448,18 @@ static void TessellationStages(

        if (state.backendState.readViewportArrayIndex)
        {
            vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
            vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);

            // OOB VPAI indices => forced to zero.
            vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
            simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
            simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
            vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
            vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
            simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
            simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
            vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
            tessPa.viewportArrayActive = true;
        }
        if (state.backendState.readRenderTargetArrayIndex)
        {
            vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
            vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
            tessPa.rtArrayActive = true;
        }
        pfnClipFunc(pDC, tessPa, workerId, prim,
@@ -2053,30 +2053,30 @@ void ProcessDraw(
    SWR_ASSERT(pDC->pState->pfnProcessPrims);

    // Gather data from the SVG if provided.
    simdscalari vViewportIdx = SIMD16::setzero_si();
    simdscalari vRtIdx = SIMD16::setzero_si();
    SIMD8::Vec4 svgAttrib[4];
    simdscalari vViewportIdx = SIMD::setzero_si();
    simdscalari vRtIdx = SIMD::setzero_si();
    SIMD::Vec4 svgAttrib[4];

    if (state.backendState.readViewportArrayIndex || state.backendState.readRenderTargetArrayIndex)
    {
        tessPa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
        pa.Assemble(VERTEX_SGV_SLOT, svgAttrib);
    }

    if (state.backendState.readViewportArrayIndex)
    {
        vViewportIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);
        vViewportIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_VAI_COMP]);

        // OOB VPAI indices => forced to zero.
        vViewportIdx = SIMD8::max_epi32(vViewportIdx, SIMD8::setzero_si());
        simd16scalari vNumViewports = SIMD8::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
        simd16scalari vClearMask = SIMD8::cmplt_epi32(vViewportIdx, vNumViewports);
        vViewportIdx = SIMD8::and_si(vClearMask, vViewportIdx);
        tessPa.viewportArrayActive = true;
        vViewportIdx = SIMD::max_epi32(vViewportIdx, SIMD::setzero_si());
        simdscalari vNumViewports = SIMD::set1_epi32(KNOB_NUM_VIEWPORTS_SCISSORS);
        simdscalari vClearMask = SIMD::cmplt_epi32(vViewportIdx, vNumViewports);
        vViewportIdx = SIMD::and_si(vClearMask, vViewportIdx);
        pa.viewportArrayActive = true;
    }
    if (state.backendState.readRenderTargetArrayIndex)
    {
        vRtIdx = SIMD8::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
        tessPa.rtArrayActive = true;
        vRtIdx = SIMD::castps_si(svgAttrib[0][VERTEX_SGV_RTAI_COMP]);
        pa.rtArrayActive = true;
    }

    pDC->pState->pfnProcessPrims(pDC, pa, workerId, prim,
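
The clamp repeated in all three stages above (max with zero, compare against the viewport count, AND with the compare mask) forces any out-of-bounds viewport index to zero without branching. A scalar model of the per-lane behavior (plain C++; the KNOB value of 16 is an assumed example):

    #include <algorithm>
    #include <cstdint>

    // Scalar model of the SIMD clamp: OOB viewport indices collapse to 0.
    uint32_t ClampViewportIndex(int32_t idx)
    {
        const int32_t numViewports = 16;               // KNOB_NUM_VIEWPORTS_SCISSORS (assumed)
        int32_t v = std::max(idx, 0);                  // SIMD::max_epi32(idx, 0)
        int32_t inRange = (v < numViewports) ? ~0 : 0; // SIMD::cmplt_epi32 -> all-ones lane mask
        return uint32_t(v & inRange);                  // SIMD::and_si
    }
    // ClampViewportIndex(-3) == 0, ClampViewportIndex(5) == 5, ClampViewportIndex(40) == 0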
@@ -421,8 +421,7 @@ void JitManager::DumpToFile(Function *f, const char *fileName)
    sprintf(fName, "%s.%s.ll", funcName, fileName);
#endif
    raw_fd_ostream fd(fName, EC, llvm::sys::fs::F_None);
    Module* pModule = f->getParent();
    pModule->print(fd, nullptr);
    f->print(fd, nullptr);

#if defined(_WIN32)
    sprintf(fName, "%s\\cfg.%s.%s.dot", outDir.c_str(), funcName, fileName);
@@ -599,44 +598,12 @@ JitCache::JitCache()
    }
}

#if defined(_WIN32)
int ExecUnhookedProcess(const char* pCmdLine)
int ExecUnhookedProcess(const std::string& CmdLine, std::string* pStdOut, std::string* pStdErr)
{
    static const char *g_pEnv = "RASTY_DISABLE_HOOK=1\0";

    STARTUPINFOA StartupInfo{};
    StartupInfo.cb = sizeof(STARTUPINFOA);
    PROCESS_INFORMATION procInfo{};

    BOOL ProcessValue = CreateProcessA(
        NULL,
        (LPSTR)pCmdLine,
        NULL,
        NULL,
        TRUE,
        0,
        (LPVOID)g_pEnv,
        NULL,
        &StartupInfo,
        &procInfo);

    if (ProcessValue && procInfo.hProcess)
    {
        WaitForSingleObject(procInfo.hProcess, INFINITE);
        DWORD exitVal = 0;
        if (!GetExitCodeProcess(procInfo.hProcess, &exitVal))
        {
            exitVal = 1;
        }

        CloseHandle(procInfo.hProcess);

        return exitVal;
    }

    return -1;
    return ExecCmd(CmdLine, g_pEnv, pStdOut, pStdErr);
}
#endif

#if defined(_WIN64) && defined(ENABLE_JIT_DEBUG) && defined(JIT_BASE_DIR)
EXTERN_C IMAGE_DOS_HEADER __ImageBase;
@@ -38,6 +38,8 @@ namespace SwrJit
    struct Builder
    {
        Builder(JitManager *pJitMgr);
        virtual ~Builder() {}

        IRBuilder<> *IRB() { return mpIRBuilder; };
        JitManager *JM() { return mpJitMgr; }

@@ -92,5 +94,6 @@ namespace SwrJit
#include "gen_builder_x86.hpp"
#include "builder_misc.h"
#include "builder_math.h"
#include "builder_mem.h"
    };
}
816  src/gallium/drivers/swr/rasterizer/jitter/builder_mem.cpp  (new file)
@@ -0,0 +1,816 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file builder_mem.cpp
*
* @brief Implementation for the memory-access builder functions
*
* Notes:
*
******************************************************************************/
#include "jit_pch.hpp"
#include "builder.h"
#include "common/rdtsc_buckets.h"

#include <cstdarg>


namespace SwrJit
{

    Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(i);
        return GEPA(ptr, indices);
    }

    Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(C(i));
        return GEPA(ptr, indices);
    }

    Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(i);
        return IN_BOUNDS_GEP(ptr, indices);
    }

    Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(C(i));
        return IN_BOUNDS_GEP(ptr, indices);
    }

    LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(C(i));
        return LOAD(GEPA(basePtr, valIndices), name);
    }

    LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list<Value*> &indices, const llvm::Twine& name)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(i);
        return LOAD(GEPA(basePtr, valIndices), name);
    }

    StoreInst *Builder::STORE(Value *val, Value *basePtr, const std::initializer_list<uint32_t> &indices)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(C(i));
        return STORE(val, GEPA(basePtr, valIndices));
    }

    StoreInst *Builder::STOREV(Value *val, Value *basePtr, const std::initializer_list<Value*> &indices)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(i);
        return STORE(val, GEPA(basePtr, valIndices));
    }
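
    // These overloads just fold constant indices through GEPA into the
    // following load or store.  A sketch of the resulting call-site shape
    // (hypothetical pState value and field indices, not code from this patch):
    //
    //     Value *pField2 = GEP(pState, { 0u, 2u });          // GEPA(ptr, {C(0), C(2)})
    //     Value *val     = LOAD(pState, { 0u, 2u }, "f2");   // GEP folded into the load
    //     STORE(val, pState, { 0u, 3u });                    // write the neighboring field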

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate an i32 masked load operation in LLVM IR.  If not
    /// supported on the underlying platform, emulate it with a float masked load
    /// @param src - base address pointer for the load
    /// @param vMask - SIMD wide mask that controls whether to access memory or load 0
    Value *Builder::MASKLOADD(Value* src, Value* mask)
    {
        Value* vResult;
        // use avx2 maskload instruction if available
        if (JM()->mArch.AVX2())
        {
            Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_maskload_d_256);
            vResult = CALL(func, { src, mask });
        }
        else
        {
            // maskload intrinsic expects integer mask operand in llvm >= 3.8
#if (LLVM_VERSION_MAJOR > 3) || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8)
            mask = BITCAST(mask, VectorType::get(mInt32Ty, mVWidth));
#else
            mask = BITCAST(mask, VectorType::get(mFP32Ty, mVWidth));
#endif
            Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx_maskload_ps_256);
            vResult = BITCAST(CALL(func, { src, mask }), VectorType::get(mInt32Ty, mVWidth));
        }
        return vResult;
    }
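
    // Semantically a masked load is a per-lane select between memory and
    // zero; a scalar model in plain C++ (8-wide to match the AVX path):
    //
    //     void MaskLoadD(const int32_t* src, const int32_t mask[8], int32_t out[8])
    //     {
    //         for (int i = 0; i < 8; ++i)
    //             out[i] = (mask[i] < 0) ? src[i] : 0;  // sign bit == lane enable
    //     }
    //
    // The hardware form additionally guarantees masked-off lanes never fault,
    // which a naive scalar loop over src[] would not.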

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate a masked gather operation in LLVM IR.  If not
    /// supported on the underlying platform, emulate it with loads
    /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
    /// @param pBase - Int8* base VB address pointer value
    /// @param vIndices - SIMD wide value of VB byte offsets
    /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
    /// @param scale - value to scale indices by
    Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale, Value *pDrawContext)
    {
        Value *vGather;

        // use avx2 gather instruction if available
        if (JM()->mArch.AVX2())
        {
            // force mask to <N x float>, required by vgather
            Value *mask = BITCAST(VMASK(vMask), mSimdFP32Ty);

            vGather = VGATHERPS(vSrc, pBase, vIndices, mask, C(scale));
        }
        else
        {
            Value* pStack = STACKSAVE();

            // store vSrc on the stack.  this way we can select between a valid load address and the vSrc address
            Value* vSrcPtr = ALLOCA(vSrc->getType());
            STORE(vSrc, vSrcPtr);

            vGather = VUNDEF_F();
            Value *vScaleVec = VIMMED1((uint32_t)scale);
            Value *vOffsets = MUL(vIndices, vScaleVec);
            for (uint32_t i = 0; i < mVWidth; ++i)
            {
                // single component byte index
                Value *offset = VEXTRACT(vOffsets, C(i));
                // byte pointer to component
                Value *loadAddress = GEP(pBase, offset);
                loadAddress = BITCAST(loadAddress, PointerType::get(mFP32Ty, 0));
                // pointer to the value to load if we're masking off a component
                Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
                Value *selMask = VEXTRACT(vMask, C(i));
                // switch in a safe address to load if we're trying to access a vertex
                Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
                Value *val = LOAD(validAddress);
                vGather = VINSERT(vGather, val, C(i));
            }

            STACKRESTORE(pStack);
        }

        return vGather;
    }
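
    // The scalar fallback is easier to follow against a plain-C model of a
    // gather: each enabled lane loads from base plus its own scaled byte
    // offset, and disabled lanes keep the source value -- which is why vSrc
    // is spilled to the stack and its element address selected in.  Sketch:
    //
    //     void GatherPS(const uint8_t* pBase, const int32_t vIndices[8],
    //                   const bool vMask[8], uint8_t scale,
    //                   const float vSrc[8], float vGather[8])
    //     {
    //         for (int i = 0; i < 8; ++i)
    //         {
    //             if (vMask[i])
    //                 memcpy(&vGather[i], pBase + (int64_t)vIndices[i] * scale, sizeof(float));
    //             else
    //                 vGather[i] = vSrc[i];  // masked-off lane: passthrough source
    //         }
    //     }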

    Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
    {
        Value *vGather = VUNDEF_F_16();

        // use AVX512F gather instruction if available
        if (JM()->mArch.AVX512F())
        {
            // force mask to <N-bit Integer>, required by vgather2
            Value *mask = BITCAST(vMask, mInt16Ty);

            vGather = VGATHERPS_16(vSrc, pBase, vIndices, mask, C((uint32_t)scale));
        }
        else
        {
            Value *src0 = EXTRACT_16(vSrc, 0);
            Value *src1 = EXTRACT_16(vSrc, 1);

            Value *indices0 = EXTRACT_16(vIndices, 0);
            Value *indices1 = EXTRACT_16(vIndices, 1);

            Value *mask0 = EXTRACT_16(vMask, 0);
            Value *mask1 = EXTRACT_16(vMask, 1);

            Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale);
            Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale);

            vGather = JOIN_16(gather0, gather1);
        }

        return vGather;
    }
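
    // When AVX-512 is unavailable, every 16-wide op here falls back to two
    // independent 8-wide ops over the extracted halves.  The shape of that
    // pattern as a sketch (extract_lo/extract_hi/join are placeholder names
    // standing in for EXTRACT_16/JOIN_16):
    //
    //     template <typename Vec16, typename Vec8, typename Op8>
    //     Vec16 widen16(const Vec16 &a, const Vec16 &b, Op8 op8)
    //     {
    //         Vec8 lo = op8(extract_lo(a), extract_lo(b));  // EXTRACT_16(x, 0)
    //         Vec8 hi = op8(extract_hi(a), extract_hi(b));  // EXTRACT_16(x, 1)
    //         return join(lo, hi);                          // JOIN_16(lo, hi)
    //     }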

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate a masked gather operation in LLVM IR.  If not
    /// supported on the underlying platform, emulate it with loads
    /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
    /// @param pBase - Int8* base VB address pointer value
    /// @param vIndices - SIMD wide value of VB byte offsets
    /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
    /// @param scale - value to scale indices by
    Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
    {
        Value* vGather;

        // use avx2 gather instruction if available
        if (JM()->mArch.AVX2())
        {
            vGather = VGATHERDD(vSrc, pBase, vIndices, VMASK(vMask), C(scale));
        }
        else
        {
            Value* pStack = STACKSAVE();

            // store vSrc on the stack.  this way we can select between a valid load address and the vSrc address
            Value* vSrcPtr = ALLOCA(vSrc->getType());
            STORE(vSrc, vSrcPtr);

            vGather = VUNDEF_I();
            Value *vScaleVec = VIMMED1((uint32_t)scale);
            Value *vOffsets = MUL(vIndices, vScaleVec);
            for (uint32_t i = 0; i < mVWidth; ++i)
            {
                // single component byte index
                Value *offset = VEXTRACT(vOffsets, C(i));
                // byte pointer to component
                Value *loadAddress = GEP(pBase, offset);
                loadAddress = BITCAST(loadAddress, PointerType::get(mInt32Ty, 0));
                // pointer to the value to load if we're masking off a component
                Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
                Value *selMask = VEXTRACT(vMask, C(i));
                // switch in a safe address to load if we're trying to access a vertex
                Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
                Value *val = LOAD(validAddress, C(0));
                vGather = VINSERT(vGather, val, C(i));
            }

            STACKRESTORE(pStack);
        }

        return vGather;
    }

    Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
    {
        Value *vGather = VUNDEF_I_16();

        // use AVX512F gather instruction if available
        if (JM()->mArch.AVX512F())
        {
            // force mask to <N-bit Integer>, required by vgather2
            Value *mask = BITCAST(vMask, mInt16Ty);

            vGather = VGATHERDD_16(vSrc, pBase, vIndices, mask, C((uint32_t)scale));
        }
        else
        {
            Value *src0 = EXTRACT_16(vSrc, 0);
            Value *src1 = EXTRACT_16(vSrc, 1);

            Value *indices0 = EXTRACT_16(vIndices, 0);
            Value *indices1 = EXTRACT_16(vIndices, 1);

            Value *mask0 = EXTRACT_16(vMask, 0);
            Value *mask1 = EXTRACT_16(vMask, 1);

            Value *gather0 = GATHERDD(src0, pBase, indices0, mask0, scale);
            Value *gather1 = GATHERDD(src1, pBase, indices1, mask1, scale);

            vGather = JOIN_16(gather0, gather1);
        }

        return vGather;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate a masked gather operation in LLVM IR.  If not
    /// supported on the underlying platform, emulate it with loads
    /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
    /// @param pBase - Int8* base VB address pointer value
    /// @param vIndices - SIMD wide value of VB byte offsets
    /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
    /// @param scale - value to scale indices by
    Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
    {
        Value* vGather;

        // use avx2 gather instruction if available
        if (JM()->mArch.AVX2())
        {
            vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth / 2)), VectorType::get(mDoubleTy, mVWidth / 2));
            vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
        }
        else
        {
            Value* pStack = STACKSAVE();

            // store vSrc on the stack.  this way we can select between a valid load address and the vSrc address
            Value* vSrcPtr = ALLOCA(vSrc->getType());
            STORE(vSrc, vSrcPtr);

            vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
            Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale));
            Value *vOffsets = MUL(vIndices, vScaleVec);
            for (uint32_t i = 0; i < mVWidth / 2; ++i)
            {
                // single component byte index
                Value *offset = VEXTRACT(vOffsets, C(i));
                // byte pointer to component
                Value *loadAddress = GEP(pBase, offset);
                loadAddress = BITCAST(loadAddress, PointerType::get(mDoubleTy, 0));
                // pointer to the value to load if we're masking off a component
                Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
                Value *selMask = VEXTRACT(vMask, C(i));
                // switch in a safe address to load if we're trying to access a vertex
                Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
                Value *val = LOAD(validAddress);
                vGather = VINSERT(vGather, val, C(i));
            }
            STACKRESTORE(pStack);
        }
        return vGather;
    }

    void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
                          Value* mask, Value* vGatherComponents[], bool bPackedOutput)
    {
        const SWR_FORMAT_INFO &info = GetFormatInfo(format);
        if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
        {
            GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
        }
        else
        {
            GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
        }
    }

    void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
                            Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
    {
        switch (info.bpp / info.numComps)
        {
        case 16:
        {
            Value* vGatherResult[2];

            // TODO: vGatherMaskedVal
            Value* vGatherMaskedVal = VIMMED1((float)0);

            // always have at least one component out of x or y to fetch
            vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
            // e.g. result of first 8x32bit integer gather for 16bit components
            // 256i - 0    1    2    3    4    5    6    7
            //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy

            // if we have at least one component out of z or w to fetch
            if (info.numComps > 2)
            {
                // offset base to the next components(zw) in the vertex to gather
                pSrcBase = GEP(pSrcBase, C((char)4));

                vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
                // e.g. result of second 8x32bit integer gather for 16bit components
                // 256i - 0    1    2    3    4    5    6    7
                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
            }
            else
            {
                vGatherResult[1] = vGatherMaskedVal;
            }

            // Shuffle gathered components into place, each row is a component
            Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
        }
        break;
        case 32:
        {
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherComponents[i] = VIMMED1(*(float*)&info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];

                // Gather a SIMD of components
                vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);

                // offset base to the next component to gather
                pSrcBase = GEP(pSrcBase, C((char)4));
            }
        }
        break;
        default:
            SWR_INVALID("Invalid float format");
            break;
        }
    }

    void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
                            Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
    {
        switch (info.bpp / info.numComps)
        {
        case 8:
        {
            Value* vGatherMaskedVal = VIMMED1((int32_t)0);
            Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
            // e.g. result of an 8x32bit integer gather for 8bit components
            // 256i - 0    1    2    3    4    5    6    7
            //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw

            Shuffle8bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
        }
        break;
        case 16:
        {
            Value* vGatherResult[2];

            // TODO: vGatherMaskedVal
            Value* vGatherMaskedVal = VIMMED1((int32_t)0);

            // always have at least one component out of x or y to fetch
            vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
            // e.g. result of first 8x32bit integer gather for 16bit components
            // 256i - 0    1    2    3    4    5    6    7
            //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy

            // if we have at least one component out of z or w to fetch
            if (info.numComps > 2)
            {
                // offset base to the next components(zw) in the vertex to gather
                pSrcBase = GEP(pSrcBase, C((char)4));

                vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
                // e.g. result of second 8x32bit integer gather for 16bit components
                // 256i - 0    1    2    3    4    5    6    7
                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
            }
            else
            {
                vGatherResult[1] = vGatherMaskedVal;
            }

            // Shuffle gathered components into place, each row is a component
            Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
        }
        break;
        case 32:
        {
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherComponents[i] = VIMMED1((int)info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];

                // Gather a SIMD of components
                vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);

                // offset base to the next component to gather
                pSrcBase = GEP(pSrcBase, C((char)4));
            }
        }
        break;
        default:
            SWR_INVALID("unsupported format");
            break;
        }
    }

    void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[2], Value* vGatherOutput[4], bool bPackedOutput)
    {
        // cast types
        Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
        Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits

        // input could either be float or int vector; do shuffle work in int
        vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
        vGatherInput[1] = BITCAST(vGatherInput[1], mSimdInt32Ty);

        if (bPackedOutput)
        {
            Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits

            // shuffle mask
            Value* vConstMask = C<char>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
                                          0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[0], v32x8Ty), vConstMask), vGatherTy);
            // after pshufb: group components together in each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy

            Value* vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
            // after PERMD: move and pack xy components into each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy

            // do the same for zw components
            Value* vi128ZW = nullptr;
            if (info.numComps > 2)
            {
                Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
                vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
            }

            for (uint32_t i = 0; i < 4; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];
                // todo: fix for packed
                Value* vGatherMaskedVal = VIMMED1((int32_t)(info.defaults[i]));
                if (i >= info.numComps)
                {
                    // set the default component val
                    vGatherOutput[swizzleIndex] = vGatherMaskedVal;
                    continue;
                }

                // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
                uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
                // if x or y, use vi128XY permute result, else use vi128ZW
                Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;

                // extract packed component 128 bit lanes
                vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
            }
        }
        else
        {
            // pshufb masks for each component
            Value* vConstMask[2];
            // x/z shuffle mask
            vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
                                      0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1, });

            // y/w shuffle mask
            vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
                                      2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });

            // shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];

                // select correct constMask for x/z or y/w pshufb
                uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
                // if x or y, use the first gather result, else use the second (zw)
                uint32_t selectedGather = (i < 2) ? 0 : 1;

                vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
                // after pshufb mask for x channel; z uses the same shuffle from the second gather
                // 256i - 0    1    2    3    4    5    6    7
                //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
            }
        }
    }
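
    // For the unpacked path, the two pshufb masks simply pick the low (x/z)
    // or high (y/w) 16 bits of each 32-bit lane and zero-extend them.  A
    // scalar model of that deswizzle (plain C++, 8-wide, sketch only):
    //
    //     void Deswizzle16bpc(const uint32_t gather[2][8], int comp, uint32_t out[8])
    //     {
    //         int whichGather = (comp < 2) ? 0 : 1;           // x,y from gather 0; z,w from gather 1
    //         int shift = (comp == 0 || comp == 2) ? 0 : 16;  // x/z low word, y/w high word
    //         for (int i = 0; i < 8; ++i)
    //             out[i] = (gather[whichGather][i] >> shift) & 0xffff;  // zero-extend
    //     }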

    void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput)
    {
        // cast types
        Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
        Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits

        if (bPackedOutput)
        {
            Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
            // shuffle mask
            Value* vConstMask = C<char>({ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
                                          0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 });
            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
            // after pshufb: group components together in each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww

            Value* vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
            // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)

            // do the same for zw components
            Value* vi128ZW = nullptr;
            if (info.numComps > 2)
            {
                vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
            }

            // sign extend all enabled components.  If we have a fill vVertexElements, output to current simdvertex
            for (uint32_t i = 0; i < 4; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];
                // todo: fix for packed
                Value* vGatherMaskedVal = VIMMED1((int32_t)(info.defaults[i]));
                if (i >= info.numComps)
                {
                    // set the default component val
                    vGatherOutput[swizzleIndex] = vGatherMaskedVal;
                    continue;
                }

                // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
                uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
                // if x or y, use vi128XY permute result, else use vi128ZW
                Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;

                // sign extend
                vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
            }
        }
        // else zero extend
        else {
            // shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++) {
                uint32_t swizzleIndex = info.swizzle[i];

                // pshufb masks for each component
                Value* vConstMask;
                switch (i)
                {
                case 0:
                    // x shuffle mask
                    vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
                                           0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
                    break;
                case 1:
                    // y shuffle mask
                    vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
                                           1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
                    break;
                case 2:
                    // z shuffle mask
                    vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
                                           2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
                    break;
                case 3:
                    // w shuffle mask
                    vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
                                           3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
                    break;
                default:
                    vConstMask = nullptr;
                    break;
                }

                vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
                // after pshufb for x channel
                // 256i - 0    1    2    3    4    5    6    7
                //        x000 x000 x000 x000 x000 x000 x000 x000
            }
        }
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief emulates a scatter operation.
    /// @param pDst - pointer to destination
    /// @param vSrc - vector of src data to scatter
    /// @param vOffsets - vector of byte offsets from pDst
    /// @param vMask - mask of valid lanes
    void Builder::SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask)
    {
        /* Scatter algorithm

           while (Index = BitScanForward(mask))
               srcElem = srcVector[Index]
               offsetElem = offsetVector[Index]
               *(pDst + offsetElem) = srcElem
               update mask (mask &= ~(1 << Index))
        */

        BasicBlock* pCurBB = IRB()->GetInsertBlock();
        Function* pFunc = pCurBB->getParent();
        Type* pSrcTy = vSrc->getType()->getVectorElementType();

        // Store vectors on stack
        if (pScatterStackSrc == nullptr)
        {
            // Save off stack allocations and reuse per scatter.  Significantly reduces stack
            // requirements for shaders with a lot of scatters.
            pScatterStackSrc = CreateEntryAlloca(pFunc, mSimdInt64Ty);
            pScatterStackOffsets = CreateEntryAlloca(pFunc, mSimdInt32Ty);
        }

        Value* pSrcArrayPtr = BITCAST(pScatterStackSrc, PointerType::get(vSrc->getType(), 0));
        Value* pOffsetsArrayPtr = pScatterStackOffsets;
        STORE(vSrc, pSrcArrayPtr);
        STORE(vOffsets, pOffsetsArrayPtr);

        // Cast to pointers for random access
        pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
        pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));

        Value* pMask = VMOVMSKPS(BITCAST(vMask, mSimdFP32Ty));

        // Get cttz function
        Function* pfnCttz = Intrinsic::getDeclaration(mpJitMgr->mpCurrentModule, Intrinsic::cttz, { mInt32Ty });

        // Setup loop basic block
        BasicBlock* pLoop = BasicBlock::Create(mpJitMgr->mContext, "Scatter_Loop", pFunc);

        // compute first set bit
        Value* pIndex = CALL(pfnCttz, { pMask, C(false) });

        Value* pIsUndef = ICMP_EQ(pIndex, C(32));

        // Split current block
        BasicBlock* pPostLoop = pCurBB->splitBasicBlock(cast<Instruction>(pIsUndef)->getNextNode());

        // Remove unconditional jump created by splitBasicBlock
        pCurBB->getTerminator()->eraseFromParent();

        // Add terminator to end of original block
        IRB()->SetInsertPoint(pCurBB);

        // Add conditional branch
        COND_BR(pIsUndef, pPostLoop, pLoop);

        // Add loop basic block contents
        IRB()->SetInsertPoint(pLoop);
        PHINode* pIndexPhi = PHI(mInt32Ty, 2);
        PHINode* pMaskPhi = PHI(mInt32Ty, 2);

        pIndexPhi->addIncoming(pIndex, pCurBB);
        pMaskPhi->addIncoming(pMask, pCurBB);

        // Extract elements for this index
        Value* pSrcElem = LOADV(pSrcArrayPtr, { pIndexPhi });
        Value* pOffsetElem = LOADV(pOffsetsArrayPtr, { pIndexPhi });

        // GEP to this offset in dst
        Value* pCurDst = GEP(pDst, pOffsetElem);
        pCurDst = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0));
        STORE(pSrcElem, pCurDst);

        // Update the mask
        Value* pNewMask = AND(pMaskPhi, NOT(SHL(C(1), pIndexPhi)));

        // Terminator
        Value* pNewIndex = CALL(pfnCttz, { pNewMask, C(false) });

        pIsUndef = ICMP_EQ(pNewIndex, C(32));
        COND_BR(pIsUndef, pPostLoop, pLoop);

        // Update phi edges
        pIndexPhi->addIncoming(pNewIndex, pLoop);
        pMaskPhi->addIncoming(pNewMask, pLoop);

        // Move builder to beginning of post loop
        IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
    }
|
||||
|
||||
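The emitted Scatter_Loop follows the algorithm in the comment exactly. As a scalar reference, here is a hand-written model (not code from this patch; it assumes a 32-bit lane mask and the GCC/Clang __builtin_ctz intrinsic):

#include <cstdint>
#include <cstring>

// Scalar model of SCATTERPS: visit the set bits of the lane mask with
// count-trailing-zeros, store one element per iteration, then clear the bit.
static void scatterPS(void* pDst, const float* srcVector,
                      const int32_t* offsetVector, uint32_t mask)
{
    while (mask != 0)
    {
        uint32_t index = __builtin_ctz(mask);        // cttz: first valid lane
        float srcElem = srcVector[index];
        int32_t offsetElem = offsetVector[index];    // byte offset from pDst
        std::memcpy(static_cast<char*>(pDst) + offsetElem, &srcElem, sizeof(srcElem));
        mask &= ~(1u << index);                      // retire this lane
    }
}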
    //////////////////////////////////////////////////////////////////////////
    /// @brief save/restore stack, providing ability to push/pop the stack and
    ///        reduce overall stack requirements for temporary stack use
    Value* Builder::STACKSAVE()
    {
        Function* pfnStackSave = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::stacksave);
        return CALLA(pfnStackSave);
    }

    void Builder::STACKRESTORE(Value* pSaved)
    {
        Function* pfnStackRestore = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::stackrestore);
        CALL(pfnStackRestore, std::initializer_list<Value*>{pSaved});
    }

}
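A typical pairing of the two helpers, as used by the gather emulation paths later in this diff (a sketch in Builder-member context, with vSrc standing in for any value being spilled):

// Bracket a temporary spill slot so repeated emulation calls do not keep
// growing the JIT'd function's stack frame.
Value* pStack  = STACKSAVE();
Value* vSrcPtr = ALLOCA(vSrc->getType());   // temporary stack storage
STORE(vSrc, vSrcPtr);
// ... per-lane loads/stores through vSrcPtr ...
STACKRESTORE(pStack);                       // release the temporary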
73	src/gallium/drivers/swr/rasterizer/jitter/builder_mem.h	Normal file
@@ -0,0 +1,73 @@
/****************************************************************************
* Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* @file builder_mem.h
*
* @brief miscellaneous builder functions
*
* Notes:
*
******************************************************************************/
#pragma once

Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);

LoadInst *LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name = "");
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
StoreInst *STOREV(Value *Val, Value *BasePtr, const std::initializer_list<Value*> &offset);

Value *MASKLOADD(Value* src, Value* mask);

void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
             Value* mask, Value* vGatherComponents[], bool bPackedOutput);

virtual Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1, Value *pDrawContext = nullptr);
Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);

void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
               Value* mask, Value* vGatherComponents[], bool bPackedOutput);

Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);

void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
               Value* mask, Value* vGatherComponents[], bool bPackedOutput);

Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);

void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);

void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Value* vGatherOutput[], bool bPackedOutput);

Value* STACKSAVE();
void STACKRESTORE(Value* pSaved);

// Static stack allocations for scatter operations
Value* pScatterStackSrc{ nullptr };
Value* pScatterStackOffsets{ nullptr };
@@ -303,70 +303,6 @@ namespace SwrJit
        return pValConst->getSExtValue();
    }

    Value *Builder::GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(i);
        return GEPA(ptr, indices);
    }

    Value *Builder::GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(C(i));
        return GEPA(ptr, indices);
    }

    Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(i);
        return IN_BOUNDS_GEP(ptr, indices);
    }

    Value *Builder::IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList)
    {
        std::vector<Value*> indices;
        for (auto i : indexList)
            indices.push_back(C(i));
        return IN_BOUNDS_GEP(ptr, indices);
    }

    LoadInst *Builder::LOAD(Value *basePtr, const std::initializer_list<uint32_t> &indices, const llvm::Twine& name)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(C(i));
        return LOAD(GEPA(basePtr, valIndices), name);
    }

    LoadInst *Builder::LOADV(Value *basePtr, const std::initializer_list<Value*> &indices, const llvm::Twine& name)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(i);
        return LOAD(GEPA(basePtr, valIndices), name);
    }

    StoreInst *Builder::STORE(Value *val, Value *basePtr, const std::initializer_list<uint32_t> &indices)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(C(i));
        return STORE(val, GEPA(basePtr, valIndices));
    }

    StoreInst *Builder::STOREV(Value *val, Value *basePtr, const std::initializer_list<Value*> &indices)
    {
        std::vector<Value*> valIndices;
        for (auto i : indices)
            valIndices.push_back(i);
        return STORE(val, GEPA(basePtr, valIndices));
    }

    CallInst *Builder::CALL(Value *Callee, const std::initializer_list<Value*> &argsList, const llvm::Twine& name)
    {
        std::vector<Value*> args;
@@ -418,34 +354,6 @@ namespace SwrJit
        return vOut;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate an i32 masked load operation in LLVM IR. If not
    /// supported on the underlying platform, emulate it with a float masked load
    /// @param src - base address pointer for the load
    /// @param vMask - SIMD wide mask that controls whether to access memory or load 0
    Value *Builder::MASKLOADD(Value* src, Value* mask)
    {
        Value* vResult;
        // use avx2 maskload instruction if available
        if (JM()->mArch.AVX2())
        {
            Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx2_maskload_d_256);
            vResult = CALL(func, { src, mask });
        }
        else
        {
            // maskload intrinsic expects integer mask operand in llvm >= 3.8
#if (LLVM_VERSION_MAJOR > 3) || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR >= 8)
            mask = BITCAST(mask, VectorType::get(mInt32Ty, mVWidth));
#else
            mask = BITCAST(mask, VectorType::get(mFP32Ty, mVWidth));
#endif
            Function *func = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::x86_avx_maskload_ps_256);
            vResult = BITCAST(CALL(func, { src, mask }), VectorType::get(mInt32Ty, mVWidth));
        }
        return vResult;
    }

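In scalar terms the masked load reads, per lane (a stand-alone model with a hypothetical function name; vmaskmov-style semantics where the mask element's sign bit gates the access):

#include <cstdint>

// Scalar model of MASKLOADD: lanes whose mask element has the sign bit set
// load from memory; disabled lanes produce 0 instead of touching memory.
static void maskLoadD(int32_t* dst, const int32_t* src,
                      const int32_t* mask, int simdWidth)
{
    for (int lane = 0; lane < simdWidth; ++lane)
        dst[lane] = (mask[lane] < 0) ? src[lane] : 0;
}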
    //////////////////////////////////////////////////////////////////////////
    /// @brief insert a JIT call to CallPrint
    /// - outputs formatted string to both stdout and VS output window
@@ -581,222 +489,6 @@ namespace SwrJit
        return PRINT(printStr, {});
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate a masked gather operation in LLVM IR. If not
    /// supported on the underlying platform, emulate it with loads
    /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
    /// @param pBase - Int8* base VB address pointer value
    /// @param vIndices - SIMD wide value of VB byte offsets
    /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
    /// @param scale - value to scale indices by
    Value *Builder::GATHERPS(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
    {
        Value *vGather;

        // use avx2 gather instruction if available
        if (JM()->mArch.AVX2())
        {
            // force mask to <N x float>, required by vgather
            Value *mask = BITCAST(VMASK(vMask), mSimdFP32Ty);

            vGather = VGATHERPS(vSrc, pBase, vIndices, mask, C(scale));
        }
        else
        {
            Value* pStack = STACKSAVE();

            // store vSrc on the stack.  this way we can select between a valid load address and the vSrc address
            Value* vSrcPtr = ALLOCA(vSrc->getType());
            STORE(vSrc, vSrcPtr);

            vGather = VUNDEF_F();
            Value *vScaleVec = VIMMED1((uint32_t)scale);
            Value *vOffsets = MUL(vIndices, vScaleVec);
            for (uint32_t i = 0; i < mVWidth; ++i)
            {
                // single component byte index
                Value *offset = VEXTRACT(vOffsets, C(i));
                // byte pointer to component
                Value *loadAddress = GEP(pBase, offset);
                loadAddress = BITCAST(loadAddress, PointerType::get(mFP32Ty, 0));
                // pointer to the value to load if we're masking off a component
                Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
                Value *selMask = VEXTRACT(vMask, C(i));
                // switch in a safe address to load if we're trying to access a vertex
                Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
                Value *val = LOAD(validAddress);
                vGather = VINSERT(vGather, val, C(i));
            }

            STACKRESTORE(pStack);
        }

        return vGather;
    }

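The select between a real load address and the spilled vSrc address is the core of the fallback path. A scalar model (illustrative only, with bool lanes standing in for the SIMD mask):

#include <cstdint>
#include <cstring>

// Scalar model of the GATHERPS emulation: masked-off lanes keep the vSrc
// value rather than reading memory, which is why vSrc is spilled to a stack
// slot that can be addressed per lane.
static void gatherPS(float* dst, const float* vSrc, const char* pBase,
                     const int32_t* byteIndices, const bool* mask,
                     uint32_t scale, int simdWidth)
{
    for (int lane = 0; lane < simdWidth; ++lane)
    {
        if (mask[lane])
            std::memcpy(&dst[lane], pBase + byteIndices[lane] * scale, sizeof(float));
        else
            dst[lane] = vSrc[lane];   // safe fallback, no memory access
    }
}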
    Value *Builder::GATHERPS_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
    {
        Value *vGather = VUNDEF_F_16();

        // use AVX512F gather instruction if available
        if (JM()->mArch.AVX512F())
        {
            // force mask to <N-bit Integer>, required by vgather2
            Value *mask = BITCAST(vMask, mInt16Ty);

            vGather = VGATHERPS_16(vSrc, pBase, vIndices, mask, C((uint32_t)scale));
        }
        else
        {
            Value *src0 = EXTRACT_16(vSrc, 0);
            Value *src1 = EXTRACT_16(vSrc, 1);

            Value *indices0 = EXTRACT_16(vIndices, 0);
            Value *indices1 = EXTRACT_16(vIndices, 1);

            Value *mask0 = EXTRACT_16(vMask, 0);
            Value *mask1 = EXTRACT_16(vMask, 1);

            Value *gather0 = GATHERPS(src0, pBase, indices0, mask0, scale);
            Value *gather1 = GATHERPS(src1, pBase, indices1, mask1, scale);

            vGather = JOIN_16(gather0, gather1);
        }

        return vGather;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate a masked gather operation in LLVM IR. If not
    /// supported on the underlying platform, emulate it with loads
    /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
    /// @param pBase - Int8* base VB address pointer value
    /// @param vIndices - SIMD wide value of VB byte offsets
    /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
    /// @param scale - value to scale indices by
    Value *Builder::GATHERDD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
    {
        Value* vGather;

        // use avx2 gather instruction if available
        if (JM()->mArch.AVX2())
        {
            vGather = VGATHERDD(vSrc, pBase, vIndices, VMASK(vMask), C(scale));
        }
        else
        {
            Value* pStack = STACKSAVE();

            // store vSrc on the stack.  this way we can select between a valid load address and the vSrc address
            Value* vSrcPtr = ALLOCA(vSrc->getType());
            STORE(vSrc, vSrcPtr);

            vGather = VUNDEF_I();
            Value *vScaleVec = VIMMED1((uint32_t)scale);
            Value *vOffsets = MUL(vIndices, vScaleVec);
            for (uint32_t i = 0; i < mVWidth; ++i)
            {
                // single component byte index
                Value *offset = VEXTRACT(vOffsets, C(i));
                // byte pointer to component
                Value *loadAddress = GEP(pBase, offset);
                loadAddress = BITCAST(loadAddress, PointerType::get(mInt32Ty, 0));
                // pointer to the value to load if we're masking off a component
                Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
                Value *selMask = VEXTRACT(vMask, C(i));
                // switch in a safe address to load if we're trying to access a vertex
                Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
                Value *val = LOAD(validAddress, C(0));
                vGather = VINSERT(vGather, val, C(i));
            }

            STACKRESTORE(pStack);
        }

        return vGather;
    }

    Value *Builder::GATHERDD_16(Value *vSrc, Value *pBase, Value *vIndices, Value *vMask, uint8_t scale)
    {
        Value *vGather = VUNDEF_I_16();

        // use AVX512F gather instruction if available
        if (JM()->mArch.AVX512F())
        {
            // force mask to <N-bit Integer>, required by vgather2
            Value *mask = BITCAST(vMask, mInt16Ty);

            vGather = VGATHERDD_16(vSrc, pBase, vIndices, mask, C((uint32_t)scale));
        }
        else
        {
            Value *src0 = EXTRACT_16(vSrc, 0);
            Value *src1 = EXTRACT_16(vSrc, 1);

            Value *indices0 = EXTRACT_16(vIndices, 0);
            Value *indices1 = EXTRACT_16(vIndices, 1);

            Value *mask0 = EXTRACT_16(vMask, 0);
            Value *mask1 = EXTRACT_16(vMask, 1);

            Value *gather0 = GATHERDD(src0, pBase, indices0, mask0, scale);
            Value *gather1 = GATHERDD(src1, pBase, indices1, mask1, scale);

            vGather = JOIN_16(gather0, gather1);
        }

        return vGather;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief Generate a masked gather operation in LLVM IR. If not
    /// supported on the underlying platform, emulate it with loads
    /// @param vSrc - SIMD wide value that will be loaded if mask is invalid
    /// @param pBase - Int8* base VB address pointer value
    /// @param vIndices - SIMD wide value of VB byte offsets
    /// @param vMask - SIMD wide mask that controls whether to access memory or the src values
    /// @param scale - value to scale indices by
    Value *Builder::GATHERPD(Value* vSrc, Value* pBase, Value* vIndices, Value* vMask, uint8_t scale)
    {
        Value* vGather;

        // use avx2 gather instruction if available
        if (JM()->mArch.AVX2())
        {
            vMask = BITCAST(S_EXT(vMask, VectorType::get(mInt64Ty, mVWidth / 2)), VectorType::get(mDoubleTy, mVWidth / 2));
            vGather = VGATHERPD(vSrc, pBase, vIndices, vMask, C(scale));
        }
        else
        {
            Value* pStack = STACKSAVE();

            // store vSrc on the stack.  this way we can select between a valid load address and the vSrc address
            Value* vSrcPtr = ALLOCA(vSrc->getType());
            STORE(vSrc, vSrcPtr);

            vGather = UndefValue::get(VectorType::get(mDoubleTy, 4));
            Value *vScaleVec = VECTOR_SPLAT(4, C((uint32_t)scale));
            Value *vOffsets = MUL(vIndices, vScaleVec);
            for (uint32_t i = 0; i < mVWidth / 2; ++i)
            {
                // single component byte index
                Value *offset = VEXTRACT(vOffsets, C(i));
                // byte pointer to component
                Value *loadAddress = GEP(pBase, offset);
                loadAddress = BITCAST(loadAddress, PointerType::get(mDoubleTy, 0));
                // pointer to the value to load if we're masking off a component
                Value *maskLoadAddress = GEP(vSrcPtr, { C(0), C(i) });
                Value *selMask = VEXTRACT(vMask, C(i));
                // switch in a safe address to load if we're trying to access a vertex
                Value *validAddress = SELECT(selMask, loadAddress, maskLoadAddress);
                Value *val = LOAD(validAddress);
                vGather = VINSERT(vGather, val, C(i));
            }
            STACKRESTORE(pStack);
        }
        return vGather;
    }

    Value *Builder::EXTRACT_16(Value *x, uint32_t imm)
    {
        if (imm == 0)
@@ -1064,360 +756,6 @@ namespace SwrJit
        return SELECT(cmp, a, b);
    }

    void Builder::Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
                          Value* mask, Value* vGatherComponents[], bool bPackedOutput)
    {
        const SWR_FORMAT_INFO &info = GetFormatInfo(format);
        if (info.type[0] == SWR_TYPE_FLOAT && info.bpc[0] == 32)
        {
            GATHER4PS(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
        }
        else
        {
            GATHER4DD(info, pSrcBase, byteOffsets, mask, vGatherComponents, bPackedOutput);
        }
    }

    void Builder::GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
                            Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
    {
        switch (info.bpp / info.numComps)
        {
        case 16:
        {
            Value* vGatherResult[2];

            // TODO: vGatherMaskedVal
            Value* vGatherMaskedVal = VIMMED1((float)0);

            // always have at least one component out of x or y to fetch
            vGatherResult[0] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
            // e.g. result of first 8x32bit integer gather for 16bit components
            // 256i - 0    1    2    3    4    5    6    7
            //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
            //

            // if we have at least one component out of z or w to fetch
            if (info.numComps > 2)
            {
                // offset base to the next components (zw) in the vertex to gather
                pSrcBase = GEP(pSrcBase, C((char)4));

                vGatherResult[1] = GATHERPS(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
                // e.g. result of second 8x32bit integer gather for 16bit components
                // 256i - 0    1    2    3    4    5    6    7
                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
                //
            }
            else
            {
                vGatherResult[1] = vGatherMaskedVal;
            }

            // Shuffle gathered components into place, each row is a component
            Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
        }
        break;
        case 32:
        {
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherComponents[i] = VIMMED1(*(float*)&info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];

                // Gather a SIMD of components
                vGatherComponents[swizzleIndex] = GATHERPS(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);

                // offset base to the next component to gather
                pSrcBase = GEP(pSrcBase, C((char)4));
            }
        }
        break;
        default:
            SWR_INVALID("Invalid float format");
            break;
        }
    }

    void Builder::GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
                            Value* vMask, Value* vGatherComponents[], bool bPackedOutput)
    {
        switch (info.bpp / info.numComps)
        {
        case 8:
        {
            Value* vGatherMaskedVal = VIMMED1((int32_t)0);
            Value* vGatherResult = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
            // e.g. result of an 8x32bit integer gather for 8bit components
            // 256i - 0    1    2    3    4    5    6    7
            //        xyzw xyzw xyzw xyzw xyzw xyzw xyzw xyzw

            Shuffle8bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
        }
        break;
        case 16:
        {
            Value* vGatherResult[2];

            // TODO: vGatherMaskedVal
            Value* vGatherMaskedVal = VIMMED1((int32_t)0);

            // always have at least one component out of x or y to fetch
            vGatherResult[0] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
            // e.g. result of first 8x32bit integer gather for 16bit components
            // 256i - 0    1    2    3    4    5    6    7
            //        xyxy xyxy xyxy xyxy xyxy xyxy xyxy xyxy
            //

            // if we have at least one component out of z or w to fetch
            if (info.numComps > 2)
            {
                // offset base to the next components (zw) in the vertex to gather
                pSrcBase = GEP(pSrcBase, C((char)4));

                vGatherResult[1] = GATHERDD(vGatherMaskedVal, pSrcBase, byteOffsets, vMask);
                // e.g. result of second 8x32bit integer gather for 16bit components
                // 256i - 0    1    2    3    4    5    6    7
                //        zwzw zwzw zwzw zwzw zwzw zwzw zwzw zwzw
                //
            }
            else
            {
                vGatherResult[1] = vGatherMaskedVal;
            }

            // Shuffle gathered components into place, each row is a component
            Shuffle16bpcGather4(info, vGatherResult, vGatherComponents, bPackedOutput);
        }
        break;
        case 32:
        {
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherComponents[i] = VIMMED1((int)info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];

                // Gather a SIMD of components
                vGatherComponents[swizzleIndex] = GATHERDD(vGatherComponents[swizzleIndex], pSrcBase, byteOffsets, vMask);

                // offset base to the next component to gather
                pSrcBase = GEP(pSrcBase, C((char)4));
            }
        }
        break;
        default:
            SWR_INVALID("unsupported format");
            break;
        }
    }

    void Builder::Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[2], Value* vGatherOutput[4], bool bPackedOutput)
    {
        // cast types
        Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
        Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits

        // input could either be float or int vector; do shuffle work in int
        vGatherInput[0] = BITCAST(vGatherInput[0], mSimdInt32Ty);
        vGatherInput[1] = BITCAST(vGatherInput[1], mSimdInt32Ty);

        if (bPackedOutput)
        {
            Type* v128bitTy = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits

            // shuffle mask
            Value* vConstMask = C<char>({ 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15,
                                          0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 });
            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[0], v32x8Ty), vConstMask), vGatherTy);
            // after pshufb: group components together in each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx xxxx yyyy yyyy xxxx xxxx yyyy yyyy

            Value* vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
            // after PERMD: move and pack xy components into each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx xxxx xxxx xxxx yyyy yyyy yyyy yyyy

            // do the same for zw components
            Value* vi128ZW = nullptr;
            if (info.numComps > 2)
            {
                Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput[1], v32x8Ty), vConstMask), vGatherTy);
                vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 1, 4, 5, 2, 3, 6, 7 })), v128bitTy);
            }

            for (uint32_t i = 0; i < 4; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];
                // todo: fix for packed
                Value* vGatherMaskedVal = VIMMED1((int32_t)(info.defaults[i]));
                if (i >= info.numComps)
                {
                    // set the default component val
                    vGatherOutput[swizzleIndex] = vGatherMaskedVal;
                    continue;
                }

                // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
                uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
                // if x or y, use vi128XY permute result, else use vi128ZW
                Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;

                // extract packed component 128 bit lanes
                vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
            }
        }
        else
        {
            // pshufb masks for each component
            Value* vConstMask[2];
            // x/z shuffle mask
            vConstMask[0] = C<char>({ 0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1,
                                      0, 1, -1, -1, 4, 5, -1, -1, 8, 9, -1, -1, 12, 13, -1, -1 });

            // y/w shuffle mask
            vConstMask[1] = C<char>({ 2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1,
                                      2, 3, -1, -1, 6, 7, -1, -1, 10, 11, -1, -1, 14, 15, -1, -1 });

            // shuffle enabled components into lower word of each 32bit lane, 0 extending to 32 bits
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];

                // select correct constMask for x/z or y/w pshufb
                uint32_t selectedMask = ((i == 0) || (i == 2)) ? 0 : 1;
                // if x or y, use the first (xy) gather result, else use the second (zw)
                uint32_t selectedGather = (i < 2) ? 0 : 1;

                vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput[selectedGather], v32x8Ty), vConstMask[selectedMask]), vGatherTy);
                // after pshufb mask for x channel; z uses the same shuffle from the second gather
                // 256i - 0    1    2    3    4    5    6    7
                //        xx00 xx00 xx00 xx00 xx00 xx00 xx00 xx00
            }
        }
    }

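For the unpacked case above, each output lane is just one 16-bit word of the interleaved gather result, zero-extended to 32 bits (a scalar model with illustrative names, not code from this patch):

#include <cstdint>

// Scalar model of the 16bpc zero-extend shuffle: x/z come from the low word
// of the xyxy (or zwzw) gather result, y/w from the high word.
static void shuffle16bpcZeroExtend(uint32_t* out, const uint32_t* gathered,
                                   bool highWord, int simdWidth)
{
    for (int lane = 0; lane < simdWidth; ++lane)
        out[lane] = (gathered[lane] >> (highWord ? 16 : 0)) & 0xFFFFu;
}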
    void Builder::Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput)
    {
        // cast types
        Type* vGatherTy = VectorType::get(IntegerType::getInt32Ty(JM()->mContext), mVWidth);
        Type* v32x8Ty = VectorType::get(mInt8Ty, mVWidth * 4); // vwidth is units of 32 bits

        if (bPackedOutput)
        {
            Type* v128Ty = VectorType::get(IntegerType::getIntNTy(JM()->mContext, 128), mVWidth / 4); // vwidth is units of 32 bits
            // shuffle mask
            Value* vConstMask = C<char>({ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
                                          0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 });
            Value* vShufResult = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
            // after pshufb: group components together in each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx yyyy zzzz wwww xxxx yyyy zzzz wwww

            Value* vi128XY = BITCAST(PERMD(vShufResult, C<int32_t>({ 0, 4, 0, 0, 1, 5, 0, 0 })), v128Ty);
            // after PERMD: move and pack xy and zw components in low 64 bits of each 128bit lane
            // 256i - 0    1    2    3    4    5    6    7
            //        xxxx xxxx dcdc dcdc yyyy yyyy dcdc dcdc (dc - don't care)

            // do the same for zw components
            Value* vi128ZW = nullptr;
            if (info.numComps > 2)
            {
                vi128ZW = BITCAST(PERMD(vShufResult, C<int32_t>({ 2, 6, 0, 0, 3, 7, 0, 0 })), v128Ty);
            }

            // sign extend all enabled components. If we have a fill vVertexElements, output to current simdvertex
            for (uint32_t i = 0; i < 4; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];
                // todo: fix for packed
                Value* vGatherMaskedVal = VIMMED1((int32_t)(info.defaults[i]));
                if (i >= info.numComps)
                {
                    // set the default component val
                    vGatherOutput[swizzleIndex] = vGatherMaskedVal;
                    continue;
                }

                // if x or z, extract 128bits from lane 0, else for y or w, extract from lane 1
                uint32_t lane = ((i == 0) || (i == 2)) ? 0 : 1;
                // if x or y, use vi128XY permute result, else use vi128ZW
                Value* selectedPermute = (i < 2) ? vi128XY : vi128ZW;

                // sign extend
                vGatherOutput[swizzleIndex] = VEXTRACT(selectedPermute, C(lane));
            }
        }
        // else zero extend
        else
        {
            // shuffle enabled components into lower byte of each 32bit lane, 0 extending to 32 bits
            // apply defaults
            for (uint32_t i = 0; i < 4; ++i)
            {
                vGatherOutput[i] = VIMMED1((int32_t)info.defaults[i]);
            }

            for (uint32_t i = 0; i < info.numComps; i++)
            {
                uint32_t swizzleIndex = info.swizzle[i];

                // pshufb masks for each component
                Value* vConstMask;
                switch (i)
                {
                case 0:
                    // x shuffle mask
                    vConstMask = C<char>({ 0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1,
                                           0, -1, -1, -1, 4, -1, -1, -1, 8, -1, -1, -1, 12, -1, -1, -1 });
                    break;
                case 1:
                    // y shuffle mask
                    vConstMask = C<char>({ 1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1,
                                           1, -1, -1, -1, 5, -1, -1, -1, 9, -1, -1, -1, 13, -1, -1, -1 });
                    break;
                case 2:
                    // z shuffle mask
                    vConstMask = C<char>({ 2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1,
                                           2, -1, -1, -1, 6, -1, -1, -1, 10, -1, -1, -1, 14, -1, -1, -1 });
                    break;
                case 3:
                    // w shuffle mask
                    vConstMask = C<char>({ 3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1,
                                           3, -1, -1, -1, 7, -1, -1, -1, 11, -1, -1, -1, 15, -1, -1, -1 });
                    break;
                default:
                    vConstMask = nullptr;
                    break;
                }

                vGatherOutput[swizzleIndex] = BITCAST(PSHUFB(BITCAST(vGatherInput, v32x8Ty), vConstMask), vGatherTy);
                // after pshufb for x channel
                // 256i - 0    1    2    3    4    5    6    7
                //        x000 x000 x000 x000 x000 x000 x000 x000
            }
        }
    }

    // Helper function to create alloca in entry block of function
    Value* Builder::CreateEntryAlloca(Function* pFunc, Type* pType)
    {
@@ -1439,105 +777,6 @@ namespace SwrJit
        return pAlloca;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief emulates a scatter operation.
    /// @param pDst - pointer to destination
    /// @param vSrc - vector of src data to scatter
    /// @param vOffsets - vector of byte offsets from pDst
    /// @param vMask - mask of valid lanes
    void Builder::SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask)
    {
        /* Scatter algorithm

           while (Index = BitScanForward(mask))
               srcElem = srcVector[Index]
               offsetElem = offsetVector[Index]
               *(pDst + offsetElem) = srcElem
               mask &= ~(1 << Index)
        */

        BasicBlock* pCurBB = IRB()->GetInsertBlock();
        Function* pFunc = pCurBB->getParent();
        Type* pSrcTy = vSrc->getType()->getVectorElementType();

        // Store vectors on stack
        if (pScatterStackSrc == nullptr)
        {
            // Save off stack allocations and reuse per scatter. Significantly reduces stack
            // requirements for shaders with a lot of scatters.
            pScatterStackSrc = CreateEntryAlloca(pFunc, mSimdInt64Ty);
            pScatterStackOffsets = CreateEntryAlloca(pFunc, mSimdInt32Ty);
        }

        Value* pSrcArrayPtr = BITCAST(pScatterStackSrc, PointerType::get(vSrc->getType(), 0));
        Value* pOffsetsArrayPtr = pScatterStackOffsets;
        STORE(vSrc, pSrcArrayPtr);
        STORE(vOffsets, pOffsetsArrayPtr);

        // Cast to pointers for random access
        pSrcArrayPtr = POINTER_CAST(pSrcArrayPtr, PointerType::get(pSrcTy, 0));
        pOffsetsArrayPtr = POINTER_CAST(pOffsetsArrayPtr, PointerType::get(mInt32Ty, 0));

        Value* pMask = VMOVMSKPS(BITCAST(vMask, mSimdFP32Ty));

        // Get cttz function
        Function* pfnCttz = Intrinsic::getDeclaration(mpJitMgr->mpCurrentModule, Intrinsic::cttz, { mInt32Ty });

        // Setup loop basic block
        BasicBlock* pLoop = BasicBlock::Create(mpJitMgr->mContext, "Scatter_Loop", pFunc);

        // compute first set bit
        Value* pIndex = CALL(pfnCttz, { pMask, C(false) });

        Value* pIsUndef = ICMP_EQ(pIndex, C(32));

        // Split current block
        BasicBlock* pPostLoop = pCurBB->splitBasicBlock(cast<Instruction>(pIsUndef)->getNextNode());

        // Remove unconditional jump created by splitBasicBlock
        pCurBB->getTerminator()->eraseFromParent();

        // Add terminator to end of original block
        IRB()->SetInsertPoint(pCurBB);

        // Add conditional branch
        COND_BR(pIsUndef, pPostLoop, pLoop);

        // Add loop basic block contents
        IRB()->SetInsertPoint(pLoop);
        PHINode* pIndexPhi = PHI(mInt32Ty, 2);
        PHINode* pMaskPhi = PHI(mInt32Ty, 2);

        pIndexPhi->addIncoming(pIndex, pCurBB);
        pMaskPhi->addIncoming(pMask, pCurBB);

        // Extract elements for this index
        Value* pSrcElem = LOADV(pSrcArrayPtr, { pIndexPhi });
        Value* pOffsetElem = LOADV(pOffsetsArrayPtr, { pIndexPhi });

        // GEP to this offset in dst
        Value* pCurDst = GEP(pDst, pOffsetElem);
        pCurDst = POINTER_CAST(pCurDst, PointerType::get(pSrcTy, 0));
        STORE(pSrcElem, pCurDst);

        // Update the mask
        Value* pNewMask = AND(pMaskPhi, NOT(SHL(C(1), pIndexPhi)));

        // Terminator
        Value* pNewIndex = CALL(pfnCttz, { pNewMask, C(false) });

        pIsUndef = ICMP_EQ(pNewIndex, C(32));
        COND_BR(pIsUndef, pPostLoop, pLoop);

        // Update phi edges
        pIndexPhi->addIncoming(pNewIndex, pLoop);
        pMaskPhi->addIncoming(pNewMask, pLoop);

        // Move builder to beginning of post loop
        IRB()->SetInsertPoint(pPostLoop, pPostLoop->begin());
    }

    Value* Builder::VABSPS(Value* a)
    {
        Value* asInt = BITCAST(a, mSimdInt32Ty);
@@ -1575,21 +814,6 @@ namespace SwrJit
        return result;
    }

    //////////////////////////////////////////////////////////////////////////
    /// @brief save/restore stack, providing ability to push/pop the stack and
    ///        reduce overall stack requirements for temporary stack use
    Value* Builder::STACKSAVE()
    {
        Function* pfnStackSave = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::stacksave);
        return CALLA(pfnStackSave);
    }

    void Builder::STACKRESTORE(Value* pSaved)
    {
        Function* pfnStackRestore = Intrinsic::getDeclaration(JM()->mpCurrentModule, Intrinsic::stackrestore);
        CALL(pfnStackRestore, std::initializer_list<Value*>{pSaved});
    }

    Value *Builder::FMADDPS(Value* a, Value* b, Value* c)
    {
        Value* vOut;
@@ -1707,7 +931,6 @@ namespace SwrJit
        }
    }

    uint32_t Builder::GetTypeSize(Type* pType)
    {
        if (pType->isStructTy())
@@ -90,22 +90,12 @@ Value *VPLANEPS(Value* vA, Value* vB, Value* vC, Value* &vX, Value* &vY);
uint32_t IMMED(Value* i);
int32_t S_IMMED(Value* i);

Value *GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<Value*> &indexList);
Value *IN_BOUNDS_GEP(Value* ptr, const std::initializer_list<uint32_t> &indexList);

CallInst *CALL(Value *Callee, const std::initializer_list<Value*> &args, const llvm::Twine& name = "");
CallInst *CALL(Value *Callee) { return CALLA(Callee); }
CallInst *CALL(Value *Callee, Value* arg);
CallInst *CALL2(Value *Callee, Value* arg1, Value* arg2);
CallInst *CALL3(Value *Callee, Value* arg1, Value* arg2, Value* arg3);

LoadInst *LOAD(Value *BasePtr, const std::initializer_list<uint32_t> &offset, const llvm::Twine& name = "");
LoadInst *LOADV(Value *BasePtr, const std::initializer_list<Value*> &offset, const llvm::Twine& name = "");
StoreInst *STORE(Value *Val, Value *BasePtr, const std::initializer_list<uint32_t> &offset);
StoreInst *STOREV(Value *Val, Value *BasePtr, const std::initializer_list<Value*> &offset);

Value *VCMPPS_EQ(Value* a, Value* b) { return VCMPPS(a, b, C((uint8_t)_CMP_EQ_OQ)); }
Value *VCMPPS_LT(Value* a, Value* b) { return VCMPPS(a, b, C((uint8_t)_CMP_LT_OQ)); }
Value *VCMPPS_LE(Value* a, Value* b) { return VCMPPS(a, b, C((uint8_t)_CMP_LE_OQ)); }
@@ -129,30 +119,6 @@ Value *VMASK_16(Value *mask);
Value *EXTRACT_16(Value *x, uint32_t imm);
Value *JOIN_16(Value *a, Value *b);

Value *MASKLOADD(Value* src, Value* mask);

void Gather4(const SWR_FORMAT format, Value* pSrcBase, Value* byteOffsets,
             Value* mask, Value* vGatherComponents[], bool bPackedOutput);

Value *GATHERPS(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);
Value *GATHERPS_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);

void GATHER4PS(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
               Value* mask, Value* vGatherComponents[], bool bPackedOutput);

Value *GATHERDD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);
Value *GATHERDD_16(Value *src, Value *pBase, Value *indices, Value *mask, uint8_t scale = 1);

void GATHER4DD(const SWR_FORMAT_INFO &info, Value* pSrcBase, Value* byteOffsets,
               Value* mask, Value* vGatherComponents[], bool bPackedOutput);

Value *GATHERPD(Value* src, Value* pBase, Value* indices, Value* mask, uint8_t scale = 1);

void SCATTERPS(Value* pDst, Value* vSrc, Value* vOffsets, Value* vMask);

void Shuffle8bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput, Value* vGatherOutput[], bool bPackedOutput);
void Shuffle16bpcGather4(const SWR_FORMAT_INFO &info, Value* vGatherInput[], Value* vGatherOutput[], bool bPackedOutput);

Value *PSHUFB(Value* a, Value* b);
Value *PMOVSXBD(Value* a);
Value *PMOVSXWD(Value* a);
@@ -180,8 +146,6 @@ Value *FCLAMP(Value* src, float low, float high);

CallInst *PRINT(const std::string &printStr);
CallInst *PRINT(const std::string &printStr, const std::initializer_list<Value*> &printArgs);
Value* STACKSAVE();
void STACKRESTORE(Value* pSaved);

Value* POPCNT(Value* a);

@@ -199,9 +163,4 @@ void RDTSC_STOP(Value* pBucketMgr, Value* pId);
Value* CreateEntryAlloca(Function* pFunc, Type* pType);
Value* CreateEntryAlloca(Function* pFunc, Type* pType, Value* pArraySize);

// Static stack allocations for scatter operations
Value* pScatterStackSrc{ nullptr };
Value* pScatterStackOffsets{ nullptr };

uint32_t GetTypeSize(Type* pType);
@@ -55,9 +55,12 @@ enum ConversionType
//////////////////////////////////////////////////////////////////////////
/// Interface to Jitting a fetch shader
//////////////////////////////////////////////////////////////////////////
-struct FetchJit : public Builder
+struct FetchJit :
+    public Builder
{
-    FetchJit(JitManager* pJitMgr) : Builder(pJitMgr){};
+    FetchJit(JitManager* pJitMgr) :
+        Builder(pJitMgr)
+    {}

    Function* Create(const FETCH_COMPILE_STATE& fetchState);

@@ -1361,7 +1364,7 @@ void FetchJit::JitGatherVertices(const FETCH_COMPILE_STATE &fetchState,
            // But, we know that elements must be aligned for FETCH. :)
            // Right shift the offset by a bit and then scale by 2 to remove the sign extension.
            Value *vShiftedOffsets = LSHR(vOffsets, 1);
-            vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2);
+            vVertexElements[currentVertexElement++] = GATHERPS(gatherSrc, pStreamBase, vShiftedOffsets, vGatherMask, 2, mpPrivateContext);
        }
        else
        {
@@ -1,50 +0,0 @@
# Copyright © 2017-2018 Intel Corporation

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.


gen_builder_hpp = custom_target(
  'gen_builder.hpp',
  input : [
    swr_gen_llvm_ir_macros_py,
    join_paths(
      dep_llvm.get_configtool_variable('includedir'), 'llvm', 'IR',
      'IRBuilder.h'
    )
  ],
  output : 'gen_builder.hpp',
  command : [
    prog_python2, '@INPUT0@', '--input', '@INPUT1@', '--output', '@OUTPUT@',
    '--gen_h', '--output-dir', '@OUTDIR@'
  ],
  depend_files : swr_gen_builder_depends,
  build_by_default : true,
)

gen_builder_x86_hpp = custom_target(
  'gen_builder_x86.hpp',
  input : '../codegen/gen_llvm_ir_macros.py',
  output : 'gen_builder_x86.hpp',
  command : [
    prog_python2, '@INPUT0@', '--gen_x86_h', '--output', '@OUTPUT@',
    '--output-dir', '@OUTDIR@'
  ],
  depend_files : swr_gen_builder_depends,
)
@@ -76,6 +76,7 @@ virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
   for (unsigned i = 0; i < inst->Instruction.NumSrcRegs; i++) {
      if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT &&
          inst->Src[i].Register.Dimension &&
+         !inst->Src[i].Register.Indirect &&
          inst->Src[i].Dimension.Index == 0)
         inst->Src[i].Register.Dimension = 0;
   }
@@ -670,10 +670,11 @@ static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
                      ws->info.gart_page_size,
                      RADEON_DOMAIN_GTT,
                      RADEON_FLAG_NO_INTERPROCESS_SHARING |
+                     RADEON_FLAG_READ_ONLY |
                      (ring_type == RING_GFX ||
                       ring_type == RING_COMPUTE ||
                       ring_type == RING_DMA ?
-                      RADEON_FLAG_READ_ONLY | RADEON_FLAG_GTT_WC : 0));
+                      RADEON_FLAG_GTT_WC : 0));
   if (!pb)
      return false;
@@ -215,9 +215,6 @@ static void surf_drm_to_winsys(struct radeon_drm_winsys *ws,
   }

   set_micro_tile_mode(surf_ws, &ws->info);
-   surf_ws->is_displayable = surf_ws->is_linear ||
-                             surf_ws->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
-                             surf_ws->micro_tile_mode == RADEON_MICRO_MODE_ROTATED;
}

static int radeon_winsys_surface_init(struct radeon_winsys *rws,
@@ -150,7 +150,7 @@ vmw_mob_pools_init(struct vmw_winsys_screen *vws)
   vws->pools.mob_shader_slab_fenced =
      simple_fenced_bufmgr_create(vws->pools.mob_shader_slab,
                                  vws->fence_ops);
-   if(!vws->pools.mob_fenced)
+   if(!vws->pools.mob_shader_slab_fenced)
      goto out_no_mob_shader_slab_fenced;

   return TRUE;
@@ -38,7 +38,7 @@ incs_gbm = [
if with_dri2
  files_gbm += files('backends/dri/gbm_dri.c', 'backends/dri/gbm_driint.h')
  deps_gbm += dep_libdrm # TODO: pthread-stubs
-  args_gbm += '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path)
+  args_gbm += '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_driver_dir)
endif
if with_platform_wayland
  deps_gbm += dep_wayland_server
@@ -41,6 +41,7 @@
#include "main/glheader.h"
#include "glapi.h"
#include "glapitable.h"
+#include "main/dispatch.h"

#include "apple_glx.h"
#include "apple_xgl_api.h"
@@ -60,11 +61,12 @@ static void _apple_glapi_create_table(void) {
    assert(__applegl_api);
    memcpy(__applegl_api, __ogl_framework_api, sizeof(struct _glapi_table));

-    _glapi_table_patch(__applegl_api, "ReadPixels", __applegl_glReadPixels);
-    _glapi_table_patch(__applegl_api, "CopyPixels", __applegl_glCopyPixels);
-    _glapi_table_patch(__applegl_api, "CopyColorTable", __applegl_glCopyColorTable);
-    _glapi_table_patch(__applegl_api, "DrawBuffers", __applegl_glDrawBuffer);
-    _glapi_table_patch(__applegl_api, "Viewport", __applegl_glViewport);
+    SET_ReadPixels(__applegl_api, __applegl_glReadPixels);
+    SET_CopyPixels(__applegl_api, __applegl_glCopyPixels);
+    SET_CopyColorTable(__applegl_api, __applegl_glCopyColorTable);
+    SET_DrawBuffer(__applegl_api, __applegl_glDrawBuffer);
+    SET_DrawBuffers(__applegl_api, __applegl_glDrawBuffers);
+    SET_Viewport(__applegl_api, __applegl_glViewport);
}

void apple_glapi_set_dispatch(void) {
@@ -32,7 +32,6 @@
#include <stdlib.h>
#include <assert.h>
#include <GL/gl.h>
-#include <util/debug.h>

/* <rdar://problem/6953344> */
#define glTexImage1D glTexImage1D_OSX
@@ -43,7 +43,6 @@
#ifdef GLX_USE_APPLEGL
#include "apple/apple_glx_context.h"
#include "apple/apple_glx.h"
-#include "util/debug.h"
#else
#include <sys/time.h>
#ifdef XF86VIDMODE
@@ -18,9 +18,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

-if with_dri_platform == 'windows'
-  subdir('windows')
-endif
+subdir('windows')

files_libglx = files(
  'clientattrib.c',
@@ -113,6 +111,7 @@ elif with_dri_platform == 'windows'
  extra_ld_args_libgl = '-Wl,--disable-stdcall-fixup'
endif

+dri_driver_dir = join_paths(get_option('prefix'), dri_drivers_path)
if not with_glvnd
  gl_lib_name = 'GL'
  gl_lib_version = '1.2.0'
@@ -129,8 +128,7 @@ else
endif

gl_lib_cargs = [
-  '-D_REENTRANT',
-  '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_search_path),
+  '-D_REENTRANT', '-DDEFAULT_DRIVER_DIR="@0@"'.format(dri_driver_dir),
]

if dep_xxf86vm != [] and dep_xxf86vm.found()
@@ -75,18 +75,6 @@ indirect_create_context_attribs(struct glx_screen *base,
   return indirect_create_context(base, config_base, shareList, 0);
}

-#ifdef GLX_USE_APPLEGL
-#warning Indirect GLX tests are not built
-extern "C" struct glx_context *
-applegl_create_context(struct glx_screen *base,
-                       struct glx_config *config_base,
-                       struct glx_context *shareList,
-                       int renderType)
-{
-   return indirect_create_context(base, config_base, shareList, renderType);
-}
-#endif
-
/* This is necessary so that we don't have to link with glxcurrent.c
 * which would require us to link with X libraries and what not.
 */
@@ -705,8 +705,6 @@ void __indirect_glFramebufferTextureLayer(void) { }
}
/*@}*/

-#ifndef GLX_USE_APPLEGL
-
class IndirectAPI : public ::testing::Test {
public:
   virtual void SetUp();
@@ -1520,5 +1518,3 @@ TEST_F(IndirectAPI, EXT_texture_array)
{
   EXPECT_EQ((_glapi_proc) __indirect_glFramebufferTextureLayer, table[_glapi_get_proc_offset("glFramebufferTextureLayerEXT")]);
}
-
-#endif
@@ -390,7 +390,7 @@ get_gen_batch_bo(void *user_data, uint64_t address)
      return (struct gen_batch_decode_bo) {
         .addr = sections[s].gtt_offset,
         .map = sections[s].data,
-        .size = sections[s].count,
+        .size = sections[s].count * 4,
      };
   }
}
@@ -547,6 +547,14 @@ anv_cmd_buffer_bind_descriptor_set(struct anv_cmd_buffer *cmd_buffer,
      cmd_buffer->state.descriptors_dirty |=
         set_layout->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS;
   }
+
+   /* Pipeline layout objects are required to live at least while any command
+    * buffers that use them are in recording state. We need to grab a reference
+    * to the pipeline layout being bound here so we can compute correct dynamic
+    * offsets for VK_DESCRIPTOR_TYPE_*_DYNAMIC in dynamic_offset_for_binding()
+    * when we record draw commands that come after this.
+    */
+   pipe_state->layout = layout;
}

void anv_CmdBindDescriptorSets(
@@ -913,8 +921,7 @@ void anv_CmdPushDescriptorSetKHR(

   assert(_set < MAX_SETS);

-   const struct anv_descriptor_set_layout *set_layout =
-      layout->set[_set].layout;
+   struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout;

   struct anv_push_descriptor_set *push_set =
      anv_cmd_buffer_get_push_descriptor_set(cmd_buffer,
@@ -1006,8 +1013,7 @@ void anv_CmdPushDescriptorSetWithTemplateKHR(

   assert(_set < MAX_PUSH_DESCRIPTORS);

-   const struct anv_descriptor_set_layout *set_layout =
-      layout->set[_set].layout;
+   struct anv_descriptor_set_layout *set_layout = layout->set[_set].layout;

   struct anv_push_descriptor_set *push_set =
      anv_cmd_buffer_get_push_descriptor_set(cmd_buffer,
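The lifetime rule in the comment above is enforced by reference counting in the descriptor-set hunks that follow. As a stand-alone model (hypothetical names, in C++ for illustration, and without the atomic operations the real driver would need):

#include <cstdint>

// Minimal model of the set-layout ref/unref pattern used below: the object is
// created with ref_cnt == 1; every holder (pipeline layout, descriptor set,
// recording command buffer) takes a reference, and destruction happens only on
// the last release, so vkDestroyDescriptorSetLayout() need not be the final one.
struct Layout { uint32_t ref_cnt = 1; };

static void layout_ref(Layout* l)   { l->ref_cnt++; }
static void layout_unref(Layout* l) { if (--l->ref_cnt == 0) delete l; }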
@@ -57,16 +57,21 @@ VkResult anv_CreateDescriptorSetLayout(
   struct anv_descriptor_set_binding_layout *bindings;
   struct anv_sampler **samplers;

+   /* We need to allocate descriptor set layouts off the device allocator
+    * with DEVICE scope because they are reference counted and may not be
+    * destroyed when vkDestroyDescriptorSetLayout is called.
+    */
   ANV_MULTIALLOC(ma);
   anv_multialloc_add(&ma, &set_layout, 1);
   anv_multialloc_add(&ma, &bindings, max_binding + 1);
   anv_multialloc_add(&ma, &samplers, immutable_sampler_count);

-   if (!anv_multialloc_alloc2(&ma, &device->alloc, pAllocator,
-                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
+   if (!anv_multialloc_alloc(&ma, &device->alloc,
+                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE))
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   memset(set_layout, 0, sizeof(*set_layout));
+   set_layout->ref_cnt = 1;
   set_layout->binding_count = max_binding + 1;

   for (uint32_t b = 0; b <= max_binding; b++) {
@@ -204,7 +209,7 @@ void anv_DestroyDescriptorSetLayout(
   if (!set_layout)
      return;

-   vk_free2(&device->alloc, pAllocator, set_layout);
+   anv_descriptor_set_layout_unref(device, set_layout);
}

static void
@@ -246,6 +251,7 @@ VkResult anv_CreatePipelineLayout(
      ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout,
                      pCreateInfo->pSetLayouts[set]);
      layout->set[set].layout = set_layout;
+      anv_descriptor_set_layout_ref(set_layout);

      layout->set[set].dynamic_offset_start = dynamic_offset_count;
      for (uint32_t b = 0; b < set_layout->binding_count; b++) {
@@ -290,6 +296,9 @@ void anv_DestroyPipelineLayout(
   if (!pipeline_layout)
      return;

+   for (uint32_t i = 0; i < pipeline_layout->num_sets; i++)
+      anv_descriptor_set_layout_unref(device, pipeline_layout->set[i].layout);
+
   vk_free2(&device->alloc, pAllocator, pipeline_layout);
}

@@ -423,7 +432,7 @@ struct surface_state_free_list_entry {
VkResult
anv_descriptor_set_create(struct anv_device *device,
                          struct anv_descriptor_pool *pool,
-                         const struct anv_descriptor_set_layout *layout,
+                         struct anv_descriptor_set_layout *layout,
                          struct anv_descriptor_set **out_set)
{
   struct anv_descriptor_set *set;
@@ -455,8 +464,10 @@ anv_descriptor_set_create(struct anv_device *device,
      }
   }

-   set->size = size;
   set->layout = layout;
+   anv_descriptor_set_layout_ref(layout);
+
+   set->size = size;
   set->buffer_views =
      (struct anv_buffer_view *) &set->descriptors[layout->size];
   set->buffer_count = layout->buffer_count;
@@ -512,6 +523,8 @@ anv_descriptor_set_destroy(struct anv_device *device,
                           struct anv_descriptor_pool *pool,
                           struct anv_descriptor_set *set)
{
+   anv_descriptor_set_layout_unref(device, set->layout);
+
   /* Put the buffer view surface state back on the free list. */
   for (uint32_t b = 0; b < set->buffer_count; b++) {
      struct surface_state_free_list_entry *entry =
@@ -83,7 +83,7 @@ EXTENSIONS = [
     Extension('VK_KHR_wayland_surface', 6, 'VK_USE_PLATFORM_WAYLAND_KHR'),
     Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
     Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
-    Extension('VK_KHX_multiview', 1, False),
+    Extension('VK_KHX_multiview', 1, True),
     Extension('VK_EXT_debug_report', 8, True),
     Extension('VK_EXT_external_memory_dma_buf', 1, True),
 ]
@@ -38,9 +38,10 @@ void anv_nir_lower_push_constants(nir_shader *shader);

 bool anv_nir_lower_multiview(nir_shader *shader, uint32_t view_mask);

 bool anv_nir_lower_ycbcr_textures(nir_shader *shader,
-                                  struct anv_pipeline *pipeline);
+                                  struct anv_pipeline_layout *layout);

 void anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
+                                   struct anv_pipeline_layout *layout,
                                    nir_shader *shader,
                                    struct brw_stage_prog_data *prog_data,
                                    struct anv_pipeline_bind_map *map);
@@ -147,7 +147,7 @@ lower_res_reindex_intrinsic(nir_intrinsic_instr *intrin,
    * array elements are sequential.  A resource_reindex just turns into an
    * add of the two indices.
    */
-   assert(intrin->src[0].is_ssa && intrin->src[0].is_ssa);
+   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
    nir_ssa_def *new_index = nir_iadd(b, intrin->src[0].ssa,
                                      intrin->src[1].ssa);

@@ -326,11 +326,11 @@ setup_vec4_uniform_value(uint32_t *params, uint32_t offset, unsigned n)

 void
 anv_nir_apply_pipeline_layout(struct anv_pipeline *pipeline,
+                              struct anv_pipeline_layout *layout,
                               nir_shader *shader,
                               struct brw_stage_prog_data *prog_data,
                               struct anv_pipeline_bind_map *map)
 {
-   struct anv_pipeline_layout *layout = pipeline->layout;
    gl_shader_stage stage = shader->info.stage;

    struct apply_pipeline_layout_state state = {
@@ -316,13 +316,13 @@ swizzle_channel(struct isl_swizzle swizzle, unsigned channel)
 }

 static bool
-try_lower_tex_ycbcr(struct anv_pipeline *pipeline,
+try_lower_tex_ycbcr(struct anv_pipeline_layout *layout,
                     nir_builder *builder,
                     nir_tex_instr *tex)
 {
    nir_variable *var = tex->texture->var;
    const struct anv_descriptor_set_layout *set_layout =
-      pipeline->layout->set[var->data.descriptor_set].layout;
+      layout->set[var->data.descriptor_set].layout;
    const struct anv_descriptor_set_binding_layout *binding =
       &set_layout->binding[var->data.binding];

@@ -440,7 +440,8 @@ try_lower_tex_ycbcr(struct anv_pipeline *pipeline,
 }

 bool
-anv_nir_lower_ycbcr_textures(nir_shader *shader, struct anv_pipeline *pipeline)
+anv_nir_lower_ycbcr_textures(nir_shader *shader,
+                             struct anv_pipeline_layout *layout)
 {
    bool progress = false;

@@ -458,7 +459,7 @@ anv_nir_lower_ycbcr_textures(nir_shader *shader, struct anv_pipeline *pipeline)
          continue;

       nir_tex_instr *tex = nir_instr_as_tex(instr);
-      function_progress |= try_lower_tex_ycbcr(pipeline, &builder, tex);
+      function_progress |= try_lower_tex_ycbcr(layout, &builder, tex);
    }
 }
@@ -349,6 +349,7 @@ populate_cs_prog_key(const struct gen_device_info *devinfo,

 static void
 anv_pipeline_hash_shader(struct anv_pipeline *pipeline,
+                         struct anv_pipeline_layout *layout,
                          struct anv_shader_module *module,
                          const char *entrypoint,
                          gl_shader_stage stage,

@@ -363,10 +364,8 @@ anv_pipeline_hash_shader(struct anv_pipeline *pipeline,
       _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
                         sizeof(pipeline->subpass->view_mask));
    }
-   if (pipeline->layout) {
-      _mesa_sha1_update(&ctx, pipeline->layout->sha1,
-                        sizeof(pipeline->layout->sha1));
-   }
+   if (layout)
+      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));
    _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
    _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
    _mesa_sha1_update(&ctx, &stage, sizeof(stage));

@@ -382,6 +381,7 @@ anv_pipeline_hash_shader(struct anv_pipeline *pipeline,
 static nir_shader *
 anv_pipeline_compile(struct anv_pipeline *pipeline,
                      void *mem_ctx,
+                     struct anv_pipeline_layout *layout,
                      struct anv_shader_module *module,
                      const char *entrypoint,
                      gl_shader_stage stage,

@@ -398,7 +398,7 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
    if (nir == NULL)
       return NULL;

-   NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, pipeline);
+   NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);

    NIR_PASS_V(nir, anv_nir_lower_push_constants);

@@ -438,8 +438,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
       pipeline->needs_data_cache = true;

    /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
-   if (pipeline->layout)
-      anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map);
+   if (layout)
+      anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map);

    if (stage != MESA_SHADER_COMPUTE)
       brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges);

@@ -508,8 +508,10 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,

    populate_vs_prog_key(&pipeline->device->info, &key);

+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
+
    if (cache) {
-      anv_pipeline_hash_shader(pipeline, module, entrypoint,
+      anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
                                MESA_SHADER_VERTEX, spec_info,
                                &key, sizeof(key), sha1);
       bin = anv_pipeline_cache_search(cache, sha1, 20);

@@ -527,7 +529,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,

       void *mem_ctx = ralloc_context(NULL);

-      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx,
+      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout,
                                              module, entrypoint,
                                              MESA_SHADER_VERTEX, spec_info,
                                              &prog_data.base.base, &map);

@@ -633,11 +635,13 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
    populate_sampler_prog_key(&pipeline->device->info, &tes_key.tex);
    tcs_key.input_vertices = info->pTessellationState->patchControlPoints;

+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
+
    if (cache) {
-      anv_pipeline_hash_shader(pipeline, tcs_module, tcs_entrypoint,
+      anv_pipeline_hash_shader(pipeline, layout, tcs_module, tcs_entrypoint,
                                MESA_SHADER_TESS_CTRL, tcs_spec_info,
                                &tcs_key, sizeof(tcs_key), tcs_sha1);
-      anv_pipeline_hash_shader(pipeline, tes_module, tes_entrypoint,
+      anv_pipeline_hash_shader(pipeline, layout, tes_module, tes_entrypoint,
                                MESA_SHADER_TESS_EVAL, tes_spec_info,
                                &tes_key, sizeof(tes_key), tes_sha1);
       memcpy(&tcs_sha1[20], tes_sha1, 20);

@@ -666,11 +670,13 @@ anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline,
       void *mem_ctx = ralloc_context(NULL);

       nir_shader *tcs_nir =
-         anv_pipeline_compile(pipeline, mem_ctx, tcs_module, tcs_entrypoint,
+         anv_pipeline_compile(pipeline, mem_ctx, layout,
+                              tcs_module, tcs_entrypoint,
                               MESA_SHADER_TESS_CTRL, tcs_spec_info,
                               &tcs_prog_data.base.base, &tcs_map);
       nir_shader *tes_nir =
-         anv_pipeline_compile(pipeline, mem_ctx, tes_module, tes_entrypoint,
+         anv_pipeline_compile(pipeline, mem_ctx, layout,
+                              tes_module, tes_entrypoint,
                               MESA_SHADER_TESS_EVAL, tes_spec_info,
                               &tes_prog_data.base.base, &tes_map);
       if (tcs_nir == NULL || tes_nir == NULL) {

@@ -771,8 +777,10 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,

    populate_gs_prog_key(&pipeline->device->info, &key);

+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
+
    if (cache) {
-      anv_pipeline_hash_shader(pipeline, module, entrypoint,
+      anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
                                MESA_SHADER_GEOMETRY, spec_info,
                                &key, sizeof(key), sha1);
       bin = anv_pipeline_cache_search(cache, sha1, 20);

@@ -790,7 +798,7 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,

       void *mem_ctx = ralloc_context(NULL);

-      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx,
+      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout,
                                              module, entrypoint,
                                              MESA_SHADER_GEOMETRY, spec_info,
                                              &prog_data.base.base, &map);

@@ -849,8 +857,10 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,

    populate_wm_prog_key(pipeline, info, &key);

+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
+
    if (cache) {
-      anv_pipeline_hash_shader(pipeline, module, entrypoint,
+      anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
                                MESA_SHADER_FRAGMENT, spec_info,
                                &key, sizeof(key), sha1);
       bin = anv_pipeline_cache_search(cache, sha1, 20);

@@ -868,7 +878,7 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,

       void *mem_ctx = ralloc_context(NULL);

-      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx,
+      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout,
                                              module, entrypoint,
                                              MESA_SHADER_FRAGMENT, spec_info,
                                              &prog_data.base, &map);

@@ -997,8 +1007,10 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,

    populate_cs_prog_key(&pipeline->device->info, &key);

+   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);
+
    if (cache) {
-      anv_pipeline_hash_shader(pipeline, module, entrypoint,
+      anv_pipeline_hash_shader(pipeline, layout, module, entrypoint,
                                MESA_SHADER_COMPUTE, spec_info,
                                &key, sizeof(key), sha1);
       bin = anv_pipeline_cache_search(cache, sha1, 20);

@@ -1016,7 +1028,7 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,

       void *mem_ctx = ralloc_context(NULL);

-      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx,
+      nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout,
                                              module, entrypoint,
                                              MESA_SHADER_COMPUTE, spec_info,
                                              &prog_data.base, &map);

@@ -1279,8 +1291,6 @@ anv_pipeline_init(struct anv_pipeline *pipeline,
    assert(pCreateInfo->subpass < render_pass->subpass_count);
    pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];

-   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);
-
    result = anv_reloc_list_init(&pipeline->batch_relocs, alloc);
    if (result != VK_SUCCESS)
       return result;
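The hunks above change what goes into the shader-cache key: the pipeline layout's SHA-1 is now passed in explicitly instead of being read off the pipeline. The essential idea is that every compilation input, including the layout, must be folded into one digest in a fixed order. A minimal standalone sketch of that keying scheme, using a toy FNV-1a hash in place of the driver's SHA-1 (all names and digests below are illustrative, not from this patch):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Toy FNV-1a stand-in for _mesa_sha1_update(): the point is only that every
 * input affecting compilation is folded into the cache key, in a fixed order. */
static uint64_t fnv1a(uint64_t h, const void *data, size_t len)
{
   const unsigned char *p = data;
   for (size_t i = 0; i < len; i++)
      h = (h ^ p[i]) * 0x100000001b3ull;
   return h;
}

int main(void)
{
   const char layout_sha1[20] = "layout-digest-12345";  /* hypothetical */
   const char module_sha1[20] = "module-digest-67890";  /* hypothetical */
   const char *entrypoint = "main";
   int stage = 0;  /* e.g. the vertex stage */

   uint64_t key = 0xcbf29ce484222325ull;
   key = fnv1a(key, layout_sha1, sizeof(layout_sha1));  /* layout now hashed */
   key = fnv1a(key, module_sha1, sizeof(module_sha1));
   key = fnv1a(key, entrypoint, strlen(entrypoint));
   key = fnv1a(key, &stage, sizeof(stage));
   printf("cache key: %016llx\n", (unsigned long long)key);
   return 0;
}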
@@ -1199,6 +1199,9 @@ struct anv_descriptor_set_binding_layout {
 };

 struct anv_descriptor_set_layout {
+   /* Descriptor set layouts can be destroyed at almost any time */
+   uint32_t ref_cnt;
+
    /* Number of bindings in this descriptor set */
    uint16_t binding_count;

@@ -1218,6 +1221,22 @@ struct anv_descriptor_set_layout {
    struct anv_descriptor_set_binding_layout binding[0];
 };

+static inline void
+anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
+{
+   assert(layout && layout->ref_cnt >= 1);
+   p_atomic_inc(&layout->ref_cnt);
+}
+
+static inline void
+anv_descriptor_set_layout_unref(struct anv_device *device,
+                                struct anv_descriptor_set_layout *layout)
+{
+   assert(layout && layout->ref_cnt >= 1);
+   if (p_atomic_dec_zero(&layout->ref_cnt))
+      vk_free(&device->alloc, layout);
+}
+
 struct anv_descriptor {
    VkDescriptorType type;
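These two inline helpers carry the whole lifetime scheme of this series: every holder of a set layout (pipeline layouts, descriptor sets) takes a reference, and vkDestroyDescriptorSetLayout merely drops the creator's reference. A self-contained sketch of the same pattern, using C11 atomics in place of Mesa's p_atomic_inc()/p_atomic_dec_zero() wrappers (the struct and function names here are illustrative, not from the patch):

#include <assert.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct layout {
   atomic_uint ref_cnt;   /* toy stand-in for the driver's ref_cnt field */
};

static struct layout *layout_create(void)
{
   struct layout *l = malloc(sizeof(*l));
   atomic_init(&l->ref_cnt, 1);   /* the creator holds the first reference */
   return l;
}

static void layout_ref(struct layout *l)
{
   assert(atomic_load(&l->ref_cnt) >= 1);
   atomic_fetch_add(&l->ref_cnt, 1);
}

static void layout_unref(struct layout *l)
{
   assert(atomic_load(&l->ref_cnt) >= 1);
   /* p_atomic_dec_zero() equivalent: free only when the last ref drops */
   if (atomic_fetch_sub(&l->ref_cnt, 1) == 1)
      free(l);
}

int main(void)
{
   struct layout *l = layout_create();   /* vkCreateDescriptorSetLayout */
   layout_ref(l);                        /* pipeline layout takes a ref */
   layout_unref(l);                      /* vkDestroyDescriptorSetLayout */
   layout_unref(l);                      /* pipeline layout destroyed: freed here */
   puts("layout freed exactly once");
   return 0;
}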
@@ -1239,7 +1258,7 @@ struct anv_descriptor {
 };

 struct anv_descriptor_set {
-   const struct anv_descriptor_set_layout *layout;
+   struct anv_descriptor_set_layout *layout;
    uint32_t size;
    uint32_t buffer_count;
    struct anv_buffer_view *buffer_views;

@@ -1363,7 +1382,7 @@ anv_descriptor_set_write_template(struct anv_descriptor_set *set,
 VkResult
 anv_descriptor_set_create(struct anv_device *device,
                           struct anv_descriptor_pool *pool,
-                          const struct anv_descriptor_set_layout *layout,
+                          struct anv_descriptor_set_layout *layout,
                           struct anv_descriptor_set **out_set);

 void

@@ -1675,6 +1694,7 @@ struct anv_attachment_state {
  */
 struct anv_cmd_pipeline_state {
    struct anv_pipeline *pipeline;
+   struct anv_pipeline_layout *layout;

    struct anv_descriptor_set *descriptors[MAX_SETS];
    uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];

@@ -2124,7 +2144,6 @@ struct anv_pipeline {
    struct anv_dynamic_state dynamic_state;

    struct anv_subpass * subpass;
-   struct anv_pipeline_layout * layout;

    bool needs_data_cache;
@@ -1529,7 +1529,6 @@ anv_descriptor_for_binding(const struct anv_cmd_pipeline_state *pipe_state,

 static uint32_t
 dynamic_offset_for_binding(const struct anv_cmd_pipeline_state *pipe_state,
-                           const struct anv_pipeline *pipeline,
                            const struct anv_pipeline_binding *binding)
 {
    assert(binding->set < MAX_SETS);

@@ -1537,7 +1536,7 @@ dynamic_offset_for_binding(const struct anv_cmd_pipeline_state *pipe_state,
       pipe_state->descriptors[binding->set];

    uint32_t dynamic_offset_idx =
-      pipeline->layout->set[binding->set].dynamic_offset_start +
+      pipe_state->layout->set[binding->set].dynamic_offset_start +
       set->layout->binding[binding->binding].dynamic_offset_index +
       binding->index;
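The index arithmetic above is why the command buffer must now remember the bound pipeline layout: all dynamic buffers in a pipeline layout share one flat offset array, and each set owns a contiguous slice of it starting at dynamic_offset_start. A toy, self-contained version of the same computation (struct names hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Hypothetical, flattened view of the driver's bookkeeping. */
struct set_slice    { uint32_t dynamic_offset_start; };
struct binding_info { uint32_t dynamic_offset_index; };

static uint32_t
dynamic_offset_idx(const struct set_slice *set,
                   const struct binding_info *binding,
                   uint32_t array_index)
{
   /* set slice base + binding's offset within the set + array element */
   return set->dynamic_offset_start + binding->dynamic_offset_index + array_index;
}

int main(void)
{
   struct set_slice set1  = { .dynamic_offset_start = 3 }; /* set 0 used 3 slots */
   struct binding_info b  = { .dynamic_offset_index = 1 }; /* 2nd dynamic binding */
   printf("index = %u\n", dynamic_offset_idx(&set1, &b, 0)); /* prints 4 */
   return 0;
}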
@@ -1725,7 +1724,7 @@ emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
       case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: {
          /* Compute the offset within the buffer */
          uint32_t dynamic_offset =
-            dynamic_offset_for_binding(pipe_state, pipeline, binding);
+            dynamic_offset_for_binding(pipe_state, binding);
          uint64_t offset = desc->offset + dynamic_offset;
          /* Clamp to the buffer size */
          offset = MIN2(offset, desc->buffer->size);

@@ -2000,8 +1999,7 @@ cmd_buffer_flush_push_constants(struct anv_cmd_buffer *cmd_buffer,
          assert(desc->type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC);

          uint32_t dynamic_offset =
-            dynamic_offset_for_binding(&gfx_state->base,
-                                       pipeline, binding);
+            dynamic_offset_for_binding(&gfx_state->base, binding);
          uint32_t buf_offset =
             MIN2(desc->offset + dynamic_offset, desc->buffer->size);
          uint32_t buf_range =

@@ -3208,17 +3206,6 @@ genX(cmd_buffer_set_subpass)(struct anv_cmd_buffer *cmd_buffer,
    if (GEN_GEN == 7)
       cmd_buffer->state.gfx.vb_dirty |= ~0;

-   /* It is possible to start a render pass with an old pipeline.  Because the
-    * render pass and subpass index are both baked into the pipeline, this is
-    * highly unlikely.  In order to do so, it requires that you have a render
-    * pass with a single subpass and that you use that render pass twice
-    * back-to-back and use the same pipeline at the start of the second render
-    * pass as at the end of the first.  In order to avoid unpredictable issues
-    * with this edge case, we just dirty the pipeline at the start of every
-    * subpass.
-    */
-   cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_PIPELINE;
-
    /* Perform transitions to the subpass layout before any writes have
    * occurred.
    */
@@ -1756,7 +1756,6 @@ compute_pipeline_create(
       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

    pipeline->device = device;
-   pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout);

    pipeline->blend_state.map = NULL;
@@ -110,8 +110,7 @@ static char *loader_get_dri_config_device_id(void)

 static char *drm_construct_id_path_tag(drmDevicePtr device)
 {
-   /* Length of "pci-xxxx_xx_xx_x\0" */
-#define PCI_ID_PATH_TAG_LENGTH 17
+#define PCI_ID_PATH_TAG_LENGTH sizeof("pci-xxxx_xx_xx_x")
    char *tag = NULL;

    if (device->bustype == DRM_BUS_PCI) {
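The new definition is equivalent because sizeof applied to a string literal counts the terminating NUL, so it still evaluates to 17 while staying self-documenting against the "pci-xxxx_xx_xx_x" pattern. A one-line check:

#include <stdio.h>

int main(void)
{
   /* sizeof a string literal includes the terminating '\0', so this prints 17,
    * the same value the old hand-written #define carried. */
   printf("%zu\n", sizeof("pci-xxxx_xx_xx_x"));
   return 0;
}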
@@ -2,9 +2,6 @@
 #
 # mapi may be used in several ways
 #
-# - In default mode, mapi implements the interface defined by mapi.h.  To use
-#   this mode, compile MAPI_FILES.
-#
 # - In util mode, mapi provides utility functions for use with glapi.  To use
 #   this mode, compile MAPI_UTIL_FILES with MAPI_MODE_UTIL defined.
 #

@@ -30,14 +27,6 @@ MAPI_BRIDGE_FILES = \
        entry_ppc64le_tsd.h \
        mapi_tmp.h

-MAPI_FILES = \
-       entry.c \
-       stub.c \
-       stub.h \
-       table.c \
-       table.h \
-       $(MAPI_UTIL_FILES)
-
 MAPI_GLAPI_FILES = \
        entry.c \
        mapi_glapi.c \
@@ -56,7 +56,6 @@ header = """/* GLXEXT is the define used in the xserver when the GLX extension i
 #endif

 #include <stdlib.h>
 #include <stdio.h>
-#include <string.h>

 #include "main/glheader.h"

@@ -145,19 +144,6 @@ _glapi_create_table_from_handle(void *handle, const char *symbol_prefix) {

    return disp;
 }

-void
-_glapi_table_patch(struct _glapi_table *table, const char *name, void *wrapper)
-{
-   for (int func_index = 0; func_index < GLAPI_TABLE_COUNT; ++func_index) {
-      if (!strcmp(_glapi_table_func_names[func_index], name)) {
-         ((void **)table)[func_index] = wrapper;
-         return;
-      }
-   }
-   fprintf(stderr, "could not patch %s in dispatch table\\n", name);
-}
-
 """
@@ -161,9 +161,6 @@ _glapi_get_proc_name(unsigned int offset);

 #if defined(GLX_USE_APPLEGL) || defined(GLX_USE_WINDOWSGL)
 _GLAPI_EXPORT struct _glapi_table *
 _glapi_create_table_from_handle(void *handle, const char *symbol_prefix);
-
-_GLAPI_EXPORT void
-_glapi_table_patch(struct _glapi_table *, const char *name, void *wrapper);
 #endif
@@ -27,25 +27,12 @@
 #define _GLAPI_PRIV_H


 #ifdef HAVE_DIX_CONFIG_H
 #include <dix-config.h>
+#include "glapi/mesa.h"
 #else /* HAVE_DIX_CONFIG_H */
 #define GL_GLEXT_PROTOTYPES
 #include "GL/gl.h"
 #include "GL/glext.h"
-
-#ifndef GL_OES_fixed_point
-typedef int GLfixed;
-#endif
-typedef int GLclampx;
-
-#ifndef GL_OES_EGL_image
-typedef void *GLeglImageOES;
-#endif
-
 #endif /* HAVE_DIX_CONFIG_H */

 #include "glapi/glapi.h"
@@ -393,7 +393,6 @@ VBO_FILES = \
        vbo/vbo_attrib.h \
        vbo/vbo_attrib_tmp.h \
-       vbo/vbo_context.c \
        vbo/vbo_context.h \
        vbo/vbo_exec_api.c \
        vbo/vbo_exec_array.c \
        vbo/vbo_exec.c \

@@ -405,6 +404,7 @@ VBO_FILES = \
        vbo/vbo_noop.c \
        vbo/vbo_noop.h \
        vbo/vbo_primitive_restart.c \
+       vbo/vbo_private.h \
        vbo/vbo_rebase.c \
        vbo/vbo_save_api.c \
        vbo/vbo_save.c \
@@ -277,7 +277,7 @@ _mesa_init_driver_state(struct gl_context *ctx)
    }

    ctx->Driver.LineWidth(ctx, ctx->Line.Width);
-   ctx->Driver.LogicOpcode(ctx, ctx->Color.LogicOp);
+   ctx->Driver.LogicOpcode(ctx, ctx->Color._LogicOp);
    ctx->Driver.PointSize(ctx, ctx->Point.Size);
    ctx->Driver.PolygonStipple(ctx, (const GLubyte *) ctx->PolygonStipple);
    ctx->Driver.Scissor(ctx);
@@ -57,7 +57,7 @@ mesa_dri_drivers_la_LDFLAGS = \
        -module \
        -no-undefined \
        -avoid-version \
-       $(LD_BUILD_ID) \
+       -Wl,--build-id=sha1 \
        $(BSYMBOLIC) \
        $(GC_SECTIONS) \
        $(LD_NO_UNDEFINED)
@@ -573,16 +573,16 @@ i830Scissor(struct gl_context * ctx)
 }

 static void
-i830LogicOp(struct gl_context * ctx, GLenum opcode)
+i830LogicOp(struct gl_context * ctx, enum gl_logicop_mode opcode)
 {
    struct i830_context *i830 = i830_context(ctx);
-   int tmp = intel_translate_logic_op(opcode);

    DBG("%s\n", __func__);

+   assert((unsigned)opcode <= 15);
    I830_STATECHANGE(i830, I830_UPLOAD_CTX);
    i830->state.Ctx[I830_CTXREG_STATE4] &= ~LOGICOP_MASK;
-   i830->state.Ctx[I830_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp);
+   i830->state.Ctx[I830_CTXREG_STATE4] |= opcode;
 }
@@ -539,16 +539,16 @@ i915Scissor(struct gl_context * ctx)
 }

 static void
-i915LogicOp(struct gl_context * ctx, GLenum opcode)
+i915LogicOp(struct gl_context * ctx, enum gl_logicop_mode opcode)
 {
    struct i915_context *i915 = I915_CONTEXT(ctx);
-   int tmp = intel_translate_logic_op(opcode);

    DBG("%s\n", __func__);

+   assert((unsigned)opcode <= 15);
    I915_STATECHANGE(i915, I915_UPLOAD_CTX);
    i915->state.Ctx[I915_CTXREG_STATE4] &= ~LOGICOP_MASK;
-   i915->state.Ctx[I915_CTXREG_STATE4] |= LOGIC_OP_FUNC(tmp);
+   i915->state.Ctx[I915_CTXREG_STATE4] |= LOGIC_OP_FUNC(opcode);
 }
@@ -48,27 +48,9 @@ intel_miptree_set_alpha_to_one(struct intel_context *intel,
                                struct intel_mipmap_tree *mt,
                                int x, int y, int width, int height);

-static GLuint translate_raster_op(GLenum logicop)
+static GLuint translate_raster_op(enum gl_logicop_mode logicop)
 {
-   switch(logicop) {
-   case GL_CLEAR: return 0x00;
-   case GL_AND: return 0x88;
-   case GL_AND_REVERSE: return 0x44;
-   case GL_COPY: return 0xCC;
-   case GL_AND_INVERTED: return 0x22;
-   case GL_NOOP: return 0xAA;
-   case GL_XOR: return 0x66;
-   case GL_OR: return 0xEE;
-   case GL_NOR: return 0x11;
-   case GL_EQUIV: return 0x99;
-   case GL_INVERT: return 0x55;
-   case GL_OR_REVERSE: return 0xDD;
-   case GL_COPY_INVERTED: return 0x33;
-   case GL_OR_INVERTED: return 0xBB;
-   case GL_NAND: return 0x77;
-   case GL_SET: return 0xFF;
-   default: return 0;
-   }
+   return logicop | (logicop << 4);
 }

 static uint32_t
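The one-line replacement works because each value of enum gl_logicop_mode is the 4-bit truth table of the operation over (source, destination) bits (for example, AND is 0b1000 = 8), and the blitter's 8-bit raster op for a pure logic op is that nibble repeated, which is exactly the set of codes in the removed switch. A self-contained spot-check (the enum values below are restated for illustration, derived from the old table rather than quoted from a header):

#include <assert.h>
#include <stdio.h>

/* Each op's value is its truth table over (src, dst): AND -> 0b1000 = 8. */
enum gl_logicop_mode {
   COLOR_LOGICOP_CLEAR = 0,
   COLOR_LOGICOP_XOR   = 6,
   COLOR_LOGICOP_AND   = 8,
   COLOR_LOGICOP_NOOP  = 10,
   COLOR_LOGICOP_COPY  = 12,
   COLOR_LOGICOP_SET   = 15,
};

static unsigned translate_raster_op(enum gl_logicop_mode logicop)
{
   /* The BLT ROP duplicates the truth-table nibble into both halves. */
   return logicop | (logicop << 4);
}

int main(void)
{
   /* Spot-check against the old hand-written switch table. */
   assert(translate_raster_op(COLOR_LOGICOP_CLEAR) == 0x00);
   assert(translate_raster_op(COLOR_LOGICOP_XOR)   == 0x66);
   assert(translate_raster_op(COLOR_LOGICOP_AND)   == 0x88);
   assert(translate_raster_op(COLOR_LOGICOP_NOOP)  == 0xAA);
   assert(translate_raster_op(COLOR_LOGICOP_COPY)  == 0xCC);
   assert(translate_raster_op(COLOR_LOGICOP_SET)   == 0xFF);
   puts("ROP nibble trick matches the old table");
   return 0;
}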
@@ -90,144 +72,23 @@ br13_for_cpp(int cpp)
    }
 }

-/**
- * Implements a rectangular block transfer (blit) of pixels between two
- * miptrees.
- *
- * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
- * but limited, pitches and sizes allowed.
- *
- * The src/dst coordinates are relative to the given level/slice of the
- * miptree.
- *
- * If @src_flip or @dst_flip is set, then the rectangle within that miptree
- * will be inverted (including scanline order) when copying.  This is common
- * in GL when copying between window system and user-created
- * renderbuffers/textures.
- */
-bool
-intel_miptree_blit(struct intel_context *intel,
-                   struct intel_mipmap_tree *src_mt,
-                   int src_level, int src_slice,
-                   uint32_t src_x, uint32_t src_y, bool src_flip,
-                   struct intel_mipmap_tree *dst_mt,
-                   int dst_level, int dst_slice,
-                   uint32_t dst_x, uint32_t dst_y, bool dst_flip,
-                   uint32_t width, uint32_t height,
-                   GLenum logicop)
-{
-   /* No sRGB decode or encode is done by the hardware blitter, which is
-    * consistent with what we want in the callers (glCopyTexSubImage(),
-    * glBlitFramebuffer(), texture validation, etc.).
-    */
-   mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
-   mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
-
-   /* The blitter doesn't support doing any format conversions.  We do also
-    * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
-    * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
-    * channel to 1.0 at the end.
-    */
-   if (src_format != dst_format &&
-      ((src_format != MESA_FORMAT_B8G8R8A8_UNORM &&
-        src_format != MESA_FORMAT_B8G8R8X8_UNORM) ||
-       (dst_format != MESA_FORMAT_B8G8R8A8_UNORM &&
-        dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) {
-      perf_debug("%s: Can't use hardware blitter from %s to %s, "
-                 "falling back.\n", __func__,
-                 _mesa_get_format_name(src_format),
-                 _mesa_get_format_name(dst_format));
-      return false;
-   }
-
-   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
-    * Data Size Limitations):
-    *
-    *    The BLT engine is capable of transferring very large quantities of
-    *    graphics data.  Any graphics data read from and written to the
-    *    destination is permitted to represent a number of pixels that
-    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
-    *    at the destination.  The maximum number of pixels that may be
-    *    represented per scan line's worth of graphics data depends on the
-    *    color depth.
-    *
-    * Furthermore, intelEmitCopyBlit (which is called below) uses a signed
-    * 16-bit integer to represent buffer pitch, so it can only handle buffer
-    * pitches < 32k.
-    *
-    * As a result of these two limitations, we can only use the blitter to do
-    * this copy when the region's pitch is less than 32k.
-    */
-   if (src_mt->region->pitch > 32768 ||
-       dst_mt->region->pitch > 32768) {
-      perf_debug("Falling back due to >32k pitch\n");
-      return false;
-   }
-
-   if (src_flip)
-      src_y = src_mt->level[src_level].height - src_y - height;
-
-   if (dst_flip)
-      dst_y = dst_mt->level[dst_level].height - dst_y - height;
-
-   int src_pitch = src_mt->region->pitch;
-   if (src_flip != dst_flip)
-      src_pitch = -src_pitch;
-
-   uint32_t src_image_x, src_image_y;
-   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
-                                  &src_image_x, &src_image_y);
-   src_x += src_image_x;
-   src_y += src_image_y;
-
-   uint32_t dst_image_x, dst_image_y;
-   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
-                                  &dst_image_x, &dst_image_y);
-   dst_x += dst_image_x;
-   dst_y += dst_image_y;
-
-   if (!intelEmitCopyBlit(intel,
-                          src_mt->cpp,
-                          src_pitch,
-                          src_mt->region->bo, src_mt->offset,
-                          src_mt->region->tiling,
-                          dst_mt->region->pitch,
-                          dst_mt->region->bo, dst_mt->offset,
-                          dst_mt->region->tiling,
-                          src_x, src_y,
-                          dst_x, dst_y,
-                          width, height,
-                          logicop)) {
-      return false;
-   }
-
-   if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM &&
-       dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM) {
-      intel_miptree_set_alpha_to_one(intel, dst_mt,
-                                     dst_x, dst_y,
-                                     width, height);
-   }
-
-   return true;
-}
-
 /* Copy BitBlt
  */
-bool
-intelEmitCopyBlit(struct intel_context *intel,
-                  GLuint cpp,
-                  GLshort src_pitch,
-                  drm_intel_bo *src_buffer,
-                  GLuint src_offset,
-                  uint32_t src_tiling,
-                  GLshort dst_pitch,
-                  drm_intel_bo *dst_buffer,
-                  GLuint dst_offset,
-                  uint32_t dst_tiling,
-                  GLshort src_x, GLshort src_y,
-                  GLshort dst_x, GLshort dst_y,
-                  GLshort w, GLshort h,
-                  GLenum logic_op)
+static bool
+emit_copy_blit(struct intel_context *intel,
+               GLuint cpp,
+               GLshort src_pitch,
+               drm_intel_bo *src_buffer,
+               GLuint src_offset,
+               uint32_t src_tiling,
+               GLshort dst_pitch,
+               drm_intel_bo *dst_buffer,
+               GLuint dst_offset,
+               uint32_t dst_tiling,
+               GLshort src_x, GLshort src_y,
+               GLshort dst_x, GLshort dst_y,
+               GLshort w, GLshort h,
+               enum gl_logicop_mode logic_op)
 {
    GLuint CMD, BR13, pass = 0;
    int dst_y2 = dst_y + h;
@@ -338,6 +199,126 @@ intelEmitCopyBlit(struct intel_context *intel,
    return true;
 }

+/**
+ * Implements a rectangular block transfer (blit) of pixels between two
+ * miptrees.
+ *
+ * Our blitter can operate on 1, 2, or 4-byte-per-pixel data, with generous,
+ * but limited, pitches and sizes allowed.
+ *
+ * The src/dst coordinates are relative to the given level/slice of the
+ * miptree.
+ *
+ * If @src_flip or @dst_flip is set, then the rectangle within that miptree
+ * will be inverted (including scanline order) when copying.  This is common
+ * in GL when copying between window system and user-created
+ * renderbuffers/textures.
+ */
+bool
+intel_miptree_blit(struct intel_context *intel,
+                   struct intel_mipmap_tree *src_mt,
+                   int src_level, int src_slice,
+                   uint32_t src_x, uint32_t src_y, bool src_flip,
+                   struct intel_mipmap_tree *dst_mt,
+                   int dst_level, int dst_slice,
+                   uint32_t dst_x, uint32_t dst_y, bool dst_flip,
+                   uint32_t width, uint32_t height,
+                   enum gl_logicop_mode logicop)
+{
+   /* No sRGB decode or encode is done by the hardware blitter, which is
+    * consistent with what we want in the callers (glCopyTexSubImage(),
+    * glBlitFramebuffer(), texture validation, etc.).
+    */
+   mesa_format src_format = _mesa_get_srgb_format_linear(src_mt->format);
+   mesa_format dst_format = _mesa_get_srgb_format_linear(dst_mt->format);
+
+   /* The blitter doesn't support doing any format conversions.  We do also
+    * support blitting ARGB8888 to XRGB8888 (trivial, the values dropped into
+    * the X channel don't matter), and XRGB8888 to ARGB8888 by setting the A
+    * channel to 1.0 at the end.
+    */
+   if (src_format != dst_format &&
+      ((src_format != MESA_FORMAT_B8G8R8A8_UNORM &&
+        src_format != MESA_FORMAT_B8G8R8X8_UNORM) ||
+       (dst_format != MESA_FORMAT_B8G8R8A8_UNORM &&
+        dst_format != MESA_FORMAT_B8G8R8X8_UNORM))) {
+      perf_debug("%s: Can't use hardware blitter from %s to %s, "
+                 "falling back.\n", __func__,
+                 _mesa_get_format_name(src_format),
+                 _mesa_get_format_name(dst_format));
+      return false;
+   }
+
+   /* According to the Ivy Bridge PRM, Vol1 Part4, section 1.2.1.2 (Graphics
+    * Data Size Limitations):
+    *
+    *    The BLT engine is capable of transferring very large quantities of
+    *    graphics data.  Any graphics data read from and written to the
+    *    destination is permitted to represent a number of pixels that
+    *    occupies up to 65,536 scan lines and up to 32,768 bytes per scan line
+    *    at the destination.  The maximum number of pixels that may be
+    *    represented per scan line's worth of graphics data depends on the
+    *    color depth.
+    *
+    * Furthermore, emit_copy_blit (which is called below) uses a signed
+    * 16-bit integer to represent buffer pitch, so it can only handle buffer
+    * pitches < 32k.
+    *
+    * As a result of these two limitations, we can only use the blitter to do
+    * this copy when the region's pitch is less than 32k.
+    */
+   if (src_mt->region->pitch > 32768 ||
+       dst_mt->region->pitch > 32768) {
+      perf_debug("Falling back due to >32k pitch\n");
+      return false;
+   }
+
+   if (src_flip)
+      src_y = src_mt->level[src_level].height - src_y - height;
+
+   if (dst_flip)
+      dst_y = dst_mt->level[dst_level].height - dst_y - height;
+
+   int src_pitch = src_mt->region->pitch;
+   if (src_flip != dst_flip)
+      src_pitch = -src_pitch;
+
+   uint32_t src_image_x, src_image_y;
+   intel_miptree_get_image_offset(src_mt, src_level, src_slice,
+                                  &src_image_x, &src_image_y);
+   src_x += src_image_x;
+   src_y += src_image_y;
+
+   uint32_t dst_image_x, dst_image_y;
+   intel_miptree_get_image_offset(dst_mt, dst_level, dst_slice,
+                                  &dst_image_x, &dst_image_y);
+   dst_x += dst_image_x;
+   dst_y += dst_image_y;
+
+   if (!emit_copy_blit(intel,
+                       src_mt->cpp,
+                       src_pitch,
+                       src_mt->region->bo, src_mt->offset,
+                       src_mt->region->tiling,
+                       dst_mt->region->pitch,
+                       dst_mt->region->bo, dst_mt->offset,
+                       dst_mt->region->tiling,
+                       src_x, src_y,
+                       dst_x, dst_y,
+                       width, height,
+                       logicop)) {
+      return false;
+   }
+
+   if (src_mt->format == MESA_FORMAT_B8G8R8X8_UNORM &&
+       dst_mt->format == MESA_FORMAT_B8G8R8A8_UNORM) {
+      intel_miptree_set_alpha_to_one(intel, dst_mt,
+                                     dst_x, dst_y,
+                                     width, height);
+   }
+
+   return true;
+}
+
 /**
  * Use blitting to clear the renderbuffers named by 'flags'.
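The 32k cutoff in the function above follows directly from emit_copy_blit() taking its pitches as GLshort, a signed 16-bit type. A minimal sketch of that guard, under the assumption that any pitch magnitude representable in int16_t is blit-safe (the real code also negates src_pitch for flipped blits, so it is the magnitude that must fit; the driver's own cutoff is the slightly looser "not greater than 32768"):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical guard mirroring the check above: pitches that do not fit in
 * a signed 16-bit field must take the software fallback path. */
static bool blitter_pitch_ok(uint32_t src_pitch, uint32_t dst_pitch)
{
   return src_pitch <= INT16_MAX && dst_pitch <= INT16_MAX;
}

int main(void)
{
   printf("16384: %s\n", blitter_pitch_ok(16384, 16384) ? "blit" : "fallback");
   printf("40000: %s\n", blitter_pitch_ok(40000, 16384) ? "blit" : "fallback");
   return 0;
}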
@@ -523,7 +504,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                   uint32_t dst_tiling,
                                   GLshort x, GLshort y,
                                   GLshort w, GLshort h,
-                                  GLenum logic_op)
+                                  enum gl_logicop_mode logic_op)
 {
    int dwords = ALIGN(src_size, 8) / 4;
    uint32_t opcode, br13, blit_cmd;

@@ -535,7 +516,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
       return false;
    }

-   assert((logic_op >= GL_CLEAR) && (logic_op <= (GL_CLEAR + 0x0f)));
+   assert((unsigned)logic_op <= 0x0f);
    assert(dst_pitch > 0);

    if (w < 0 || h < 0)

@@ -607,13 +588,13 @@ intel_emit_linear_blit(struct intel_context *intel,
    */
   pitch = ROUND_DOWN_TO(MIN2(size, (1 << 15) - 1), 4);
   height = (pitch == 0) ? 1 : size / pitch;
-   ok = intelEmitCopyBlit(intel, 1,
-                          pitch, src_bo, src_offset, I915_TILING_NONE,
-                          pitch, dst_bo, dst_offset, I915_TILING_NONE,
-                          0, 0, /* src x/y */
-                          0, 0, /* dst x/y */
-                          pitch, height, /* w, h */
-                          GL_COPY);
+   ok = emit_copy_blit(intel, 1,
+                       pitch, src_bo, src_offset, I915_TILING_NONE,
+                       pitch, dst_bo, dst_offset, I915_TILING_NONE,
+                       0, 0, /* src x/y */
+                       0, 0, /* dst x/y */
+                       pitch, height, /* w, h */
+                       COLOR_LOGICOP_COPY);
    if (!ok)
       _mesa_problem(ctx, "Failed to linear blit %dx%d\n", pitch, height);

@@ -623,13 +604,13 @@ intel_emit_linear_blit(struct intel_context *intel,
    assert (size < (1 << 15));
    pitch = ALIGN(size, 4);
    if (size != 0) {
-      ok = intelEmitCopyBlit(intel, 1,
-                             pitch, src_bo, src_offset, I915_TILING_NONE,
-                             pitch, dst_bo, dst_offset, I915_TILING_NONE,
-                             0, 0, /* src x/y */
-                             0, 0, /* dst x/y */
-                             size, 1, /* w, h */
-                             GL_COPY);
+      ok = emit_copy_blit(intel, 1,
+                          pitch, src_bo, src_offset, I915_TILING_NONE,
+                          pitch, dst_bo, dst_offset, I915_TILING_NONE,
+                          0, 0, /* src x/y */
+                          0, 0, /* dst x/y */
+                          size, 1, /* w, h */
+                          COLOR_LOGICOP_COPY);
       if (!ok)
          _mesa_problem(ctx, "Failed to linear blit %dx%d\n", size, 1);
    }
@@ -35,22 +35,6 @@ extern void intelCopyBuffer(const __DRIdrawable * dpriv,

 extern GLbitfield intelClearWithBlit(struct gl_context * ctx, GLbitfield mask);

-bool
-intelEmitCopyBlit(struct intel_context *intel,
-                  GLuint cpp,
-                  GLshort src_pitch,
-                  drm_intel_bo *src_buffer,
-                  GLuint src_offset,
-                  uint32_t src_tiling,
-                  GLshort dst_pitch,
-                  drm_intel_bo *dst_buffer,
-                  GLuint dst_offset,
-                  uint32_t dst_tiling,
-                  GLshort srcx, GLshort srcy,
-                  GLshort dstx, GLshort dsty,
-                  GLshort w, GLshort h,
-                  GLenum logicop );
-
 bool intel_miptree_blit(struct intel_context *intel,
                         struct intel_mipmap_tree *src_mt,
                         int src_level, int src_slice,

@@ -59,7 +43,7 @@ bool intel_miptree_blit(struct intel_context *intel,
                         int dst_level, int dst_slice,
                         uint32_t dst_x, uint32_t dst_y, bool dst_flip,
                         uint32_t width, uint32_t height,
-                        GLenum logicop);
+                        enum gl_logicop_mode logicop);

 bool
 intelEmitImmediateColorExpandBlit(struct intel_context *intel,

@@ -72,7 +56,7 @@ intelEmitImmediateColorExpandBlit(struct intel_context *intel,
                                   uint32_t dst_tiling,
                                   GLshort x, GLshort y,
                                   GLshort w, GLshort h,
-                                  GLenum logic_op);
+                                  enum gl_logicop_mode logic_op);
 void intel_emit_linear_blit(struct intel_context *intel,
                             drm_intel_bo *dst_bo,
                             unsigned int dst_offset,
@@ -421,7 +421,6 @@ extern int intel_translate_shadow_compare_func(GLenum func);
 extern int intel_translate_compare_func(GLenum func);
 extern int intel_translate_stencil_op(GLenum op);
 extern int intel_translate_blend_factor(GLenum factor);
-extern int intel_translate_logic_op(GLenum opcode);

 void intel_update_renderbuffers(__DRIcontext *context,
                                 __DRIdrawable *drawable);
@@ -287,7 +287,7 @@ intel_image_target_renderbuffer_storage(struct gl_context *ctx,
  * intel_process_dri2_buffer().
  */
 static GLboolean
-intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
+intel_alloc_window_storage(UNUSED struct gl_context *ctx, struct gl_renderbuffer *rb,
                            GLenum internalFormat, GLuint width, GLuint height)
 {
    assert(rb->Name == 0);

@@ -300,8 +300,10 @@ intel_alloc_window_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,

 /** Dummy function for gl_renderbuffer::AllocStorage() */
 static GLboolean
-intel_nop_alloc_storage(struct gl_context * ctx, struct gl_renderbuffer *rb,
-                        GLenum internalFormat, GLuint width, GLuint height)
+intel_nop_alloc_storage(UNUSED struct gl_context *ctx,
+                        UNUSED struct gl_renderbuffer *rb,
+                        UNUSED GLenum internalFormat,
+                        UNUSED GLuint width, UNUSED GLuint height)
 {
    _mesa_problem(ctx, "intel_op_alloc_storage should never be called.");
    return false;

@@ -393,7 +395,8 @@ intel_new_renderbuffer(struct gl_context * ctx, GLuint name)
  */
 static void
 intel_bind_framebuffer(struct gl_context * ctx, GLenum target,
-                       struct gl_framebuffer *fb, struct gl_framebuffer *fbread)
+                       UNUSED struct gl_framebuffer *fb,
+                       UNUSED struct gl_framebuffer *fbread)
 {
    if (target == GL_FRAMEBUFFER_EXT || target == GL_DRAW_FRAMEBUFFER_EXT) {
       intel_draw_buffer(ctx);

@@ -419,8 +422,7 @@ intel_framebuffer_renderbuffer(struct gl_context * ctx,
 }

 static bool
-intel_renderbuffer_update_wrapper(struct intel_context *intel,
-                                  struct intel_renderbuffer *irb,
+intel_renderbuffer_update_wrapper(struct intel_renderbuffer *irb,
                                   struct gl_texture_image *image,
                                   uint32_t layer)
 {

@@ -468,7 +470,6 @@ intel_render_texture(struct gl_context * ctx,
                      struct gl_framebuffer *fb,
                      struct gl_renderbuffer_attachment *att)
 {
-   struct intel_context *intel = intel_context(ctx);
    struct gl_renderbuffer *rb = att->Renderbuffer;
    struct intel_renderbuffer *irb = intel_renderbuffer(rb);
    struct gl_texture_image *image = rb->TexImage;

@@ -495,7 +496,7 @@ intel_render_texture(struct gl_context * ctx,

    intel_miptree_check_level_layer(mt, att->TextureLevel, layer);

-   if (!intel_renderbuffer_update_wrapper(intel, irb, image, layer)) {
+   if (!intel_renderbuffer_update_wrapper(irb, image, layer)) {
       _swrast_render_texture(ctx, fb, att);
       return;
    }

@@ -641,7 +642,7 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
                                     GLint srcX1, GLint srcY1,
                                     GLint dstX0, GLint dstY0,
                                     GLint dstX1, GLint dstY1,
-                                    GLbitfield mask, GLenum filter)
+                                    GLbitfield mask)
 {
    struct intel_context *intel = intel_context(ctx);

@@ -714,7 +715,7 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
                              dst_irb->mt,
                              dst_irb->mt_level, dst_irb->mt_layer,
                              dstX0, dstY0, dst_rb->Name == 0,
-                             dstX1 - dstX0, dstY1 - dstY0, GL_COPY)) {
+                             dstX1 - dstX0, dstY1 - dstY0, COLOR_LOGICOP_COPY)) {
          perf_debug("glBlitFramebuffer(): unknown blit failure. "
                     "Falling back to software rendering.\n");
         return mask;

@@ -739,7 +740,7 @@ intel_blit_framebuffer(struct gl_context *ctx,
       mask = intel_blit_framebuffer_with_blitter(ctx, readFb, drawFb,
                                                  srcX0, srcY0, srcX1, srcY1,
                                                  dstX0, dstY0, dstX1, dstY1,
-                                                 mask, filter);
+                                                 mask);
       if (mask == 0x0)
          return;
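UNUSED in the hunks above is Mesa's macro for the compiler's unused-parameter attribute (it lives in the util macros header; treat the exact location as an assumption). It silences -Wunused-parameter without changing the function's type, which matters here because these functions must keep the signatures their driver-hook tables expect. A minimal equivalent:

#include <stdio.h>

/* Minimal stand-in for Mesa's UNUSED macro; non-GNU compilers would need a
 * different fallback. */
#ifdef __GNUC__
#define UNUSED __attribute__((unused))
#else
#define UNUSED
#endif

static int callback(UNUSED void *user_data, int value)
{
   /* user_data is part of the callback contract but not needed here;
    * UNUSED keeps -Wunused-parameter quiet without changing the ABI. */
   return value * 2;
}

int main(void)
{
   printf("%d\n", callback(NULL, 21));
   return 0;
}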
@@ -620,7 +620,7 @@ intel_miptree_copy_slice(struct intel_context *intel,
    if (!intel_miptree_blit(intel,
                            src_mt, level, slice, 0, 0, false,
                            dst_mt, level, slice, 0, 0, false,
-                           width, height, GL_COPY)) {
+                           width, height, COLOR_LOGICOP_COPY)) {
       perf_debug("miptree validate blit for %s failed\n",
                  _mesa_get_format_name(format));

@@ -757,7 +757,7 @@ intel_miptree_map_blit(struct intel_context *intel,
                            map->x, map->y, false,
                            map->mt, 0, 0,
                            0, 0, false,
-                           map->w, map->h, GL_COPY)) {
+                           map->w, map->h, COLOR_LOGICOP_COPY)) {
       fprintf(stderr, "Failed to blit\n");
       goto fail;
    }

@@ -795,7 +795,7 @@ intel_miptree_unmap_blit(struct intel_context *intel,
                                0, 0, false,
                                mt, level, slice,
                                map->x, map->y, false,
-                               map->w, map->h, GL_COPY);
+                               map->w, map->h, COLOR_LOGICOP_COPY);
       WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
    }
@@ -262,8 +262,8 @@ do_blit_bitmap( struct gl_context *ctx,
          int h = MIN2(DY, height - py);
          int w = MIN2(DX, width - px);
          GLuint sz = ALIGN(ALIGN(w,8) * h, 64)/8;
-         GLenum logic_op = ctx->Color.ColorLogicOpEnabled ?
-            ctx->Color.LogicOp : GL_COPY;
+         const enum gl_logicop_mode logic_op = ctx->Color.ColorLogicOpEnabled ?
+            ctx->Color._LogicOp : COLOR_LOGICOP_COPY;

          assert(sz <= sizeof(stipple));
          memset(stipple, 0, sz);
@@ -176,7 +176,7 @@ do_blit_copypixels(struct gl_context * ctx,
                            dstx, dsty, _mesa_is_winsys_fbo(fb),
                            width, height,
                            (ctx->Color.ColorLogicOpEnabled ?
-                            ctx->Color.LogicOp : GL_COPY))) {
+                            ctx->Color._LogicOp : COLOR_LOGICOP_COPY))) {
       DBG("%s: blit failure\n", __func__);
       return false;
    }
@@ -141,7 +141,7 @@ do_blit_readpixels(struct gl_context * ctx,
                            x, y, _mesa_is_winsys_fbo(ctx->ReadBuffer),
                            pbo_mt, 0, 0,
                            0, 0, dst_flip,
-                           width, height, GL_COPY)) {
+                           width, height, COLOR_LOGICOP_COPY)) {
       intel_miptree_release(&pbo_mt);
       return false;
    }
@@ -117,7 +117,6 @@ struct intel_screen
 #define intel_check_front_buffer_rendering old_intel_check_front_buffer_rendering
 #define intelInitBufferFuncs old_intelInitBufferFuncs
 #define intelClearWithBlit old_intelClearWithBlit
-#define intelEmitCopyBlit old_intelEmitCopyBlit
 #define intelEmitImmediateColorExpandBlit old_intelEmitImmediateColorExpandBlit
 #define intel_emit_linear_blit old_intel_emit_linear_blit
 #define intel_miptree_blit old_intel_miptree_blit

@@ -139,7 +138,6 @@ struct intel_screen
 #define get_time old_get_time
 #define intel_translate_blend_factor old_intel_translate_blend_factor
 #define intel_translate_compare_func old_intel_translate_compare_func
-#define intel_translate_logic_op old_intel_translate_logic_op
 #define intel_translate_shadow_compare_func old_intel_translate_shadow_compare_func
 #define intel_translate_stencil_op old_intel_translate_stencil_op
 #define intel_init_syncobj_functions old_intel_init_syncobj_functions
@@ -151,44 +151,3 @@ intel_translate_blend_factor(GLenum factor)
    fprintf(stderr, "Unknown value in %s: %x\n", __func__, factor);
    return BLENDFACT_ZERO;
 }
-
-int
-intel_translate_logic_op(GLenum opcode)
-{
-   switch (opcode) {
-   case GL_CLEAR:
-      return LOGICOP_CLEAR;
-   case GL_AND:
-      return LOGICOP_AND;
-   case GL_AND_REVERSE:
-      return LOGICOP_AND_RVRSE;
-   case GL_COPY:
-      return LOGICOP_COPY;
-   case GL_COPY_INVERTED:
-      return LOGICOP_COPY_INV;
-   case GL_AND_INVERTED:
-      return LOGICOP_AND_INV;
-   case GL_NOOP:
-      return LOGICOP_NOOP;
-   case GL_XOR:
-      return LOGICOP_XOR;
-   case GL_OR:
-      return LOGICOP_OR;
-   case GL_OR_INVERTED:
-      return LOGICOP_OR_INV;
-   case GL_NOR:
-      return LOGICOP_NOR;
-   case GL_EQUIV:
-      return LOGICOP_EQUIV;
-   case GL_INVERT:
-      return LOGICOP_INV;
-   case GL_OR_REVERSE:
-      return LOGICOP_OR_RVRSE;
-   case GL_NAND:
-      return LOGICOP_NAND;
-   case GL_SET:
-      return LOGICOP_SET;
-   default:
-      return LOGICOP_SET;
-   }
-}
@@ -70,7 +70,7 @@ intel_copy_texsubimage(struct intel_context *intel,
                            intelImage->mt, intelImage->base.Base.Level,
                            intelImage->base.Base.Face + slice,
                            dstx, dsty, false,
-                           width, height, GL_COPY)) {
+                           width, height, COLOR_LOGICOP_COPY)) {
       return false;
    }
|
@@ -163,7 +163,7 @@ try_pbo_upload(struct gl_context *ctx,
|
||||
0, 0, false,
|
||||
intelImage->mt, image->Level, image->Face,
|
||||
0, 0, false,
|
||||
image->Width, image->Height, GL_COPY)) {
|
||||
image->Width, image->Height, COLOR_LOGICOP_COPY)) {
|
||||
DBG("%s: blit failed\n", __func__);
|
||||
intel_miptree_release(&pbo_mt);
|
||||
return false;
|
||||
|
@@ -111,7 +111,7 @@ intel_blit_texsubimage(struct gl_context * ctx,
                             0, 0, false,
                             intelImage->mt, texImage->Level, texImage->Face,
                             xoffset, yoffset, false,
-                            width, height, GL_COPY);
+                            width, height, COLOR_LOGICOP_COPY);
    assert(ret);

    intel_miptree_release(&temp_mt);
Some files were not shown because too many files have changed in this diff.