Compare commits
58 Commits
mesa-25.0.
...
mesa-19.1.
Author | SHA1 | Date | |
---|---|---|---|
|
1dd62eb6e2 | ||
|
ab75e1e289 | ||
|
2153c3ae8e | ||
|
04e9d7bf8f | ||
|
857210b0dd | ||
|
6bac1a041d | ||
|
2040f10cb0 | ||
|
260f517d54 | ||
|
b6778c9f52 | ||
|
5d05324e65 | ||
|
8dbdeb27f3 | ||
|
dab3945ff3 | ||
|
d08fde8e7a | ||
|
f69eb770cd | ||
|
5bed00cf0f | ||
|
b551be82a7 | ||
|
7fa89fd959 | ||
|
5fcfcdb162 | ||
|
d70d8b2ffa | ||
|
558a067d17 | ||
|
51354d2bf5 | ||
|
06bf5428cf | ||
|
75ea0eeed1 | ||
|
8cf49e1662 | ||
|
c03d9a7fa9 | ||
|
9b51dcf1e2 | ||
|
914ac06e32 | ||
|
e2654c2379 | ||
|
bb845df961 | ||
|
f7c0ca6d38 | ||
|
38fdfdaff1 | ||
|
87722e0c42 | ||
|
f0e147bd47 | ||
|
1fc65774e9 | ||
|
349153f097 | ||
|
f8ec40e28b | ||
|
5e75803339 | ||
|
f1ab22209e | ||
|
e0c082d6eb | ||
|
a97f44ac1f | ||
|
5d7d13d227 | ||
|
4a7b0cc5e4 | ||
|
d95797de61 | ||
|
424b60dc70 | ||
|
9d610c1cc3 | ||
|
f770e81ba7 | ||
|
faa7daa55e | ||
|
fd27561c9d | ||
|
5d72a334e8 | ||
|
825ca9e42e | ||
|
67f2be0fbf | ||
|
05faf6eb56 | ||
|
3495bdca13 | ||
|
f93e1f92c4 | ||
|
8c0b80e08a | ||
|
070d763d5d | ||
|
ed0d4eaa4c | ||
|
6e52daa18c |
208
.travis.yml
208
.travis.yml
@@ -1,198 +1,40 @@
|
||||
language: c
|
||||
|
||||
dist: xenial
|
||||
os: osx
|
||||
|
||||
cache:
|
||||
apt: true
|
||||
ccache: true
|
||||
|
||||
env:
|
||||
global:
|
||||
- XORG_RELEASES=https://xorg.freedesktop.org/releases/individual
|
||||
- XCB_RELEASES=https://xcb.freedesktop.org/dist
|
||||
- WAYLAND_RELEASES=https://wayland.freedesktop.org/releases
|
||||
- XORGMACROS_VERSION=util-macros-1.19.0
|
||||
- GLPROTO_VERSION=glproto-1.4.17
|
||||
- DRI2PROTO_VERSION=dri2proto-2.8
|
||||
- LIBPCIACCESS_VERSION=libpciaccess-0.13.4
|
||||
- LIBDRM_VERSION=libdrm-2.4.97
|
||||
- XCBPROTO_VERSION=xcb-proto-1.13
|
||||
- RANDRPROTO_VERSION=randrproto-1.3.0
|
||||
- LIBXRANDR_VERSION=libXrandr-1.3.0
|
||||
- LIBXCB_VERSION=libxcb-1.13
|
||||
- LIBXSHMFENCE_VERSION=libxshmfence-1.2
|
||||
- LIBVDPAU_VERSION=libvdpau-1.1
|
||||
- LIBVA_VERSION=libva-1.7.0
|
||||
- LIBWAYLAND_VERSION=wayland-1.15.0
|
||||
- WAYLAND_PROTOCOLS_VERSION=wayland-protocols-1.8
|
||||
- PKG_CONFIG_PATH=$HOME/prefix/lib/pkgconfig:$HOME/prefix/share/pkgconfig
|
||||
- LD_LIBRARY_PATH="$HOME/prefix/lib:$LD_LIBRARY_PATH"
|
||||
- PATH="$HOME/prefix/bin:$PATH"
|
||||
|
||||
matrix:
|
||||
include:
|
||||
- env:
|
||||
- LABEL="macOS meson"
|
||||
- BUILD=meson
|
||||
- DRI_LOADERS="-Dplatforms=x11"
|
||||
- GALLIUM_DRIVERS=swrast
|
||||
os: osx
|
||||
- PKG_CONFIG_PATH=""
|
||||
|
||||
before_install:
|
||||
- |
|
||||
if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then
|
||||
HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext
|
||||
# Set PATH for homebrew pip3 installs
|
||||
PATH="$HOME/Library/Python/3.6/bin:${PATH}"
|
||||
# Set PKG_CONFIG_PATH for keg-only expat
|
||||
PKG_CONFIG_PATH="/usr/local/opt/expat/lib/pkgconfig:${PKG_CONFIG_PATH}"
|
||||
# Set PATH for keg-only gettext
|
||||
PATH="/usr/local/opt/gettext/bin:${PATH}"
|
||||
- HOMEBREW_NO_AUTO_UPDATE=1 brew install python3 ninja expat gettext
|
||||
# Set PATH for homebrew pip3 installs
|
||||
- PATH="$HOME/Library/Python/3.6/bin:${PATH}"
|
||||
# Set PKG_CONFIG_PATH for keg-only expat
|
||||
- PKG_CONFIG_PATH="/usr/local/opt/expat/lib/pkgconfig:${PKG_CONFIG_PATH}"
|
||||
# Set PATH for keg-only gettext
|
||||
- PATH="/usr/local/opt/gettext/bin:${PATH}"
|
||||
|
||||
# Install xquartz for prereqs ...
|
||||
XQUARTZ_VERSION="2.7.11"
|
||||
wget -nv https://dl.bintray.com/xquartz/downloads/XQuartz-${XQUARTZ_VERSION}.dmg
|
||||
hdiutil attach XQuartz-${XQUARTZ_VERSION}.dmg
|
||||
sudo installer -pkg /Volumes/XQuartz-${XQUARTZ_VERSION}/XQuartz.pkg -target /
|
||||
hdiutil detach /Volumes/XQuartz-${XQUARTZ_VERSION}
|
||||
# ... and set paths
|
||||
PATH="/opt/X11/bin:${PATH}"
|
||||
PKG_CONFIG_PATH="/opt/X11/share/pkgconfig:/opt/X11/lib/pkgconfig:${PKG_CONFIG_PATH}"
|
||||
ACLOCAL="aclocal -I /opt/X11/share/aclocal -I /usr/local/share/aclocal"
|
||||
fi
|
||||
# Install xquartz for prereqs ...
|
||||
- XQUARTZ_VERSION="2.7.11"
|
||||
- wget -nv https://dl.bintray.com/xquartz/downloads/XQuartz-${XQUARTZ_VERSION}.dmg
|
||||
- hdiutil attach XQuartz-${XQUARTZ_VERSION}.dmg
|
||||
- sudo installer -pkg /Volumes/XQuartz-${XQUARTZ_VERSION}/XQuartz.pkg -target /
|
||||
- hdiutil detach /Volumes/XQuartz-${XQUARTZ_VERSION}
|
||||
# ... and set paths
|
||||
- PKG_CONFIG_PATH="/opt/X11/share/pkgconfig:/opt/X11/lib/pkgconfig:${PKG_CONFIG_PATH}"
|
||||
|
||||
install:
|
||||
# Install a more modern meson from pip, since the version in the
|
||||
# ubuntu repos is often quite old.
|
||||
- if test "x$BUILD" = xmeson; then
|
||||
pip3 install --user meson;
|
||||
pip3 install --user mako;
|
||||
fi
|
||||
|
||||
# Install dependencies where we require specific versions (or where
|
||||
# disallowed by Travis CI's package whitelisting).
|
||||
|
||||
- |
|
||||
if [[ "$TRAVIS_OS_NAME" == "linux" ]]; then
|
||||
wget $XORG_RELEASES/util/$XORGMACROS_VERSION.tar.bz2
|
||||
tar -jxvf $XORGMACROS_VERSION.tar.bz2
|
||||
(cd $XORGMACROS_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget $XORG_RELEASES/proto/$GLPROTO_VERSION.tar.bz2
|
||||
tar -jxvf $GLPROTO_VERSION.tar.bz2
|
||||
(cd $GLPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget $XORG_RELEASES/proto/$DRI2PROTO_VERSION.tar.bz2
|
||||
tar -jxvf $DRI2PROTO_VERSION.tar.bz2
|
||||
(cd $DRI2PROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget $XCB_RELEASES/$XCBPROTO_VERSION.tar.bz2
|
||||
tar -jxvf $XCBPROTO_VERSION.tar.bz2
|
||||
(cd $XCBPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget $XCB_RELEASES/$LIBXCB_VERSION.tar.bz2
|
||||
tar -jxvf $LIBXCB_VERSION.tar.bz2
|
||||
(cd $LIBXCB_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget $XORG_RELEASES/lib/$LIBPCIACCESS_VERSION.tar.bz2
|
||||
tar -jxvf $LIBPCIACCESS_VERSION.tar.bz2
|
||||
(cd $LIBPCIACCESS_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget https://dri.freedesktop.org/libdrm/$LIBDRM_VERSION.tar.bz2
|
||||
tar -jxvf $LIBDRM_VERSION.tar.bz2
|
||||
(cd $LIBDRM_VERSION && ./configure --prefix=$HOME/prefix --enable-vc4 --enable-freedreno --enable-etnaviv-experimental-api && make install)
|
||||
|
||||
wget $XORG_RELEASES/proto/$RANDRPROTO_VERSION.tar.bz2
|
||||
tar -jxvf $RANDRPROTO_VERSION.tar.bz2
|
||||
(cd $RANDRPROTO_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget $XORG_RELEASES/lib/$LIBXRANDR_VERSION.tar.bz2
|
||||
tar -jxvf $LIBXRANDR_VERSION.tar.bz2
|
||||
(cd $LIBXRANDR_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget $XORG_RELEASES/lib/$LIBXSHMFENCE_VERSION.tar.bz2
|
||||
tar -jxvf $LIBXSHMFENCE_VERSION.tar.bz2
|
||||
(cd $LIBXSHMFENCE_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget https://people.freedesktop.org/~aplattner/vdpau/$LIBVDPAU_VERSION.tar.bz2
|
||||
tar -jxvf $LIBVDPAU_VERSION.tar.bz2
|
||||
(cd $LIBVDPAU_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
wget https://www.freedesktop.org/software/vaapi/releases/libva/$LIBVA_VERSION.tar.bz2
|
||||
tar -jxvf $LIBVA_VERSION.tar.bz2
|
||||
(cd $LIBVA_VERSION && ./configure --prefix=$HOME/prefix --disable-wayland --disable-dummy-driver && make install)
|
||||
|
||||
wget $WAYLAND_RELEASES/$LIBWAYLAND_VERSION.tar.xz
|
||||
tar -axvf $LIBWAYLAND_VERSION.tar.xz
|
||||
(cd $LIBWAYLAND_VERSION && ./configure --prefix=$HOME/prefix --enable-libraries --without-host-scanner --disable-documentation --disable-dtd-validation && make install)
|
||||
|
||||
wget $WAYLAND_RELEASES/$WAYLAND_PROTOCOLS_VERSION.tar.xz
|
||||
tar -axvf $WAYLAND_PROTOCOLS_VERSION.tar.xz
|
||||
(cd $WAYLAND_PROTOCOLS_VERSION && ./configure --prefix=$HOME/prefix && make install)
|
||||
|
||||
# Meson requires ninja >= 1.6, but xenial has 1.3.x
|
||||
wget https://github.com/ninja-build/ninja/releases/download/v1.6.0/ninja-linux.zip
|
||||
unzip ninja-linux.zip
|
||||
mv ninja $HOME/prefix/bin/
|
||||
|
||||
# Generate this header since one is missing on the Travis instance
|
||||
mkdir -p linux
|
||||
printf "%s\n" \
|
||||
"#ifndef _LINUX_MEMFD_H" \
|
||||
"#define _LINUX_MEMFD_H" \
|
||||
"" \
|
||||
"#define MFD_CLOEXEC 0x0001U" \
|
||||
"#define MFD_ALLOW_SEALING 0x0002U" \
|
||||
"" \
|
||||
"#endif /* _LINUX_MEMFD_H */" > linux/memfd.h
|
||||
|
||||
# Generate this header, including the missing SYS_memfd_create
|
||||
# macro, which is not provided by the header in the Travis
|
||||
# instance
|
||||
mkdir -p sys
|
||||
printf "%s\n" \
|
||||
"#ifndef _SYSCALL_H" \
|
||||
"#define _SYSCALL_H 1" \
|
||||
"" \
|
||||
"#include <asm/unistd.h>" \
|
||||
"" \
|
||||
"#ifndef _LIBC" \
|
||||
"# include <bits/syscall.h>" \
|
||||
"#endif" \
|
||||
"" \
|
||||
"#ifndef __NR_memfd_create" \
|
||||
"# define __NR_memfd_create 319 /* Taken from <asm/unistd_64.h> */" \
|
||||
"#endif" \
|
||||
"" \
|
||||
"#ifndef SYS_memfd_create" \
|
||||
"# define SYS_memfd_create __NR_memfd_create" \
|
||||
"#endif" \
|
||||
"" \
|
||||
"#endif" > sys/syscall.h
|
||||
fi
|
||||
- pip3 install --user meson
|
||||
- pip3 install --user mako
|
||||
|
||||
script:
|
||||
if test "x$BUILD" = xmeson; then
|
||||
if test -n "$LLVM_CONFIG"; then
|
||||
# We need to control the version of llvm-config we're using, so we'll
|
||||
# generate a native file to do so. This requires meson >=0.49
|
||||
#
|
||||
echo -e "[binaries]\nllvm-config = '`which $LLVM_CONFIG`'" > native.file
|
||||
|
||||
$LLVM_CONFIG --version
|
||||
else
|
||||
: > native.file
|
||||
fi
|
||||
|
||||
export CFLAGS="$CFLAGS -isystem`pwd`"
|
||||
meson _build \
|
||||
--native-file=native.file \
|
||||
-Dbuild-tests=true \
|
||||
${DRI_LOADERS} \
|
||||
-Ddri-drivers=${DRI_DRIVERS:-[]} \
|
||||
-Dgallium-drivers=${GALLIUM_DRIVERS:-[]} \
|
||||
-Dvulkan-drivers=${VULKAN_DRIVERS:-[]}
|
||||
meson configure _build
|
||||
ninja -C _build
|
||||
ninja -C _build test
|
||||
fi
|
||||
- meson _build
|
||||
-Dbuild-tests=true
|
||||
-Dplatforms=x11
|
||||
-Dgallium-drivers=swrast
|
||||
- ninja -C _build
|
||||
- ninja -C _build test
|
||||
|
@@ -110,6 +110,7 @@ endef
|
||||
|
||||
# add subdirectories
|
||||
SUBDIRS := \
|
||||
src/freedreno \
|
||||
src/gbm \
|
||||
src/loader \
|
||||
src/mapi \
|
||||
|
2
bin/.cherry-ignore
Normal file
2
bin/.cherry-ignore
Normal file
@@ -0,0 +1,2 @@
|
||||
stable: this commit causes issues in several systems
|
||||
78e35df52aa2f7d770f929a0866a0faa89c261a9 radeonsi: update buffer descriptors in all contexts after buffer invalidation
|
@@ -1258,6 +1258,7 @@ if _llvm != 'false'
|
||||
with_gallium_opencl or _llvm == 'true'
|
||||
),
|
||||
static : not _shared_llvm,
|
||||
method : 'config-tool',
|
||||
)
|
||||
with_llvm = dep_llvm.found()
|
||||
endif
|
||||
|
@@ -151,13 +151,14 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
|
||||
LLVMTargetRef target = ac_get_llvm_target(triple);
|
||||
|
||||
snprintf(features, sizeof(features),
|
||||
"+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
|
||||
"+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s",
|
||||
HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling",
|
||||
tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
|
||||
tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
|
||||
tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
|
||||
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
|
||||
|
||||
tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "",
|
||||
tm_options & AC_TM_NO_LOAD_STORE_OPT ? ",-load-store-opt" : "");
|
||||
|
||||
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
|
||||
target,
|
||||
triple,
|
||||
|
@@ -65,6 +65,7 @@ enum ac_target_machine_options {
|
||||
AC_TM_CHECK_IR = (1 << 5),
|
||||
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
|
||||
AC_TM_CREATE_LOW_OPT = (1 << 7),
|
||||
AC_TM_NO_LOAD_STORE_OPT = (1 << 8),
|
||||
};
|
||||
|
||||
enum ac_float_mode {
|
||||
|
@@ -301,7 +301,6 @@ radv_cmd_buffer_destroy(struct radv_cmd_buffer *cmd_buffer)
|
||||
static VkResult
|
||||
radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
|
||||
cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
|
||||
|
||||
list_for_each_entry_safe(struct radv_cmd_buffer_upload, up,
|
||||
@@ -326,6 +325,8 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
|
||||
|
||||
cmd_buffer->record_result = VK_SUCCESS;
|
||||
|
||||
memset(cmd_buffer->vertex_bindings, 0, sizeof(cmd_buffer->vertex_bindings));
|
||||
|
||||
for (unsigned i = 0; i < VK_PIPELINE_BIND_POINT_RANGE_SIZE; i++) {
|
||||
cmd_buffer->descriptors[i].dirty = 0;
|
||||
cmd_buffer->descriptors[i].valid = 0;
|
||||
|
@@ -51,6 +51,7 @@ enum {
|
||||
RADV_DEBUG_CHECKIR = 0x200000,
|
||||
RADV_DEBUG_NOTHREADLLVM = 0x400000,
|
||||
RADV_DEBUG_NOBINNING = 0x800000,
|
||||
RADV_DEBUG_NO_LOAD_STORE_OPT = 0x1000000,
|
||||
};
|
||||
|
||||
enum {
|
||||
|
@@ -200,7 +200,7 @@ VkResult radv_CreateDescriptorSetLayout(
|
||||
break;
|
||||
case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
|
||||
/* main descriptor + fmask descriptor + sampler */
|
||||
set_layout->binding[b].size = 32 + 32 * max_sampled_image_descriptors;
|
||||
set_layout->binding[b].size = 96;
|
||||
binding_buffer_count = 1;
|
||||
alignment = 32;
|
||||
break;
|
||||
@@ -247,7 +247,8 @@ VkResult radv_CreateDescriptorSetLayout(
|
||||
|
||||
/* Don't reserve space for the samplers if they're not accessed. */
|
||||
if (set_layout->binding[b].immutable_samplers_equal) {
|
||||
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
|
||||
if (binding->descriptorType == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER &&
|
||||
max_sampled_image_descriptors <= 2)
|
||||
set_layout->binding[b].size -= 32;
|
||||
else if (binding->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
|
||||
set_layout->binding[b].size -= 16;
|
||||
|
@@ -104,7 +104,7 @@ radv_immutable_samplers(const struct radv_descriptor_set_layout *set,
|
||||
static inline unsigned
|
||||
radv_combined_image_descriptor_sampler_offset(const struct radv_descriptor_set_binding_layout *binding)
|
||||
{
|
||||
return binding->size - ((!binding->immutable_samplers_equal) ? 32 : 0);
|
||||
return binding->size - ((!binding->immutable_samplers_equal) ? 16 : 0);
|
||||
}
|
||||
|
||||
static inline const struct radv_sampler_ycbcr_conversion *
|
||||
|
@@ -464,6 +464,7 @@ static const struct debug_control radv_debug_options[] = {
|
||||
{"checkir", RADV_DEBUG_CHECKIR},
|
||||
{"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
|
||||
{"nobinning", RADV_DEBUG_NOBINNING},
|
||||
{"noloadstoreopt", RADV_DEBUG_NO_LOAD_STORE_OPT},
|
||||
{NULL, 0}
|
||||
};
|
||||
|
||||
@@ -510,6 +511,13 @@ radv_handle_per_app_options(struct radv_instance *instance,
|
||||
} else if (!strcmp(name, "DOOM_VFR")) {
|
||||
/* Work around a Doom VFR game bug */
|
||||
instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
|
||||
} else if (!strcmp(name, "MonsterHunterWorld.exe")) {
|
||||
/* Workaround for a WaW hazard when LLVM moves/merges
|
||||
* load/store memory operations.
|
||||
* See https://reviews.llvm.org/D61313
|
||||
*/
|
||||
if (HAVE_LLVM < 0x900)
|
||||
instance->debug_flags |= RADV_DEBUG_NO_LOAD_STORE_OPT;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -635,7 +635,8 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
|
||||
const struct vk_format_description *desc = vk_format_description(format);
|
||||
bool blendable;
|
||||
bool scaled = false;
|
||||
if (!desc) {
|
||||
/* TODO: implement some software emulation of SUBSAMPLED formats. */
|
||||
if (!desc || desc->layout == VK_FORMAT_LAYOUT_SUBSAMPLED) {
|
||||
out_properties->linearTilingFeatures = linear;
|
||||
out_properties->optimalTilingFeatures = tiled;
|
||||
out_properties->bufferFeatures = buffer;
|
||||
@@ -655,6 +656,7 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
|
||||
uint32_t tiling = VK_FORMAT_FEATURE_TRANSFER_SRC_BIT |
|
||||
VK_FORMAT_FEATURE_TRANSFER_DST_BIT |
|
||||
VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT |
|
||||
VK_FORMAT_FEATURE_COSITED_CHROMA_SAMPLES_BIT |
|
||||
VK_FORMAT_FEATURE_MIDPOINT_CHROMA_SAMPLES_BIT;
|
||||
|
||||
/* The subsampled formats have no support for linear filters. */
|
||||
|
@@ -156,6 +156,73 @@ convert_ycbcr(struct ycbcr_state *state,
|
||||
converted_channels[2], nir_imm_float(b, 1.0f));
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
get_texture_size(struct ycbcr_state *state, nir_deref_instr *texture)
|
||||
{
|
||||
nir_builder *b = state->builder;
|
||||
const struct glsl_type *type = texture->type;
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
|
||||
|
||||
tex->op = nir_texop_txs;
|
||||
tex->sampler_dim = glsl_get_sampler_dim(type);
|
||||
tex->is_array = glsl_sampler_type_is_array(type);
|
||||
tex->is_shadow = glsl_sampler_type_is_shadow(type);
|
||||
tex->dest_type = nir_type_int;
|
||||
|
||||
tex->src[0].src_type = nir_tex_src_texture_deref;
|
||||
tex->src[0].src = nir_src_for_ssa(&texture->dest.ssa);
|
||||
|
||||
nir_ssa_dest_init(&tex->instr, &tex->dest,
|
||||
nir_tex_instr_dest_size(tex), 32, NULL);
|
||||
nir_builder_instr_insert(b, &tex->instr);
|
||||
|
||||
return nir_i2f32(b, &tex->dest.ssa);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
implicit_downsampled_coord(nir_builder *b,
|
||||
nir_ssa_def *value,
|
||||
nir_ssa_def *max_value,
|
||||
int div_scale)
|
||||
{
|
||||
return nir_fadd(b,
|
||||
value,
|
||||
nir_fdiv(b,
|
||||
nir_imm_float(b, 1.0f),
|
||||
nir_fmul(b,
|
||||
nir_imm_float(b, div_scale),
|
||||
max_value)));
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
implicit_downsampled_coords(struct ycbcr_state *state,
|
||||
nir_ssa_def *old_coords)
|
||||
{
|
||||
nir_builder *b = state->builder;
|
||||
const struct radv_sampler_ycbcr_conversion *conversion = state->conversion;
|
||||
nir_ssa_def *image_size = NULL;
|
||||
nir_ssa_def *comp[4] = { NULL, };
|
||||
const struct vk_format_description *fmt_desc = vk_format_description(state->conversion->format);
|
||||
const unsigned divisors[2] = {fmt_desc->width_divisor, fmt_desc->height_divisor};
|
||||
|
||||
for (int c = 0; c < old_coords->num_components; c++) {
|
||||
if (c < ARRAY_SIZE(divisors) && divisors[c] > 1 &&
|
||||
conversion->chroma_offsets[c] == VK_CHROMA_LOCATION_COSITED_EVEN) {
|
||||
if (!image_size)
|
||||
image_size = get_texture_size(state, state->tex_deref);
|
||||
|
||||
comp[c] = implicit_downsampled_coord(b,
|
||||
nir_channel(b, old_coords, c),
|
||||
nir_channel(b, image_size, c),
|
||||
divisors[c]);
|
||||
} else {
|
||||
comp[c] = nir_channel(b, old_coords, c);
|
||||
}
|
||||
}
|
||||
|
||||
return nir_vec(b, comp, old_coords->num_components);
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
create_plane_tex_instr_implicit(struct ycbcr_state *state,
|
||||
uint32_t plane)
|
||||
@@ -163,10 +230,23 @@ create_plane_tex_instr_implicit(struct ycbcr_state *state,
|
||||
nir_builder *b = state->builder;
|
||||
nir_tex_instr *old_tex = state->origin_tex;
|
||||
nir_tex_instr *tex = nir_tex_instr_create(b->shader, old_tex->num_srcs+ 1);
|
||||
|
||||
for (uint32_t i = 0; i < old_tex->num_srcs; i++) {
|
||||
tex->src[i].src_type = old_tex->src[i].src_type;
|
||||
nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
|
||||
|
||||
switch (old_tex->src[i].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
if (plane && true/*state->conversion->chroma_reconstruction*/) {
|
||||
assert(old_tex->src[i].src.is_ssa);
|
||||
tex->src[i].src =
|
||||
nir_src_for_ssa(implicit_downsampled_coords(state,
|
||||
old_tex->src[i].src.ssa));
|
||||
break;
|
||||
}
|
||||
/* fall through */
|
||||
default:
|
||||
nir_src_copy(&tex->src[i].src, &old_tex->src[i].src, tex);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
tex->src[tex->num_srcs - 1].src = nir_src_for_ssa(nir_imm_int(b, plane));
|
||||
|
@@ -2019,16 +2019,34 @@ static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
|
||||
|
||||
assert(stride % type_size == 0);
|
||||
|
||||
if (!index)
|
||||
index = ctx->ac.i32_0;
|
||||
LLVMValueRef adjusted_index = index;
|
||||
if (!adjusted_index)
|
||||
adjusted_index = ctx->ac.i32_0;
|
||||
|
||||
index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
|
||||
adjusted_index = LLVMBuildMul(builder, adjusted_index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
|
||||
|
||||
list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
|
||||
list = LLVMBuildPointerCast(builder, list,
|
||||
ac_array_in_const32_addr_space(type), "");
|
||||
|
||||
return ac_build_load_to_sgpr(&ctx->ac, list, index);
|
||||
LLVMValueRef descriptor = ac_build_load_to_sgpr(&ctx->ac, list, adjusted_index);
|
||||
|
||||
/* 3 plane formats always have same size and format for plane 1 & 2, so
|
||||
* use the tail from plane 1 so that we can store only the first 16 bytes
|
||||
* of the last plane. */
|
||||
if (desc_type == AC_DESC_PLANE_2) {
|
||||
LLVMValueRef descriptor2 = radv_get_sampler_desc(abi, descriptor_set, base_index, constant_index, index, AC_DESC_PLANE_1,image, write, bindless);
|
||||
|
||||
LLVMValueRef components[8];
|
||||
for (unsigned i = 0; i < 4; ++i)
|
||||
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor, i);
|
||||
|
||||
for (unsigned i = 4; i < 8; ++i)
|
||||
components[i] = ac_llvm_extract_elem(&ctx->ac, descriptor2, i);
|
||||
descriptor = ac_build_gather_values(&ctx->ac, components, 8);
|
||||
}
|
||||
|
||||
return descriptor;
|
||||
}
|
||||
|
||||
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
||||
|
@@ -1417,11 +1417,13 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
|
||||
|
||||
const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT);
|
||||
if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
|
||||
if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
|
||||
dynamic->discard_rectangle.count = discard_rectangle_info->discardRectangleCount;
|
||||
typed_memcpy(dynamic->discard_rectangle.rectangles,
|
||||
discard_rectangle_info->pDiscardRectangles,
|
||||
discard_rectangle_info->discardRectangleCount);
|
||||
if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) {
|
||||
typed_memcpy(dynamic->discard_rectangle.rectangles,
|
||||
discard_rectangle_info->pDiscardRectangles,
|
||||
discard_rectangle_info->discardRectangleCount);
|
||||
}
|
||||
}
|
||||
|
||||
pipeline->dynamic_state.mask = states;
|
||||
|
@@ -624,6 +624,8 @@ shader_variant_create(struct radv_device *device,
|
||||
tm_options |= AC_TM_SISCHED;
|
||||
if (options->check_ir)
|
||||
tm_options |= AC_TM_CHECK_IR;
|
||||
if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
|
||||
tm_options |= AC_TM_NO_LOAD_STORE_OPT;
|
||||
|
||||
thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
|
||||
radv_init_llvm_once();
|
||||
|
@@ -244,6 +244,7 @@ NIR_FILES = \
|
||||
nir/nir_lower_constant_initializers.c \
|
||||
nir/nir_lower_double_ops.c \
|
||||
nir/nir_lower_drawpixels.c \
|
||||
nir/nir_lower_fb_read.c \
|
||||
nir/nir_lower_fragcoord_wtrans.c \
|
||||
nir/nir_lower_frexp.c \
|
||||
nir/nir_lower_global_vars_to_local.c \
|
||||
|
@@ -50,7 +50,7 @@ glsl_type::glsl_type(GLenum gl_type,
|
||||
gl_type(gl_type),
|
||||
base_type(base_type), sampled_type(GLSL_TYPE_VOID),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
interface_packing(0), interface_row_major(row_major),
|
||||
interface_packing(0), interface_row_major(row_major), packed(0),
|
||||
vector_elements(vector_elements), matrix_columns(matrix_columns),
|
||||
length(0), explicit_stride(explicit_stride)
|
||||
{
|
||||
@@ -85,7 +85,7 @@ glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
|
||||
base_type(base_type), sampled_type(type),
|
||||
sampler_dimensionality(dim), sampler_shadow(shadow),
|
||||
sampler_array(array), interface_packing(0),
|
||||
interface_row_major(0),
|
||||
interface_row_major(0), packed(0),
|
||||
length(0), explicit_stride(0)
|
||||
{
|
||||
this->mem_ctx = ralloc_context(NULL);
|
||||
@@ -134,7 +134,7 @@ glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields,
|
||||
base_type(GLSL_TYPE_INTERFACE), sampled_type(GLSL_TYPE_VOID),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
interface_packing((unsigned) packing),
|
||||
interface_row_major((unsigned) row_major),
|
||||
interface_row_major((unsigned) row_major), packed(0),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
length(num_fields), explicit_stride(0)
|
||||
{
|
||||
@@ -159,7 +159,7 @@ glsl_type::glsl_type(const glsl_type *return_type,
|
||||
gl_type(0),
|
||||
base_type(GLSL_TYPE_FUNCTION), sampled_type(GLSL_TYPE_VOID),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
interface_packing(0), interface_row_major(0),
|
||||
interface_packing(0), interface_row_major(0), packed(0),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
length(num_params), explicit_stride(0)
|
||||
{
|
||||
@@ -188,7 +188,7 @@ glsl_type::glsl_type(const char *subroutine_name) :
|
||||
gl_type(0),
|
||||
base_type(GLSL_TYPE_SUBROUTINE), sampled_type(GLSL_TYPE_VOID),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
interface_packing(0), interface_row_major(0),
|
||||
interface_packing(0), interface_row_major(0), packed(0),
|
||||
vector_elements(1), matrix_columns(1),
|
||||
length(0), explicit_stride(0)
|
||||
{
|
||||
@@ -534,7 +534,7 @@ glsl_type::glsl_type(const glsl_type *array, unsigned length,
|
||||
unsigned explicit_stride) :
|
||||
base_type(GLSL_TYPE_ARRAY), sampled_type(GLSL_TYPE_VOID),
|
||||
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
|
||||
interface_packing(0), interface_row_major(0),
|
||||
interface_packing(0), interface_row_major(0), packed(0),
|
||||
vector_elements(0), matrix_columns(0),
|
||||
length(length), name(NULL), explicit_stride(explicit_stride)
|
||||
{
|
||||
|
@@ -151,9 +151,11 @@ nir_variable_clone(const nir_variable *var, nir_shader *shader)
|
||||
nvar->name = ralloc_strdup(nvar, var->name);
|
||||
nvar->data = var->data;
|
||||
nvar->num_state_slots = var->num_state_slots;
|
||||
nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
|
||||
memcpy(nvar->state_slots, var->state_slots,
|
||||
var->num_state_slots * sizeof(nir_state_slot));
|
||||
if (var->num_state_slots) {
|
||||
nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots);
|
||||
memcpy(nvar->state_slots, var->state_slots,
|
||||
var->num_state_slots * sizeof(nir_state_slot));
|
||||
}
|
||||
if (var->constant_initializer) {
|
||||
nvar->constant_initializer =
|
||||
nir_constant_clone(var->constant_initializer, nvar);
|
||||
|
@@ -34,6 +34,7 @@ read_first_invocation(nir_builder *b, nir_ssa_def *x)
|
||||
first->src[0] = nir_src_for_ssa(x);
|
||||
nir_ssa_dest_init(&first->instr, &first->dest,
|
||||
x->num_components, x->bit_size, NULL);
|
||||
nir_builder_instr_insert(b, &first->instr);
|
||||
return &first->dest.ssa;
|
||||
}
|
||||
|
||||
@@ -129,7 +130,7 @@ nir_lower_non_uniform_access_impl(nir_function_impl *impl,
|
||||
nir_builder_init(&b, impl);
|
||||
|
||||
nir_foreach_block(block, impl) {
|
||||
nir_foreach_instr(instr, block) {
|
||||
nir_foreach_instr_safe(instr, block) {
|
||||
switch (instr->type) {
|
||||
case nir_instr_type_tex: {
|
||||
nir_tex_instr *tex = nir_instr_as_tex(instr);
|
||||
|
@@ -1086,9 +1086,6 @@ late_optimizations = [
|
||||
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
|
||||
(('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
|
||||
|
||||
(('b2f(is_used_more_than_once)', ('inot', 'a@1')), ('bcsel', a, 0.0, 1.0)),
|
||||
(('fneg(is_used_more_than_once)', ('b2f', ('inot', 'a@1'))), ('bcsel', a, -0.0, -1.0)),
|
||||
|
||||
# we do these late so that we don't get in the way of creating ffmas
|
||||
(('fmin', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmin', a, b))),
|
||||
(('fmax', ('fadd(is_used_once)', '#c', a), ('fadd(is_used_once)', '#c', b)), ('fadd', c, ('fmax', a, b))),
|
||||
|
@@ -65,15 +65,17 @@ build_umod(nir_builder *b, nir_ssa_def *n, uint64_t d)
|
||||
static nir_ssa_def *
|
||||
build_idiv(nir_builder *b, nir_ssa_def *n, int64_t d)
|
||||
{
|
||||
uint64_t abs_d = d < 0 ? -d : d;
|
||||
|
||||
if (d == 0) {
|
||||
return nir_imm_intN_t(b, 0, n->bit_size);
|
||||
} else if (d == 1) {
|
||||
return n;
|
||||
} else if (d == -1) {
|
||||
return nir_ineg(b, n);
|
||||
} else if (util_is_power_of_two_or_zero64(d)) {
|
||||
uint64_t abs_d = d < 0 ? -d : d;
|
||||
nir_ssa_def *uq = nir_ishr(b, n, nir_imm_int(b, util_logbase2_64(abs_d)));
|
||||
} else if (util_is_power_of_two_or_zero64(abs_d)) {
|
||||
nir_ssa_def *uq = nir_ushr(b, nir_iabs(b, n),
|
||||
nir_imm_int(b, util_logbase2_64(abs_d)));
|
||||
nir_ssa_def *n_neg = nir_ilt(b, n, nir_imm_intN_t(b, 0, n->bit_size));
|
||||
nir_ssa_def *neg = d < 0 ? nir_inot(b, n_neg) : n_neg;
|
||||
return nir_bcsel(b, neg, nir_ineg(b, uq), uq);
|
||||
|
@@ -143,22 +143,6 @@ is_not_const(nir_alu_instr *instr, unsigned src, UNUSED unsigned num_components,
|
||||
return !nir_src_is_const(instr->src[src].src);
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_used_more_than_once(nir_alu_instr *instr)
|
||||
{
|
||||
bool zero_if_use = list_empty(&instr->dest.dest.ssa.if_uses);
|
||||
bool zero_use = list_empty(&instr->dest.dest.ssa.uses);
|
||||
|
||||
if (zero_use && zero_if_use)
|
||||
return false;
|
||||
else if (zero_use && list_is_singular(&instr->dest.dest.ssa.if_uses))
|
||||
return false;
|
||||
else if (zero_if_use && list_is_singular(&instr->dest.dest.ssa.uses))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool
|
||||
is_used_once(nir_alu_instr *instr)
|
||||
{
|
||||
|
41
src/freedreno/Android.drm.mk
Normal file
41
src/freedreno/Android.drm.mk
Normal file
@@ -0,0 +1,41 @@
|
||||
# Mesa 3-D graphics library
|
||||
#
|
||||
# Copyright (C)
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Android.mk for libfreedreno_drm.a
|
||||
|
||||
# ---------------------------------------
|
||||
# Build libfreedreno_drm
|
||||
# ---------------------------------------
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_SRC_FILES := \
|
||||
$(drm_SOURCES)
|
||||
|
||||
LOCAL_C_INCLUDES := \
|
||||
$(MESA_TOP)/src/gallium/include \
|
||||
$(MESA_TOP)/src/gallium/auxiliary
|
||||
|
||||
LOCAL_MODULE := libfreedreno_drm
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
51
src/freedreno/Android.ir3.mk
Normal file
51
src/freedreno/Android.ir3.mk
Normal file
@@ -0,0 +1,51 @@
|
||||
# Mesa 3-D graphics library
|
||||
#
|
||||
# Copyright (C)
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Android.mk for libfreedreno_ir3.a
|
||||
|
||||
# ---------------------------------------
|
||||
# Build libfreedreno_ir3
|
||||
# ---------------------------------------
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_SRC_FILES := \
|
||||
$(ir3_SOURCES)
|
||||
|
||||
LOCAL_C_INCLUDES := \
|
||||
$(MESA_TOP)/src/compiler/nir \
|
||||
$(MESA_TOP)/src/gallium/include \
|
||||
$(MESA_TOP)/src/gallium/auxiliary \
|
||||
$(MESA_TOP)/prebuilt-intermediates/nir \
|
||||
|
||||
# We need libmesa_nir to get NIR's generated include directories.
|
||||
LOCAL_STATIC_LIBRARIES := \
|
||||
libmesa_nir
|
||||
|
||||
LOCAL_MODULE := libfreedreno_ir3
|
||||
|
||||
LOCAL_GENERATED_SOURCES := \
|
||||
$(MESA_GEN_GLSL_H) \
|
||||
$(MESA_GEN_NIR_H)
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
include $(BUILD_STATIC_LIBRARY)
|
30
src/freedreno/Android.mk
Normal file
30
src/freedreno/Android.mk
Normal file
@@ -0,0 +1,30 @@
|
||||
# Mesa 3-D graphics library
|
||||
#
|
||||
# Copyright (C)
|
||||
#
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a
|
||||
# copy of this software and associated documentation files (the "Software"),
|
||||
# to deal in the Software without restriction, including without limitation
|
||||
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
# and/or sell copies of the Software, and to permit persons to whom the
|
||||
# Software is furnished to do so, subject to the following conditions:
|
||||
#
|
||||
# The above copyright notice and this permission notice shall be included
|
||||
# in all copies or substantial portions of the Software.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
# DEALINGS IN THE SOFTWARE.
|
||||
|
||||
# Android.mk for libfreedreno_*
|
||||
|
||||
LOCAL_PATH := $(call my-dir)
|
||||
|
||||
include $(LOCAL_PATH)/Makefile.sources
|
||||
include $(MESA_TOP)/src/gallium/drivers/freedreno/Android.gen.mk
|
||||
include $(LOCAL_PATH)/Android.drm.mk
|
||||
include $(LOCAL_PATH)/Android.ir3.mk
|
@@ -36,6 +36,8 @@ ir3_SOURCES := \
|
||||
ir3/ir3_nir.c \
|
||||
ir3/ir3_nir.h \
|
||||
ir3/ir3_nir_analyze_ubo_ranges.c \
|
||||
ir3/ir3_nir_lower_load_barycentric_at_sample.c \
|
||||
ir3/ir3_nir_lower_load_barycentric_at_offset.c \
|
||||
ir3/ir3_nir_lower_io_offsets.c \
|
||||
ir3/ir3_nir_lower_tg4_to_tex.c \
|
||||
ir3/ir3_nir_move_varying_inputs.c \
|
||||
|
@@ -1304,7 +1304,8 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
|
||||
idx += nir_src_as_uint(intr->src[1]);
|
||||
for (int i = 0; i < intr->num_components; i++) {
|
||||
unsigned inloc = idx * 4 + i + comp;
|
||||
if (ctx->so->inputs[idx].bary) {
|
||||
if (ctx->so->inputs[idx].bary &&
|
||||
!ctx->so->inputs[idx].use_ldlv) {
|
||||
dst[i] = ir3_BARY_F(b, create_immed(b, inloc), 0, coord, 0);
|
||||
} else {
|
||||
/* for non-varyings use the pre-setup input, since
|
||||
@@ -2402,8 +2403,6 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
||||
so->inputs[n].bary = true;
|
||||
instr = create_frag_input(ctx, false, idx);
|
||||
} else {
|
||||
bool use_ldlv = false;
|
||||
|
||||
/* detect the special case for front/back colors where
|
||||
* we need to do flat vs smooth shading depending on
|
||||
* rast state:
|
||||
@@ -2424,12 +2423,12 @@ setup_input(struct ir3_context *ctx, nir_variable *in)
|
||||
if (ctx->compiler->flat_bypass) {
|
||||
if ((so->inputs[n].interpolate == INTERP_MODE_FLAT) ||
|
||||
(so->inputs[n].rasterflat && ctx->so->key.rasterflat))
|
||||
use_ldlv = true;
|
||||
so->inputs[n].use_ldlv = true;
|
||||
}
|
||||
|
||||
so->inputs[n].bary = true;
|
||||
|
||||
instr = create_frag_input(ctx, use_ldlv, idx);
|
||||
instr = create_frag_input(ctx, so->inputs[n].use_ldlv, idx);
|
||||
}
|
||||
|
||||
compile_assert(ctx, idx < ctx->ir->ninputs);
|
||||
|
@@ -414,6 +414,7 @@ struct ir3_shader_variant {
|
||||
/* fragment shader specific: */
|
||||
bool bary : 1; /* fetched varying (vs one loaded into reg) */
|
||||
bool rasterflat : 1; /* special handling for emit->rasterflat */
|
||||
bool use_ldlv : 1; /* internal to ir3_compiler_nir */
|
||||
bool half : 1;
|
||||
enum glsl_interp_mode interpolate;
|
||||
} inputs[16 + 2]; /* +POSITION +FACE */
|
||||
|
@@ -27,6 +27,9 @@ LOCAL_C_INCLUDES += \
|
||||
$(GALLIUM_TOP)/include \
|
||||
$(GALLIUM_TOP)/auxiliary \
|
||||
$(GALLIUM_TOP)/winsys \
|
||||
$(GALLIUM_TOP)/drivers
|
||||
$(GALLIUM_TOP)/drivers \
|
||||
$(MESA_TOP)/src/freedreno \
|
||||
$(MESA_TOP)/src/freedreno/ir3 \
|
||||
$(MESA_TOP)/src/freedreno/registers
|
||||
|
||||
include $(MESA_COMMON_MK)
|
||||
|
@@ -950,6 +950,8 @@ draw_set_mapped_so_targets(struct draw_context *draw,
|
||||
{
|
||||
int i;
|
||||
|
||||
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
|
||||
|
||||
for (i = 0; i < num_targets; i++)
|
||||
draw->so.targets[i] = targets[i];
|
||||
for (i = num_targets; i < PIPE_MAX_SO_BUFFERS; i++)
|
||||
|
@@ -753,8 +753,10 @@ void draw_gs_destroy( struct draw_context *draw )
|
||||
{
|
||||
int i;
|
||||
if (draw->gs.tgsi.machine) {
|
||||
for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++)
|
||||
for (i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
|
||||
align_free(draw->gs.tgsi.machine->Primitives[i]);
|
||||
align_free(draw->gs.tgsi.machine->PrimitiveOffsets[i]);
|
||||
}
|
||||
tgsi_exec_machine_destroy(draw->gs.tgsi.machine);
|
||||
}
|
||||
}
|
||||
|
@@ -25,7 +25,7 @@ LOCAL_MODULE_CLASS := STATIC_LIBRARIES
|
||||
endif
|
||||
|
||||
ir3_nir_trig_deps := \
|
||||
$(LOCAL_PATH)/ir3/ir3_nir_trig.py \
|
||||
$(MESA_TOP)/src/freedreno/ir3/ir3_nir_trig.py \
|
||||
$(MESA_TOP)/src/compiler/nir/nir_algebraic.py
|
||||
|
||||
intermediates := $(call local-generated-sources-dir)
|
||||
|
@@ -44,7 +44,7 @@ LOCAL_C_INCLUDES := \
|
||||
LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H)
|
||||
|
||||
LOCAL_SHARED_LIBRARIES := libdrm
|
||||
LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir
|
||||
LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir libfreedreno_drm libfreedreno_ir3
|
||||
LOCAL_MODULE := libmesa_pipe_freedreno
|
||||
|
||||
include $(LOCAL_PATH)/Android.gen.mk
|
||||
|
@@ -631,6 +631,7 @@ iris_emit_l3_config(struct iris_batch *batch, const struct gen_l3_config *cfg,
|
||||
* desirable behavior.
|
||||
*/
|
||||
reg.ErrorDetectionBehaviorControl = true;
|
||||
reg.UseFullWays = true;
|
||||
#endif
|
||||
reg.URBAllocation = cfg->n[GEN_L3P_URB];
|
||||
reg.ROAllocation = cfg->n[GEN_L3P_RO];
|
||||
|
@@ -995,7 +995,7 @@ emit_load_const(compiler_context *ctx, nir_load_const_instr *instr)
|
||||
{
|
||||
nir_ssa_def def = instr->def;
|
||||
|
||||
float *v = ralloc_array(NULL, float, 4);
|
||||
float *v = rzalloc_array(NULL, float, 4);
|
||||
nir_const_load_to_arr(v, instr, f32);
|
||||
_mesa_hash_table_u64_insert(ctx->ssa_constants, def.index + 1, v);
|
||||
}
|
||||
|
@@ -164,10 +164,10 @@ panfrost_texture_swizzle(unsigned off_x,
|
||||
|
||||
/* Use fast path if available */
|
||||
if (!(off_x || off_y) && (width == dest_width)) {
|
||||
if (bytes_per_pixel == 4 /* && (ALIGN(width, 16) == width) */) {
|
||||
if (bytes_per_pixel == 4 && (ALIGN(width, 16) == width)) {
|
||||
swizzle_bpp4_align16(width, height, source_stride >> 2, (block_pitch * 256 >> 4), (const uint32_t *) pixels, (uint32_t *) ldest);
|
||||
return;
|
||||
} else if (bytes_per_pixel == 1 /* && (ALIGN(width, 16) == width) */) {
|
||||
} else if (bytes_per_pixel == 1 && (ALIGN(width, 16) == width)) {
|
||||
swizzle_bpp1_align16(width, height, source_stride, (block_pitch * 256 >> 4), pixels, (uint8_t *) ldest);
|
||||
return;
|
||||
}
|
||||
|
@@ -287,11 +287,9 @@ si_invalidate_buffer(struct si_context *sctx,
|
||||
/* Check if mapping this buffer would cause waiting for the GPU. */
|
||||
if (si_rings_is_buffer_referenced(sctx, buf->buf, RADEON_USAGE_READWRITE) ||
|
||||
!sctx->ws->buffer_wait(buf->buf, 0, RADEON_USAGE_READWRITE)) {
|
||||
uint64_t old_va = buf->gpu_address;
|
||||
|
||||
/* Reallocate the buffer in the same pipe_resource. */
|
||||
si_alloc_resource(sctx->screen, buf);
|
||||
si_rebind_buffer(sctx, &buf->b.b, old_va);
|
||||
si_rebind_buffer(sctx, &buf->b.b);
|
||||
} else {
|
||||
util_range_set_empty(&buf->valid_buffer_range);
|
||||
}
|
||||
@@ -307,7 +305,6 @@ void si_replace_buffer_storage(struct pipe_context *ctx,
|
||||
struct si_context *sctx = (struct si_context*)ctx;
|
||||
struct si_resource *sdst = si_resource(dst);
|
||||
struct si_resource *ssrc = si_resource(src);
|
||||
uint64_t old_gpu_address = sdst->gpu_address;
|
||||
|
||||
pb_reference(&sdst->buf, ssrc->buf);
|
||||
sdst->gpu_address = ssrc->gpu_address;
|
||||
@@ -322,7 +319,7 @@ void si_replace_buffer_storage(struct pipe_context *ctx,
|
||||
assert(sdst->bo_alignment == ssrc->bo_alignment);
|
||||
assert(sdst->domains == ssrc->domains);
|
||||
|
||||
si_rebind_buffer(sctx, dst, old_gpu_address);
|
||||
si_rebind_buffer(sctx, dst);
|
||||
}
|
||||
|
||||
static void si_invalidate_resource(struct pipe_context *ctx,
|
||||
|
@@ -999,6 +999,7 @@ static void si_init_buffer_resources(struct si_buffer_resources *buffers,
|
||||
buffers->priority = priority;
|
||||
buffers->priority_constbuf = priority_constbuf;
|
||||
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource*));
|
||||
buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0]));
|
||||
|
||||
si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers);
|
||||
}
|
||||
@@ -1013,6 +1014,7 @@ static void si_release_buffer_resources(struct si_buffer_resources *buffers,
|
||||
}
|
||||
|
||||
FREE(buffers->buffers);
|
||||
FREE(buffers->offsets);
|
||||
}
|
||||
|
||||
static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
|
||||
@@ -1219,11 +1221,10 @@ static void si_set_constant_buffer(struct si_context *sctx,
|
||||
if (input && (input->buffer || input->user_buffer)) {
|
||||
struct pipe_resource *buffer = NULL;
|
||||
uint64_t va;
|
||||
unsigned buffer_offset;
|
||||
|
||||
/* Upload the user buffer if needed. */
|
||||
if (input->user_buffer) {
|
||||
unsigned buffer_offset;
|
||||
|
||||
si_upload_const_buffer(sctx,
|
||||
(struct si_resource**)&buffer, input->user_buffer,
|
||||
input->buffer_size, &buffer_offset);
|
||||
@@ -1232,12 +1233,13 @@ static void si_set_constant_buffer(struct si_context *sctx,
|
||||
si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, NULL);
|
||||
return;
|
||||
}
|
||||
va = si_resource(buffer)->gpu_address + buffer_offset;
|
||||
} else {
|
||||
pipe_resource_reference(&buffer, input->buffer);
|
||||
va = si_resource(buffer)->gpu_address + input->buffer_offset;
|
||||
buffer_offset = input->buffer_offset;
|
||||
}
|
||||
|
||||
va = si_resource(buffer)->gpu_address + buffer_offset;
|
||||
|
||||
/* Set the descriptor. */
|
||||
uint32_t *desc = descs->list + slot*4;
|
||||
desc[0] = va;
|
||||
@@ -1252,6 +1254,7 @@ static void si_set_constant_buffer(struct si_context *sctx,
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
|
||||
buffers->buffers[slot] = buffer;
|
||||
buffers->offsets[slot] = buffer_offset;
|
||||
radeon_add_to_gfx_buffer_list_check_mem(sctx,
|
||||
si_resource(buffer),
|
||||
RADEON_USAGE_READ,
|
||||
@@ -1336,6 +1339,7 @@ static void si_set_shader_buffer(struct si_context *sctx,
|
||||
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
|
||||
|
||||
pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
|
||||
buffers->offsets[slot] = sbuffer->buffer_offset;
|
||||
radeon_add_to_gfx_buffer_list_check_mem(sctx, buf,
|
||||
writable ? RADEON_USAGE_READWRITE :
|
||||
RADEON_USAGE_READ,
|
||||
@@ -1505,20 +1509,6 @@ void si_set_ring_buffer(struct si_context *sctx, uint slot,
|
||||
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
|
||||
}
|
||||
|
||||
static void si_desc_reset_buffer_offset(uint32_t *desc, uint64_t old_buf_va,
|
||||
struct pipe_resource *new_buf)
|
||||
{
|
||||
/* Retrieve the buffer offset from the descriptor. */
|
||||
uint64_t old_desc_va = si_desc_extract_buffer_address(desc);
|
||||
|
||||
assert(old_buf_va <= old_desc_va);
|
||||
uint64_t offset_within_buffer = old_desc_va - old_buf_va;
|
||||
|
||||
/* Update the descriptor. */
|
||||
si_set_buf_desc_address(si_resource(new_buf), offset_within_buffer,
|
||||
desc);
|
||||
}
|
||||
|
||||
/* INTERNAL CONST BUFFERS */
|
||||
|
||||
static void si_set_polygon_stipple(struct pipe_context *ctx,
|
||||
@@ -1603,7 +1593,6 @@ static void si_reset_buffer_resources(struct si_context *sctx,
|
||||
unsigned descriptors_idx,
|
||||
unsigned slot_mask,
|
||||
struct pipe_resource *buf,
|
||||
uint64_t old_va,
|
||||
enum radeon_bo_priority priority)
|
||||
{
|
||||
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
|
||||
@@ -1612,8 +1601,8 @@ static void si_reset_buffer_resources(struct si_context *sctx,
|
||||
while (mask) {
|
||||
unsigned i = u_bit_scan(&mask);
|
||||
if (buffers->buffers[i] == buf) {
|
||||
si_desc_reset_buffer_offset(descs->list + i*4,
|
||||
old_va, buf);
|
||||
si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
|
||||
descs->list + i*4);
|
||||
sctx->descriptors_dirty |= 1u << descriptors_idx;
|
||||
|
||||
radeon_add_to_gfx_buffer_list_check_mem(sctx,
|
||||
@@ -1629,8 +1618,7 @@ static void si_reset_buffer_resources(struct si_context *sctx,
|
||||
/* Update all resource bindings where the buffer is bound, including
|
||||
* all resource descriptors. This is invalidate_buffer without
|
||||
* the invalidation. */
|
||||
void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
|
||||
uint64_t old_va)
|
||||
void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
|
||||
{
|
||||
struct si_resource *buffer = si_resource(buf);
|
||||
unsigned i, shader;
|
||||
@@ -1670,8 +1658,8 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
|
||||
if (buffers->buffers[i] != buf)
|
||||
continue;
|
||||
|
||||
si_desc_reset_buffer_offset(descs->list + i*4,
|
||||
old_va, buf);
|
||||
si_set_buf_desc_address(si_resource(buf), buffers->offsets[i],
|
||||
descs->list + i*4);
|
||||
sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
|
||||
|
||||
radeon_add_to_gfx_buffer_list_check_mem(sctx,
|
||||
@@ -1694,7 +1682,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
|
||||
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
|
||||
si_const_and_shader_buffer_descriptors_idx(shader),
|
||||
u_bit_consecutive(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
|
||||
buf, old_va,
|
||||
buf,
|
||||
sctx->const_and_shader_buffers[shader].priority_constbuf);
|
||||
}
|
||||
|
||||
@@ -1703,7 +1691,7 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
|
||||
si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
|
||||
si_const_and_shader_buffer_descriptors_idx(shader),
|
||||
u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS),
|
||||
buf, old_va,
|
||||
buf,
|
||||
sctx->const_and_shader_buffers[shader].priority);
|
||||
}
|
||||
|
||||
@@ -1720,9 +1708,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
|
||||
if (samplers->views[i]->texture == buf) {
|
||||
unsigned desc_slot = si_get_sampler_slot(i);
|
||||
|
||||
si_desc_reset_buffer_offset(descs->list +
|
||||
desc_slot * 16 + 4,
|
||||
old_va, buf);
|
||||
si_set_buf_desc_address(si_resource(buf),
|
||||
samplers->views[i]->u.buf.offset,
|
||||
descs->list + desc_slot * 16 + 4);
|
||||
sctx->descriptors_dirty |=
|
||||
1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
|
||||
@@ -1752,9 +1740,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
|
||||
if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
|
||||
si_mark_image_range_valid(&images->views[i]);
|
||||
|
||||
si_desc_reset_buffer_offset(
|
||||
descs->list + desc_slot * 8 + 4,
|
||||
old_va, buf);
|
||||
si_set_buf_desc_address(si_resource(buf),
|
||||
images->views[i].u.buf.offset,
|
||||
descs->list + desc_slot * 8 + 4);
|
||||
sctx->descriptors_dirty |=
|
||||
1u << si_sampler_and_image_descriptors_idx(shader);
|
||||
|
||||
|
@@ -409,6 +409,7 @@ struct si_descriptors {
|
||||
|
||||
struct si_buffer_resources {
|
||||
struct pipe_resource **buffers; /* this has num_buffers elements */
|
||||
unsigned *offsets; /* this has num_buffers elements */
|
||||
|
||||
enum radeon_bo_priority priority:6;
|
||||
enum radeon_bo_priority priority_constbuf:6;
|
||||
@@ -487,8 +488,7 @@ struct pb_slab *si_bindless_descriptor_slab_alloc(void *priv, unsigned heap,
|
||||
unsigned entry_size,
|
||||
unsigned group_index);
|
||||
void si_bindless_descriptor_slab_free(void *priv, struct pb_slab *pslab);
|
||||
void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf,
|
||||
uint64_t old_va);
|
||||
void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf);
|
||||
/* si_state.c */
|
||||
void si_init_state_compute_functions(struct si_context *sctx);
|
||||
void si_init_state_functions(struct si_context *sctx);
|
||||
|
@@ -53,6 +53,10 @@ LOCAL_SHARED_LIBRARIES += \
|
||||
libexpat
|
||||
endif
|
||||
|
||||
LOCAL_STATIC_LIBRARIES += \
|
||||
libfreedreno_drm \
|
||||
libfreedreno_ir3
|
||||
|
||||
ifeq ($(USE_LIBBACKTRACE),true)
|
||||
LOCAL_SHARED_LIBRARIES += libbacktrace
|
||||
endif
|
||||
|
@@ -78,8 +78,8 @@ foreach d : [[with_gallium_kmsro, [
|
||||
'pl111_dri.so',
|
||||
'repaper_dri.so',
|
||||
'rockchip_dri.so',
|
||||
'st7586.so',
|
||||
'st7735r.so',
|
||||
'st7586_dri.so',
|
||||
'st7735r_dri.so',
|
||||
'sun4i-drm_dri.so',
|
||||
]],
|
||||
[with_gallium_radeonsi, 'radeonsi_dri.so'],
|
||||
|
@@ -43,9 +43,9 @@ libosmesa = shared_library(
|
||||
inc_gallium_drivers,
|
||||
],
|
||||
link_depends : osmesa_link_deps,
|
||||
link_whole : [libosmesa_st],
|
||||
link_whole : [libosmesa_st, libglapi_static],
|
||||
link_with : [
|
||||
libmesa_gallium, libgallium, libglapi_static, libws_null, osmesa_link_with,
|
||||
libmesa_gallium, libgallium, libws_null, osmesa_link_with,
|
||||
],
|
||||
dependencies : [
|
||||
dep_selinux, dep_thread, dep_clock, dep_unwind,
|
||||
|
@@ -378,7 +378,8 @@ static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs)
|
||||
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCE &&
|
||||
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_UVD_ENC &&
|
||||
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_DEC &&
|
||||
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC;
|
||||
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_ENC &&
|
||||
cs->ib[IB_MAIN].ip_type != AMDGPU_HW_IP_VCN_JPEG;
|
||||
}
|
||||
|
||||
static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
|
||||
|
@@ -54,7 +54,7 @@ __glXSendError(Display * dpy, int_fast8_t errorCode, uint_fast32_t resourceID,
|
||||
error.errorCode = glx_dpy->codes->first_error + errorCode;
|
||||
}
|
||||
|
||||
error.sequenceNumber = dpy->last_request_read;
|
||||
error.sequenceNumber = dpy->request;
|
||||
error.resourceID = resourceID;
|
||||
error.minorCode = minorCode;
|
||||
error.majorCode = glx_dpy->majorOpcode;
|
||||
@@ -73,7 +73,7 @@ __glXSendErrorForXcb(Display * dpy, const xcb_generic_error_t *err)
|
||||
|
||||
error.type = X_Error;
|
||||
error.errorCode = err->error_code;
|
||||
error.sequenceNumber = dpy->last_request_read;
|
||||
error.sequenceNumber = err->sequence;
|
||||
error.resourceID = err->resource_id;
|
||||
error.minorCode = err->minor_code;
|
||||
error.majorCode = err->major_code;
|
||||
|
@@ -591,7 +591,7 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
|
||||
*/
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
if (inst->dst.file == VGRF && inst->has_source_and_destination_hazard()) {
|
||||
for (unsigned i = 0; i < 3; i++) {
|
||||
for (unsigned i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == VGRF) {
|
||||
ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
|
||||
}
|
||||
@@ -710,14 +710,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
|
||||
if (inst->opcode == SHADER_OPCODE_SEND && inst->ex_mlen > 0 &&
|
||||
inst->src[2].file == VGRF &&
|
||||
inst->src[3].file == VGRF &&
|
||||
inst->src[2].nr != inst->src[3].nr) {
|
||||
for (unsigned i = 0; i < inst->mlen; i++) {
|
||||
for (unsigned j = 0; j < inst->ex_mlen; j++) {
|
||||
ra_add_node_interference(g, inst->src[2].nr + i,
|
||||
inst->src[3].nr + j);
|
||||
}
|
||||
}
|
||||
}
|
||||
inst->src[2].nr != inst->src[3].nr)
|
||||
ra_add_node_interference(g, inst->src[2].nr,
|
||||
inst->src[3].nr);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -165,7 +165,7 @@ anv_state_table_init(struct anv_state_table *table,
|
||||
goto fail_fd;
|
||||
}
|
||||
|
||||
if (!u_vector_init(&table->mmap_cleanups,
|
||||
if (!u_vector_init(&table->cleanups,
|
||||
round_to_power_of_two(sizeof(struct anv_state_table_cleanup)),
|
||||
128)) {
|
||||
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
|
||||
@@ -179,12 +179,12 @@ anv_state_table_init(struct anv_state_table *table,
|
||||
uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE;
|
||||
result = anv_state_table_expand_range(table, initial_size);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_mmap_cleanups;
|
||||
goto fail_cleanups;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_mmap_cleanups:
|
||||
u_vector_finish(&table->mmap_cleanups);
|
||||
fail_cleanups:
|
||||
u_vector_finish(&table->cleanups);
|
||||
fail_fd:
|
||||
close(table->fd);
|
||||
|
||||
@@ -195,7 +195,7 @@ static VkResult
|
||||
anv_state_table_expand_range(struct anv_state_table *table, uint32_t size)
|
||||
{
|
||||
void *map;
|
||||
struct anv_mmap_cleanup *cleanup;
|
||||
struct anv_state_table_cleanup *cleanup;
|
||||
|
||||
/* Assert that we only ever grow the pool */
|
||||
assert(size >= table->state.end);
|
||||
@@ -204,11 +204,11 @@ anv_state_table_expand_range(struct anv_state_table *table, uint32_t size)
|
||||
if (size > BLOCK_POOL_MEMFD_SIZE)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
cleanup = u_vector_add(&table->mmap_cleanups);
|
||||
cleanup = u_vector_add(&table->cleanups);
|
||||
if (!cleanup)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
*cleanup = ANV_MMAP_CLEANUP_INIT;
|
||||
*cleanup = ANV_STATE_TABLE_CLEANUP_INIT;
|
||||
|
||||
/* Just leak the old map until we destroy the pool. We can't munmap it
|
||||
* without races or imposing locking on the block allocate fast path. On
|
||||
@@ -272,12 +272,12 @@ anv_state_table_finish(struct anv_state_table *table)
|
||||
{
|
||||
struct anv_state_table_cleanup *cleanup;
|
||||
|
||||
u_vector_foreach(cleanup, &table->mmap_cleanups) {
|
||||
u_vector_foreach(cleanup, &table->cleanups) {
|
||||
if (cleanup->map)
|
||||
munmap(cleanup->map, cleanup->size);
|
||||
}
|
||||
|
||||
u_vector_finish(&table->mmap_cleanups);
|
||||
u_vector_finish(&table->cleanups);
|
||||
|
||||
close(table->fd);
|
||||
}
|
||||
|
@@ -103,6 +103,12 @@ anv_descriptor_data_for_type(const struct anv_physical_device *device,
|
||||
type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC))
|
||||
data |= ANV_DESCRIPTOR_ADDRESS_RANGE;
|
||||
|
||||
/* On Ivy Bridge and Bay Trail, we need to swizzle textures in the shader */
|
||||
if (device->info.gen == 7 && !device->info.is_haswell &&
|
||||
(type == VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ||
|
||||
type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER))
|
||||
data |= ANV_DESCRIPTOR_TEXTURE_SWIZZLE;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -123,6 +129,9 @@ anv_descriptor_data_size(enum anv_descriptor_data data)
|
||||
if (data & ANV_DESCRIPTOR_ADDRESS_RANGE)
|
||||
size += sizeof(struct anv_address_range_descriptor);
|
||||
|
||||
if (data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE)
|
||||
size += sizeof(struct anv_texture_swizzle_descriptor);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
@@ -1184,6 +1193,26 @@ anv_descriptor_set_write_image_view(struct anv_device *device,
|
||||
|
||||
anv_descriptor_set_write_image_param(desc_map, image_param);
|
||||
}
|
||||
|
||||
if (bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) {
|
||||
assert(!(bind_layout->data & ANV_DESCRIPTOR_SAMPLED_IMAGE));
|
||||
assert(image_view);
|
||||
struct anv_texture_swizzle_descriptor desc_data[3];
|
||||
memset(desc_data, 0, sizeof(desc_data));
|
||||
|
||||
for (unsigned p = 0; p < image_view->n_planes; p++) {
|
||||
desc_data[p] = (struct anv_texture_swizzle_descriptor) {
|
||||
.swizzle = {
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.r,
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.g,
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.b,
|
||||
(uint8_t)image_view->planes[p].isl.swizzle.a,
|
||||
},
|
||||
};
|
||||
}
|
||||
memcpy(desc_map, desc_data,
|
||||
MAX2(1, bind_layout->max_plane_count) * sizeof(desc_data[0]));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -1170,6 +1170,11 @@ void anv_GetPhysicalDeviceFeatures2(
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS 64
|
||||
|
||||
#define MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS 64
|
||||
#define MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS 256
|
||||
|
||||
void anv_GetPhysicalDeviceProperties(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
VkPhysicalDeviceProperties* pProperties)
|
||||
@@ -1215,20 +1220,20 @@ void anv_GetPhysicalDeviceProperties(
|
||||
.sparseAddressSpaceSize = 0,
|
||||
.maxBoundDescriptorSets = MAX_SETS,
|
||||
.maxPerStageDescriptorSamplers = max_samplers,
|
||||
.maxPerStageDescriptorUniformBuffers = 64,
|
||||
.maxPerStageDescriptorUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS,
|
||||
.maxPerStageDescriptorStorageBuffers = max_ssbos,
|
||||
.maxPerStageDescriptorSampledImages = max_textures,
|
||||
.maxPerStageDescriptorStorageImages = max_images,
|
||||
.maxPerStageDescriptorInputAttachments = 64,
|
||||
.maxPerStageDescriptorInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS,
|
||||
.maxPerStageResources = max_per_stage,
|
||||
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
|
||||
.maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */
|
||||
.maxDescriptorSetUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS, /* number of stages * maxPerStageDescriptorUniformBuffers */
|
||||
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
|
||||
.maxDescriptorSetStorageBuffers = 6 * max_ssbos, /* number of stages * maxPerStageDescriptorStorageBuffers */
|
||||
.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
|
||||
.maxDescriptorSetSampledImages = 6 * max_textures, /* number of stages * maxPerStageDescriptorSampledImages */
|
||||
.maxDescriptorSetStorageImages = 6 * max_images, /* number of stages * maxPerStageDescriptorStorageImages */
|
||||
.maxDescriptorSetInputAttachments = 256,
|
||||
.maxDescriptorSetInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS,
|
||||
.maxVertexInputAttributes = MAX_VBS,
|
||||
.maxVertexInputBindings = MAX_VBS,
|
||||
.maxVertexInputAttributeOffset = 2047,
|
||||
@@ -1393,20 +1398,20 @@ void anv_GetPhysicalDeviceProperties2(
|
||||
props->robustBufferAccessUpdateAfterBind = true;
|
||||
props->quadDivergentImplicitLod = false;
|
||||
props->maxPerStageDescriptorUpdateAfterBindSamplers = max_bindless_views;
|
||||
props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = 0;
|
||||
props->maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
|
||||
props->maxPerStageDescriptorUpdateAfterBindStorageBuffers = UINT32_MAX;
|
||||
props->maxPerStageDescriptorUpdateAfterBindSampledImages = max_bindless_views;
|
||||
props->maxPerStageDescriptorUpdateAfterBindStorageImages = max_bindless_views;
|
||||
props->maxPerStageDescriptorUpdateAfterBindInputAttachments = 0;
|
||||
props->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_PER_STAGE_DESCRIPTOR_INPUT_ATTACHMENTS;
|
||||
props->maxPerStageUpdateAfterBindResources = UINT32_MAX;
|
||||
props->maxDescriptorSetUpdateAfterBindSamplers = max_bindless_views;
|
||||
props->maxDescriptorSetUpdateAfterBindUniformBuffers = 0;
|
||||
props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = 0;
|
||||
props->maxDescriptorSetUpdateAfterBindUniformBuffers = 6 * MAX_PER_STAGE_DESCRIPTOR_UNIFORM_BUFFERS;
|
||||
props->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
|
||||
props->maxDescriptorSetUpdateAfterBindStorageBuffers = UINT32_MAX;
|
||||
props->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2;
|
||||
props->maxDescriptorSetUpdateAfterBindSampledImages = max_bindless_views;
|
||||
props->maxDescriptorSetUpdateAfterBindStorageImages = max_bindless_views;
|
||||
props->maxDescriptorSetUpdateAfterBindInputAttachments = 0;
|
||||
props->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_DESCRIPTOR_SET_INPUT_ATTACHMENTS;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -2995,6 +3000,9 @@ void anv_FreeMemory(
|
||||
if (mem->map)
|
||||
anv_UnmapMemory(_device, _mem);
|
||||
|
||||
p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used,
|
||||
-mem->bo->size);
|
||||
|
||||
anv_bo_cache_release(device, &device->bo_cache, mem->bo);
|
||||
|
||||
#if defined(ANDROID) && ANDROID_API_LEVEL >= 26
|
||||
@@ -3002,9 +3010,6 @@ void anv_FreeMemory(
|
||||
AHardwareBuffer_release(mem->ahw);
|
||||
#endif
|
||||
|
||||
p_atomic_add(&pdevice->memory.heaps[mem->type->heapIndex].used,
|
||||
-mem->bo->size);
|
||||
|
||||
vk_free2(&device->alloc, pAllocator, mem);
|
||||
}
|
||||
|
||||
|
@@ -1278,6 +1278,10 @@ anv_image_fill_surface_state(struct anv_device *device,
|
||||
if (view_usage == ISL_SURF_USAGE_RENDER_TARGET_BIT)
|
||||
view.swizzle = anv_swizzle_for_render(view.swizzle);
|
||||
|
||||
/* On Ivy Bridge and Bay Trail we do the swizzle in the shader */
|
||||
if (device->info.gen == 7 && !device->info.is_haswell)
|
||||
view.swizzle = ISL_SWIZZLE_IDENTITY;
|
||||
|
||||
/* If this is a HiZ buffer we can sample from with a programmable clear
|
||||
* value (SKL+), define the clear value to the optimal constant.
|
||||
*/
|
||||
|
@@ -725,6 +725,10 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin,
|
||||
nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
|
||||
nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
|
||||
unsigned set = var->data.descriptor_set;
|
||||
unsigned binding = var->data.binding;
|
||||
unsigned binding_offset = state->set[set].surface_offsets[binding];
|
||||
|
||||
nir_builder *b = &state->builder;
|
||||
b->cursor = nir_before_instr(&intrin->instr);
|
||||
|
||||
@@ -742,7 +746,7 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin,
|
||||
intrin->dest.ssa.bit_size, state);
|
||||
|
||||
nir_ssa_def_rewrite_uses(&intrin->dest.ssa, nir_src_for_ssa(desc));
|
||||
} else if (use_bindless) {
|
||||
} else if (binding_offset > MAX_BINDING_TABLE_SIZE) {
|
||||
const bool write_only =
|
||||
(var->data.image.access & ACCESS_NON_READABLE) != 0;
|
||||
nir_ssa_def *desc =
|
||||
@@ -750,9 +754,6 @@ lower_image_intrinsic(nir_intrinsic_instr *intrin,
|
||||
nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0);
|
||||
nir_rewrite_image_intrinsic(intrin, handle, true);
|
||||
} else {
|
||||
unsigned set = var->data.descriptor_set;
|
||||
unsigned binding = var->data.binding;
|
||||
unsigned binding_offset = state->set[set].surface_offsets[binding];
|
||||
unsigned array_size =
|
||||
state->layout->set[set].layout->binding[binding].array_size;
|
||||
|
||||
@@ -899,13 +900,98 @@ tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
|
||||
return plane;
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
|
||||
unsigned start, unsigned end)
|
||||
{
|
||||
if (start == end - 1) {
|
||||
return srcs[start];
|
||||
} else {
|
||||
unsigned mid = start + (end - start) / 2;
|
||||
return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
|
||||
build_def_array_select(b, srcs, idx, start, mid),
|
||||
build_def_array_select(b, srcs, idx, mid, end));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lower_gen7_tex_swizzle(nir_tex_instr *tex, unsigned plane,
|
||||
struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
assert(state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell);
|
||||
if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
|
||||
nir_tex_instr_is_query(tex) ||
|
||||
tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
|
||||
(tex->is_shadow && tex->is_new_style_shadow))
|
||||
return;
|
||||
|
||||
int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
|
||||
assert(deref_src_idx >= 0);
|
||||
|
||||
nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
|
||||
UNUSED nir_variable *var = nir_deref_instr_get_variable(deref);
|
||||
|
||||
UNUSED unsigned set = var->data.descriptor_set;
|
||||
UNUSED unsigned binding = var->data.binding;
|
||||
UNUSED const struct anv_descriptor_set_binding_layout *bind_layout =
|
||||
&state->layout->set[set].layout->binding[binding];
|
||||
assert(bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE);
|
||||
|
||||
nir_builder *b = &state->builder;
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
const unsigned plane_offset =
|
||||
plane * sizeof(struct anv_texture_swizzle_descriptor);
|
||||
nir_ssa_def *swiz =
|
||||
build_descriptor_load(deref, plane_offset, 1, 32, state);
|
||||
|
||||
b->cursor = nir_after_instr(&tex->instr);
|
||||
|
||||
assert(tex->dest.ssa.bit_size == 32);
|
||||
assert(tex->dest.ssa.num_components == 4);
|
||||
|
||||
/* Initializing to undef is ok; nir_opt_undef will clean it up. */
|
||||
nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
|
||||
nir_ssa_def *comps[8];
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
|
||||
comps[i] = undef;
|
||||
|
||||
comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
|
||||
if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
|
||||
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
|
||||
else
|
||||
comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
|
||||
comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
|
||||
comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
|
||||
comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
|
||||
comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);
|
||||
|
||||
nir_ssa_def *swiz_comps[4];
|
||||
for (unsigned i = 0; i < 4; i++) {
|
||||
nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
|
||||
swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
|
||||
}
|
||||
nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);
|
||||
|
||||
/* Rewrite uses before we insert so we don't rewrite this use */
|
||||
nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
|
||||
nir_src_for_ssa(swiz_tex_res),
|
||||
swiz_tex_res->parent_instr);
|
||||
}
|
||||
|
||||
static void
|
||||
lower_tex(nir_tex_instr *tex, struct apply_pipeline_layout_state *state)
|
||||
{
|
||||
state->builder.cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
unsigned plane = tex_instr_get_and_remove_plane_src(tex);
|
||||
|
||||
/* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
|
||||
* before we lower the derefs away so we can still find the descriptor.
|
||||
*/
|
||||
if (state->pdevice->info.gen == 7 && !state->pdevice->info.is_haswell)
|
||||
lower_gen7_tex_swizzle(tex, plane, state);
|
||||
|
||||
state->builder.cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
lower_tex_deref(tex, nir_tex_src_texture_deref,
|
||||
&tex->texture_index, plane, state);
|
||||
|
||||
|
@@ -400,12 +400,12 @@ populate_wm_prog_key(const struct gen_device_info *devinfo,
|
||||
* harmless to compute it and then let dead-code take care of it.
|
||||
*/
|
||||
if (ms_info->rasterizationSamples > 1) {
|
||||
key->persample_interp =
|
||||
key->persample_interp = ms_info->sampleShadingEnable &&
|
||||
(ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
|
||||
key->multisample_fbo = true;
|
||||
}
|
||||
|
||||
key->frag_coord_adds_sample_pos = ms_info->sampleShadingEnable;
|
||||
key->frag_coord_adds_sample_pos = key->persample_interp;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -744,7 +744,7 @@ struct anv_state_table {
|
||||
struct anv_free_entry *map;
|
||||
uint32_t size;
|
||||
struct anv_block_state state;
|
||||
struct u_vector mmap_cleanups;
|
||||
struct u_vector cleanups;
|
||||
};
|
||||
|
||||
struct anv_state_pool {
|
||||
@@ -1548,6 +1548,17 @@ struct anv_sampled_image_descriptor {
|
||||
uint32_t sampler;
|
||||
};
|
||||
|
||||
/** Struct representing a texture swizzle descriptor
 *
 * Holds one channel-select byte per component of the texture result,
 * followed by padding.
 */
struct anv_texture_swizzle_descriptor {
|
||||
/** Texture swizzle
|
||||
*
|
||||
* See also nir_intrinsic_channel_select_intel
|
||||
*/
|
||||
uint8_t swizzle[4];
|
||||
|
||||
/** Unused padding to ensure the struct is a multiple of 64 bits */
|
||||
uint32_t _pad;
|
||||
};
|
||||
|
||||
/** Struct representing a storage image descriptor */
|
||||
struct anv_storage_image_descriptor {
|
||||
/** Bindless image handles
|
||||
@@ -1589,6 +1600,8 @@ enum anv_descriptor_data {
|
||||
ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
|
||||
/** Storage image handles */
|
||||
ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
|
||||
/** Texture swizzle (see struct anv_texture_swizzle_descriptor) */
|
||||
ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
|
||||
};
|
||||
|
||||
struct anv_descriptor_set_binding_layout {
|
||||
@@ -3201,7 +3214,13 @@ anv_can_sample_with_hiz(const struct gen_device_info * const devinfo,
|
||||
if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
|
||||
return false;
|
||||
|
||||
if (devinfo->gen < 8)
|
||||
/* Allow this feature on BDW even though it is disabled in the BDW devinfo
|
||||
* struct. There's documentation which suggests that this feature actually
|
||||
* reduces performance on BDW, but it has only been observed to help so
|
||||
* far. Sampling fast-cleared blocks on BDW must also be handled with care
|
||||
* (see depth_stencil_attachment_compute_aux_usage() for more info).
|
||||
*/
|
||||
if (devinfo->gen != 8 && !devinfo->has_sample_with_hiz)
|
||||
return false;
|
||||
|
||||
return image->samples == 1;
|
||||
|
@@ -346,14 +346,23 @@ emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
|
||||
}
|
||||
|
||||
static void
|
||||
emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address addr)
|
||||
emit_query_mi_availability(struct gen_mi_builder *b,
|
||||
struct anv_address addr,
|
||||
bool available)
|
||||
{
|
||||
gen_mi_store(b, gen_mi_mem64(addr), gen_mi_imm(available));
|
||||
}
|
||||
|
||||
static void
|
||||
emit_query_pc_availability(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_address addr,
|
||||
bool available)
|
||||
{
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
|
||||
pc.DestinationAddressType = DAT_PPGTT;
|
||||
pc.PostSyncOperation = WriteImmediateData;
|
||||
pc.Address = addr;
|
||||
pc.ImmediateData = 1;
|
||||
pc.ImmediateData = available;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -366,11 +375,39 @@ emit_zero_queries(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct gen_mi_builder *b, struct anv_query_pool *pool,
|
||||
uint32_t first_index, uint32_t num_queries)
|
||||
{
|
||||
for (uint32_t i = 0; i < num_queries; i++) {
|
||||
struct anv_address slot_addr =
|
||||
anv_query_address(pool, first_index + i);
|
||||
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
|
||||
emit_query_availability(cmd_buffer, slot_addr);
|
||||
switch (pool->type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
/* These queries are written with a PIPE_CONTROL so clear them using the
|
||||
* PIPE_CONTROL as well so we don't have to synchronize between 2 types
|
||||
* of operations.
|
||||
*/
|
||||
assert((pool->stride % 8) == 0);
|
||||
for (uint32_t i = 0; i < num_queries; i++) {
|
||||
struct anv_address slot_addr =
|
||||
anv_query_address(pool, first_index + i);
|
||||
|
||||
for (uint32_t qword = 1; qword < (pool->stride / 8); qword++) {
|
||||
emit_query_pc_availability(cmd_buffer,
|
||||
anv_address_add(slot_addr, qword * 8),
|
||||
false);
|
||||
}
|
||||
emit_query_pc_availability(cmd_buffer, slot_addr, true);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT:
|
||||
for (uint32_t i = 0; i < num_queries; i++) {
|
||||
struct anv_address slot_addr =
|
||||
anv_query_address(pool, first_index + i);
|
||||
gen_mi_memset(b, anv_address_add(slot_addr, 8), 0, pool->stride - 8);
|
||||
emit_query_mi_availability(b, slot_addr, true);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -383,11 +420,28 @@ void genX(CmdResetQueryPool)(
|
||||
ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
|
||||
ANV_FROM_HANDLE(anv_query_pool, pool, queryPool);
|
||||
|
||||
for (uint32_t i = 0; i < queryCount; i++) {
|
||||
anv_batch_emit(&cmd_buffer->batch, GENX(MI_STORE_DATA_IMM), sdm) {
|
||||
sdm.Address = anv_query_address(pool, firstQuery + i);
|
||||
sdm.ImmediateData = 0;
|
||||
switch (pool->type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
case VK_QUERY_TYPE_TIMESTAMP:
|
||||
for (uint32_t i = 0; i < queryCount; i++) {
|
||||
emit_query_pc_availability(cmd_buffer,
|
||||
anv_query_address(pool, firstQuery + i),
|
||||
false);
|
||||
}
|
||||
break;
|
||||
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
|
||||
case VK_QUERY_TYPE_TRANSFORM_FEEDBACK_STREAM_EXT: {
|
||||
struct gen_mi_builder b;
|
||||
gen_mi_builder_init(&b, &cmd_buffer->batch);
|
||||
|
||||
for (uint32_t i = 0; i < queryCount; i++)
|
||||
emit_query_mi_availability(&b, anv_query_address(pool, firstQuery + i), false);
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
unreachable("Unsupported query type");
|
||||
}
|
||||
}
|
||||
|
||||
@@ -525,7 +579,7 @@ void genX(CmdEndQueryIndexedEXT)(
|
||||
switch (pool->type) {
|
||||
case VK_QUERY_TYPE_OCCLUSION:
|
||||
emit_ps_depth_count(cmd_buffer, anv_address_add(query_addr, 16));
|
||||
emit_query_availability(cmd_buffer, query_addr);
|
||||
emit_query_pc_availability(cmd_buffer, query_addr, true);
|
||||
break;
|
||||
|
||||
case VK_QUERY_TYPE_PIPELINE_STATISTICS: {
|
||||
@@ -543,7 +597,7 @@ void genX(CmdEndQueryIndexedEXT)(
|
||||
offset += 16;
|
||||
}
|
||||
|
||||
emit_query_availability(cmd_buffer, query_addr);
|
||||
emit_query_mi_availability(&b, query_addr, true);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -554,7 +608,7 @@ void genX(CmdEndQueryIndexedEXT)(
|
||||
}
|
||||
|
||||
emit_xfb_query(&b, index, anv_address_add(query_addr, 16));
|
||||
emit_query_availability(cmd_buffer, query_addr);
|
||||
emit_query_mi_availability(&b, query_addr, true);
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -613,7 +667,7 @@ void genX(CmdWriteTimestamp)(
|
||||
break;
|
||||
}
|
||||
|
||||
emit_query_availability(cmd_buffer, query_addr);
|
||||
emit_query_pc_availability(cmd_buffer, query_addr, true);
|
||||
|
||||
/* When multiview is active the spec requires that N consecutive query
|
||||
* indices are used, where N is the number of active views in the subpass.
|
||||
@@ -684,7 +738,20 @@ void genX(CmdCopyQueryPoolResults)(
|
||||
}
|
||||
|
||||
if ((flags & VK_QUERY_RESULT_WAIT_BIT) ||
|
||||
(cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS)) {
|
||||
(cmd_buffer->state.pending_pipe_bits & ANV_PIPE_FLUSH_BITS) ||
|
||||
/* Occlusion & timestamp queries are written using a PIPE_CONTROL and
|
||||
* because we're about to copy values using MI commands, we need to
|
||||
* stall the command streamer to make sure the PIPE_CONTROL values have
|
||||
* landed, otherwise we could see inconsistent values & availability.
|
||||
*
|
||||
* From the vulkan spec:
|
||||
*
|
||||
* "vkCmdCopyQueryPoolResults is guaranteed to see the effect of
|
||||
* previous uses of vkCmdResetQueryPool in the same queue, without
|
||||
* any additional synchronization."
|
||||
*/
|
||||
pool->type == VK_QUERY_TYPE_OCCLUSION ||
|
||||
pool->type == VK_QUERY_TYPE_TIMESTAMP) {
|
||||
cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_CS_STALL_BIT;
|
||||
genX(cmd_buffer_apply_pipe_flushes)(cmd_buffer);
|
||||
}
|
||||
|
@@ -402,6 +402,8 @@ vma_alloc(struct brw_bufmgr *bufmgr,
|
||||
/* Without softpin support, we let the kernel assign addresses. */
|
||||
assert(brw_using_softpin(bufmgr));
|
||||
|
||||
alignment = ALIGN(alignment, PAGE_SIZE);
|
||||
|
||||
struct bo_cache_bucket *bucket = get_bucket_allocator(bufmgr, size);
|
||||
uint64_t addr;
|
||||
|
||||
@@ -1717,6 +1719,9 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
|
||||
|
||||
const uint64_t _4GB = 4ull << 30;
|
||||
|
||||
/* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */
|
||||
const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE;
|
||||
|
||||
if (devinfo->gen >= 8 && gtt_size > _4GB) {
|
||||
bufmgr->initial_kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
|
||||
|
||||
@@ -1726,9 +1731,13 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int fd)
|
||||
bufmgr->initial_kflags |= EXEC_OBJECT_PINNED;
|
||||
|
||||
util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_LOW_4G],
|
||||
PAGE_SIZE, _4GB);
|
||||
PAGE_SIZE, _4GB_minus_1);
|
||||
|
||||
/* Leave the last 4GB out of the high vma range, so that no state
|
||||
* base address + size can overflow 48 bits.
|
||||
*/
|
||||
util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER],
|
||||
1 * _4GB, gtt_size - 1 * _4GB);
|
||||
1 * _4GB, gtt_size - 2 * _4GB);
|
||||
} else if (devinfo->gen >= 10) {
|
||||
/* Softpin landed in 4.5, but GVT used an aliasing PPGTT until
|
||||
* kernel commit 6b3816d69628becb7ff35978aa0751798b4a940a in
|
||||
|
@@ -1685,6 +1685,11 @@ brw_upload_cs_work_groups_surface(struct brw_context *brw)
|
||||
ISL_FORMAT_RAW,
|
||||
3 * sizeof(GLuint), 1,
|
||||
RELOC_WRITE);
|
||||
|
||||
/* The state buffer now holds a reference to our upload, drop ours. */
|
||||
if (bo != brw->compute.num_work_groups_bo)
|
||||
brw_bo_unreference(bo);
|
||||
|
||||
brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
|
||||
}
|
||||
}
|
||||
|
@@ -33,7 +33,8 @@ libosmesa = shared_library(
|
||||
include_directories : [
|
||||
inc_include, inc_src, inc_mapi, inc_mesa, inc_gallium, inc_gallium_aux,
|
||||
],
|
||||
link_with : [libmesa_classic, libglapi_static, osmesa_link_with],
|
||||
link_whole : libglapi_static,
|
||||
link_with : [libmesa_classic, osmesa_link_with],
|
||||
dependencies : [dep_thread, dep_selinux],
|
||||
version : '8.0.0',
|
||||
install : true,
|
||||
|
@@ -1767,6 +1767,10 @@ _mesa_make_current( struct gl_context *newCtx,
|
||||
|
||||
check_init_viewport(newCtx, drawBuffer->Width, drawBuffer->Height);
|
||||
}
|
||||
else {
|
||||
_mesa_reference_framebuffer(&newCtx->WinSysDrawBuffer, NULL);
|
||||
_mesa_reference_framebuffer(&newCtx->WinSysReadBuffer, NULL);
|
||||
}
|
||||
|
||||
if (newCtx->FirstTimeCurrent) {
|
||||
handle_first_current(newCtx);
|
||||
|
@@ -1105,10 +1105,17 @@ st_api_make_current(struct st_api *stapi, struct st_context_iface *stctxi,
|
||||
else {
|
||||
GET_CURRENT_CONTEXT(ctx);
|
||||
|
||||
ret = _mesa_make_current(NULL, NULL, NULL);
|
||||
|
||||
if (ctx)
|
||||
if (ctx) {
|
||||
/* Before releasing the context, release its associated
|
||||
* winsys buffers first. Then purge the context's winsys buffers list
|
||||
* to free the resources of any winsys buffers that no longer have
|
||||
* an existing drawable.
|
||||
*/
|
||||
ret = _mesa_make_current(ctx, NULL, NULL);
|
||||
st_framebuffers_purge(ctx->st);
|
||||
}
|
||||
|
||||
ret = _mesa_make_current(NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@@ -269,31 +269,39 @@ yuv_to_rgb(struct tgsi_transform_context *tctx,
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
|
||||
/* DP3 dst.x, tmpA, imm[0] */
|
||||
inst = dp3_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
|
||||
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
|
||||
reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
|
||||
inst = dp3_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_X);
|
||||
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
|
||||
reg_src(&inst.Src[1], &ctx->imm[0], SWIZ(X, Y, Z, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
}
|
||||
|
||||
/* DP3 dst.y, tmpA, imm[1] */
|
||||
inst = dp3_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
|
||||
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
|
||||
reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
|
||||
inst = dp3_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Y);
|
||||
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
|
||||
reg_src(&inst.Src[1], &ctx->imm[1], SWIZ(X, Y, Z, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
}
|
||||
|
||||
/* DP3 dst.z, tmpA, imm[2] */
|
||||
inst = dp3_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
|
||||
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
|
||||
reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
|
||||
inst = dp3_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_Z);
|
||||
reg_src(&inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
|
||||
reg_src(&inst.Src[1], &ctx->imm[2], SWIZ(X, Y, Z, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
}
|
||||
|
||||
/* MOV dst.w, imm[3].w */
|
||||
inst = mov_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
|
||||
reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
|
||||
inst = mov_instruction();
|
||||
reg_dst(&inst.Dst[0], dst, TGSI_WRITEMASK_W);
|
||||
reg_src(&inst.Src[0], &ctx->imm[3], SWIZ(_, _, _, W));
|
||||
tctx->emit_instruction(tctx, &inst);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -434,7 +442,7 @@ st_tgsi_lower_yuv(const struct tgsi_token *tokens, unsigned free_slots,
|
||||
/* TODO better job of figuring out how many extra tokens we need..
|
||||
* this is a pain about tgsi_transform :-/
|
||||
*/
|
||||
newlen = tgsi_num_tokens(tokens) + 120;
|
||||
newlen = tgsi_num_tokens(tokens) + 300;
|
||||
newtoks = tgsi_alloc_tokens(newlen);
|
||||
if (!newtoks)
|
||||
return NULL;
|
||||
|
@@ -111,6 +111,11 @@ TODO: document the other workarounds.
|
||||
<option name="allow_glsl_builtin_variable_redeclaration" value="true" />
|
||||
</application>
|
||||
|
||||
<application name="Doom 3: BFG" executable="Doom3BFG.exe">
|
||||
<option name="allow_glsl_builtin_variable_redeclaration" value="true" />
|
||||
<option name="force_glsl_extensions_warn" value="true" />
|
||||
</application>
|
||||
|
||||
<application name="Dying Light" executable="DyingLightGame">
|
||||
<option name="allow_glsl_builtin_variable_redeclaration" value="true" />
|
||||
</application>
|
||||
@@ -463,6 +468,9 @@ TODO: document the other workarounds.
|
||||
<application name="ARK: Survival Evolved (and unintentionally the UE4 demo template)" executable="ShooterGame">
|
||||
<option name="radeonsi_clear_db_cache_before_clear" value="true" />
|
||||
</application>
|
||||
<application name="Counter-Strike Global Offensive" executable="csgo_linux64">
|
||||
<option name="radeonsi_zerovram" value="true" />
|
||||
</application>
|
||||
<application name="No Mans Sky" executable="NMS.exe">
|
||||
<option name="radeonsi_zerovram" value="true" />
|
||||
</application>
|
||||
|
@@ -38,11 +38,29 @@ readN(int fd, char *buf, size_t len)
|
||||
return total ? total : err;
|
||||
}
|
||||
|
||||
static char *
|
||||
read_grow(int fd)
|
||||
char *
|
||||
os_read_file(const char *filename)
|
||||
{
|
||||
/* Note that this also serves as a slight margin to avoid a 2x grow when
|
||||
* the file is just a few bytes larger when we read it than when we
|
||||
* fstat'ed it.
|
||||
* The string's NULL terminator is also included in here.
|
||||
*/
|
||||
size_t len = 64;
|
||||
|
||||
int fd = open(filename, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
/* errno set by open() */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Pre-allocate a buffer at least the size of the file if we can read
|
||||
* that information.
|
||||
*/
|
||||
struct stat stat;
|
||||
if (fstat(fd, &stat) == 0)
|
||||
len += stat.st_size;
|
||||
|
||||
char *buf = malloc(len);
|
||||
if (!buf) {
|
||||
close(fd);
|
||||
@@ -77,46 +95,6 @@ read_grow(int fd)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/**
 * Read the whole content of a file into a newly allocated, NUL-terminated
 * buffer.
 *
 * \param filename  path of the file to read
 * \return the buffer, to be released with free(), or NULL on failure.  When
 *         open() fails errno is the one it set; when the buffer cannot be
 *         allocated errno is set to ENOMEM.
 */
char *
os_read_file(const char *filename)
{
   int fd = open(filename, O_RDONLY);
   if (fd == -1) {
      /* errno set by open() */
      return NULL;
   }

   size_t len = 0;
   struct stat st;
   if (fstat(fd, &st) == 0)
      len = st.st_size;

   /* No usable size information: hand the descriptor over to read_grow(). */
   if (!len)
      return read_grow(fd);

   /* add NULL terminator */
   len++;

   char *buf = malloc(len);
   if (!buf) {
      close(fd);
      /* errno values are positive */
      errno = ENOMEM;
      return NULL;
   }

   ssize_t nread = readN(fd, buf, len - 1);

   close(fd);

   /* Any negative count is a failure; it must never be used as an index
    * below, and the buffer must not be leaked.
    */
   if (nread < 0) {
      free(buf);
      return NULL;
   }

   buf[nread] = '\0';

   return buf;
}
|
||||
|
||||
#else
|
||||
|
||||
char *
|
||||
|
@@ -109,6 +109,23 @@ struct queue_data {
|
||||
struct list_head running_command_buffer;
|
||||
};
|
||||
|
||||
/* Per-draw resources of the overlay; instances are kept in the draws list
 * of their swapchain_data.
 */
struct overlay_draw {
|
||||
/* Node in swapchain_data::draws */
struct list_head link;
|
||||
|
||||
/* Primary command buffer allocated from the swapchain's command pool */
VkCommandBuffer command_buffer;
|
||||
|
||||
/* Created together with the draw in get_overlay_draw() */
VkSemaphore semaphore;
|
||||
/* Polled by get_overlay_draw() to decide whether this draw can be reused */
VkFence fence;
|
||||
|
||||
/* Vertex buffer, its memory and size -- presumably the ImGui vertex data; confirm against the renderer */
VkBuffer vertex_buffer;
|
||||
VkDeviceMemory vertex_buffer_mem;
|
||||
VkDeviceSize vertex_buffer_size;
|
||||
|
||||
/* Index buffer, its memory and size -- presumably the ImGui index data; confirm against the renderer */
VkBuffer index_buffer;
|
||||
VkDeviceMemory index_buffer_mem;
|
||||
VkDeviceSize index_buffer_size;
|
||||
};
|
||||
|
||||
/* Mapped from VkSwapchainKHR */
|
||||
struct swapchain_data {
|
||||
struct device_data *device;
|
||||
@@ -135,17 +152,7 @@ struct swapchain_data {
|
||||
|
||||
VkCommandPool command_pool;
|
||||
|
||||
struct {
|
||||
VkCommandBuffer command_buffer;
|
||||
|
||||
VkBuffer vertex_buffer;
|
||||
VkDeviceMemory vertex_buffer_mem;
|
||||
VkDeviceSize vertex_buffer_size;
|
||||
|
||||
VkBuffer index_buffer;
|
||||
VkDeviceMemory index_buffer_mem;
|
||||
VkDeviceSize index_buffer_size;
|
||||
} frame_data[2];
|
||||
struct list_head draws; /* List of struct overlay_draw */
|
||||
|
||||
bool font_uploaded;
|
||||
VkImage font_image;
|
||||
@@ -154,8 +161,6 @@ struct swapchain_data {
|
||||
VkBuffer upload_font_buffer;
|
||||
VkDeviceMemory upload_font_buffer_mem;
|
||||
|
||||
VkSemaphore submission_semaphore;
|
||||
|
||||
/**/
|
||||
ImGuiContext* imgui_context;
|
||||
ImVec2 window_size;
|
||||
@@ -194,49 +199,45 @@ static const VkQueryPipelineStatisticFlags overlay_query_flags =
|
||||
VK_QUERY_PIPELINE_STATISTIC_COMPUTE_SHADER_INVOCATIONS_BIT;
|
||||
#define OVERLAY_QUERY_COUNT (11)
|
||||
|
||||
static struct hash_table *vk_object_to_data = NULL;
|
||||
static struct hash_table_u64 *vk_object_to_data = NULL;
|
||||
static simple_mtx_t vk_object_to_data_mutex = _SIMPLE_MTX_INITIALIZER_NP;
|
||||
|
||||
thread_local ImGuiContext* __MesaImGui;
|
||||
|
||||
static inline void ensure_vk_object_map(void)
|
||||
{
|
||||
if (!vk_object_to_data) {
|
||||
vk_object_to_data = _mesa_hash_table_create(NULL,
|
||||
_mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
}
|
||||
if (!vk_object_to_data)
|
||||
vk_object_to_data = _mesa_hash_table_u64_create(NULL);
|
||||
}
|
||||
|
||||
#define FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data((void *) obj))
|
||||
#define FIND_CMD_BUFFER_DATA(obj) ((struct command_buffer_data *)find_object_data((void *) obj))
|
||||
#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data((void *) obj))
|
||||
#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data((void *) obj))
|
||||
#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj))
|
||||
#define FIND_INSTANCE_DATA(obj) ((struct instance_data *)find_object_data((void *) obj))
|
||||
static void *find_object_data(void *obj)
|
||||
#define HKEY(obj) ((uint64_t)(obj))
|
||||
#define FIND_SWAPCHAIN_DATA(obj) ((struct swapchain_data *)find_object_data(HKEY(obj)))
|
||||
#define FIND_CMD_BUFFER_DATA(obj) ((struct command_buffer_data *)find_object_data(HKEY(obj)))
|
||||
#define FIND_DEVICE_DATA(obj) ((struct device_data *)find_object_data(HKEY(obj)))
|
||||
#define FIND_QUEUE_DATA(obj) ((struct queue_data *)find_object_data(HKEY(obj)))
|
||||
#define FIND_PHYSICAL_DEVICE_DATA(obj) ((struct instance_data *)find_object_data(HKEY(obj)))
|
||||
#define FIND_INSTANCE_DATA(obj) ((struct instance_data *)find_object_data(HKEY(obj)))
|
||||
static void *find_object_data(uint64_t obj)
|
||||
{
|
||||
simple_mtx_lock(&vk_object_to_data_mutex);
|
||||
ensure_vk_object_map();
|
||||
struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj);
|
||||
void *data = entry ? entry->data : NULL;
|
||||
void *data = _mesa_hash_table_u64_search(vk_object_to_data, obj);
|
||||
simple_mtx_unlock(&vk_object_to_data_mutex);
|
||||
return data;
|
||||
}
|
||||
|
||||
static void map_object(void *obj, void *data)
|
||||
static void map_object(uint64_t obj, void *data)
|
||||
{
|
||||
simple_mtx_lock(&vk_object_to_data_mutex);
|
||||
ensure_vk_object_map();
|
||||
_mesa_hash_table_insert(vk_object_to_data, obj, data);
|
||||
_mesa_hash_table_u64_insert(vk_object_to_data, obj, data);
|
||||
simple_mtx_unlock(&vk_object_to_data_mutex);
|
||||
}
|
||||
|
||||
static void unmap_object(void *obj)
|
||||
static void unmap_object(uint64_t obj)
|
||||
{
|
||||
simple_mtx_lock(&vk_object_to_data_mutex);
|
||||
struct hash_entry *entry = _mesa_hash_table_search(vk_object_to_data, obj);
|
||||
_mesa_hash_table_remove(vk_object_to_data, entry);
|
||||
_mesa_hash_table_u64_remove(vk_object_to_data, obj);
|
||||
simple_mtx_unlock(&vk_object_to_data_mutex);
|
||||
}
|
||||
|
||||
@@ -321,7 +322,7 @@ static struct instance_data *new_instance_data(VkInstance instance)
|
||||
{
|
||||
struct instance_data *data = rzalloc(NULL, struct instance_data);
|
||||
data->instance = instance;
|
||||
map_object(data->instance, data);
|
||||
map_object(HKEY(data->instance), data);
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -329,7 +330,7 @@ static void destroy_instance_data(struct instance_data *data)
|
||||
{
|
||||
if (data->params.output_file)
|
||||
fclose(data->params.output_file);
|
||||
unmap_object(data->instance);
|
||||
unmap_object(HKEY(data->instance));
|
||||
ralloc_free(data);
|
||||
}
|
||||
|
||||
@@ -348,9 +349,9 @@ static void instance_data_map_physical_devices(struct instance_data *instance_da
|
||||
|
||||
for (uint32_t i = 0; i < physicalDeviceCount; i++) {
|
||||
if (map)
|
||||
map_object(physicalDevices[i], instance_data);
|
||||
map_object(HKEY(physicalDevices[i]), instance_data);
|
||||
else
|
||||
unmap_object(physicalDevices[i]);
|
||||
unmap_object(HKEY(physicalDevices[i]));
|
||||
}
|
||||
|
||||
free(physicalDevices);
|
||||
@@ -362,7 +363,7 @@ static struct device_data *new_device_data(VkDevice device, struct instance_data
|
||||
struct device_data *data = rzalloc(NULL, struct device_data);
|
||||
data->instance = instance;
|
||||
data->device = device;
|
||||
map_object(data->device, data);
|
||||
map_object(HKEY(data->device), data);
|
||||
return data;
|
||||
}
|
||||
|
||||
@@ -375,10 +376,10 @@ static struct queue_data *new_queue_data(VkQueue queue,
|
||||
data->device = device_data;
|
||||
data->queue = queue;
|
||||
data->flags = family_props->queueFlags;
|
||||
data->timestamp_mask = (1ul << family_props->timestampValidBits) - 1;
|
||||
data->timestamp_mask = (1ull << family_props->timestampValidBits) - 1;
|
||||
data->family_index = family_index;
|
||||
LIST_INITHEAD(&data->running_command_buffer);
|
||||
map_object(data->queue, data);
|
||||
map_object(HKEY(data->queue), data);
|
||||
|
||||
/* Fence synchronizing access to queries on that queue. */
|
||||
VkFenceCreateInfo fence_info = {};
|
||||
@@ -400,7 +401,7 @@ static void destroy_queue(struct queue_data *data)
|
||||
{
|
||||
struct device_data *device_data = data->device;
|
||||
device_data->vtable.DestroyFence(device_data->device, data->queries_fence, NULL);
|
||||
unmap_object(data->queue);
|
||||
unmap_object(HKEY(data->queue));
|
||||
ralloc_free(data);
|
||||
}
|
||||
|
||||
@@ -449,7 +450,7 @@ static void device_unmap_queues(struct device_data *data)
|
||||
|
||||
static void destroy_device_data(struct device_data *data)
|
||||
{
|
||||
unmap_object(data->device);
|
||||
unmap_object(HKEY(data->device));
|
||||
ralloc_free(data);
|
||||
}
|
||||
|
||||
@@ -469,13 +470,13 @@ static struct command_buffer_data *new_command_buffer_data(VkCommandBuffer cmd_b
|
||||
data->timestamp_query_pool = timestamp_query_pool;
|
||||
data->query_index = query_index;
|
||||
list_inithead(&data->link);
|
||||
map_object((void *) data->cmd_buffer, data);
|
||||
map_object(HKEY(data->cmd_buffer), data);
|
||||
return data;
|
||||
}
|
||||
|
||||
static void destroy_command_buffer_data(struct command_buffer_data *data)
|
||||
{
|
||||
unmap_object((void *) data->cmd_buffer);
|
||||
unmap_object(HKEY(data->cmd_buffer));
|
||||
list_delinit(&data->link);
|
||||
ralloc_free(data);
|
||||
}
|
||||
@@ -489,16 +490,63 @@ static struct swapchain_data *new_swapchain_data(VkSwapchainKHR swapchain,
|
||||
data->device = device_data;
|
||||
data->swapchain = swapchain;
|
||||
data->window_size = ImVec2(instance_data->params.width, instance_data->params.height);
|
||||
map_object((void *) data->swapchain, data);
|
||||
list_inithead(&data->draws);
|
||||
map_object(HKEY(data->swapchain), data);
|
||||
return data;
|
||||
}
|
||||
|
||||
static void destroy_swapchain_data(struct swapchain_data *data)
|
||||
{
|
||||
unmap_object((void *) data->swapchain);
|
||||
unmap_object(HKEY(data->swapchain));
|
||||
ralloc_free(data);
|
||||
}
|
||||
|
||||
struct overlay_draw *get_overlay_draw(struct swapchain_data *data)
|
||||
{
|
||||
struct device_data *device_data = data->device;
|
||||
struct overlay_draw *draw = list_empty(&data->draws) ?
|
||||
NULL : list_first_entry(&data->draws, struct overlay_draw, link);
|
||||
|
||||
VkSemaphoreCreateInfo sem_info = {};
|
||||
sem_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
||||
|
||||
if (draw && device_data->vtable.GetFenceStatus(device_data->device, draw->fence) == VK_SUCCESS) {
|
||||
list_del(&draw->link);
|
||||
VK_CHECK(device_data->vtable.ResetFences(device_data->device,
|
||||
1, &draw->fence));
|
||||
list_addtail(&draw->link, &data->draws);
|
||||
return draw;
|
||||
}
|
||||
|
||||
draw = rzalloc(data, struct overlay_draw);
|
||||
|
||||
VkCommandBufferAllocateInfo cmd_buffer_info = {};
|
||||
cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
cmd_buffer_info.commandPool = data->command_pool;
|
||||
cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
cmd_buffer_info.commandBufferCount = 1;
|
||||
VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device,
|
||||
&cmd_buffer_info,
|
||||
&draw->command_buffer));
|
||||
VK_CHECK(device_data->set_device_loader_data(device_data->device,
|
||||
draw->command_buffer));
|
||||
|
||||
|
||||
VkFenceCreateInfo fence_info = {};
|
||||
fence_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
|
||||
VK_CHECK(device_data->vtable.CreateFence(device_data->device,
|
||||
&fence_info,
|
||||
NULL,
|
||||
&draw->fence));
|
||||
|
||||
VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &sem_info,
|
||||
NULL, &draw->semaphore));
|
||||
|
||||
list_addtail(&draw->link, &data->draws);
|
||||
|
||||
return draw;
|
||||
}
|
||||
|
||||
static const char *param_unit(enum overlay_param_enabled param)
|
||||
{
|
||||
switch (param) {
|
||||
@@ -872,20 +920,19 @@ static void CreateOrResizeBuffer(struct device_data *data,
|
||||
*buffer_size = new_size;
|
||||
}
|
||||
|
||||
static void render_swapchain_display(struct swapchain_data *data,
|
||||
const VkSemaphore *wait_semaphores,
|
||||
unsigned n_wait_semaphores,
|
||||
unsigned image_index)
|
||||
static struct overlay_draw *render_swapchain_display(struct swapchain_data *data,
|
||||
const VkSemaphore *wait_semaphores,
|
||||
unsigned n_wait_semaphores,
|
||||
unsigned image_index)
|
||||
{
|
||||
ImDrawData* draw_data = ImGui::GetDrawData();
|
||||
if (draw_data->TotalVtxCount == 0)
|
||||
return;
|
||||
return NULL;
|
||||
|
||||
struct device_data *device_data = data->device;
|
||||
uint32_t idx = data->n_frames % ARRAY_SIZE(data->frame_data);
|
||||
VkCommandBuffer command_buffer = data->frame_data[idx].command_buffer;
|
||||
struct overlay_draw *draw = get_overlay_draw(data);
|
||||
|
||||
device_data->vtable.ResetCommandBuffer(command_buffer, 0);
|
||||
device_data->vtable.ResetCommandBuffer(draw->command_buffer, 0);
|
||||
|
||||
VkRenderPassBeginInfo render_pass_info = {};
|
||||
render_pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
|
||||
@@ -897,9 +944,9 @@ static void render_swapchain_display(struct swapchain_data *data,
|
||||
VkCommandBufferBeginInfo buffer_begin_info = {};
|
||||
buffer_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
|
||||
|
||||
device_data->vtable.BeginCommandBuffer(command_buffer, &buffer_begin_info);
|
||||
device_data->vtable.BeginCommandBuffer(draw->command_buffer, &buffer_begin_info);
|
||||
|
||||
ensure_swapchain_fonts(data, command_buffer);
|
||||
ensure_swapchain_fonts(data, draw->command_buffer);
|
||||
|
||||
/* Bounce the image to display back to color attachment layout for
|
||||
* rendering on top of it.
|
||||
@@ -919,7 +966,7 @@ static void render_swapchain_display(struct swapchain_data *data,
|
||||
imb.subresourceRange.layerCount = 1;
|
||||
imb.srcQueueFamilyIndex = device_data->graphic_queue->family_index;
|
||||
imb.dstQueueFamilyIndex = device_data->graphic_queue->family_index;
|
||||
device_data->vtable.CmdPipelineBarrier(command_buffer,
|
||||
device_data->vtable.CmdPipelineBarrier(draw->command_buffer,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
|
||||
VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT,
|
||||
0, /* dependency flags */
|
||||
@@ -927,37 +974,33 @@ static void render_swapchain_display(struct swapchain_data *data,
|
||||
0, nullptr, /* buffer memory barriers */
|
||||
1, &imb); /* image memory barriers */
|
||||
|
||||
device_data->vtable.CmdBeginRenderPass(command_buffer, &render_pass_info,
|
||||
device_data->vtable.CmdBeginRenderPass(draw->command_buffer, &render_pass_info,
|
||||
VK_SUBPASS_CONTENTS_INLINE);
|
||||
|
||||
/* Create/Resize vertex & index buffers */
|
||||
size_t vertex_size = draw_data->TotalVtxCount * sizeof(ImDrawVert);
|
||||
size_t index_size = draw_data->TotalIdxCount * sizeof(ImDrawIdx);
|
||||
if (data->frame_data[idx].vertex_buffer_size < vertex_size) {
|
||||
if (draw->vertex_buffer_size < vertex_size) {
|
||||
CreateOrResizeBuffer(device_data,
|
||||
&data->frame_data[idx].vertex_buffer,
|
||||
&data->frame_data[idx].vertex_buffer_mem,
|
||||
&data->frame_data[idx].vertex_buffer_size,
|
||||
&draw->vertex_buffer,
|
||||
&draw->vertex_buffer_mem,
|
||||
&draw->vertex_buffer_size,
|
||||
vertex_size, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
|
||||
}
|
||||
if (data->frame_data[idx].index_buffer_size < index_size) {
|
||||
if (draw->index_buffer_size < index_size) {
|
||||
CreateOrResizeBuffer(device_data,
|
||||
&data->frame_data[idx].index_buffer,
|
||||
&data->frame_data[idx].index_buffer_mem,
|
||||
&data->frame_data[idx].index_buffer_size,
|
||||
&draw->index_buffer,
|
||||
&draw->index_buffer_mem,
|
||||
&draw->index_buffer_size,
|
||||
index_size, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
|
||||
}
|
||||
|
||||
/* Upload vertex & index data */
|
||||
VkBuffer vertex_buffer = data->frame_data[idx].vertex_buffer;
|
||||
VkDeviceMemory vertex_mem = data->frame_data[idx].vertex_buffer_mem;
|
||||
VkBuffer index_buffer = data->frame_data[idx].index_buffer;
|
||||
VkDeviceMemory index_mem = data->frame_data[idx].index_buffer_mem;
|
||||
ImDrawVert* vtx_dst = NULL;
|
||||
ImDrawIdx* idx_dst = NULL;
|
||||
VK_CHECK(device_data->vtable.MapMemory(device_data->device, vertex_mem,
|
||||
VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->vertex_buffer_mem,
|
||||
0, vertex_size, 0, (void**)(&vtx_dst)));
|
||||
VK_CHECK(device_data->vtable.MapMemory(device_data->device, index_mem,
|
||||
VK_CHECK(device_data->vtable.MapMemory(device_data->device, draw->index_buffer_mem,
|
||||
0, index_size, 0, (void**)(&idx_dst)));
|
||||
for (int n = 0; n < draw_data->CmdListsCount; n++)
|
||||
{
|
||||
@@ -969,26 +1012,26 @@ static void render_swapchain_display(struct swapchain_data *data,
|
||||
}
|
||||
VkMappedMemoryRange range[2] = {};
|
||||
range[0].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
|
||||
range[0].memory = vertex_mem;
|
||||
range[0].memory = draw->vertex_buffer_mem;
|
||||
range[0].size = VK_WHOLE_SIZE;
|
||||
range[1].sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
|
||||
range[1].memory = index_mem;
|
||||
range[1].memory = draw->index_buffer_mem;
|
||||
range[1].size = VK_WHOLE_SIZE;
|
||||
VK_CHECK(device_data->vtable.FlushMappedMemoryRanges(device_data->device, 2, range));
|
||||
device_data->vtable.UnmapMemory(device_data->device, vertex_mem);
|
||||
device_data->vtable.UnmapMemory(device_data->device, index_mem);
|
||||
device_data->vtable.UnmapMemory(device_data->device, draw->vertex_buffer_mem);
|
||||
device_data->vtable.UnmapMemory(device_data->device, draw->index_buffer_mem);
|
||||
|
||||
/* Bind pipeline and descriptor sets */
|
||||
device_data->vtable.CmdBindPipeline(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline);
|
||||
device_data->vtable.CmdBindPipeline(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, data->pipeline);
|
||||
VkDescriptorSet desc_set[1] = { data->descriptor_set };
|
||||
device_data->vtable.CmdBindDescriptorSets(command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
device_data->vtable.CmdBindDescriptorSets(draw->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
|
||||
data->pipeline_layout, 0, 1, desc_set, 0, NULL);
|
||||
|
||||
/* Bind vertex & index buffers */
|
||||
VkBuffer vertex_buffers[1] = { vertex_buffer };
|
||||
VkBuffer vertex_buffers[1] = { draw->vertex_buffer };
|
||||
VkDeviceSize vertex_offset[1] = { 0 };
|
||||
device_data->vtable.CmdBindVertexBuffers(command_buffer, 0, 1, vertex_buffers, vertex_offset);
|
||||
device_data->vtable.CmdBindIndexBuffer(command_buffer, index_buffer, 0, VK_INDEX_TYPE_UINT16);
|
||||
device_data->vtable.CmdBindVertexBuffers(draw->command_buffer, 0, 1, vertex_buffers, vertex_offset);
|
||||
device_data->vtable.CmdBindIndexBuffer(draw->command_buffer, draw->index_buffer, 0, VK_INDEX_TYPE_UINT16);
|
||||
|
||||
/* Setup viewport */
|
||||
VkViewport viewport;
|
||||
@@ -998,7 +1041,7 @@ static void render_swapchain_display(struct swapchain_data *data,
|
||||
viewport.height = draw_data->DisplaySize.y;
|
||||
viewport.minDepth = 0.0f;
|
||||
viewport.maxDepth = 1.0f;
|
||||
device_data->vtable.CmdSetViewport(command_buffer, 0, 1, &viewport);
|
||||
device_data->vtable.CmdSetViewport(draw->command_buffer, 0, 1, &viewport);
|
||||
|
||||
|
||||
/* Setup scale and translation through push constants :
|
||||
@@ -1013,10 +1056,10 @@ static void render_swapchain_display(struct swapchain_data *data,
|
||||
float translate[2];
|
||||
translate[0] = -1.0f - draw_data->DisplayPos.x * scale[0];
|
||||
translate[1] = -1.0f - draw_data->DisplayPos.y * scale[1];
|
||||
device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout,
|
||||
device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout,
|
||||
VK_SHADER_STAGE_VERTEX_BIT,
|
||||
sizeof(float) * 0, sizeof(float) * 2, scale);
|
||||
device_data->vtable.CmdPushConstants(command_buffer, data->pipeline_layout,
|
||||
device_data->vtable.CmdPushConstants(draw->command_buffer, data->pipeline_layout,
|
||||
VK_SHADER_STAGE_VERTEX_BIT,
|
||||
sizeof(float) * 2, sizeof(float) * 2, translate);
|
||||
|
||||
@@ -1037,42 +1080,33 @@ static void render_swapchain_display(struct swapchain_data *data,
|
||||
scissor.offset.y = (int32_t)(pcmd->ClipRect.y - display_pos.y) > 0 ? (int32_t)(pcmd->ClipRect.y - display_pos.y) : 0;
|
||||
scissor.extent.width = (uint32_t)(pcmd->ClipRect.z - pcmd->ClipRect.x);
|
||||
scissor.extent.height = (uint32_t)(pcmd->ClipRect.w - pcmd->ClipRect.y + 1); // FIXME: Why +1 here?
|
||||
device_data->vtable.CmdSetScissor(command_buffer, 0, 1, &scissor);
|
||||
device_data->vtable.CmdSetScissor(draw->command_buffer, 0, 1, &scissor);
|
||||
|
||||
// Draw
|
||||
device_data->vtable.CmdDrawIndexed(command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0);
|
||||
device_data->vtable.CmdDrawIndexed(draw->command_buffer, pcmd->ElemCount, 1, idx_offset, vtx_offset, 0);
|
||||
|
||||
idx_offset += pcmd->ElemCount;
|
||||
}
|
||||
vtx_offset += cmd_list->VtxBuffer.Size;
|
||||
}
|
||||
|
||||
device_data->vtable.CmdEndRenderPass(command_buffer);
|
||||
device_data->vtable.EndCommandBuffer(command_buffer);
|
||||
|
||||
if (data->submission_semaphore) {
|
||||
device_data->vtable.DestroySemaphore(device_data->device,
|
||||
data->submission_semaphore,
|
||||
NULL);
|
||||
}
|
||||
/* Submission semaphore */
|
||||
VkSemaphoreCreateInfo semaphore_info = {};
|
||||
semaphore_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
||||
VK_CHECK(device_data->vtable.CreateSemaphore(device_data->device, &semaphore_info,
|
||||
NULL, &data->submission_semaphore));
|
||||
device_data->vtable.CmdEndRenderPass(draw->command_buffer);
|
||||
device_data->vtable.EndCommandBuffer(draw->command_buffer);
|
||||
|
||||
VkSubmitInfo submit_info = {};
|
||||
VkPipelineStageFlags stage_wait = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
|
||||
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submit_info.commandBufferCount = 1;
|
||||
submit_info.pCommandBuffers = &command_buffer;
|
||||
submit_info.pCommandBuffers = &draw->command_buffer;
|
||||
submit_info.pWaitDstStageMask = &stage_wait;
|
||||
submit_info.waitSemaphoreCount = n_wait_semaphores;
|
||||
submit_info.pWaitSemaphores = wait_semaphores;
|
||||
submit_info.signalSemaphoreCount = 1;
|
||||
submit_info.pSignalSemaphores = &data->submission_semaphore;
|
||||
submit_info.pSignalSemaphores = &draw->semaphore;
|
||||
|
||||
device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, VK_NULL_HANDLE);
|
||||
device_data->vtable.QueueSubmit(device_data->graphic_queue->queue, 1, &submit_info, draw->fence);
|
||||
|
||||
return draw;
|
||||
}
|
||||
|
||||
static const uint32_t overlay_vert_spv[] = {
|
||||
@@ -1437,7 +1471,7 @@ static void setup_swapchain_data(struct swapchain_data *data,
|
||||
NULL, &data->framebuffers[i]));
|
||||
}
|
||||
|
||||
/* Command buffer */
|
||||
/* Command buffer pool */
|
||||
VkCommandPoolCreateInfo cmd_buffer_pool_info = {};
|
||||
cmd_buffer_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
|
||||
cmd_buffer_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
|
||||
@@ -1445,29 +1479,21 @@ static void setup_swapchain_data(struct swapchain_data *data,
|
||||
VK_CHECK(device_data->vtable.CreateCommandPool(device_data->device,
|
||||
&cmd_buffer_pool_info,
|
||||
NULL, &data->command_pool));
|
||||
|
||||
VkCommandBuffer cmd_bufs[ARRAY_SIZE(data->frame_data)];
|
||||
|
||||
VkCommandBufferAllocateInfo cmd_buffer_info = {};
|
||||
cmd_buffer_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
|
||||
cmd_buffer_info.commandPool = data->command_pool;
|
||||
cmd_buffer_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
|
||||
cmd_buffer_info.commandBufferCount = 2;
|
||||
VK_CHECK(device_data->vtable.AllocateCommandBuffers(device_data->device,
|
||||
&cmd_buffer_info,
|
||||
cmd_bufs));
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++) {
|
||||
VK_CHECK(device_data->set_device_loader_data(device_data->device,
|
||||
cmd_bufs[i]));
|
||||
|
||||
data->frame_data[i].command_buffer = cmd_bufs[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void shutdown_swapchain_data(struct swapchain_data *data)
|
||||
{
|
||||
struct device_data *device_data = data->device;
|
||||
|
||||
list_for_each_entry_safe(struct overlay_draw, draw, &data->draws, link) {
|
||||
device_data->vtable.DestroySemaphore(device_data->device, draw->semaphore, NULL);
|
||||
device_data->vtable.DestroyFence(device_data->device, draw->fence, NULL);
|
||||
device_data->vtable.DestroyBuffer(device_data->device, draw->vertex_buffer, NULL);
|
||||
device_data->vtable.DestroyBuffer(device_data->device, draw->index_buffer, NULL);
|
||||
device_data->vtable.FreeMemory(device_data->device, draw->vertex_buffer_mem, NULL);
|
||||
device_data->vtable.FreeMemory(device_data->device, draw->index_buffer_mem, NULL);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < data->n_images; i++) {
|
||||
device_data->vtable.DestroyImageView(device_data->device, data->image_views[i], NULL);
|
||||
device_data->vtable.DestroyFramebuffer(device_data->device, data->framebuffers[i], NULL);
|
||||
@@ -1475,24 +1501,8 @@ static void shutdown_swapchain_data(struct swapchain_data *data)
|
||||
|
||||
device_data->vtable.DestroyRenderPass(device_data->device, data->render_pass, NULL);
|
||||
|
||||
for (uint32_t i = 0; i < ARRAY_SIZE(data->frame_data); i++) {
|
||||
device_data->vtable.FreeCommandBuffers(device_data->device,
|
||||
data->command_pool,
|
||||
1, &data->frame_data[i].command_buffer);
|
||||
if (data->frame_data[i].vertex_buffer)
|
||||
device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].vertex_buffer, NULL);
|
||||
if (data->frame_data[i].index_buffer)
|
||||
device_data->vtable.DestroyBuffer(device_data->device, data->frame_data[i].index_buffer, NULL);
|
||||
if (data->frame_data[i].vertex_buffer_mem)
|
||||
device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].vertex_buffer_mem, NULL);
|
||||
if (data->frame_data[i].index_buffer_mem)
|
||||
device_data->vtable.FreeMemory(device_data->device, data->frame_data[i].index_buffer_mem, NULL);
|
||||
}
|
||||
device_data->vtable.DestroyCommandPool(device_data->device, data->command_pool, NULL);
|
||||
|
||||
if (data->submission_semaphore)
|
||||
device_data->vtable.DestroySemaphore(device_data->device, data->submission_semaphore, NULL);
|
||||
|
||||
device_data->vtable.DestroyPipeline(device_data->device, data->pipeline, NULL);
|
||||
device_data->vtable.DestroyPipelineLayout(device_data->device, data->pipeline_layout, NULL);
|
||||
|
||||
@@ -1512,19 +1522,24 @@ static void shutdown_swapchain_data(struct swapchain_data *data)
|
||||
ImGui::DestroyContext(data->imgui_context);
|
||||
}
|
||||
|
||||
static void before_present(struct swapchain_data *swapchain_data,
|
||||
const VkSemaphore *wait_semaphores,
|
||||
unsigned n_wait_semaphores,
|
||||
unsigned imageIndex)
|
||||
static struct overlay_draw *before_present(struct swapchain_data *swapchain_data,
|
||||
const VkSemaphore *wait_semaphores,
|
||||
unsigned n_wait_semaphores,
|
||||
unsigned imageIndex)
|
||||
{
|
||||
struct instance_data *instance_data = swapchain_data->device->instance;
|
||||
struct overlay_draw *draw = NULL;
|
||||
|
||||
snapshot_swapchain_frame(swapchain_data);
|
||||
|
||||
if (!instance_data->params.no_display && swapchain_data->n_frames > 0) {
|
||||
compute_swapchain_display(swapchain_data);
|
||||
render_swapchain_display(swapchain_data, wait_semaphores, n_wait_semaphores, imageIndex);
|
||||
draw = render_swapchain_display(swapchain_data,
|
||||
wait_semaphores, n_wait_semaphores,
|
||||
imageIndex);
|
||||
}
|
||||
|
||||
return draw;
|
||||
}
|
||||
|
||||
static VkResult overlay_CreateSwapchainKHR(
|
||||
@@ -1642,16 +1657,19 @@ static VkResult overlay_QueuePresentKHR(
|
||||
present_info.swapchainCount = 1;
|
||||
present_info.pSwapchains = &swapchain;
|
||||
|
||||
before_present(swapchain_data,
|
||||
pPresentInfo->pWaitSemaphores,
|
||||
pPresentInfo->waitSemaphoreCount,
|
||||
pPresentInfo->pImageIndices[i]);
|
||||
uint32_t image_index = pPresentInfo->pImageIndices[i];
|
||||
|
||||
struct overlay_draw *draw = before_present(swapchain_data,
|
||||
pPresentInfo->pWaitSemaphores,
|
||||
pPresentInfo->waitSemaphoreCount,
|
||||
image_index);
|
||||
|
||||
/* Because the submission of the overlay draw waits on the semaphores
|
||||
* handed for present, we don't need to have this present operation
|
||||
* wait on them as well, we can just wait on the overlay submission
|
||||
* semaphore.
|
||||
*/
|
||||
present_info.pWaitSemaphores = &swapchain_data->submission_semaphore;
|
||||
present_info.pWaitSemaphores = &draw->semaphore;
|
||||
present_info.waitSemaphoreCount = 1;
|
||||
|
||||
VkResult chain_result = queue_data->device->vtable.QueuePresentKHR(queue, &present_info);
|
||||
@@ -2011,9 +2029,9 @@ static VkResult overlay_AllocateCommandBuffers(
|
||||
}
|
||||
|
||||
if (pipeline_query_pool)
|
||||
map_object(pipeline_query_pool, (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
|
||||
map_object(HKEY(pipeline_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
|
||||
if (timestamp_query_pool)
|
||||
map_object(timestamp_query_pool, (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
|
||||
map_object(HKEY(timestamp_query_pool), (void *)(uintptr_t) pAllocateInfo->commandBufferCount);
|
||||
|
||||
return result;
|
||||
}
|
||||
@@ -2028,21 +2046,21 @@ static void overlay_FreeCommandBuffers(
|
||||
for (uint32_t i = 0; i < commandBufferCount; i++) {
|
||||
struct command_buffer_data *cmd_buffer_data =
|
||||
FIND_CMD_BUFFER_DATA(pCommandBuffers[i]);
|
||||
uint64_t count = (uintptr_t)find_object_data((void *)cmd_buffer_data->pipeline_query_pool);
|
||||
uint64_t count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->pipeline_query_pool));
|
||||
if (count == 1) {
|
||||
unmap_object(cmd_buffer_data->pipeline_query_pool);
|
||||
unmap_object(HKEY(cmd_buffer_data->pipeline_query_pool));
|
||||
device_data->vtable.DestroyQueryPool(device_data->device,
|
||||
cmd_buffer_data->pipeline_query_pool, NULL);
|
||||
} else if (count != 0) {
|
||||
map_object(cmd_buffer_data->pipeline_query_pool, (void *)(uintptr_t)(count - 1));
|
||||
map_object(HKEY(cmd_buffer_data->pipeline_query_pool), (void *)(uintptr_t)(count - 1));
|
||||
}
|
||||
count = (uintptr_t)find_object_data((void *)cmd_buffer_data->timestamp_query_pool);
|
||||
count = (uintptr_t)find_object_data(HKEY(cmd_buffer_data->timestamp_query_pool));
|
||||
if (count == 1) {
|
||||
unmap_object(cmd_buffer_data->timestamp_query_pool);
|
||||
unmap_object(HKEY(cmd_buffer_data->timestamp_query_pool));
|
||||
device_data->vtable.DestroyQueryPool(device_data->device,
|
||||
cmd_buffer_data->timestamp_query_pool, NULL);
|
||||
} else if (count != 0) {
|
||||
map_object(cmd_buffer_data->timestamp_query_pool, (void *)(uintptr_t)(count - 1));
|
||||
map_object(HKEY(cmd_buffer_data->timestamp_query_pool), (void *)(uintptr_t)(count - 1));
|
||||
}
|
||||
destroy_command_buffer_data(cmd_buffer_data);
|
||||
}
|
||||
|
Reference in New Issue
Block a user