Compare commits

..

3 Commits

Author SHA1 Message Date
Chad Versace
202ef9b8eb egl/android: Mark surface as lost when dequeueBuffer fails
This ensures that future calls to eglSwapBuffers and eglMakeCurrent emit
an error.
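
A minimal sketch in C of the pattern this message describes, using hypothetical names (surf_sketch, dequeue_with_loss_check) rather than the actual Mesa code: on a failed dequeue, the surface is flagged so later EGL entry points can refuse to proceed.

#include <stdbool.h>

/* Stand-in for the EGL surface; the series adds a Lost flag like this
 * to _EGLSurface in core EGL. */
struct surf_sketch {
   bool lost;
};

/* Returns 0 on success. On dequeueBuffer failure the surface is marked
 * lost, so eglSwapBuffers/eglMakeCurrent can emit an error later. */
static int
dequeue_with_loss_check(struct surf_sketch *surf, int (*deq)(void))
{
   if (deq() != 0) {
      surf->lost = true;
      return -1;
   }
   return 0;
}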

This patch is part of a series for fixing
android.hardware.camera2.cts.RobustnessTest#testAbandonRepeatingRequestSurface
on Chrome OS x86 devices.

Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Nicolas Boichat <drinkcat@chromium.org>
Cc: Tapani Pälli <tapani.palli@intel.com>
2017-05-03 16:46:03 -07:00
Chad Versace
58bfeb4ef2 egl/android: Cancel any outstanding ANativeBuffer in surface destructor
That is, call ANativeWindow::cancelBuffer in droid_destroy_surface().

This should prevent application deadlock when the app destroys the
EGLSurface after EGL has acquired a buffer from SurfaceFlinger
(ANativeWindow::dequeueBuffer) but before EGL has released it
(ANativeWindow::queueBuffer).
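
A rough sketch of that destructor behavior in C, with stand-in types so it compiles without the Android headers (the cancelBuffer hook signature mirrors the real one in system/window.h; droid_surface_sketch is hypothetical):

#include <stddef.h>

/* Minimal stand-ins for the Android types. */
struct ANativeWindowBuffer;
struct ANativeWindow {
   int (*cancelBuffer)(struct ANativeWindow *win,
                       struct ANativeWindowBuffer *buf, int fence_fd);
};

struct droid_surface_sketch {
   struct ANativeWindow *window;
   struct ANativeWindowBuffer *buffer; /* non-NULL while dequeued */
};

/* On destroy, hand any still-dequeued buffer back to SurfaceFlinger so
 * the queue slot is not held forever. */
static void
droid_destroy_surface_sketch(struct droid_surface_sketch *surf)
{
   if (surf->buffer) {
      surf->window->cancelBuffer(surf->window, surf->buffer,
                                 -1 /* no release fence */);
      surf->buffer = NULL;
   }
}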

This patch is part of a series for fixing
android.hardware.camera2.cts.RobustnessTest#testAbandonRepeatingRequestSurface
on Chrome OS x86 devices.

Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Nicolas Boichat <drinkcat@chromium.org>
Cc: Tapani Pälli <tapani.palli@intel.com>
2017-05-03 16:46:01 -07:00
Chad Versace
9c2b74ba2a egl: Emit error when EGLSurface is lost
Add a new bool, _EGLSurface::Lost, and check it in eglMakeCurrent and
eglSwapBuffers. The EGL 1.5 spec says that those functions emit errors
when the native surface is no longer valid.

This patch just updates core EGL. No driver sets _EGLSurface::Lost yet.
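
A minimal sketch in C of the check this adds, with hypothetical names (egl_surface_sketch, surface_usable); the real flag is _EGLSurface::Lost, and the error value is the one from the EGL headers:

#include <stdbool.h>

#define EGL_BAD_NATIVE_WINDOW 0x300B /* value from the EGL headers */

struct egl_surface_sketch {
   bool Lost; /* set by platform code when the native window goes away */
};

/* Called at the top of eglSwapBuffers/eglMakeCurrent-style entry points:
 * a lost surface yields an error instead of reaching the driver. */
static bool
surface_usable(const struct egl_surface_sketch *surf, int *err_out)
{
   if (surf->Lost) {
      *err_out = EGL_BAD_NATIVE_WINDOW;
      return false;
   }
   return true;
}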

I discovered that Mesa failed to detect lost surfaces while debugging an
Android CTS camera test,
android.hardware.camera2.cts.RobustnessTest#testAbandonRepeatingRequestSurface.
This patch doesn't fix the test, though, because the test expects
EGL_BAD_SURFACE when the surface becomes lost, while this patch
complies with the EGL spec. If I interpret the EGL spec correctly,
EGL_BAD_NATIVE_WINDOW or EGL_BAD_CURRENT_SURFACE is the correct error.

Cc: Tomasz Figa <tfiga@chromium.org>
Cc: Nicolas Boichat <drinkcat@chromium.org>
Cc: Tapani Pälli <tapani.palli@intel.com>
2017-05-03 16:45:48 -07:00
1338 changed files with 41180 additions and 168869 deletions

View File

@@ -45,8 +45,6 @@ matrix:
- x11proto-xf86vidmode-dev
- libexpat1-dev
- libx11-xcb-dev
- libxdamage-dev
- libxfixes-dev
- env:
# NOTE: Building SWR is 2x (yes two) times slower than all the other
# gallium drivers combined.
@@ -91,7 +89,7 @@ matrix:
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl"
- DRI_DRIVERS=""
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
- GALLIUM_DRIVERS="i915,nouveau,pl111,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
- GALLIUM_DRIVERS="i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,etnaviv,imx"
- VULKAN_DRIVERS=""
addons:
apt:
@@ -181,9 +179,12 @@ matrix:
- MAKE_CHECK_COMMAND="make -C src/gtest check && make -C src/intel check"
- LLVM_VERSION=3.9
- LLVM_CONFIG="llvm-config-${LLVM_VERSION}"
- DRI_LOADERS="--disable-glx --disable-gbm --disable-egl --with-platforms=x11,wayland"
# XXX: we want to test the WSI, but those are enabled via the EGL toggles
# XXX: Platform X11 dependencies are checked when --enable-glx is set
- DRI_LOADERS="--enable-glx --disable-gbm --enable-egl --with-platforms=x11,wayland"
- DRI_DRIVERS=""
- GALLIUM_ST="--enable-dri --enable-dri3 --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
# XXX: enable DRI for EGL above
- GALLIUM_ST="--enable-dri --disable-opencl --disable-xa --disable-nine --disable-xvmc --disable-vdpau --disable-va --disable-omx --disable-gallium-osmesa"
- GALLIUM_DRIVERS=""
- VULKAN_DRIVERS="intel,radeon"
addons:

View File

@@ -37,18 +37,11 @@ LOCAL_CFLAGS += \
-Wno-missing-field-initializers \
-Wno-initializer-overrides \
-Wno-mismatched-tags \
-DVERSION=\"$(MESA_VERSION)\" \
-DPACKAGE_VERSION=\"$(MESA_VERSION)\" \
-DPACKAGE_BUGREPORT=\"https://bugs.freedesktop.org/enter_bug.cgi?product=Mesa\"
# XXX: The following __STDC_*_MACROS defines should not be needed.
# It's likely due to a bug elsewhere, but let's temporarily add them
# here to fix the radeonsi build.
LOCAL_CFLAGS += \
-DANDROID_API_LEVEL=$(PLATFORM_SDK_VERSION) \
-DENABLE_SHADER_CACHE \
-D__STDC_CONSTANT_MACROS \
-D__STDC_LIMIT_MACROS \
-DHAVE___BUILTIN_EXPECT \
-DHAVE___BUILTIN_FFS \
-DHAVE___BUILTIN_FFSLL \
@@ -66,7 +59,6 @@ LOCAL_CFLAGS += \
-DHAVE_PTHREAD=1 \
-DHAVE_DLOPEN \
-DHAVE_DL_ITERATE_PHDR \
-DMAJOR_IN_SYSMACROS \
-fvisibility=hidden \
-Wno-sign-compare
@@ -89,10 +81,28 @@ LOCAL_CFLAGS += \
endif
endif
ifeq ($(MESA_ENABLE_LLVM),true)
ifeq ($(MESA_ANDROID_MAJOR_VERSION),5)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2
ELF_INCLUDES := external/elfutils/0.153/libelf
endif
ifeq ($(MESA_ANDROID_MAJOR_VERSION),6)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0
ELF_INCLUDES := external/elfutils/src/libelf
endif
ifeq ($(MESA_ANDROID_MAJOR_VERSION),7)
LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0
ELF_INCLUDES := external/elfutils/libelf
endif
endif
ifneq ($(LOCAL_IS_HOST_MODULE),true)
# add libdrm if there are hardware drivers
ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
LOCAL_CFLAGS += -DHAVE_LIBDRM
LOCAL_SHARED_LIBRARIES += libdrm
endif
endif
LOCAL_CFLAGS_32 += -DDEFAULT_DRIVER_DIR=\"/system/lib/$(MESA_DRI_MODULE_REL_PATH)\"
LOCAL_CFLAGS_64 += -DDEFAULT_DRIVER_DIR=\"/system/lib64/$(MESA_DRI_MODULE_REL_PATH)\"
@@ -106,3 +116,7 @@ endif
# Quiet down the build system and remove any .h files from the sources
LOCAL_SRC_FILES := $(patsubst %.h, , $(LOCAL_SRC_FILES))
ifneq ($(LOCAL_IS_HOST_MODULE),true)
LOCAL_SHARED_LIBRARIES += libz
endif

View File

@@ -24,7 +24,7 @@
# BOARD_GPU_DRIVERS should be defined. The valid values are
#
# classic drivers: i915 i965
# gallium drivers: swrast freedreno i915g nouveau pl111 r300g r600g radeonsi vc4 virgl vmwgfx
# gallium drivers: swrast freedreno i915g nouveau r300g r600g radeonsi vc4 virgl vmwgfx
#
# The main target is libGLES_mesa. For each classic driver enabled, a DRI
# module will also be built. DRI modules will be loaded by libGLES_mesa.
@@ -32,9 +32,6 @@
MESA_TOP := $(call my-dir)
MESA_ANDROID_MAJOR_VERSION := $(word 1, $(subst ., , $(PLATFORM_VERSION)))
ifneq ($(filter 2 4, $(MESA_ANDROID_MAJOR_VERSION)),)
$(error "Android 4.4 and earlier not supported")
endif
MESA_DRI_MODULE_REL_PATH := dri
MESA_DRI_MODULE_PATH := $(TARGET_OUT_SHARED_LIBRARIES)/$(MESA_DRI_MODULE_REL_PATH)
@@ -43,37 +40,19 @@ MESA_DRI_MODULE_UNSTRIPPED_PATH := $(TARGET_OUT_SHARED_LIBRARIES_UNSTRIPPED)/$(M
MESA_COMMON_MK := $(MESA_TOP)/Android.common.mk
MESA_PYTHON2 := python
# Lists to convert driver names to boolean variables
# in form of <driver name>.<boolean make variable>
classic_drivers := i915.HAVE_I915_DRI i965.HAVE_I965_DRI
gallium_drivers := \
swrast.HAVE_GALLIUM_SOFTPIPE \
freedreno.HAVE_GALLIUM_FREEDRENO \
i915g.HAVE_GALLIUM_I915 \
nouveau.HAVE_GALLIUM_NOUVEAU \
pl111.HAVE_GALLIUM_PL111 \
r300g.HAVE_GALLIUM_R300 \
r600g.HAVE_GALLIUM_R600 \
radeonsi.HAVE_GALLIUM_RADEONSI \
vmwgfx.HAVE_GALLIUM_VMWGFX \
vc4.HAVE_GALLIUM_VC4 \
virgl.HAVE_GALLIUM_VIRGL
classic_drivers := i915 i965
gallium_drivers := swrast freedreno i915g nouveau r300g r600g radeonsi vmwgfx vc4 virgl
ifeq ($(BOARD_GPU_DRIVERS),all)
MESA_BUILD_CLASSIC := $(filter HAVE_%, $(subst ., , $(classic_drivers)))
MESA_BUILD_GALLIUM := $(filter HAVE_%, $(subst ., , $(gallium_drivers)))
else
# Warn if we have any invalid driver names
$(foreach d, $(BOARD_GPU_DRIVERS), \
$(if $(findstring $(d).,$(classic_drivers) $(gallium_drivers)), \
, \
$(warning invalid GPU driver: $(d)) \
) \
)
MESA_BUILD_CLASSIC := $(strip $(foreach d, $(BOARD_GPU_DRIVERS), $(patsubst $(d).%,%, $(filter $(d).%, $(classic_drivers)))))
MESA_BUILD_GALLIUM := $(strip $(foreach d, $(BOARD_GPU_DRIVERS), $(patsubst $(d).%,%, $(filter $(d).%, $(gallium_drivers)))))
MESA_GPU_DRIVERS := $(strip $(BOARD_GPU_DRIVERS))
# warn about invalid drivers
invalid_drivers := $(filter-out \
$(classic_drivers) $(gallium_drivers), $(MESA_GPU_DRIVERS))
ifneq ($(invalid_drivers),)
$(warning invalid GPU drivers: $(invalid_drivers))
# tidy up
MESA_GPU_DRIVERS := $(filter-out $(invalid_drivers), $(MESA_GPU_DRIVERS))
endif
$(foreach d, $(MESA_BUILD_CLASSIC) $(MESA_BUILD_GALLIUM), $(eval $(d) := true))
# host and target must be the same arch to generate matypes.h
ifeq ($(TARGET_ARCH),$(HOST_ARCH))
@@ -82,27 +61,23 @@ else
MESA_ENABLE_ASM := false
endif
ifneq ($(filter true, $(HAVE_GALLIUM_RADEONSI)),)
MESA_ENABLE_LLVM := true
ifneq ($(filter $(classic_drivers), $(MESA_GPU_DRIVERS)),)
MESA_BUILD_CLASSIC := true
else
MESA_BUILD_CLASSIC := false
endif
define mesa-build-with-llvm
$(if $(filter $(MESA_ANDROID_MAJOR_VERSION), 4 5), \
$(warning Unsupported LLVM version in Android $(MESA_ANDROID_MAJOR_VERSION)),) \
$(if $(filter 6,$(MESA_ANDROID_MAJOR_VERSION)), \
$(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0307 -DMESA_LLVM_VERSION_PATCH=0) \
$(eval LOCAL_STATIC_LIBRARIES += libLLVMCore) \
$(eval LOCAL_C_INCLUDES += external/llvm/include external/llvm/device/include),) \
$(if $(filter 7,$(MESA_ANDROID_MAJOR_VERSION)), \
$(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0308 -DMESA_LLVM_VERSION_PATCH=0) \
$(eval LOCAL_STATIC_LIBRARIES += libLLVMCore) \
$(eval LOCAL_C_INCLUDES += external/llvm/include external/llvm/device/include),) \
$(if $(filter O,$(MESA_ANDROID_MAJOR_VERSION)), \
$(eval LOCAL_CFLAGS += -DHAVE_LLVM=0x0309 -DMESA_LLVM_VERSION_PATCH=0) \
$(eval LOCAL_HEADER_LIBRARIES += llvm-headers),)
endef
ifneq ($(filter $(gallium_drivers), $(MESA_GPU_DRIVERS)),)
MESA_BUILD_GALLIUM := true
else
MESA_BUILD_GALLIUM := false
endif
MESA_ENABLE_LLVM := $(if $(filter radeonsi,$(MESA_GPU_DRIVERS)),true,false)
# add subdirectories
ifneq ($(strip $(MESA_GPU_DRIVERS)),)
SUBDIRS := \
src/gbm \
src/loader \
@@ -117,5 +92,11 @@ SUBDIRS := \
src/vulkan
INC_DIRS := $(call all-named-subdir-makefiles,$(SUBDIRS))
ifeq ($(strip $(MESA_BUILD_GALLIUM)),true)
INC_DIRS += $(call all-named-subdir-makefiles,src/gallium)
endif
include $(INC_DIRS)
endif

View File

@@ -43,7 +43,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
--enable-llvm-shared-libs \
--with-platforms=x11,wayland,drm,surfaceless \
--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
--with-gallium-drivers=i915,nouveau,r300,pl111,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
--with-gallium-drivers=i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr,etnaviv,imx \
--with-vulkan-drivers=intel,radeon
ACLOCAL_AMFLAGS = -I m4

View File

@@ -1,2 +1,3 @@
[*.sh]
indent_style = tab
indent_style = space
indent_size = 2

View File

@@ -31,48 +31,38 @@ do
fi
fi
# Skip if it has been already cherry-picked.
if grep -q ^$sha already_picked ; then
# For each one try to extract the tag
fixes_count=`git show $sha | grep -i "fixes:" | wc -l`
if [ "x$fixes_count" != x1 ] ; then
printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
"`git log -n1 --pretty=oneline $sha`"
fi
fixes=`git show $sha | grep -i "fixes:" | head -n 1`
# The following sed/cut combination is borrowed from GregKH
id=`echo ${fixes} | sed -e 's/^[ \t]*//' | cut -f 2 -d ':' | sed -e 's/^[ \t]*//' | cut -f 1 -d ' '`
# Bail out if we cannot find suitable id.
# Any specific validation that the $id is valid, and not some junk, is
# implied by the follow-up code
if [ "x$id" = x ] ; then
continue
fi
# Place every "fixes:" tag on its own line and join with the next word
# on its line or a later one.
fixes=`git show -s $sha | tr -d "\n" | sed -e 's/fixes:[[:space:]]*/\nfixes:/Ig' | grep "fixes:" | sed -e 's/\(fixes:[a-zA-Z0-9]*\).*$/\1/'`
# Check if the offending commit is in branch.
# For each one try to extract the tag
fixes_count=`echo "$fixes" | wc -l`
warn=`(test $fixes_count -gt 1 && echo $fixes_count) || echo 0`
while [ $fixes_count -gt 0 ] ; do
# Treat only the current line
id=`echo "$fixes" | tail -n $fixes_count | head -n 1 | cut -d : -f 2`
fixes_count=$(($fixes_count-1))
# Be that cherry-picked ...
# ... or landed before the branchpoint.
if grep -q ^$id already_picked ||
grep -q ^$id already_landed ; then
# Bail out if we cannot find suitable id.
# Any specific validation that the $id is valid, and not some junk, is
# implied by the follow-up code
if [ "x$id" = x ] ; then
# Finally nominate the fix if it hasn't landed yet.
if grep -q ^$sha already_picked ; then
continue
fi
# Check if the offending commit is in branch.
# Be that cherry-picked ...
# ... or landed before the branchpoint.
if grep -q ^$id already_picked ||
grep -q ^$id already_landed ; then
printf "Commit \"%s\" fixes %s\n" \
"`git log -n1 --pretty=oneline $sha`" \
"$id"
warn=$(($warn-1))
fi
done
if [ $warn -gt 0 ] ; then
printf "WARNING: Commit \"%s\" has more than one Fixes tag\n" \
"`git log -n1 --pretty=oneline $sha`"
printf "Commit \"%s\" fixes %s\n" \
"`git log -n1 --pretty=oneline $sha`" \
"$id"
fi
done

View File

@@ -133,7 +133,7 @@ class PerfParser(LineParser):
def __init__(self, infile, symbol):
LineParser.__init__(self, infile)
self.symbol = symbol
self.symbol = symbol
def readline(self):
# Override LineParser.readline to ignore comment lines
@@ -155,7 +155,7 @@ class PerfParser(LineParser):
addresses.sort()
total_samples = 0
sys.stdout.write('%s:\n' % self.symbol)
sys.stdout.write('%s:\n' % self.symbol)
for address, instr in asm:
try:
sample = samples.pop(address)

View File

@@ -74,7 +74,7 @@ AC_SUBST([OPENCL_VERSION])
# in the first entry.
LIBDRM_REQUIRED=2.4.75
LIBDRM_RADEON_REQUIRED=2.4.71
LIBDRM_AMDGPU_REQUIRED=2.4.81
LIBDRM_AMDGPU_REQUIRED=2.4.79
LIBDRM_INTEL_REQUIRED=2.4.75
LIBDRM_NVVIEUX_REQUIRED=2.4.66
LIBDRM_NOUVEAU_REQUIRED=2.4.66
@@ -97,13 +97,13 @@ XSHMFENCE_REQUIRED=1.1
XVMC_REQUIRED=1.0.6
PYTHON_MAKO_REQUIRED=0.8.0
LIBSENSORS_REQUIRED=4.0.0
ZLIB_REQUIRED=1.2.3
ZLIB_REQUIRED=1.2.8
dnl LLVM versions
LLVM_REQUIRED_GALLIUM=3.3.0
LLVM_REQUIRED_OPENCL=3.6.0
LLVM_REQUIRED_R600=3.9.0
LLVM_REQUIRED_RADEONSI=3.9.0
LLVM_REQUIRED_R600=3.8.0
LLVM_REQUIRED_RADEONSI=3.8.0
LLVM_REQUIRED_RADV=3.9.0
LLVM_REQUIRED_SWR=3.9.0
@@ -269,7 +269,7 @@ DEFINES="-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS"
AC_SUBST([DEFINES])
android=no
case "$host_os" in
*-android*)
*-android)
android=yes
;;
linux*|*-gnu*|gnu*|cygwin*)
@@ -455,7 +455,7 @@ int main () {
CFLAGS=$save_CFLAGS
AC_ARG_ENABLE(pwr8,
[AS_HELP_STRING([--disable-pwr8],
[AS_HELP_STRING([--disable-pwr8-inst],
[disable POWER8-specific instructions])],
[enable_pwr8=$enableval], [enable_pwr8=auto])
@@ -766,13 +766,6 @@ if test "x$enable_asm" = xyes; then
;;
esac
;;
powerpc64le)
case "$host_os" in
linux*)
asm_arch=ppc64le
;;
esac
;;
esac
case "$asm_arch" in
@@ -788,10 +781,6 @@ if test "x$enable_asm" = xyes; then
DEFINES="$DEFINES -DUSE_SPARC_ASM"
AC_MSG_RESULT([yes, sparc])
;;
ppc64le)
DEFINES="$DEFINES -DUSE_PPC64LE_ASM"
AC_MSG_RESULT([yes, ppc64le])
;;
*)
AC_MSG_RESULT([no, platform not supported])
;;
@@ -848,11 +837,6 @@ dnl is not valid for that platform.
if test "x$android" = xno; then
test -z "$PTHREAD_LIBS" && PTHREAD_LIBS="-lpthread"
fi
dnl According to the manual when using pthreads, one should add -pthread to
dnl both compile and link-time arguments.
dnl In practice that should be sufficient for all platforms, since all
dnl platforms built with GCC and Clang support the flag.
PTHREAD_LIBS="$PTHREAD_LIBS -pthread"
dnl pthread-stubs is mandatory on BSD platforms, due to the nature of the
dnl project. Even then there's a notable issue as described in the project README
@@ -867,6 +851,8 @@ esac
if test "x$pthread_stubs_possible" = xyes; then
PKG_CHECK_MODULES(PTHREADSTUBS, pthread-stubs >= 0.4)
AC_SUBST(PTHREADSTUBS_CFLAGS)
AC_SUBST(PTHREADSTUBS_LIBS)
fi
dnl SELinux awareness.
@@ -1080,18 +1066,27 @@ AC_SUBST([LLVM_INCLUDEDIR])
dnl
dnl libunwind
dnl
PKG_CHECK_EXISTS(libunwind, [HAVE_LIBUNWIND=yes], [HAVE_LIBUNWIND=no])
AC_ARG_ENABLE([libunwind],
[AS_HELP_STRING([--enable-libunwind],
[Use libunwind for backtracing (default: auto)])],
[LIBUNWIND="$enableval"],
[LIBUNWIND="$HAVE_LIBUNWIND"])
[LIBUNWIND="auto"])
PKG_CHECK_EXISTS(libunwind, [HAVE_LIBUNWIND=yes], [HAVE_LIBUNWIND=no])
if test "x$LIBUNWIND" = "xauto"; then
LIBUNWIND="$HAVE_LIBUNWIND"
fi
if test "x$LIBUNWIND" = "xyes"; then
PKG_CHECK_MODULES(LIBUNWIND, libunwind)
if test "x$HAVE_LIBUNWIND" != "xyes"; then
AC_MSG_ERROR([libunwind requested but not installed.])
fi
AC_DEFINE(HAVE_LIBUNWIND, 1, [Have libunwind support])
fi
AM_CONDITIONAL(HAVE_LIBUNWIND, [test "x$LIBUNWIND" = xyes])
dnl Options for APIs
AC_ARG_ENABLE([opengl],
@@ -1250,7 +1245,7 @@ GALLIUM_DRIVERS_DEFAULT="r300,r600,svga,swrast"
AC_ARG_WITH([gallium-drivers],
[AS_HELP_STRING([--with-gallium-drivers@<:@=DIRS...@:>@],
[comma delimited Gallium drivers list, e.g.
"i915,nouveau,r300,r600,radeonsi,freedreno,pl111,svga,swrast,swr,vc4,virgl,etnaviv,imx"
"i915,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,swr,vc4,virgl,etnaviv,imx"
@<:@default=r300,r600,svga,swrast@:>@])],
[with_gallium_drivers="$withval"],
[with_gallium_drivers="$GALLIUM_DRIVERS_DEFAULT"])
@@ -1546,10 +1541,15 @@ xdri)
PKG_CHECK_MODULES([DRI2PROTO], [dri2proto >= $DRI2PROTO_REQUIRED])
GL_PC_REQ_PRIV="$GL_PC_REQ_PRIV libdrm >= $LIBDRM_REQUIRED"
if test x"$enable_dri" = xyes; then
dri_modules="$dri_modules xcb-dri2 >= $XCBDRI2_REQUIRED"
fi
if test x"$enable_dri3" = xyes; then
PKG_CHECK_EXISTS([xcb >= $XCB_REQUIRED], [], AC_MSG_ERROR([DRI3 requires xcb >= $XCB_REQUIRED]))
dri3_modules="xcb xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
fi
fi
if test x"$dri_platform" = xapple ; then
DEFINES="$DEFINES -DGLX_USE_APPLEGL"
@@ -1638,111 +1638,6 @@ if test "x$enable_glx_read_only_text" = xyes; then
DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
fi
dnl
dnl DEPRECATED: EGL Platforms configuration
dnl
AC_ARG_WITH([egl-platforms],
[AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
[DEPRECATED: use --with-platforms instead@<:@default=auto@:>@])],
[with_egl_platforms="$withval"],
[with_egl_platforms=auto])
if test "x$with_egl_platforms" = xauto; then
with_egl_platforms="x11,surfaceless"
if test "x$enable_gbm" = xyes; then
with_egl_platforms="$with_egl_platforms,drm"
fi
else
AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-platforms instead.])
fi
dnl
dnl Platforms configuration
dnl
AC_ARG_WITH([platforms],
[AS_HELP_STRING([--with-platforms@<:@=DIRS...@:>@],
[comma delimited native platforms libEGL/Vulkan/other supports, e.g.
"x11,drm,wayland,surfaceless..." @<:@default=auto@:>@])],
[with_platforms="$withval"],
[with_platforms=auto])
# Reuse the autodetection rather than duplicating it.
if test "x$with_platforms" = xauto; then
with_platforms=$with_egl_platforms
fi
PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
WAYLAND_SCANNER='')
if test "x$WAYLAND_SCANNER" = x; then
AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner], [:])
fi
# Do per platform setups and checks
platforms=`IFS=', '; echo $with_platforms`
for plat in $platforms; do
case "$plat" in
wayland)
PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
if test "x$WAYLAND_SCANNER" = "x:"; then
AC_MSG_ERROR([wayland-scanner is needed to compile the wayland platform])
fi
DEFINES="$DEFINES -DHAVE_WAYLAND_PLATFORM"
;;
x11)
PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED xcb-xfixes])
DEFINES="$DEFINES -DHAVE_X11_PLATFORM"
;;
drm)
test "x$enable_gbm" = "xno" &&
AC_MSG_ERROR([EGL platform drm needs gbm])
DEFINES="$DEFINES -DHAVE_DRM_PLATFORM"
;;
surfaceless)
DEFINES="$DEFINES -DHAVE_SURFACELESS_PLATFORM"
;;
android)
PKG_CHECK_MODULES([ANDROID], [cutils hardware sync])
DEFINES="$DEFINES -DHAVE_ANDROID_PLATFORM"
;;
*)
AC_MSG_ERROR([platform '$plat' does not exist])
;;
esac
case "$plat" in
wayland|drm|surfaceless)
require_libdrm "Platform $plat"
;;
esac
done
if test "x$enable_glx" != xno; then
if ! echo "$platforms" | grep -q 'x11'; then
AC_MSG_ERROR([Building GLX without the x11 platform is not supported])
fi
fi
if test x"$enable_dri3" = xyes; then
DEFINES="$DEFINES -DHAVE_DRI3"
dri3_modules="x11-xcb xcb >= $XCB_REQUIRED xcb-dri3 xcb-xfixes xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED"
PKG_CHECK_MODULES([XCB_DRI3], [$dri3_modules])
fi
AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$platforms" | grep -q 'x11')
AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$platforms" | grep -q 'wayland')
AM_CONDITIONAL(HAVE_PLATFORM_DRM, echo "$platforms" | grep -q 'drm')
AM_CONDITIONAL(HAVE_PLATFORM_SURFACELESS, echo "$platforms" | grep -q 'surfaceless')
AM_CONDITIONAL(HAVE_PLATFORM_ANDROID, echo "$platforms" | grep -q 'android')
dnl
dnl More DRI setup
dnl
@@ -1785,6 +1680,10 @@ if test "x$enable_dri" = xyes; then
# Platform specific settings and drivers to build
case "$host_os" in
linux*)
if test "x$enable_dri3" = xyes; then
DEFINES="$DEFINES -DHAVE_DRI3"
fi
case "$host_cpu" in
powerpc* | sparc*)
# Build only the drivers for cards that exist on PowerPC/sparc
@@ -1841,11 +1740,12 @@ if test -n "$with_dri_drivers"; then
xi915)
require_libdrm "i915"
HAVE_I915_DRI=yes
PKG_CHECK_MODULES([I915], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
;;
xi965)
require_libdrm "i965"
HAVE_I965_DRI=yes
PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
;;
xnouveau)
require_libdrm "nouveau"
@@ -1939,14 +1839,6 @@ AC_ARG_WITH([vulkan-icddir],
[VULKAN_ICD_INSTALL_DIR='${datarootdir}/vulkan/icd.d'])
AC_SUBST([VULKAN_ICD_INSTALL_DIR])
require_x11_dri3() {
if echo "$platforms" | grep -q 'x11'; then
if test "x$enable_dri3" != xyes; then
AC_MSG_ERROR([$1 Vulkan driver requires DRI3 when built with X11])
fi
fi
}
if test -n "$with_vulkan_drivers"; then
if test "x$ac_cv_func_dl_iterate_phdr" = xno; then
AC_MSG_ERROR([Vulkan drivers require the dl_iterate_phdr function])
@@ -1957,14 +1849,13 @@ if test -n "$with_vulkan_drivers"; then
case "x$driver" in
xintel)
require_libdrm "ANV"
require_x11_dri3 "ANV"
PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
HAVE_INTEL_VULKAN=yes
;;
xradeon)
require_libdrm "radv"
PKG_CHECK_MODULES([AMDGPU], [libdrm >= $LIBDRM_AMDGPU_REQUIRED libdrm_amdgpu >= $LIBDRM_AMDGPU_REQUIRED])
radeon_llvm_check $LLVM_REQUIRED_RADV "radv"
require_x11_dri3 "radv"
HAVE_RADEON_VULKAN=yes
;;
*)
@@ -2070,47 +1961,23 @@ if test "x$enable_xa" = xyes; then
fi
AM_CONDITIONAL(HAVE_ST_XA, test "x$enable_xa" = xyes)
if echo $platforms | grep -q "x11"; then
have_xvmc_platform=yes
else
have_xvmc_platform=no
fi
if echo $platforms | grep -q "x11"; then
have_vdpau_platform=yes
else
have_vdpau_platform=no
fi
if echo $platforms | grep -q "x11\|drm"; then
have_omx_platform=yes
else
have_omx_platform=no
fi
if echo $platforms | grep -q "x11\|drm\|wayland"; then
have_va_platform=yes
else
have_va_platform=no
fi
dnl
dnl Gallium G3DVL configuration
dnl
if test -n "$with_gallium_drivers" -a "x$with_gallium_drivers" != xswrast; then
if test "x$enable_xvmc" = xauto -a "x$have_xvmc_platform" = xyes; then
PKG_CHECK_EXISTS([xvmc >= $XVMC_REQUIRED], [enable_xvmc=yes], [enable_xvmc=no])
if test "x$enable_xvmc" = xauto; then
PKG_CHECK_EXISTS([xvmc >= $XVMC_REQUIRED], [enable_xvmc=yes], [enable_xvmc=no])
fi
if test "x$enable_vdpau" = xauto -a "x$have_vdpau_platform" = xyes; then
if test "x$enable_vdpau" = xauto; then
PKG_CHECK_EXISTS([vdpau >= $VDPAU_REQUIRED], [enable_vdpau=yes], [enable_vdpau=no])
fi
if test "x$enable_omx" = xauto -a "x$have_omx_platform" = xyes; then
PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], [enable_omx=yes], [enable_omx=no])
if test "x$enable_omx" = xauto; then
PKG_CHECK_EXISTS([libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED], [enable_omx=yes], [enable_omx=no])
fi
if test "x$enable_va" = xauto -a "x$have_va_platform" = xyes; then
if test "x$enable_va" = xauto; then
PKG_CHECK_EXISTS([libva >= $LIBVA_REQUIRED], [enable_va=yes], [enable_va=no])
fi
fi
@@ -2128,24 +1995,23 @@ if test "x$enable_xvmc" = xyes -o \
"x$enable_vdpau" = xyes -o \
"x$enable_omx" = xyes -o \
"x$enable_va" = xyes; then
PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
if test x"$enable_dri3" = xyes; then
PKG_CHECK_MODULES([VL], [xcb-dri3 xcb-present xcb-sync xshmfence >= $XSHMFENCE_REQUIRED
xcb-xfixes x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
else
PKG_CHECK_MODULES([VL], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED])
fi
need_gallium_vl_winsys=yes
fi
AM_CONDITIONAL(NEED_GALLIUM_VL_WINSYS, test "x$need_gallium_vl_winsys" = xyes)
if test "x$enable_xvmc" = xyes; then
if test "x$have_xvmc_platform" != xyes; then
AC_MSG_ERROR([XVMC requires the x11 platforms])
fi
PKG_CHECK_MODULES([XVMC], [xvmc >= $XVMC_REQUIRED])
gallium_st="$gallium_st xvmc"
fi
AM_CONDITIONAL(HAVE_ST_XVMC, test "x$enable_xvmc" = xyes)
if test "x$enable_vdpau" = xyes; then
if test "x$have_vdpau_platform" != xyes; then
AC_MSG_ERROR([VDPAU requires the x11 platforms])
fi
PKG_CHECK_MODULES([VDPAU], [vdpau >= $VDPAU_REQUIRED])
gallium_st="$gallium_st vdpau"
DEFINES="$DEFINES -DHAVE_ST_VDPAU"
@@ -2153,18 +2019,12 @@ fi
AM_CONDITIONAL(HAVE_ST_VDPAU, test "x$enable_vdpau" = xyes)
if test "x$enable_omx" = xyes; then
if test "x$have_omx_platform" != xyes; then
AC_MSG_ERROR([OMX requires at least one of the x11 or drm platforms])
fi
PKG_CHECK_MODULES([OMX], [libomxil-bellagio >= $LIBOMXIL_BELLAGIO_REQUIRED])
gallium_st="$gallium_st omx"
fi
AM_CONDITIONAL(HAVE_ST_OMX, test "x$enable_omx" = xyes)
if test "x$enable_va" = xyes; then
if test "x$have_va_platform" != xyes; then
AC_MSG_ERROR([VA requires at least one of the x11 drm or wayland platforms])
fi
PKG_CHECK_MODULES([VA], [libva >= $LIBVA_REQUIRED])
gallium_st="$gallium_st va"
fi
@@ -2281,21 +2141,113 @@ dnl Gallium configuration
dnl
AM_CONDITIONAL(HAVE_GALLIUM, test -n "$with_gallium_drivers")
dnl
dnl DEPRECATED: EGL Platforms configuration
dnl
AC_ARG_WITH([egl-platforms],
[AS_HELP_STRING([--with-egl-platforms@<:@=DIRS...@:>@],
[DEPRECATED: use --with-platforms instead@<:@default=auto@:>@])],
[with_egl_platforms="$withval"],
[with_egl_platforms=auto])
if test "x$with_egl_platforms" = xauto; then
if test "x$enable_egl" = xyes; then
if test "x$enable_gbm" = xyes; then
with_egl_platforms="x11,drm"
else
with_egl_platforms="x11"
fi
else
with_egl_platforms=""
fi
else
AC_MSG_WARN([--with-egl-platforms is deprecated. Use --with-platforms instead.])
fi
dnl
dnl Platforms configuration
dnl
AC_ARG_WITH([platforms],
[AS_HELP_STRING([--with-platforms@<:@=DIRS...@:>@],
[comma delimited native platforms libEGL/Vulkan/other supports, e.g.
"x11,drm,wayland,surfaceless..." @<:@default=auto@:>@])],
[with_platforms="$withval"],
[with_platforms=auto])
# For the time being, we still reuse the EGL named variables/defines.
if test "x$with_platforms" != xauto; then
with_egl_platforms=$with_platforms
fi
PKG_CHECK_MODULES([WAYLAND_SCANNER], [wayland-scanner],
WAYLAND_SCANNER=`$PKG_CONFIG --variable=wayland_scanner wayland-scanner`,
WAYLAND_SCANNER='')
if test "x$WAYLAND_SCANNER" = x; then
AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner], [:])
fi
# Do per-EGL platform setups and checks
egl_platforms=`IFS=', '; echo $with_egl_platforms`
for plat in $egl_platforms; do
case "$plat" in
wayland)
PKG_CHECK_MODULES([WAYLAND], [wayland-client >= $WAYLAND_REQUIRED wayland-server >= $WAYLAND_REQUIRED])
if test "x$WAYLAND_SCANNER" = "x:"; then
AC_MSG_ERROR([wayland-scanner is needed to compile the wayland egl platform])
fi
;;
x11)
PKG_CHECK_MODULES([XCB_DRI2], [x11-xcb xcb xcb-dri2 >= $XCBDRI2_REQUIRED xcb-xfixes])
;;
drm)
test "x$enable_gbm" = "xno" &&
AC_MSG_ERROR([EGL platform drm needs gbm])
;;
surfaceless)
;;
android)
PKG_CHECK_MODULES([ANDROID], [cutils hardware sync])
;;
*)
AC_MSG_ERROR([EGL platform '$plat' does not exist])
;;
esac
case "$plat" in
wayland|drm|surfaceless)
require_libdrm "Platform $plat"
;;
esac
done
# libEGL wants to default to the first platform specified in
# ./configure. parse that here.
if test "x$platforms" != "x"; then
FIRST_PLATFORM_CAPS=`echo $platforms | sed 's| .*||' | tr '[[a-z]]' '[[A-Z]]'`
if test "x$egl_platforms" != "x"; then
FIRST_PLATFORM_CAPS=`echo $egl_platforms | sed 's| .*||' | tr '[[a-z]]' '[[A-Z]]'`
EGL_NATIVE_PLATFORM="_EGL_PLATFORM_$FIRST_PLATFORM_CAPS"
else
EGL_NATIVE_PLATFORM="_EGL_INVALID_PLATFORM"
fi
AM_CONDITIONAL(HAVE_PLATFORM_X11, echo "$egl_platforms" | grep -q 'x11')
AM_CONDITIONAL(HAVE_PLATFORM_WAYLAND, echo "$egl_platforms" | grep -q 'wayland')
AM_CONDITIONAL(HAVE_EGL_PLATFORM_DRM, echo "$egl_platforms" | grep -q 'drm')
AM_CONDITIONAL(HAVE_EGL_PLATFORM_SURFACELESS, echo "$egl_platforms" | grep -q 'surfaceless')
AM_CONDITIONAL(HAVE_EGL_PLATFORM_ANDROID, echo "$egl_platforms" | grep -q 'android')
AC_SUBST([EGL_NATIVE_PLATFORM])
AC_SUBST([EGL_CFLAGS])
# If we don't have the X11 platform, set this define so we don't try to include
# the X11 headers.
if ! echo "$platforms" | grep -q 'x11'; then
if ! echo "$egl_platforms" | grep -q 'x11'; then
DEFINES="$DEFINES -DMESA_EGL_NO_X11_HEADERS"
GL_PC_CFLAGS="$GL_PC_CFLAGS -DMESA_EGL_NO_X11_HEADERS"
fi
@@ -2365,7 +2317,7 @@ dnl DRM is needed by X, Wayland, and offscreen rendering.
dnl Surfaceless is an alternative for the last one.
dnl
require_basic_egl() {
case "$with_platforms" in
case "$with_egl_platforms" in
*drm*|*surfaceless*)
;;
*)
@@ -2427,7 +2379,7 @@ if test -n "$with_gallium_drivers"; then
;;
xi915)
HAVE_GALLIUM_I915=yes
PKG_CHECK_MODULES([I915], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
PKG_CHECK_MODULES([INTEL], [libdrm >= $LIBDRM_INTEL_REQUIRED libdrm_intel >= $LIBDRM_INTEL_REQUIRED])
require_libdrm "Gallium i915"
;;
xr300)
@@ -2484,10 +2436,10 @@ if test -n "$with_gallium_drivers"; then
xswr)
llvm_require_version $LLVM_REQUIRED_SWR "swr"
swr_require_cxx_feature_flags "C++11" "__cplusplus >= 201103L" \
",-std=c++11" \
SWR_CXX11_CXXFLAGS
AC_SUBST([SWR_CXX11_CXXFLAGS])
swr_require_cxx_feature_flags "C++14" "__cplusplus >= 201402L" \
"-std=c++14" \
SWR_CXX14_CXXFLAGS
AC_SUBST([SWR_CXX14_CXXFLAGS])
swr_require_cxx_feature_flags "AVX" "defined(__AVX__)" \
",-mavx,-march=core-avx" \
@@ -2511,9 +2463,6 @@ if test -n "$with_gallium_drivers"; then
DEFINES="$DEFINES -DUSE_VC4_SIMULATOR"],
[USE_VC4_SIMULATOR=no])
;;
xpl111)
HAVE_GALLIUM_PL111=yes
;;
xvirgl)
HAVE_GALLIUM_VIRGL=yes
require_libdrm "virgl"
@@ -2528,10 +2477,6 @@ if test -n "$with_gallium_drivers"; then
done
fi
# XXX: Keep in sync with LLVM_REQUIRED_SWR
AM_CONDITIONAL(SWR_INVALID_LLVM_VERSION, test "x$LLVM_VERSION" != x3.9.0 -a \
"x$LLVM_VERSION" != x3.9.1)
if test "x$enable_llvm" = "xyes" -a "$with_gallium_drivers"; then
llvm_require_version $LLVM_REQUIRED_GALLIUM "gallium"
llvm_add_default_components "gallium"
@@ -2543,10 +2488,6 @@ if test "x$HAVE_GALLIUM_ETNAVIV" != xyes -a "x$HAVE_GALLIUM_IMX" = xyes ; then
AC_MSG_ERROR([Building with imx requires etnaviv])
fi
if test "x$HAVE_GALLIUM_VC4" != xyes -a "x$HAVE_GALLIUM_PL111" = xyes ; then
AC_MSG_ERROR([Building with pl111 requires vc4])
fi
dnl
dnl Set defines and buildtime variables only when using LLVM.
dnl
@@ -2611,7 +2552,6 @@ fi
AM_CONDITIONAL(HAVE_GALLIUM_SVGA, test "x$HAVE_GALLIUM_SVGA" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_I915, test "x$HAVE_GALLIUM_I915" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_PL111, test "x$HAVE_GALLIUM_PL111" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_R300, test "x$HAVE_GALLIUM_R300" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_R600, test "x$HAVE_GALLIUM_R600" = xyes)
AM_CONDITIONAL(HAVE_GALLIUM_RADEONSI, test "x$HAVE_GALLIUM_RADEONSI" = xyes)
@@ -2651,7 +2591,8 @@ AM_CONDITIONAL(HAVE_SWRAST_DRI, test x$HAVE_SWRAST_DRI = xyes)
AM_CONDITIONAL(HAVE_RADEON_VULKAN, test "x$HAVE_RADEON_VULKAN" = xyes)
AM_CONDITIONAL(HAVE_INTEL_VULKAN, test "x$HAVE_INTEL_VULKAN" = xyes)
AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_RADEONSI" = xyes -o \
AM_CONDITIONAL(HAVE_AMD_DRIVERS, test "x$HAVE_GALLIUM_R600" = xyes -o \
"x$HAVE_GALLIUM_RADEONSI" = xyes -o \
"x$HAVE_RADEON_VULKAN" = xyes)
AM_CONDITIONAL(HAVE_INTEL_DRIVERS, test "x$HAVE_INTEL_VULKAN" = xyes -o \
@@ -2674,7 +2615,6 @@ AM_CONDITIONAL(HAVE_COMMON_OSMESA, test "x$enable_osmesa" = xyes -o \
AM_CONDITIONAL(HAVE_X86_ASM, test "x$asm_arch" = xx86 -o "x$asm_arch" = xx86_64)
AM_CONDITIONAL(HAVE_X86_64_ASM, test "x$asm_arch" = xx86_64)
AM_CONDITIONAL(HAVE_SPARC_ASM, test "x$asm_arch" = xsparc)
AM_CONDITIONAL(HAVE_PPC64LE_ASM, test "x$asm_arch" = xppc64le)
AC_SUBST([NINE_MAJOR], 1)
AC_SUBST([NINE_MINOR], 0)
@@ -2761,7 +2701,6 @@ AC_CONFIG_FILES([Makefile
src/gallium/drivers/llvmpipe/Makefile
src/gallium/drivers/noop/Makefile
src/gallium/drivers/nouveau/Makefile
src/gallium/drivers/pl111/Makefile
src/gallium/drivers/r300/Makefile
src/gallium/drivers/r600/Makefile
src/gallium/drivers/radeon/Makefile
@@ -2807,7 +2746,6 @@ AC_CONFIG_FILES([Makefile
src/gallium/winsys/freedreno/drm/Makefile
src/gallium/winsys/i915/drm/Makefile
src/gallium/winsys/nouveau/drm/Makefile
src/gallium/winsys/pl111/drm/Makefile
src/gallium/winsys/radeon/drm/Makefile
src/gallium/winsys/amdgpu/drm/Makefile
src/gallium/winsys/svga/drm/Makefile
@@ -2934,7 +2872,7 @@ else
echo " GBM: no"
fi
echo " EGL/Vulkan/VL platforms: $platforms"
echo " EGL/Vulkan/VL platforms: $egl_platforms"
# Vulkan
echo ""

View File

@@ -77,13 +77,15 @@ drivers will be installed to <code>${libdir}/egl</code>.</p>
</dd>
<dt><code>--with-platforms</code></dt>
<dt><code>--with-egl-platforms</code></dt>
<dd>
<p>List the platforms (window systems) to support. Its argument is a comma
separated string such as <code>--with-platforms=x11,drm</code>. It decides
separated string such as <code>--with-egl-platforms=x11,drm</code>. It decides
the platforms a driver may support. The first listed platform is also used by
the main library to decide the native platform.</p>
the main library to decide the native platform: this defines EGL native
types such as <code>EGLNativeDisplayType</code> or
<code>EGLNativeWindowType</code>.</p>
<p>The available platforms are <code>x11</code>, <code>drm</code>,
<code>wayland</code>, <code>surfaceless</code>, <code>android</code>,
@@ -165,9 +167,9 @@ binaries.</p>
<dd>
<p>This variable specifies the native platform. The valid values are the same
as those for <code>--with-platforms</code>. When the variable is not set,
as those for <code>--with-egl-platforms</code>. When the variable is not set,
the main library uses the first platform listed in
<code>--with-platforms</code> as the native platform.</p>
<code>--with-egl-platforms</code> as the native platform.</p>
<p>Extensions like <code>EGL_MESA_drm_display</code> define new functions to
create displays for non-native platforms. These extensions are usually used by

View File

@@ -195,15 +195,16 @@ See the <a href="xlibdriver.html">Xlib software driver page</a> for details.
<li>spill_fs - force spilling of all registers in the scalar backend (useful to debug spilling code)</li>
<li>spill_vec4 - force spilling of all registers in the vec4 backend (useful to debug spilling code)</li>
<li>state - emit messages about state flag tracking</li>
<li>stats - enable statistics counters. You probably actually want perfmon or intel_gpu_top instead.</li>
<li>sync - after sending each batch, emit a message and wait for that batch to finish rendering</li>
<li>tcs - dump shader assembly for tessellation control shaders</li>
<li>tes - dump shader assembly for tessellation evaluation shaders</li>
<li>tex - emit messages about textures.</li>
<li>urb - emit messages about URB setup</li>
<li>vec4 - force vec4 mode in vertex shader</li>
<li>vert - emit messages about vertex assembly</li>
<li>vs - dump shader assembly for vertex shaders</li>
</ul>
<li>INTEL_SCALAR_VS (or TCS, TES, GS) - force scalar/vec4 mode for a shader stage (Gen8-9 only)</li>
<li>INTEL_PRECISE_TRIG - if set to 1, true or yes, then the driver prefers
accuracy over performance in trig functions.</li>
</ul>
@@ -304,8 +305,6 @@ See src/mesa/state_tracker/st_debug.c for other options.
(will often result in incorrect rendering).
<li>SVGA_DEBUG - for dumping shaders, constant buffers, etc. See the code
for details.
<li>SVGA_EXTRA_LOGGING - if set, enables extra logging to the vmware.log file,
such as the OpenGL program's name and command line arguments.
<li>See the driver code for other, lesser-used variables.
</ul>

View File

@@ -277,7 +277,7 @@ GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES version:
GL_ARB_bindless_texture DONE (radeonsi)
GL_ARB_bindless_texture started (airlied)
GL_ARB_cl_event not started
GL_ARB_compute_variable_group_size DONE (nvc0, radeonsi)
GL_ARB_ES3_2_compatibility DONE (i965/gen8+)
@@ -306,8 +306,8 @@ Khronos, ARB, and OES extensions that are not part of any OpenGL or OpenGL ES ve
GL_ARB_transform_feedback_overflow_query DONE (i965/gen6+)
GL_KHR_blend_equation_advanced_coherent DONE (i965/gen9+)
GL_KHR_no_error started (Timothy Arceri)
GL_KHR_texture_compression_astc_hdr DONE (i965/bxt)
GL_KHR_texture_compression_astc_sliced_3d DONE (i965/gen9+)
GL_KHR_texture_compression_astc_hdr DONE (core only)
GL_KHR_texture_compression_astc_sliced_3d not started
GL_OES_depth_texture_cube_map DONE (all drivers that support GLSL 1.30+)
GL_OES_EGL_image DONE (all drivers)
GL_OES_EGL_image_external_essl3 not started

View File

@@ -16,47 +16,6 @@
<h1>News</h1>
<h2>June 19, 2017</h2>
<p>
<a href="relnotes/17.1.3.html">Mesa 17.1.3</a> is released.
This is a bug-fix release.
</p>
<h2>June 5, 2017</h2>
<p>
<a href="relnotes/17.1.2.html">Mesa 17.1.2</a> is released.
This is a bug-fix release.
</p>
<h2>June 1, 2017</h2>
<p>
<a href="relnotes/17.0.7.html">Mesa 17.0.7</a> is released.
This is a bug-fix release.
<br>
NOTE: It is anticipated that 17.0.7 will be the final release in the 17.0
series. Users of 17.0 are encouraged to migrate to the 17.1 series in order
to obtain future fixes.
</p>
<h2>May 25, 2017</h2>
<p>
<a href="relnotes/17.1.1.html">Mesa 17.1.1</a> is released.
This is a bug-fix release.
</p>
<h2>May 12, 2017</h2>
<p>
<a href="relnotes/17.0.6.html">Mesa 17.0.6</a> is released.
This is a bug-fix release.
</p>
<h2>May 10, 2017</h2>
<p>
<a href="relnotes/17.1.0.html">Mesa 17.1.0</a> is released. This is a
new development release. See the release notes for more information
about the release.
</p>
<h2>April 28, 2017</h2>
<p>
<a href="relnotes/17.0.5.html">Mesa 17.0.5</a> is released.

View File

@@ -2,7 +2,7 @@
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Release calendar</title>
<title>Releasing process</title>
<link rel="stylesheet" type="text/css" href="mesa.css">
</head>
<body>
@@ -39,66 +39,54 @@ if you'd like to nominate a patch in the next stable release.
<th>Notes</th>
</tr>
<tr>
<td rowspan="3">17.0</td>
<td>2017-04-28</td>
<td>17.0.5</td>
<td>Andres Gomez</td>
<td></td>
</tr>
<tr>
<td>2017-05-12</td>
<td>17.0.6</td>
<td>Emil Velikov</td>
<td></td>
</tr>
<tr>
<td>2017-05-26</td>
<td>17.0.7</td>
<td>Emil Velikov</td>
<td>Final planned release for the 17.0 series</td>
</tr>
<tr>
<td rowspan="5">17.1</td>
<td>2017-06-30</td>
<td>17.1.4</td>
<td>Andres Gomez</td>
<td></td>
</tr>
<tr>
<td>2017-07-14</td>
<td>17.1.5</td>
<td>Andres Gomez</td>
<td></td>
</tr>
<tr>
<td>2017-07-28</td>
<td>17.1.6</td>
<td>2017-04-28</td>
<td>17.1.0-rc3</td>
<td>Emil Velikov</td>
<td></td>
</tr>
<tr>
<td>2017-08-11</td>
<td>17.1.7</td>
<td>Juan A. Suarez Romero</td>
<td>2017-05-05</td>
<td>17.1.0-rc4</td>
<td>Emil Velikov</td>
<td>May be promoted to 17.1.0 final</td>
</tr>
<tr>
<td>2017-05-19</td>
<td>17.1.1</td>
<td>Emil Velikov</td>
<td></td>
</tr>
<tr>
<td>2017-08-25</td>
<td>17.1.8</td>
<td>Andres Gomez</td>
<td>Final planned release for the 17.1 series</td>
</tr>
<tr>
<td rowspan="5">17.2</td>
<td>2017-07-21</td>
<td>17.2.0-rc1</td>
<td>2017-06-02</td>
<td>17.1.2</td>
<td>Emil Velikov</td>
<td></td>
</tr>
<tr>
<td>2017-07-28</td>
<td>17.2.0-rc2</td>
<td>2017-06-16</td>
<td>17.1.3</td>
<td>Emil Velikov</td>
<td></td>
</tr>
<tr>
<td>2017-08-04</td>
<td>17.2.0-rc3</td>
<td>Emil Velikov</td>
<td></td>
</tr>
<tr>
<td>2017-08-11</td>
<td>17.2.0-rc4</td>
<td>Emil Velikov</td>
<td>May be promoted to 17.2.0 final</td>
</tr>
<tr>
<td>2017-08-25</td>
<td>17.2.1</td>
<td>Emil Velikov</td>
<td></td>
</table>
</div>

View File

@@ -14,7 +14,6 @@
<iframe src="contents.html"></iframe>
<div class="content">
<h1>Releasing process</h1>
<ul>
@@ -24,13 +23,11 @@
<li><a href="#branch">Making a branchpoint</a>
<li><a href="#prerelease">Pre-release announcement</a>
<li><a href="#release">Making a new release</a>
<li><a href="#calendar">Update the calendar</a>
<li><a href="#announce">Announce the release</a>
<li><a href="#website">Update the mesa3d.org website</a>
<li><a href="#bugzilla">Update Bugzilla</a>
</ul>
<h1 id="overview">Overview</h1>
<p>
@@ -51,7 +48,6 @@ For example:
Mesa 12.0.2 - 12.0 branch, bugfix
</pre>
<h1 id="schedule">Release schedule</h1>
<p>
@@ -86,24 +82,15 @@ The final release from the 12.0 series Mesa 12.0.5 will be out around the same
time (or shortly after) 13.0.1 is out.
</p>
<h1 id="pickntest">Cherry-picking and testing</h1>
<p>
Commits nominated for the active branch are picked as based on the
<a href="submittingpatches.html#criteria" target="_parent">criteria</a> as
described in the same section.
</p>
<p>
Nomination happens on the mesa-stable@ mailing list. However, the
maintainer is responsible for checking for forgotten candidates in the
master branch. This is achieved by a combination of ad-hoc scripts and
a casual search for terms such as regression, fix, broken and similar.
</p>
<p>
Maintainer is also responsible for testing in various possible permutations of
Maintainer is responsible for testing in various possible permutations of
the autoconf and scons build.
</p>
@@ -117,57 +104,33 @@ release. This is made <strong>only</strong> with explicit permission/request,
and the patch <strong>must</strong> be very well contained. Thus it cannot
affect more than one driver/subsystem.
</p>
<p>
Currently Ilia Mirkin and AMD devs have requested a "permanent" exception.
</p>
<ul>
<li>make distcheck, scons and scons check must pass
<li>Testing with different versions of system components - LLVM and others - is also
performed where possible.
<li>As a general rule, testing with various combinations of configure
switches, depending on the specific patchset.
</ul>
<p>
Achieved by combination of local ad-hoc scripts, mingw-w64 cross
compilation and AppVeyor plus Travis-CI, the latter as part of their
Github integration.
Achieved by combination of local ad-hoc scripts and AppVeyor plus Travis-CI,
the latter as part of their Github integration.
</p>
<p>
For Windows related changes, the main contact point is Brian
Paul. Jose Fonseca can also help as a fallback contact.
</p>
<p>
For Android related changes, the main contact is Tapani
P&auml;lli. Mauro Rossi is collaborating with android-x86 and may
provide feedback about the build status in that project.
</p>
<p>
For MacOSX related changes, Jeremy Huddleston Sequoia is currently a
good contact point.
</p>
<p>
<strong>Note:</strong> If a patch in the current queue needs any additional
fix(es), then they should be squashed together.
<br>
The commit messages and the <code>cherry picked from</code> tags must be preserved.
</p>
<p>
This should be noted in the <a href="#prerelease">pre-announce</a> email.
</p>
<pre>
git show b10859ec41d09c57663a258f43fe57c12332698e
commit b10859ec41d09c57663a258f43fe57c12332698e
Author: Jonas Pfeil &lt;pfeiljonas@gmx.de&gt;
Author: Jonas Pfeil &ltpfeiljonas@gmx.de&gt
Date: Wed Mar 1 18:11:10 2017 +0100
ralloc: Make sure ralloc() allocations match malloc()'s alignment.
@@ -186,6 +149,7 @@ This should be noted in the <a href="#prerelease">pre-announce</a> email.
(cherry picked from commit ff494fe999510ea40e3ed5827e7818550b6de126)
</pre>
</p>
<h2>Regression/functionality testing</h2>
@@ -193,23 +157,15 @@ This should be noted in the <a href="#prerelease">pre-announce</a> email.
Less often (once or twice), shortly before the pre-release announcement.
Ensure that testing is redone if Intel devs have requested an exception, as per above.
</p>
<ul>
<li><em>no regressions should be observed for Piglit/dEQP/CTS/Vulkan on Intel platforms</em>
<li><em>no regressions should be observed for Piglit using the swrast, softpipe
and llvmpipe drivers</em>
</ul>
<p>
Currently testing is performed courtesy of the Intel OTC team and their Jenkins CI setup. Check with the Intel team over IRC how to get things set up.
</p>
<p>
Installing the built driver from the pre-announced RC branch on the
system and using it for everyday work until the release may be a good
idea too.
</p>
<h1 id="branch">Making a branchpoint</h1>
@@ -249,18 +205,15 @@ To setup the branchpoint:
Now go to
<a href="https://bugs.freedesktop.org/editversions.cgi?action=add&amp;product=Mesa" target="_parent">Bugzilla</a> and add the new Mesa version X.Y.
</p>
<p>
Check that there are no distribution breaking changes and revert them if needed.
For example: files being overwritten on install, etc. Happens extremely rarely -
we had only one case so far (see commit 2ced8eb136528914e1bf4e000dea06a9d53c7e04).
</p>
<p>
Proceed to <a href="#release">release</a> -rc1.
</p>
<h1 id="prerelease">Pre-release announcement</h1>
<p>
@@ -274,22 +227,18 @@ release is made.
</p>
<h2>Terminology used</h2>
<ul><li>Nominated</ul>
<p>
Patch that is nominated but yet to be merged into the patch queue/branch.
</p>
<ul><li>Queued</ul>
<p>
Patch is in the queue/branch and will feature in the next release,
barring reported regressions or objections from developers.
</p>
<ul><li>Rejected</ul>
<p>
Patch does not fit the
<a href="submittingpatches.html#criteria" target="_parent">criteria</a> and
@@ -395,7 +344,6 @@ AUTHOR (NUMBER):
Reason: ...
</pre>
<h1 id="release">Making a new release</h1>
<p>
@@ -403,21 +351,18 @@ These are the instructions for making a new Mesa release.
</p>
<h3>Get latest source files</h3>
<p>
Ensure the latest code is available - both in your local master and the
relevant branch.
</p>
<h3>Perform basic testing</h3>
<p>
Most of the testing should already be done during the
<a href="#pickntest">cherry-pick</a> and
<a href="#prerelease">pre-announce</a> stages.
So we do a quick 'touch test'
</p>
So we do a quick 'touch test'
<ul>
<li>make distcheck (you can omit this if you're not using --dist below)
<li>scons (from release tarball)
@@ -460,7 +405,7 @@ Here is one solution that I've been using.
--enable-glx-tls \
--enable-gbm \
--enable-egl \
--with-platforms=x11,drm,wayland,surfaceless
--with-egl-platforms=x11,drm,wayland
make -j2 &amp;&amp; DESTDIR=`pwd`/test make -j6 install
__glxinfo_cmd='glxinfo 2>&amp;1 | egrep -o "Mesa.*|Gallium.*|.*dri\.so"'
__glxgears_cmd='glxgears 2>&amp;1 | grep -v "configuration file"'
@@ -510,7 +455,6 @@ be empty (TBD) at this point.
<p>
Two scripts are available to help generate portions of the release notes:
</p>
<pre>
./bin/bugzilla_mesa.sh
@@ -527,7 +471,6 @@ to be included in the release notes.
<p>
Commit these changes and push the branch.
</p>
<pre>
git push origin HEAD
</pre>
@@ -538,7 +481,6 @@ Commit these changes and push the branch.
<p>
Start the release process.
</p>
<pre>
../relative/path/to/release.sh . # append --dist if you've already done distcheck above
</pre>
@@ -576,15 +518,7 @@ docs/index.html to add a news entry. Then commit and push:
</pre>
<h1 id="calendar">Update the calendar</h1>
<p>
Remove the version from the <a href="release-calendar.html" target="_parent">calendar</a>.
</p>
<h1 id="announce">Announce the release</h1>
<p>
Use the generated template during the releasing process.
</p>
@@ -597,7 +531,6 @@ As the hosting was moved to freedesktop, git hooks are deployed to update the
website. Manually check that it is updated 5-10 minutes after the final <code>git push</code>
</p>
<h1 id="bugzilla">Update Bugzilla</h1>
<p>

View File

@@ -21,12 +21,6 @@ The release notes summarize what's new or changed in each Mesa release.
</p>
<ul>
<li><a href="relnotes/17.1.3.html">17.1.3 release notes</a>
<li><a href="relnotes/17.1.2.html">17.1.2 release notes</a>
<li><a href="relnotes/17.0.7.html">17.0.7 release notes</a>
<li><a href="relnotes/17.1.1.html">17.1.1 release notes</a>
<li><a href="relnotes/17.0.6.html">17.0.6 release notes</a>
<li><a href="relnotes/17.1.0.html">17.1.0 release notes</a>
<li><a href="relnotes/17.0.5.html">17.0.5 release notes</a>
<li><a href="relnotes/17.0.4.html">17.0.4 release notes</a>
<li><a href="relnotes/17.0.3.html">17.0.3 release notes</a>

View File

@@ -1,186 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.6 Release Notes / May 12, 2017</h1>
<p>
Mesa 17.0.6 is a bug fix release which fixes bugs found since the 17.0.5 release.
</p>
<p>
Mesa 17.0.6 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
f1b2497d553e9a584f0caa3a2d9d310e27ead15fb0af170da69f6e70fb5031cd mesa-17.0.6.tar.gz
89ecf3bcd0f18dcca5aaa42bf36bb52a2df33be89889f94aaaad91f7a504a69d mesa-17.0.6.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
</ul>
<h2>Changes</h2>
<p>Adam Jackson (1):</p>
<ul>
<li>egl/platform/drm: Don't take display ownership until gbm is initialized</li>
</ul>
<p>Andres Gomez (7):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.5</li>
<li>travis: replace Trusty-based LLVM toolchain apt-get with apt addon</li>
<li>travis: add the possibility of using the txc-dxtn library</li>
<li>cherry-ignore: 17.1 nominations only</li>
<li>cherry-ignore: fix regression in descriptor set freeing.</li>
<li>cherry-ignore: rejected commits</li>
<li>Update version to 17.0.6</li>
</ul>
<p>Ben Boeckel (1):</p>
<ul>
<li>scons: update for LLVM 4.0</li>
</ul>
<p>Brian Paul (1):</p>
<ul>
<li>st/mesa: move duplicated st_ws_framebuffer() function into header file</li>
</ul>
<p>Chad Versace (3):</p>
<ul>
<li>egl: Emit error when EGLSurface is lost</li>
<li>egl/android: Cancel any outstanding ANativeBuffer in surface destructor</li>
<li>egl/android: Mark surface as lost when dequeueBuffer fails</li>
</ul>
<p>Christian Gmeiner (1):</p>
<ul>
<li>etnaviv: add L8A8_UNORM texture format</li>
</ul>
<p>Dave Airlie (2):</p>
<ul>
<li>radv/wsi: report presentation error per image request</li>
<li>radv: enable POLARIS12 support.</li>
</ul>
<p>Emil Velikov (21):</p>
<ul>
<li>travis: correct libdrm required regex to also track libdrm itself</li>
<li>travis: add nearly all gallium drivers to the list</li>
<li>travis: use both cores for make/make check</li>
<li>travis: bring the scons build on par with AppVeyor</li>
<li>travis: explicitly LD_LIBRARY_PATH the local libraries</li>
<li>travis: enable apt cache</li>
<li>travis: automatically manage ccache caching</li>
<li>travis: remove unused -dev packages</li>
<li>travis: rework "if test" blocks in the script section</li>
<li>travis: split out matrix from env</li>
<li>travis: add separate "scons" and "scons llvm" targets</li>
<li>travis: add "scons swr" to the build matrix</li>
<li>travis: add "make swr" to the build matrix</li>
<li>travis: split the make target to three separate ones</li>
<li>travis: model scons check target like the make one</li>
<li>travis: add Gallium state-tracker targets</li>
<li>travis: enable wayland support</li>
<li>travis: bump MAKEFLAGS to -j4</li>
<li>gallium/dri: always link against shared glapi</li>
<li>mesa/dri: always link against shared glapi</li>
<li>glx: glX_proto_send.py: use correct compile guard GLX_INDIRECT_RENDERING</li>
</ul>
<p>Eric Anholt (1):</p>
<ul>
<li>nir: Pick just the channels we want for bitmap and drawpixels lowering.</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>gallium/targets: fix bool setting on BE architectures</li>
</ul>
<p>Jason Ekstrand (1):</p>
<ul>
<li>anv/cmd_buffer: Use the device allocator for QueueSubmit</li>
</ul>
<p>Johnson Lin (1):</p>
<ul>
<li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
</ul>
<p>Marek Olšák (2):</p>
<ul>
<li>radeonsi: adjust ESGS ring buffer size computation on VI</li>
<li>radeonsi: apply the tess+GS hang workaround to Polaris12 as well</li>
</ul>
<p>Nicolai Hähnle (1):</p>
<ul>
<li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
</ul>
<p>Philipp Zabel (3):</p>
<ul>
<li>renderonly: close transfer prime_fd</li>
<li>renderonly: drop resources on destroy</li>
<li>renderonly: use drmIoctl</li>
</ul>
<p>Rhys Kidd (3):</p>
<ul>
<li>travis: Support LLVM 3.8+ on Trusty-based Travis-CI via apt-get not apt addon</li>
<li>travis: Add radv vulkan driver to continuous integration</li>
<li>travis: Add radeonsi to continuous integration</li>
</ul>
<p>Rob Clark (1):</p>
<ul>
<li>freedreno/a3xx: fix hang w/ large render targets and small gmem</li>
</ul>
<p>Samuel Iglesias Gonsálvez (5):</p>
<ul>
<li>i965/vec4: fix vertical stride to avoid breaking region parameter rule</li>
<li>i965/vec4: fix register width for DF VGRF and UNIFORM</li>
<li>i965/vec4: don't modify regioning parameters to the sources of DF align1 instructions</li>
<li>anv: anv_gem_mmap() returns MAP_FAILED as mapping error</li>
<li>anv: vkBindImageMemory() should return VK_ERROR_OUT_OF_{HOST,DEVICE}_MEMORY on failure</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,145 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.0.7 Release Notes / June 1, 2017</h1>
<p>
Mesa 17.0.7 is a bug fix release which fixes bugs found since the 17.0.6 release.
</p>
<p>
Mesa 17.0.7 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
bc68d13c6b1a053b855ac453ebf7e62bd89511adf44bad6c613e09f7fa13390a mesa-17.0.7.tar.gz
f6d75304a229c8d10443e219d6b6c0c342567dbab5a879ebe7cfa3c9139c4492 mesa-17.0.7.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
</ul>
<h2>Changes</h2>
<p>Andres Gomez (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.0.6</li>
</ul>
<p>Bartosz Tomczyk (1):</p>
<ul>
<li>mesa: Avoid leaking surface in st_renderbuffer_delete</li>
</ul>
<p>Chad Versace (1):</p>
<ul>
<li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
</ul>
<p>Daniel Stone (7):</p>
<ul>
<li>vulkan: Fix Wayland uninitialised registry</li>
<li>vulkan/wsi/wayland: Remove roundtrip when creating image</li>
<li>vulkan/wsi/wayland: Use per-display event queue</li>
<li>vulkan/wsi/wayland: Use proxy wrappers for swapchain</li>
<li>egl/wayland: Don't open-code roundtrip</li>
<li>egl/wayland: Use per-surface event queues</li>
<li>egl/wayland: Ensure we get a back buffer</li>
</ul>
<p>Emil Velikov (5):</p>
<ul>
<li>st/va: fix misplaced closing bracket</li>
<li>anv: automake: list shared libraries after the static ones</li>
<li>radv: automake: list shared libraries after the static ones</li>
<li>egl/wayland: select the format based on the interface used</li>
<li>Update version to 17.0.7</li>
</ul>
<p>Eric Anholt (2):</p>
<ul>
<li>renderonly: Initialize fields of struct winsys_handle.</li>
<li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
</ul>
<p>Hans de Goede (1):</p>
<ul>
<li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>nvc0/ir: SHLADD's middle source must be an immediate</li>
</ul>
<p>Jason Ekstrand (2):</p>
<ul>
<li>i965/blorp: Do an end-of-pipe sync on both sides of fast-clear ops</li>
<li>i965: Round copy size to the nearest block in intel_miptree_copy</li>
</ul>
<p>Lucas Stach (1):</p>
<ul>
<li>etnaviv: stop oversizing buffer resources</li>
</ul>
<p>Nanley Chery (2):</p>
<ul>
<li>anv/formats: Update the three-channel BC1 mappings</li>
<li>i965/formats: Update the three-channel DXT1 mappings</li>
</ul>
<p>Pohjolainen, Topi (1):</p>
<ul>
<li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
</ul>
<p>Samuel Iglesias Gonsálvez (3):</p>
<ul>
<li>i965/vec4/gs: restore the uniform values which were overwritten by failed vec4_gs_visitor execution</li>
<li>i965/vec4: fix swizzle and writemask when loading a uniform with constant offset</li>
<li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
</ul>
<p>Tom Stellard (1):</p>
<ul>
<li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
</ul>
</div>
</body>
</html>

View File

@@ -14,7 +14,7 @@
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.1.0 Release Notes / May 10, 2017</h1>
<h1>Mesa 17.1.0 Release Notes / TBD</h1>
<p>
Mesa 17.1.0 is a new development release.
@@ -34,8 +34,7 @@ because compatibility contexts are not supported.
<h2>SHA256 checksums</h2>
<pre>
c388069581a72853161657ac365f2c083afabd7cffd53f80513dacfa1cfa58a8 mesa-17.1.0.tar.gz
cf234a6ed4764673886b6661553b54675776ef0898f774716173cec890ac3b17 mesa-17.1.0.tar.xz
TBD.
</pre>
@@ -65,147 +64,6 @@ Note: some of the new features are only available with certain drivers.
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=68504">Bug 68504</a> - 9.2-rc1 workaround for clover build failure on ppc/altivec: cannot convert 'bool' to '__vector(4) __bool int' in return</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84325">Bug 84325</a> - X.Org segfaults when starting DE on an Intel+Radeon laptop, caused by libpciaccess cleanup, patch attached</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93089">Bug 93089</a> - mesa fails to check for gcc atomic primitives before using them</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95460">Bug 95460</a> - Please add more drivers (freedreno, virgl) to features.txt status document</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=96743">Bug 96743</a> - [BYT, HSW, SKL, BXT, KBL] GPU hangs with GfxBench 4.0 CarChase</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97102">Bug 97102</a> - [dri][swr] stack overflow / infinite loop with GALLIUM_DRIVER=swr</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97338">Bug 97338</a> - Black squares in the Spec Ops: The Line chapter select screen</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97524">Bug 97524</a> - Samplers referring to the same texture unit with different types should raise GL_INVALID_OPERATION</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97967">Bug 97967</a> - glsl/tests/cache-test regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97988">Bug 97988</a> - [radeonsi] playing back videos with VDPAU exhibits deinterlacing/anti-aliasing issues not visible with VA-API</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98263">Bug 98263</a> - [radv] The Talos Principle fails to launch with &quot;Fatal error: Cannot set display mode.&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98428">Bug 98428</a> - Undefined non-weak-symbol in dri-drivers</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98502">Bug 98502</a> - Delay when starting firefox, thunderbird or chromium and dmesg spam</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98869">Bug 98869</a> - Electronic Super Joy graphic artefacts (regression,bisected)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98975">Bug 98975</a> - Wasteland 2 Directors Cut: Hangs. GPU fault</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99010">Bug 99010</a> - --disable-gallium-llvm no longer recognized</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99246">Bug 99246</a> - [d3dadapter+radeonsi &amp; bisect] EVE-Online : hang on wormhole sight</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99265">Bug 99265</a> - i965: Piglit egl_khr_gl_renderbuffer_image-clear-shared-image fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99339">Bug 99339</a> - Blender line rendering broken after removing XY clipping of lines</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99401">Bug 99401</a> - [g33] regression: piglit.spec.!opengl 1_0.gl-1_0-beginend-coverage</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99450">Bug 99450</a> - [amdgpu] Payday 2 visual glitches on some models</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99451">Bug 99451</a> - polygon offset use after free</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99456">Bug 99456</a> - Firefox crashing when opening about:support with WebGL2 enabled</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99465">Bug 99465</a> - vtn_vector_construct writing out of bounds when given multiple non-zero length sources</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99484">Bug 99484</a> - Crusader Kings 2 - Loading bars, siege bars, morale bars, etc. do not render correctly</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99532">Bug 99532</a> - Compute shader doesn't give right result under some circumstances</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99542">Bug 99542</a> - vdpau logging errors since gallium/radeon: adjust the rule for using the LINEAR_ALIGNED layout</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99631">Bug 99631</a> - segfault with OSVRTrackerView and openscenegraph git master</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99633">Bug 99633</a> - rasterizer/core/clip.h:279:49: error: const struct API_STATE has no member named linkageCount</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99660">Bug 99660</a> - Not all of the int64 conversion opcodes got implemented</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99677">Bug 99677</a> - heap-use-after-free in glsl</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99692">Bug 99692</a> - [radv] Mostly broken on Hawaii PRO/CIK ASICs</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99701">Bug 99701</a> - loader.c:353:8: error: implicit declaration of function 'geteuid' is invalid in C99 [-Werror,-Wimplicit-function-declaration]</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99715">Bug 99715</a> - Don't print: &quot;Note: Buggy applications may crash, if they do please report to vendor&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99789">Bug 99789</a> - Memory leak on failure to create an ir_constant in calculate_iterations in loop_controls.cpp</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99817">Bug 99817</a> - [softpipe] piglit glsl-fs-tan-1 regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99842">Bug 99842</a> - GL_ARB_transform_feedback2 on i965 gen6</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99850">Bug 99850</a> - Tessellation bug on Carrizo</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99918">Bug 99918</a> - disk_cache.h:57:20: error: no member named 'st_mtim' in 'struct stat'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99953">Bug 99953</a> - device9.c:122:49: error: PIPE_CAP_USER_INDEX_BUFFERS undeclared (first use in this function)</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99955">Bug 99955</a> - [r600g] GPU load always displayed at 100% with GALLIUM_HUD=GPU-load</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100026">Bug 100026</a> - piglit.spec.arb_shader_subroutine.compiler.direct-call_vert regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100049">Bug 100049</a> - &quot;ralloc: Make sure ralloc() allocations match malloc()'s alignment.&quot; causes seg fault in 32bit build</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100060">Bug 100060</a> - wsi/wsi_common_wayland.c:25:41: fatal error: wayland-drm-client-protocol.h: No such file or directory</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100061">Bug 100061</a> - LODQ instruction generated with invalid dst mask</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100068">Bug 100068</a> - LLVM ERROR: Cannot select: intrinsic %llvm.amdgcn.buffer.load.format</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100088">Bug 100088</a> - piglit.spec.arb_get_texture_sub_image.arb_get_texture_sub_image regressions</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100091">Bug 100091</a> - Failure to create folder for on-disk shader cache</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100133">Bug 100133</a> - swr_context.cpp:336:44: error: invalid conversion from uint {aka unsigned int} to pipe_render_cond_flag [-fpermissive]</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100154">Bug 100154</a> - test_eu_compact regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100180">Bug 100180</a> - Build failure in GNOME Continuous</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100182">Bug 100182</a> - Flickering in The Talos Principle on Sky Lake GT4.</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100201">Bug 100201</a> - Windows scons build with MSVC toolchain and LLVM 4.0 fails</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100223">Bug 100223</a> - marshal_generated.c:38:10: fatal error: 'X11/Xlib-xcb.h' file not found</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100236">Bug 100236</a> - Undefined symbols for architecture x86_64: &quot;typeinfo for llvm::RTDyldMemoryManager&quot;</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100259">Bug 100259</a> - [EGL] [GBM] undefined reference to `gbm_bo_create_with_modifiers'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100288">Bug 100288</a> - clover unable to run OpenCL kernels since 03127bb radeonsi: compile all TGSI compute shaders asynchronously</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100303">Bug 100303</a> - Adding a single, meaningless if-else to a shader source leads to different image</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100391">Bug 100391</a> - SachaWillems deferredmultisampling asserts</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100452">Bug 100452</a> - push_constants host memory leak when resetting command buffer</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100531">Bug 100531</a> - [regression] Broken graphics in several games</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100562">Bug 100562</a> - u_debug_stack.c:59: undefined reference to `_Ux86_64_getcontext'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100569">Bug 100569</a> - core/resource.cpp:36:33: error: non-constant-expression cannot be narrowed from type 'int' to 'int16_t' (aka 'short') in initializer list [-Wc++11-narrowing]</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100574">Bug 100574</a> - anv_device.c:189: undefined reference to `anv_gem_supports_48b_addresses'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100582">Bug 100582</a> - [GEN8+] piglit.spec.arb_stencil_texturing.glblitframebuffer corrupts state.gl_texture* assertions</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100600">Bug 100600</a> - anv_device.c:1337: undefined reference to `anv_gem_busy'</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100620">Bug 100620</a> - [SKL] 48-bit addresses break DOOM</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100663">Bug 100663</a> - commit 61e47d92c5196 breaks RS780</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100690">Bug 100690</a> - [Regression, bisected] TotalWar: Warhammer corrupted graphics</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100892">Bug 100892</a> - Polaris 12: winsys init bad switch (missing break) initializing addrlib</li>
</ul>
<h2>Changes</h2>

View File

@@ -1,188 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.1.1 Release Notes / May 25, 2017</h1>
<p>
Mesa 17.1.1 is a bug fix release which fixes bugs found since the 17.1.0 release.
</p>
<p>
Mesa 17.1.1 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
652315af87f2bb015ce99ee3b90d9d115d53cbf9e052493bd13d521a753b1930 mesa-17.1.1.tar.gz
aed503f94c0c1630a162a3e276f4ee12a86764cee4cb92338ea2dea99a04e7ef mesa-17.1.1.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100854">Bug 100854</a> - YUV to RGB Color Space Conversion result is not precise</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100925">Bug 100925</a> - [HSW/BSW/BDW/SKL] Google Earth is not resolving all the details in the map correctly</li>
</ul>
<h2>Changes</h2>
<p>Alex Deucher (1):</p>
<ul>
<li>radeonsi: add new vega10 pci ids</li>
</ul>
<p>Andres Gomez (2):</p>
<ul>
<li>bin/get-fixes-pick-list.sh: don't warn if more than one, go over them</li>
<li>bin/get-fixes-pick-list.sh: bring back the warning</li>
</ul>
<p>Bruce Cherniak (1):</p>
<ul>
<li>swr: move msaa resolve to generalized StoreTile</li>
</ul>
<p>Chad Versace (1):</p>
<ul>
<li>egl: Partially revert 23c86c74, fix eglMakeCurrent</li>
</ul>
<p>Chih-Wei Huang (1):</p>
<ul>
<li>Android: correct libz dependency</li>
</ul>
<p>Daniel Stone (1):</p>
<ul>
<li>gbm/dri: Fix sign-extension in modifier query</li>
</ul>
<p>Emil Velikov (6):</p>
<ul>
<li>docs: add sha256 checksums for 17.1.0</li>
<li>radeon: automake: remove unneeded elf Cflags/Libs</li>
<li>configure: remove unneeded bits around libunwind handling</li>
<li>egl: add g_egldispatchstubs.h to the release tarball</li>
<li>automake: add SWR LLVM gen_builder.hpp workaround</li>
<li>Update version to 17.1.1</li>
</ul>
<p>Eric Anholt (2):</p>
<ul>
<li>renderonly: Initialize fields of struct winsys_handle.</li>
<li>vc4: Don't allocate new BOs to avoid synchronization when they're shared.</li>
</ul>
<p>Grazvydas Ignotas (2):</p>
<ul>
<li>anv: fix possible stack corruption</li>
<li>anv: don't leak DRM devices</li>
</ul>
<p>Hans de Goede (1):</p>
<ul>
<li>glxglvnddispatch: Add missing dispatch for GetDriverConfig</li>
</ul>
<p>Ilia Mirkin (1):</p>
<ul>
<li>nvc0/ir: SHLADD's middle source must be an immediate</li>
</ul>
<p>Johnson Lin (1):</p>
<ul>
<li>nir/lower_tex: Fix minor error in YUV color conversion matrix</li>
</ul>
<p>Juan A. Suarez Romero (2):</p>
<ul>
<li>bin/get-{extra,fixes}-pick-list.sh: add support for ignore list</li>
<li>bin/get-{extra,fixes}-pick-list.sh: improve output</li>
</ul>
<p>Lucas Stach (2):</p>
<ul>
<li>etnaviv: stop oversizing buffer resources</li>
<li>etnaviv: allow R/B swapped surfaces to be cleared</li>
</ul>
<p>Marek Olšák (2):</p>
<ul>
<li>amd/addrlib: import Raven support</li>
<li>radeonsi/gfx9: add support for Raven</li>
</ul>
<p>Nanley Chery (2):</p>
<ul>
<li>anv/formats: Update the three-channel BC1 mappings</li>
<li>i965/formats: Update the three-channel DXT1 mappings</li>
</ul>
<p>Nicolai Hähnle (5):</p>
<ul>
<li>radeonsi: mark fast-cleared textures as compressed when dirtying</li>
<li>radeonsi: fix primitive ID in fragment shader when using tessellation</li>
<li>radeonsi: fix gl_PrimitiveID in tessellation with instanced draws on SI</li>
<li>radeonsi: fix gl_PrimitiveIDIn in geometry shader when using tessellation</li>
<li>st/mesa: remove an incorrect assertion</li>
</ul>
<p>Pohjolainen, Topi (1):</p>
<ul>
<li>intel/isl/gen7: Use stencil vertical alignment of 8 instead of 4</li>
</ul>
<p>Rob Clark (2):</p>
<ul>
<li>mesa/st: fix yuv EGLImages</li>
<li>freedreno: fix crash when flush() but no rendering</li>
</ul>
<p>Rob Herring (1):</p>
<ul>
<li>virgl: fix virgl_bo_transfer_{put, get} box struct copy</li>
</ul>
<p>Samuel Iglesias Gonsálvez (3):</p>
<ul>
<li>i965/vec4/gs: restore the uniform values which were overwritten by failed vec4_gs_visitor execution</li>
<li>i965/vec4: fix swizzle and writemask when loading a uniform with constant offset</li>
<li>i965/vec4: load dvec3/4 uniforms first in the push constant buffer</li>
</ul>
<p>Tom Stellard (1):</p>
<ul>
<li>gallivm: Make sure module has the correct data layout when pass manager runs</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,187 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.1.2 Release Notes / June 5, 2017</h1>
<p>
Mesa 17.1.2 is a bug fix release which fixes bugs found since the 17.1.1 release.
</p>
<p>
Mesa 17.1.2 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
0d2020c2115db0d13a5be0075abf0da143290f69f5817a2f277861e89166a3e1 mesa-17.1.2.tar.gz
0937804f43746339b1f9540d8f9c8b4a1bb3d3eec0e4020eac283b8799798239 mesa-17.1.2.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98833">Bug 98833</a> - [REGRESSION, bisected] Wayland revert commit breaks non-Vsync fullscreen frame updates</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100741">Bug 100741</a> - Chromium - Memory leak</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100877">Bug 100877</a> - vulkan/tests/block_pool_no_free regression</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=101110">Bug 101110</a> - Build failure in GNOME Continuous</li>
</ul>
<h2>Changes</h2>
<p>Bartosz Tomczyk (1):</p>
<ul>
<li>mesa: Avoid leaking surface in st_renderbuffer_delete</li>
</ul>
<p>Bas Nieuwenhuizen (1):</p>
<ul>
<li>radv: Reserve space for descriptor and push constant user SGPR setting.</li>
</ul>
<p>Daniel Stone (7):</p>
<ul>
<li>vulkan: Fix Wayland uninitialised registry</li>
<li>vulkan/wsi/wayland: Remove roundtrip when creating image</li>
<li>vulkan/wsi/wayland: Use per-display event queue</li>
<li>vulkan/wsi/wayland: Use proxy wrappers for swapchain</li>
<li>egl/wayland: Don't open-code roundtrip</li>
<li>egl/wayland: Use per-surface event queues</li>
<li>egl/wayland: Ensure we get a back buffer</li>
</ul>
<p>Emil Velikov (24):</p>
<ul>
<li>docs: add sha256 checksums for 17.1.1</li>
<li>configure: move platform handling further up</li>
<li>configure: rename remaining HAVE_EGL_PLATFORM_* guards</li>
<li>configure: update remaining --with-egl-platforms references</li>
<li>configure: loosen --with-platforms heuristics</li>
<li>configure: enable the surfaceless platform by default</li>
<li>configure: set HAVE_foo_PLATFORM as applicable</li>
<li>configure: error out when building GLX w/o the X11 platform</li>
<li>configure: check once for DRI3 dependencies</li>
<li>loader: build libloader_dri3_helper.la only with HAVE_PLATFORM_X11</li>
<li>configure: error out when building X11 Vulkan without DRI3</li>
<li>auxiliary/vl: use vl_*_screen_create stubs when building w/o platform</li>
<li>st/va: fix misplaced closing bracket</li>
<li>st/omx: remove unneeded X11 include</li>
<li>st/omx: fix building against X11-less setups</li>
<li>gallium/targets: link against XCB only as needed</li>
<li>configure: error out if building VA w/o supported platform</li>
<li>configure: error out if building OMX w/o supported platform</li>
<li>configure: error out if building VDPAU w/o supported platform</li>
<li>configure: error out if building XVMC w/o supported platform</li>
<li>travis: remove workarounds for the Vulkan target</li>
<li>anv: automake: list shared libraries after the static ones</li>
<li>radv: automake: list shared libraries after the static ones</li>
<li>egl/wayland: select the format based on the interface used</li>
</ul>
<p>Ian Romanick (3):</p>
<ul>
<li>r100: Don't assume that the base mipmap of a texture exists</li>
<li>r100,r200: Don't assume glVisual is non-NULL during context creation</li>
<li>r100: Use _mesa_get_format_base_format in radeon_update_wrapper</li>
</ul>
<p>Jason Ekstrand (17):</p>
<ul>
<li>anv: Handle color layout transitions from the UNINITIALIZED layout</li>
<li>anv: Handle transitioning depth from UNDEFINED to other layouts</li>
<li>anv/image: Get rid of the memset(aux, 0, sizeof(aux)) hack</li>
<li>anv: Predicate 48bit support on gen &gt;= 8</li>
<li>anv: Set up memory types and heaps during physical device init</li>
<li>anv: Set image memory types based on the type count</li>
<li>i965/blorp: Do an end-of-pipe sync on both sides of fast-clear ops</li>
<li>i965: Round copy size to the nearest block in intel_miptree_copy</li>
<li>anv: Set EXEC_OBJECT_ASYNC when available</li>
<li>anv: Determine the type of mapping based on type metadata</li>
<li>anv: Add valid_bufer_usage to the memory type metadata</li>
<li>anv: Stop setting BO flags in bo_init_new</li>
<li>anv: Make supports_48bit_addresses a heap property</li>
<li>anv: Refactor memory type setup</li>
<li>anv: Advertise both 32-bit and 48-bit heaps when we have enough memory</li>
<li>i965: Rework Sandy Bridge HiZ and stencil layouts</li>
<li>anv: Require vertex buffers to come from a 32-bit heap</li>
</ul>
<p>Juan A. Suarez Romero (13):</p>
<ul>
<li>Revert "android: fix segfault within swap_buffers"</li>
<li>cherry-ignore: radeonsi: load patch_id for TES-as-ES when exporting for PS</li>
<li>cherry-ignore: anv: Determine the type of mapping based on type metadata</li>
<li>cherry-ignore: anv: Stop setting BO flags in bo_init_new</li>
<li>cherry-ignore: anv: Make supports_48bit_addresses a heap property</li>
<li>cherry-ignore: anv: Advertise both 32-bit and 48-bit heaps when we have enough memory</li>
<li>cherry-ignore: anv: Require vertex buffers to come from a 32-bit heap</li>
<li>cherry-ignore: radv: fix regression in descriptor set freeing</li>
<li>cherry-ignore: anv: Add valid_bufer_usage to the memory type metadata</li>
<li>cherry-ignore: anv: Refactor memory type setup</li>
<li>Revert "cherry-ignore: anv: [...]"</li>
<li>Revert "cherry-ignore: anv: Require vertex buffers to come from a 32-bit heap"</li>
<li>Update version to 17.1.2</li>
</ul>
<p>Marek Olšák (1):</p>
<ul>
<li>radeonsi/gfx9: compile shaders with +xnack</li>
</ul>
<p>Nicolai Hähnle (1):</p>
<ul>
<li>st/mesa: remove redundant stfb-&gt;iface checks</li>
</ul>
<p>Nicolas Boichat (1):</p>
<ul>
<li>configure.ac: Also match -androideabi tuple</li>
</ul>
<p>Rob Clark (1):</p>
<ul>
<li>freedreno: fix fence creation fail if no rendering</li>
</ul>
<p>Tapani Pälli (1):</p>
<ul>
<li>egl/android: fix segfault within swap_buffers</li>
</ul>
<p>Timothy Arceri (1):</p>
<ul>
<li>st/mesa: don't mark the program as in cache_fallback when there is cache miss</li>
</ul>
</div>
</body>
</html>

View File

@@ -1,156 +0,0 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>
<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>
<iframe src="../contents.html"></iframe>
<div class="content">
<h1>Mesa 17.1.3 Release Notes / June 19, 2017</h1>
<p>
Mesa 17.1.3 is a bug fix release which fixes bugs found since the 17.1.2 release.
</p>
<p>
Mesa 17.1.3 implements the OpenGL 4.5 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
4.5 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>
<h2>SHA256 checksums</h2>
<pre>
81ae9127286ff8d631e466d258608d6dea9854fe7bee2e8521da44c7544f01e5 mesa-17.1.3.tar.gz
5f1ee9a8aea2880f887884df2dea0c16dd1b13eb42fd2e52265db0dc1b380e8c mesa-17.1.3.tar.xz
</pre>
<h2>New features</h2>
<p>None</p>
<h2>Bug fixes</h2>
<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=100988">Bug 100988</a> - glXGetCurrentDisplay() no longer works for FakeGLX contexts?</li>
</ul>
<h2>Changes</h2>
<p>Bas Nieuwenhuizen (3):</p>
<ul>
<li>radv: Set both compute and graphics SGPRS on descriptor set flush.</li>
<li>radv: Dirty all descriptors sets when changing the pipeline.</li>
<li>radv: Remove SI num RB override for occlusion queries.</li>
</ul>
<p>Brian Paul (1):</p>
<ul>
<li>xlib: fix glXGetCurrentDisplay() failure</li>
</ul>
<p>Chad Versace (1):</p>
<ul>
<li>i965/dri: Fix bad GL error in intel_create_winsys_renderbuffer()</li>
</ul>
<p>Chuck Atkins (1):</p>
<ul>
<li>configure.ac: Reduce zlib requirement from 1.2.8 to 1.2.3.</li>
</ul>
<p>Dave Airlie (3):</p>
<ul>
<li>radv: expose integrated device type for APUs.</li>
<li>radv: set fmask state to all 0s when no fmask. (v2)</li>
<li>glsl/lower_distance: only set max_array_access for 1D clip dist arrays</li>
</ul>
<p>Emil Velikov (1):</p>
<ul>
<li>Update version to 17.1.3</li>
</ul>
<p>Grazvydas Ignotas (1):</p>
<ul>
<li>radv: fix trace dumping for !use_ib_bos</li>
</ul>
<p>Jason Ekstrand (4):</p>
<ul>
<li>i965/blorp: Take a layer range in intel_hiz_exec</li>
<li>i965: Move the pre-depth-clear flush/stalls to intel_hiz_exec</li>
<li>i965: Perform HiZ flush/stall prior to HiZ resolves</li>
<li>i965: Mark depth surfaces as needing a HiZ resolve after blitting</li>
</ul>
<p>José Fonseca (1):</p>
<ul>
<li>automake: Link all libGL.so variants with -Bsymbolic.</li>
</ul>
<p>Juan A. Suarez Romero (1):</p>
<ul>
<li>docs: add sha256 checksums for 17.1.2</li>
</ul>
<p>Lucas Stach (1):</p>
<ul>
<li>etnaviv: always do cpu_fini in transfer_unmap</li>
</ul>
<p>Lyude (1):</p>
<ul>
<li>nvc0: disable BGRA8 images on Fermi</li>
</ul>
<p>Marek Olšák (3):</p>
<ul>
<li>st/mesa: don't load cached TGSI shaders on demand</li>
<li>radeonsi: fix a GPU hang with tessellation on 2-CU configs</li>
<li>radeonsi: disable the patch ID workaround on SI when the patch ID isn't used (v2)</li>
</ul>
<p>Nicolai Hähnle (1):</p>
<ul>
<li>radv: fewer than 8 RBs are possible</li>
</ul>
<p>Nicolas Dechesne (1):</p>
<ul>
<li>util/rand_xor: add missing include statements</li>
</ul>
<p>Tapani Pälli (1):</p>
<ul>
<li>egl: fix _eglQuerySurface in EGL_BUFFER_AGE_EXT case</li>
</ul>
<p>Thomas Hellstrom (1):</p>
<ul>
<li>dri3/GLX: Fix drawable invalidation v2</li>
</ul>
<p>Tim Rowley (1):</p>
<ul>
<li>swr: relax c++ requirement from c++14 to c++11</li>
</ul>
</div>
</body>
</html>

View File

@@ -44,8 +44,6 @@ Note: some of the new features are only available with certain drivers.
</p>
<ul>
<li>GL_ARB_bindless_texture on radeonsi</li>
<li>GL_ARB_post_depth_coverage on nvc0 (GM200+)</li>
<li>GL_ARB_shader_viewport_layer_array on nvc0 (GM200+)</li>
<li>GL_AMD_vertex_shader_layer on nvc0 (GM200+)</li>
<li>GL_AMD_vertex_shader_viewport_index on nvc0 (GM200+)</li>

View File

@@ -50,8 +50,6 @@ execution. These are generally used for debugging.
The filenames will be "shader_X.vert" or "shader_X.frag" where X
the shader ID.
<li><b>cache_info</b> - print debug information about shader cache
<li><b>cache_fb</b> - force cached shaders to be ignored and do a full
recompile via the fallback path</li>
<li><b>uniform</b> - print message to stdout when glUniform is called
<li><b>nopvert</b> - force vertex shaders to be a simple shader that just transforms
the vertex position with ftransform() and passes through the color and

View File

@@ -1,12 +0,0 @@
#!/bin/sh
# run git from the sources directory
cd "$(dirname "$0")"
# don't print anything if git fails
if ! git_sha1=$(git --git-dir=.git rev-parse --short=10 HEAD 2>/dev/null)
then
exit
fi
printf '#define MESA_GIT_SHA1 "git-%s"\n' "$git_sha1"

View File

@@ -702,7 +702,6 @@ struct __DRIuseInvalidateExtensionRec {
#define __DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS 46
#define __DRI_ATTRIB_YINVERTED 47
#define __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE 48
#define __DRI_ATTRIB_MAX (__DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE + 1)
/* __DRI_ATTRIB_RENDER_TYPE */
#define __DRI_ATTRIB_RGBA_BIT 0x01
@@ -1137,7 +1136,7 @@ struct __DRIdri2ExtensionRec {
* extensions.
*/
#define __DRI_IMAGE "DRI_IMAGE"
#define __DRI_IMAGE_VERSION 15
#define __DRI_IMAGE_VERSION 14
/**
* These formats correspond to the similarly named MESA_FORMAT_*
@@ -1494,67 +1493,6 @@ struct __DRIimageExtensionRec {
const uint64_t *modifiers,
const unsigned int modifier_count,
void *loaderPrivate);
/*
* Like createImageFromDmaBufs, but takes also format modifiers.
*
* For EGL_EXT_image_dma_buf_import_modifiers.
*
* \since 15
*/
__DRIimage *(*createImageFromDmaBufs2)(__DRIscreen *screen,
int width, int height, int fourcc,
uint64_t modifier,
int *fds, int num_fds,
int *strides, int *offsets,
enum __DRIYUVColorSpace color_space,
enum __DRISampleRange sample_range,
enum __DRIChromaSiting horiz_siting,
enum __DRIChromaSiting vert_siting,
unsigned *error,
void *loaderPrivate);
/*
* dmabuf format query to support EGL_EXT_image_dma_buf_import_modifiers.
*
* \param max Maximum number of formats that can be accommodated into
* \param formats. If zero, no formats are returned -
* instead, the driver returns the total number of
* supported dmabuf formats in \param count.
* \param formats Buffer to fill formats into.
* \param count Count of formats returned, or, total number of
* supported formats in case \param max is zero.
*
* Returns true on success.
*
* \since 15
*/
GLboolean (*queryDmaBufFormats)(__DRIscreen *screen, int max,
int *formats, int *count);
/*
* dmabuf format modifier query for a given format to support
* EGL_EXT_image_dma_buf_import_modifiers.
*
* \param fourcc The format to query modifiers for. If this format
* is not supported by the driver, return false.
* \param max Maximum number of modifiers that can be accommodated in
* \param modifiers. If zero, no modifiers are returned -
* instead, the driver returns the total number of
* modifiers for \param fourcc in \param count.
* \param modifiers Buffer to fill modifiers into.
* \param count Count of the modifiers returned, or, total number of
* supported modifiers for \param fourcc in case
* \param max is zero.
*
* Returns true upon success.
*
* \since 15
*/
GLboolean (*queryDmaBufModifiers)(__DRIscreen *screen, int fourcc,
int max, uint64_t *modifiers,
unsigned int *external_only,
int *count);
};
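/* Illustrative loader-side sketch (not part of this header) of the
 * two-call pattern documented for queryDmaBufFormats above: query the
 * count with max == 0, then allocate and fetch. Assumes "ext" and
 * "screen" were obtained from the driver in the usual way. */
#include <stdlib.h>

static int *
get_dmabuf_formats(const __DRIimageExtension *ext, __DRIscreen *screen,
                   int *count)
{
   int *formats;

   if (!ext->queryDmaBufFormats(screen, 0, NULL, count) || *count <= 0)
      return NULL;

   formats = calloc(*count, sizeof(*formats));
   if (!formats ||
       !ext->queryDmaBufFormats(screen, *count, formats, count)) {
      free(formats);
      return NULL;
   }

   return formats; /* caller owns the array and must free() it */
}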
@@ -1782,19 +1720,6 @@ struct __DRIbackgroundCallableExtensionRec {
* operations (e.g. it should just set a thread-local variable).
*/
void (*setBackgroundContext)(void *loaderPrivate);
/**
* Indicate that it is multithread safe to use glthread. For GLX/EGL
* platforms using Xlib, that involves calling XInitThreads, before
* opening an X display.
*
* Note: only supported if extension version is at least 2.
*
* \param loaderPrivate is the value that was passed to the driver when
* the context was created. This can be used by the loader to identify
* which context any callbacks are associated with.
*/
GLboolean (*isThreadSafe)(void *loaderPrivate);
};
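/* Illustrative sketch (not part of this header): per the isThreadSafe
 * note above, an Xlib-based loader or application enables safe glthread
 * use by calling XInitThreads before its first XOpenDisplay. */
#include <X11/Xlib.h>

static Display *
open_display_thread_safe(void)
{
   if (!XInitThreads()) /* returns a nonzero Status on success */
      return NULL;
   return XOpenDisplay(NULL);
}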
#endif

View File

@@ -502,13 +502,9 @@ thrd_current(void)
HANDLE hCurrentThread;
BOOL bRet;
/* GetCurrentThread() returns a pseudo-handle, which we need
* to pass to DuplicateHandle(). Only the resulting handle can be used
* from other threads.
*
* Note that neither handle can be compared to the one by thread_create.
* Only the thread IDs - as returned by GetThreadId() and GetCurrentThreadId()
* can be compared directly.
/* GetCurrentThread() returns a pseudo-handle, which is useless. We need
* to call DuplicateHandle to get a real handle. However the handle value
* will not match the one returned by thread_create.
*
* Other potential solutions would be:
* - define thrd_t as a thread ID, but this would mean we'd need to OpenThread for many operations
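
/* Standalone sketch of the pattern described above (illustrative, not
 * part of threads_win32.h): promote the pseudo-handle to a real handle
 * with DuplicateHandle, and compare threads by ID rather than by handle. */
#include <windows.h>

static HANDLE
current_thread_real_handle(void)
{
   HANDLE h = NULL;

   if (!DuplicateHandle(GetCurrentProcess(), GetCurrentThread(),
                        GetCurrentProcess(), &h,
                        0, FALSE, DUPLICATE_SAME_ACCESS))
      return NULL;

   /* Compare via GetThreadId(h) == GetCurrentThreadId(); the caller
    * must CloseHandle(h) when done. */
   return h;
}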

View File

@@ -165,26 +165,3 @@ CHIPSET(0x5927, kbl_gt3, "Intel(R) Iris Plus Graphics 650 (Kaby Lake GT3)")
CHIPSET(0x593B, kbl_gt4, "Intel(R) Kabylake GT4")
CHIPSET(0x3184, glk, "Intel(R) HD Graphics (Geminilake)")
CHIPSET(0x3185, glk_2x6, "Intel(R) HD Graphics (Geminilake 2x6)")
CHIPSET(0x3E90, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
CHIPSET(0x3E93, cfl_gt1, "Intel(R) HD Graphics (Coffeelake 2x6 GT1)")
CHIPSET(0x3E91, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
CHIPSET(0x3E92, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
CHIPSET(0x3E96, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
CHIPSET(0x3E9B, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
CHIPSET(0x3E94, cfl_gt2, "Intel(R) HD Graphics (Coffeelake 3x8 GT2)")
CHIPSET(0x3EA6, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
CHIPSET(0x3EA7, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
CHIPSET(0x3EA8, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
CHIPSET(0x3EA5, cfl_gt3, "Intel(R) HD Graphics (Coffeelake 3x8 GT3)")
CHIPSET(0x5A49, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
CHIPSET(0x5A4A, cnl_2x8, "Intel(R) HD Graphics (Cannonlake 2x8 GT0.5)")
CHIPSET(0x5A41, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
CHIPSET(0x5A42, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
CHIPSET(0x5A44, cnl_3x8, "Intel(R) HD Graphics (Cannonlake 3x8 GT1)")
CHIPSET(0x5A59, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
CHIPSET(0x5A5A, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
CHIPSET(0x5A5C, cnl_4x8, "Intel(R) HD Graphics (Cannonlake 4x8 GT1.5)")
CHIPSET(0x5A50, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
CHIPSET(0x5A51, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
CHIPSET(0x5A52, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")
CHIPSET(0x5A54, cnl_5x8, "Intel(R) HD Graphics (Cannonlake 5x8 GT2)")

View File

@@ -213,17 +213,12 @@ CHIPSET(0x6985, POLARIS12_, POLARIS12)
CHIPSET(0x6986, POLARIS12_, POLARIS12)
CHIPSET(0x6987, POLARIS12_, POLARIS12)
CHIPSET(0x6995, POLARIS12_, POLARIS12)
CHIPSET(0x6997, POLARIS12_, POLARIS12)
CHIPSET(0x699F, POLARIS12_, POLARIS12)
CHIPSET(0x6860, VEGA10_, VEGA10)
CHIPSET(0x6861, VEGA10_, VEGA10)
CHIPSET(0x6862, VEGA10_, VEGA10)
CHIPSET(0x6863, VEGA10_, VEGA10)
CHIPSET(0x6864, VEGA10_, VEGA10)
CHIPSET(0x6867, VEGA10_, VEGA10)
CHIPSET(0x6868, VEGA10_, VEGA10)
CHIPSET(0x687F, VEGA10_, VEGA10)
CHIPSET(0x686C, VEGA10_, VEGA10)
CHIPSET(0x15DD, RAVEN_, RAVEN)

View File

@@ -43,7 +43,7 @@ extern "C" {
#define VK_VERSION_MINOR(version) (((uint32_t)(version) >> 12) & 0x3ff)
#define VK_VERSION_PATCH(version) ((uint32_t)(version) & 0xfff)
// Version of this file
#define VK_HEADER_VERSION 49
#define VK_HEADER_VERSION 46
#define VK_NULL_HANDLE 0
@@ -261,6 +261,9 @@ typedef enum VkStructureType {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO_KHX = 1000071002,
VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES_KHX = 1000071003,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX = 1000071004,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHX = 1000071005,
VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHX = 1000071006,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHX = 1000071007,
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_BUFFER_CREATE_INFO_KHX = 1000072000,
VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO_KHX = 1000072001,
VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO_KHX = 1000072002,
@@ -298,10 +301,6 @@ typedef enum VkStructureType {
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT = 1000099000,
VK_STRUCTURE_TYPE_PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT = 1000099001,
VK_STRUCTURE_TYPE_HDR_METADATA_EXT = 1000105000,
VK_STRUCTURE_TYPE_SHARED_PRESENT_SURFACE_CAPABILITIES_KHR = 1000111000,
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR = 1000119000,
VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR = 1000119001,
VK_STRUCTURE_TYPE_SURFACE_FORMAT_2_KHR = 1000119002,
VK_STRUCTURE_TYPE_IOS_SURFACE_CREATE_INFO_MVK = 1000122000,
VK_STRUCTURE_TYPE_MACOS_SURFACE_CREATE_INFO_MVK = 1000123000,
VK_STRUCTURE_TYPE_BEGIN_RANGE = VK_STRUCTURE_TYPE_APPLICATION_INFO,
@@ -591,7 +590,6 @@ typedef enum VkImageLayout {
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL = 7,
VK_IMAGE_LAYOUT_PREINITIALIZED = 8,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR = 1000001002,
VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR = 1000111000,
VK_IMAGE_LAYOUT_BEGIN_RANGE = VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_END_RANGE = VK_IMAGE_LAYOUT_PREINITIALIZED,
VK_IMAGE_LAYOUT_RANGE_SIZE = (VK_IMAGE_LAYOUT_PREINITIALIZED - VK_IMAGE_LAYOUT_UNDEFINED + 1),
@@ -898,47 +896,6 @@ typedef enum VkSubpassContents {
VK_SUBPASS_CONTENTS_MAX_ENUM = 0x7FFFFFFF
} VkSubpassContents;
typedef enum VkObjectType {
VK_OBJECT_TYPE_UNKNOWN = 0,
VK_OBJECT_TYPE_INSTANCE = 1,
VK_OBJECT_TYPE_PHYSICAL_DEVICE = 2,
VK_OBJECT_TYPE_DEVICE = 3,
VK_OBJECT_TYPE_QUEUE = 4,
VK_OBJECT_TYPE_SEMAPHORE = 5,
VK_OBJECT_TYPE_COMMAND_BUFFER = 6,
VK_OBJECT_TYPE_FENCE = 7,
VK_OBJECT_TYPE_DEVICE_MEMORY = 8,
VK_OBJECT_TYPE_BUFFER = 9,
VK_OBJECT_TYPE_IMAGE = 10,
VK_OBJECT_TYPE_EVENT = 11,
VK_OBJECT_TYPE_QUERY_POOL = 12,
VK_OBJECT_TYPE_BUFFER_VIEW = 13,
VK_OBJECT_TYPE_IMAGE_VIEW = 14,
VK_OBJECT_TYPE_SHADER_MODULE = 15,
VK_OBJECT_TYPE_PIPELINE_CACHE = 16,
VK_OBJECT_TYPE_PIPELINE_LAYOUT = 17,
VK_OBJECT_TYPE_RENDER_PASS = 18,
VK_OBJECT_TYPE_PIPELINE = 19,
VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT = 20,
VK_OBJECT_TYPE_SAMPLER = 21,
VK_OBJECT_TYPE_DESCRIPTOR_POOL = 22,
VK_OBJECT_TYPE_DESCRIPTOR_SET = 23,
VK_OBJECT_TYPE_FRAMEBUFFER = 24,
VK_OBJECT_TYPE_COMMAND_POOL = 25,
VK_OBJECT_TYPE_SURFACE_KHR = 1000000000,
VK_OBJECT_TYPE_SWAPCHAIN_KHR = 1000001000,
VK_OBJECT_TYPE_DISPLAY_KHR = 1000002000,
VK_OBJECT_TYPE_DISPLAY_MODE_KHR = 1000002001,
VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT = 1000011000,
VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_KHR = 1000085000,
VK_OBJECT_TYPE_OBJECT_TABLE_NVX = 1000086000,
VK_OBJECT_TYPE_INDIRECT_COMMANDS_LAYOUT_NVX = 1000086001,
VK_OBJECT_TYPE_BEGIN_RANGE = VK_OBJECT_TYPE_UNKNOWN,
VK_OBJECT_TYPE_END_RANGE = VK_OBJECT_TYPE_COMMAND_POOL,
VK_OBJECT_TYPE_RANGE_SIZE = (VK_OBJECT_TYPE_COMMAND_POOL - VK_OBJECT_TYPE_UNKNOWN + 1),
VK_OBJECT_TYPE_MAX_ENUM = 0x7FFFFFFF
} VkObjectType;
typedef VkFlags VkInstanceCreateFlags;
typedef enum VkFormatFeatureFlagBits {
@@ -3366,8 +3323,6 @@ typedef enum VkPresentModeKHR {
VK_PRESENT_MODE_MAILBOX_KHR = 1,
VK_PRESENT_MODE_FIFO_KHR = 2,
VK_PRESENT_MODE_FIFO_RELAXED_KHR = 3,
VK_PRESENT_MODE_SHARED_DEMAND_REFRESH_KHR = 1000111000,
VK_PRESENT_MODE_SHARED_CONTINUOUS_REFRESH_KHR = 1000111001,
VK_PRESENT_MODE_BEGIN_RANGE_KHR = VK_PRESENT_MODE_IMMEDIATE_KHR,
VK_PRESENT_MODE_END_RANGE_KHR = VK_PRESENT_MODE_FIFO_RELAXED_KHR,
VK_PRESENT_MODE_RANGE_SIZE_KHR = (VK_PRESENT_MODE_FIFO_RELAXED_KHR - VK_PRESENT_MODE_IMMEDIATE_KHR + 1),
@@ -4146,64 +4101,6 @@ VKAPI_ATTR void VKAPI_CALL vkCmdPushDescriptorSetWithTemplateKHR(
const void* pData);
#endif
#define VK_KHR_shared_presentable_image 1
#define VK_KHR_SHARED_PRESENTABLE_IMAGE_SPEC_VERSION 1
#define VK_KHR_SHARED_PRESENTABLE_IMAGE_EXTENSION_NAME "VK_KHR_shared_presentable_image"
typedef struct VkSharedPresentSurfaceCapabilitiesKHR {
VkStructureType sType;
void* pNext;
VkImageUsageFlags sharedPresentSupportedUsageFlags;
} VkSharedPresentSurfaceCapabilitiesKHR;
typedef VkResult (VKAPI_PTR *PFN_vkGetSwapchainStatusKHR)(VkDevice device, VkSwapchainKHR swapchain);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkGetSwapchainStatusKHR(
VkDevice device,
VkSwapchainKHR swapchain);
#endif
#define VK_KHR_get_surface_capabilities2 1
#define VK_KHR_GET_SURFACE_CAPABILITIES_2_SPEC_VERSION 1
#define VK_KHR_GET_SURFACE_CAPABILITIES_2_EXTENSION_NAME "VK_KHR_get_surface_capabilities2"
typedef struct VkPhysicalDeviceSurfaceInfo2KHR {
VkStructureType sType;
const void* pNext;
VkSurfaceKHR surface;
} VkPhysicalDeviceSurfaceInfo2KHR;
typedef struct VkSurfaceCapabilities2KHR {
VkStructureType sType;
void* pNext;
VkSurfaceCapabilitiesKHR surfaceCapabilities;
} VkSurfaceCapabilities2KHR;
typedef struct VkSurfaceFormat2KHR {
VkStructureType sType;
void* pNext;
VkSurfaceFormatKHR surfaceFormat;
} VkSurfaceFormat2KHR;
typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceCapabilities2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, VkSurfaceCapabilities2KHR* pSurfaceCapabilities);
typedef VkResult (VKAPI_PTR *PFN_vkGetPhysicalDeviceSurfaceFormats2KHR)(VkPhysicalDevice physicalDevice, const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo, uint32_t* pSurfaceFormatCount, VkSurfaceFormat2KHR* pSurfaceFormats);
#ifndef VK_NO_PROTOTYPES
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceCapabilities2KHR(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
VkSurfaceCapabilities2KHR* pSurfaceCapabilities);
VKAPI_ATTR VkResult VKAPI_CALL vkGetPhysicalDeviceSurfaceFormats2KHR(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceSurfaceInfo2KHR* pSurfaceInfo,
uint32_t* pSurfaceFormatCount,
VkSurfaceFormat2KHR* pSurfaceFormats);
#endif
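
/* Illustrative sketch (not part of vulkan.h) of the query pattern the
 * VK_KHR_get_surface_capabilities2 declarations above support: wrap the
 * surface in the ...Info2 struct and chain extended outputs off pNext. */
static VkResult
query_surface_caps2(VkPhysicalDevice physical_device, VkSurfaceKHR surface,
                    VkSurfaceCapabilities2KHR *caps)
{
    const VkPhysicalDeviceSurfaceInfo2KHR info = {
        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SURFACE_INFO_2_KHR,
        .surface = surface,
    };

    caps->sType = VK_STRUCTURE_TYPE_SURFACE_CAPABILITIES_2_KHR;
    caps->pNext = NULL;
    return vkGetPhysicalDeviceSurfaceCapabilities2KHR(physical_device,
                                                      &info, caps);
}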
#define VK_EXT_debug_report 1
VK_DEFINE_NON_DISPATCHABLE_HANDLE(VkDebugReportCallbackEXT)

View File

@@ -103,26 +103,8 @@ def generate(env):
'HAVE_STDINT_H',
])
env.Prepend(LIBPATH = [os.path.join(llvm_dir, 'lib')])
# LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter irreader`
if llvm_version >= distutils.version.LooseVersion('4.0'):
env.Prepend(LIBS = [
'LLVMX86Disassembler', 'LLVMX86AsmParser',
'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',
'LLVMDebugInfoCodeView', 'LLVMCodeGen',
'LLVMScalarOpts', 'LLVMInstCombine',
'LLVMTransformUtils',
'LLVMBitWriter', 'LLVMX86Desc',
'LLVMMCDisassembler', 'LLVMX86Info',
'LLVMX86AsmPrinter', 'LLVMX86Utils',
'LLVMMCJIT', 'LLVMExecutionEngine', 'LLVMTarget',
'LLVMAnalysis', 'LLVMProfileData',
'LLVMRuntimeDyld', 'LLVMObject', 'LLVMMCParser',
'LLVMBitReader', 'LLVMMC', 'LLVMCore',
'LLVMSupport',
'LLVMIRReader', 'LLVMAsmParser',
'LLVMDemangle', 'LLVMGlobalISel', 'LLVMDebugInfoMSF',
])
elif llvm_version >= distutils.version.LooseVersion('3.9'):
# LIBS should match the output of `llvm-config --libs engine mcjit bitwriter x86asmprinter`
if llvm_version >= distutils.version.LooseVersion('3.9'):
env.Prepend(LIBS = [
'LLVMX86Disassembler', 'LLVMX86AsmParser',
'LLVMX86CodeGen', 'LLVMSelectionDAG', 'LLVMAsmPrinter',

View File

@@ -21,7 +21,18 @@
.PHONY: git_sha1.h.tmp
git_sha1.h.tmp:
@sh $(top_srcdir)/git_sha1_gen.sh > $@
@# Don't assume that $(top_srcdir)/.git is a directory. It may be
@# a gitlink file if $(top_srcdir) is a submodule checkout or a linked
@# worktree.
@# If we are building from a release tarball copy the bundled header.
@touch git_sha1.h.tmp
@if test -e $(top_srcdir)/.git; then \
if which git > /dev/null; then \
git --git-dir=$(top_srcdir)/.git log -n 1 --oneline | \
sed 's/^\([^ ]*\) .*/#define MESA_GIT_SHA1 "git-\1"/' \
> git_sha1.h.tmp ; \
fi \
fi
git_sha1.h: git_sha1.h.tmp
@echo "updating git_sha1.h"

View File

@@ -22,15 +22,27 @@ def write_git_sha1_h_file(filename):
to retrieve the git hashid and write the header file. An empty file
will be created if anything goes wrong."""
tempfile = "git_sha1.h.tmp"
with open(tempfile, "w") as f:
args = [ 'sh', Dir('#').abspath + '/git_sha1_gen.sh' ]
try:
subprocess.Popen(args, stdout=f).wait()
except:
print "Warning: exception in write_git_sha1_h_file()"
return
args = [ 'git', 'rev-parse', '--short=10', 'HEAD' ]
try:
(commit, foo) = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()
except:
print "Warning: exception in write_git_sha1_h_file()"
# git log command didn't work
if not os.path.exists(filename):
dirname = os.path.dirname(filename)
if dirname and not os.path.exists(dirname):
os.makedirs(dirname)
# create an empty file if none already exists
f = open(filename, "w")
f.close()
return
# note that commit[:-1] removes the trailing newline character
commit = '#define MESA_GIT_SHA1 "git-%s"\n' % commit[:-1]
tempfile = "git_sha1.h.tmp"
f = open(tempfile, "w")
f.write(commit)
f.close()
if not os.path.exists(filename) or not filecmp.cmp(tempfile, filename):
# The filename does not exist or it's different from the new file,
# so replace old file with new.

View File

@@ -42,11 +42,5 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/amd/addrlib/gfx9/chip \
$(MESA_TOP)/src/amd/addrlib/r800/chip
LOCAL_EXPORT_C_INCLUDE_DIRS := \
$(LOCAL_PATH) \
$(LOCAL_PATH)/addrlib/core \
$(LOCAL_PATH)/addrlib/inc/chip/r800 \
$(LOCAL_PATH)/addrlib/r800/chip
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)

View File

@@ -29,7 +29,6 @@ include $(CLEAR_VARS)
LOCAL_MODULE := libmesa_amd_common
LOCAL_SRC_FILES := \
$(AMD_COMMON_FILES) \
$(AMD_COMPILER_FILES) \
$(AMD_DEBUG_FILES)
@@ -56,21 +55,11 @@ LOCAL_C_INCLUDES := \
$(MESA_TOP)/src/gallium/auxiliary \
$(intermediates)/common \
external/llvm/include \
external/llvm/device/include
external/llvm/device/include \
external/libcxx/include \
$(ELF_INCLUDES)
LOCAL_EXPORT_C_INCLUDE_DIRS := \
$(LOCAL_PATH)/common
LOCAL_SHARED_LIBRARIES := \
libdrm_amdgpu
LOCAL_STATIC_LIBRARIES := \
libmesa_nir
LOCAL_WHOLE_STATIC_LIBRARIES := \
libelf
$(call mesa-build-with-llvm)
LOCAL_STATIC_LIBRARIES := libLLVMCore
include $(MESA_COMMON_MK)
include $(BUILD_STATIC_LIBRARY)

View File

@@ -25,7 +25,6 @@ COMMON_LIBS = common/libamd_common.la
# TODO cleanup these
common_libamd_common_la_CPPFLAGS = \
$(AMDGPU_CFLAGS) \
$(VALGRIND_CFLAGS) \
$(DEFINES) \
-I$(top_srcdir)/include \
@@ -55,7 +54,6 @@ common_libamd_common_la_CXXFLAGS = \
noinst_LTLIBRARIES += $(COMMON_LIBS)
common_libamd_common_la_SOURCES = \
$(AMD_COMMON_FILES) \
$(AMD_COMPILER_FILES) \
$(AMD_DEBUG_FILES) \
$(AMD_GENERATED_FILES)
@@ -67,8 +65,6 @@ common_libamd_common_la_SOURCES += $(AMD_NIR_FILES)
endif
endif
common_libamd_common_la_LIBADD = $(LIBELF_LIBS)
common/sid_tables.h: $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h
$(AM_V_at)$(MKDIR_P) $(@D)
$(AM_V_GEN) $(PYTHON2) $(srcdir)/common/sid_tables.py $(srcdir)/common/sid.h $(srcdir)/common/gfx9d.h > $@

View File

@@ -55,12 +55,6 @@ AMD_NIR_FILES = \
common/ac_nir_to_llvm.c \
common/ac_nir_to_llvm.h
AMD_COMMON_FILES = \
common/ac_gpu_info.c \
common/ac_gpu_info.h \
common/ac_surface.c \
common/ac_surface.h
AMD_DEBUG_FILES = \
common/ac_debug.c \
common/ac_debug.h

View File

@@ -1193,20 +1193,6 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
m_settings.depthPipeXorDisable = 1;
break;
case FAMILY_RV:
m_settings.isArcticIsland = 1;
m_settings.isRaven = ASICREV_IS_RAVEN(uChipRevision);
if (m_settings.isRaven)
{
m_settings.isDcn1 = 1;
}
m_settings.metaBaseAlignFix = 1;
m_settings.depthPipeXorDisable = 1;
break;
default:
ADDR_ASSERT(!"This should be a Fusion");
break;
@@ -2748,35 +2734,6 @@ BOOL_32 Gfx9Lib::IsValidDisplaySwizzleMode(
break;
}
}
else if (m_settings.isDcn1)
{
switch (swizzleMode)
{
case ADDR_SW_4KB_D:
case ADDR_SW_64KB_D:
case ADDR_SW_VAR_D:
case ADDR_SW_64KB_D_T:
case ADDR_SW_4KB_D_X:
case ADDR_SW_64KB_D_X:
case ADDR_SW_VAR_D_X:
support = (pIn->bpp == 64);
break;
case ADDR_SW_LINEAR:
case ADDR_SW_4KB_S:
case ADDR_SW_64KB_S:
case ADDR_SW_VAR_S:
case ADDR_SW_64KB_S_T:
case ADDR_SW_4KB_S_X:
case ADDR_SW_64KB_S_X:
case ADDR_SW_VAR_S_X:
support = (pIn->bpp <= 64);
break;
default:
break;
}
}
else
{
ADDR_NOT_IMPLEMENTED();
@@ -3238,20 +3195,6 @@ ADDR_E_RETURNCODE Gfx9Lib::HwlGetPreferredSurfaceSetting(
// DCE12 does not support display surface to be _T swizzle mode
prtXor = FALSE;
}
else if (m_settings.isDcn1)
{
// _R is not supported by Dcn1
if (pIn->bpp == 64)
{
swType = ADDR_SW_D;
}
else
{
swType = ADDR_SW_S;
}
blockSet.micro = FALSE;
}
else
{
ADDR_NOT_IMPLEMENTED();

View File

@@ -54,13 +54,11 @@ struct Gfx9ChipSettings
// Asic/Generation name
UINT_32 isArcticIsland : 1;
UINT_32 isVega10 : 1;
UINT_32 isRaven : 1;
UINT_32 reserved0 : 29;
UINT_32 reserved0 : 30;
// Display engine IP version name
UINT_32 isDce12 : 1;
UINT_32 isDcn1 : 1;
UINT_32 reserved1 : 29;
UINT_32 reserved1 : 31;
// Misc configuration bits
UINT_32 metaBaseAlignFix : 1;
@@ -203,7 +201,7 @@ protected:
if (IsXor(swizzleMode))
{
if (m_settings.isVega10 || m_settings.isRaven)
if (m_settings.isVega10)
{
baseAlign = GetBlockSize(swizzleMode);
}

View File

@@ -132,15 +132,9 @@ void ac_dump_reg(FILE *file, unsigned offset, uint32_t value,
static void ac_parse_set_reg_packet(FILE *f, uint32_t *ib, unsigned count,
unsigned reg_offset)
{
unsigned reg = ((ib[1] & 0xFFFF) << 2) + reg_offset;
unsigned index = ib[1] >> 28;
unsigned reg = (ib[1] << 2) + reg_offset;
int i;
if (index != 0) {
print_spaces(f, INDENT_PKT);
fprintf(f, "INDEX = %u\n", index);
}
for (i = 0; i < count; i++)
ac_dump_reg(f, reg + i*4, ib[2+i], ~0);
}
@@ -220,52 +214,6 @@ static uint32_t *ac_parse_packet3(FILE *f, uint32_t *ib, int *num_dw,
print_named_value(f, "ADDRESS_HI", ib[3], 16);
}
break;
case PKT3_EVENT_WRITE_EOP:
ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
S_028A90_EVENT_TYPE(~0));
print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
print_named_value(f, "TCL1_VOL_ACTION_ENA", (ib[1] >> 12) & 0x1, 1);
print_named_value(f, "TC_VOL_ACTION_ENA", (ib[1] >> 13) & 0x1, 1);
print_named_value(f, "TC_WB_ACTION_ENA", (ib[1] >> 15) & 0x1, 1);
print_named_value(f, "TCL1_ACTION_ENA", (ib[1] >> 16) & 0x1, 1);
print_named_value(f, "TC_ACTION_ENA", (ib[1] >> 17) & 0x1, 1);
print_named_value(f, "ADDRESS_LO", ib[2], 32);
print_named_value(f, "ADDRESS_HI", ib[3], 16);
print_named_value(f, "DST_SEL", (ib[3] >> 16) & 0x3, 2);
print_named_value(f, "INT_SEL", (ib[3] >> 24) & 0x7, 3);
print_named_value(f, "DATA_SEL", ib[3] >> 29, 3);
print_named_value(f, "DATA_LO", ib[4], 32);
print_named_value(f, "DATA_HI", ib[5], 32);
break;
case PKT3_RELEASE_MEM:
ac_dump_reg(f, R_028A90_VGT_EVENT_INITIATOR, ib[1],
S_028A90_EVENT_TYPE(~0));
print_named_value(f, "EVENT_INDEX", (ib[1] >> 8) & 0xf, 4);
print_named_value(f, "TCL1_VOL_ACTION_ENA", (ib[1] >> 12) & 0x1, 1);
print_named_value(f, "TC_VOL_ACTION_ENA", (ib[1] >> 13) & 0x1, 1);
print_named_value(f, "TC_WB_ACTION_ENA", (ib[1] >> 15) & 0x1, 1);
print_named_value(f, "TCL1_ACTION_ENA", (ib[1] >> 16) & 0x1, 1);
print_named_value(f, "TC_ACTION_ENA", (ib[1] >> 17) & 0x1, 1);
print_named_value(f, "TC_NC_ACTION_ENA", (ib[1] >> 19) & 0x1, 1);
print_named_value(f, "TC_WC_ACTION_ENA", (ib[1] >> 20) & 0x1, 1);
print_named_value(f, "TC_MD_ACTION_ENA", (ib[1] >> 21) & 0x1, 1);
print_named_value(f, "DST_SEL", (ib[2] >> 16) & 0x3, 2);
print_named_value(f, "INT_SEL", (ib[2] >> 24) & 0x7, 3);
print_named_value(f, "DATA_SEL", ib[2] >> 29, 3);
print_named_value(f, "ADDRESS_LO", ib[3], 32);
print_named_value(f, "ADDRESS_HI", ib[4], 32);
print_named_value(f, "DATA_LO", ib[5], 32);
print_named_value(f, "DATA_HI", ib[6], 32);
print_named_value(f, "CTXID", ib[7], 32);
break;
case PKT3_WAIT_REG_MEM:
print_named_value(f, "OP", ib[1], 32);
print_named_value(f, "ADDRESS_LO", ib[2], 32);
print_named_value(f, "ADDRESS_HI", ib[3], 32);
print_named_value(f, "REF", ib[4], 32);
print_named_value(f, "MASK", ib[5], 32);
print_named_value(f, "POLL_INTERVAL", ib[6], 16);
break;
case PKT3_DRAW_INDEX_AUTO:
ac_dump_reg(f, R_030930_VGT_NUM_INDICES, ib[1], ~0);
ac_dump_reg(f, R_0287F0_VGT_DRAW_INITIATOR, ib[2], ~0);

View File

@@ -1,303 +0,0 @@
/*
* Copyright © 2017 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
#include "ac_gpu_info.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/u_math.h"
#include <stdio.h>
#include <xf86drm.h>
#include <amdgpu_drm.h>
#include <amdgpu.h>
#define CIK_TILE_MODE_COLOR_2D 14
#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f)
#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17
static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
{
unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D];
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
case CIK__PIPE_CONFIG__ADDR_SURF_P2:
return 2;
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32:
return 4;
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32:
return 8;
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
return 16;
default:
fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
assert(!"this should never occur");
return 2;
}
}
bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
struct radeon_info *info,
struct amdgpu_gpu_info *amdinfo)
{
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, vram_vis, gtt;
struct drm_amdgpu_info_hw_ip dma = {}, compute = {}, uvd = {}, vce = {}, vcn_dec = {};
uint32_t vce_version = 0, vce_feature = 0, uvd_version = 0, uvd_feature = 0;
uint32_t unused_feature;
int r, i, j;
drmDevicePtr devinfo;
/* Get PCI info. */
r = drmGetDevice2(fd, 0, &devinfo);
if (r) {
fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
return false;
}
info->pci_domain = devinfo->businfo.pci->domain;
info->pci_bus = devinfo->businfo.pci->bus;
info->pci_dev = devinfo->businfo.pci->dev;
info->pci_func = devinfo->businfo.pci->func;
drmFreeDevice(&devinfo);
/* Query hardware and driver information. */
r = amdgpu_query_gpu_info(dev, amdinfo);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
return false;
}
r = amdgpu_query_buffer_size_alignment(dev, &alignment_info);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
&vram_vis);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram_vis) failed.\n");
return false;
}
r = amdgpu_query_heap_info(dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
return false;
}
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_DMA, 0, &dma);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
return false;
}
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
return false;
}
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_UVD, 0, &uvd);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(uvd) failed.\n");
return false;
}
if (info->drm_major == 3 && info->drm_minor >= 17) {
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCN_DEC, 0, &vcn_dec);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vcn_dec) failed.\n");
return false;
}
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_ME, 0, 0,
&info->me_fw_version, &unused_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(me) failed.\n");
return false;
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_PFP, 0, 0,
&info->pfp_fw_version, &unused_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(pfp) failed.\n");
return false;
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_GFX_CE, 0, 0,
&info->ce_fw_version, &unused_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(ce) failed.\n");
return false;
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_UVD, 0, 0,
&uvd_version, &uvd_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(uvd) failed.\n");
return false;
}
r = amdgpu_query_hw_ip_info(dev, AMDGPU_HW_IP_VCE, 0, &vce);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(vce) failed.\n");
return false;
}
r = amdgpu_query_firmware_version(dev, AMDGPU_INFO_FW_VCE, 0, 0,
&vce_version, &vce_feature);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_firmware_version(vce) failed.\n");
return false;
}
/* Set chip identification. */
info->pci_id = amdinfo->asic_id; /* TODO: is this correct? */
info->vce_harvest_config = amdinfo->vce_harvest_config;
switch (info->pci_id) {
#define CHIPSET(pci_id, name, cfamily) case pci_id: info->family = CHIP_##cfamily; break;
#include "pci_ids/radeonsi_pci_ids.h"
#undef CHIPSET
default:
fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
return false;
}
if (info->family >= CHIP_VEGA10)
info->chip_class = GFX9;
else if (info->family >= CHIP_TONGA)
info->chip_class = VI;
else if (info->family >= CHIP_BONAIRE)
info->chip_class = CIK;
else if (info->family >= CHIP_TAHITI)
info->chip_class = SI;
else {
fprintf(stderr, "amdgpu: Unknown family.\n");
return false;
}
/* Set which chips have dedicated VRAM. */
info->has_dedicated_vram =
!(amdinfo->ids_flags & AMDGPU_IDS_FLAGS_FUSION);
/* Set hardware information. */
info->gart_size = gtt.heap_size;
info->vram_size = vram.heap_size;
info->vram_vis_size = vram_vis.heap_size;
/* The kernel can split large buffers in VRAM but not in GTT, so large
* allocations can fail or cause buffer movement failures in the kernel.
*/
info->max_alloc_size = MIN2(info->vram_size * 0.9, info->gart_size * 0.7);
/* convert the shader clock from KHz to MHz */
info->max_shader_clock = amdinfo->max_engine_clk / 1000;
info->max_se = amdinfo->num_shader_engines;
info->max_sh_per_se = amdinfo->num_shader_arrays_per_engine;
info->has_hw_decode =
(uvd.available_rings != 0) || (vcn_dec.available_rings != 0);
info->uvd_fw_version =
uvd.available_rings ? uvd_version : 0;
info->vce_fw_version =
vce.available_rings ? vce_version : 0;
info->has_userptr = true;
info->num_render_backends = amdinfo->rb_pipes;
info->clock_crystal_freq = amdinfo->gpu_counter_freq;
info->tcc_cache_line_size = 64; /* TC L2 line size on GCN */
if (info->chip_class == GFX9) {
info->num_tile_pipes = 1 << G_0098F8_NUM_PIPES(amdinfo->gb_addr_cfg);
info->pipe_interleave_bytes =
256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(amdinfo->gb_addr_cfg);
} else {
info->num_tile_pipes = cik_get_num_tile_pipes(amdinfo);
info->pipe_interleave_bytes =
256 << G_0098F8_PIPE_INTERLEAVE_SIZE_GFX6(amdinfo->gb_addr_cfg);
}
info->has_virtual_memory = true;
assert(util_is_power_of_two(dma.available_rings + 1));
assert(util_is_power_of_two(compute.available_rings + 1));
info->num_sdma_rings = util_bitcount(dma.available_rings);
info->num_compute_rings = util_bitcount(compute.available_rings);
/* Get the number of good compute units. */
info->num_good_compute_units = 0;
for (i = 0; i < info->max_se; i++)
for (j = 0; j < info->max_sh_per_se; j++)
info->num_good_compute_units +=
util_bitcount(amdinfo->cu_bitmap[i][j]);
memcpy(info->si_tile_mode_array, amdinfo->gb_tile_mode,
sizeof(amdinfo->gb_tile_mode));
info->enabled_rb_mask = amdinfo->enabled_rb_pipes_mask;
memcpy(info->cik_macrotile_mode_array, amdinfo->gb_macro_tile_mode,
sizeof(amdinfo->gb_macro_tile_mode));
info->pte_fragment_size = alignment_info.size_local;
info->gart_page_size = alignment_info.size_remote;
if (info->chip_class == SI)
info->gfx_ib_pad_with_type2 = TRUE;
return true;
}
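The two util_is_power_of_two asserts above rely on a bit trick: a mask with only contiguous low bits set becomes a power of two when incremented, so the asserts verify the kernel reports ring masks with no gaps before util_bitcount turns them into ring counts. A minimal sketch with made-up mask values:

#include <assert.h>
#include <stdbool.h>

static bool is_power_of_two(unsigned v)
{
	return v != 0 && (v & (v - 1)) == 0;
}

int main(void)
{
	unsigned available_rings = 0x3;                   /* rings 0 and 1, no gaps */
	assert(is_power_of_two(available_rings + 1));     /* 0b011 + 1 == 0b100 */
	assert(!is_power_of_two(0x5 + 1));                /* gappy 0b101 fails */
	assert(__builtin_popcount(available_rings) == 2); /* the ring count */
	return 0;
}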


@@ -1,111 +0,0 @@
/*
* Copyright © 2017 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
#ifndef AC_GPU_INFO_H
#define AC_GPU_INFO_H
#include <stdint.h>
#include <stdbool.h>
#include "amd_family.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Prior to C11 the following may trigger a typedef redeclaration warning */
typedef struct amdgpu_device *amdgpu_device_handle;
struct amdgpu_gpu_info;
struct radeon_info {
/* PCI info: domain:bus:dev:func */
uint32_t pci_domain;
uint32_t pci_bus;
uint32_t pci_dev;
uint32_t pci_func;
/* Device info. */
uint32_t pci_id;
enum radeon_family family;
enum chip_class chip_class;
uint32_t pte_fragment_size;
uint32_t gart_page_size;
uint64_t gart_size;
uint64_t vram_size;
uint64_t vram_vis_size;
uint64_t max_alloc_size;
uint32_t min_alloc_size;
bool has_dedicated_vram;
bool has_virtual_memory;
bool gfx_ib_pad_with_type2;
bool has_hw_decode;
uint32_t num_sdma_rings;
uint32_t num_compute_rings;
uint32_t uvd_fw_version;
uint32_t vce_fw_version;
uint32_t me_fw_version;
uint32_t pfp_fw_version;
uint32_t ce_fw_version;
uint32_t vce_harvest_config;
uint32_t clock_crystal_freq;
uint32_t tcc_cache_line_size;
/* Kernel info. */
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
bool has_userptr;
/* Shader cores. */
uint32_t r600_max_quad_pipes; /* wave size / 16 */
uint32_t max_shader_clock;
uint32_t num_good_compute_units;
uint32_t max_se; /* shader engines */
uint32_t max_sh_per_se; /* shader arrays per shader engine */
/* Render backends (color + depth blocks). */
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
uint32_t r600_gb_backend_map; /* R600 harvest config */
bool r600_gb_backend_map_valid;
uint32_t r600_num_banks;
uint32_t num_render_backends;
uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
uint32_t pipe_interleave_bytes;
uint32_t enabled_rb_mask; /* GCN harvest config */
/* Tile modes. */
uint32_t si_tile_mode_array[32];
uint32_t cik_macrotile_mode_array[16];
};
bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
struct radeon_info *info,
struct amdgpu_gpu_info *amdinfo);
#ifdef __cplusplus
}
#endif
#endif /* AC_GPU_INFO_H */


@@ -56,20 +56,11 @@ ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
ctx->voidt = LLVMVoidTypeInContext(ctx->context);
ctx->i1 = LLVMInt1TypeInContext(ctx->context);
ctx->i8 = LLVMInt8TypeInContext(ctx->context);
ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
ctx->f16 = LLVMHalfTypeInContext(ctx->context);
ctx->f32 = LLVMFloatTypeInContext(ctx->context);
ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
ctx->i32_0 = LLVMConstInt(ctx->i32, 0, false);
ctx->i32_1 = LLVMConstInt(ctx->i32, 1, false);
ctx->f32_0 = LLVMConstReal(ctx->f32, 0.0);
ctx->f32_1 = LLVMConstReal(ctx->f32, 1.0);
ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
"range", 5);
@@ -242,16 +233,42 @@ build_cube_intrinsic(struct ac_llvm_context *ctx,
LLVMValueRef in[3],
struct cube_selection_coords *out)
{
LLVMTypeRef f32 = ctx->f32;
LLVMBuilderRef builder = ctx->builder;
out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
f32, in, 3, AC_FUNC_ATTR_READNONE);
out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
f32, in, 3, AC_FUNC_ATTR_READNONE);
out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
f32, in, 3, AC_FUNC_ATTR_READNONE);
out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
f32, in, 3, AC_FUNC_ATTR_READNONE);
if (HAVE_LLVM >= 0x0309) {
LLVMTypeRef f32 = ctx->f32;
out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
f32, in, 3, AC_FUNC_ATTR_READNONE);
out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
f32, in, 3, AC_FUNC_ATTR_READNONE);
out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
f32, in, 3, AC_FUNC_ATTR_READNONE);
out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
f32, in, 3, AC_FUNC_ATTR_READNONE);
} else {
LLVMValueRef c[4] = {
in[0],
in[1],
in[2],
LLVMGetUndef(LLVMTypeOf(in[0]))
};
LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);
LLVMValueRef tmp =
ac_build_intrinsic(ctx, "llvm.AMDGPU.cube",
LLVMTypeOf(vec), &vec, 1,
AC_FUNC_ATTR_READNONE);
out->stc[1] = LLVMBuildExtractElement(builder, tmp,
LLVMConstInt(ctx->i32, 0, 0), "");
out->stc[0] = LLVMBuildExtractElement(builder, tmp,
LLVMConstInt(ctx->i32, 1, 0), "");
out->ma = LLVMBuildExtractElement(builder, tmp,
LLVMConstInt(ctx->i32, 2, 0), "");
out->id = LLVMBuildExtractElement(builder, tmp,
LLVMConstInt(ctx->i32, 3, 0), "");
}
}
/**
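HAVE_LLVM packs the LLVM version as 0xMMmm (0x0309 is LLVM 3.9), so the branch above selects the per-face llvm.amdgcn.cube* intrinsics on 3.9+ and falls back to the legacy llvm.AMDGPU.cube vector intrinsic otherwise; since HAVE_LLVM is a compile-time constant, the untaken branch is eliminated. A minimal sketch of the pattern, with a hypothetical helper:

/* Sketch of HAVE_LLVM version gating; select_cube_intrinsic() is a
 * hypothetical helper for illustration only. */
#define HAVE_LLVM 0x0400  /* normally provided by the build system */

static const char *select_cube_intrinsic(void)
{
	if (HAVE_LLVM >= 0x0309)
		return "llvm.amdgcn.cubetc";  /* one scalar intrinsic per result */
	else
		return "llvm.AMDGPU.cube";    /* legacy: single vector intrinsic */
}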
@@ -541,7 +558,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
bool has_add_tid)
{
/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
if (!has_add_tid) {
if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
/* Split 3 channel stores, because LLVM doesn't support 3-channel
* intrinsics. */
if (num_channels == 3) {
@@ -642,89 +659,114 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
unsigned inst_offset,
unsigned glc,
unsigned slc,
bool can_speculate,
bool allow_smem)
bool readonly_memory)
{
LLVMValueRef offset = LLVMConstInt(ctx->i32, inst_offset, 0);
if (voffset)
offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");
if (soffset)
offset = LLVMBuildAdd(ctx->builder, offset, soffset, "");
/* TODO: VI and later generations can use SMEM with GLC=1.*/
if (allow_smem && !glc && !slc) {
assert(vindex == NULL);
LLVMValueRef result[4];
for (int i = 0; i < num_channels; i++) {
if (i) {
offset = LLVMBuildAdd(ctx->builder, offset,
LLVMConstInt(ctx->i32, 4, 0), "");
}
LLVMValueRef args[2] = {rsrc, offset};
result[i] = ac_build_intrinsic(ctx, "llvm.SI.load.const.v4i32",
ctx->f32, args, 2,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_LEGACY);
}
if (num_channels == 1)
return result[0];
if (num_channels == 3)
result[num_channels++] = LLVMGetUndef(ctx->f32);
return ac_build_gather_values(ctx, result, num_channels);
}
unsigned func = CLAMP(num_channels, 1, 3) - 1;
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
offset,
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0)
};
if (HAVE_LLVM >= 0x309) {
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
LLVMConstInt(ctx->i32, inst_offset, 0),
LLVMConstInt(ctx->i1, glc, 0),
LLVMConstInt(ctx->i1, slc, 0)
};
LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
ctx->v4f32};
const char *type_names[] = {"f32", "v2f32", "v4f32"};
char name[256];
LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
ctx->v4f32};
const char *type_names[] = {"f32", "v2f32", "v4f32"};
char name[256];
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
type_names[func]);
if (voffset) {
args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
"");
}
return ac_build_intrinsic(ctx, name, types[func], args,
ARRAY_SIZE(args),
/* READNONE means writes can't affect it, while
* READONLY means that writes can affect it. */
can_speculate && HAVE_LLVM >= 0x0400 ?
AC_FUNC_ATTR_READNONE :
AC_FUNC_ATTR_READONLY);
if (soffset) {
args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
"");
}
snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
type_names[func]);
return ac_build_intrinsic(ctx, name, types[func], args,
ARRAY_SIZE(args),
/* READNONE means writes can't
* affect it, while READONLY means
* that writes can affect it. */
readonly_memory && HAVE_LLVM >= 0x0400 ?
AC_FUNC_ATTR_READNONE :
AC_FUNC_ATTR_READONLY);
} else {
LLVMValueRef args[] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
voffset ? voffset : vindex,
soffset,
LLVMConstInt(ctx->i32, inst_offset, 0),
LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
LLVMConstInt(ctx->i32, glc, 0),
LLVMConstInt(ctx->i32, slc, 0),
LLVMConstInt(ctx->i32, 0, 0), // TFE
};
LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
ctx->v4i32};
const char *type_names[] = {"i32", "v2i32", "v4i32"};
const char *arg_type = "i32";
char name[256];
if (voffset && vindex) {
LLVMValueRef vaddr[] = {vindex, voffset};
arg_type = "v2i32";
args[1] = ac_build_gather_values(ctx, vaddr, 2);
}
snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
type_names[func], arg_type);
return ac_build_intrinsic(ctx, name, types[func], args,
ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
}
}
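The attribute choice at the end of both paths matters for optimization: AC_FUNC_ATTR_READNONE tells LLVM the result is independent of memory writes (so the load may be speculated and CSE'd), while AC_FUNC_ATTR_READONLY only promises the call itself does not write. A hedged sketch of that decision, with placeholder attribute values:

#include <stdbool.h>

/* Illustrative restatement of the attribute selection above; the enum
 * values are placeholders, not the real AC_FUNC_ATTR_* bits. */
static unsigned choose_load_attr(bool readonly_memory, unsigned have_llvm)
{
	enum { ATTR_READNONE = 1, ATTR_READONLY = 2 };
	return (readonly_memory && have_llvm >= 0x0400) ? ATTR_READNONE
	                                                : ATTR_READONLY;
}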
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
bool can_speculate)
bool readonly_memory)
{
LLVMValueRef args [] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex,
voffset,
LLVMConstInt(ctx->i1, 0, 0), /* glc */
LLVMConstInt(ctx->i1, 0, 0), /* slc */
};
if (HAVE_LLVM >= 0x0309) {
LLVMValueRef args [] = {
LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
vindex,
voffset,
LLVMConstInt(ctx->i1, 0, 0), /* glc */
LLVMConstInt(ctx->i1, 0, 0), /* slc */
};
return ac_build_intrinsic(ctx,
"llvm.amdgcn.buffer.load.format.v4f32",
ctx->v4f32, args, ARRAY_SIZE(args),
/* READNONE means writes can't affect it, while
* READONLY means that writes can affect it. */
can_speculate && HAVE_LLVM >= 0x0400 ?
AC_FUNC_ATTR_READNONE :
AC_FUNC_ATTR_READONLY);
return ac_build_intrinsic(ctx,
"llvm.amdgcn.buffer.load.format.v4f32",
ctx->v4f32, args, ARRAY_SIZE(args),
/* READNONE means writes can't
* affect it, while READONLY means
* that writes can affect it. */
readonly_memory && HAVE_LLVM >= 0x0400 ?
AC_FUNC_ATTR_READNONE :
AC_FUNC_ATTR_READONLY);
}
LLVMValueRef args[] = {
rsrc,
voffset,
vindex,
};
return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input",
ctx->v4f32, args, 3,
AC_FUNC_ATTR_READNONE |
AC_FUNC_ATTR_LEGACY);
}
/**
@@ -1439,30 +1481,30 @@ void ac_optimize_vs_outputs(struct ac_llvm_context *ctx,
* This is done by renumbering all PARAM exports.
*/
if (removed_any) {
uint8_t old_offset[VARYING_SLOT_MAX];
uint8_t current_offset[VARYING_SLOT_MAX];
unsigned new_count = 0;
unsigned out, i;
/* Make a copy of the offsets. We need the old version while
* we are modifying some of them. */
memcpy(old_offset, vs_output_param_offset,
sizeof(old_offset));
memcpy(current_offset, vs_output_param_offset,
sizeof(current_offset));
for (i = 0; i < exports.num; i++) {
unsigned offset = exports.exp[i].offset;
/* Update vs_output_param_offset. Multiple outputs can
* have the same offset.
*/
for (out = 0; out < num_outputs; out++) {
if (old_offset[out] == offset)
vs_output_param_offset[out] = i;
}
if (current_offset[out] != offset)
continue;
/* Change the PARAM offset in the instruction. */
LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
LLVMConstInt(ctx->i32,
V_008DFC_SQ_EXP_PARAM + i, 0));
LLVMSetOperand(exports.exp[i].inst, AC_EXP_TARGET,
LLVMConstInt(ctx->i32,
V_008DFC_SQ_EXP_PARAM + new_count, 0));
vs_output_param_offset[out] = new_count;
new_count++;
break;
}
}
*num_param_exports = exports.num;
*num_param_exports = new_count;
}
}
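Both sides of this hunk implement the same idea: after dead PARAM exports are removed, surviving exports are renumbered to consecutive offsets and every output slot that referenced an old offset is redirected. A compacting sketch under assumed bounds (the 64-entry scratch array and parameter names are illustrative):

#include <string.h>

/* surviving[i] holds the old offset of the i-th surviving export;
 * its new offset is simply i. Assumes num_outputs <= 64. */
static void renumber_params(unsigned char *offsets, unsigned num_outputs,
                            const unsigned *surviving, unsigned num_surviving)
{
	unsigned char old[64];
	memcpy(old, offsets, num_outputs);
	for (unsigned i = 0; i < num_surviving; i++)
		for (unsigned out = 0; out < num_outputs; out++)
			if (old[out] == surviving[i])
				offsets[out] = (unsigned char)i;
}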


@@ -40,20 +40,11 @@ struct ac_llvm_context {
LLVMTypeRef voidt;
LLVMTypeRef i1;
LLVMTypeRef i8;
LLVMTypeRef i16;
LLVMTypeRef i32;
LLVMTypeRef i64;
LLVMTypeRef f16;
LLVMTypeRef f32;
LLVMTypeRef f64;
LLVMTypeRef v4i32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef f32_0;
LLVMValueRef f32_1;
LLVMTypeRef v16i8;
unsigned range_md_kind;
unsigned invariant_load_md_kind;
@@ -152,14 +143,13 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
unsigned inst_offset,
unsigned glc,
unsigned slc,
bool can_speculate,
bool allow_smem);
bool readonly_memory);
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vindex,
LLVMValueRef voffset,
bool can_speculate);
bool readonly_memory);
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);


@@ -105,14 +105,17 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
return "fiji";
case CHIP_STONEY:
return "stoney";
#if HAVE_LLVM == 0x0308
case CHIP_POLARIS10:
return "tonga";
case CHIP_POLARIS11:
return "tonga";
#else
case CHIP_POLARIS10:
return "polaris10";
case CHIP_POLARIS11:
case CHIP_POLARIS12:
return "polaris11";
case CHIP_VEGA10:
case CHIP_RAVEN:
return "gfx900";
#endif
default:
return "";
}
@@ -220,13 +223,3 @@ ac_dump_module(LLVMModuleRef module)
fprintf(stderr, "%s", str);
LLVMDisposeMessage(str);
}
void
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
const char *name, int value)
{
char str[16];
snprintf(str, sizeof(str), "%i", value);
LLVMAddTargetDependentFunctionAttr(F, name, str);
}


@@ -66,11 +66,6 @@ void ac_dump_module(LLVMModuleRef module);
LLVMValueRef ac_llvm_get_called_value(LLVMValueRef call);
bool ac_llvm_is_function(LLVMValueRef v);
void
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
const char *name, int value);
#ifdef __cplusplus
}
#endif

File diff suppressed because it is too large.


@@ -41,12 +41,10 @@ struct ac_vs_variant_key {
uint32_t instance_rate_inputs;
uint32_t as_es:1;
uint32_t as_ls:1;
uint32_t export_prim_id:1;
};
struct ac_tes_variant_key {
uint32_t as_es:1;
uint32_t export_prim_id:1;
};
struct ac_tcs_variant_key {
@@ -130,7 +128,6 @@ struct ac_vs_output_info {
bool writes_pointsize;
bool writes_layer;
bool writes_viewport_index;
bool export_prim_id;
uint32_t export_mask;
unsigned pos_exports;
};
@@ -181,7 +178,6 @@ struct ac_shader_variant_info {
unsigned invocations;
unsigned gsvs_vertex_size;
unsigned max_gsvs_emit_size;
bool uses_prim_id;
} gs;
struct {
bool uses_prim_id;


@@ -20,13 +20,13 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef AC_SHADER_INFO_H
#define AC_SHADER_INFO_H
struct nir_shader;
struct ac_nir_compiler_options;
/* a NIR pass to gather all the info needed to optimise the allocation patterns for the RADV user sgprs */
struct ac_shader_info {
bool needs_push_constants;
uint32_t desc_set_used_mask;
@@ -42,12 +42,8 @@ struct ac_shader_info {
} cs;
};
/* A NIR pass to gather all the info needed to optimise the allocation patterns
* for the RADV user sgprs
*/
void
ac_nir_shader_info_pass(struct nir_shader *nir,
const struct ac_nir_compiler_options *options,
struct ac_shader_info *info);
#endif

File diff suppressed because it is too large.


@@ -1,220 +0,0 @@
/*
* Copyright © 2017 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sub license, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
* AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*
* The above copyright notice and this permission notice (including the
* next paragraph) shall be included in all copies or substantial portions
* of the Software.
*/
#ifndef AC_SURFACE_H
#define AC_SURFACE_H
#include <stdint.h>
#include "amd_family.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declarations. */
typedef void* ADDR_HANDLE;
struct amdgpu_gpu_info;
struct radeon_info;
#define RADEON_SURF_MAX_LEVELS 15
enum radeon_surf_mode {
RADEON_SURF_MODE_LINEAR_ALIGNED = 1,
RADEON_SURF_MODE_1D = 2,
RADEON_SURF_MODE_2D = 3,
};
/* These are defined exactly like GB_TILE_MODEn.MICRO_TILE_MODE_NEW. */
enum radeon_micro_mode {
RADEON_MICRO_MODE_DISPLAY = 0,
RADEON_MICRO_MODE_THIN = 1,
RADEON_MICRO_MODE_DEPTH = 2,
RADEON_MICRO_MODE_ROTATED = 3,
};
/* the first 16 bits are reserved for libdrm_radeon, don't use them */
#define RADEON_SURF_SCANOUT (1 << 16)
#define RADEON_SURF_ZBUFFER (1 << 17)
#define RADEON_SURF_SBUFFER (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
/* bits 19 and 20 are reserved for libdrm_radeon, don't use them */
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
#define RADEON_SURF_IMPORTED (1 << 24)
#define RADEON_SURF_OPTIMIZE_FOR_SPACE (1 << 25)
struct legacy_surf_level {
uint64_t offset;
uint64_t slice_size;
uint64_t dcc_offset;
uint64_t dcc_fast_clear_size;
uint16_t nblk_x;
uint16_t nblk_y;
enum radeon_surf_mode mode;
};
struct legacy_surf_layout {
unsigned bankw:4; /* max 8 */
unsigned bankh:4; /* max 8 */
unsigned mtilea:4; /* max 8 */
unsigned tile_split:13; /* max 4K */
unsigned stencil_tile_split:13; /* max 4K */
unsigned pipe_config:5; /* max 17 */
unsigned num_banks:5; /* max 16 */
unsigned macro_tile_index:4; /* max 15 */
/* Whether the depth miptree or stencil miptree as used by the DB are
* adjusted from their TC compatible form to ensure depth/stencil
* compatibility. If either is true, the corresponding plane cannot be
* sampled from.
*/
unsigned depth_adjusted:1;
unsigned stencil_adjusted:1;
struct legacy_surf_level level[RADEON_SURF_MAX_LEVELS];
struct legacy_surf_level stencil_level[RADEON_SURF_MAX_LEVELS];
uint8_t tiling_index[RADEON_SURF_MAX_LEVELS];
uint8_t stencil_tiling_index[RADEON_SURF_MAX_LEVELS];
};
/* Same as addrlib - AddrResourceType. */
enum gfx9_resource_type {
RADEON_RESOURCE_1D = 0,
RADEON_RESOURCE_2D,
RADEON_RESOURCE_3D,
};
struct gfx9_surf_flags {
uint16_t swizzle_mode; /* tile mode */
uint16_t epitch; /* (pitch - 1) or (height - 1) */
};
struct gfx9_surf_meta_flags {
unsigned rb_aligned:1; /* optimal for RBs */
unsigned pipe_aligned:1; /* optimal for TC */
};
struct gfx9_surf_layout {
struct gfx9_surf_flags surf; /* color or depth surface */
struct gfx9_surf_flags fmask; /* not added to surf_size */
struct gfx9_surf_flags stencil; /* added to surf_size, use stencil_offset */
struct gfx9_surf_meta_flags dcc; /* metadata of color */
struct gfx9_surf_meta_flags htile; /* metadata of depth and stencil */
struct gfx9_surf_meta_flags cmask; /* metadata of fmask */
enum gfx9_resource_type resource_type; /* 1D, 2D or 3D */
uint64_t surf_offset; /* 0 unless imported with an offset */
/* The size of the 2D plane containing all mipmap levels. */
uint64_t surf_slice_size;
uint16_t surf_pitch; /* in blocks */
uint16_t surf_height;
/* Mipmap level offset within the slice in bytes. Only valid for LINEAR. */
uint32_t offset[RADEON_SURF_MAX_LEVELS];
uint16_t dcc_pitch_max; /* (mip chain pitch - 1) */
uint64_t stencil_offset; /* separate stencil */
uint64_t fmask_size;
uint64_t cmask_size;
uint32_t fmask_alignment;
uint32_t cmask_alignment;
};
struct radeon_surf {
/* Format properties. */
unsigned blk_w:4;
unsigned blk_h:4;
unsigned bpe:5;
/* Number of mipmap levels where DCC is enabled starting from level 0.
* Non-zero levels may be disabled due to alignment constraints, but not
* the first level.
*/
unsigned num_dcc_levels:4;
unsigned is_linear:1;
/* Displayable, thin, depth, rotated. AKA D,S,Z,R swizzle modes. */
unsigned micro_tile_mode:3;
uint32_t flags;
/* These are return values. Some of them can be set by the caller, but
* they will be treated as hints (e.g. bankw, bankh) and might be
* changed by the calculator.
*/
uint64_t surf_size;
uint64_t dcc_size;
uint64_t htile_size;
uint32_t htile_slice_size;
uint32_t surf_alignment;
uint32_t dcc_alignment;
uint32_t htile_alignment;
union {
/* R600-VI return values.
*
* Some of them can be set by the caller if certain parameters are
* desirable. The allocator will try to obey them.
*/
struct legacy_surf_layout legacy;
/* GFX9+ return values. */
struct gfx9_surf_layout gfx9;
} u;
};
struct ac_surf_info {
uint32_t width;
uint32_t height;
uint32_t depth;
uint8_t samples;
uint8_t levels;
uint16_t array_size;
};
struct ac_surf_config {
struct ac_surf_info info;
unsigned is_3d : 1;
unsigned is_cube : 1;
};
ADDR_HANDLE amdgpu_addr_create(const struct radeon_info *info,
const struct amdgpu_gpu_info *amdinfo);
int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
const struct ac_surf_config * config,
enum radeon_surf_mode mode,
struct radeon_surf *surf);
#ifdef __cplusplus
}
#endif
#endif /* AC_SURFACE_H */
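The layouts above are selected through the radeon_surf union by chip class. A hypothetical accessor, assuming the definitions in this header (note that u.gfx9.offset[] is only valid for LINEAR surfaces, per the comment above):

/* Hypothetical helper for illustration: GFX9 keeps per-mip offsets in
 * u.gfx9.offset[] (LINEAR only), older chips in u.legacy.level[].offset. */
static uint64_t first_level_offset(const struct radeon_surf *surf, bool is_gfx9)
{
	return is_gfx9 ? surf->u.gfx9.offset[0]
	               : surf->u.legacy.level[0].offset;
}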


@@ -93,7 +93,6 @@ enum radeon_family {
CHIP_POLARIS11,
CHIP_POLARIS12,
CHIP_VEGA10,
CHIP_RAVEN,
CHIP_LAST,
};


@@ -36,7 +36,7 @@
// Gets bits for specified mask from specified src packed instance.
#define AMD_HSA_BITS_GET(src, mask) \
((src & mask) >> mask ## _SHIFT)
((src & mask) >> mask ## _SHIFT) \
/* Every amd_*_code_t has the following properties, which are composed of
* a number of bit fields. Every bit field has a mask (AMD_CODE_PROPERTY_*),


@@ -49,7 +49,6 @@ enum {
FAMILY_CZ,
FAMILY_PI,
FAMILY_AI,
FAMILY_RV,
FAMILY_LAST,
};
@@ -186,13 +185,4 @@ enum {
#define ASICREV_IS_VEGA10_P(eChipRev) \
((eChipRev) >= AI_VEGA10_P_A0 && (eChipRev) < AI_UNKNOWN)
/* RV specific rev IDs */
enum {
RAVEN_A0 = 0x01,
RAVEN_UNKNOWN = 0xFF
};
#define ASICREV_IS_RAVEN(eChipRev) \
((eChipRev) >= RAVEN_A0 && (eChipRev) < RAVEN_UNKNOWN)
#endif /* AMDGPU_ID_H */


@@ -1345,8 +1345,8 @@
#define V_008F14_IMG_DATA_FORMAT_RESERVED_56 0x38
#define V_008F14_IMG_DATA_FORMAT_4_4 0x39
#define V_008F14_IMG_DATA_FORMAT_6_5_5 0x3A
#define V_008F14_IMG_DATA_FORMAT_S8_16 0x3B
#define V_008F14_IMG_DATA_FORMAT_S8_32 0x3C
#define V_008F14_IMG_DATA_S8_16 0x3B
#define V_008F14_IMG_DATA_S8_32 0x3C
#define V_008F14_IMG_DATA_FORMAT_8_AS_32 0x3D
#define V_008F14_IMG_DATA_FORMAT_8_AS_32_32 0x3E
#define V_008F14_IMG_DATA_FORMAT_32_AS_32_32_32_32 0x3F
@@ -4074,10 +4074,6 @@
#define S_028060_PUNCHOUT_MODE(x) (((unsigned)(x) & 0x03) << 0)
#define G_028060_PUNCHOUT_MODE(x) (((x) >> 0) & 0x03)
#define C_028060_PUNCHOUT_MODE 0xFFFFFFFC
#define V_028060_AUTO 0
#define V_028060_FORCE_ON 1
#define V_028060_FORCE_OFF 2
#define V_028060_RESERVED 3
#define S_028060_POPS_DRAIN_PS_ON_OVERLAP(x) (((unsigned)(x) & 0x1) << 2)
#define G_028060_POPS_DRAIN_PS_ON_OVERLAP(x) (((x) >> 2) & 0x1)
#define C_028060_POPS_DRAIN_PS_ON_OVERLAP 0xFFFFFFFB


@@ -54,17 +54,6 @@
#define PKT3_WAIT_REG_MEM 0x3C
#define WAIT_REG_MEM_EQUAL 3
#define WAIT_REG_MEM_MEM_SPACE(x) (((unsigned)(x) & 0x3) << 4)
#define PKT3_COPY_DATA 0x40
#define COPY_DATA_SRC_SEL(x) ((x) & 0xf)
#define COPY_DATA_REG 0
#define COPY_DATA_MEM 1
#define COPY_DATA_PERF 4
#define COPY_DATA_IMM 5
#define COPY_DATA_TIMESTAMP 9
#define COPY_DATA_DST_SEL(x) (((unsigned)(x) & 0xf) << 8)
#define COPY_DATA_MEM_ASYNC 5
#define COPY_DATA_COUNT_SEL (1 << 16)
#define COPY_DATA_WR_CONFIRM (1 << 20)
#define PKT3_EVENT_WRITE 0x46
#define PKT3_EVENT_WRITE_EOP 0x47
#define EOP_DATA_SEL(x) ((x) << 29)


@@ -170,7 +170,7 @@
*/
/* fix CP DMA before uncommenting: */
/*#define PKT3_EVENT_WRITE_EOS 0x48*/ /* not on GFX9 */
#define PKT3_RELEASE_MEM 0x49 /* GFX9+ [any ring] or GFX8 [compute ring only] */
#define PKT3_RELEASE_MEM 0x49 /* GFX9+ (any ring) or GFX8 (compute ring only) */
#define PKT3_ONE_REG_WRITE 0x57 /* not on CIK */
#define PKT3_ACQUIRE_MEM 0x58 /* new for CIK */
#define PKT3_SET_CONFIG_REG 0x68
@@ -280,7 +280,6 @@
#define S_500_DSL_SEL(x) (((unsigned)(x) & 0x3) << 20)
#define V_500_DST_ADDR 0
#define V_500_GDS 1 /* program DAS to 1 as well */
#define V_500_NOWHERE 2 /* new for GFX9 */
#define V_500_DST_ADDR_TC_L2 3 /* new for CIK */
#define S_500_ENGINE(x) ((x) & 0x1)
#define V_500_ME 0


@@ -110,7 +110,7 @@ class IntTable:
[static] const typename name[] = { ... };
to filp.
"""
idxs = sorted(self.idxs) + [len(self.table)]
idxs = sorted(self.idxs) + [-1]
fragments = [
('\t/* %s */ %s' % (


@@ -59,22 +59,8 @@ VULKAN_SOURCES = \
$(VULKAN_GENERATED_FILES) \
$(VULKAN_FILES)
VULKAN_LIB_DEPS = \
libvulkan_common.la \
$(top_builddir)/src/vulkan/libvulkan_util.la \
$(top_builddir)/src/vulkan/libvulkan_wsi.la \
$(top_builddir)/src/amd/common/libamd_common.la \
$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
$(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/util/libmesautil.la \
$(LLVM_LIBS) \
$(LIBELF_LIBS) \
$(PTHREAD_LIBS) \
$(AMDGPU_LIBS) \
$(LIBDRM_LIBS) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS) \
-lm
VULKAN_LIB_DEPS =
if HAVE_PLATFORM_X11
AM_CPPFLAGS += \
@@ -84,7 +70,8 @@ AM_CPPFLAGS += \
VULKAN_SOURCES += $(VULKAN_WSI_X11_FILES)
VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS)
# FIXME: Use pkg-config for X11-xcb ldflags.
VULKAN_LIB_DEPS += $(XCB_DRI3_LIBS) -lX11-xcb
endif
@@ -102,6 +89,23 @@ endif
noinst_LTLIBRARIES = libvulkan_common.la
libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)
VULKAN_LIB_DEPS += \
libvulkan_common.la \
$(top_builddir)/src/vulkan/libvulkan_util.la \
$(top_builddir)/src/vulkan/libvulkan_wsi.la \
$(top_builddir)/src/amd/common/libamd_common.la \
$(top_builddir)/src/amd/addrlib/libamdgpu_addrlib.la \
$(top_builddir)/src/compiler/nir/libnir.la \
$(top_builddir)/src/util/libmesautil.la \
$(LLVM_LIBS) \
$(LIBELF_LIBS) \
$(PTHREAD_LIBS) \
$(AMDGPU_LIBS) \
$(LIBDRM_LIBS) \
$(PTHREAD_LIBS) \
$(DLOPEN_LIBS) \
-lm
nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)


@@ -51,7 +51,6 @@ VULKAN_FILES := \
radv_meta_fast_clear.c \
radv_meta_resolve.c \
radv_meta_resolve_cs.c \
radv_meta_resolve_fs.c \
radv_pass.c \
radv_pipeline.c \
radv_pipeline_cache.c \

File diff suppressed because it is too large.


@@ -37,7 +37,4 @@ enum {
RADV_DEBUG_NO_IBS = 0x200,
};
enum {
RADV_PERFTEST_BATCHCHAIN = 0x1,
};
#endif


@@ -33,7 +33,7 @@
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "util/strtod.h"
#include "vk_util.h"
#include "util/vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include <amdgpu_drm.h>
@@ -42,7 +42,6 @@
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"
static int
@@ -62,15 +61,6 @@ radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
return 0;
}
static void
radv_get_device_uuid(drmDevicePtr device, void *uuid) {
memset(uuid, 0, VK_UUID_SIZE);
memcpy((char*)uuid + 0, &device->businfo.pci->domain, 2);
memcpy((char*)uuid + 2, &device->businfo.pci->bus, 1);
memcpy((char*)uuid + 3, &device->businfo.pci->dev, 1);
memcpy((char*)uuid + 4, &device->businfo.pci->func, 1);
}
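radv_get_device_uuid above packs the PCI domain:bus:dev:func into the first five bytes of the 16-byte UUID and zeroes the rest. A standalone restatement of that layout (sample types are narrowed for clarity; the two-byte domain copy uses host endianness, as in the code):

#include <stdint.h>
#include <string.h>

#define VK_UUID_SIZE 16

/* Bytes 0-1: PCI domain, byte 2: bus, byte 3: device, byte 4: function,
 * bytes 5-15: zero. */
static void pack_pci_uuid(uint8_t uuid[VK_UUID_SIZE], uint16_t domain,
                          uint8_t bus, uint8_t dev, uint8_t func)
{
	memset(uuid, 0, VK_UUID_SIZE);
	memcpy(uuid + 0, &domain, 2);
	uuid[2] = bus;
	uuid[3] = dev;
	uuid[4] = func;
}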
static const VkExtensionProperties instance_extensions[] = {
{
.extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
@@ -98,10 +88,6 @@ static const VkExtensionProperties instance_extensions[] = {
.extensionName = VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME,
.specVersion = 1,
},
{
.extensionName = VK_KHX_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
.specVersion = 1,
},
};
static const VkExtensionProperties common_device_extensions[] = {
@@ -141,14 +127,6 @@ static const VkExtensionProperties common_device_extensions[] = {
.extensionName = VK_NV_DEDICATED_ALLOCATION_EXTENSION_NAME,
.specVersion = 1,
},
{
.extensionName = VK_KHX_EXTERNAL_MEMORY_EXTENSION_NAME,
.specVersion = 1,
},
{
.extensionName = VK_KHX_EXTERNAL_MEMORY_FD_EXTENSION_NAME,
.specVersion = 1,
},
};
static VkResult
@@ -209,40 +187,11 @@ is_extension_enabled(const VkExtensionProperties *extensions,
return false;
}
static const char *
get_chip_name(enum radeon_family family)
{
switch (family) {
case CHIP_TAHITI: return "AMD RADV TAHITI";
case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
case CHIP_VERDE: return "AMD RADV CAPE VERDE";
case CHIP_OLAND: return "AMD RADV OLAND";
case CHIP_HAINAN: return "AMD RADV HAINAN";
case CHIP_BONAIRE: return "AMD RADV BONAIRE";
case CHIP_KAVERI: return "AMD RADV KAVERI";
case CHIP_KABINI: return "AMD RADV KABINI";
case CHIP_HAWAII: return "AMD RADV HAWAII";
case CHIP_MULLINS: return "AMD RADV MULLINS";
case CHIP_TONGA: return "AMD RADV TONGA";
case CHIP_ICELAND: return "AMD RADV ICELAND";
case CHIP_CARRIZO: return "AMD RADV CARRIZO";
case CHIP_FIJI: return "AMD RADV FIJI";
case CHIP_POLARIS10: return "AMD RADV POLARIS10";
case CHIP_POLARIS11: return "AMD RADV POLARIS11";
case CHIP_POLARIS12: return "AMD RADV POLARIS12";
case CHIP_STONEY: return "AMD RADV STONEY";
case CHIP_VEGA10: return "AMD RADV VEGA";
case CHIP_RAVEN: return "AMD RADV RAVEN";
default: return "AMD RADV unknown";
}
}
static VkResult
radv_physical_device_init(struct radv_physical_device *device,
struct radv_instance *instance,
drmDevicePtr drm_device)
const char *path)
{
const char *path = drm_device->nodes[DRM_NODE_RENDER];
VkResult result;
drmVersionPtr version;
int fd;
@@ -270,8 +219,7 @@ radv_physical_device_init(struct radv_physical_device *device,
assert(strlen(path) < ARRAY_SIZE(device->path));
strncpy(device->path, path, ARRAY_SIZE(device->path));
device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
instance->perftest_flags);
device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags);
if (!device->ws) {
result = VK_ERROR_INCOMPATIBLE_DRIVER;
goto fail;
@@ -301,15 +249,7 @@ radv_physical_device_init(struct radv_physical_device *device,
goto fail;
fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
device->name = get_chip_name(device->rad_info.family);
radv_get_device_uuid(drm_device, device->device_uuid);
if (device->rad_info.family == CHIP_STONEY ||
device->rad_info.chip_class >= GFX9) {
device->has_rbplus = true;
device->rbplus_allowed = device->rad_info.family == CHIP_STONEY;
}
device->name = device->rad_info.name;
return VK_SUCCESS;
@@ -327,6 +267,7 @@ radv_physical_device_finish(struct radv_physical_device *device)
close(device->local_fd);
}
static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
VkSystemAllocationScope allocationScope)
@@ -368,11 +309,6 @@ static const struct debug_control radv_debug_options[] = {
{NULL, 0}
};
static const struct debug_control radv_perftest_options[] = {
{"batchchain", RADV_PERFTEST_BATCHCHAIN},
{NULL, 0}
};
VkResult radv_CreateInstance(
const VkInstanceCreateInfo* pCreateInfo,
const VkAllocationCallbacks* pAllocator,
@@ -430,9 +366,6 @@ VkResult radv_CreateInstance(
instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
radv_debug_options);
instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
radv_perftest_options);
*pInstance = radv_instance_to_handle(instance);
return VK_SUCCESS;
@@ -480,7 +413,7 @@ radv_enumerate_devices(struct radv_instance *instance)
result = radv_physical_device_init(instance->physicalDevices +
instance->physicalDeviceCount,
instance,
devices[i]);
devices[i]->nodes[DRM_NODE_RENDER]);
if (result == VK_SUCCESS)
++instance->physicalDeviceCount;
else if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
@@ -523,8 +456,8 @@ void radv_GetPhysicalDeviceFeatures(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceFeatures* pFeatures)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
bool is_gfx9 = pdevice->rad_info.chip_class >= GFX9;
// RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
memset(pFeatures, 0, sizeof(*pFeatures));
*pFeatures = (VkPhysicalDeviceFeatures) {
@@ -532,8 +465,8 @@ void radv_GetPhysicalDeviceFeatures(
.fullDrawIndexUint32 = true,
.imageCubeArray = true,
.independentBlend = true,
.geometryShader = !is_gfx9,
.tessellationShader = !is_gfx9,
.geometryShader = true,
.tessellationShader = true,
.sampleRateShading = false,
.dualSrcBlend = true,
.logicOp = true,
@@ -583,6 +516,28 @@ void radv_GetPhysicalDeviceFeatures2KHR(
return radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);
}
static uint32_t radv_get_driver_version()
{
const char *minor_string = strchr(VERSION, '.');
const char *patch_string = minor_string ? strchr(minor_string + 1, ','): NULL;
int major = atoi(VERSION);
int minor = minor_string ? atoi(minor_string + 1) : 0;
int patch = patch_string ? atoi(patch_string + 1) : 0;
if (strstr(VERSION, "devel")) {
if (patch == 0) {
patch = 99;
if (minor == 0) {
minor = 99;
--major;
} else
--minor;
} else
--patch;
}
uint32_t version = VK_MAKE_VERSION(major, minor, patch);
return version;
}
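radv_get_driver_version parses VERSION and, for "-devel" builds, steps the number to just below the upcoming release; note that as written the patch component is only found when the string contains a ',', so a dotted string like "17.1.0-devel" parses with patch 0. A worked example of the devel arithmetic for that input:

#include <assert.h>

int main(void)
{
	int major = 17, minor = 1, patch = 0; /* parsed from "17.1.0-devel" */
	/* Same decrement logic as the devel branch above. */
	if (patch == 0) {
		patch = 99;
		if (minor == 0) { minor = 99; --major; }
		else            { --minor; }
	} else {
		--patch;
	}
	assert(major == 17 && minor == 0 && patch == 99); /* 17.1.0 -> 17.0.99 */
	return 0;
}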
void radv_GetPhysicalDeviceProperties(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties* pProperties)
@@ -718,10 +673,10 @@ void radv_GetPhysicalDeviceProperties(
*pProperties = (VkPhysicalDeviceProperties) {
.apiVersion = VK_MAKE_VERSION(1, 0, 42),
.driverVersion = vk_get_driver_version(),
.driverVersion = radv_get_driver_version(),
.vendorID = 0x1002,
.deviceID = pdevice->rad_info.pci_id,
.deviceType = pdevice->rad_info.has_dedicated_vram ? VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU : VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
.deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU,
.limits = limits,
.sparseProperties = {0},
};
@@ -734,7 +689,6 @@ void radv_GetPhysicalDeviceProperties2KHR(
VkPhysicalDevice physicalDevice,
VkPhysicalDeviceProperties2KHR *pProperties)
{
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
vk_foreach_struct(ext, pProperties->pNext) {
@@ -745,13 +699,6 @@ void radv_GetPhysicalDeviceProperties2KHR(
properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
break;
}
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHX: {
VkPhysicalDeviceIDPropertiesKHX *properties = (VkPhysicalDeviceIDPropertiesKHX*)ext;
radv_device_get_cache_uuid(0, properties->driverUUID);
memcpy(properties->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
properties->deviceLUIDValid = false;
break;
}
default:
break;
}
@@ -765,7 +712,7 @@ static void radv_get_physical_device_queue_family_properties(
{
int num_queue_families = 1;
int idx;
if (pdevice->rad_info.num_compute_rings > 0 &&
if (pdevice->rad_info.compute_rings > 0 &&
pdevice->rad_info.chip_class >= CIK &&
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
num_queue_families++;
@@ -792,7 +739,7 @@ static void radv_get_physical_device_queue_family_properties(
idx++;
}
if (pdevice->rad_info.num_compute_rings > 0 &&
if (pdevice->rad_info.compute_rings > 0 &&
pdevice->rad_info.chip_class >= CIK &&
!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
if (*pCount > idx) {
@@ -800,7 +747,7 @@ static void radv_get_physical_device_queue_family_properties(
.queueFlags = VK_QUEUE_COMPUTE_BIT |
VK_QUEUE_TRANSFER_BIT |
VK_QUEUE_SPARSE_BINDING_BIT,
.queueCount = pdevice->rad_info.num_compute_rings,
.queueCount = pdevice->rad_info.compute_rings,
.timestampValidBits = 64,
.minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
};
@@ -884,11 +831,11 @@ void radv_GetPhysicalDeviceMemoryProperties(
pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT;
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) {
.size = physical_device->rad_info.vram_size -
physical_device->rad_info.vram_vis_size,
physical_device->rad_info.visible_vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) {
.size = physical_device->rad_info.vram_vis_size,
.size = physical_device->rad_info.visible_vram_size,
.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
};
pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) {
@@ -970,9 +917,6 @@ radv_device_init_gs_info(struct radv_device *device)
case CHIP_FIJI:
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGA10:
case CHIP_RAVEN:
device->gs_table_depth = 32;
return;
default:
@@ -1096,7 +1040,6 @@ VkResult radv_CreateDevice(
case RADV_QUEUE_COMPUTE:
si_cs_emit_cache_flush(device->flush_cs[family],
device->physical_device->rad_info.chip_class,
NULL, 0,
family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
RADV_CMD_FLAG_INV_ICACHE |
RADV_CMD_FLAG_INV_SMEM_L1 |
@@ -1112,7 +1055,6 @@ VkResult radv_CreateDevice(
case RADV_QUEUE_COMPUTE:
si_cs_emit_cache_flush(device->flush_shader_cs[family],
device->physical_device->rad_info.chip_class,
NULL, 0,
family == RADV_QUEUE_COMPUTE && device->physical_device->rad_info.chip_class >= CIK,
family == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH) |
RADV_CMD_FLAG_INV_ICACHE |
@@ -1475,11 +1417,12 @@ radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buff
max_offchip_buffers = MIN2(max_offchip_buffers, 126);
break;
case CIK:
case VI:
case GFX9:
default:
max_offchip_buffers = MIN2(max_offchip_buffers, 508);
break;
case VI:
default:
max_offchip_buffers = MIN2(max_offchip_buffers, 512);
break;
}
*max_offchip_buffers_p = max_offchip_buffers;
@@ -1715,10 +1658,6 @@ radv_get_preamble_cs(struct radv_queue *queue,
S_030938_SIZE(tess_factor_ring_size / 4));
radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
tf_va >> 8);
if (queue->device->physical_device->rad_info.chip_class >= GFX9) {
radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
tf_va >> 40);
}
radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
} else {
radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
@@ -1762,7 +1701,6 @@ radv_get_preamble_cs(struct radv_queue *queue,
if (!i) {
si_cs_emit_cache_flush(cs,
queue->device->physical_device->rad_info.chip_class,
NULL, 0,
queue->queue_family_index == RING_COMPUTE &&
queue->device->physical_device->rad_info.chip_class >= CIK,
RADV_CMD_FLAG_INV_ICACHE |
@@ -2076,7 +2014,7 @@ VkResult radv_AllocateMemory(
VkResult result;
enum radeon_bo_domain domain;
uint32_t flags = 0;
const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info = NULL;
assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
if (pAllocateInfo->allocationSize == 0) {
@@ -2085,10 +2023,15 @@ VkResult radv_AllocateMemory(
return VK_SUCCESS;
}
const VkImportMemoryFdInfoKHX *import_info =
vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHX);
const VkDedicatedAllocationMemoryAllocateInfoNV *dedicate_info =
vk_find_struct_const(pAllocateInfo->pNext, DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV);
vk_foreach_struct(ext, pAllocateInfo->pNext) {
switch (ext->sType) {
case VK_STRUCTURE_TYPE_DEDICATED_ALLOCATION_MEMORY_ALLOCATE_INFO_NV:
dedicate_info = (const VkDedicatedAllocationMemoryAllocateInfoNV *)ext;
break;
default:
break;
}
}
mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
@@ -2103,18 +2046,6 @@ VkResult radv_AllocateMemory(
mem->buffer = NULL;
}
if (import_info) {
assert(import_info->handleType ==
VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
NULL, NULL);
if (!mem->bo) {
result = VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
goto fail;
} else
goto out_success;
}
uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE ||
pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED)
@@ -2138,7 +2069,7 @@ VkResult radv_AllocateMemory(
goto fail;
}
mem->type_index = pAllocateInfo->memoryTypeIndex;
out_success:
*pMem = radv_device_memory_to_handle(mem);
return VK_SUCCESS;
@@ -2674,9 +2605,9 @@ static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
if (stencil)
return image->surface.u.legacy.stencil_tiling_index[level];
return image->surface.stencil_tiling_index[level];
else
return image->surface.u.legacy.tiling_index[level];
return image->surface.tiling_index[level];
}
static uint32_t radv_surface_layer_count(struct radv_image_view *iview)
@@ -2692,68 +2623,24 @@ radv_initialise_color_surface(struct radv_device *device,
const struct vk_format_description *desc;
unsigned ntype, format, swap, endian;
unsigned blend_clamp = 0, blend_bypass = 0;
unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
uint64_t va;
const struct radeon_surf *surf = &iview->image->surface;
const struct radeon_surf_level *level_info = &surf->level[iview->base_mip];
desc = vk_format_description(iview->vk_format);
memset(cb, 0, sizeof(*cb));
/* Intensity is implemented as Red, so treat it that way. */
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
if (device->physical_device->rad_info.chip_class >= GFX9) {
struct gfx9_surf_meta_flags meta;
if (iview->image->dcc_offset)
meta = iview->image->surface.u.gfx9.dcc;
else
meta = iview->image->surface.u.gfx9.cmask;
cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
S_028C74_FMASK_SW_MODE(iview->image->surface.u.gfx9.fmask.swizzle_mode) |
S_028C74_RB_ALIGNED(meta.rb_aligned) |
S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
va += iview->image->surface.u.gfx9.surf_offset >> 8;
} else {
const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
va += level_info->offset;
pitch_tile_max = level_info->nblk_x / 8 - 1;
slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
if (iview->image->fmask.size) {
if (device->physical_device->rad_info.chip_class >= CIK)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
} else {
/* This must be set for fast clear to work without FMASK. */
if (device->physical_device->rad_info.chip_class >= CIK)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
}
}
va += level_info->offset;
cb->cb_color_base = va >> 8;
/* CMASK variables */
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->cmask.offset;
cb->cb_color_cmask = va >> 8;
cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
va += iview->image->dcc_offset;
@@ -2763,6 +2650,18 @@ radv_initialise_color_surface(struct radv_device *device,
cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
S_028C6C_SLICE_MAX(iview->base_layer + max_slice - 1);
cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
pitch_tile_max = level_info->nblk_x / 8 - 1;
slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);
cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
/* Intensity is implemented as Red, so treat it that way. */
cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
S_028C74_TILE_MODE_INDEX(tile_mode_index);
if (iview->image->info.samples > 1) {
unsigned log_samples = util_logbase2(iview->image->info.samples);
@@ -2772,9 +2671,18 @@ radv_initialise_color_surface(struct radv_device *device,
if (iview->image->fmask.size) {
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
if (device->physical_device->rad_info.chip_class >= CIK)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
cb->cb_color_fmask = va >> 8;
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
} else {
/* This must be set for fast clear to work without FMASK. */
if (device->physical_device->rad_info.chip_class >= CIK)
cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
cb->cb_color_fmask = cb->cb_color_base;
cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
}
ntype = radv_translate_color_numformat(iview->vk_format,
@@ -2827,7 +2735,7 @@ radv_initialise_color_surface(struct radv_device *device,
!(device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
if (iview->image->surface.dcc_size && iview->base_mip < surf->num_dcc_levels)
if (iview->image->surface.dcc_size && level_info->dcc_enabled)
cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
if (device->physical_device->rad_info.chip_class >= VI) {
@@ -2846,24 +2754,9 @@ radv_initialise_color_surface(struct radv_device *device,
/* This must be set for fast clear to work without FMASK. */
if (!iview->image->fmask.size &&
device->physical_device->rad_info.chip_class == SI) {
unsigned bankh = util_logbase2(iview->image->surface.u.legacy.bankh);
unsigned bankh = util_logbase2(iview->image->surface.bankh);
cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
}
if (device->physical_device->rad_info.chip_class >= GFX9) {
uint32_t max_slice = radv_surface_layer_count(iview);
unsigned mip0_depth = iview->base_layer + max_slice - 1;
cb->cb_color_view |= S_028C6C_MIP_LEVEL(iview->base_mip);
cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
S_028C74_RESOURCE_TYPE(iview->image->surface.u.gfx9.resource_type);
cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(iview->image->info.width - 1) |
S_028C68_MIP0_HEIGHT(iview->image->info.height - 1) |
S_028C68_MAX_MIP(iview->image->info.levels);
cb->gfx9_epitch = S_0287A0_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
}
}
static void
@@ -2872,8 +2765,9 @@ radv_initialise_ds_surface(struct radv_device *device,
struct radv_image_view *iview)
{
unsigned level = iview->base_mip;
unsigned format, stencil_format;
unsigned format;
uint64_t va, s_offs, z_offs;
const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
bool stencil_only = false;
memset(ds, 0, sizeof(*ds));
switch (iview->vk_format) {
@@ -2895,121 +2789,98 @@ radv_initialise_ds_surface(struct radv_device *device,
break;
case VK_FORMAT_S8_UINT:
stencil_only = true;
level_info = &iview->image->surface.stencil_level[level];
break;
default:
break;
}
format = radv_translate_dbformat(iview->vk_format);
stencil_format = iview->image->surface.flags & RADEON_SURF_SBUFFER ?
V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
s_offs = z_offs = va;
z_offs += iview->image->surface.level[level].offset;
s_offs += iview->image->surface.stencil_level[level].offset;
uint32_t max_slice = radv_surface_layer_count(iview);
ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
S_028008_SLICE_MAX(iview->base_layer + max_slice - 1);
ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
ds->db_htile_data_base = 0;
ds->db_htile_surface = 0;
if (iview->image->info.samples > 1)
ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
s_offs = z_offs = va;
if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
else
ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);
if (device->physical_device->rad_info.chip_class >= GFX9) {
assert(iview->image->surface.u.gfx9.surf_offset == 0);
s_offs += iview->image->surface.u.gfx9.stencil_offset;
ds->db_z_info = S_028038_FORMAT(format) |
S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
S_028038_SW_MODE(iview->image->surface.u.gfx9.surf.swizzle_mode) |
S_028038_MAXMIP(iview->image->info.levels - 1);
ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
S_02803C_SW_MODE(iview->image->surface.u.gfx9.stencil.swizzle_mode);
ds->db_z_info2 = S_028068_EPITCH(iview->image->surface.u.gfx9.surf.epitch);
ds->db_stencil_info2 = S_02806C_EPITCH(iview->image->surface.u.gfx9.stencil.epitch);
ds->db_depth_view |= S_028008_MIPID(level);
ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
S_02801C_Y_MAX(iview->image->info.height - 1);
/* Only use HTILE for the first level. */
if (iview->image->surface.htile_size && !level) {
ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
/* Use all of the htile_buffer for depth if there's no stencil. */
ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
iview->image->htile_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
S_028ABC_PIPE_ALIGNED(iview->image->surface.u.gfx9.htile.pipe_aligned) |
S_028ABC_RB_ALIGNED(iview->image->surface.u.gfx9.htile.rb_aligned);
}
} else {
const struct legacy_surf_level *level_info = &iview->image->surface.u.legacy.level[level];
if (device->physical_device->rad_info.chip_class >= CIK) {
struct radeon_info *info = &device->physical_device->rad_info;
unsigned tiling_index = iview->image->surface.tiling_index[level];
unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
unsigned macro_index = iview->image->surface.macro_tile_index;
unsigned tile_mode = info->si_tile_mode_array[tiling_index];
unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
if (stencil_only)
level_info = &iview->image->surface.u.legacy.stencil_level[level];
tile_mode = stencil_tile_mode;
z_offs += iview->image->surface.u.legacy.level[level].offset;
s_offs += iview->image->surface.u.legacy.stencil_level[level].offset;
ds->db_depth_info |=
S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
} else {
unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
tile_mode_index = si_tile_mode_index(iview->image, level, true);
ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
}
ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
ds->db_stencil_info = S_028044_FORMAT(stencil_format);
if (iview->image->surface.htile_size && !level) {
ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
S_028040_ALLOW_EXPCLEAR(1);
if (iview->image->info.samples > 1)
ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
/* Workaround: For a not yet understood reason, the
* combination of MSAA, fast stencil clear and stencil
* decompress messes with subsequent stencil buffer
* uses. The problem was reproduced on Verde, Bonaire,
* Tonga, and Carrizo.
*
* Disabling EXPCLEAR works around the problem.
*
* Check piglit's arb_texture_multisample-stencil-clear
* test if you want to try changing this.
*/
if (iview->image->info.samples <= 1)
ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
} else
/* Use all of the htile_buffer for depth if there's no stencil. */
ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
if (device->physical_device->rad_info.chip_class >= CIK) {
struct radeon_info *info = &device->physical_device->rad_info;
unsigned tiling_index = iview->image->surface.u.legacy.tiling_index[level];
unsigned stencil_index = iview->image->surface.u.legacy.stencil_tiling_index[level];
unsigned macro_index = iview->image->surface.u.legacy.macro_tile_index;
unsigned tile_mode = info->si_tile_mode_array[tiling_index];
unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
if (stencil_only)
tile_mode = stencil_tile_mode;
ds->db_depth_info |=
S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
} else {
unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
tile_mode_index = si_tile_mode_index(iview->image, level, true);
ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
}
ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
if (iview->image->surface.htile_size && !level) {
ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
if (!(iview->image->surface.flags & RADEON_SURF_SBUFFER))
/* Use all of the htile_buffer for depth if there's no stencil. */
ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
iview->image->htile_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
}
va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
iview->image->htile_offset;
ds->db_htile_data_base = va >> 8;
ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
} else {
ds->db_htile_data_base = 0;
ds->db_htile_surface = 0;
}
ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
}
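
Both paths above program DB_HTILE_DATA_BASE in units of 256 bytes, which is why the virtual address is shifted right by 8. A minimal sketch of that computation, assuming the HTILE allocation is 256-byte aligned (the shift silently requires it; names follow the code above):

    /* Sketch: HTILE base programming. */
    uint64_t htile_va = device->ws->buffer_get_va(iview->bo) +
                        iview->image->offset + iview->image->htile_offset;
    assert((htile_va & 0xff) == 0);          /* 256-byte alignment assumed */
    ds->db_htile_data_base = htile_va >> 8;  /* address in 256-byte units */
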
VkResult radv_CreateFramebuffer(
@@ -3243,6 +3114,7 @@ void radv_DestroySampler(
vk_free2(&device->alloc, pAllocator, sampler);
}
/* vk_icd.h does not declare this function, so we declare it here to
* suppress Wmissing-prototypes.
*/
@@ -3286,34 +3158,3 @@ vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
return VK_SUCCESS;
}
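
As a usage sketch of the negotiation above: the loader proposes the highest interface version it supports and the ICD clamps it to its own maximum (the initial value of 4 below is hypothetical):

    uint32_t version = 4;   /* hypothetical loader maximum */
    vk_icdNegotiateLoaderICDInterfaceVersion(&version);
    /* version is now MIN2(4, 3) == 3, the highest this driver implements */
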
VkResult radv_GetMemoryFdKHX(VkDevice _device,
VkDeviceMemory _memory,
VkExternalMemoryHandleTypeFlagsKHX handleType,
int *pFD)
{
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_device_memory, memory, _memory);
/* We support only one handle type. */
assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX);
bool ret = radv_get_memory_fd(device, memory, pFD);
if (ret == false)
return VK_ERROR_OUT_OF_DEVICE_MEMORY;
return VK_SUCCESS;
}
VkResult radv_GetMemoryFdPropertiesKHX(VkDevice _device,
VkExternalMemoryHandleTypeFlagBitsKHX handleType,
int fd,
VkMemoryFdPropertiesKHX *pMemoryFdProperties)
{
/* The valid usage section for this function says:
*
* "handleType must not be one of the handle types defined as opaque."
*
* Since we only handle opaque handles for now, there are no FD properties.
*/
return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHX;
}
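
A minimal application-side sketch of exporting an opaque FD through the entry point above (error handling elided; the opaque FD is the only handle type accepted here):

    int fd = -1;
    VkResult res = vkGetMemoryFdKHX(device, memory,
                                    VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX,
                                    &fd);
    if (res == VK_SUCCESS) {
            /* hand fd to another process or API, then close(fd) when done */
    }
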


@@ -42,9 +42,6 @@ supported_extensions = [
'VK_KHR_wayland_surface',
'VK_KHR_xcb_surface',
'VK_KHR_xlib_surface',
'VK_KHX_external_memory_capabilities',
'VK_KHX_external_memory',
'VK_KHX_external_memory_fd',
]
# We generate a static hash table for entry point lookup


@@ -28,8 +28,6 @@
#include "sid.h"
#include "r600d_common.h"
#include "vk_util.h"
#include "util/u_half.h"
#include "util/format_srgb.h"
#include "util/format_r11g11b10f.h"
@@ -599,13 +597,13 @@ radv_physical_device_get_format_properties(struct radv_physical_device *physical
tiled |= VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BLEND_BIT;
}
}
if (tiled && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
tiled |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
}
}
if (linear && util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
if (util_is_power_of_two(vk_format_get_blocksize(format)) && !scaled) {
linear |= VK_FORMAT_FEATURE_TRANSFER_SRC_BIT_KHR |
VK_FORMAT_FEATURE_TRANSFER_DST_BIT_KHR;
}
@@ -1008,11 +1006,16 @@ void radv_GetPhysicalDeviceFormatProperties2KHR(
&pFormatProperties->formatProperties);
}
static VkResult radv_get_image_format_properties(struct radv_physical_device *physical_device,
const VkPhysicalDeviceImageFormatInfo2KHR *info,
VkImageFormatProperties *pImageFormatProperties)
VkResult radv_GetPhysicalDeviceImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkImageType type,
VkImageTiling tiling,
VkImageUsageFlags usage,
VkImageCreateFlags createFlags,
VkImageFormatProperties* pImageFormatProperties)
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
VkFormatProperties format_props;
VkFormatFeatureFlags format_feature_flags;
VkExtent3D maxExtent;
@@ -1020,11 +1023,11 @@ static VkResult radv_get_image_format_properties(struct radv_physical_device *ph
uint32_t maxArraySize;
VkSampleCountFlags sampleCounts = VK_SAMPLE_COUNT_1_BIT;
radv_physical_device_get_format_properties(physical_device, info->format,
radv_physical_device_get_format_properties(physical_device, format,
&format_props);
if (info->tiling == VK_IMAGE_TILING_LINEAR) {
if (tiling == VK_IMAGE_TILING_LINEAR) {
format_feature_flags = format_props.linearTilingFeatures;
} else if (info->tiling == VK_IMAGE_TILING_OPTIMAL) {
} else if (tiling == VK_IMAGE_TILING_OPTIMAL) {
format_feature_flags = format_props.optimalTilingFeatures;
} else {
unreachable("bad VkImageTiling");
@@ -1033,7 +1036,7 @@ static VkResult radv_get_image_format_properties(struct radv_physical_device *ph
if (format_feature_flags == 0)
goto unsupported;
switch (info->type) {
switch (type) {
default:
unreachable("bad vkimage type\n");
case VK_IMAGE_TYPE_1D:
@@ -1059,34 +1062,34 @@ static VkResult radv_get_image_format_properties(struct radv_physical_device *ph
break;
}
if (info->tiling == VK_IMAGE_TILING_OPTIMAL &&
info->type == VK_IMAGE_TYPE_2D &&
if (tiling == VK_IMAGE_TILING_OPTIMAL &&
type == VK_IMAGE_TYPE_2D &&
(format_feature_flags & (VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT |
VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
!(info->flags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
!(info->usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
!(createFlags & VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT) &&
!(usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
sampleCounts |= VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
}
if (info->usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
if (usage & VK_IMAGE_USAGE_SAMPLED_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
goto unsupported;
}
}
if (info->usage & VK_IMAGE_USAGE_STORAGE_BIT) {
if (usage & VK_IMAGE_USAGE_STORAGE_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
goto unsupported;
}
}
if (info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
if (usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
goto unsupported;
}
}
if (info->usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
if (usage & VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) {
if (!(format_feature_flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
goto unsupported;
}
@@ -1117,132 +1120,18 @@ unsupported:
return VK_ERROR_FORMAT_NOT_SUPPORTED;
}
VkResult radv_GetPhysicalDeviceImageFormatProperties(
VkPhysicalDevice physicalDevice,
VkFormat format,
VkImageType type,
VkImageTiling tiling,
VkImageUsageFlags usage,
VkImageCreateFlags createFlags,
VkImageFormatProperties* pImageFormatProperties)
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
const VkPhysicalDeviceImageFormatInfo2KHR info = {
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
.pNext = NULL,
.format = format,
.type = type,
.tiling = tiling,
.usage = usage,
.flags = createFlags,
};
return radv_get_image_format_properties(physical_device, &info,
pImageFormatProperties);
}
static void
get_external_image_format_properties(const VkPhysicalDeviceImageFormatInfo2KHR *pImageFormatInfo,
VkExternalMemoryPropertiesKHX *external_properties)
{
VkExternalMemoryFeatureFlagBitsKHX flags = 0;
VkExternalMemoryHandleTypeFlagsKHX export_flags = 0;
VkExternalMemoryHandleTypeFlagsKHX compat_flags = 0;
switch (pImageFormatInfo->type) {
case VK_IMAGE_TYPE_2D:
flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_KHX|VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHX|VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHX;
compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX;
break;
default:
break;
}
*external_properties = (VkExternalMemoryPropertiesKHX) {
.externalMemoryFeatures = flags,
.exportFromImportedHandleTypes = export_flags,
.compatibleHandleTypes = compat_flags,
};
}
VkResult radv_GetPhysicalDeviceImageFormatProperties2KHR(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceImageFormatInfo2KHR *base_info,
VkImageFormatProperties2KHR *base_props)
const VkPhysicalDeviceImageFormatInfo2KHR* pImageFormatInfo,
VkImageFormatProperties2KHR *pImageFormatProperties)
{
RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
const VkPhysicalDeviceExternalImageFormatInfoKHX *external_info = NULL;
VkExternalImageFormatPropertiesKHX *external_props = NULL;
VkResult result;
result = radv_get_image_format_properties(physical_device, base_info,
&base_props->imageFormatProperties);
if (result != VK_SUCCESS)
return result;
/* Extract input structs */
vk_foreach_struct_const(s, base_info->pNext) {
switch (s->sType) {
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHX:
external_info = (const void *) s;
break;
default:
break;
}
}
/* Extract output structs */
vk_foreach_struct(s, base_props->pNext) {
switch (s->sType) {
case VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHX:
external_props = (void *) s;
break;
default:
break;
}
}
/* From the Vulkan 1.0.42 spec:
*
* If handleType is 0, vkGetPhysicalDeviceImageFormatProperties2KHR will
* behave as if VkPhysicalDeviceExternalImageFormatInfoKHX was not
* present and VkExternalImageFormatPropertiesKHX will be ignored.
*/
if (external_info && external_info->handleType != 0) {
switch (external_info->handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX:
get_external_image_format_properties(base_info, &external_props->externalMemoryProperties);
break;
default:
/* From the Vulkan 1.0.42 spec:
*
* If handleType is not compatible with the [parameters] specified
* in VkPhysicalDeviceImageFormatInfo2KHR, then
* vkGetPhysicalDeviceImageFormatProperties2KHR returns
* VK_ERROR_FORMAT_NOT_SUPPORTED.
*/
result = vk_errorf(VK_ERROR_FORMAT_NOT_SUPPORTED,
"unsupported VkExternalMemoryTypeFlagBitsKHX 0x%x",
external_info->handleType);
goto fail;
}
}
return VK_SUCCESS;
fail:
if (result == VK_ERROR_FORMAT_NOT_SUPPORTED) {
/* From the Vulkan 1.0.42 spec:
*
* If the combination of parameters to
* vkGetPhysicalDeviceImageFormatProperties2KHR is not supported by
* the implementation for use in vkCreateImage, then all members of
* imageFormatProperties will be filled with zero.
*/
base_props->imageFormatProperties = (VkImageFormatProperties) {0};
}
return result;
return radv_GetPhysicalDeviceImageFormatProperties(physicalDevice,
pImageFormatInfo->format,
pImageFormatInfo->type,
pImageFormatInfo->tiling,
pImageFormatInfo->usage,
pImageFormatInfo->flags,
&pImageFormatProperties->imageFormatProperties);
}
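
For reference, a sketch of how an application drives the pNext parsing above: chain a VkPhysicalDeviceExternalImageFormatInfoKHX into the query and a VkExternalImageFormatPropertiesKHX into the result (format, type and usage values below are illustrative):

    VkPhysicalDeviceExternalImageFormatInfoKHX ext_info = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_IMAGE_FORMAT_INFO_KHX,
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX,
    };
    VkPhysicalDeviceImageFormatInfo2KHR info = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
            .pNext = &ext_info,
            .format = VK_FORMAT_R8G8B8A8_UNORM,
            .type = VK_IMAGE_TYPE_2D,
            .tiling = VK_IMAGE_TILING_OPTIMAL,
            .usage = VK_IMAGE_USAGE_SAMPLED_BIT,
    };
    VkExternalImageFormatPropertiesKHX ext_props = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_IMAGE_FORMAT_PROPERTIES_KHX,
    };
    VkImageFormatProperties2KHR props = {
            .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
            .pNext = &ext_props,
    };
    vkGetPhysicalDeviceImageFormatProperties2KHR(physical_device, &info, &props);
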
void radv_GetPhysicalDeviceSparseImageFormatProperties(
@@ -1268,28 +1157,3 @@ void radv_GetPhysicalDeviceSparseImageFormatProperties2KHR(
/* Sparse images are not yet supported. */
*pPropertyCount = 0;
}
void radv_GetPhysicalDeviceExternalBufferPropertiesKHX(
VkPhysicalDevice physicalDevice,
const VkPhysicalDeviceExternalBufferInfoKHX *pExternalBufferInfo,
VkExternalBufferPropertiesKHX *pExternalBufferProperties)
{
VkExternalMemoryFeatureFlagBitsKHX flags = 0;
VkExternalMemoryHandleTypeFlagsKHX export_flags = 0;
VkExternalMemoryHandleTypeFlagsKHX compat_flags = 0;
switch(pExternalBufferInfo->handleType) {
case VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX:
flags = VK_EXTERNAL_MEMORY_FEATURE_DEDICATED_ONLY_BIT_KHX |
VK_EXTERNAL_MEMORY_FEATURE_EXPORTABLE_BIT_KHX |
VK_EXTERNAL_MEMORY_FEATURE_IMPORTABLE_BIT_KHX;
compat_flags = export_flags = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX;
break;
default:
break;
}
pExternalBufferProperties->externalMemoryProperties = (VkExternalMemoryPropertiesKHX) {
.externalMemoryFeatures = flags,
.exportFromImportedHandleTypes = export_flags,
.compatibleHandleTypes = compat_flags,
};
}
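
The buffer-side counterpart, sketched the same way (the sType constants are assumed from the KHX extension headers; the usage value is illustrative):

    VkPhysicalDeviceExternalBufferInfoKHX buf_info = {
            .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_BUFFER_INFO_KHX,
            .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT,
            .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHX,
    };
    VkExternalBufferPropertiesKHX buf_props = {
            .sType = VK_STRUCTURE_TYPE_EXTERNAL_BUFFER_PROPERTIES_KHX,
    };
    vkGetPhysicalDeviceExternalBufferPropertiesKHX(physical_device, &buf_info, &buf_props);
    /* buf_props.externalMemoryProperties now carries the export/import flags */
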


@@ -29,7 +29,6 @@
#include "vk_format.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "gfx9d.h"
#include "util/debug.h"
static unsigned
radv_choose_tiling(struct radv_device *device,
@@ -104,7 +103,8 @@ radv_init_surface(struct radv_device *device,
}
if (is_stencil)
surface->flags |= RADEON_SURF_SBUFFER;
surface->flags |= RADEON_SURF_SBUFFER |
RADEON_SURF_HAS_SBUFFER_MIPTREE;
surface->flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
@@ -130,9 +130,9 @@ static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
if (stencil)
return image->surface.u.legacy.stencil_tiling_index[level];
return image->surface.stencil_tiling_index[level];
else
return image->surface.u.legacy.tiling_index[level];
return image->surface.tiling_index[level];
}
static unsigned radv_map_swizzle(unsigned swizzle)
@@ -190,80 +190,33 @@ radv_make_buffer_descriptor(struct radv_device *device,
static void
si_set_mutable_tex_desc_fields(struct radv_device *device,
struct radv_image *image,
const struct legacy_surf_level *base_level_info,
const struct radeon_surf_level *base_level_info,
unsigned base_level, unsigned first_level,
unsigned block_width, bool is_stencil,
uint32_t *state)
{
uint64_t gpu_address = device->ws->buffer_get_va(image->bo) + image->offset;
uint64_t va = gpu_address;
uint64_t va = gpu_address + base_level_info->offset;
unsigned pitch = base_level_info->nblk_x * block_width;
enum chip_class chip_class = device->physical_device->rad_info.chip_class;
uint64_t meta_va = 0;
if (chip_class >= GFX9) {
if (is_stencil)
va += image->surface.u.gfx9.stencil_offset;
else
va += image->surface.u.gfx9.surf_offset;
} else
va += base_level_info->offset;
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[3] &= C_008F1C_TILING_INDEX;
state[4] &= C_008F20_PITCH_GFX6;
state[6] &= C_008F28_COMPRESSION_EN;
assert(!(va & 255));
state[0] = va >> 8;
state[1] &= C_008F14_BASE_ADDRESS_HI;
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
state[3] |= S_008F1C_TILING_INDEX(si_tile_mode_index(image, base_level,
is_stencil));
state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
if (chip_class >= VI) {
state[6] &= C_008F28_COMPRESSION_EN;
state[7] = 0;
if (image->surface.dcc_size && first_level < image->surface.num_dcc_levels) {
uint64_t meta_va = gpu_address + image->dcc_offset;
if (chip_class <= VI)
meta_va += base_level_info->dcc_offset;
state[6] |= S_008F28_COMPRESSION_EN(1);
state[7] = meta_va >> 8;
}
}
if (chip_class >= GFX9) {
state[3] &= C_008F1C_SW_MODE;
state[4] &= C_008F20_PITCH_GFX9;
if (is_stencil) {
state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.stencil.swizzle_mode);
state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.stencil.epitch);
} else {
state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.surf.swizzle_mode);
state[4] |= S_008F20_PITCH_GFX9(image->surface.u.gfx9.surf.epitch);
}
state[5] &= C_008F24_META_DATA_ADDRESS &
C_008F24_META_PIPE_ALIGNED &
C_008F24_META_RB_ALIGNED;
if (meta_va) {
struct gfx9_surf_meta_flags meta;
if (image->dcc_offset)
meta = image->surface.u.gfx9.dcc;
else
meta = image->surface.u.gfx9.htile;
state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
S_008F24_META_RB_ALIGNED(meta.rb_aligned);
}
} else {
/* SI-CI-VI */
unsigned pitch = base_level_info->nblk_x * block_width;
unsigned index = si_tile_mode_index(image, base_level, is_stencil);
state[3] &= C_008F1C_TILING_INDEX;
state[3] |= S_008F1C_TILING_INDEX(index);
state[4] &= C_008F20_PITCH_GFX6;
state[4] |= S_008F20_PITCH_GFX6(pitch - 1);
if (image->surface.dcc_size && image->surface.level[first_level].dcc_enabled) {
state[6] |= S_008F28_COMPRESSION_EN(1);
state[7] = (gpu_address +
image->dcc_offset +
base_level_info->dcc_offset) >> 8;
}
}
@@ -289,36 +242,6 @@ static unsigned radv_tex_dim(VkImageType image_type, VkImageViewType view_type,
unreachable("illegale image type");
}
}
static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4])
{
unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
if (swizzle[3] == VK_SWIZZLE_X) {
/* For the pre-defined border color values (white, opaque
* black, transparent black), the only thing that matters is
* that the alpha channel winds up in the correct place
* (because the RGB channels are all the same) so either of
* these enumerations will work.
*/
if (swizzle[2] == VK_SWIZZLE_Y)
bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
else
bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
} else if (swizzle[0] == VK_SWIZZLE_X) {
if (swizzle[1] == VK_SWIZZLE_Y)
bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
else
bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
} else if (swizzle[1] == VK_SWIZZLE_X) {
bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
} else if (swizzle[2] == VK_SWIZZLE_X) {
bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
}
return bc_swizzle;
}
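
Two illustrative cases for the mapping above (a sketch; the inputs mirror the VK_SWIZZLE_* values this diff already uses):

    /* identity RGBA swizzle: alpha already last */
    static const unsigned char rgba[4] = { VK_SWIZZLE_X, VK_SWIZZLE_Y,
                                           VK_SWIZZLE_Z, VK_SWIZZLE_W };
    /* BGRA-style swizzle: red and blue swapped */
    static const unsigned char bgra[4] = { VK_SWIZZLE_Z, VK_SWIZZLE_Y,
                                           VK_SWIZZLE_X, VK_SWIZZLE_W };
    assert(gfx9_border_color_swizzle(rgba) == V_008F20_BC_SWIZZLE_XYZW);
    assert(gfx9_border_color_swizzle(bgra) == V_008F20_BC_SWIZZLE_ZYXW);
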
/**
* Build the sampler view descriptor for a texture.
*/
@@ -377,8 +300,7 @@ si_make_texture_descriptor(struct radv_device *device,
state[1] = (S_008F14_DATA_FORMAT_GFX6(data_format) |
S_008F14_NUM_FORMAT_GFX6(num_format));
state[2] = (S_008F18_WIDTH(width - 1) |
S_008F18_HEIGHT(height - 1) |
S_008F18_PERF_MOD(4));
S_008F18_HEIGHT(height - 1));
state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
@@ -388,32 +310,14 @@ si_make_texture_descriptor(struct radv_device *device,
S_008F1C_LAST_LEVEL(image->info.samples > 1 ?
util_logbase2(image->info.samples) :
last_level) |
S_008F1C_POW2_PAD(image->info.levels > 1) |
S_008F1C_TYPE(type));
state[4] = 0;
state[5] = S_008F24_BASE_ARRAY(first_layer);
state[4] = S_008F20_DEPTH(depth - 1);
state[5] = (S_008F24_BASE_ARRAY(first_layer) |
S_008F24_LAST_ARRAY(last_layer));
state[6] = 0;
state[7] = 0;
if (device->physical_device->rad_info.chip_class >= GFX9) {
unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle);
/* Depth is the last accessible layer on Gfx9.
* The hw doesn't need to know the total number of layers.
*/
if (type == V_008F1C_SQ_RSRC_IMG_3D)
state[4] |= S_008F20_DEPTH(depth - 1);
else
state[4] |= S_008F20_DEPTH(last_layer);
state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ?
util_logbase2(image->info.samples) :
last_level);
} else {
state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
state[4] |= S_008F20_DEPTH(depth - 1);
state[5] |= S_008F24_LAST_ARRAY(last_layer);
}
if (image->dcc_offset) {
unsigned swap = radv_translate_colorswap(vk_format, FALSE);
@@ -432,75 +336,46 @@ si_make_texture_descriptor(struct radv_device *device,
/* Initialize the sampler view for FMASK. */
if (image->fmask.size) {
uint32_t fmask_format, num_format;
uint32_t fmask_format;
uint64_t gpu_address = device->ws->buffer_get_va(image->bo);
uint64_t va;
va = gpu_address + image->offset + image->fmask.offset;
if (device->physical_device->rad_info.chip_class >= GFX9) {
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
switch (image->info.samples) {
case 2:
num_format = V_008F14_IMG_FMASK_8_2_2;
break;
case 4:
num_format = V_008F14_IMG_FMASK_8_4_4;
break;
case 8:
num_format = V_008F14_IMG_FMASK_32_8_8;
break;
default:
unreachable("invalid nr_samples");
}
} else {
switch (image->info.samples) {
case 2:
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
break;
case 4:
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
break;
case 8:
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
break;
default:
assert(0);
fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
}
num_format = V_008F14_IMG_NUM_FORMAT_UINT;
switch (image->info.samples) {
case 2:
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
break;
case 4:
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
break;
case 8:
fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
break;
default:
assert(0);
fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
}
fmask_state[0] = va >> 8;
fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) |
S_008F14_DATA_FORMAT_GFX6(fmask_format) |
S_008F14_NUM_FORMAT_GFX6(num_format);
S_008F14_NUM_FORMAT_GFX6(V_008F14_IMG_NUM_FORMAT_UINT);
fmask_state[2] = S_008F18_WIDTH(width - 1) |
S_008F18_HEIGHT(height - 1);
fmask_state[3] = S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) |
S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) |
S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
S_008F1C_TILING_INDEX(image->fmask.tile_mode_index) |
S_008F1C_TYPE(radv_tex_dim(image->type, view_type, 1, 0, false));
fmask_state[4] = 0;
fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
fmask_state[4] = S_008F20_DEPTH(depth - 1) |
S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
fmask_state[5] = S_008F24_BASE_ARRAY(first_layer) |
S_008F24_LAST_ARRAY(last_layer);
fmask_state[6] = 0;
fmask_state[7] = 0;
if (device->physical_device->rad_info.chip_class >= GFX9) {
fmask_state[3] |= S_008F1C_SW_MODE(image->surface.u.gfx9.fmask.swizzle_mode);
fmask_state[4] |= S_008F20_DEPTH(last_layer) |
S_008F20_PITCH_GFX9(image->surface.u.gfx9.fmask.epitch);
fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(image->surface.u.gfx9.cmask.pipe_aligned) |
S_008F24_META_RB_ALIGNED(image->surface.u.gfx9.cmask.rb_aligned);
} else {
fmask_state[3] |= S_008F1C_TILING_INDEX(image->fmask.tile_mode_index);
fmask_state[4] |= S_008F20_DEPTH(depth - 1) |
S_008F20_PITCH_GFX6(image->fmask.pitch_in_pixels - 1);
fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);
}
} else if (fmask_state)
memset(fmask_state, 0, 8 * 4);
}
}
static void
@@ -534,7 +409,7 @@ radv_query_opaque_metadata(struct radv_device *device,
image->info.depth,
desc, NULL);
si_set_mutable_tex_desc_fields(device, image, &image->surface.u.legacy.level[0], 0, 0,
si_set_mutable_tex_desc_fields(device, image, &image->surface.level[0], 0, 0,
image->surface.blk_w, false, desc);
/* Clear the base address and set the relative DCC offset. */
@@ -547,7 +422,7 @@ radv_query_opaque_metadata(struct radv_device *device,
/* Dwords [10:..] contain the mipmap level offsets. */
for (i = 0; i <= image->info.levels - 1; i++)
md->metadata[10+i] = image->surface.u.legacy.level[i].offset >> 8;
md->metadata[10+i] = image->surface.level[i].offset >> 8;
md->size_metadata = (11 + image->info.levels - 1) * 4;
}
@@ -560,23 +435,19 @@ radv_init_metadata(struct radv_device *device,
struct radeon_surf *surface = &image->surface;
memset(metadata, 0, sizeof(*metadata));
metadata->microtile = surface->level[0].mode >= RADEON_SURF_MODE_1D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
metadata->macrotile = surface->level[0].mode >= RADEON_SURF_MODE_2D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
metadata->pipe_config = surface->pipe_config;
metadata->bankw = surface->bankw;
metadata->bankh = surface->bankh;
metadata->tile_split = surface->tile_split;
metadata->mtilea = surface->mtilea;
metadata->num_banks = surface->num_banks;
metadata->stride = surface->level[0].pitch_bytes;
metadata->scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
if (device->physical_device->rad_info.chip_class >= GFX9) {
metadata->u.gfx9.swizzle_mode = surface->u.gfx9.surf.swizzle_mode;
} else {
metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D ?
RADEON_LAYOUT_TILED : RADEON_LAYOUT_LINEAR;
metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
metadata->u.legacy.bankw = surface->u.legacy.bankw;
metadata->u.legacy.bankh = surface->u.legacy.bankh;
metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
}
radv_query_opaque_metadata(device, image, metadata);
}
@@ -588,27 +459,22 @@ radv_image_get_fmask_info(struct radv_device *device,
struct radv_fmask_info *out)
{
/* FMASK is allocated like an ordinary texture. */
struct radeon_surf fmask = {};
struct ac_surf_info info = image->info;
struct radeon_surf fmask = image->surface;
struct radeon_surf_info info = image->info;
memset(out, 0, sizeof(*out));
if (device->physical_device->rad_info.chip_class >= GFX9) {
out->alignment = image->surface.u.gfx9.fmask_alignment;
out->size = image->surface.u.gfx9.fmask_size;
return;
}
fmask.blk_w = image->surface.blk_w;
fmask.blk_h = image->surface.blk_h;
fmask.bo_alignment = 0;
fmask.bo_size = 0;
fmask.flags |= RADEON_SURF_FMASK;
info.samples = 1;
fmask.flags = image->surface.flags | RADEON_SURF_FMASK;
/* Force 2D tiling if it wasn't set. This may occur when creating
* FMASK for MSAA resolve on R6xx. On R6xx, the single-sample
* destination buffer must have an FMASK too. */
fmask.flags = RADEON_SURF_CLR(fmask.flags, MODE);
fmask.flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
fmask.flags |= RADEON_SURF_HAS_TILE_MODE_INDEX;
switch (nr_samples) {
case 2:
case 4:
@@ -622,17 +488,17 @@ radv_image_get_fmask_info(struct radv_device *device,
}
device->ws->surface_init(device->ws, &info, &fmask);
assert(fmask.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
assert(fmask.level[0].mode == RADEON_SURF_MODE_2D);
out->slice_tile_max = (fmask.u.legacy.level[0].nblk_x * fmask.u.legacy.level[0].nblk_y) / 64;
out->slice_tile_max = (fmask.level[0].nblk_x * fmask.level[0].nblk_y) / 64;
if (out->slice_tile_max)
out->slice_tile_max -= 1;
out->tile_mode_index = fmask.u.legacy.tiling_index[0];
out->pitch_in_pixels = fmask.u.legacy.level[0].nblk_x;
out->bank_height = fmask.u.legacy.bankh;
out->alignment = MAX2(256, fmask.surf_alignment);
out->size = fmask.surf_size;
out->tile_mode_index = fmask.tiling_index[0];
out->pitch_in_pixels = fmask.level[0].nblk_x;
out->bank_height = fmask.bankh;
out->alignment = MAX2(256, fmask.bo_alignment);
out->size = fmask.bo_size;
}
static void
@@ -655,12 +521,6 @@ radv_image_get_cmask_info(struct radv_device *device,
unsigned num_pipes = device->physical_device->rad_info.num_tile_pipes;
unsigned cl_width, cl_height;
if (device->physical_device->rad_info.chip_class >= GFX9) {
out->alignment = image->surface.u.gfx9.cmask_alignment;
out->size = image->surface.u.gfx9.cmask_size;
return;
}
switch (num_pipes) {
case 2:
cl_width = 32;
@@ -783,18 +643,15 @@ radv_image_create(VkDevice _device,
image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL_KHX)
image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
else
image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
}
radv_init_surface(device, &image->surface, create_info);
device->ws->surface_init(device->ws, &image->info, &image->surface);
image->size = image->surface.surf_size;
image->alignment = image->surface.surf_alignment;
image->size = image->surface.bo_size;
image->alignment = image->surface.bo_alignment;
if (image->exclusive || image->queue_family_mask == 1)
can_cmask_dcc = true;
@@ -816,6 +673,13 @@ radv_image_create(VkDevice _device,
radv_image_alloc_htile(device, image);
}
if (create_info->stride && create_info->stride != image->surface.level[0].pitch_bytes) {
image->surface.level[0].nblk_x = create_info->stride / image->surface.bpe;
image->surface.level[0].pitch_bytes = create_info->stride;
image->surface.level[0].slice_size = create_info->stride * image->surface.level[0].nblk_y;
}
if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
image->alignment = MAX2(image->alignment, 4096);
image->size = align64(image->size, image->alignment);
@@ -837,7 +701,9 @@ radv_image_create(VkDevice _device,
void
radv_image_view_init(struct radv_image_view *iview,
struct radv_device *device,
const VkImageViewCreateInfo* pCreateInfo)
const VkImageViewCreateInfo* pCreateInfo,
struct radv_cmd_buffer *cmd_buffer,
VkImageUsageFlags usage_mask)
{
RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
@@ -898,31 +764,91 @@ radv_image_view_init(struct radv_image_view *iview,
iview->descriptor,
iview->fmask_descriptor);
si_set_mutable_tex_desc_fields(device, image,
is_stencil ? &image->surface.u.legacy.stencil_level[range->baseMipLevel]
: &image->surface.u.legacy.level[range->baseMipLevel],
range->baseMipLevel,
is_stencil ? &image->surface.stencil_level[range->baseMipLevel] : &image->surface.level[range->baseMipLevel], range->baseMipLevel,
range->baseMipLevel,
blk_w, is_stencil, iview->descriptor);
}
bool radv_layout_has_htile(const struct radv_image *image,
VkImageLayout layout,
unsigned queue_mask)
void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
struct radv_image *image, uint32_t micro_tile_mode)
{
return image->surface.htile_size &&
(layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
queue_mask == (1u << RADV_QUEUE_GENERAL);
/* These magic numbers were copied from addrlib, which does not define
 * named constants for them either. They are all 2D_TILED_THIN1 modes
 * with different bpp and micro tile modes.
 */
if (device->physical_device->rad_info.chip_class >= CIK) {
switch (micro_tile_mode) {
case 0: /* displayable */
image->surface.tiling_index[0] = 10;
break;
case 1: /* thin */
image->surface.tiling_index[0] = 14;
break;
case 3: /* rotated */
image->surface.tiling_index[0] = 28;
break;
default: /* depth, thick */
assert(!"unexpected micro mode");
return;
}
} else { /* SI */
switch (micro_tile_mode) {
case 0: /* displayable */
switch (image->surface.bpe) {
case 1:
image->surface.tiling_index[0] = 10;
break;
case 2:
image->surface.tiling_index[0] = 11;
break;
default: /* 4, 8 */
image->surface.tiling_index[0] = 12;
break;
}
break;
case 1: /* thin */
switch (image->surface.bpe) {
case 1:
image->surface.tiling_index[0] = 14;
break;
case 2:
image->surface.tiling_index[0] = 15;
break;
case 4:
image->surface.tiling_index[0] = 16;
break;
default: /* 8, 16 */
image->surface.tiling_index[0] = 17;
break;
}
break;
default: /* depth, thick */
assert(!"unexpected micro mode");
return;
}
}
image->surface.micro_tile_mode = micro_tile_mode;
}
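
A caller-side sketch: forcing the displayable micro tile mode on an image that will be scanned out (the mode values 0, 1 and 3 follow the switch above):

    radv_image_set_optimal_micro_tile_mode(device, image, 0 /* displayable */);
    /* on CIK+ this selects tiling_index[0] = 10 per the addrlib table above */
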
bool radv_layout_has_htile(const struct radv_image *image,
VkImageLayout layout)
{
return (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
bool radv_layout_is_htile_compressed(const struct radv_image *image,
VkImageLayout layout,
unsigned queue_mask)
VkImageLayout layout)
{
return image->surface.htile_size &&
(layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) &&
queue_mask == (1u << RADV_QUEUE_GENERAL);
return layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
}
bool radv_layout_can_expclear(const struct radv_image *image,
VkImageLayout layout)
{
return (layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ||
layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
}
bool radv_layout_can_fast_clear(const struct radv_image *image,
@@ -938,8 +864,6 @@ unsigned radv_image_queue_family_mask(const struct radv_image *image, uint32_t f
{
if (!image->exclusive)
return image->queue_family_mask;
if (family == VK_QUEUE_FAMILY_EXTERNAL_KHX)
return (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
if (family == VK_QUEUE_FAMILY_IGNORED)
return 1u << queue_family;
return 1u << family;
@@ -985,15 +909,14 @@ void radv_GetImageSubresourceLayout(
RADV_FROM_HANDLE(radv_image, image, _image);
int level = pSubresource->mipLevel;
int layer = pSubresource->arrayLayer;
struct radeon_surf *surface = &image->surface;
pLayout->offset = surface->u.legacy.level[level].offset + surface->u.legacy.level[level].slice_size * layer;
pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
pLayout->arrayPitch = surface->u.legacy.level[level].slice_size;
pLayout->depthPitch = surface->u.legacy.level[level].slice_size;
pLayout->size = surface->u.legacy.level[level].slice_size;
pLayout->offset = image->surface.level[level].offset + image->surface.level[level].slice_size * layer;
pLayout->rowPitch = image->surface.level[level].pitch_bytes;
pLayout->arrayPitch = image->surface.level[level].slice_size;
pLayout->depthPitch = image->surface.level[level].slice_size;
pLayout->size = image->surface.level[level].slice_size;
if (image->type == VK_IMAGE_TYPE_3D)
pLayout->size *= u_minify(image->info.depth, level);
pLayout->size *= image->surface.level[level].nblk_z;
}
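
An application-side sketch of consuming this query for CPU access to a linearly tiled image (aspect, level and layer values are illustrative):

    VkImageSubresource sub = {
            .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
            .mipLevel = 0,
            .arrayLayer = 0,
    };
    VkSubresourceLayout layout;
    vkGetImageSubresourceLayout(device, image, &sub, &layout);
    /* after mapping the memory, row y of the image starts at
     * base + layout.offset + y * layout.rowPitch */
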
@@ -1011,7 +934,7 @@ radv_CreateImageView(VkDevice _device,
if (view == NULL)
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
radv_image_view_init(view, device, pCreateInfo);
radv_image_view_init(view, device, pCreateInfo, NULL, ~0);
*pView = radv_image_view_to_handle(view);


@@ -30,13 +30,12 @@
#include <pwd.h>
#include <sys/stat.h>
static void
void
radv_meta_save_novertex(struct radv_meta_saved_state *state,
const struct radv_cmd_buffer *cmd_buffer,
uint32_t dynamic_mask)
{
state->old_pipeline = cmd_buffer->state.pipeline;
state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
state->dynamic_mask = dynamic_mask;
radv_dynamic_state_copy(&state->dynamic, &cmd_buffer->state.dynamic,
@@ -46,14 +45,25 @@ radv_meta_save_novertex(struct radv_meta_saved_state *state,
state->vertex_saved = false;
}
void
radv_meta_save(struct radv_meta_saved_state *state,
const struct radv_cmd_buffer *cmd_buffer,
uint32_t dynamic_mask)
{
radv_meta_save_novertex(state, cmd_buffer, dynamic_mask);
state->old_descriptor_set0 = cmd_buffer->state.descriptors[0];
memcpy(state->old_vertex_bindings, cmd_buffer->state.vertex_bindings,
sizeof(state->old_vertex_bindings));
state->vertex_saved = true;
}
void
radv_meta_restore(const struct radv_meta_saved_state *state,
struct radv_cmd_buffer *cmd_buffer)
{
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
radv_pipeline_to_handle(state->old_pipeline));
cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
cmd_buffer->state.pipeline = state->old_pipeline;
if (state->vertex_saved) {
radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
memcpy(cmd_buffer->state.vertex_bindings, state->old_vertex_bindings,
sizeof(state->old_vertex_bindings));
cmd_buffer->state.vb_dirty |= (1 << RADV_META_VERTEX_BINDING_COUNT) - 1;
@@ -112,8 +122,7 @@ radv_meta_restore_compute(const struct radv_meta_saved_compute_state *state,
{
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
radv_pipeline_to_handle(state->old_pipeline));
cmd_buffer->state.descriptors[0] = state->old_descriptor_set0;
radv_bind_descriptor_set(cmd_buffer, state->old_descriptor_set0, 0);
if (push_constant_size) {
memcpy(cmd_buffer->push_constants, state->push_constants, push_constant_size);
@@ -338,14 +347,8 @@ radv_device_init_meta(struct radv_device *device)
result = radv_device_init_meta_resolve_compute_state(device);
if (result != VK_SUCCESS)
goto fail_resolve_compute;
result = radv_device_init_meta_resolve_fragment_state(device);
if (result != VK_SUCCESS)
goto fail_resolve_fragment;
return VK_SUCCESS;
fail_resolve_fragment:
radv_device_finish_meta_resolve_compute_state(device);
fail_resolve_compute:
radv_device_finish_meta_fast_clear_flush_state(device);
fail_fast_clear:
@@ -382,7 +385,6 @@ radv_device_finish_meta(struct radv_device *device)
radv_device_finish_meta_buffer_state(device);
radv_device_finish_meta_fast_clear_flush_state(device);
radv_device_finish_meta_resolve_compute_state(device);
radv_device_finish_meta_resolve_fragment_state(device);
radv_store_meta_pipeline(device);
radv_pipeline_cache_finish(&device->meta_state.cache);
@@ -393,6 +395,17 @@ radv_device_finish_meta(struct radv_device *device)
* reset and any scissors disabled. The rest of the dynamic state
* should have no effect.
*/
void
radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state,
struct radv_cmd_buffer *cmd_buffer)
{
uint32_t dirty_state = (1 << VK_DYNAMIC_STATE_VIEWPORT) | (1 << VK_DYNAMIC_STATE_SCISSOR);
radv_meta_save(saved_state, cmd_buffer, dirty_state);
cmd_buffer->state.dynamic.viewport.count = 0;
cmd_buffer->state.dynamic.scissor.count = 0;
cmd_buffer->state.dirty |= dirty_state;
}
void
radv_meta_save_graphics_reset_vport_scissor_novertex(struct radv_meta_saved_state *saved_state,
struct radv_cmd_buffer *cmd_buffer)
@@ -452,7 +465,7 @@ radv_meta_build_nir_vs_generate_vertices(void)
nir_variable *v_position;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_vs_gen_verts");
b.shader->info->name = ralloc_strdup(b.shader, "meta_vs_gen_verts");
nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
@@ -471,135 +484,8 @@ radv_meta_build_nir_fs_noop(void)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_asprintf(b.shader,
b.shader->info->name = ralloc_asprintf(b.shader,
"meta_noop_fs");
return b.shader;
}
static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
nir_ssa_def *input)
{
nir_const_value v;
unsigned i;
v.u32[0] = 0x3b4d2e1c; // 0.00313080009
nir_ssa_def *cmp[3];
for (i = 0; i < 3; i++)
cmp[i] = nir_flt(b, nir_channel(b, input, i),
nir_build_imm(b, 1, 32, v));
nir_ssa_def *ltvals[3];
v.f32[0] = 12.92;
for (i = 0; i < 3; i++)
ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
nir_build_imm(b, 1, 32, v));
nir_ssa_def *gtvals[3];
for (i = 0; i < 3; i++) {
v.f32[0] = 1.0/2.4;
gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
nir_build_imm(b, 1, 32, v));
v.f32[0] = 1.055;
gtvals[i] = nir_fmul(b, gtvals[i],
nir_build_imm(b, 1, 32, v));
v.f32[0] = 0.055;
gtvals[i] = nir_fsub(b, gtvals[i],
nir_build_imm(b, 1, 32, v));
}
nir_ssa_def *comp[4];
for (i = 0; i < 3; i++)
comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
comp[3] = nir_channels(b, input, 3);
return nir_vec(b, comp, 4);
}
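
For reference, the piecewise function the builder above encodes is the standard linear-to-sRGB transfer curve, applied per colour channel c (the constants 0.0031308, 12.92, 1/2.4, 1.055 and 0.055 all appear as immediates above):

    \mathrm{srgb}(c) =
    \begin{cases}
    12.92\,c & c < 0.0031308 \\
    1.055\,c^{1/2.4} - 0.055 & c \ge 0.0031308
    \end{cases}

Alpha (channel 3) is passed through unchanged, matching comp[3] above.
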
void radv_meta_build_resolve_shader_core(nir_builder *b,
bool is_integer,
bool is_srgb,
int samples,
nir_variable *input_img,
nir_variable *color,
nir_ssa_def *img_coord)
{
/* do a txf_ms on each sample */
nir_ssa_def *tmp;
nir_if *outer_if = NULL;
nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(img_coord);
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(b, 0));
tex->dest_type = nir_type_float;
tex->is_array = false;
tex->coord_components = 2;
tex->texture = nir_deref_var_create(tex, input_img);
tex->sampler = NULL;
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(b, &tex->instr);
tmp = &tex->dest.ssa;
if (!is_integer && samples > 1) {
nir_tex_instr *tex_all_same = nir_tex_instr_create(b->shader, 1);
tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex_all_same->op = nir_texop_samples_identical;
tex_all_same->src[0].src_type = nir_tex_src_coord;
tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
tex_all_same->dest_type = nir_type_float;
tex_all_same->is_array = false;
tex_all_same->coord_components = 2;
tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
tex_all_same->sampler = NULL;
nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
nir_builder_instr_insert(b, &tex_all_same->instr);
nir_ssa_def *all_same = nir_ine(b, &tex_all_same->dest.ssa, nir_imm_int(b, 0));
nir_if *if_stmt = nir_if_create(b->shader);
if_stmt->condition = nir_src_for_ssa(all_same);
nir_cf_node_insert(b->cursor, &if_stmt->cf_node);
b->cursor = nir_after_cf_list(&if_stmt->then_list);
for (int i = 1; i < samples; i++) {
nir_tex_instr *tex_add = nir_tex_instr_create(b->shader, 2);
tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex_add->op = nir_texop_txf_ms;
tex_add->src[0].src_type = nir_tex_src_coord;
tex_add->src[0].src = nir_src_for_ssa(img_coord);
tex_add->src[1].src_type = nir_tex_src_ms_index;
tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(b, i));
tex_add->dest_type = nir_type_float;
tex_add->is_array = false;
tex_add->coord_components = 2;
tex_add->texture = nir_deref_var_create(tex_add, input_img);
tex_add->sampler = NULL;
nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
nir_builder_instr_insert(b, &tex_add->instr);
tmp = nir_fadd(b, tmp, &tex_add->dest.ssa);
}
tmp = nir_fdiv(b, tmp, nir_imm_float(b, samples));
nir_store_var(b, color, tmp, 0xf);
b->cursor = nir_after_cf_list(&if_stmt->else_list);
outer_if = if_stmt;
}
nir_store_var(b, color, &tex->dest.ssa, 0xf);
if (outer_if)
b->cursor = nir_after_cf_node(&outer_if->cf_node);
if (is_srgb) {
nir_ssa_def *newv = nir_load_var(b, color);
newv = radv_meta_build_resolve_srgb_conversion(b, newv);
nir_store_var(b, color, newv, 0xf);
}
}
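
In plain C terms, the resolve built above behaves roughly like this per pixel (a sketch: vec4, fetch_sample, all_samples_identical, add, div and linear_to_srgb are hypothetical stand-ins for the txf_ms / samples_identical texture ops and the conversion shown earlier):

    vec4 color = fetch_sample(0);            /* sample 0 is always fetched */
    if (!is_integer && samples > 1 && !all_samples_identical()) {
            /* samples differ: average all of them */
            for (int i = 1; i < samples; i++)
                    color = add(color, fetch_sample(i));
            color = div(color, samples);
    }
    if (is_srgb)
            color = linear_to_srgb(color);

When the hardware reports all samples identical, sample 0 already equals the resolved value, so the averaging loop is skipped.
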


@@ -91,9 +91,13 @@ void radv_device_finish_meta_query_state(struct radv_device *device);
VkResult radv_device_init_meta_resolve_compute_state(struct radv_device *device);
void radv_device_finish_meta_resolve_compute_state(struct radv_device *device);
void radv_meta_save(struct radv_meta_saved_state *state,
const struct radv_cmd_buffer *cmd_buffer,
uint32_t dynamic_mask);
VkResult radv_device_init_meta_resolve_fragment_state(struct radv_device *device);
void radv_device_finish_meta_resolve_fragment_state(struct radv_device *device);
void radv_meta_save_novertex(struct radv_meta_saved_state *state,
const struct radv_cmd_buffer *cmd_buffer,
uint32_t dynamic_mask);
void radv_meta_restore(const struct radv_meta_saved_state *state,
struct radv_cmd_buffer *cmd_buffer);
@@ -201,6 +205,8 @@ void radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
const VkImageSubresourceRange *subresourceRange);
void radv_meta_save_graphics_reset_vport_scissor(struct radv_meta_saved_state *saved_state,
struct radv_cmd_buffer *cmd_buffer);
void radv_meta_save_graphics_reset_vport_scissor_novertex(struct radv_meta_saved_state *saved_state,
struct radv_cmd_buffer *cmd_buffer);
@@ -212,14 +218,6 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
uint32_t region_count,
const VkImageResolve *regions);
void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkImageLayout src_image_layout,
struct radv_image *dest_image,
VkImageLayout dest_image_layout,
uint32_t region_count,
const VkImageResolve *regions);
void radv_blit_to_prime_linear(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *image,
struct radv_image *linear_image);
@@ -231,14 +229,6 @@ nir_ssa_def *radv_meta_gen_rect_vertices(nir_builder *vs_b);
nir_ssa_def *radv_meta_gen_rect_vertices_comp2(nir_builder *vs_b, nir_ssa_def *comp2);
nir_shader *radv_meta_build_nir_vs_generate_vertices(void);
nir_shader *radv_meta_build_nir_fs_noop(void);
void radv_meta_build_resolve_shader_core(nir_builder *b,
bool is_integer,
bool is_srgb,
int samples,
nir_variable *input_img,
nir_variable *color,
nir_ssa_def *img_coord);
#ifdef __cplusplus
}
#endif


@@ -38,64 +38,24 @@ build_nir_vertex_shader(void)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_blit_vs");
b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");
nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "gl_Position");
pos_out->data.location = VARYING_SLOT_POS;
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "a_tex_pos");
tex_pos_in->data.location = VERT_ATTRIB_GENERIC0;
nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "v_tex_pos");
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
nir_copy_var(&b, tex_pos_out, tex_pos_in);
nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
nir_store_var(&b, pos_out, outvec, 0xf);
nir_intrinsic_instr *src_box = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
src_box->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_intrinsic_set_base(src_box, 0);
nir_intrinsic_set_range(src_box, 16);
src_box->num_components = 4;
nir_ssa_dest_init(&src_box->instr, &src_box->dest, 4, 32, "src_box");
nir_builder_instr_insert(&b, &src_box->instr);
nir_intrinsic_instr *src0_z = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
src0_z->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_intrinsic_set_base(src0_z, 16);
nir_intrinsic_set_range(src0_z, 4);
src0_z->num_components = 1;
nir_ssa_dest_init(&src0_z->instr, &src0_z->dest, 1, 32, "src0_z");
nir_builder_instr_insert(&b, &src0_z->instr);
nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_vertex_id_zero_base);
nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
nir_builder_instr_insert(&b, &vertex_id->instr);
/* vertex 0 - src0_x, src0_y, src0_z */
/* vertex 1 - src0_x, src1_y, src0_z */
/* vertex 2 - src1_x, src0_y, src0_z */
/* so channel 0 is vertex_id != 2 ? src0_x : src1_x,
 * and channel 1 is vertex_id != 1 ? src0_y : src1_y */
nir_ssa_def *c0cmp = nir_ine(&b, &vertex_id->dest.ssa,
nir_imm_int(&b, 2));
nir_ssa_def *c1cmp = nir_ine(&b, &vertex_id->dest.ssa,
nir_imm_int(&b, 1));
nir_ssa_def *comp[4];
comp[0] = nir_bcsel(&b, c0cmp,
nir_channel(&b, &src_box->dest.ssa, 0),
nir_channel(&b, &src_box->dest.ssa, 2));
comp[1] = nir_bcsel(&b, c1cmp,
nir_channel(&b, &src_box->dest.ssa, 1),
nir_channel(&b, &src_box->dest.ssa, 3));
comp[2] = &src0_z->dest.ssa;
comp[3] = nir_imm_float(&b, 1.0);
nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 4);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0xf);
return b.shader;
}
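
The 20-byte vertex push-constant block this shader reads (one load_push_constant at base 0/range 16, one at base 16/range 4) lays out as follows; the struct name is illustrative:

    struct blit_vs_push_constants {
            float src0_x, src0_y;   /* bytes  0..7  : source box min, normalized */
            float src1_x, src1_y;   /* bytes  8..15 : source box max, normalized */
            float src0_z;           /* bytes 16..19 : source depth/layer coord */
    };

This matches the {VK_SHADER_STAGE_VERTEX_BIT, 0, 20} push-constant range created in radv_device_init_meta_blit_state further down.
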
@@ -109,7 +69,7 @@ build_nir_copy_fragment_shader(enum glsl_sampler_dim tex_dim)
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
sprintf(shader_name, "meta_blit_fs.%d", tex_dim);
b.shader->info.name = ralloc_strdup(b.shader, shader_name);
b.shader->info->name = ralloc_strdup(b.shader, shader_name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "v_tex_pos");
@@ -163,7 +123,7 @@ build_nir_copy_fragment_shader_depth(enum glsl_sampler_dim tex_dim)
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
sprintf(shader_name, "meta_blit_depth_fs.%d", tex_dim);
b.shader->info.name = ralloc_strdup(b.shader, shader_name);
b.shader->info->name = ralloc_strdup(b.shader, shader_name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "v_tex_pos");
@@ -217,7 +177,7 @@ build_nir_copy_fragment_shader_stencil(enum glsl_sampler_dim tex_dim)
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
sprintf(shader_name, "meta_blit_stencil_fs.%d", tex_dim);
b.shader->info.name = ralloc_strdup(b.shader, shader_name);
b.shader->info->name = ralloc_strdup(b.shader, shader_name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec4, "v_tex_pos");
@@ -275,21 +235,52 @@ meta_emit_blit(struct radv_cmd_buffer *cmd_buffer,
VkFilter blit_filter)
{
struct radv_device *device = cmd_buffer->device;
unsigned offset = 0;
struct blit_vb_data {
float tex_coord[3];
} vb_data[3];
assert(src_image->info.samples == dest_image->info.samples);
float vertex_push_constants[5] = {
(float)src_offset_0.x / (float)src_iview->extent.width,
(float)src_offset_0.y / (float)src_iview->extent.height,
(float)src_offset_1.x / (float)src_iview->extent.width,
(float)src_offset_1.y / (float)src_iview->extent.height,
(float)src_offset_0.z / (float)src_iview->extent.depth,
};
unsigned vb_size = 3 * sizeof(*vb_data);
vb_data[0] = (struct blit_vb_data) {
.tex_coord = {
(float)src_offset_0.x / (float)src_iview->extent.width,
(float)src_offset_0.y / (float)src_iview->extent.height,
(float)src_offset_0.z / (float)src_iview->extent.depth,
},
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.blit.pipeline_layout,
VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
vertex_push_constants);
vb_data[1] = (struct blit_vb_data) {
.tex_coord = {
(float)src_offset_0.x / (float)src_iview->extent.width,
(float)src_offset_1.y / (float)src_iview->extent.height,
(float)src_offset_0.z / (float)src_iview->extent.depth,
},
};
vb_data[2] = (struct blit_vb_data) {
.tex_coord = {
(float)src_offset_1.x / (float)src_iview->extent.width,
(float)src_offset_0.y / (float)src_iview->extent.height,
(float)src_offset_0.z / (float)src_iview->extent.depth,
},
};
radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
struct radv_buffer vertex_buffer = {
.device = device,
.size = vb_size,
.bo = cmd_buffer->upload.upload_bo,
.offset = offset,
};
radv_CmdBindVertexBuffers(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
(VkBuffer[]) {
radv_buffer_to_handle(&vertex_buffer)
},
(VkDeviceSize[]) {
0,
});
VkSampler sampler;
radv_CreateSampler(radv_device_to_handle(device),
@@ -507,7 +498,7 @@ void radv_CmdBlitImage(
assert(src_image->info.samples == 1);
assert(dest_image->info.samples == 1);
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
for (unsigned r = 0; r < regionCount; r++) {
const VkImageSubresourceLayers *src_res = &pRegions[r].srcSubresource;
@@ -526,7 +517,8 @@ void radv_CmdBlitImage(
.baseArrayLayer = src_res->baseArrayLayer,
.layerCount = 1
},
});
},
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
unsigned dst_start, dst_end;
if (dest_image->type == VK_IMAGE_TYPE_3D) {
@@ -574,6 +566,12 @@ void radv_CmdBlitImage(
dest_box.extent.height = abs(dst_y1 - dst_y0);
struct radv_image_view dest_iview;
unsigned usage;
if (dst_res->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT)
usage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
else
usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
const unsigned num_layers = dst_end - dst_start;
for (unsigned i = 0; i < num_layers; i++) {
const VkOffset3D dest_offset_0 = {
@@ -613,7 +611,8 @@ void radv_CmdBlitImage(
.baseArrayLayer = dest_array_slice,
.layerCount = 1
},
});
},
cmd_buffer, usage);
meta_emit_blit(cmd_buffer,
src_image, &src_iview,
src_offset_0, src_offset_1,
@@ -752,8 +751,24 @@ radv_device_init_meta_blit_color(struct radv_device *device,
VkPipelineVertexInputStateCreateInfo vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = 3 * sizeof(float),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 1,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* Texture Coordinate */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32B32_SFLOAT,
.offset = 0
}
}
};
VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -908,8 +923,24 @@ radv_device_init_meta_blit_depth(struct radv_device *device,
VkPipelineVertexInputStateCreateInfo vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = 3 * sizeof(float),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 1,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* Texture Coordinate */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32B32_SFLOAT,
.offset = 0,
}
}
};
VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -1066,8 +1097,24 @@ radv_device_init_meta_blit_stencil(struct radv_device *device,
VkPipelineVertexInputStateCreateInfo vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = 3 * sizeof(float),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 1,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* Texture Coordinate */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32B32_SFLOAT,
.offset = 0
}
}
};
VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
@@ -1226,15 +1273,11 @@ radv_device_init_meta_blit_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_VERTEX_BIT, 0, 20};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&(VkPipelineLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.blit.ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constant_range,
},
&device->meta_state.alloc, &device->meta_state.blit.pipeline_layout);
if (result != VK_SUCCESS)
@@ -1251,10 +1294,12 @@ radv_device_init_meta_blit_state(struct radv_device *device)
goto fail;
result = radv_device_init_meta_blit_stencil(device, &vs);
if (result != VK_SUCCESS)
goto fail;
return VK_SUCCESS;
fail:
ralloc_free(vs.nir);
if (result != VK_SUCCESS)
radv_device_finish_meta_blit_state(device);
radv_device_finish_meta_blit_state(device);
return result;
}
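
For context, a minimal sketch of the push-constant plumbing this hunk touches: a 20-byte vertex-stage range is declared on the pipeline layout, and the same 20 bytes are supplied at record time. The helper name and the single descriptor set layout are assumptions for illustration, not radv code.

#include <vulkan/vulkan.h>

/* Sketch: pipeline layout carrying one vertex-stage push-constant
 * range (20 bytes, matching the range in the hunk above). */
static VkResult
create_blit_layout_sketch(VkDevice dev, VkDescriptorSetLayout ds_layout,
                          VkPipelineLayout *layout)
{
        const VkPushConstantRange range = { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 };
        const VkPipelineLayoutCreateInfo info = {
                .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
                .setLayoutCount = 1,
                .pSetLayouts = &ds_layout,
                .pushConstantRangeCount = 1,
                .pPushConstantRanges = &range,
        };
        return vkCreatePipelineLayout(dev, &info, NULL, layout);
}

At record time the data is pushed with vkCmdPushConstants(cmd, layout, VK_SHADER_STAGE_VERTEX_BIT, 0, 20, data), which must stay within the declared range.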

View File

@@ -53,6 +53,7 @@ enum blit2d_src_type {
static void
create_iview(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *surf,
VkImageUsageFlags usage,
struct radv_image_view *iview, VkFormat depth_format)
{
VkFormat format;
@@ -75,7 +76,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
.baseArrayLayer = surf->layer,
.layerCount = 1
},
});
}, cmd_buffer, usage);
}
static void
@@ -135,10 +136,11 @@ blit2d_bind_src(struct radv_cmd_buffer *cmd_buffer,
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.blit2d.p_layouts[src_type],
VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4,
VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
&src_buf->pitch);
} else {
create_iview(cmd_buffer, src_img, &tmp->iview, depth_format);
create_iview(cmd_buffer, src_img, VK_IMAGE_USAGE_SAMPLED_BIT, &tmp->iview,
depth_format);
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
device->meta_state.blit2d.p_layouts[src_type],
@@ -177,7 +179,15 @@ blit2d_bind_dst(struct radv_cmd_buffer *cmd_buffer,
VkFormat depth_format,
struct blit2d_dst_temps *tmp)
{
create_iview(cmd_buffer, dst, &tmp->iview, depth_format);
VkImageUsageFlagBits bits;
if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT)
bits = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
else
bits = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
create_iview(cmd_buffer, dst, bits,
&tmp->iview, depth_format);
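
A small aside on the aspect-to-usage mapping introduced here; the helper below is an illustrative restatement of the branch above, not an existing function.

#include <vulkan/vulkan.h>

/* Sketch: the destination view is created with the usage the render
 * pass will actually exercise, keyed on the aspect being written. */
static VkImageUsageFlagBits
dst_usage_for_aspect(VkImageAspectFlags aspect_mask)
{
        return aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT
               ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
               : VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
}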
radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
&(VkFramebufferCreateInfo) {
@@ -258,21 +268,56 @@ radv_meta_blit2d_normal_dst(struct radv_cmd_buffer *cmd_buffer,
struct blit2d_src_temps src_temps;
blit2d_bind_src(cmd_buffer, src_img, src_buf, &src_temps, src_type, depth_format);
uint32_t offset = 0;
struct blit2d_dst_temps dst_temps;
blit2d_bind_dst(cmd_buffer, dst, rects[r].dst_x + rects[r].width,
rects[r].dst_y + rects[r].height, depth_format, &dst_temps);
float vertex_push_constants[4] = {
rects[r].src_x,
rects[r].src_y,
rects[r].src_x + rects[r].width,
rects[r].src_y + rects[r].height,
struct blit_vb_data {
float tex_coord[2];
} vb_data[3];
unsigned vb_size = 3 * sizeof(*vb_data);
vb_data[0] = (struct blit_vb_data) {
.tex_coord = {
rects[r].src_x,
rects[r].src_y,
},
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.blit2d.p_layouts[src_type],
VK_SHADER_STAGE_VERTEX_BIT, 0, 16,
vertex_push_constants);
vb_data[1] = (struct blit_vb_data) {
.tex_coord = {
rects[r].src_x,
rects[r].src_y + rects[r].height,
},
};
vb_data[2] = (struct blit_vb_data) {
.tex_coord = {
rects[r].src_x + rects[r].width,
rects[r].src_y,
},
};
radv_cmd_buffer_upload_data(cmd_buffer, vb_size, 16, vb_data, &offset);
struct radv_buffer vertex_buffer = {
.device = device,
.size = vb_size,
.bo = cmd_buffer->upload.upload_bo,
.offset = offset,
};
radv_CmdBindVertexBuffers(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
(VkBuffer[]) {
radv_buffer_to_handle(&vertex_buffer),
},
(VkDeviceSize[]) {
0,
});
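
The vb_data upload above replaces the old push-constant path with a real, if transient, vertex buffer. A plain-C sketch of the corner layout it writes, with the upload BO mapping reduced to a caller-provided pointer (names are illustrative):

#include <string.h>

struct blit_vb_data { float tex_coord[2]; };

/* Sketch: three corners of the blit rect, in the same order as
 * vb_data[] above (v0 = x,y; v1 = x,y+h; v2 = x+w,y). */
static unsigned
fill_blit_vertices(void *staging, float x, float y, float w, float h)
{
        struct blit_vb_data vb[3] = {
                { { x,     y     } },
                { { x,     y + h } },
                { { x + w, y     } },
        };
        memcpy(staging, vb, sizeof(vb));
        return sizeof(vb); /* 24 bytes, bound as binding 0 at offset 0 */
}

Three vertices suffice because the draw emits a single triangle and the coordinates interpolate linearly across it; the fourth corner is implicit.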
if (dst->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
unsigned fs_key = radv_format_meta_fs_key(dst_temps.iview.vk_format);
@@ -375,53 +420,24 @@ build_nir_vertex_shader(void)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_blit2d_vs");
b.shader->info->name = ralloc_strdup(b.shader, "meta_blit_vs");
nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "gl_Position");
pos_out->data.location = VARYING_SLOT_POS;
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec2, "a_tex_pos");
tex_pos_in->data.location = VERT_ATTRIB_GENERIC0;
nir_variable *tex_pos_out = nir_variable_create(b.shader, nir_var_shader_out,
vec2, "v_tex_pos");
tex_pos_out->data.location = VARYING_SLOT_VAR0;
tex_pos_out->data.interpolation = INTERP_MODE_SMOOTH;
nir_copy_var(&b, tex_pos_out, tex_pos_in);
nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
nir_store_var(&b, pos_out, outvec, 0xf);
nir_intrinsic_instr *src_box = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
src_box->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
nir_intrinsic_set_base(src_box, 0);
nir_intrinsic_set_range(src_box, 16);
src_box->num_components = 4;
nir_ssa_dest_init(&src_box->instr, &src_box->dest, 4, 32, "src_box");
nir_builder_instr_insert(&b, &src_box->instr);
nir_intrinsic_instr *vertex_id = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_vertex_id_zero_base);
nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
nir_builder_instr_insert(&b, &vertex_id->instr);
/* vertex 0 - src_x, src_y */
/* vertex 1 - src_x, src_y+h */
/* vertex 2 - src_x+w, src_y */
/* so channel 0 is vertex_id != 2 ? src_x : src_x + w,
channel 1 is vertex_id != 1 ? src_y : src_y + h */
nir_ssa_def *c0cmp = nir_ine(&b, &vertex_id->dest.ssa,
nir_imm_int(&b, 2));
nir_ssa_def *c1cmp = nir_ine(&b, &vertex_id->dest.ssa,
nir_imm_int(&b, 1));
nir_ssa_def *comp[2];
comp[0] = nir_bcsel(&b, c0cmp,
nir_channel(&b, &src_box->dest.ssa, 0),
nir_channel(&b, &src_box->dest.ssa, 2));
comp[1] = nir_bcsel(&b, c1cmp,
nir_channel(&b, &src_box->dest.ssa, 1),
nir_channel(&b, &src_box->dest.ssa, 3));
nir_ssa_def *out_tex_vec = nir_vec(&b, comp, 2);
nir_store_var(&b, tex_pos_out, out_tex_vec, 0x3);
return b.shader;
}
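
The removed variant above derives the same three corners in the vertex shader from the vertex id instead of a vertex buffer; the bcsel selection it encodes, rendered as plain C (illustrative only):

#include <stdio.h>

/* Sketch of the selection the NIR above encodes:
 * channel 0 is vertex_id != 2 ? x0 : x1,
 * channel 1 is vertex_id != 1 ? y0 : y1. */
static void
rect_corner(int vertex_id, const float box[4], float out[2])
{
        out[0] = (vertex_id != 2) ? box[0] : box[2];
        out[1] = (vertex_id != 1) ? box[1] : box[3];
}

int main(void)
{
        const float box[4] = { 8, 8, 72, 40 }; /* x0, y0, x1, y1 */
        for (int v = 0; v < 3; v++) {
                float p[2];
                rect_corner(v, box, p);
                printf("vertex %d: (%g, %g)\n", v, p[0], p[1]);
        }
        return 0;
}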
@@ -472,8 +488,6 @@ build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
sampler->data.binding = 0;
nir_intrinsic_instr *width = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(width, 16);
nir_intrinsic_set_range(width, 4);
width->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
width->num_components = 1;
nir_ssa_dest_init(&width->instr, &width->dest, 1, 32, "width");
@@ -504,8 +518,24 @@ build_nir_buffer_fetch(struct nir_builder *b, struct radv_device *device,
static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = 2 * sizeof(float),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 1,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* Texture Coordinate */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32_SFLOAT,
.offset = 0
},
},
};
static nir_shader *
@@ -517,7 +547,7 @@ build_nir_copy_fragment_shader(struct radv_device *device,
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_strdup(b.shader, name);
b.shader->info->name = ralloc_strdup(b.shader, name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec2, "v_tex_pos");
@@ -546,7 +576,7 @@ build_nir_copy_fragment_shader_depth(struct radv_device *device,
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_strdup(b.shader, name);
b.shader->info->name = ralloc_strdup(b.shader, name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec2, "v_tex_pos");
@@ -575,7 +605,7 @@ build_nir_copy_fragment_shader_stencil(struct radv_device *device,
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_strdup(b.shader, name);
b.shader->info->name = ralloc_strdup(b.shader, name);
nir_variable *tex_pos_in = nir_variable_create(b.shader, nir_var_shader_in,
vec2, "v_tex_pos");
@@ -703,8 +733,8 @@ blit2d_init_color_pipeline(struct radv_device *device,
.format = format,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
},
.subpassCount = 1,
.pSubpasses = &(VkSubpassDescription) {
@@ -713,12 +743,12 @@ blit2d_init_color_pipeline(struct radv_device *device,
.colorAttachmentCount = 1,
.pColorAttachments = &(VkAttachmentReference) {
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.pResolveAttachments = NULL,
.pDepthStencilAttachment = &(VkAttachmentReference) {
.attachment = VK_ATTACHMENT_UNUSED,
.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.preserveAttachmentCount = 1,
.pPreserveAttachments = (uint32_t[]) { 0 },
@@ -861,8 +891,8 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
.format = 0,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
},
.subpassCount = 1,
.pSubpasses = &(VkSubpassDescription) {
@@ -873,7 +903,7 @@ blit2d_init_depth_only_pipeline(struct radv_device *device,
.pResolveAttachments = NULL,
.pDepthStencilAttachment = &(VkAttachmentReference) {
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.preserveAttachmentCount = 1,
.pPreserveAttachments = (uint32_t[]) { 0 },
@@ -1016,8 +1046,8 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
.format = 0,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.finalLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
},
.subpassCount = 1,
.pSubpasses = &(VkSubpassDescription) {
@@ -1028,7 +1058,7 @@ blit2d_init_stencil_only_pipeline(struct radv_device *device,
.pResolveAttachments = NULL,
.pDepthStencilAttachment = &(VkAttachmentReference) {
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.preserveAttachmentCount = 1,
.pPreserveAttachments = (uint32_t[]) { 0 },
@@ -1150,10 +1180,6 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
zero(device->meta_state.blit2d);
const VkPushConstantRange push_constant_ranges[] = {
{VK_SHADER_STAGE_VERTEX_BIT, 0, 16},
{VK_SHADER_STAGE_FRAGMENT_BIT, 16, 4},
};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&(VkDescriptorSetLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
@@ -1177,8 +1203,6 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_IMAGE],
.pushConstantRangeCount = 1,
.pPushConstantRanges = push_constant_ranges,
},
&device->meta_state.alloc, &device->meta_state.blit2d.p_layouts[BLIT2D_SRC_TYPE_IMAGE]);
if (result != VK_SUCCESS)
@@ -1202,14 +1226,14 @@ radv_device_init_meta_blit2d_state(struct radv_device *device)
if (result != VK_SUCCESS)
goto fail;
const VkPushConstantRange push_constant_range = {VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&(VkPipelineLayoutCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.blit2d.ds_layouts[BLIT2D_SRC_TYPE_BUFFER],
.pushConstantRangeCount = 2,
.pPushConstantRanges = push_constant_ranges,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &push_constant_range,
},
&device->meta_state.alloc, &device->meta_state.blit2d.p_layouts[BLIT2D_SRC_TYPE_BUFFER]);
if (result != VK_SUCCESS)

View File

@@ -10,17 +10,17 @@ build_buffer_fill_shader(struct radv_device *dev)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_fill");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_fill");
b.shader->info->cs.local_size[0] = 64;
b.shader->info->cs.local_size[1] = 1;
b.shader->info->cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
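
Both the fill and copy shaders flatten the dispatch the same way before addressing the buffer; a one-line C equivalent for the 64-wide workgroup used here:

#include <stdint.h>

/* Sketch: global_id = wg_id * block_size + invoc_id, with the
 * local size of 64 set on the shaders above. */
static uint32_t
flat_global_id(uint32_t wg_id, uint32_t invoc_id)
{
        return wg_id * 64 + invoc_id;
}

The byte offset each invocation touches is then a fixed stride times this index; treat the exact per-thread stride as an assumption, since it depends on how wide the shader's store is.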
@@ -36,8 +36,6 @@ build_buffer_fill_shader(struct radv_device *dev)
nir_builder_instr_insert(&b, &dst_buf->instr);
nir_intrinsic_instr *load = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(load, 0);
nir_intrinsic_set_range(load, 4);
load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
load->num_components = 1;
nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "fill_value");
@@ -62,17 +60,17 @@ build_buffer_copy_shader(struct radv_device *dev)
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_buffer_copy");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "meta_buffer_copy");
b.shader->info->cs.local_size[0] = 64;
b.shader->info->cs.local_size[1] = 1;
b.shader->info->cs.local_size[2] = 1;
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

View File

@@ -42,10 +42,10 @@ build_nir_itob_compute_shader(struct radv_device *dev)
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_itob_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "meta_itob_cs");
b.shader->info->cs.local_size[0] = 16;
b.shader->info->cs.local_size[1] = 16;
b.shader->info->cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
@@ -59,25 +59,21 @@ build_nir_itob_compute_shader(struct radv_device *dev)
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(offset, 0);
nir_intrinsic_set_range(offset, 12);
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
offset->num_components = 2;
nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
nir_builder_instr_insert(&b, &offset->instr);
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(stride, 0);
nir_intrinsic_set_range(stride, 12);
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
stride->num_components = 1;
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
@@ -244,10 +240,10 @@ build_nir_btoi_compute_shader(struct radv_device *dev)
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "meta_btoi_cs");
b.shader->info->cs.local_size[0] = 16;
b.shader->info->cs.local_size[1] = 16;
b.shader->info->cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
buf_type, "s_tex");
input_img->data.descriptor_set = 0;
@@ -261,23 +257,19 @@ build_nir_btoi_compute_shader(struct radv_device *dev)
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(offset, 0);
nir_intrinsic_set_range(offset, 12);
offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
offset->num_components = 2;
nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
nir_builder_instr_insert(&b, &offset->instr);
nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(stride, 0);
nir_intrinsic_set_range(stride, 12);
stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
stride->num_components = 1;
nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
@@ -444,10 +436,10 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "meta_itoi_cs");
b.shader->info->cs.local_size[0] = 16;
b.shader->info->cs.local_size[1] = 16;
b.shader->info->cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
buf_type, "s_tex");
input_img->data.descriptor_set = 0;
@@ -461,23 +453,19 @@ build_nir_itoi_compute_shader(struct radv_device *dev)
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(src_offset, 0);
nir_intrinsic_set_range(src_offset, 16);
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
src_offset->num_components = 2;
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
nir_builder_instr_insert(&b, &src_offset->instr);
nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(dst_offset, 0);
nir_intrinsic_set_range(dst_offset, 16);
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
dst_offset->num_components = 2;
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
@@ -634,10 +622,10 @@ build_nir_cleari_compute_shader(struct radv_device *dev)
false,
GLSL_TYPE_FLOAT);
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_cs");
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "meta_cleari_cs");
b.shader->info->cs.local_size[0] = 16;
b.shader->info->cs.local_size[1] = 16;
b.shader->info->cs.local_size[2] = 1;
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
img_type, "out_img");
@@ -647,15 +635,13 @@ build_nir_cleari_compute_shader(struct radv_device *dev)
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(clear_val, 0);
nir_intrinsic_set_range(clear_val, 16);
clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
clear_val->num_components = 4;
nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
@@ -859,6 +845,7 @@ radv_meta_end_cleari(struct radv_cmd_buffer *cmd_buffer,
static void
create_iview(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_blit2d_surf *surf,
VkImageUsageFlags usage,
struct radv_image_view *iview)
{
@@ -875,7 +862,7 @@ create_iview(struct radv_cmd_buffer *cmd_buffer,
.baseArrayLayer = surf->layer,
.layerCount = 1
},
});
}, cmd_buffer, usage);
}
static void
@@ -961,7 +948,7 @@ radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
struct radv_device *device = cmd_buffer->device;
struct itob_temps temps;
create_iview(cmd_buffer, src, &temps.src_iview);
create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview);
create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &temps.dst_bview);
itob_bind_descriptors(cmd_buffer, &temps);
@@ -1047,7 +1034,7 @@ radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct btoi_temps temps;
create_bview(cmd_buffer, src->buffer, src->offset, src->format, &temps.src_bview);
create_iview(cmd_buffer, dst, &temps.dst_iview);
create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview);
btoi_bind_descriptors(cmd_buffer, &temps);
btoi_bind_pipeline(cmd_buffer);
@@ -1137,8 +1124,8 @@ radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_device *device = cmd_buffer->device;
struct itoi_temps temps;
create_iview(cmd_buffer, src, &temps.src_iview);
create_iview(cmd_buffer, dst, &temps.dst_iview);
create_iview(cmd_buffer, src, VK_IMAGE_USAGE_SAMPLED_BIT, &temps.src_iview);
create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &temps.dst_iview);
itoi_bind_descriptors(cmd_buffer, &temps);
@@ -1209,7 +1196,7 @@ radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
struct radv_device *device = cmd_buffer->device;
struct radv_image_view dst_iview;
create_iview(cmd_buffer, dst, &dst_iview);
create_iview(cmd_buffer, dst, VK_IMAGE_USAGE_STORAGE_BIT, &dst_iview);
cleari_bind_descriptors(cmd_buffer, &dst_iview);
cleari_bind_pipeline(cmd_buffer);

View File

@@ -28,6 +28,16 @@
#include "util/format_rgb9e5.h"
#include "vk_format.h"
/** Vertex attributes for color clears. */
struct color_clear_vattrs {
VkClearColorValue color;
};
/** Vertex attributes for depthstencil clears. */
struct depthstencil_clear_vattrs {
float depth_clear;
};
enum {
DEPTH_CLEAR_SLOW,
DEPTH_CLEAR_FAST_EXPCLEAR,
@@ -45,8 +55,8 @@ build_color_shaders(struct nir_shader **out_vs,
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_color_vs");
fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_color_fs");
const struct glsl_type *position_type = glsl_vec4_type();
const struct glsl_type *color_type = glsl_vec4_type();
@@ -56,23 +66,33 @@ build_color_shaders(struct nir_shader **out_vs,
"gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(fs_b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(in_color_load, 0);
nir_intrinsic_set_range(in_color_load, 16);
in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&fs_b, 0));
in_color_load->num_components = 4;
nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 4, 32, "clear color");
nir_builder_instr_insert(&fs_b, &in_color_load->instr);
nir_variable *vs_in_color =
nir_variable_create(vs_b.shader, nir_var_shader_in, color_type,
"a_color");
vs_in_color->data.location = VERT_ATTRIB_GENERIC0;
nir_variable *vs_out_color =
nir_variable_create(vs_b.shader, nir_var_shader_out, color_type,
"v_color");
vs_out_color->data.location = VARYING_SLOT_VAR0;
vs_out_color->data.interpolation = INTERP_MODE_FLAT;
nir_variable *fs_in_color =
nir_variable_create(fs_b.shader, nir_var_shader_in, color_type,
"v_color");
fs_in_color->data.location = vs_out_color->data.location;
fs_in_color->data.interpolation = vs_out_color->data.interpolation;
nir_variable *fs_out_color =
nir_variable_create(fs_b.shader, nir_var_shader_out, color_type,
"f_color");
fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
nir_store_var(&fs_b, fs_out_color, &in_color_load->dest.ssa, 0xf);
nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
nir_copy_var(&vs_b, vs_out_color, vs_in_color);
nir_copy_var(&fs_b, fs_out_color, fs_in_color);
const struct glsl_type *layer_type = glsl_int_type();
nir_variable *vs_out_layer =
@@ -97,7 +117,6 @@ create_pipeline(struct radv_device *device,
const VkPipelineVertexInputStateCreateInfo *vi_state,
const VkPipelineDepthStencilStateCreateInfo *ds_state,
const VkPipelineColorBlendStateCreateInfo *cb_state,
const VkPipelineLayout layout,
const struct radv_graphics_pipeline_create_info *extra,
const VkAllocationCallbacks *alloc,
struct radv_pipeline **pipeline)
@@ -177,11 +196,10 @@ create_pipeline(struct radv_device *device,
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
},
},
.layout = layout,
.flags = 0,
.renderPass = radv_render_pass_to_handle(render_pass),
.subpass = 0,
},
.flags = 0,
.renderPass = radv_render_pass_to_handle(render_pass),
.subpass = 0,
},
extra,
alloc,
&pipeline_h);
@@ -247,8 +265,24 @@ create_color_pipeline(struct radv_device *device,
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = sizeof(struct color_clear_vattrs),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 1,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* Color */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32G32B32A32_SFLOAT,
.offset = 0,
},
},
};
const VkPipelineDepthStencilStateCreateInfo ds_state = {
@@ -281,7 +315,6 @@ create_color_pipeline(struct radv_device *device,
};
result = create_pipeline(device, radv_render_pass_from_handle(pass),
samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
device->meta_state.clear_color_p_layout,
&extra, &device->meta_state.alloc, pipeline);
return result;
@@ -324,12 +357,7 @@ radv_device_finish_meta_clear_state(struct radv_device *device)
}
destroy_render_pass(device, state->clear[i].depthstencil_rp);
}
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->clear_color_p_layout,
&state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->clear_depth_p_layout,
&state->alloc);
}
static void
@@ -350,6 +378,7 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
VkClearColorValue clear_value = clear_att->clearValue.color;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
VkPipeline pipeline_h;
uint32_t offset;
if (fs_key == -1) {
radv_finishme("color clears incomplete");
@@ -367,10 +396,17 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(clear_att->colorAttachment < subpass->color_count);
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.clear_color_p_layout,
VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
&clear_value);
const struct color_clear_vattrs vertex_data[3] = {
{
.color = clear_value,
},
{
.color = clear_value,
},
{
.color = clear_value,
},
};
struct radv_subpass clear_subpass = {
.color_count = 1,
@@ -382,6 +418,19 @@ emit_color_clear(struct radv_cmd_buffer *cmd_buffer,
radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass, false);
radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
struct radv_buffer vertex_buffer = {
.device = device,
.size = sizeof(vertex_data),
.bo = cmd_buffer->upload.upload_bo,
.offset = offset,
};
radv_CmdBindVertexBuffers(cmd_buffer_h, 0, 1,
(VkBuffer[]) { radv_buffer_to_handle(&vertex_buffer) },
(VkDeviceSize[]) { 0 });
if (cmd_buffer->state.pipeline != pipeline) {
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline_h);
@@ -412,24 +461,22 @@ build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs
nir_builder_init_simple_shader(&vs_b, NULL, MESA_SHADER_VERTEX, NULL);
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
vs_b.shader->info.name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
vs_b.shader->info->name = ralloc_strdup(vs_b.shader, "meta_clear_depthstencil_vs");
fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "meta_clear_depthstencil_fs");
const struct glsl_type *position_out_type = glsl_vec4_type();
const struct glsl_type *position_type = glsl_float_type();
nir_variable *vs_in_pos =
nir_variable_create(vs_b.shader, nir_var_shader_in, position_type,
"a_position");
vs_in_pos->data.location = VERT_ATTRIB_GENERIC0;
nir_variable *vs_out_pos =
nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type,
"gl_Position");
vs_out_pos->data.location = VARYING_SLOT_POS;
nir_intrinsic_instr *in_color_load = nir_intrinsic_instr_create(vs_b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(in_color_load, 0);
nir_intrinsic_set_range(in_color_load, 4);
in_color_load->src[0] = nir_src_for_ssa(nir_imm_int(&vs_b, 0));
in_color_load->num_components = 1;
nir_ssa_dest_init(&in_color_load->instr, &in_color_load->dest, 1, 32, "depth value");
nir_builder_instr_insert(&vs_b, &in_color_load->instr);
nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, &in_color_load->dest.ssa);
nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, nir_load_var(&vs_b, vs_in_pos));
nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
const struct glsl_type *layer_type = glsl_int_type();
@@ -494,8 +541,24 @@ create_depthstencil_pipeline(struct radv_device *device,
const VkPipelineVertexInputStateCreateInfo vi_state = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
.vertexBindingDescriptionCount = 1,
.pVertexBindingDescriptions = (VkVertexInputBindingDescription[]) {
{
.binding = 0,
.stride = sizeof(struct depthstencil_clear_vattrs),
.inputRate = VK_VERTEX_INPUT_RATE_VERTEX
},
},
.vertexAttributeDescriptionCount = 1,
.pVertexAttributeDescriptions = (VkVertexInputAttributeDescription[]) {
{
/* Position */
.location = 0,
.binding = 0,
.format = VK_FORMAT_R32_SFLOAT,
.offset = 0,
},
},
};
const VkPipelineDepthStencilStateCreateInfo ds_state = {
@@ -535,19 +598,14 @@ create_depthstencil_pipeline(struct radv_device *device,
}
result = create_pipeline(device, radv_render_pass_from_handle(render_pass),
samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
device->meta_state.clear_depth_p_layout,
&extra, &device->meta_state.alloc, pipeline);
return result;
}
static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
const struct radv_image_view *iview,
static bool depth_view_can_fast_clear(const struct radv_image_view *iview,
VkImageLayout layout,
const VkClearRect *clear_rect)
{
uint32_t queue_mask = radv_image_queue_family_mask(iview->image,
cmd_buffer->queue_family_index,
cmd_buffer->queue_family_index);
if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
clear_rect->rect.extent.width != iview->extent.width ||
clear_rect->rect.extent.height != iview->extent.height)
@@ -555,15 +613,14 @@ static bool depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer,
if (iview->image->surface.htile_size &&
iview->base_mip == 0 &&
iview->base_layer == 0 &&
radv_layout_is_htile_compressed(iview->image, layout, queue_mask) &&
radv_layout_can_expclear(iview->image, layout) &&
!radv_image_extent_compare(iview->image, &iview->extent))
return true;
return false;
}
static struct radv_pipeline *
pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
struct radv_meta_state *meta_state,
pick_depthstencil_pipeline(struct radv_meta_state *meta_state,
const struct radv_image_view *iview,
int samples_log2,
VkImageAspectFlags aspects,
@@ -571,7 +628,7 @@ pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer,
const VkClearRect *clear_rect,
VkClearDepthStencilValue clear_value)
{
bool fast = depth_view_can_fast_clear(cmd_buffer, iview, layout, clear_rect);
bool fast = depth_view_can_fast_clear(iview, layout, clear_rect);
int index = DEPTH_CLEAR_SLOW;
if (fast) {
@@ -607,6 +664,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
const uint32_t samples = iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
uint32_t offset;
assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT ||
aspects == VK_IMAGE_ASPECT_STENCIL_BIT ||
@@ -617,18 +675,36 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
clear_value.depth = 1.0f;
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.clear_depth_p_layout,
VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
&clear_value.depth);
const struct depthstencil_clear_vattrs vertex_data[3] = {
{
.depth_clear = clear_value.depth,
},
{
.depth_clear = clear_value.depth,
},
{
.depth_clear = clear_value.depth,
},
};
radv_cmd_buffer_upload_data(cmd_buffer, sizeof(vertex_data), 16, vertex_data, &offset);
struct radv_buffer vertex_buffer = {
.device = device,
.size = sizeof(vertex_data),
.bo = cmd_buffer->upload.upload_bo,
.offset = offset,
};
if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT,
clear_value.stencil);
}
struct radv_pipeline *pipeline = pick_depthstencil_pipeline(cmd_buffer,
meta_state,
radv_CmdBindVertexBuffers(cmd_buffer_h, 0, 1,
(VkBuffer[]) { radv_buffer_to_handle(&vertex_buffer) },
(VkDeviceSize[]) { 0 });
struct radv_pipeline *pipeline = pick_depthstencil_pipeline(meta_state,
iview,
samples_log2,
aspects,
@@ -640,7 +716,7 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
radv_pipeline_to_handle(pipeline));
}
if (depth_view_can_fast_clear(cmd_buffer, iview, subpass->depth_stencil_attachment.layout, clear_rect))
if (depth_view_can_fast_clear(iview, subpass->depth_stencil_attachment.layout, clear_rect))
radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
@@ -657,95 +733,6 @@ emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer,
radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, 0);
}
static bool
emit_fast_htile_clear(struct radv_cmd_buffer *cmd_buffer,
const VkClearAttachment *clear_att,
const VkClearRect *clear_rect,
enum radv_cmd_flush_bits *pre_flush,
enum radv_cmd_flush_bits *post_flush)
{
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
const uint32_t pass_att = subpass->depth_stencil_attachment.attachment;
VkImageLayout image_layout = subpass->depth_stencil_attachment.layout;
const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_image_view *iview = fb->attachments[pass_att].attachment;
VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
VkImageAspectFlags aspects = clear_att->aspectMask;
uint32_t clear_word;
if (!iview->image->surface.htile_size)
return false;
if (cmd_buffer->device->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
return false;
if (!radv_layout_is_htile_compressed(iview->image, image_layout, radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index, cmd_buffer->queue_family_index)))
goto fail;
/* don't fast clear 3D */
if (iview->image->type == VK_IMAGE_TYPE_3D)
goto fail;
/* all layers are bound */
if (iview->base_layer > 0)
goto fail;
if (iview->image->info.array_size != iview->layer_count)
goto fail;
if (iview->image->info.levels > 1)
goto fail;
if (!radv_image_extent_compare(iview->image, &iview->extent))
goto fail;
if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
clear_rect->rect.extent.width != iview->image->info.width ||
clear_rect->rect.extent.height != iview->image->info.height)
goto fail;
if (clear_rect->baseArrayLayer != 0)
goto fail;
if (clear_rect->layerCount != iview->image->info.array_size)
goto fail;
/* Don't do stencil clears till we have figured out if the clear words are
* correct. */
if (vk_format_aspects(iview->image->vk_format) & VK_IMAGE_ASPECT_STENCIL_BIT)
goto fail;
if (clear_value.depth == 1.0)
clear_word = 0xfffffff0;
else if (clear_value.depth == 0.0)
clear_word = 0;
else
goto fail;
if (pre_flush) {
cmd_buffer->state.flush_bits |= (RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META) & ~ *pre_flush;
*pre_flush |= cmd_buffer->state.flush_bits;
} else
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
radv_fill_buffer(cmd_buffer, iview->image->bo,
iview->image->offset + iview->image->htile_offset,
iview->image->surface.htile_size, clear_word);
radv_set_depth_clear_regs(cmd_buffer, iview->image, clear_value, aspects);
if (post_flush)
*post_flush |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
else
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2;
return true;
fail:
return false;
}
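
The removed fast path encodes a full-surface depth clear directly in HTILE, and only the two canonical depth values are representable; anything else must take the slow draw-based clear. A hedged sketch of the word selection, with the magic values copied from the code above:

#include <stdbool.h>
#include <stdint.h>

/* Sketch: pick the HTILE clear word for a depth-only fast clear,
 * mirroring emit_fast_htile_clear() above. */
static bool
htile_clear_word(float depth, uint32_t *word)
{
        if (depth == 1.0f)
                *word = 0xfffffff0u;
        else if (depth == 0.0f)
                *word = 0;
        else
                return false; /* fall back to the slow clear */
        return true;
}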
static VkFormat pipeline_formats[] = {
VK_FORMAT_R8G8B8A8_UNORM,
@@ -768,34 +755,6 @@ radv_device_init_meta_clear_state(struct radv_device *device)
memset(&device->meta_state.clear, 0, sizeof(device->meta_state.clear));
VkPipelineLayoutCreateInfo pl_color_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
};
res = radv_CreatePipelineLayout(radv_device_to_handle(device),
&pl_color_create_info,
&device->meta_state.alloc,
&device->meta_state.clear_color_p_layout);
if (res != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo pl_depth_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 0,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
};
res = radv_CreatePipelineLayout(radv_device_to_handle(device),
&pl_depth_create_info,
&device->meta_state.alloc,
&device->meta_state.clear_depth_p_layout);
if (res != VK_SUCCESS)
goto fail;
for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {
uint32_t samples = 1 << i;
for (uint32_t j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
@@ -899,7 +858,7 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
if (iview->image->info.levels > 1)
goto fail;
if (iview->image->surface.u.legacy.level[0].mode < RADEON_SURF_MODE_1D)
if (iview->image->surface.level[0].mode < RADEON_SURF_MODE_1D)
goto fail;
if (!radv_image_extent_compare(iview->image, &iview->extent))
goto fail;
@@ -914,11 +873,6 @@ emit_fast_color_clear(struct radv_cmd_buffer *cmd_buffer,
if (clear_rect->layerCount != iview->image->info.array_size)
goto fail;
/* RB+ doesn't work with CMASK fast clear on Stoney. */
if (!iview->image->surface.dcc_size &&
cmd_buffer->device->physical_device->rad_info.family == CHIP_STONEY)
goto fail;
/* DCC */
ret = radv_format_pack_clear_color(iview->image->vk_format,
clear_color, &clear_value);
@@ -977,9 +931,7 @@ emit_clear(struct radv_cmd_buffer *cmd_buffer,
} else {
assert(clear_att->aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
VK_IMAGE_ASPECT_STENCIL_BIT));
if (!emit_fast_htile_clear(cmd_buffer, clear_att, clear_rect,
pre_flush, post_flush))
emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect);
emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect);
}
}
@@ -1023,7 +975,7 @@ radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
if (!subpass_needs_clear(cmd_buffer))
return;
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
VkClearRect clear_rect = {
.rect = cmd_state->render_area,
@@ -1094,7 +1046,8 @@ radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer,
.baseArrayLayer = range->baseArrayLayer + layer,
.layerCount = 1
},
});
},
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
VkFramebuffer fb;
radv_CreateFramebuffer(device_h,
@@ -1270,7 +1223,7 @@ void radv_CmdClearColorImage(
if (cs)
radv_meta_begin_cleari(cmd_buffer, &saved_state.compute);
else
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
radv_cmd_clear_image(cmd_buffer, image, imageLayout,
(const VkClearValue *) pColor,
@@ -1294,7 +1247,7 @@ void radv_CmdClearDepthStencilImage(
RADV_FROM_HANDLE(radv_image, image, image_h);
struct radv_meta_saved_state saved_state;
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
radv_cmd_clear_image(cmd_buffer, image, imageLayout,
(const VkClearValue *) pDepthStencil,
@@ -1318,7 +1271,7 @@ void radv_CmdClearAttachments(
if (!cmd_buffer->state.subpass)
return;
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
/* FINISHME: We can do better than this dumb loop. It thrashes too much
* state.

View File

@@ -123,7 +123,7 @@ meta_copy_buffer_to_image(struct radv_cmd_buffer *cmd_buffer,
if (cs)
radv_meta_begin_bufimage(cmd_buffer, &saved_state.compute);
else
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
for (unsigned r = 0; r < regionCount; r++) {
@@ -341,7 +341,7 @@ meta_copy_image(struct radv_cmd_buffer *cmd_buffer,
if (cs)
radv_meta_begin_itoi(cmd_buffer, &saved_state.compute);
else
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state.gfx, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state.gfx, cmd_buffer);
for (unsigned r = 0; r < regionCount; r++) {
assert(pRegions[r].srcSubresource.aspectMask ==

View File

@@ -318,7 +318,8 @@ static void radv_process_depth_image_inplace(struct radv_cmd_buffer *cmd_buffer,
.baseArrayLayer = subresourceRange->baseArrayLayer + layer,
.layerCount = 1,
},
});
},
cmd_buffer, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
VkFramebuffer fb_h;

View File

@@ -367,7 +367,8 @@ radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer,
.baseArrayLayer = subresourceRange->baseArrayLayer + layer,
.layerCount = 1,
},
});
},
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
VkFramebuffer fb_h;
radv_CreateFramebuffer(device_h,

View File

@@ -38,7 +38,7 @@ build_nir_fs(void)
nir_variable *f_color; /* vec4, fragment output color */
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_asprintf(b.shader,
b.shader->info->name = ralloc_asprintf(b.shader,
"meta_resolve_fs");
f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4,
@@ -303,25 +303,6 @@ emit_resolve(struct radv_cmd_buffer *cmd_buffer,
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
}
enum radv_resolve_method {
RESOLVE_HW,
RESOLVE_COMPUTE,
RESOLVE_FRAGMENT,
};
static void radv_pick_resolve_method_images(struct radv_image *src_image,
struct radv_image *dest_image,
enum radv_resolve_method *method)
{
if (dest_image->surface.micro_tile_mode != src_image->surface.micro_tile_mode) {
if (dest_image->surface.num_dcc_levels > 0)
*method = RESOLVE_FRAGMENT;
else
*method = RESOLVE_COMPUTE;
}
}
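
The method picker above, combined with the single-region checks in radv_CmdResolveImage below, amounts to one predicate for the hardware path; a sketch (the helper name is illustrative):

#include <stdbool.h>
#include <vulkan/vulkan.h>

/* Sketch: the HW resolve is only usable for one region that covers
 * the whole image at offset zero; anything else falls back to the
 * compute or fragment resolve. */
static bool
can_use_hw_resolve(uint32_t region_count, const VkImageResolve *r,
                   uint32_t w, uint32_t h, uint32_t d)
{
        if (region_count != 1)
                return false;
        if (r[0].srcOffset.x || r[0].srcOffset.y || r[0].srcOffset.z)
                return false;
        if (r[0].dstOffset.x || r[0].dstOffset.y || r[0].dstOffset.z)
                return false;
        return r[0].extent.width == w && r[0].extent.height == h &&
               r[0].extent.depth == d;
}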
void radv_CmdResolveImage(
VkCommandBuffer cmd_buffer_h,
VkImage src_image_h,
@@ -337,39 +318,28 @@ void radv_CmdResolveImage(
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
VkDevice device_h = radv_device_to_handle(device);
enum radv_resolve_method resolve_method = RESOLVE_HW;
bool use_compute_resolve = false;
/* we can use the hw resolve only for a single, full-image resolve */
if (region_count == 1) {
if (regions[0].srcOffset.x ||
regions[0].srcOffset.y ||
regions[0].srcOffset.z)
resolve_method = RESOLVE_COMPUTE;
use_compute_resolve = true;
if (regions[0].dstOffset.x ||
regions[0].dstOffset.y ||
regions[0].dstOffset.z)
resolve_method = RESOLVE_COMPUTE;
use_compute_resolve = true;
if (regions[0].extent.width != src_image->info.width ||
regions[0].extent.height != src_image->info.height ||
regions[0].extent.depth != src_image->info.depth)
resolve_method = RESOLVE_COMPUTE;
use_compute_resolve = true;
} else
resolve_method = RESOLVE_COMPUTE;
use_compute_resolve = true;
radv_pick_resolve_method_images(src_image, dest_image,
&resolve_method);
if (use_compute_resolve) {
if (resolve_method == RESOLVE_FRAGMENT) {
radv_meta_resolve_fragment_image(cmd_buffer,
src_image,
src_image_layout,
dest_image,
dest_image_layout,
region_count, regions);
return;
}
if (resolve_method == RESOLVE_COMPUTE) {
radv_meta_resolve_compute_image(cmd_buffer,
src_image,
src_image_layout,
@@ -458,7 +428,8 @@ void radv_CmdResolveImage(
.baseArrayLayer = src_base_layer + layer,
.layerCount = 1,
},
});
},
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
struct radv_image_view dest_iview;
radv_image_view_init(&dest_iview, cmd_buffer->device,
@@ -474,7 +445,8 @@ void radv_CmdResolveImage(
.baseArrayLayer = dest_base_layer + layer,
.layerCount = 1,
},
});
},
cmd_buffer, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
VkFramebuffer fb_h;
radv_CreateFramebuffer(device_h,
@@ -543,7 +515,6 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
struct radv_meta_saved_state saved_state;
enum radv_resolve_method resolve_method = RESOLVE_HW;
/* FINISHME(perf): Skip clears for resolve attachments.
*
@@ -557,27 +528,7 @@ radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
if (!subpass->has_resolve)
return;
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
struct radv_image *src_img = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment->image;
radv_pick_resolve_method_images(dst_img, src_img, &resolve_method);
if (resolve_method == RESOLVE_FRAGMENT) {
break;
}
}
if (resolve_method == RESOLVE_COMPUTE) {
radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);
return;
} else if (resolve_method == RESOLVE_FRAGMENT) {
radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);
return;
}
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
radv_meta_save_graphics_reset_vport_scissor(&saved_state, cmd_buffer);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];

View File

@@ -32,10 +32,11 @@
#include "vk_format.h"
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, int samples)
{
nir_builder b;
char name[64];
nir_if *outer_if = NULL;
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
false,
false,
@@ -44,12 +45,12 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
false,
false,
GLSL_TYPE_FLOAT);
snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : "float");
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, name);
b.shader->info.cs.local_size[0] = 16;
b.shader->info.cs.local_size[1] = 16;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, name);
b.shader->info->cs.local_size[0] = 16;
b.shader->info->cs.local_size[1] = 16;
b.shader->info->cs.local_size[2] = 1;
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
sampler_type, "s_tex");
@@ -63,40 +64,105 @@ build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_s
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(src_offset, 0);
nir_intrinsic_set_range(src_offset, 16);
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
src_offset->num_components = 2;
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
nir_builder_instr_insert(&b, &src_offset->instr);
nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(dst_offset, 0);
nir_intrinsic_set_range(dst_offset, 16);
dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
dst_offset->num_components = 2;
nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
nir_builder_instr_insert(&b, &dst_offset->instr);
nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
/* do a txf_ms on each sample */
nir_ssa_def *tmp;
radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples,
input_img, color, img_coord);
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex->op = nir_texop_txf_ms;
tex->src[0].src_type = nir_tex_src_coord;
tex->src[0].src = nir_src_for_ssa(img_coord);
tex->src[1].src_type = nir_tex_src_ms_index;
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
tex->dest_type = nir_type_float;
tex->is_array = false;
tex->coord_components = 2;
tex->texture = nir_deref_var_create(tex, input_img);
tex->sampler = NULL;
nir_ssa_def *outval = nir_load_var(&b, color);
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex->instr);
tmp = &tex->dest.ssa;
nir_variable *color =
nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
if (!is_integer && samples > 1) {
nir_tex_instr *tex_all_same = nir_tex_instr_create(b.shader, 1);
tex_all_same->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex_all_same->op = nir_texop_samples_identical;
tex_all_same->src[0].src_type = nir_tex_src_coord;
tex_all_same->src[0].src = nir_src_for_ssa(img_coord);
tex_all_same->dest_type = nir_type_float;
tex_all_same->is_array = false;
tex_all_same->coord_components = 2;
tex_all_same->texture = nir_deref_var_create(tex_all_same, input_img);
tex_all_same->sampler = NULL;
nir_ssa_dest_init(&tex_all_same->instr, &tex_all_same->dest, 1, 32, "tex");
nir_builder_instr_insert(&b, &tex_all_same->instr);
nir_ssa_def *all_same = nir_ine(&b, &tex_all_same->dest.ssa, nir_imm_int(&b, 0));
nir_if *if_stmt = nir_if_create(b.shader);
if_stmt->condition = nir_src_for_ssa(all_same);
nir_cf_node_insert(b.cursor, &if_stmt->cf_node);
b.cursor = nir_after_cf_list(&if_stmt->then_list);
for (int i = 1; i < samples; i++) {
nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 2);
tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
tex_add->op = nir_texop_txf_ms;
tex_add->src[0].src_type = nir_tex_src_coord;
tex_add->src[0].src = nir_src_for_ssa(img_coord);
tex_add->src[1].src_type = nir_tex_src_ms_index;
tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
tex_add->dest_type = nir_type_float;
tex_add->is_array = false;
tex_add->coord_components = 2;
tex_add->texture = nir_deref_var_create(tex_add, input_img);
tex_add->sampler = NULL;
nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
nir_builder_instr_insert(&b, &tex_add->instr);
tmp = nir_fadd(&b, tmp, &tex_add->dest.ssa);
}
tmp = nir_fdiv(&b, tmp, nir_imm_float(&b, samples));
nir_store_var(&b, color, tmp, 0xf);
b.cursor = nir_after_cf_list(&if_stmt->else_list);
outer_if = if_stmt;
}
nir_store_var(&b, color, &tex->dest.ssa, 0xf);
if (outer_if)
b.cursor = nir_after_cf_node(&outer_if->cf_node);
nir_ssa_def *newv = nir_load_var(&b, color);
nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_store);
store->src[0] = nir_src_for_ssa(coord);
store->src[1] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
store->src[2] = nir_src_for_ssa(outval);
store->src[2] = nir_src_for_ssa(newv);
store->variables[0] = nir_deref_var_create(store, output_img);
nir_builder_instr_insert(&b, &store->instr);
return b.shader;
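
Per pixel, the shader above averages all samples, with a samples_identical fast path that short-circuits to sample 0; integer formats skip the averaging entirely. The same arithmetic in plain C, with the identical-samples query reduced to a flag:

/* Sketch: MSAA average resolve for one channel; 'all_same' stands
 * in for the samples_identical texture query. */
static float
resolve_channel(const float *samples, int n, int all_same)
{
        if (all_same || n == 1)
                return samples[0];
        float sum = samples[0];
        for (int i = 1; i < n; i++)
                sum += samples[i];
        return sum / (float)n;
}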
@@ -164,13 +230,12 @@ static VkResult
create_resolve_pipeline(struct radv_device *device,
int samples,
bool is_integer,
bool is_srgb,
VkPipeline *pipeline)
{
VkResult result;
struct radv_shader_module cs = { .nir = NULL };
cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
cs.nir = build_resolve_compute_shader(device, is_integer, samples);
/* compute shader */
@@ -217,15 +282,12 @@ radv_device_init_meta_resolve_compute_state(struct radv_device *device)
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
uint32_t samples = 1 << i;
res = create_resolve_pipeline(device, samples, false, false,
res = create_resolve_pipeline(device, samples, false,
&state->resolve_compute.rc[i].pipeline);
res = create_resolve_pipeline(device, samples, true, false,
res = create_resolve_pipeline(device, samples, true,
&state->resolve_compute.rc[i].i_pipeline);
res = create_resolve_pipeline(device, samples, false, true,
&state->resolve_compute.rc[i].srgb_pipeline);
}
return res;
@@ -243,10 +305,6 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
radv_DestroyPipeline(radv_device_to_handle(device),
state->resolve_compute.rc[i].i_pipeline,
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->resolve_compute.rc[i].srgb_pipeline,
&state->alloc);
}
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
@@ -257,78 +315,6 @@ radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
&state->alloc);
}
static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer,
struct radv_image_view *src_iview,
struct radv_image_view *dest_iview,
const VkOffset2D *src_offset,
const VkOffset2D *dest_offset,
const VkExtent2D *resolve_extent)
{
struct radv_device *device = cmd_buffer->device;
const uint32_t samples = src_iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
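/* Vulkan sample counts are powers of two, so ffs(samples) - 1 is simply
 * log2(samples): ffs(1) - 1 == 0 and ffs(8) - 1 == 3. The result indexes
 * the rc[MAX_SAMPLES_LOG2] pipeline array directly. */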
radv_meta_push_descriptor_set(cmd_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.resolve_compute.p_layout,
0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(src_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL },
}
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(dest_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
}
});
VkPipeline pipeline;
if (vk_format_is_int(src_iview->image->vk_format))
pipeline = device->meta_state.resolve_compute.rc[samples_log2].i_pipeline;
else if (vk_format_is_srgb(src_iview->image->vk_format))
pipeline = device->meta_state.resolve_compute.rc[samples_log2].srgb_pipeline;
else
pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline;
if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
}
unsigned push_constants[4] = {
src_offset->x,
src_offset->y,
dest_offset->x,
dest_offset->y,
};
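/* Four 32-bit words (src and dst offsets) matching the 16-byte size
 * passed below; this assumes the compute pipeline layout declares at
 * least a 16-byte push-constant range for the shader's
 * load_push_constant intrinsics. */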
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.resolve_compute.p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}
void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkImageLayout src_image_layout,
@@ -337,7 +323,10 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
uint32_t region_count,
const VkImageResolve *regions)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_compute_state saved_state;
const uint32_t samples = src_image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
@@ -394,7 +383,8 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
.baseArrayLayer = src_base_layer + layer,
.layerCount = 1,
},
});
},
cmd_buffer, VK_IMAGE_USAGE_SAMPLED_BIT);
struct radv_image_view dest_iview;
radv_image_view_init(&dest_iview, cmd_buffer->device,
@@ -410,108 +400,68 @@ void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
.baseArrayLayer = dest_base_layer + layer,
.layerCount = 1,
},
});
},
cmd_buffer, VK_IMAGE_USAGE_STORAGE_BIT);
emit_resolve(cmd_buffer,
&src_iview,
&dest_iview,
&(VkOffset2D) {srcOffset.x, srcOffset.y },
&(VkOffset2D) {dstOffset.x, dstOffset.y },
&(VkExtent2D) {extent.width, extent.height });
radv_meta_push_descriptor_set(cmd_buffer,
VK_PIPELINE_BIND_POINT_COMPUTE,
device->meta_state.resolve_compute.p_layout,
0, /* set */
2, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(&src_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
},
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 1,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(&dest_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
}
});
VkPipeline pipeline;
if (vk_format_is_int(src_image->vk_format))
pipeline = device->meta_state.resolve_compute.rc[samples_log2].i_pipeline;
else
pipeline = device->meta_state.resolve_compute.rc[samples_log2].pipeline;
if (cmd_buffer->state.compute_pipeline != radv_pipeline_from_handle(pipeline)) {
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
}
unsigned push_constants[4] = {
srcOffset.x,
srcOffset.y,
dstOffset.x,
dstOffset.y,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.resolve_compute.p_layout,
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
push_constants);
radv_unaligned_dispatch(cmd_buffer, extent.width, extent.height, 1);
}
}
radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
}
/**
* Emit any needed resolves for the current subpass.
*/
void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
struct radv_meta_saved_compute_state saved_state;
/* FINISHME(perf): Skip clears for resolve attachments.
*
* From the Vulkan 1.0 spec:
*
* If the first use of an attachment in a render pass is as a resolve
* attachment, then the loadOp is effectively ignored as the resolve is
* guaranteed to overwrite all pixels in the render area.
*/
if (!subpass->has_resolve)
return;
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
struct radv_image_view *src_iview = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
continue;
if (dst_img->surface.dcc_size) {
radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff);
cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
VkImageSubresourceRange range;
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
range.baseMipLevel = 0;
range.levelCount = 1;
range.baseArrayLayer = 0;
range.layerCount = 1;
radv_fast_clear_flush_image_inplace(cmd_buffer, src_iview->image, &range);
}
radv_meta_save_compute(&saved_state, cmd_buffer, 16);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
struct radv_image_view *src_iview = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
struct radv_image_view *dst_iview = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
continue;
struct radv_subpass resolve_subpass = {
.color_count = 1,
.color_attachments = (VkAttachmentReference[]) { dest_att },
.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
};
radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
/* Subpass resolves must respect the render area. We can ignore the
* render area here because vkCmdBeginRenderPass set the render area
* with 3DSTATE_DRAWING_RECTANGLE.
*
* XXX(chadv): Does the hardware really respect
* 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
*/
emit_resolve(cmd_buffer,
src_iview,
dst_iview,
&(VkOffset2D) { 0, 0 },
&(VkOffset2D) { 0, 0 },
&(VkExtent2D) { fb->width, fb->height });
}
radv_meta_restore_compute(&saved_state, cmd_buffer, 16);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
struct radv_image *dst_img = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment->image;
if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
continue;
VkImageSubresourceRange range;
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
range.baseMipLevel = 0;
range.levelCount = 1;
range.baseArrayLayer = 0;
range.layerCount = 1;
radv_fast_clear_flush_image_inplace(cmd_buffer, dst_img, &range);
}
}

@@ -1,666 +0,0 @@
/*
* Copyright © 2016 Dave Airlie
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#include <assert.h>
#include <stdbool.h>
#include "radv_meta.h"
#include "radv_private.h"
#include "nir/nir_builder.h"
#include "sid.h"
#include "vk_format.h"
static nir_shader *
build_nir_vertex_shader(void)
{
const struct glsl_type *vec4 = glsl_vec4_type();
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "meta_resolve_vs");
nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "gl_Position");
pos_out->data.location = VARYING_SLOT_POS;
nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&b);
nir_store_var(&b, pos_out, outvec, 0xf);
return b.shader;
}
static nir_shader *
build_resolve_fragment_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
nir_builder b;
char name[64];
const struct glsl_type *vec2 = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
const struct glsl_type *vec4 = glsl_vec4_type();
const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
false,
false,
GLSL_TYPE_FLOAT);
snprintf(name, 64, "meta_resolve_fs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, NULL);
b.shader->info.name = ralloc_strdup(b.shader, name);
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
sampler_type, "s_tex");
input_img->data.descriptor_set = 0;
input_img->data.binding = 0;
nir_variable *fs_pos_in = nir_variable_create(b.shader, nir_var_shader_in, vec2, "fs_pos_in");
fs_pos_in->data.location = VARYING_SLOT_POS;
nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
vec4, "f_color");
color_out->data.location = FRAG_RESULT_DATA0;
nir_ssa_def *pos_in = nir_load_var(&b, fs_pos_in);
nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(src_offset, 0);
nir_intrinsic_set_range(src_offset, 8);
src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
src_offset->num_components = 2;
nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
nir_builder_instr_insert(&b, &src_offset->instr);
nir_ssa_def *pos_int = nir_f2i32(&b, pos_in);
nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, pos_int, &src_offset->dest.ssa), 0x3);
nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
radv_meta_build_resolve_shader_core(&b, is_integer, is_srgb, samples,
input_img, color, img_coord);
nir_ssa_def *outval = nir_load_var(&b, color);
nir_store_var(&b, color_out, outval, 0xf);
return b.shader;
}
static VkResult
create_layout(struct radv_device *device)
{
VkResult result;
/*
* one descriptor for the image being sampled
*/
VkDescriptorSetLayoutCreateInfo ds_create_info = {
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
.bindingCount = 1,
.pBindings = (VkDescriptorSetLayoutBinding[]) {
{
.binding = 0,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.descriptorCount = 1,
.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
.pImmutableSamplers = NULL
},
}
};
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
&ds_create_info,
&device->meta_state.alloc,
&device->meta_state.resolve_fragment.ds_layout);
if (result != VK_SUCCESS)
goto fail;
VkPipelineLayoutCreateInfo pl_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
.setLayoutCount = 1,
.pSetLayouts = &device->meta_state.resolve_fragment.ds_layout,
.pushConstantRangeCount = 1,
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8},
};
result = radv_CreatePipelineLayout(radv_device_to_handle(device),
&pl_create_info,
&device->meta_state.alloc,
&device->meta_state.resolve_fragment.p_layout);
if (result != VK_SUCCESS)
goto fail;
return VK_SUCCESS;
fail:
return result;
}
static const VkPipelineVertexInputStateCreateInfo normal_vi_create_info = {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
.vertexBindingDescriptionCount = 0,
.vertexAttributeDescriptionCount = 0,
};
static VkFormat pipeline_formats[] = {
VK_FORMAT_R8G8B8A8_UNORM,
VK_FORMAT_R8G8B8A8_UINT,
VK_FORMAT_R8G8B8A8_SINT,
VK_FORMAT_R16G16B16A16_UNORM,
VK_FORMAT_R16G16B16A16_SNORM,
VK_FORMAT_R16G16B16A16_UINT,
VK_FORMAT_R16G16B16A16_SINT,
VK_FORMAT_R32_SFLOAT,
VK_FORMAT_R32G32_SFLOAT,
VK_FORMAT_R32G32B32A32_SFLOAT
};
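/* One render pass and pipeline get built per entry in this table;
 * radv_format_meta_fs_key presumably folds each format into one of the
 * NUM_META_FS_KEYS buckets, so formats sharing a key share a pipeline. */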
static VkResult
create_resolve_pipeline(struct radv_device *device,
int samples_log2,
VkFormat format)
{
VkResult result;
bool is_integer = false, is_srgb = false;
uint32_t samples = 1 << samples_log2;
unsigned fs_key = radv_format_meta_fs_key(format);
const VkPipelineVertexInputStateCreateInfo *vi_create_info;
vi_create_info = &normal_vi_create_info;
if (vk_format_is_int(format))
is_integer = true;
else if (vk_format_is_srgb(format))
is_srgb = true;
struct radv_shader_module fs = { .nir = NULL };
fs.nir = build_resolve_fragment_shader(device, is_integer, is_srgb, samples);
struct radv_shader_module vs = {
.nir = build_nir_vertex_shader(),
};
VkRenderPass *rp = is_srgb ?
&device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
&device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
assert(!*rp);
VkPipeline *pipeline = is_srgb ?
&device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
&device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
assert(!*pipeline);
VkPipelineShaderStageCreateInfo pipeline_shader_stages[] = {
{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_VERTEX_BIT,
.module = radv_shader_module_to_handle(&vs),
.pName = "main",
.pSpecializationInfo = NULL
}, {
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
.module = radv_shader_module_to_handle(&fs),
.pName = "main",
.pSpecializationInfo = NULL
},
};
result = radv_CreateRenderPass(radv_device_to_handle(device),
&(VkRenderPassCreateInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = &(VkAttachmentDescription) {
.format = format,
.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
},
.subpassCount = 1,
.pSubpasses = &(VkSubpassDescription) {
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
.inputAttachmentCount = 0,
.colorAttachmentCount = 1,
.pColorAttachments = &(VkAttachmentReference) {
.attachment = 0,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.pResolveAttachments = NULL,
.pDepthStencilAttachment = &(VkAttachmentReference) {
.attachment = VK_ATTACHMENT_UNUSED,
.layout = VK_IMAGE_LAYOUT_GENERAL,
},
.preserveAttachmentCount = 1,
.pPreserveAttachments = (uint32_t[]) { 0 },
},
.dependencyCount = 0,
}, &device->meta_state.alloc, rp);
const VkGraphicsPipelineCreateInfo vk_pipeline_info = {
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
.stageCount = ARRAY_SIZE(pipeline_shader_stages),
.pStages = pipeline_shader_stages,
.pVertexInputState = vi_create_info,
.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
.primitiveRestartEnable = false,
},
.pViewportState = &(VkPipelineViewportStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
.viewportCount = 1,
.scissorCount = 1,
},
.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
.rasterizerDiscardEnable = false,
.polygonMode = VK_POLYGON_MODE_FILL,
.cullMode = VK_CULL_MODE_NONE,
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE
},
.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
.rasterizationSamples = 1,
.sampleShadingEnable = false,
.pSampleMask = (VkSampleMask[]) { UINT32_MAX },
},
.pColorBlendState = &(VkPipelineColorBlendStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = (VkPipelineColorBlendAttachmentState []) {
{ .colorWriteMask =
VK_COLOR_COMPONENT_A_BIT |
VK_COLOR_COMPONENT_R_BIT |
VK_COLOR_COMPONENT_G_BIT |
VK_COLOR_COMPONENT_B_BIT },
}
},
.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
.dynamicStateCount = 9,
.pDynamicStates = (VkDynamicState[]) {
VK_DYNAMIC_STATE_VIEWPORT,
VK_DYNAMIC_STATE_SCISSOR,
VK_DYNAMIC_STATE_LINE_WIDTH,
VK_DYNAMIC_STATE_DEPTH_BIAS,
VK_DYNAMIC_STATE_BLEND_CONSTANTS,
VK_DYNAMIC_STATE_DEPTH_BOUNDS,
VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
VK_DYNAMIC_STATE_STENCIL_REFERENCE,
},
},
.flags = 0,
.layout = device->meta_state.resolve_fragment.p_layout,
.renderPass = *rp,
.subpass = 0,
};
const struct radv_graphics_pipeline_create_info radv_pipeline_info = {
.use_rectlist = true
};
result = radv_graphics_pipeline_create(radv_device_to_handle(device),
radv_pipeline_cache_to_handle(&device->meta_state.cache),
&vk_pipeline_info, &radv_pipeline_info,
&device->meta_state.alloc,
pipeline);
ralloc_free(vs.nir);
ralloc_free(fs.nir);
if (result != VK_SUCCESS)
goto fail;
return VK_SUCCESS;
fail:
ralloc_free(vs.nir);
ralloc_free(fs.nir);
return result;
}
VkResult
radv_device_init_meta_resolve_fragment_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
VkResult res;
memset(&state->resolve_fragment, 0, sizeof(state->resolve_fragment));
res = create_layout(device);
if (res != VK_SUCCESS)
return res;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
for (unsigned j = 0; j < ARRAY_SIZE(pipeline_formats); ++j) {
res = create_resolve_pipeline(device, i, pipeline_formats[j]);
}
res = create_resolve_pipeline(device, i, VK_FORMAT_R8G8B8A8_SRGB);
}
return res;
}
void
radv_device_finish_meta_resolve_fragment_state(struct radv_device *device)
{
struct radv_meta_state *state = &device->meta_state;
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
for (unsigned j = 0; j < NUM_META_FS_KEYS; ++j) {
radv_DestroyRenderPass(radv_device_to_handle(device),
state->resolve_fragment.rc[i].render_pass[j],
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->resolve_fragment.rc[i].pipeline[j],
&state->alloc);
}
radv_DestroyRenderPass(radv_device_to_handle(device),
state->resolve_fragment.rc[i].srgb_render_pass,
&state->alloc);
radv_DestroyPipeline(radv_device_to_handle(device),
state->resolve_fragment.rc[i].srgb_pipeline,
&state->alloc);
}
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
state->resolve_fragment.ds_layout,
&state->alloc);
radv_DestroyPipelineLayout(radv_device_to_handle(device),
state->resolve_fragment.p_layout,
&state->alloc);
}
static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer,
struct radv_image_view *src_iview,
struct radv_image_view *dest_iview,
const VkOffset2D *src_offset,
const VkOffset2D *dest_offset,
const VkExtent2D *resolve_extent)
{
struct radv_device *device = cmd_buffer->device;
VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
const uint32_t samples = src_iview->image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
radv_meta_push_descriptor_set(cmd_buffer,
VK_PIPELINE_BIND_POINT_GRAPHICS,
cmd_buffer->device->meta_state.resolve_fragment.p_layout,
0, /* set */
1, /* descriptorWriteCount */
(VkWriteDescriptorSet[]) {
{
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
.dstBinding = 0,
.dstArrayElement = 0,
.descriptorCount = 1,
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
.pImageInfo = (VkDescriptorImageInfo[]) {
{
.sampler = VK_NULL_HANDLE,
.imageView = radv_image_view_to_handle(src_iview),
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
},
}
},
});
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
unsigned push_constants[2] = {
src_offset->x,
src_offset->y,
};
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
device->meta_state.resolve_fragment.p_layout,
VK_SHADER_STAGE_FRAGMENT_BIT, 0, 8,
push_constants);
unsigned fs_key = radv_format_meta_fs_key(dest_iview->vk_format);
VkPipeline pipeline_h = vk_format_is_srgb(dest_iview->vk_format) ?
device->meta_state.resolve_fragment.rc[samples_log2].srgb_pipeline :
device->meta_state.resolve_fragment.rc[samples_log2].pipeline[fs_key];
radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
pipeline_h);
radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkViewport) {
.x = dest_offset->x,
.y = dest_offset->y,
.width = resolve_extent->width,
.height = resolve_extent->height,
.minDepth = 0.0f,
.maxDepth = 1.0f
});
radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &(VkRect2D) {
.offset = *dest_offset,
.extent = *resolve_extent,
});
radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
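/* Three vertices through a use_rectlist pipeline: the meta vertex shader
 * (radv_meta_gen_rect_vertices above) expands them into a screen-aligned
 * rectangle, so the viewport and scissor set just before this call are
 * what actually bound the resolve area. */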
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
}
void radv_meta_resolve_fragment_image(struct radv_cmd_buffer *cmd_buffer,
struct radv_image *src_image,
VkImageLayout src_image_layout,
struct radv_image *dest_image,
VkImageLayout dest_image_layout,
uint32_t region_count,
const VkImageResolve *regions)
{
struct radv_device *device = cmd_buffer->device;
struct radv_meta_saved_state saved_state;
const uint32_t samples = src_image->info.samples;
const uint32_t samples_log2 = ffs(samples) - 1;
unsigned fs_key = radv_format_meta_fs_key(dest_image->vk_format);
VkRenderPass rp;
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
const uint32_t src_base_layer =
radv_meta_get_iview_layer(src_image, &region->srcSubresource,
&region->srcOffset);
VkImageSubresourceRange range;
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
range.baseMipLevel = region->srcSubresource.mipLevel;
range.levelCount = 1;
range.baseArrayLayer = src_base_layer;
range.layerCount = region->srcSubresource.layerCount;
radv_fast_clear_flush_image_inplace(cmd_buffer, src_image, &range);
}
rp = vk_format_is_srgb(dest_image->vk_format) ?
device->meta_state.resolve_fragment.rc[samples_log2].srgb_render_pass :
device->meta_state.resolve_fragment.rc[samples_log2].render_pass[fs_key];
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
for (uint32_t r = 0; r < region_count; ++r) {
const VkImageResolve *region = &regions[r];
assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
const uint32_t src_base_layer =
radv_meta_get_iview_layer(src_image, &region->srcSubresource,
&region->srcOffset);
const uint32_t dest_base_layer =
radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
&region->dstOffset);
const struct VkExtent3D extent =
radv_sanitize_image_extent(src_image->type, region->extent);
const struct VkOffset3D srcOffset =
radv_sanitize_image_offset(src_image->type, region->srcOffset);
const struct VkOffset3D dstOffset =
radv_sanitize_image_offset(dest_image->type, region->dstOffset);
for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
++layer) {
struct radv_image_view src_iview;
radv_image_view_init(&src_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(src_image),
.viewType = radv_meta_get_view_type(src_image),
.format = src_image->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = region->srcSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = src_base_layer + layer,
.layerCount = 1,
},
});
struct radv_image_view dest_iview;
radv_image_view_init(&dest_iview, cmd_buffer->device,
&(VkImageViewCreateInfo) {
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = radv_image_to_handle(dest_image),
.viewType = radv_meta_get_view_type(dest_image),
.format = dest_image->vk_format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = region->dstSubresource.mipLevel,
.levelCount = 1,
.baseArrayLayer = dest_base_layer + layer,
.layerCount = 1,
},
});
VkFramebuffer fb;
radv_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device),
&(VkFramebufferCreateInfo) {
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
.attachmentCount = 1,
.pAttachments = (VkImageView[]) {
radv_image_view_to_handle(&dest_iview),
},
.width = extent.width,
.height = extent.height,
.layers = 1
}, &cmd_buffer->pool->alloc, &fb);
radv_CmdBeginRenderPass(radv_cmd_buffer_to_handle(cmd_buffer),
&(VkRenderPassBeginInfo) {
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
.renderPass = rp,
.framebuffer = fb,
.renderArea = {
.offset = { dstOffset.x, dstOffset.y, },
.extent = { extent.width, extent.height },
},
.clearValueCount = 0,
.pClearValues = NULL,
}, VK_SUBPASS_CONTENTS_INLINE);
emit_resolve(cmd_buffer,
&src_iview,
&dest_iview,
&(VkOffset2D) { srcOffset.x, srcOffset.y },
&(VkOffset2D) { dstOffset.x, dstOffset.y },
&(VkExtent2D) { extent.width, extent.height });
radv_CmdEndRenderPass(radv_cmd_buffer_to_handle(cmd_buffer));
radv_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device), fb, &cmd_buffer->pool->alloc);
}
}
radv_meta_restore(&saved_state, cmd_buffer);
}
/**
* Emit any needed resolves for the current subpass.
*/
void
radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer)
{
struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
const struct radv_subpass *subpass = cmd_buffer->state.subpass;
struct radv_meta_saved_state saved_state;
/* FINISHME(perf): Skip clears for resolve attachments.
*
* From the Vulkan 1.0 spec:
*
* If the first use of an attachment in a render pass is as a resolve
* attachment, then the loadOp is effectively ignored as the resolve is
* guaranteed to overwrite all pixels in the render area.
*/
if (!subpass->has_resolve)
return;
radv_meta_save_graphics_reset_vport_scissor_novertex(&saved_state, cmd_buffer);
for (uint32_t i = 0; i < subpass->color_count; ++i) {
VkAttachmentReference src_att = subpass->color_attachments[i];
VkAttachmentReference dest_att = subpass->resolve_attachments[i];
struct radv_image_view *dest_iview = cmd_buffer->state.framebuffer->attachments[dest_att.attachment].attachment;
struct radv_image *dst_img = dest_iview->image;
struct radv_image_view *src_iview = cmd_buffer->state.framebuffer->attachments[src_att.attachment].attachment;
if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
continue;
if (dst_img->surface.dcc_size) {
radv_initialize_dcc(cmd_buffer, dst_img, 0xffffffff);
cmd_buffer->state.attachments[dest_att.attachment].current_layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
}
{
VkImageSubresourceRange range;
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
range.baseMipLevel = 0;
range.levelCount = 1;
range.baseArrayLayer = 0;
range.layerCount = 1;
radv_fast_clear_flush_image_inplace(cmd_buffer, src_iview->image, &range);
}
struct radv_subpass resolve_subpass = {
.color_count = 1,
.color_attachments = (VkAttachmentReference[]) { dest_att },
.depth_stencil_attachment = { .attachment = VK_ATTACHMENT_UNUSED },
};
radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass, false);
/* Subpass resolves must respect the render area. We can ignore the
* render area here because vkCmdBeginRenderPass set the render area
* with 3DSTATE_DRAWING_RECTANGLE.
*
* XXX(chadv): Does the hardware really respect
* 3DSTATE_DRAWING_RECTANGLE when drawing a 3DPRIM_RECTLIST?
*/
emit_resolve(cmd_buffer,
src_iview,
dest_iview,
&(VkOffset2D) { 0, 0 },
&(VkOffset2D) { 0, 0 },
&(VkExtent2D) { fb->width, fb->height });
}
cmd_buffer->state.subpass = subpass;
radv_meta_restore(&saved_state, cmd_buffer);
}

@@ -26,7 +26,6 @@
*/
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "radv_private.h"
#include "nir/nir.h"
#include "nir/nir_builder.h"
@@ -36,7 +35,6 @@
#include <llvm-c/TargetMachine.h>
#include "sid.h"
#include "gfx9d.h"
#include "r600d_common.h"
#include "ac_binary.h"
#include "ac_llvm_util.h"
@@ -266,7 +264,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
}
/* Vulkan uses the separate-shader linking model */
nir->info.separate_shader = true;
nir->info->separate_shader = true;
nir_shader_gather_info(nir, entry_point->impl);
@@ -375,7 +373,7 @@ static void radv_dump_pipeline_stats(struct radv_device *device, struct radv_pip
void radv_shader_variant_destroy(struct radv_device *device,
struct radv_shader_variant *variant)
{
if (!p_atomic_dec_zero(&variant->ref_count))
if (__sync_fetch_and_sub(&variant->ref_count, 1) != 1)
return;
device->ws->buffer_destroy(variant->bo);
@@ -541,8 +539,8 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
if (module->nir)
_mesa_sha1_compute(module->nir->info.name,
strlen(module->nir->info.name),
_mesa_sha1_compute(module->nir->info->name,
strlen(module->nir->info->name),
module->sha1);
radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key, 0);
@@ -606,14 +604,11 @@ radv_pipeline_compile(struct radv_pipeline *pipeline,
}
static union ac_shader_variant_key
radv_compute_tes_key(bool as_es, bool export_prim_id)
radv_compute_tes_key(bool as_es)
{
union ac_shader_variant_key key;
memset(&key, 0, sizeof(key));
key.tes.as_es = as_es;
/* export prim id only happens when no geom shader */
if (!as_es)
key.tes.export_prim_id = export_prim_id;
return key;
}
@@ -644,15 +639,13 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
nir_shader *tes_nir, *tcs_nir;
void *tes_code = NULL, *tcs_code = NULL;
unsigned tes_code_size = 0, tcs_code_size = 0;
union ac_shader_variant_key tes_key;
union ac_shader_variant_key tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline));
union ac_shader_variant_key tcs_key;
bool dump = (pipeline->device->debug_flags & RADV_DEBUG_DUMP_SHADERS);
tes_key = radv_compute_tes_key(radv_pipeline_has_gs(pipeline),
pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input);
if (tes_module->nir)
_mesa_sha1_compute(tes_module->nir->info.name,
strlen(tes_module->nir->info.name),
_mesa_sha1_compute(tes_module->nir->info->name,
strlen(tes_module->nir->info->name),
tes_module->sha1);
radv_hash_shader(tes_sha1, tes_module, tes_entrypoint, tes_spec_info, layout, &tes_key, 0);
@@ -664,8 +657,8 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
tcs_key = radv_compute_tcs_key(tes_variant->info.tes.primitive_mode, input_vertices);
if (tcs_module->nir)
_mesa_sha1_compute(tcs_module->nir->info.name,
strlen(tcs_module->nir->info.name),
_mesa_sha1_compute(tcs_module->nir->info->name,
strlen(tcs_module->nir->info->name),
tcs_module->sha1);
radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
@@ -694,16 +687,16 @@ radv_tess_pipeline_compile(struct radv_pipeline *pipeline,
return;
nir_lower_tes_patch_vertices(tes_nir,
tcs_nir->info.tess.tcs_vertices_out);
tcs_nir->info->tess.tcs_vertices_out);
tes_variant = radv_shader_variant_create(pipeline->device, tes_nir,
layout, &tes_key, &tes_code,
&tes_code_size, dump);
tcs_key = radv_compute_tcs_key(tes_nir->info.tess.primitive_mode, input_vertices);
tcs_key = radv_compute_tcs_key(tes_nir->info->tess.primitive_mode, input_vertices);
if (tcs_module->nir)
_mesa_sha1_compute(tcs_module->nir->info.name,
strlen(tcs_module->nir->info.name),
_mesa_sha1_compute(tcs_module->nir->info->name,
strlen(tcs_module->nir->info->name),
tcs_module->sha1);
radv_hash_shader(tcs_sha1, tcs_module, tcs_entrypoint, tcs_spec_info, layout, &tcs_key, 0);
@@ -1338,12 +1331,11 @@ radv_pipeline_init_multisample_state(struct radv_pipeline *pipeline,
S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) |
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1);
ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pipeline->device->physical_device->rad_info.chip_class >= GFX9);
if (ms->num_samples > 1) {
unsigned log_samples = util_logbase2(ms->num_samples);
unsigned log_ps_iter_samples = util_logbase2(util_next_power_of_two(ps_iter_samples));
ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1);
ms->pa_sc_mode_cntl_0 = S_028A48_MSAA_ENABLE(1);
ms->pa_sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1); /* CM_R_028BDC_PA_SC_LINE_CNTL */
ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_samples) |
S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) |
@@ -1612,7 +1604,7 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
}
static union ac_shader_variant_key
radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls, bool export_prim_id)
radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es, bool as_ls)
{
union ac_shader_variant_key key;
const VkPipelineVertexInputStateCreateInfo *input_state =
@@ -1622,7 +1614,6 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es,
key.vs.instance_rate_inputs = 0;
key.vs.as_es = as_es;
key.vs.as_ls = as_ls;
key.vs.export_prim_id = export_prim_id;
for (unsigned i = 0; i < input_state->vertexAttributeDescriptionCount; ++i) {
unsigned binding;
@@ -1864,24 +1855,6 @@ static uint32_t si_vgt_gs_mode(struct radv_shader_variant *gs)
S_028A40_GS_WRITE_OPTIMIZE(1);
}
static void calculate_vgt_gs_mode(struct radv_pipeline *pipeline)
{
struct radv_shader_variant *vs;
vs = radv_pipeline_has_gs(pipeline) ? pipeline->gs_copy_shader : (radv_pipeline_has_tess(pipeline) ? pipeline->shaders[MESA_SHADER_TESS_EVAL] : pipeline->shaders[MESA_SHADER_VERTEX]);
struct ac_vs_output_info *outinfo = &vs->info.vs.outinfo;
pipeline->graphics.vgt_primitiveid_en = false;
pipeline->graphics.vgt_gs_mode = 0;
if (radv_pipeline_has_gs(pipeline)) {
pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
} else if (outinfo->export_prim_id) {
pipeline->graphics.vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A);
pipeline->graphics.vgt_primitiveid_en = true;
}
}
static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
{
struct radv_shader_variant *vs;
@@ -1913,11 +1886,9 @@ static void calculate_pa_cl_vs_out_cntl(struct radv_pipeline *pipeline)
static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
{
uint32_t ps_input_cntl;
if (offset <= AC_EXP_PARAM_OFFSET_31) {
if (offset <= AC_EXP_PARAM_OFFSET_31)
ps_input_cntl = S_028644_OFFSET(offset);
if (flat_shade)
ps_input_cntl |= S_028644_FLAT_SHADE(1);
} else {
else {
/* The input is a DEFAULT_VAL constant. */
assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 &&
offset <= AC_EXP_PARAM_DEFAULT_VAL_1111);
@@ -1925,6 +1896,8 @@ static uint32_t offset_to_ps_input(uint32_t offset, bool flat_shade)
ps_input_cntl = S_028644_OFFSET(0x20) |
S_028644_DEFAULT_VAL(offset);
}
if (flat_shade)
ps_input_cntl |= S_028644_FLAT_SHADE(1);
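/* AC_EXP_PARAM offsets above 31 denote DEFAULT_VAL constants, i.e. one of
 * (0,0,0,0) through (1,1,1,1); OFFSET(0x20) appears to be the sentinel
 * that selects DEFAULT_VAL, and with FLAT_SHADE applied outside the
 * branch it covers default-value inputs as well. */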
return ps_input_cntl;
}
@@ -2013,10 +1986,62 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
radv_pipeline_init_blend_state(pipeline, pCreateInfo, extra);
if (modules[MESA_SHADER_VERTEX]) {
bool as_es = false;
bool as_ls = false;
if (modules[MESA_SHADER_TESS_CTRL])
as_ls = true;
else if (modules[MESA_SHADER_GEOMETRY])
as_es = true;
union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls);
pipeline->shaders[MESA_SHADER_VERTEX] =
radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX],
pStages[MESA_SHADER_VERTEX]->pName,
MESA_SHADER_VERTEX,
pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
pipeline->layout, &key);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
}
if (modules[MESA_SHADER_GEOMETRY]) {
union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false);
pipeline->shaders[MESA_SHADER_GEOMETRY] =
radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY],
pStages[MESA_SHADER_GEOMETRY]->pName,
MESA_SHADER_GEOMETRY,
pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo,
pipeline->layout, &key);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
pipeline->graphics.vgt_gs_mode = si_vgt_gs_mode(pipeline->shaders[MESA_SHADER_GEOMETRY]);
} else
pipeline->graphics.vgt_gs_mode = 0;
if (modules[MESA_SHADER_TESS_EVAL]) {
assert(modules[MESA_SHADER_TESS_CTRL]);
radv_tess_pipeline_compile(pipeline,
cache,
modules[MESA_SHADER_TESS_CTRL],
modules[MESA_SHADER_TESS_EVAL],
pStages[MESA_SHADER_TESS_CTRL]->pName,
pStages[MESA_SHADER_TESS_EVAL]->pName,
pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo,
pipeline->layout,
pCreateInfo->pTessellationState->patchControlPoints);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_TESS_EVAL) |
mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL);
}
if (!modules[MESA_SHADER_FRAGMENT]) {
nir_builder fs_b;
nir_builder_init_simple_shader(&fs_b, NULL, MESA_SHADER_FRAGMENT, NULL);
fs_b.shader->info.name = ralloc_strdup(fs_b.shader, "noop_fs");
fs_b.shader->info->name = ralloc_strdup(fs_b.shader, "noop_fs");
fs_m.nir = fs_b.shader;
modules[MESA_SHADER_FRAGMENT] = &fs_m;
}
@@ -2040,58 +2065,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
if (fs_m.nir)
ralloc_free(fs_m.nir);
if (modules[MESA_SHADER_VERTEX]) {
bool as_es = false;
bool as_ls = false;
bool export_prim_id = false;
if (modules[MESA_SHADER_TESS_CTRL])
as_ls = true;
else if (modules[MESA_SHADER_GEOMETRY])
as_es = true;
else if (pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
export_prim_id = true;
union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, as_es, as_ls, export_prim_id);
pipeline->shaders[MESA_SHADER_VERTEX] =
radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_VERTEX],
pStages[MESA_SHADER_VERTEX]->pName,
MESA_SHADER_VERTEX,
pStages[MESA_SHADER_VERTEX]->pSpecializationInfo,
pipeline->layout, &key);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_VERTEX);
}
if (modules[MESA_SHADER_GEOMETRY]) {
union ac_shader_variant_key key = radv_compute_vs_key(pCreateInfo, false, false, false);
pipeline->shaders[MESA_SHADER_GEOMETRY] =
radv_pipeline_compile(pipeline, cache, modules[MESA_SHADER_GEOMETRY],
pStages[MESA_SHADER_GEOMETRY]->pName,
MESA_SHADER_GEOMETRY,
pStages[MESA_SHADER_GEOMETRY]->pSpecializationInfo,
pipeline->layout, &key);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_GEOMETRY);
}
if (modules[MESA_SHADER_TESS_EVAL]) {
assert(modules[MESA_SHADER_TESS_CTRL]);
radv_tess_pipeline_compile(pipeline,
cache,
modules[MESA_SHADER_TESS_CTRL],
modules[MESA_SHADER_TESS_EVAL],
pStages[MESA_SHADER_TESS_CTRL]->pName,
pStages[MESA_SHADER_TESS_EVAL]->pName,
pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo,
pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo,
pipeline->layout,
pCreateInfo->pTessellationState->patchControlPoints);
pipeline->active_stages |= mesa_to_vk_shader_stage(MESA_SHADER_TESS_EVAL) |
mesa_to_vk_shader_stage(MESA_SHADER_TESS_CTRL);
}
radv_pipeline_init_depth_stencil_state(pipeline, pCreateInfo, extra);
radv_pipeline_init_raster_state(pipeline, pCreateInfo);
radv_pipeline_init_multisample_state(pipeline, pCreateInfo);
@@ -2155,7 +2128,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
ps->info.fs.writes_z ? V_028710_SPI_SHADER_32_R :
V_028710_SPI_SHADER_ZERO;
calculate_vgt_gs_mode(pipeline);
calculate_pa_cl_vs_out_cntl(pipeline);
calculate_ps_inputs(pipeline);
@@ -2176,15 +2148,10 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
else
stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS);
} else if (radv_pipeline_has_gs(pipeline))
stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) |
S_028B54_GS_EN(1) |
S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER);
if (device->physical_device->rad_info.chip_class >= GFX9)
stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2);
pipeline->graphics.vgt_shader_stages_en = stages;
if (radv_pipeline_has_gs(pipeline))
@@ -2232,16 +2199,6 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
pipeline->binding_stride[desc->binding] = desc->stride;
}
struct ac_userdata_info *loc = radv_lookup_user_sgpr(pipeline, MESA_SHADER_VERTEX,
AC_UD_VS_BASE_VERTEX_START_INSTANCE);
if (loc->sgpr_idx != -1) {
pipeline->graphics.vtx_base_sgpr = radv_shader_stage_to_user_data_0(MESA_SHADER_VERTEX, radv_pipeline_has_gs(pipeline), radv_pipeline_has_tess(pipeline));
pipeline->graphics.vtx_base_sgpr += loc->sgpr_idx * 4;
if (pipeline->shaders[MESA_SHADER_VERTEX]->info.info.vs.needs_draw_id)
pipeline->graphics.vtx_emit_num = 3;
else
pipeline->graphics.vtx_emit_num = 2;
}
if (device->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) {
radv_dump_pipeline_stats(device, pipeline);
}

@@ -23,7 +23,6 @@
#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "radv_private.h"
#include "ac_nir_to_llvm.h"
@@ -185,7 +184,7 @@ radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
entry->variant = variant;
}
p_atomic_inc(&entry->variant->ref_count);
__sync_fetch_and_add(&entry->variant->ref_count, 1);
return entry->variant;
}
@@ -277,7 +276,7 @@ radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
} else {
entry->variant = variant;
}
p_atomic_inc(&variant->ref_count);
__sync_fetch_and_add(&variant->ref_count, 1);
pthread_mutex_unlock(&cache->mutex);
return variant;
}
@@ -297,7 +296,7 @@ radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
entry->rsrc2 = variant->rsrc2;
entry->code_size = code_size;
entry->variant = variant;
p_atomic_inc(&variant->ref_count);
__sync_fetch_and_add(&variant->ref_count, 1);
radv_pipeline_cache_add_entry(cache, entry);

@@ -47,14 +47,12 @@
#include "compiler/shader_enums.h"
#include "util/macros.h"
#include "util/list.h"
#include "util/vk_alloc.h"
#include "main/macros.h"
#include "vk_alloc.h"
#include "radv_radeon_winsys.h"
#include "ac_binary.h"
#include "ac_nir_to_llvm.h"
#include "ac_gpu_info.h"
#include "ac_surface.h"
#include "radv_debug.h"
#include "radv_descriptor_set.h"
@@ -268,14 +266,10 @@ struct radv_physical_device {
char path[20];
const char * name;
uint8_t uuid[VK_UUID_SIZE];
uint8_t device_uuid[VK_UUID_SIZE];
int local_fd;
struct wsi_device wsi_device;
struct radv_extensions extensions;
bool has_rbplus; /* if RB+ register exist */
bool rbplus_allowed; /* if RB+ is allowed */
};
struct radv_instance {
@@ -288,7 +282,6 @@ struct radv_instance {
struct radv_physical_device physicalDevices[RADV_MAX_DRM_DEVICES];
uint64_t debug_flags;
uint64_t perftest_flags;
};
VkResult radv_init_wsi(struct radv_physical_device *physical_device);
@@ -350,8 +343,6 @@ struct radv_meta_state {
struct radv_pipeline *depthstencil_pipeline[NUM_DEPTH_CLEAR_PIPELINES];
} clear[1 + MAX_SAMPLES_LOG2];
VkPipelineLayout clear_color_p_layout;
VkPipelineLayout clear_depth_p_layout;
struct {
VkRenderPass render_pass[NUM_META_FS_KEYS];
@@ -424,22 +415,9 @@ struct radv_meta_state {
struct {
VkPipeline pipeline;
VkPipeline i_pipeline;
VkPipeline srgb_pipeline;
} rc[MAX_SAMPLES_LOG2];
} resolve_compute;
struct {
VkDescriptorSetLayout ds_layout;
VkPipelineLayout p_layout;
struct {
VkRenderPass srgb_render_pass;
VkPipeline srgb_pipeline;
VkRenderPass render_pass[NUM_META_FS_KEYS];
VkPipeline pipeline[NUM_META_FS_KEYS];
} rc[MAX_SAMPLES_LOG2];
} resolve_fragment;
struct {
VkPipeline decompress_pipeline;
VkPipeline resummarize_pipeline;
@@ -751,6 +729,7 @@ struct radv_attachment_state {
struct radv_cmd_state {
uint32_t vb_dirty;
radv_cmd_dirty_mask_t dirty;
bool vertex_descriptors_dirty;
bool push_descriptors_dirty;
struct radv_pipeline * pipeline;
@@ -765,9 +744,9 @@ struct radv_cmd_state {
struct radv_descriptor_set * descriptors[MAX_SETS];
struct radv_attachment_state * attachments;
VkRect2D render_area;
struct radv_buffer * index_buffer;
uint32_t index_type;
uint64_t index_va;
uint32_t max_index_count;
uint32_t index_offset;
int32_t last_primitive_reset_en;
uint32_t last_primitive_reset_index;
enum radv_cmd_flush_bits flush_bits;
@@ -825,9 +804,6 @@ struct radv_cmd_buffer {
bool record_fail;
int ring_offsets_idx; /* just used for verification */
uint32_t gfx9_fence_offset;
struct radeon_winsys_bo *gfx9_fence_bo;
uint32_t gfx9_fence_idx;
};
struct radv_image;
@@ -847,23 +823,14 @@ void si_write_scissors(struct radeon_winsys_cs *cs, int first,
uint32_t si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
bool instanced_draw, bool indirect_draw,
uint32_t draw_vertex_count);
void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
bool is_mec,
unsigned event, unsigned event_flags,
unsigned data_sel,
uint64_t va,
uint32_t old_fence,
uint32_t new_fence);
void si_emit_wait_fence(struct radeon_winsys_cs *cs,
uint64_t va, uint32_t ref,
uint32_t mask);
void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
uint32_t *fence_ptr, uint64_t va,
bool is_mec,
enum radv_cmd_flush_bits flush_bits);
enum chip_class chip_class,
bool is_mec,
enum radv_cmd_flush_bits flush_bits);
void si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
bool is_mec,
enum radv_cmd_flush_bits flush_bits);
void si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer);
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
uint64_t src_va, uint64_t dest_va,
@@ -894,8 +861,6 @@ void
radv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer);
void radv_cmd_buffer_resolve_subpass_fs(struct radv_cmd_buffer *cmd_buffer);
void radv_cayman_emit_msaa_sample_locs(struct radeon_winsys_cs *cs, int nr_samples);
unsigned radv_cayman_get_maxdist(int log_samples);
void radv_device_init_msaa(struct radv_device *device);
@@ -1071,7 +1036,6 @@ struct radv_pipeline {
unsigned prim;
unsigned gs_out;
uint32_t vgt_gs_mode;
bool vgt_primitiveid_en;
bool prim_restart_enable;
unsigned esgs_ring_size;
unsigned gsvs_ring_size;
@@ -1079,8 +1043,6 @@ struct radv_pipeline {
uint32_t ps_input_cntl_num;
uint32_t pa_cl_vs_out_cntl;
uint32_t vgt_shader_stages_en;
uint32_t vtx_base_sgpr;
uint8_t vtx_emit_num;
struct radv_prim_vertex_count prim_vertex_count;
bool can_use_guardband;
} graphics;
@@ -1100,11 +1062,6 @@ static inline bool radv_pipeline_has_tess(struct radv_pipeline *pipeline)
return pipeline->shaders[MESA_SHADER_TESS_EVAL] ? true : false;
}
uint32_t radv_shader_stage_to_user_data_0(gl_shader_stage stage, bool has_gs, bool has_tess);
struct ac_userdata_info *radv_lookup_user_sgpr(struct radv_pipeline *pipeline,
gl_shader_stage stage,
int idx);
struct radv_graphics_pipeline_create_info {
bool use_rectlist;
bool db_depth_clear;
@@ -1189,7 +1146,7 @@ struct radv_image {
*/
VkFormat vk_format;
VkImageAspectFlags aspects;
struct ac_surf_info info;
struct radeon_surf_info info;
VkImageUsageFlags usage; /**< Superset of VkImageCreateInfo::usage. */
VkImageTiling tiling; /** VkImageCreateInfo::tiling */
VkImageCreateFlags flags; /** VkImageCreateInfo::flags */
@@ -1212,22 +1169,12 @@ struct radv_image {
uint32_t clear_value_offset;
};
/* Whether the image has a htile that is known to be consistent with the
* contents of the image. */
bool radv_layout_has_htile(const struct radv_image *image,
VkImageLayout layout,
unsigned queue_mask);
/* Whether the image has a htile that is known to be consistent with the
* contents of the image and is allowed to be in compressed form.
*
* If this is false, reads that don't use the htile should be able to return
* correct results.
*/
VkImageLayout layout);
bool radv_layout_is_htile_compressed(const struct radv_image *image,
VkImageLayout layout,
unsigned queue_mask);
VkImageLayout layout);
bool radv_layout_can_expclear(const struct radv_image *image,
VkImageLayout layout);
bool radv_layout_can_fast_clear(const struct radv_image *image,
VkImageLayout layout,
unsigned queue_mask);
@@ -1275,6 +1222,7 @@ struct radv_image_view {
struct radv_image_create_info {
const VkImageCreateInfo *vk_info;
uint32_t stride;
bool scanout;
};
@@ -1285,8 +1233,11 @@ VkResult radv_image_create(VkDevice _device,
void radv_image_view_init(struct radv_image_view *view,
struct radv_device *device,
const VkImageViewCreateInfo* pCreateInfo);
const VkImageViewCreateInfo* pCreateInfo,
struct radv_cmd_buffer *cmd_buffer,
VkImageUsageFlags usage_mask);
void radv_image_set_optimal_micro_tile_mode(struct radv_device *device,
struct radv_image *image, uint32_t micro_tile_mode);
struct radv_buffer_view {
struct radeon_winsys_bo *bo;
VkFormat vk_format;
@@ -1346,41 +1297,37 @@ struct radv_sampler {
};
struct radv_color_buffer_info {
uint64_t cb_color_base;
uint64_t cb_color_cmask;
uint64_t cb_color_fmask;
uint64_t cb_dcc_base;
uint32_t cb_color_base;
uint32_t cb_color_pitch;
uint32_t cb_color_slice;
uint32_t cb_color_view;
uint32_t cb_color_info;
uint32_t cb_color_attrib;
uint32_t cb_color_attrib2;
uint32_t cb_dcc_control;
uint32_t cb_color_cmask;
uint32_t cb_color_cmask_slice;
uint32_t cb_color_fmask;
uint32_t cb_color_fmask_slice;
uint32_t cb_clear_value0;
uint32_t cb_clear_value1;
uint32_t cb_dcc_base;
uint32_t micro_tile_mode;
uint32_t gfx9_epitch;
};
struct radv_ds_buffer_info {
uint64_t db_z_read_base;
uint64_t db_stencil_read_base;
uint64_t db_z_write_base;
uint64_t db_stencil_write_base;
uint64_t db_htile_data_base;
uint32_t db_depth_info;
uint32_t db_z_info;
uint32_t db_stencil_info;
uint32_t db_z_read_base;
uint32_t db_stencil_read_base;
uint32_t db_z_write_base;
uint32_t db_stencil_write_base;
uint32_t db_depth_view;
uint32_t db_depth_size;
uint32_t db_depth_slice;
uint32_t db_htile_surface;
uint32_t db_htile_data_base;
uint32_t pa_su_poly_offset_db_fmt_cntl;
uint32_t db_z_info2;
uint32_t db_stencil_info2;
float offset_scale;
};

@@ -44,6 +44,11 @@ static unsigned get_max_db(struct radv_device *device)
unsigned num_db = device->physical_device->rad_info.num_render_backends;
MAYBE_UNUSED unsigned rb_mask = device->physical_device->rad_info.enabled_rb_mask;
if (device->physical_device->rad_info.chip_class == SI)
num_db = 8;
else
num_db = MAX2(8, num_db);
/* Otherwise we need to change the query reset procedure */
assert(rb_mask == ((1ull << num_db) - 1));
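/* Presumably each enabled render backend writes its own begin/end ZPASS
 * counter pair into the query slot, so e.g. num_db == 8 requires
 * rb_mask == 0xff; a harvested backend would leave stale words that the
 * reset procedure does not clear. */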
@@ -72,8 +77,6 @@ static struct nir_ssa_def *
radv_load_push_int(nir_builder *b, unsigned offset, const char *name)
{
nir_intrinsic_instr *flags = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
nir_intrinsic_set_base(flags, 0);
nir_intrinsic_set_range(flags, 16);
flags->src[0] = nir_src_for_ssa(nir_imm_int(b, offset));
flags->num_components = 1;
nir_ssa_dest_init(&flags->instr, &flags->dest, 1, 32, name);
@@ -122,10 +125,10 @@ build_occlusion_query_shader(struct radv_device *device) {
*/
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "occlusion_query");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "occlusion_query");
b.shader->info->cs.local_size[0] = 64;
b.shader->info->cs.local_size[1] = 1;
b.shader->info->cs.local_size[2] = 1;
nir_variable *result = nir_local_variable_create(b.impl, glsl_uint64_t_type(), "result");
nir_variable *outer_counter = nir_local_variable_create(b.impl, glsl_int_type(), "outer_counter");
@@ -155,9 +158,9 @@ build_occlusion_query_shader(struct radv_device *device) {
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
global_id = nir_channel(&b, global_id, 0); // We only care about x here.
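/* Standard global-invocation-ID construction:
 *   global_id = wg_id * local_size + local_invocation_id
 * With local_size = (64, 1, 1) only the x component carries information,
 * hence the nir_channel() select above. */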
@@ -317,10 +320,10 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
*/
nir_builder b;
nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
b.shader->info.name = ralloc_strdup(b.shader, "pipeline_statistics_query");
b.shader->info.cs.local_size[0] = 64;
b.shader->info.cs.local_size[1] = 1;
b.shader->info.cs.local_size[2] = 1;
b.shader->info->name = ralloc_strdup(b.shader, "pipeline_statistics_query");
b.shader->info->cs.local_size[0] = 64;
b.shader->info->cs.local_size[1] = 1;
b.shader->info->cs.local_size[2] = 1;
nir_variable *output_offset = nir_local_variable_create(b.impl, glsl_int_type(), "output_offset");
@@ -347,9 +350,9 @@ build_pipeline_statistics_query_shader(struct radv_device *device) {
nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
nir_ssa_def *block_size = nir_imm_ivec4(&b,
b.shader->info.cs.local_size[0],
b.shader->info.cs.local_size[1],
b.shader->info.cs.local_size[2], 0);
b.shader->info->cs.local_size[0],
b.shader->info->cs.local_size[1],
b.shader->info->cs.local_size[2], 0);
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
global_id = nir_channel(&b, global_id, 0); // We only care about x here.
@@ -609,10 +612,12 @@ VkResult radv_device_init_meta_query_state(struct radv_device *device)
radv_pipeline_cache_to_handle(&device->meta_state.cache),
1, &pipeline_statistics_vk_pipeline_info, NULL,
&device->meta_state.query.pipeline_statistics_query_pipeline);
fail:
if (result != VK_SUCCESS)
radv_device_finish_meta_query_state(device);
goto fail;
return VK_SUCCESS;
fail:
radv_device_finish_meta_query_state(device);
ralloc_free(occlusion_cs.nir);
ralloc_free(pipeline_statistics_cs.nir);
return result;
@@ -992,7 +997,13 @@ void radv_CmdCopyQueryPoolResults(
uint64_t avail_va = va + pool->availability_offset + 4 * query;
/* This waits on the ME. All copies below are done on the ME */
si_emit_wait_fence(cs, avail_va, 1, 0xffffffff);
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
radeon_emit(cs, avail_va);
radeon_emit(cs, avail_va >> 32);
radeon_emit(cs, 1); /* reference value */
radeon_emit(cs, 0xffffffff); /* mask */
radeon_emit(cs, 4); /* poll interval */
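/* WAIT_REG_MEM is a header plus six dwords: function/space, poll address
 * lo/hi, reference value, mask, poll interval. MEM_SPACE(1) polls memory
 * rather than a register, and EQUAL spins until
 * (*avail_va & 0xffffffff) == 1, i.e. until availability is written. */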
}
}
radv_query_shader(cmd_buffer, cmd_buffer->device->meta_state.query.pipeline_statistics_query_pipeline,
@@ -1015,7 +1026,13 @@ void radv_CmdCopyQueryPoolResults(
uint64_t avail_va = va + pool->availability_offset + 4 * query;
/* This waits on the ME. All copies below are done on the ME */
si_emit_wait_fence(cs, avail_va, 1, 0xffffffff);
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
radeon_emit(cs, avail_va);
radeon_emit(cs, avail_va >> 32);
radeon_emit(cs, 1); /* reference value */
radeon_emit(cs, 0xffffffff); /* mask */
radeon_emit(cs, 4); /* poll interval */
}
if (flags & VK_QUERY_RESULT_WITH_AVAILABILITY_BIT) {
uint64_t avail_va = va + pool->availability_offset + 4 * query;
@@ -1139,7 +1156,7 @@ void radv_CmdEndQuery(
break;
case VK_QUERY_TYPE_PIPELINE_STATISTICS:
radeon_check_space(cmd_buffer->device->ws, cs, 16);
radeon_check_space(cmd_buffer->device->ws, cs, 10);
va += pipelinestat_block_size;
@@ -1148,11 +1165,13 @@ void radv_CmdEndQuery(
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
si_cs_emit_write_event_eop(cs,
cmd_buffer->device->physical_device->rad_info.chip_class,
false,
EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
EVENT_INDEX(5));
radeon_emit(cs, avail_va);
radeon_emit(cs, (avail_va >> 32) | EOP_DATA_SEL(1));
radeon_emit(cs, 1);
radeon_emit(cs, 0);
break;
default:
unreachable("ending unhandled query type");
@@ -1175,7 +1194,7 @@ void radv_CmdWriteTimestamp(
cmd_buffer->device->ws->cs_add_buffer(cs, pool->bo, 5);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
MAYBE_UNUSED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 14);
switch(pipelineStage) {
case VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT:
@@ -1197,16 +1216,37 @@ void radv_CmdWriteTimestamp(
radeon_emit(cs, 1);
break;
default:
si_cs_emit_write_event_eop(cs,
cmd_buffer->device->physical_device->rad_info.chip_class,
mec,
V_028A90_BOTTOM_OF_PIPE_TS, 0,
3, query_va, 0, 0);
si_cs_emit_write_event_eop(cs,
cmd_buffer->device->physical_device->rad_info.chip_class,
mec,
V_028A90_BOTTOM_OF_PIPE_TS, 0,
1, avail_va, 0, 1);
if (mec) {
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
radeon_emit(cs, 3 << 29);
radeon_emit(cs, query_va);
radeon_emit(cs, query_va >> 32);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, 5, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
radeon_emit(cs, 1 << 29);
radeon_emit(cs, avail_va);
radeon_emit(cs, avail_va >> 32);
radeon_emit(cs, 1);
radeon_emit(cs, 0);
} else {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
radeon_emit(cs, query_va);
radeon_emit(cs, (3 << 29) | ((query_va >> 32) & 0xFFFF));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
radeon_emit(cs, avail_va);
radeon_emit(cs, (1 << 29) | ((avail_va >> 32) & 0xFFFF));
radeon_emit(cs, 1);
radeon_emit(cs, 0);
}
break;
}


@@ -35,10 +35,6 @@
#include "main/macros.h"
#include "amd_family.h"
struct radeon_info;
struct ac_surf_info;
struct radeon_surf;
#define FREE(x) free(x)
enum radeon_bo_domain { /* bitfield */
@@ -75,6 +71,63 @@ struct radeon_winsys_cs {
uint32_t *buf; /* The base pointer of the chunk. */
};
struct radeon_info {
/* PCI info: domain:bus:dev:func */
uint32_t pci_domain;
uint32_t pci_bus;
uint32_t pci_dev;
uint32_t pci_func;
/* Device info. */
uint32_t pci_id;
enum radeon_family family;
const char *name;
enum chip_class chip_class;
uint32_t gart_page_size;
uint64_t gart_size;
uint64_t vram_size;
uint64_t visible_vram_size;
bool has_dedicated_vram;
bool has_virtual_memory;
bool gfx_ib_pad_with_type2;
bool has_uvd;
uint32_t sdma_rings;
uint32_t compute_rings;
uint32_t vce_fw_version;
uint32_t vce_harvest_config;
uint32_t clock_crystal_freq; /* in kHz */
/* Kernel info. */
uint32_t drm_major; /* version */
uint32_t drm_minor;
uint32_t drm_patchlevel;
bool has_userptr;
/* Shader cores. */
uint32_t r600_max_quad_pipes; /* wave size / 16 */
uint32_t max_shader_clock;
uint32_t num_good_compute_units;
uint32_t max_se; /* shader engines */
uint32_t max_sh_per_se; /* shader arrays per shader engine */
/* Render backends (color + depth blocks). */
uint32_t r300_num_gb_pipes;
uint32_t r300_num_z_pipes;
uint32_t r600_gb_backend_map; /* R600 harvest config */
bool r600_gb_backend_map_valid;
uint32_t r600_num_banks;
uint32_t num_render_backends;
uint32_t num_tile_pipes; /* pipe count from PIPE_CONFIG */
uint32_t pipe_interleave_bytes;
uint32_t enabled_rb_mask; /* GCN harvest config */
/* Tile modes. */
uint32_t si_tile_mode_array[32];
uint32_t cik_macrotile_mode_array[16];
};
#define RADEON_SURF_MAX_LEVEL 32
#define RADEON_SURF_TYPE_MASK 0xFF
#define RADEON_SURF_TYPE_SHIFT 0
#define RADEON_SURF_TYPE_1D 0
@@ -85,11 +138,92 @@ struct radeon_winsys_cs {
#define RADEON_SURF_TYPE_2D_ARRAY 5
#define RADEON_SURF_MODE_MASK 0xFF
#define RADEON_SURF_MODE_SHIFT 8
#define RADEON_SURF_MODE_LINEAR_ALIGNED 1
#define RADEON_SURF_MODE_1D 2
#define RADEON_SURF_MODE_2D 3
#define RADEON_SURF_SCANOUT (1 << 16)
#define RADEON_SURF_ZBUFFER (1 << 17)
#define RADEON_SURF_SBUFFER (1 << 18)
#define RADEON_SURF_Z_OR_SBUFFER (RADEON_SURF_ZBUFFER | RADEON_SURF_SBUFFER)
#define RADEON_SURF_HAS_SBUFFER_MIPTREE (1 << 19)
#define RADEON_SURF_HAS_TILE_MODE_INDEX (1 << 20)
#define RADEON_SURF_FMASK (1 << 21)
#define RADEON_SURF_DISABLE_DCC (1 << 22)
#define RADEON_SURF_TC_COMPATIBLE_HTILE (1 << 23)
#define RADEON_SURF_GET(v, field) (((v) >> RADEON_SURF_ ## field ## _SHIFT) & RADEON_SURF_ ## field ## _MASK)
#define RADEON_SURF_SET(v, field) (((v) & RADEON_SURF_ ## field ## _MASK) << RADEON_SURF_ ## field ## _SHIFT)
#define RADEON_SURF_CLR(v, field) ((v) & ~(RADEON_SURF_ ## field ## _MASK << RADEON_SURF_ ## field ## _SHIFT))
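/* Illustrative sketch, not part of the patch: the intended use of the
 * GET/SET/CLR helpers above. The function name and the surface "s" are
 * hypothetical.
 */
static inline void example_force_2d_mode(struct radeon_surf *s)
{
unsigned mode = RADEON_SURF_GET(s->flags, MODE); /* extract the MODE field */
if (mode < RADEON_SURF_MODE_2D) {
/* clear the old value, then pack the new one */
s->flags = RADEON_SURF_CLR(s->flags, MODE) |
RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
}
}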
struct radeon_surf_info {
uint32_t width;
uint32_t height;
uint32_t depth;
uint8_t samples;
uint8_t levels;
uint16_t array_size;
};
struct radeon_surf_level {
uint64_t offset;
uint64_t slice_size;
uint32_t nblk_x;
uint32_t nblk_y;
uint32_t nblk_z;
uint32_t pitch_bytes;
uint32_t mode;
bool dcc_enabled;
uint64_t dcc_offset;
uint64_t dcc_fast_clear_size;
};
/* surface definitions from the winsys */
struct radeon_surf {
/* These are inputs to the calculator. */
uint32_t blk_w;
uint32_t blk_h;
uint32_t bpe;
uint32_t flags;
/* These are return values. Some of them can be set by the caller, but
* they will be treated as hints (e.g. bankw, bankh) and might be
* changed by the calculator.
*/
uint64_t bo_size;
uint64_t bo_alignment;
/* This applies to EG and later. */
uint32_t bankw;
uint32_t bankh;
uint32_t mtilea;
uint32_t tile_split;
uint32_t stencil_tile_split;
uint64_t stencil_offset;
struct radeon_surf_level level[RADEON_SURF_MAX_LEVEL];
struct radeon_surf_level stencil_level[RADEON_SURF_MAX_LEVEL];
uint32_t tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t stencil_tiling_index[RADEON_SURF_MAX_LEVEL];
uint32_t pipe_config;
uint32_t num_banks;
uint32_t macro_tile_index;
uint32_t micro_tile_mode; /* displayable, thin, depth, rotated */
/* Whether the depth miptree or stencil miptree as used by the DB are
* adjusted from their TC compatible form to ensure depth/stencil
* compatibility. If either is true, the corresponding plane cannot be
* sampled from.
*/
bool depth_adjusted;
bool stencil_adjusted;
uint64_t dcc_size;
uint64_t dcc_alignment;
uint64_t htile_size;
uint64_t htile_slice_size;
uint64_t htile_alignment;
};
enum radeon_bo_layout {
RADEON_LAYOUT_LINEAR = 0,
RADEON_LAYOUT_TILED,
@@ -103,25 +237,16 @@ struct radeon_bo_metadata {
/* Tiling flags describing the texture layout for display code
* and DRI sharing.
*/
union {
struct {
enum radeon_bo_layout microtile;
enum radeon_bo_layout macrotile;
unsigned pipe_config;
unsigned bankw;
unsigned bankh;
unsigned tile_split;
unsigned mtilea;
unsigned num_banks;
unsigned stride;
bool scanout;
} legacy;
struct {
/* surface flags */
unsigned swizzle_mode:5;
} gfx9;
} u;
enum radeon_bo_layout microtile;
enum radeon_bo_layout macrotile;
unsigned pipe_config;
unsigned bankw;
unsigned bankh;
unsigned tile_split;
unsigned mtilea;
unsigned num_banks;
unsigned stride;
bool scanout;
/* Additional metadata associated with the buffer, in bytes.
* The maximum size is 64 * 4. This is opaque for the winsys & kernel.
@@ -208,7 +333,7 @@ struct radeon_winsys {
void (*cs_dump)(struct radeon_winsys_cs *cs, FILE* file, uint32_t trace_id);
int (*surface_init)(struct radeon_winsys *ws,
const struct ac_surf_info *surf_info,
const struct radeon_surf_info *surf_info,
struct radeon_surf *surf);
int (*surface_best)(struct radeon_winsys *ws,


@@ -26,7 +26,7 @@
#include "radv_private.h"
#include "radv_meta.h"
#include "wsi_common.h"
#include "vk_util.h"
#include "util/vk_util.h"
static const struct wsi_callbacks wsi_cbs = {
.get_phys_device_format_properties = radv_GetPhysicalDeviceFormatProperties,
@@ -224,7 +224,7 @@ radv_wsi_image_create(VkDevice device_h,
*memory_p = memory_h;
*size = image->size;
*offset = image->offset;
*row_pitch = surface->u.legacy.level[0].nblk_x * surface->bpe;
*row_pitch = surface->level[0].pitch_bytes;
return VK_SUCCESS;
fail_alloc_memory:
radv_FreeMemory(device_h, memory_h, pAllocator);
@@ -438,7 +438,7 @@ VkResult radv_AcquireNextImageKHR(
VkResult result = swapchain->acquire_next_image(swapchain, timeout, semaphore,
pImageIndex);
if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) {
if (fence && result == VK_SUCCESS) {
fence->submitted = true;
fence->signalled = true;
}


@@ -30,7 +30,6 @@
#include "radv_private.h"
#include "radv_cs.h"
#include "sid.h"
#include "gfx9d.h"
#include "radv_util.h"
#include "main/macros.h"
@@ -242,9 +241,6 @@ si_emit_config(struct radv_physical_device *physical_device,
radeon_set_context_reg(cs, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
radeon_set_context_reg(cs, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
radeon_set_context_reg(cs, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1);
if (physical_device->rad_info.chip_class >= GFX9)
radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF, 0);
radeon_set_context_reg(cs, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (physical_device->rad_info.chip_class < CIK)
radeon_set_config_reg(cs, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
@@ -301,7 +297,6 @@ si_emit_config(struct radv_physical_device *physical_device,
raster_config_1 = 0x0000002a;
break;
case CHIP_POLARIS11:
case CHIP_POLARIS12:
raster_config = 0x16000012;
raster_config_1 = 0x00000000;
break;
@@ -332,28 +327,24 @@ si_emit_config(struct radv_physical_device *physical_device,
raster_config_1 = 0x00000000;
break;
default:
if (physical_device->rad_info.chip_class <= VI) {
fprintf(stderr,
"radeonsi: Unknown GPU, using 0 for raster_config\n");
raster_config = 0x00000000;
raster_config_1 = 0x00000000;
}
fprintf(stderr,
"radeonsi: Unknown GPU, using 0 for raster_config\n");
raster_config = 0x00000000;
raster_config_1 = 0x00000000;
break;
}
/* Always use the default config when all backends are enabled
* (or when we failed to determine the enabled backends).
*/
if (physical_device->rad_info.chip_class <= VI) {
if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
raster_config);
if (physical_device->rad_info.chip_class >= CIK)
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
raster_config_1);
} else {
si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
}
if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
radeon_set_context_reg(cs, R_028350_PA_SC_RASTER_CONFIG,
raster_config);
if (physical_device->rad_info.chip_class >= CIK)
radeon_set_context_reg(cs, R_028354_PA_SC_RASTER_CONFIG_1,
raster_config_1);
} else {
si_write_harvested_raster_configs(physical_device, cs, raster_config, raster_config_1);
}
radeon_set_context_reg(cs, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1));
@@ -377,31 +368,22 @@ si_emit_config(struct radv_physical_device *physical_device,
S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE));
if (physical_device->rad_info.chip_class >= GFX9) {
radeon_set_uconfig_reg(cs, R_030920_VGT_MAX_VTX_INDX, ~0);
radeon_set_uconfig_reg(cs, R_030924_VGT_MIN_VTX_INDX, 0);
radeon_set_uconfig_reg(cs, R_030928_VGT_INDX_OFFSET, 0);
} else {
radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
}
radeon_set_context_reg(cs, R_028400_VGT_MAX_VTX_INDX, ~0);
radeon_set_context_reg(cs, R_028404_VGT_MIN_VTX_INDX, 0);
radeon_set_context_reg(cs, R_028408_VGT_INDX_OFFSET, 0);
if (physical_device->rad_info.chip_class >= CIK) {
if (physical_device->rad_info.chip_class >= GFX9) {
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_CU_EN(0xffff));
} else {
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
*/
radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
S_028A44_ES_VERTS_PER_SUBGRP(64) |
S_028A44_GS_PRIMS_PER_SUBGRP(4));
}
/* If this is 0, Bonaire can hang even if GS isn't being used.
* Other chips are unaffected. These are suboptimal values,
* but we don't use on-chip GS.
*/
radeon_set_context_reg(cs, R_028A44_VGT_GS_ONCHIP_CNTL,
S_028A44_ES_VERTS_PER_SUBGRP(64) |
S_028A44_GS_PRIMS_PER_SUBGRP(4));
radeon_set_sh_reg(cs, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, S_00B51C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 0);
radeon_set_sh_reg(cs, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, S_00B31C_CU_EN(0xffff));
radeon_set_sh_reg(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, S_00B21C_CU_EN(0xffff));
if (physical_device->rad_info.num_good_compute_units /
@@ -452,41 +434,9 @@ si_emit_config(struct radv_physical_device *physical_device,
radeon_set_context_reg(cs, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16);
}
if (physical_device->has_rbplus)
if (physical_device->rad_info.family == CHIP_STONEY)
radeon_set_context_reg(cs, R_028C40_PA_SC_SHADER_CONTROL, 0);
if (physical_device->rad_info.chip_class >= GFX9) {
unsigned num_se = physical_device->rad_info.max_se;
unsigned pc_lines = 0;
switch (physical_device->rad_info.family) {
case CHIP_VEGA10:
pc_lines = 4096;
break;
case CHIP_RAVEN:
pc_lines = 1024;
break;
default:
assert(0);
}
radeon_set_context_reg(cs, R_028060_DB_DFSM_CONTROL,
S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF));
radeon_set_context_reg(cs, R_028064_DB_RENDER_FILTER, 0);
/* TODO: We can use this to disable RBs for rendering to GART: */
radeon_set_context_reg(cs, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 0);
radeon_set_context_reg(cs, R_02883C_PA_SU_OVER_RASTERIZATION_CNTL, 0);
/* TODO: Enable the binner: */
radeon_set_context_reg(cs, R_028C44_PA_SC_BINNER_CNTL_0,
S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) |
S_028C44_DISABLE_START_OF_PRIM(1));
radeon_set_context_reg(cs, R_028C48_PA_SC_BINNER_CNTL_1,
S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
S_028C48_MAX_PRIM_PER_BATCH(1023));
radeon_set_context_reg(cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1));
radeon_set_uconfig_reg(cs, R_030968_VGT_INSTANCE_BASE_ID, 0);
}
si_emit_compute(physical_device, cs);
}
@@ -700,9 +650,6 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
multi_instances_smaller_than_primgroup = indirect_draw || (instanced_draw &&
num_prims < primgroup_size);
if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_FRAGMENT]->info.fs.prim_id_input)
ia_switch_on_eoi = true;
if (radv_pipeline_has_tess(cmd_buffer->state.pipeline)) {
/* SWITCH_ON_EOI must be set if PrimID is used. */
if (cmd_buffer->state.pipeline->shaders[MESA_SHADER_TESS_CTRL]->info.tcs.uses_prim_id ||
@@ -719,14 +666,12 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
/* Needed for 028B6C_DISTRIBUTION_MODE != 0 */
if (cmd_buffer->device->has_distributed_tess) {
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
if (chip_class <= VI)
partial_es_wave = true;
partial_es_wave = true;
if (family == CHIP_TONGA ||
family == CHIP_FIJI ||
family == CHIP_POLARIS10 ||
family == CHIP_POLARIS11 ||
family == CHIP_POLARIS12)
family == CHIP_POLARIS11)
partial_vs_wave = true;
} else {
partial_vs_wave = true;
@@ -788,15 +733,10 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
assert(wd_switch_on_eop || !ia_switch_on_eop);
}
/* If SWITCH_ON_EOI is set, PARTIAL_ES_WAVE must be set too. */
if (chip_class <= VI && ia_switch_on_eoi)
if (ia_switch_on_eoi)
partial_es_wave = true;
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline)) {
if (radv_pipeline_has_gs(cmd_buffer->state.pipeline) &&
cmd_buffer->state.pipeline->shaders[MESA_SHADER_GEOMETRY]->info.gs.uses_prim_id)
ia_switch_on_eoi = true;
/* GS requirement. */
if (SI_GS_PER_ES / primgroup_size >= cmd_buffer->device->gs_table_depth - 3)
partial_es_wave = true;
@@ -815,88 +755,22 @@ si_get_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer,
S_028AA8_PARTIAL_ES_WAVE_ON(partial_es_wave) |
S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1) |
S_028AA8_WD_SWITCH_ON_EOP(chip_class >= CIK ? wd_switch_on_eop : 0) |
/* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */
S_028AA8_MAX_PRIMGRP_IN_WAVE(chip_class == VI ?
max_primgroup_in_wave : 0) |
S_030960_EN_INST_OPT_BASIC(chip_class >= GFX9) |
S_030960_EN_INST_OPT_ADV(chip_class >= GFX9);
S_028AA8_MAX_PRIMGRP_IN_WAVE(chip_class >= VI ?
max_primgroup_in_wave : 0);
}
void si_cs_emit_write_event_eop(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
bool is_mec,
unsigned event, unsigned event_flags,
unsigned data_sel,
uint64_t va,
uint32_t old_fence,
uint32_t new_fence)
{
unsigned op = EVENT_TYPE(event) |
EVENT_INDEX(5) |
event_flags;
unsigned is_gfx8_mec = is_mec && chip_class < GFX9;
if (chip_class >= GFX9 || is_gfx8_mec) {
radeon_emit(cs, PKT3(PKT3_RELEASE_MEM, is_gfx8_mec ? 5 : 6, 0));
radeon_emit(cs, op);
radeon_emit(cs, EOP_DATA_SEL(data_sel));
radeon_emit(cs, va); /* address lo */
radeon_emit(cs, va >> 32); /* address hi */
radeon_emit(cs, new_fence); /* immediate data lo */
radeon_emit(cs, 0); /* immediate data hi */
if (!is_gfx8_mec)
radeon_emit(cs, 0); /* unused */
} else {
if (chip_class == CIK ||
chip_class == VI) {
/* Two EOP events are required to make all engines go idle
* (and optional cache flushes executed) before the timestamp
* is written.
*/
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, op);
radeon_emit(cs, va);
radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
radeon_emit(cs, old_fence); /* immediate data */
radeon_emit(cs, 0); /* unused */
}
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, op);
radeon_emit(cs, va);
radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
radeon_emit(cs, new_fence); /* immediate data */
radeon_emit(cs, 0); /* unused */
}
}
void
si_emit_wait_fence(struct radeon_winsys_cs *cs,
uint64_t va, uint32_t ref,
uint32_t mask)
{
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, ref); /* reference value */
radeon_emit(cs, mask); /* mask */
radeon_emit(cs, 4); /* poll interval */
}
static void
si_emit_acquire_mem(struct radeon_winsys_cs *cs,
bool is_mec, bool is_gfx9,
bool is_mec,
unsigned cp_coher_cntl)
{
if (is_mec || is_gfx9) {
uint32_t hi_val = is_gfx9 ? 0xffffff : 0xff;
if (is_mec) {
radeon_emit(cs, PKT3(PKT3_ACQUIRE_MEM, 5, 0) |
PKT3_SHADER_TYPE_S(is_mec));
PKT3_SHADER_TYPE_S(1));
radeon_emit(cs, cp_coher_cntl); /* CP_COHER_CNTL */
radeon_emit(cs, 0xffffffff); /* CP_COHER_SIZE */
radeon_emit(cs, hi_val); /* CP_COHER_SIZE_HI */
radeon_emit(cs, 0xff); /* CP_COHER_SIZE_HI */
radeon_emit(cs, 0); /* CP_COHER_BASE */
radeon_emit(cs, 0); /* CP_COHER_BASE_HI */
radeon_emit(cs, 0x0000000A); /* POLL_INTERVAL */
@@ -913,47 +787,44 @@ si_emit_acquire_mem(struct radeon_winsys_cs *cs,
void
si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
enum chip_class chip_class,
uint32_t *flush_cnt,
uint64_t flush_va,
bool is_mec,
enum radv_cmd_flush_bits flush_bits)
{
unsigned cp_coher_cntl = 0;
uint32_t flush_cb_db = flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB);
if (flush_bits & RADV_CMD_FLAG_INV_ICACHE)
cp_coher_cntl |= S_0085F0_SH_ICACHE_ACTION_ENA(1);
if (flush_bits & RADV_CMD_FLAG_INV_SMEM_L1)
cp_coher_cntl |= S_0085F0_SH_KCACHE_ACTION_ENA(1);
if (chip_class <= VI) {
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB1_DEST_BASE_ENA(1) |
S_0085F0_CB2_DEST_BASE_ENA(1) |
S_0085F0_CB3_DEST_BASE_ENA(1) |
S_0085F0_CB4_DEST_BASE_ENA(1) |
S_0085F0_CB5_DEST_BASE_ENA(1) |
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB) {
cp_coher_cntl |= S_0085F0_CB_ACTION_ENA(1) |
S_0085F0_CB0_DEST_BASE_ENA(1) |
S_0085F0_CB1_DEST_BASE_ENA(1) |
S_0085F0_CB2_DEST_BASE_ENA(1) |
S_0085F0_CB3_DEST_BASE_ENA(1) |
S_0085F0_CB4_DEST_BASE_ENA(1) |
S_0085F0_CB5_DEST_BASE_ENA(1) |
S_0085F0_CB6_DEST_BASE_ENA(1) |
S_0085F0_CB7_DEST_BASE_ENA(1);
/* Necessary for DCC */
if (chip_class >= VI) {
si_cs_emit_write_event_eop(cs,
chip_class,
is_mec,
V_028A90_FLUSH_AND_INV_CB_DATA_TS,
0, 0, 0, 0, 0);
}
}
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
S_0085F0_DB_DEST_BASE_ENA(1);
/* Necessary for DCC */
if (chip_class >= VI) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_DATA_TS) |
EVENT_INDEX(5));
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
radeon_emit(cs, 0);
}
}
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_DB) {
cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
S_0085F0_DB_DEST_BASE_ENA(1);
}
if (flush_bits & RADV_CMD_FLAG_FLUSH_AND_INV_CB_META) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
@@ -964,7 +835,8 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_DB_META) | EVENT_INDEX(0));
}
if (!flush_cb_db) {
if (!(flush_bits & (RADV_CMD_FLAG_FLUSH_AND_INV_CB |
RADV_CMD_FLAG_FLUSH_AND_INV_DB))) {
if (flush_bits & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
@@ -979,54 +851,6 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
radeon_emit(cs, EVENT_TYPE(V_028A90_CS_PARTIAL_FLUSH) | EVENT_INDEX(4));
}
if (chip_class >= GFX9 && flush_cb_db) {
unsigned cb_db_event, tc_flags;
/* Set the CB/DB flush event. */
switch (flush_cb_db) {
case RADV_CMD_FLAG_FLUSH_AND_INV_CB:
cb_db_event = V_028A90_FLUSH_AND_INV_CB_DATA_TS;
break;
case RADV_CMD_FLAG_FLUSH_AND_INV_DB:
cb_db_event = V_028A90_FLUSH_AND_INV_DB_DATA_TS;
break;
default:
/* both CB & DB */
cb_db_event = V_028A90_CACHE_FLUSH_AND_INV_TS_EVENT;
}
/* TC | TC_WB = invalidate L2 data
* TC_MD | TC_WB = invalidate L2 metadata
* TC | TC_WB | TC_MD = invalidate L2 data & metadata
*
* The metadata cache must always be invalidated for coherency
* between CB/DB and shaders. (metadata = HTILE, CMASK, DCC)
*
* TC must be invalidated on GFX9 only if the CB/DB surface is
* not pipe-aligned. If the surface is RB-aligned, it might not
* strictly be pipe-aligned since RB alignment takes precedence.
*/
tc_flags = EVENT_TC_WB_ACTION_ENA |
EVENT_TC_MD_ACTION_ENA;
/* Ideally flush TC together with CB/DB. */
if (flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) {
tc_flags |= EVENT_TC_ACTION_ENA |
EVENT_TCL1_ACTION_ENA;
/* Clear the flags. */
flush_bits &= ~(RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2 |
RADV_CMD_FLAG_INV_VMEM_L1);
}
assert(flush_cnt);
uint32_t old_fence = (*flush_cnt)++;
si_cs_emit_write_event_eop(cs, chip_class, false, cb_db_event, tc_flags, 1,
flush_va, old_fence, *flush_cnt);
si_emit_wait_fence(cs, flush_va, *flush_cnt, 0xffffffff);
}
/* VGT state sync */
if (flush_bits & RADV_CMD_FLAG_VGT_FLUSH) {
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
@@ -1036,11 +860,7 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
/* Make sure ME is idle (it executes most packets) before continuing.
* This prevents read-after-write hazards between PFP and ME.
*/
if ((cp_coher_cntl ||
(flush_bits & (RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
RADV_CMD_FLAG_INV_VMEM_L1 |
RADV_CMD_FLAG_INV_GLOBAL_L2 |
RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) &&
if ((cp_coher_cntl || (flush_bits & RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) &&
!is_mec) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
@@ -1048,39 +868,27 @@ si_cs_emit_cache_flush(struct radeon_winsys_cs *cs,
if ((flush_bits & RADV_CMD_FLAG_INV_GLOBAL_L2) ||
(chip_class <= CIK && (flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2))) {
si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
cp_coher_cntl |
S_0085F0_TC_ACTION_ENA(1) |
S_0085F0_TCL1_ACTION_ENA(1) |
S_0301F0_TC_WB_ACTION_ENA(chip_class >= VI));
cp_coher_cntl |= S_0085F0_TC_ACTION_ENA(1);
if (chip_class >= VI)
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1);
} else if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
cp_coher_cntl |= S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1);
/* L2 writeback doesn't combine with L1 invalidate */
si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
cp_coher_cntl = 0;
} else {
if(flush_bits & RADV_CMD_FLAG_WRITEBACK_GLOBAL_L2) {
/* WB = write-back
* NC = apply to non-coherent MTYPEs
* (i.e. MTYPE <= 1, which is what we use everywhere)
*
* WB doesn't work without NC.
*/
si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
cp_coher_cntl |
S_0301F0_TC_WB_ACTION_ENA(1) |
S_0301F0_TC_NC_ACTION_ENA(1));
cp_coher_cntl = 0;
}
if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1) {
si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9,
cp_coher_cntl |
S_0085F0_TCL1_ACTION_ENA(1));
cp_coher_cntl = 0;
}
}
if (flush_bits & RADV_CMD_FLAG_INV_VMEM_L1)
cp_coher_cntl |= S_0085F0_TCL1_ACTION_ENA(1);
/* When one of the DEST_BASE flags is set, SURFACE_SYNC waits for idle.
* Therefore, it should be last. Done in PFP.
*/
if (cp_coher_cntl)
si_emit_acquire_mem(cs, is_mec, chip_class >= GFX9, cp_coher_cntl);
si_emit_acquire_mem(cs, is_mec, cp_coher_cntl);
}
void
@@ -1097,118 +905,67 @@ si_emit_cache_flush(struct radv_cmd_buffer *cmd_buffer)
RADV_CMD_FLAG_VS_PARTIAL_FLUSH |
RADV_CMD_FLAG_VGT_FLUSH);
if (!cmd_buffer->state.flush_bits)
return;
enum chip_class chip_class = cmd_buffer->device->physical_device->rad_info.chip_class;
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 128);
uint32_t *ptr = NULL;
uint64_t va = 0;
if (chip_class == GFX9) {
va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->gfx9_fence_bo) + cmd_buffer->gfx9_fence_offset;
ptr = &cmd_buffer->gfx9_fence_idx;
}
si_cs_emit_cache_flush(cmd_buffer->cs,
cmd_buffer->device->physical_device->rad_info.chip_class,
ptr, va,
radv_cmd_buffer_uses_mec(cmd_buffer),
cmd_buffer->state.flush_bits);
radv_cmd_buffer_trace_emit(cmd_buffer);
if (cmd_buffer->state.flush_bits)
radv_cmd_buffer_trace_emit(cmd_buffer);
cmd_buffer->state.flush_bits = 0;
}
/* Set this if you want the 3D engine to wait until CP DMA is done.
* It should be set on the last CP DMA packet. */
#define CP_DMA_SYNC (1 << 0)
#define R600_CP_DMA_SYNC (1 << 0) /* R600+ */
/* Set this if the source data was used as a destination in a previous CP DMA
* packet. It's for preventing a read-after-write (RAW) hazard between two
* CP DMA packets. */
#define CP_DMA_RAW_WAIT (1 << 1)
#define CP_DMA_USE_L2 (1 << 2)
#define CP_DMA_CLEAR (1 << 3)
#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */
#define CIK_CP_DMA_USE_L2 (1 << 2)
/* Alignment for optimal performance. */
#define SI_CPDMA_ALIGNMENT 32
#define CP_DMA_ALIGNMENT 32
/* The max number of bytes to copy per packet. */
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - CP_DMA_ALIGNMENT)
/* The max number of bytes that can be copied per packet. */
static inline unsigned cp_dma_max_byte_count(struct radv_cmd_buffer *cmd_buffer)
{
unsigned max = cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 ?
S_414_BYTE_COUNT_GFX9(~0u) :
S_414_BYTE_COUNT_GFX6(~0u);
/* make it aligned for optimal performance */
return max & ~(SI_CPDMA_ALIGNMENT - 1);
}
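/* Worked example (illustrative, assuming a 21-bit GFX6 byte-count
 * field): S_414_BYTE_COUNT_GFX6(~0u) is 0x1fffff; masking off the low
 * alignment bits gives 0x1fffff & ~(32 - 1) == 0x1fffe0, i.e. at most
 * 2097120 bytes per packet, always a multiple of SI_CPDMA_ALIGNMENT.
 */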
/* Emit a CP DMA packet to do a copy from one buffer to another, or to clear
* a buffer. The size must fit in bits [20:0]. If CP_DMA_CLEAR is set, src_va is a 32-bit
* clear value.
*/
static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
uint64_t dst_va, uint64_t src_va,
unsigned size, unsigned flags)
static void si_emit_cp_dma_copy_buffer(struct radv_cmd_buffer *cmd_buffer,
uint64_t dst_va, uint64_t src_va,
unsigned size, unsigned flags)
{
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint32_t header = 0, command = 0;
uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
uint32_t sel = flags & CIK_CP_DMA_USE_L2 ?
S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2) |
S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
assert(size);
assert(size <= cp_dma_max_byte_count(cmd_buffer));
assert((size & ((1<<21)-1)) == size);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
command |= S_414_BYTE_COUNT_GFX9(size);
else
command |= S_414_BYTE_COUNT_GFX6(size);
/* Sync flags. */
if (flags & CP_DMA_SYNC)
header |= S_411_CP_SYNC(1);
else {
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9)
command |= S_414_DISABLE_WR_CONFIRM_GFX9(1);
else
command |= S_414_DISABLE_WR_CONFIRM_GFX6(1);
}
if (flags & CP_DMA_RAW_WAIT)
command |= S_414_RAW_WAIT(1);
/* Src and dst flags. */
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9 &&
!(flags & CP_DMA_CLEAR) &&
src_va == dst_va)
header |= S_411_DSL_SEL(V_411_NOWHERE); /* prefetch only */
else if (flags & CP_DMA_USE_L2)
header |= S_411_DSL_SEL(V_411_DST_ADDR_TC_L2);
if (flags & CP_DMA_CLEAR)
header |= S_411_SRC_SEL(V_411_DATA);
else if (flags & CP_DMA_USE_L2)
header |= S_411_SRC_SEL(V_411_SRC_ADDR_TC_L2);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, header);
radeon_emit(cs, sync_flag | sel); /* CP_SYNC [31] */
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
radeon_emit(cs, src_va >> 32); /* SRC_ADDR_HI [31:0] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [31:0] */
radeon_emit(cs, command);
radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
} else {
assert(!(flags & CP_DMA_USE_L2));
header |= S_411_SRC_ADDR_HI(src_va >> 32);
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, src_va); /* SRC_ADDR_LO [31:0] */
radeon_emit(cs, header); /* SRC_ADDR_HI [15:0] + flags. */
radeon_emit(cs, sync_flag | ((src_va >> 32) & 0xffff)); /* CP_SYNC [31] | SRC_ADDR_HI [15:0] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
radeon_emit(cs, command);
radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
}
/* CP DMA is executed in ME, but index buffers are read by PFP.
@@ -1216,7 +973,7 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
* indices. If we wanted to execute CP DMA in PFP, this packet
* should precede it.
*/
if ((flags & CP_DMA_SYNC) && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
@@ -1224,14 +981,55 @@ static void si_emit_cp_dma(struct radv_cmd_buffer *cmd_buffer,
radv_cmd_buffer_trace_emit(cmd_buffer);
}
/* Emit a CP DMA packet to clear a buffer. The size must fit in bits [20:0]. */
static void si_emit_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer,
uint64_t dst_va, unsigned size,
uint32_t clear_value, unsigned flags)
{
struct radeon_winsys_cs *cs = cmd_buffer->cs;
uint32_t sync_flag = flags & R600_CP_DMA_SYNC ? S_411_CP_SYNC(1) : 0;
uint32_t wr_confirm = !(flags & R600_CP_DMA_SYNC) ? S_414_DISABLE_WR_CONFIRM_GFX6(1) : 0;
uint32_t raw_wait = flags & SI_CP_DMA_RAW_WAIT ? S_414_RAW_WAIT(1) : 0;
uint32_t dst_sel = flags & CIK_CP_DMA_USE_L2 ? S_411_DSL_SEL(V_411_DST_ADDR_TC_L2) : 0;
assert(size);
assert((size & ((1<<21)-1)) == size);
radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 9);
if (cmd_buffer->device->physical_device->rad_info.chip_class >= CIK) {
radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
radeon_emit(cs, sync_flag | dst_sel | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
radeon_emit(cs, clear_value); /* DATA [31:0] */
radeon_emit(cs, 0);
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, dst_va >> 32); /* DST_ADDR_HI [15:0] */
radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
} else {
radeon_emit(cs, PKT3(PKT3_CP_DMA, 4, 0));
radeon_emit(cs, clear_value); /* DATA [31:0] */
radeon_emit(cs, sync_flag | S_411_SRC_SEL(V_411_DATA)); /* CP_SYNC [31] | SRC_SEL[30:29] */
radeon_emit(cs, dst_va); /* DST_ADDR_LO [31:0] */
radeon_emit(cs, (dst_va >> 32) & 0xffff); /* DST_ADDR_HI [15:0] */
radeon_emit(cs, size | wr_confirm | raw_wait); /* COMMAND [29:22] | BYTE_COUNT [20:0] */
}
/* See "copy_buffer" for explanation. */
if (sync_flag && cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL) {
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
radv_cmd_buffer_trace_emit(cmd_buffer);
}
void si_cp_dma_prefetch(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
unsigned size)
{
uint64_t aligned_va = va & ~(SI_CPDMA_ALIGNMENT - 1);
uint64_t aligned_size = ((va + size + SI_CPDMA_ALIGNMENT -1) & ~(SI_CPDMA_ALIGNMENT - 1)) - aligned_va;
uint64_t aligned_va = va & ~(CP_DMA_ALIGNMENT - 1);
uint64_t aligned_size = ((va + size + CP_DMA_ALIGNMENT -1) & ~(CP_DMA_ALIGNMENT - 1)) - aligned_va;
si_emit_cp_dma(cmd_buffer, aligned_va, aligned_va,
aligned_size, CP_DMA_USE_L2);
si_emit_cp_dma_copy_buffer(cmd_buffer, aligned_va, aligned_va,
aligned_size, CIK_CP_DMA_USE_L2);
}
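/* Worked example (illustrative): with va = 0x1234 and size = 0x10, the
 * 32-byte alignment above gives
 * aligned_va = 0x1234 & ~31 = 0x1220
 * aligned_size = ((0x1244 + 31) & ~31) - 0x1220 = 0x40
 * so the prefetch covers the two cache-line-sized blocks touched by the
 * unaligned range.
 */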
static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_count,
@@ -1243,14 +1041,14 @@ static void si_cp_dma_prepare(struct radv_cmd_buffer *cmd_buffer, uint64_t byte_
*/
if (cmd_buffer->state.flush_bits) {
si_emit_cache_flush(cmd_buffer);
*flags |= CP_DMA_RAW_WAIT;
*flags |= SI_CP_DMA_RAW_WAIT;
}
/* Do the synchronization after the last dma, so that all data
* is written to memory.
*/
if (byte_count == remaining_size)
*flags |= CP_DMA_SYNC;
*flags |= R600_CP_DMA_SYNC;
}
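/* Call-pattern sketch (assumption): each chunk of a split copy asks
 * si_cp_dma_prepare() for its flags; "byte_count" and "remaining_size"
 * are hypothetical loop variables.
 */
unsigned dma_flags = 0;
si_cp_dma_prepare(cmd_buffer, byte_count, remaining_size, &dma_flags);
/* dma_flags now has RAW_WAIT set after a cache flush, and SYNC only on
 * the last chunk (byte_count == remaining_size). */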
static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigned size)
@@ -1258,20 +1056,20 @@ static void si_cp_dma_realign_engine(struct radv_cmd_buffer *cmd_buffer, unsigne
uint64_t va;
uint32_t offset;
unsigned dma_flags = 0;
unsigned buf_size = SI_CPDMA_ALIGNMENT * 2;
unsigned buf_size = CP_DMA_ALIGNMENT * 2;
void *ptr;
assert(size < SI_CPDMA_ALIGNMENT);
assert(size < CP_DMA_ALIGNMENT);
radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, SI_CPDMA_ALIGNMENT, &offset, &ptr);
radv_cmd_buffer_upload_alloc(cmd_buffer, buf_size, CP_DMA_ALIGNMENT, &offset, &ptr);
va = cmd_buffer->device->ws->buffer_get_va(cmd_buffer->upload.upload_bo);
va += offset;
si_cp_dma_prepare(cmd_buffer, size, size, &dma_flags);
si_emit_cp_dma(cmd_buffer, va, va + SI_CPDMA_ALIGNMENT, size,
dma_flags);
si_emit_cp_dma_copy_buffer(cmd_buffer, va, va + CP_DMA_ALIGNMENT, size,
dma_flags);
}
void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
@@ -1288,15 +1086,15 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
* just to align the internal counter. Otherwise, the DMA engine
* would slow down by an order of magnitude for following copies.
*/
if (size % SI_CPDMA_ALIGNMENT)
realign_size = SI_CPDMA_ALIGNMENT - (size % SI_CPDMA_ALIGNMENT);
if (size % CP_DMA_ALIGNMENT)
realign_size = CP_DMA_ALIGNMENT - (size % CP_DMA_ALIGNMENT);
/* If the copy begins unaligned, we must start copying from the next
* aligned block and the skipped part should be copied after everything
* else has been copied. Only the src alignment matters, not dst.
*/
if (src_va % SI_CPDMA_ALIGNMENT) {
skipped_size = SI_CPDMA_ALIGNMENT - (src_va % SI_CPDMA_ALIGNMENT);
if (src_va % CP_DMA_ALIGNMENT) {
skipped_size = CP_DMA_ALIGNMENT - (src_va % CP_DMA_ALIGNMENT);
/* The main part will be skipped if the size is too small. */
skipped_size = MIN2(skipped_size, size);
size -= skipped_size;
@@ -1307,14 +1105,14 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
while (size) {
unsigned dma_flags = 0;
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
si_cp_dma_prepare(cmd_buffer, byte_count,
size + skipped_size + realign_size,
&dma_flags);
si_emit_cp_dma(cmd_buffer, main_dest_va, main_src_va,
byte_count, dma_flags);
si_emit_cp_dma_copy_buffer(cmd_buffer, main_dest_va, main_src_va,
byte_count, dma_flags);
size -= byte_count;
main_src_va += byte_count;
@@ -1328,8 +1126,8 @@ void si_cp_dma_buffer_copy(struct radv_cmd_buffer *cmd_buffer,
size + skipped_size + realign_size,
&dma_flags);
si_emit_cp_dma(cmd_buffer, dest_va, src_va,
skipped_size, dma_flags);
si_emit_cp_dma_copy_buffer(cmd_buffer, dest_va, src_va,
skipped_size, dma_flags);
}
if (realign_size)
si_cp_dma_realign_engine(cmd_buffer, realign_size);
@@ -1345,14 +1143,14 @@ void si_cp_dma_clear_buffer(struct radv_cmd_buffer *cmd_buffer, uint64_t va,
assert(va % 4 == 0 && size % 4 == 0);
while (size) {
unsigned byte_count = MIN2(size, cp_dma_max_byte_count(cmd_buffer));
unsigned dma_flags = CP_DMA_CLEAR;
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
unsigned dma_flags = 0;
si_cp_dma_prepare(cmd_buffer, byte_count, size, &dma_flags);
/* Emit the clear packet. */
si_emit_cp_dma(cmd_buffer, va, value, byte_count,
dma_flags);
si_emit_cp_dma_clear_buffer(cmd_buffer, va, byte_count, value,
dma_flags);
size -= byte_count;
va += byte_count;


@@ -396,13 +396,6 @@ vk_format_is_int(VkFormat format)
return channel >= 0 && desc->channel[channel].pure_integer;
}
static inline bool
vk_format_is_srgb(VkFormat format)
{
const struct vk_format_description *desc = vk_format_description(format);
return desc->colorspace == VK_FORMAT_COLORSPACE_SRGB;
}
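/* For example (illustrative): vk_format_is_srgb(VK_FORMAT_B8G8R8A8_SRGB)
 * returns true, while the UNORM variant of the same format does not. */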
static inline VkFormat
vk_format_stencil_only(VkFormat format)
{


@@ -467,29 +467,25 @@ radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
struct amdgpu_bo_metadata metadata = {0};
uint32_t tiling_flags = 0;
if (bo->ws->info.chip_class >= GFX9) {
tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
} else {
if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
else
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
if (md->macrotile == RADEON_LAYOUT_TILED)
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
else if (md->microtile == RADEON_LAYOUT_TILED)
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
else
tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
if (md->u.legacy.tile_split)
tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks)-1);
tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
if (md->tile_split)
tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->tile_split));
tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);
if (md->u.legacy.scanout)
tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
else
tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
}
if (md->scanout)
tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
else
tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
metadata.tiling_info = tiling_flags;
metadata.size_metadata = md->size_metadata;


@@ -816,7 +816,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
struct amdgpu_cs_request request;
uint32_t pad_word = 0xffff1000U;
if (radv_amdgpu_winsys(ws)->info.chip_class == SI)
if (radv_amdgpu_winsys(ws)->family == FAMILY_SI)
pad_word = 0x80000000;
assert(cs_count);
@@ -931,7 +931,7 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
if (!cs->ws->use_ib_bos) {
ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
} else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && false) {
ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, cs_array,
cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
} else {
@@ -952,9 +952,6 @@ static void *radv_amdgpu_winsys_get_cpu_addr(void *_cs, uint64_t addr)
{
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
void *ret = NULL;
if (!cs->ib_buffer)
return NULL;
for (unsigned i = 0; i <= cs->num_old_ib_buffers; ++i) {
struct radv_amdgpu_winsys_bo *bo;
@@ -973,15 +970,10 @@ static void radv_amdgpu_winsys_cs_dump(struct radeon_winsys_cs *_cs,
uint32_t trace_id)
{
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs *)_cs;
void *ib = cs->base.buf;
int num_dw = cs->base.cdw;
if (cs->ws->use_ib_bos) {
ib = radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address);
num_dw = cs->ib.size;
}
assert(ib);
ac_parse_ib(file, ib, num_dw, trace_id, "main IB", cs->ws->info.chip_class,
ac_parse_ib(file,
radv_amdgpu_winsys_get_cpu_addr(cs, cs->ib.ib_mc_address),
cs->ib.size, trace_id, "main IB", cs->ws->info.chip_class,
radv_amdgpu_winsys_get_cpu_addr, cs);
}


@@ -35,9 +35,19 @@
#include "radv_amdgpu_surface.h"
#include "sid.h"
#include "ac_surface.h"
#ifndef NO_ENTRIES
#define NO_ENTRIES 32
#endif
static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
#ifndef NO_MACRO_ENTRIES
#define NO_MACRO_ENTRIES 16
#endif
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif
static int radv_amdgpu_surface_sanity(const struct radeon_surf_info *surf_info,
const struct radeon_surf *surf)
{
unsigned type = RADEON_SURF_GET(surf->flags, TYPE);
@@ -45,9 +55,24 @@ static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
if (!(surf->flags & RADEON_SURF_HAS_TILE_MODE_INDEX))
return -EINVAL;
/* all dimensions must be at least 1! */
if (!surf_info->width || !surf_info->height || !surf_info->depth ||
!surf_info->array_size)
return -EINVAL;
if (!surf->blk_w || !surf->blk_h)
return -EINVAL;
switch (surf_info->samples) {
case 1:
case 2:
case 4:
case 8:
break;
default:
return -EINVAL;
}
switch (type) {
case RADEON_SURF_TYPE_1D:
if (surf_info->height > 1)
@@ -76,28 +101,453 @@ static int radv_amdgpu_surface_sanity(const struct ac_surf_info *surf_info,
return 0;
}
static void *ADDR_API radv_allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
{
return malloc(pInput->sizeInBytes);
}
static ADDR_E_RETURNCODE ADDR_API radv_freeSysMem(const ADDR_FREESYSMEM_INPUT * pInput)
{
free(pInput->pVirtAddr);
return ADDR_OK;
}
ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, int rev_id,
enum chip_class chip_class)
{
ADDR_CREATE_INPUT addrCreateInput = {0};
ADDR_CREATE_OUTPUT addrCreateOutput = {0};
ADDR_REGISTER_VALUE regValue = {0};
ADDR_CREATE_FLAGS createFlags = {{0}};
ADDR_E_RETURNCODE addrRet;
addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
regValue.noOfBanks = amdinfo->mc_arb_ramcfg & 0x3;
regValue.gbAddrConfig = amdinfo->gb_addr_cfg;
regValue.noOfRanks = (amdinfo->mc_arb_ramcfg & 0x4) >> 2;
regValue.backendDisables = amdinfo->backend_disable[0];
regValue.pTileConfig = amdinfo->gb_tile_mode;
regValue.noOfEntries = ARRAY_SIZE(amdinfo->gb_tile_mode);
if (chip_class == SI) {
regValue.pMacroTileConfig = NULL;
regValue.noOfMacroEntries = 0;
} else {
regValue.pMacroTileConfig = amdinfo->gb_macro_tile_mode;
regValue.noOfMacroEntries = ARRAY_SIZE(amdinfo->gb_macro_tile_mode);
}
createFlags.value = 0;
createFlags.useTileIndex = 1;
addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
addrCreateInput.chipFamily = family;
addrCreateInput.chipRevision = rev_id;
addrCreateInput.createFlags = createFlags;
addrCreateInput.callbacks.allocSysMem = radv_allocSysMem;
addrCreateInput.callbacks.freeSysMem = radv_freeSysMem;
addrCreateInput.callbacks.debugPrint = 0;
addrCreateInput.regValue = regValue;
addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
if (addrRet != ADDR_OK)
return NULL;
return addrCreateOutput.hLib;
}
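/* Usage sketch (assumption, not from this patch): create the handle once
 * at winsys initialization; "amdinfo" would come from
 * amdgpu_query_gpu_info(), and "ws" is a hypothetical winsys.
 */
ws->addrlib = radv_amdgpu_addr_create(&amdinfo, family, rev_id, chip_class);
if (!ws->addrlib)
return -1;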
static int radv_compute_level(ADDR_HANDLE addrlib,
const struct radeon_surf_info *surf_info,
struct radeon_surf *surf, bool is_stencil,
unsigned level, unsigned type, bool compressed,
ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut)
{
struct radeon_surf_level *surf_level;
ADDR_E_RETURNCODE ret;
AddrSurfInfoIn->mipLevel = level;
AddrSurfInfoIn->width = u_minify(surf_info->width, level);
AddrSurfInfoIn->height = u_minify(surf_info->height, level);
if (type == RADEON_SURF_TYPE_3D)
AddrSurfInfoIn->numSlices = u_minify(surf_info->depth, level);
else if (type == RADEON_SURF_TYPE_CUBEMAP)
AddrSurfInfoIn->numSlices = 6;
else
AddrSurfInfoIn->numSlices = surf_info->array_size;
if (level > 0) {
/* Set the base level pitch. This is needed for calculation
* of non-zero levels. */
if (is_stencil)
AddrSurfInfoIn->basePitch = surf->stencil_level[0].nblk_x;
else
AddrSurfInfoIn->basePitch = surf->level[0].nblk_x;
/* Convert blocks to pixels for compressed formats. */
if (compressed)
AddrSurfInfoIn->basePitch *= surf->blk_w;
}
ret = AddrComputeSurfaceInfo(addrlib,
AddrSurfInfoIn,
AddrSurfInfoOut);
if (ret != ADDR_OK)
return ret;
surf_level = is_stencil ? &surf->stencil_level[level] : &surf->level[level];
surf_level->offset = align64(surf->bo_size, AddrSurfInfoOut->baseAlign);
surf_level->slice_size = AddrSurfInfoOut->sliceSize;
surf_level->pitch_bytes = AddrSurfInfoOut->pitch * (is_stencil ? 1 : surf->bpe);
surf_level->nblk_x = AddrSurfInfoOut->pitch;
surf_level->nblk_y = AddrSurfInfoOut->height;
if (type == RADEON_SURF_TYPE_3D)
surf_level->nblk_z = AddrSurfInfoOut->depth;
else
surf_level->nblk_z = 1;
switch (AddrSurfInfoOut->tileMode) {
case ADDR_TM_LINEAR_ALIGNED:
surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
break;
case ADDR_TM_1D_TILED_THIN1:
surf_level->mode = RADEON_SURF_MODE_1D;
break;
case ADDR_TM_2D_TILED_THIN1:
surf_level->mode = RADEON_SURF_MODE_2D;
break;
default:
assert(0);
}
if (is_stencil)
surf->stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
else
surf->tiling_index[level] = AddrSurfInfoOut->tileIndex;
surf->bo_size = surf_level->offset + AddrSurfInfoOut->surfSize;
/* Clear DCC fields at the beginning. */
surf_level->dcc_offset = 0;
surf_level->dcc_enabled = false;
/* The previous level's flag tells us if we can use DCC for this level. */
if (AddrSurfInfoIn->flags.dccCompatible &&
(level == 0 || AddrDccOut->subLvlCompressible)) {
AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
ret = AddrComputeDccInfo(addrlib,
AddrDccIn,
AddrDccOut);
if (ret == ADDR_OK) {
surf_level->dcc_offset = surf->dcc_size;
surf_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
surf_level->dcc_enabled = true;
surf->dcc_size = surf_level->dcc_offset + AddrDccOut->dccRamSize;
surf->dcc_alignment = MAX2(surf->dcc_alignment, AddrDccOut->dccRamBaseAlign);
}
}
if (!is_stencil && AddrSurfInfoIn->flags.depth &&
surf_level->mode == RADEON_SURF_MODE_2D && level == 0) {
ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
AddrHtileIn.flags.tcCompatible = AddrSurfInfoIn->flags.tcCompatible;
AddrHtileIn.pitch = AddrSurfInfoOut->pitch;
AddrHtileIn.height = AddrSurfInfoOut->height;
AddrHtileIn.numSlices = AddrSurfInfoOut->depth;
AddrHtileIn.blockWidth = ADDR_HTILE_BLOCKSIZE_8;
AddrHtileIn.blockHeight = ADDR_HTILE_BLOCKSIZE_8;
AddrHtileIn.pTileInfo = AddrSurfInfoOut->pTileInfo;
AddrHtileIn.tileIndex = AddrSurfInfoOut->tileIndex;
AddrHtileIn.macroModeIndex = AddrSurfInfoOut->macroModeIndex;
ret = AddrComputeHtileInfo(addrlib,
&AddrHtileIn,
&AddrHtileOut);
if (ret == ADDR_OK) {
surf->htile_size = AddrHtileOut.htileBytes;
surf->htile_slice_size = AddrHtileOut.sliceSize;
surf->htile_alignment = AddrHtileOut.baseAlign;
}
}
return 0;
}
static void radv_set_micro_tile_mode(struct radeon_surf *surf,
struct radeon_info *info)
{
uint32_t tile_mode = info->si_tile_mode_array[surf->tiling_index[0]];
if (info->chip_class >= CIK)
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
else
surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
}
static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
{
unsigned index, tileb;
tileb = 8 * 8 * surf->bpe;
tileb = MIN2(surf->tile_split, tileb);
for (index = 0; tileb > 64; index++)
tileb >>= 1;
assert(index < 16);
return index;
}
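/* Worked example (illustrative): for a 32bpp surface (bpe = 4) with a
 * tile split of 256 bytes or more:
 * tileb = 8 * 8 * 4 = 256 -> 128 -> 64, so index = 2.
 */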
static int radv_amdgpu_winsys_surface_init(struct radeon_winsys *_ws,
const struct ac_surf_info *surf_info,
const struct radeon_surf_info *surf_info,
struct radeon_surf *surf)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
unsigned mode, type;
unsigned level, mode, type;
bool compressed;
ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
ADDR_TILEINFO AddrTileInfoIn = {0};
ADDR_TILEINFO AddrTileInfoOut = {0};
int r;
uint32_t last_level = surf_info->levels - 1;
r = radv_amdgpu_surface_sanity(surf_info, surf);
if (r)
return r;
AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
type = RADEON_SURF_GET(surf->flags, TYPE);
mode = RADEON_SURF_GET(surf->flags, MODE);
compressed = surf->blk_w == 4 && surf->blk_h == 4;
struct ac_surf_config config;
/* MSAA and FMASK require 2D tiling. */
if (surf_info->samples > 1 ||
(surf->flags & RADEON_SURF_FMASK))
mode = RADEON_SURF_MODE_2D;
memcpy(&config.info, surf_info, sizeof(config.info));
config.is_3d = !!(type == RADEON_SURF_TYPE_3D);
config.is_cube = !!(type == RADEON_SURF_TYPE_CUBEMAP);
/* DB doesn't support linear layouts. */
if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) &&
mode < RADEON_SURF_MODE_1D)
mode = RADEON_SURF_MODE_1D;
return ac_compute_surface(ws->addrlib, &ws->info, &config, mode, surf);
/* Set the requested tiling mode. */
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
break;
case RADEON_SURF_MODE_1D:
AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
break;
case RADEON_SURF_MODE_2D:
AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
break;
default:
assert(0);
}
/* The format must be set correctly for the allocation of compressed
* textures to work. In other cases, setting the bpp is sufficient. */
if (compressed) {
switch (surf->bpe) {
case 8:
AddrSurfInfoIn.format = ADDR_FMT_BC1;
break;
case 16:
AddrSurfInfoIn.format = ADDR_FMT_BC3;
break;
default:
assert(0);
}
} else {
AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
}
AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = surf_info->samples;
AddrSurfInfoIn.tileIndex = -1;
/* Set the micro tile type. */
if (surf->flags & RADEON_SURF_SCANOUT)
AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
else
AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
AddrSurfInfoIn.flags.cube = type == RADEON_SURF_TYPE_CUBEMAP;
AddrSurfInfoIn.flags.display = (surf->flags & RADEON_SURF_SCANOUT) != 0;
AddrSurfInfoIn.flags.pow2Pad = last_level > 0;
AddrSurfInfoIn.flags.opt4Space = 1;
/* DCC notes:
* - If we add MSAA support, keep in mind that CB can't decompress 8bpp
* with samples >= 4.
* - Mipmapped array textures have low performance (discovered by a closed
* driver team).
*/
AddrSurfInfoIn.flags.dccCompatible = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
!(surf->flags & RADEON_SURF_DISABLE_DCC) &&
!compressed && AddrDccIn.numSamples <= 1 &&
((surf_info->array_size == 1 && surf_info->depth == 1) ||
last_level == 0);
AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
AddrSurfInfoIn.flags.compressZ = AddrSurfInfoIn.flags.depth;
/* noStencil = 0 can result in a depth part that is incompatible with
* mipmapped texturing. So set noStencil = 1 when mipmaps are requested (in
* this case, we may end up setting stencil_adjusted).
*
* TODO: update addrlib to a newer version, remove this, and
* use flags.matchStencilTileCfg = 1 as an alternative fix.
*/
if (last_level > 0)
AddrSurfInfoIn.flags.noStencil = 1;
/* Set preferred macrotile parameters. This is usually required
* for shared resources. This is for 2D tiling only. */
if (AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 &&
surf->bankw && surf->bankh && surf->mtilea && surf->tile_split) {
/* If any of these parameters are incorrect, the calculation
* will fail. */
AddrTileInfoIn.banks = surf->num_banks;
AddrTileInfoIn.bankWidth = surf->bankw;
AddrTileInfoIn.bankHeight = surf->bankh;
AddrTileInfoIn.macroAspectRatio = surf->mtilea;
AddrTileInfoIn.tileSplitBytes = surf->tile_split;
AddrTileInfoIn.pipeConfig = surf->pipe_config + 1; /* +1 compared to GB_TILE_MODE */
AddrSurfInfoIn.flags.opt4Space = 0;
AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
/* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
* the tile index, because we are expected to know it if
* we know the other parameters.
*
* This is something that can easily be fixed in Addrlib.
* For now, just figure it out here.
* Note that only 2D_TILED_THIN1 is handled here.
*/
assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
if (ws->info.chip_class == SI) {
if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
if (surf->bpe == 2)
AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
else
AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
} else {
if (surf->bpe == 1)
AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
else if (surf->bpe == 2)
AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
else if (surf->bpe == 4)
AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
else
AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
}
} else {
if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
else
AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
}
}
surf->bo_size = 0;
surf->dcc_size = 0;
surf->dcc_alignment = 1;
surf->htile_size = surf->htile_slice_size = 0;
surf->htile_alignment = 1;
/* Calculate texture layout information. */
for (level = 0; level <= last_level; level++) {
r = radv_compute_level(ws->addrlib, surf_info, surf, false, level, type, compressed,
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
if (r)
break;
if (level == 0) {
surf->bo_alignment = AddrSurfInfoOut.baseAlign;
surf->pipe_config = AddrSurfInfoOut.pTileInfo->pipeConfig - 1;
radv_set_micro_tile_mode(surf, &ws->info);
/* For 2D modes only. */
if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
surf->bankw = AddrSurfInfoOut.pTileInfo->bankWidth;
surf->bankh = AddrSurfInfoOut.pTileInfo->bankHeight;
surf->mtilea = AddrSurfInfoOut.pTileInfo->macroAspectRatio;
surf->tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
surf->num_banks = AddrSurfInfoOut.pTileInfo->banks;
surf->macro_tile_index = AddrSurfInfoOut.macroModeIndex;
} else {
surf->macro_tile_index = 0;
}
}
}
/* Calculate texture layout information for stencil. */
if (surf->flags & RADEON_SURF_SBUFFER) {
AddrSurfInfoIn.bpp = 8;
AddrSurfInfoIn.flags.depth = 0;
AddrSurfInfoIn.flags.stencil = 1;
/* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
AddrTileInfoIn.tileSplitBytes = surf->stencil_tile_split;
for (level = 0; level <= last_level; level++) {
r = radv_compute_level(ws->addrlib, surf_info, surf, true, level, type, compressed,
&AddrSurfInfoIn, &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut);
if (r)
return r;
/* DB uses the depth pitch for both stencil and depth. */
if (surf->stencil_level[level].nblk_x != surf->level[level].nblk_x)
surf->stencil_adjusted = true;
if (level == 0) {
/* For 2D modes only. */
if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
surf->stencil_tile_split =
AddrSurfInfoOut.pTileInfo->tileSplitBytes;
}
}
}
}
/* Recalculate the whole DCC miptree size including disabled levels.
* This is what addrlib does, but calling addrlib would be a lot more
* complicated.
*/
#if 0
if (surf->dcc_size && last_level > 0) {
surf->dcc_size = align64(surf->bo_size >> 8,
ws->info.pipe_interleave_bytes *
ws->info.num_tile_pipes);
}
#endif
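/* Worked example (hypothetical numbers): with bo_size = 1 MiB,
 * pipe_interleave_bytes = 256 and num_tile_pipes = 8, the disabled code
 * above would compute align64(1048576 >> 8, 256 * 8) =
 * align64(4096, 2048) = 4096, i.e. DCC gets 1/256th of the BO size,
 * rounded up to the pipe-interleave granularity. */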
return 0;
}
static int radv_amdgpu_winsys_surface_best(struct radeon_winsys *rws,


@@ -28,5 +28,6 @@
#include <amdgpu.h>
void radv_amdgpu_surface_init_functions(struct radv_amdgpu_winsys *ws);
ADDR_HANDLE radv_amdgpu_addr_create(struct amdgpu_gpu_info *amdinfo, int family, int rev_id, enum chip_class chip_class);
#endif /* RADV_AMDGPU_SURFACE_H */


@@ -29,7 +29,6 @@
#include "radv_amdgpu_surface.h"
#include "radv_debug.h"
#include "amdgpu_id.h"
#include "ac_surface.h"
#include "xf86drm.h"
#include <stdio.h>
#include <stdlib.h>
@@ -40,30 +39,297 @@
#include "radv_amdgpu_bo.h"
#include "radv_amdgpu_surface.h"
#define CIK_TILE_MODE_COLOR_2D 14
#define CIK__GB_TILE_MODE__PIPE_CONFIG(x) (((x) >> 6) & 0x1f)
#define CIK__PIPE_CONFIG__ADDR_SURF_P2 0
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16 4
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16 5
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32 6
#define CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32 7
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16 8
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 9
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16 10
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16 11
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16 12
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32 13
#define CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32 14
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16 16
#define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16 17
static unsigned radv_cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
{
unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D];
switch (CIK__GB_TILE_MODE__PIPE_CONFIG(mode2d)) {
case CIK__PIPE_CONFIG__ADDR_SURF_P2:
return 2;
case CIK__PIPE_CONFIG__ADDR_SURF_P4_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_16x32:
case CIK__PIPE_CONFIG__ADDR_SURF_P4_32x32:
return 4;
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x16_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_8x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x16:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x32_16x32:
case CIK__PIPE_CONFIG__ADDR_SURF_P8_32x64_32x32:
return 8;
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_8X16:
case CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16:
return 16;
default:
fprintf(stderr, "Invalid CIK pipe configuration, assuming P2\n");
assert(!"this should never occur");
return 2;
}
}
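/* Minimal sketch of the decode the macros above perform, with a made-up
 * GB_TILE_MODE word (CIK field layout assumed):
 *
 *    unsigned mode2d = 0x00000244;                  // hypothetical value
 *    unsigned pipe_config = (mode2d >> 6) & 0x1f;   // == 9
 *    // 9 is CIK__PIPE_CONFIG__ADDR_SURF_P8_16x32_8x16 -> 8 tile pipes
 */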
static const char *
get_chip_name(enum radeon_family family)
{
switch (family) {
case CHIP_TAHITI: return "AMD RADV TAHITI";
case CHIP_PITCAIRN: return "AMD RADV PITCAIRN";
case CHIP_VERDE: return "AMD RADV CAPE VERDE";
case CHIP_OLAND: return "AMD RADV OLAND";
case CHIP_HAINAN: return "AMD RADV HAINAN";
case CHIP_BONAIRE: return "AMD RADV BONAIRE";
case CHIP_KAVERI: return "AMD RADV KAVERI";
case CHIP_KABINI: return "AMD RADV KABINI";
case CHIP_HAWAII: return "AMD RADV HAWAII";
case CHIP_MULLINS: return "AMD RADV MULLINS";
case CHIP_TONGA: return "AMD RADV TONGA";
case CHIP_ICELAND: return "AMD RADV ICELAND";
case CHIP_CARRIZO: return "AMD RADV CARRIZO";
case CHIP_FIJI: return "AMD RADV FIJI";
case CHIP_POLARIS10: return "AMD RADV POLARIS10";
case CHIP_POLARIS11: return "AMD RADV POLARIS11";
case CHIP_STONEY: return "AMD RADV STONEY";
default: return "AMD RADV unknown";
}
}
static bool
do_winsys_init(struct radv_amdgpu_winsys *ws, int fd)
{
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
return false;
struct amdgpu_buffer_size_alignments alignment_info = {};
struct amdgpu_heap_info vram, visible_vram, gtt;
struct drm_amdgpu_info_hw_ip dma = {};
struct drm_amdgpu_info_hw_ip compute = {};
drmDevicePtr devinfo;
int r;
int i, j;
/* Get PCI info. */
r = drmGetDevice2(fd, 0, &devinfo);
if (r) {
fprintf(stderr, "amdgpu: drmGetDevice2 failed.\n");
goto fail;
}
ws->info.pci_domain = devinfo->businfo.pci->domain;
ws->info.pci_bus = devinfo->businfo.pci->bus;
ws->info.pci_dev = devinfo->businfo.pci->dev;
ws->info.pci_func = devinfo->businfo.pci->func;
drmFreeDevice(&devinfo);
/* LLVM 5.0 is required for GFX9. */
if (ws->info.chip_class >= GFX9 && HAVE_LLVM < 0x0500) {
fprintf(stderr, "amdgpu: LLVM 5.0 is required, got LLVM %i.%i\n",
HAVE_LLVM >> 8, HAVE_LLVM & 255);
return false;
/* Query hardware and driver information. */
r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_gpu_info failed.\n");
goto fail;
}
ws->addrlib = amdgpu_addr_create(&ws->info, &ws->amdinfo);
r = amdgpu_query_buffer_size_alignment(ws->dev, &alignment_info);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_buffer_size_alignment failed.\n");
goto fail;
}
r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM, 0, &vram);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(vram) failed.\n");
goto fail;
}
r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED, &visible_vram);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(visible_vram) failed.\n");
goto fail;
}
r = amdgpu_query_heap_info(ws->dev, AMDGPU_GEM_DOMAIN_GTT, 0, &gtt);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_heap_info(gtt) failed.\n");
goto fail;
}
r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_DMA, 0, &dma);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(dma) failed.\n");
goto fail;
}
r = amdgpu_query_hw_ip_info(ws->dev, AMDGPU_HW_IP_COMPUTE, 0, &compute);
if (r) {
fprintf(stderr, "amdgpu: amdgpu_query_hw_ip_info(compute) failed.\n");
goto fail;
}
ws->info.pci_id = ws->amdinfo.asic_id; /* TODO: is this correct? */
ws->info.vce_harvest_config = ws->amdinfo.vce_harvest_config;
switch (ws->info.pci_id) {
#define CHIPSET(pci_id, name, cfamily) case pci_id: ws->info.family = CHIP_##cfamily; break;
#include "pci_ids/radeonsi_pci_ids.h"
#undef CHIPSET
default:
fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
goto fail;
}
if (ws->info.family >= CHIP_TONGA)
ws->info.chip_class = VI;
else if (ws->info.family >= CHIP_BONAIRE)
ws->info.chip_class = CIK;
else if (ws->info.family >= CHIP_TAHITI)
ws->info.chip_class = SI;
else {
fprintf(stderr, "amdgpu: Unknown family.\n");
goto fail;
}
/* family and rev_id are for addrlib */
switch (ws->info.family) {
case CHIP_TAHITI:
ws->family = FAMILY_SI;
ws->rev_id = SI_TAHITI_P_A0;
break;
case CHIP_PITCAIRN:
ws->family = FAMILY_SI;
ws->rev_id = SI_PITCAIRN_PM_A0;
break;
case CHIP_VERDE:
ws->family = FAMILY_SI;
ws->rev_id = SI_CAPEVERDE_M_A0;
break;
case CHIP_OLAND:
ws->family = FAMILY_SI;
ws->rev_id = SI_OLAND_M_A0;
break;
case CHIP_HAINAN:
ws->family = FAMILY_SI;
ws->rev_id = SI_HAINAN_V_A0;
break;
case CHIP_BONAIRE:
ws->family = FAMILY_CI;
ws->rev_id = CI_BONAIRE_M_A0;
break;
case CHIP_KAVERI:
ws->family = FAMILY_KV;
ws->rev_id = KV_SPECTRE_A0;
break;
case CHIP_KABINI:
ws->family = FAMILY_KV;
ws->rev_id = KB_KALINDI_A0;
break;
case CHIP_HAWAII:
ws->family = FAMILY_CI;
ws->rev_id = CI_HAWAII_P_A0;
break;
case CHIP_MULLINS:
ws->family = FAMILY_KV;
ws->rev_id = ML_GODAVARI_A0;
break;
case CHIP_TONGA:
ws->family = FAMILY_VI;
ws->rev_id = VI_TONGA_P_A0;
break;
case CHIP_ICELAND:
ws->family = FAMILY_VI;
ws->rev_id = VI_ICELAND_M_A0;
break;
case CHIP_CARRIZO:
ws->family = FAMILY_CZ;
ws->rev_id = CARRIZO_A0;
break;
case CHIP_STONEY:
ws->family = FAMILY_CZ;
ws->rev_id = STONEY_A0;
break;
case CHIP_FIJI:
ws->family = FAMILY_VI;
ws->rev_id = VI_FIJI_P_A0;
break;
case CHIP_POLARIS10:
ws->family = FAMILY_VI;
ws->rev_id = VI_POLARIS10_P_A0;
break;
case CHIP_POLARIS11:
ws->family = FAMILY_VI;
ws->rev_id = VI_POLARIS11_M_A0;
break;
default:
fprintf(stderr, "amdgpu: Unknown family.\n");
goto fail;
}
ws->addrlib = radv_amdgpu_addr_create(&ws->amdinfo, ws->family, ws->rev_id, ws->info.chip_class);
if (!ws->addrlib) {
fprintf(stderr, "amdgpu: Cannot create addrlib.\n");
return false;
goto fail;
}
ws->info.num_sdma_rings = MIN2(ws->info.num_sdma_rings, MAX_RINGS_PER_TYPE);
ws->info.num_compute_rings = MIN2(ws->info.num_compute_rings, MAX_RINGS_PER_TYPE);
assert(util_is_power_of_two(dma.available_rings + 1));
assert(util_is_power_of_two(compute.available_rings + 1));
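/* Illustrative note: these assertions expect available_rings to be a
 * contiguous mask starting at bit 0, i.e. of the form 2^n - 1. For example
 * 0x3 + 1 == 4 is a power of two, so a two-ring mask passes, while a sparse
 * mask such as 0x5 (0x5 + 1 == 6) would trip the assertion. */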
ws->use_ib_bos = ws->info.chip_class >= CIK;
/* Set hardware information. */
ws->info.name = get_chip_name(ws->info.family);
ws->info.gart_size = gtt.heap_size;
ws->info.vram_size = vram.heap_size;
ws->info.visible_vram_size = visible_vram.heap_size;
/* Convert the shader clock from kHz to MHz. */
ws->info.max_shader_clock = ws->amdinfo.max_engine_clk / 1000;
ws->info.max_se = ws->amdinfo.num_shader_engines;
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
ws->info.has_uvd = 0;
ws->info.vce_fw_version = 0;
ws->info.has_userptr = TRUE;
ws->info.num_render_backends = ws->amdinfo.rb_pipes;
ws->info.clock_crystal_freq = ws->amdinfo.gpu_counter_freq;
ws->info.num_tile_pipes = radv_cik_get_num_tile_pipes(&ws->amdinfo);
ws->info.pipe_interleave_bytes = 256 << ((ws->amdinfo.gb_addr_cfg >> 4) & 0x7);
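/* For example, a PIPE_INTERLEAVE_SIZE field of 0 in gb_addr_cfg yields
 * 256 << 0 = 256 bytes, and a field value of 1 yields 256 << 1 = 512. */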
ws->info.has_virtual_memory = TRUE;
ws->info.sdma_rings = MIN2(util_bitcount(dma.available_rings),
MAX_RINGS_PER_TYPE);
ws->info.compute_rings = MIN2(util_bitcount(compute.available_rings),
MAX_RINGS_PER_TYPE);
/* Get the number of good compute units. */
ws->info.num_good_compute_units = 0;
for (i = 0; i < ws->info.max_se; i++)
for (j = 0; j < ws->info.max_sh_per_se; j++)
ws->info.num_good_compute_units +=
util_bitcount(ws->amdinfo.cu_bitmap[i][j]);
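/* Sketch with a hypothetical bitmap: if cu_bitmap[0][0] == 0x3ff, then
 * util_bitcount(0x3ff) == 10, so that shader array contributes ten enabled
 * compute units to the total. */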
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
sizeof(ws->amdinfo.gb_tile_mode));
ws->info.enabled_rb_mask = ws->amdinfo.enabled_rb_pipes_mask;
memcpy(ws->info.cik_macrotile_mode_array, ws->amdinfo.gb_macro_tile_mode,
sizeof(ws->amdinfo.gb_macro_tile_mode));
ws->info.gart_page_size = alignment_info.size_remote;
if (ws->info.chip_class == SI)
ws->info.gfx_ib_pad_with_type2 = TRUE;
ws->use_ib_bos = ws->family >= FAMILY_CI;
return true;
fail:
return false;
}
static void radv_amdgpu_winsys_query_info(struct radeon_winsys *rws,
@@ -82,7 +348,7 @@ static void radv_amdgpu_winsys_destroy(struct radeon_winsys *rws)
}
struct radeon_winsys *
radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
radv_amdgpu_winsys_create(int fd, uint32_t debug_flags)
{
uint32_t drm_major, drm_minor, r;
amdgpu_device_handle dev;
@@ -106,7 +372,6 @@ radv_amdgpu_winsys_create(int fd, uint64_t debug_flags, uint64_t perftest_flags)
if (debug_flags & RADV_DEBUG_NO_IBS)
ws->use_ib_bos = false;
ws->batchchain = !!(perftest_flags & RADV_PERFTEST_BATCHCHAIN);
LIST_INITHEAD(&ws->global_bo_list);
pthread_mutex_init(&ws->global_bo_list_lock, NULL);
ws->base.query_info = radv_amdgpu_winsys_query_info;


@@ -29,7 +29,6 @@
#define RADV_AMDGPU_WINSYS_H
#include "radv_radeon_winsys.h"
#include "ac_gpu_info.h"
#include "addrlib/addrinterface.h"
#include <amdgpu.h>
#include "util/list.h"
@@ -42,8 +41,10 @@ struct radv_amdgpu_winsys {
struct amdgpu_gpu_info amdinfo;
ADDR_HANDLE addrlib;
uint32_t rev_id;
unsigned family;
bool debug_all_bos;
bool batchchain;
pthread_mutex_t global_bo_list_lock;
struct list_head global_bo_list;
unsigned num_buffers;


@@ -29,7 +29,6 @@
#ifndef RADV_AMDGPU_WINSYS_PUBLIC_H
#define RADV_AMDGPU_WINSYS_PUBLIC_H
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint64_t debug_flags,
uint64_t perftest_flags);
struct radeon_winsys *radv_amdgpu_winsys_create(int fd, uint32_t debug_flags);
#endif /* RADV_AMDGPU_WINSYS_PUBLIC_H */


@@ -37,7 +37,6 @@ LOCAL_C_INCLUDES += \
LOCAL_EXPORT_C_INCLUDE_DIRS += \
$(intermediates)/nir \
$(MESA_TOP)/src/compiler \
$(MESA_TOP)/src/compiler/nir
LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \


@@ -186,6 +186,7 @@ NIR_GENERATED_FILES = \
NIR_FILES = \
nir/nir.c \
nir/nir.h \
nir/nir_array.h \
nir/nir_builder.h \
nir/nir_clone.c \
nir/nir_constant_expressions.h \
@@ -207,7 +208,6 @@ NIR_FILES = \
nir/nir_lower_64bit_packing.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics.c \
nir/nir_lower_atomics_to_ssbo.c \
nir/nir_lower_bitmap.c \
nir/nir_lower_clamp_color_outputs.c \
nir/nir_lower_clip.c \


@@ -622,14 +622,6 @@ struct ast_type_qualifier {
* is used.
*/
unsigned inner_coverage:1;
/** \name Layout qualifiers for GL_ARB_bindless_texture */
/** \{ */
unsigned bindless_sampler:1;
unsigned bindless_image:1;
unsigned bound_sampler:1;
unsigned bound_image:1;
/** \} */
}
/** \brief Set of flags, accessed by name. */
q;
@@ -844,7 +836,6 @@ public:
/* List of ast_declarator_list * */
exec_list declarations;
bool is_declaration;
const glsl_type *type;
};


@@ -299,18 +299,12 @@ _mesa_ast_array_index_to_hir(void *mem_ctx,
* values must not diverge between shader invocations run together. If the
* values *do* diverge, then the behavior of the operation requiring a
* dynamically uniform expression is undefined.
*
* From section 4.1.7 of the ARB_bindless_texture spec:
*
* "Samplers aggregated into arrays within a shader (using square
* brackets []) can be indexed with arbitrary integer expressions."
*/
if (array->type->without_array()->is_sampler()) {
if (!state->is_version(400, 320) &&
!state->ARB_gpu_shader5_enable &&
!state->EXT_gpu_shader5_enable &&
!state->OES_gpu_shader5_enable &&
!state->has_bindless()) {
!state->OES_gpu_shader5_enable) {
if (state->is_version(130, 300))
_mesa_glsl_error(&loc, state,
"sampler arrays indexed with non-constant "


@@ -107,35 +107,35 @@ verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state,
* qualifiers. [...] It is legal to have additional qualifiers
* on a formal parameter, but not to have fewer."
*/
if (actual->data.memory_coherent && !formal->data.memory_coherent) {
if (actual->data.image_coherent && !formal->data.image_coherent) {
_mesa_glsl_error(loc, state,
"function call parameter `%s' drops "
"`coherent' qualifier", formal->name);
return false;
}
if (actual->data.memory_volatile && !formal->data.memory_volatile) {
if (actual->data.image_volatile && !formal->data.image_volatile) {
_mesa_glsl_error(loc, state,
"function call parameter `%s' drops "
"`volatile' qualifier", formal->name);
return false;
}
if (actual->data.memory_restrict && !formal->data.memory_restrict) {
if (actual->data.image_restrict && !formal->data.image_restrict) {
_mesa_glsl_error(loc, state,
"function call parameter `%s' drops "
"`restrict' qualifier", formal->name);
return false;
}
if (actual->data.memory_read_only && !formal->data.memory_read_only) {
if (actual->data.image_read_only && !formal->data.image_read_only) {
_mesa_glsl_error(loc, state,
"function call parameter `%s' drops "
"`readonly' qualifier", formal->name);
return false;
}
if (actual->data.memory_write_only && !formal->data.memory_write_only) {
if (actual->data.image_write_only && !formal->data.image_write_only) {
_mesa_glsl_error(loc, state,
"function call parameter `%s' drops "
"`writeonly' qualifier", formal->name);
@@ -283,7 +283,7 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
mode, formal->name,
actual->variable_referenced()->name);
return false;
} else if (!actual->is_lvalue(state)) {
} else if (!actual->is_lvalue()) {
_mesa_glsl_error(&loc, state,
"function parameter '%s %s' is not an lvalue",
mode, formal->name);
@@ -740,8 +740,8 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
if (src->type->is_error())
return src;
assert(a <= GLSL_TYPE_IMAGE);
assert(b <= GLSL_TYPE_IMAGE);
assert(a <= GLSL_TYPE_BOOL);
assert(b <= GLSL_TYPE_BOOL);
if (a == b)
return src;
@@ -769,12 +769,6 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
case GLSL_TYPE_INT64:
result = new(ctx) ir_expression(ir_unop_i642u, src);
break;
case GLSL_TYPE_SAMPLER:
result = new(ctx) ir_expression(ir_unop_unpack_sampler_2x32, src);
break;
case GLSL_TYPE_IMAGE:
result = new(ctx) ir_expression(ir_unop_unpack_image_2x32, src);
break;
}
break;
case GLSL_TYPE_INT:
@@ -917,22 +911,6 @@ convert_component(ir_rvalue *src, const glsl_type *desired_type)
break;
}
break;
case GLSL_TYPE_SAMPLER:
switch (b) {
case GLSL_TYPE_UINT:
result = new(ctx)
ir_expression(ir_unop_pack_sampler_2x32, desired_type, src);
break;
}
break;
case GLSL_TYPE_IMAGE:
switch (b) {
case GLSL_TYPE_UINT:
result = new(ctx)
ir_expression(ir_unop_pack_image_2x32, desired_type, src);
break;
}
break;
}
assert(result != NULL);
@@ -1952,13 +1930,6 @@ ast_function_expression::handle_method(exec_list *instructions,
return ir_rvalue::error_value(ctx);
}
static inline bool is_valid_constructor(const glsl_type *type,
struct _mesa_glsl_parse_state *state)
{
return type->is_numeric() || type->is_boolean() ||
(state->has_bindless() && (type->is_sampler() || type->is_image()));
}
ir_rvalue *
ast_function_expression::hir(exec_list *instructions,
struct _mesa_glsl_parse_state *state)
@@ -1991,21 +1962,9 @@ ast_function_expression::hir(exec_list *instructions,
/* Constructors for opaque types are illegal.
*
* From section 4.1.7 of the ARB_bindless_texture spec:
*
* "Samplers are represented using 64-bit integer handles, and may be "
* converted to and from 64-bit integers using constructors."
*
* From section 4.1.X of the ARB_bindless_texture spec:
*
* "Images are represented using 64-bit integer handles, and may be
* converted to and from 64-bit integers using constructors."
*/
if (constructor_type->contains_atomic() ||
(!state->has_bindless() && constructor_type->contains_opaque())) {
_mesa_glsl_error(& loc, state, "cannot construct %s type `%s'",
state->has_bindless() ? "atomic" : "opaque",
if (constructor_type->contains_opaque()) {
_mesa_glsl_error(& loc, state, "cannot construct opaque type `%s'",
constructor_type->name);
return ir_rvalue::error_value(ctx);
}
@@ -2048,7 +2007,7 @@ ast_function_expression::hir(exec_list *instructions,
state);
}
if (!is_valid_constructor(constructor_type, state))
if (!constructor_type->is_numeric() && !constructor_type->is_boolean())
return ir_rvalue::error_value(ctx);
/* Total number of components of the type being constructed. */
@@ -2078,7 +2037,7 @@ ast_function_expression::hir(exec_list *instructions,
return ir_rvalue::error_value(ctx);
}
if (!is_valid_constructor(result->type, state)) {
if (!result->type->is_numeric() && !result->type->is_boolean()) {
_mesa_glsl_error(& loc, state, "cannot construct `%s' from a "
"non-numeric data type",
constructor_type->name);
@@ -2170,51 +2129,10 @@ ast_function_expression::hir(exec_list *instructions,
/* Type cast each parameter and, if possible, fold constants.*/
foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) {
const glsl_type *desired_type;
/* From section 5.4.1 of the ARB_bindless_texture spec:
*
* "In the following four constructors, the low 32 bits of the sampler
* type correspond to the .x component of the uvec2 and the high 32
* bits correspond to the .y component."
*
* uvec2(any sampler type) // Converts a sampler type to a
* // pair of 32-bit unsigned integers
* any sampler type(uvec2) // Converts a pair of 32-bit unsigned integers to
* // a sampler type
* uvec2(any image type) // Converts an image type to a
* // pair of 32-bit unsigned integers
* any image type(uvec2) // Converts a pair of 32-bit unsigned integers to
* // an image type
*/
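/* Sketch of the packing described above (hypothetical 64-bit handle h):
 *
 *    uvec2 v = uvec2(h & 0xffffffffu, h >> 32);   // sampler/image -> uvec2
 *    h = ((uint64_t)v.y << 32) | v.x;             // uvec2 -> handle
 */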
if (ir->type->is_sampler() || ir->type->is_image()) {
/* Convert a sampler/image type to a pair of 32-bit unsigned
* integers as defined by ARB_bindless_texture.
*/
if (constructor_type != glsl_type::uvec2_type) {
_mesa_glsl_error(&loc, state, "sampler and image types can only "
"be converted to a pair of 32-bit unsigned "
"integers");
}
desired_type = glsl_type::uvec2_type;
} else if (constructor_type->is_sampler() ||
constructor_type->is_image()) {
/* Convert a pair of 32-bit unsigned integers to a sampler or image
* type as defined by ARB_bindless_texture.
*/
if (ir->type != glsl_type::uvec2_type) {
_mesa_glsl_error(&loc, state, "sampler and image types can only "
"be converted from a pair of 32-bit unsigned "
"integers");
}
desired_type = constructor_type;
} else {
desired_type =
glsl_type::get_instance(constructor_type->base_type,
ir->type->vector_elements,
ir->type->matrix_columns);
}
const glsl_type *desired_type =
glsl_type::get_instance(constructor_type->base_type,
ir->type->vector_elements,
ir->type->matrix_columns);
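/* Sketch (assuming Mesa's glsl_type API): get_instance() resolves the base
 * type plus the parameter's shape to a concrete type, e.g.
 *
 *    assert(glsl_type::get_instance(GLSL_TYPE_FLOAT, 3, 1)
 *           == glsl_type::vec3_type);   // float base, 3 components -> vec3
 */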
ir_rvalue *result = convert_component(ir, desired_type);
/* Attempt to convert the parameter to a constant valued expression.


@@ -86,17 +86,17 @@ public:
return visit_continue;
ir_variable *var = ir->variable_referenced();
/* We can have memory_write_only set on both images and buffer variables,
/* We can have image_write_only set on both images and buffer variables,
* but in the former there is a distinction between reads from
* the variable itself (write_only) and from the memory they point to
* (memory_write_only), while in the case of buffer variables there is
* (image_write_only), while in the case of buffer variables there is
* no such distinction, that is why this check here is limited to
* buffer variables alone.
*/
if (!var || var->data.mode != ir_var_shader_storage)
return visit_continue;
if (var->data.memory_write_only) {
if (var->data.image_write_only) {
found = var;
return visit_stop;
}
@@ -947,11 +947,11 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
error_emitted = true;
} else if (lhs_var != NULL && (lhs_var->data.read_only ||
(lhs_var->data.mode == ir_var_shader_storage &&
lhs_var->data.memory_read_only))) {
/* We can have memory_read_only set on both images and buffer variables,
lhs_var->data.image_read_only))) {
/* We can have image_read_only set on both images and buffer variables,
* but in the former there is a distinction between assignments to
* the variable itself (read_only) and to the memory they point to
* (memory_read_only), while in the case of buffer variables there is
* (image_read_only), while in the case of buffer variables there is
* no such distinction, that is why this check here is limited to
* buffer variables alone.
*/
@@ -971,7 +971,7 @@ do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state,
* The restriction on arrays is lifted in GLSL 1.20 and GLSL ES 3.00.
*/
error_emitted = true;
} else if (!lhs->is_lvalue(state)) {
} else if (!lhs->is_lvalue()) {
_mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment");
error_emitted = true;
}
@@ -2359,10 +2359,7 @@ ast_type_specifier::glsl_type(const char **name,
{
const struct glsl_type *type;
if (structure)
type = structure->type;
else
type = state->symbols->get_type(this->type_name);
type = state->symbols->get_type(this->type_name);
*name = this->type_name;
YYLTYPE loc = this->get_location();
@@ -2999,26 +2996,6 @@ validate_fragment_flat_interpolation_input(struct _mesa_glsl_parse_state *state,
_mesa_glsl_error(loc, state, "if a fragment input is (or contains) "
"a double, then it must be qualified with 'flat'");
}
/* Bindless sampler/image fragment inputs must be qualified with 'flat'.
*
* From section 4.3.4 of the ARB_bindless_texture spec:
*
* "(modify last paragraph, p. 35, allowing samplers and images as
* fragment shader inputs) ... Fragment inputs can only be signed and
* unsigned integers and integer vectors, floating point scalars,
* floating-point vectors, matrices, sampler and image types, or arrays
* or structures of these. Fragment shader inputs that are signed or
* unsigned integers, integer vectors, or any double-precision floating-
* point type, or any sampler or image type must be qualified with the
* interpolation qualifier "flat"."
*/
if (state->has_bindless()
&& (var_type->contains_sampler() || var_type->contains_image())) {
_mesa_glsl_error(loc, state, "if a fragment input is (or contains) "
"a bindless sampler (or image), then it must be "
"qualified with 'flat'");
}
}
static void
@@ -3248,9 +3225,6 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
"compute shader variables cannot be given "
"explicit locations");
return;
default:
fail = true;
break;
};
if (fail) {
@@ -3282,7 +3256,7 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
? (qual_location + FRAG_RESULT_DATA0)
: (qual_location + VARYING_SLOT_VAR0);
break;
default:
case MESA_SHADER_COMPUTE:
assert(!"Unexpected shader type");
break;
}
@@ -3319,92 +3293,25 @@ apply_explicit_location(const struct ast_type_qualifier *qual,
}
static bool
validate_storage_for_sampler_image_types(ir_variable *var,
struct _mesa_glsl_parse_state *state,
YYLTYPE *loc)
validate_image_qualifier_for_type(struct _mesa_glsl_parse_state *state,
YYLTYPE *loc,
const struct ast_type_qualifier *qual,
const glsl_type *type)
{
/* From section 4.1.7 of the GLSL 4.40 spec:
*
* "[Opaque types] can only be declared as function
* parameters or uniform-qualified variables."
*
* From section 4.1.7 of the ARB_bindless_texture spec:
*
* "Samplers may be declared as shader inputs and outputs, as uniform
* variables, as temporary variables, and as function parameters."
*
* From section 4.1.X of the ARB_bindless_texture spec:
*
* "Images may be declared as shader inputs and outputs, as uniform
* variables, as temporary variables, and as function parameters."
*/
if (state->has_bindless()) {
if (var->data.mode != ir_var_auto &&
var->data.mode != ir_var_uniform &&
var->data.mode != ir_var_shader_in &&
var->data.mode != ir_var_shader_out &&
var->data.mode != ir_var_function_in &&
var->data.mode != ir_var_function_out &&
var->data.mode != ir_var_function_inout) {
_mesa_glsl_error(loc, state, "bindless image/sampler variables may "
"only be declared as shader inputs and outputs, as "
"uniform variables, as temporary variables and as "
"function parameters");
return false;
}
} else {
if (var->data.mode != ir_var_uniform &&
var->data.mode != ir_var_function_in) {
_mesa_glsl_error(loc, state, "image/sampler variables may only be "
"declared as function parameters or "
"uniform-qualified global variables");
return false;
}
}
return true;
}
static bool
validate_memory_qualifier_for_type(struct _mesa_glsl_parse_state *state,
YYLTYPE *loc,
const struct ast_type_qualifier *qual,
const glsl_type *type)
{
/* From Section 4.10 (Memory Qualifiers) of the GLSL 4.50 spec:
*
* "Memory qualifiers are only supported in the declarations of image
* variables, buffer variables, and shader storage blocks; it is an error
* to use such qualifiers in any other declarations.
*/
if (!type->is_image() && !qual->flags.q.buffer) {
if (!type->is_image()) {
if (qual->flags.q.read_only ||
qual->flags.q.write_only ||
qual->flags.q.coherent ||
qual->flags.q._volatile ||
qual->flags.q.restrict_flag) {
_mesa_glsl_error(loc, state, "memory qualifiers may only be applied "
"in the declarations of image variables, buffer "
"variables, and shader storage blocks");
return false;
"to images");
}
}
return true;
}
static bool
validate_image_format_qualifier_for_type(struct _mesa_glsl_parse_state *state,
YYLTYPE *loc,
const struct ast_type_qualifier *qual,
const glsl_type *type)
{
/* From section 4.4.6.2 (Format Layout Qualifiers) of the GLSL 4.50 spec:
*
* "Format layout qualifiers can be used on image variable declarations
* (those declared with a basic type having “image” in its keyword)."
*/
if (!type->is_image() && qual->flags.q.explicit_image_format) {
_mesa_glsl_error(loc, state, "format layout qualifiers may only be "
"applied to images");
if (qual->flags.q.explicit_image_format) {
_mesa_glsl_error(loc, state, "format layout qualifiers may only be "
"applied to images");
}
return false;
}
return true;
@@ -3418,21 +3325,22 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
{
const glsl_type *base_type = var->type->without_array();
if (!validate_image_format_qualifier_for_type(state, loc, qual, base_type) ||
!validate_memory_qualifier_for_type(state, loc, qual, base_type))
if (!validate_image_qualifier_for_type(state, loc, qual, base_type))
return;
if (!base_type->is_image())
return;
if (var->data.mode != ir_var_uniform &&
var->data.mode != ir_var_function_in) {
_mesa_glsl_error(loc, state, "image variables may only be declared as "
"function parameters or uniform-qualified "
"global variables");
}
if (!validate_storage_for_sampler_image_types(var, state, loc))
return;
var->data.memory_read_only |= qual->flags.q.read_only;
var->data.memory_write_only |= qual->flags.q.write_only;
var->data.memory_coherent |= qual->flags.q.coherent;
var->data.memory_volatile |= qual->flags.q._volatile;
var->data.memory_restrict |= qual->flags.q.restrict_flag;
var->data.image_read_only |= qual->flags.q.read_only;
var->data.image_write_only |= qual->flags.q.write_only;
var->data.image_coherent |= qual->flags.q.coherent;
var->data.image_volatile |= qual->flags.q._volatile;
var->data.image_restrict |= qual->flags.q.restrict_flag;
var->data.read_only = true;
if (qual->flags.q.explicit_image_format) {
if (var->data.mode == ir_var_function_in) {
@@ -3469,8 +3377,8 @@ apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual,
var->data.image_format != GL_R32F &&
var->data.image_format != GL_R32I &&
var->data.image_format != GL_R32UI &&
!var->data.memory_read_only &&
!var->data.memory_write_only) {
!var->data.image_read_only &&
!var->data.image_write_only) {
_mesa_glsl_error(loc, state, "image variables of format other than r32f, "
"r32i or r32ui must be qualified `readonly' or "
"`writeonly'");
@@ -3524,69 +3432,6 @@ validate_array_dimensions(const glsl_type *t,
}
}
static void
apply_bindless_qualifier_to_variable(const struct ast_type_qualifier *qual,
ir_variable *var,
struct _mesa_glsl_parse_state *state,
YYLTYPE *loc)
{
bool has_local_qualifiers = qual->flags.q.bindless_sampler ||
qual->flags.q.bindless_image ||
qual->flags.q.bound_sampler ||
qual->flags.q.bound_image;
/* The ARB_bindless_texture spec says:
*
* "Modify Section 4.4.6 Opaque-Uniform Layout Qualifiers of the GLSL 4.30
* spec"
*
* "If these layout qualifiers are applied to other types of default block
* uniforms, or variables with non-uniform storage, a compile-time error
* will be generated."
*/
if (has_local_qualifiers && !qual->flags.q.uniform) {
_mesa_glsl_error(loc, state, "ARB_bindless_texture layout qualifiers "
"can only be applied to default block uniforms or "
"variables with uniform storage");
return;
}
/* The ARB_bindless_texture spec doesn't state anything in this situation,
* but it makes sense to only allow bindless_sampler/bound_sampler for
* sampler types, and respectively bindless_image/bound_image for image
* types.
*/
if ((qual->flags.q.bindless_sampler || qual->flags.q.bound_sampler) &&
!var->type->contains_sampler()) {
_mesa_glsl_error(loc, state, "bindless_sampler or bound_sampler can only "
"be applied to sampler types");
return;
}
if ((qual->flags.q.bindless_image || qual->flags.q.bound_image) &&
!var->type->contains_image()) {
_mesa_glsl_error(loc, state, "bindless_image or bound_image can only be "
"applied to image types");
return;
}
/* The bindless_sampler/bindless_image (and respectively
* bound_sampler/bound_image) layout qualifiers can be set at global and at
* local scope.
*/
if (var->type->contains_sampler() || var->type->contains_image()) {
var->data.bindless = qual->flags.q.bindless_sampler ||
qual->flags.q.bindless_image ||
state->bindless_sampler_specified ||
state->bindless_image_specified;
var->data.bound = qual->flags.q.bound_sampler ||
qual->flags.q.bound_image ||
state->bound_sampler_specified ||
state->bound_image_specified;
}
}
static void
apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
ir_variable *var,
@@ -3764,9 +3609,14 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
}
}
if (var->type->contains_sampler() &&
!validate_storage_for_sampler_image_types(var, state, loc))
return;
if (var->type->contains_sampler()) {
if (var->data.mode != ir_var_uniform &&
var->data.mode != ir_var_function_in) {
_mesa_glsl_error(loc, state, "sampler variables may only be declared "
"as function parameters or uniform-qualified "
"global variables");
}
}
/* Is the 'layout' keyword used with parameters that allow relaxed checking.
* Many implementations of GL_ARB_fragment_coord_conventions_enable and some
@@ -3879,9 +3729,6 @@ apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
_mesa_glsl_error(loc, state, "post_depth_coverage layout qualifier only "
"valid in fragment shader input layout declaration.");
}
if (state->has_bindless())
apply_bindless_qualifier_to_variable(qual, var, state, loc);
}
static void
@@ -4026,21 +3873,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
* Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES
* 3.00 spec allows structs as well. Varying structs are also allowed
* in GLSL 1.50.
*
* From section 4.3.4 of the ARB_bindless_texture spec:
*
* "(modify third paragraph of the section to allow sampler and image
* types) ... Vertex shader inputs can only be float,
* single-precision floating-point scalars, single-precision
* floating-point vectors, matrices, signed and unsigned integers
* and integer vectors, sampler and image types."
*
* From section 4.3.6 of the ARB_bindless_texture spec:
*
* "Output variables can only be floating-point scalars,
* floating-point vectors, matrices, signed or unsigned integers or
* integer vectors, sampler or image types, or arrays or structures
* of any these."
*/
switch (var->type->without_array()->base_type) {
case GLSL_TYPE_FLOAT:
@@ -4064,11 +3896,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
break;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
if (state->has_bindless())
break;
/* fallthrough */
default:
_mesa_glsl_error(loc, state, "illegal type for a varying variable");
break;
@@ -4095,8 +3922,6 @@ apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
case MESA_SHADER_COMPUTE:
/* Invariance isn't meaningful in compute shaders. */
break;
default:
break;
}
}
@@ -4284,22 +4109,6 @@ get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
*/
earlier->data.precision = var->data.precision;
} else if (earlier->data.how_declared == ir_var_declared_implicitly &&
state->allow_builtin_variable_redeclaration) {
/* Allow verbatim redeclarations of built-in variables. Not explicitly
* valid, but some applications do it.
*/
if (earlier->data.mode != var->data.mode &&
!(earlier->data.mode == ir_var_system_value &&
var->data.mode == ir_var_shader_in)) {
_mesa_glsl_error(&loc, state,
"redeclaration of `%s' with incorrect qualifiers",
var->name);
} else if (earlier->type != var->type) {
_mesa_glsl_error(&loc, state,
"redeclaration of `%s' has incorrect type",
var->name);
}
} else if (allow_all_redeclarations) {
if (earlier->data.mode != var->data.mode) {
_mesa_glsl_error(&loc, state,
@@ -4357,22 +4166,11 @@ process_initializer(ir_variable *var, ast_declaration *decl,
* "Opaque variables [...] are initialized only through the
* OpenGL API; they cannot be declared with an initializer in a
* shader."
*
* From section 4.1.7 of the ARB_bindless_texture spec:
*
* "Samplers may be declared as shader inputs and outputs, as uniform
* variables, as temporary variables, and as function parameters."
*
* From section 4.1.X of the ARB_bindless_texture spec:
*
* "Images may be declared as shader inputs and outputs, as uniform
* variables, as temporary variables, and as function parameters."
*/
if (var->type->contains_atomic() ||
(!state->has_bindless() && var->type->contains_opaque())) {
if (var->type->contains_opaque()) {
_mesa_glsl_error(&initializer_loc, state,
"cannot initialize %s variable %s",
var->name, state->has_bindless() ? "atomic" : "opaque");
"cannot initialize opaque variable %s",
var->name);
}
if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) {
@@ -5152,14 +4950,6 @@ ast_declarator_list::hir(exec_list *instructions,
* vectors, matrices, signed and unsigned integers and integer
* vectors. Vertex shader inputs cannot be arrays or
* structures."
*
* From section 4.3.4 of the ARB_bindless_texture spec:
*
* "(modify third paragraph of the section to allow sampler and
* image types) ... Vertex shader inputs can only be float,
* single-precision floating-point scalars, single-precision
* floating-point vectors, matrices, signed and unsigned
* integers and integer vectors, sampler and image types."
*/
const glsl_type *check_type = var->type->without_array();
@@ -5176,12 +4966,6 @@ ast_declarator_list::hir(exec_list *instructions,
case GLSL_TYPE_DOUBLE:
if (check_type->is_double() && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable))
break;
case GLSL_TYPE_SAMPLER:
if (check_type->is_sampler() && state->has_bindless())
break;
case GLSL_TYPE_IMAGE:
if (check_type->is_image() && state->has_bindless())
break;
/* FALLTHROUGH */
default:
_mesa_glsl_error(& loc, state,
@@ -5420,23 +5204,11 @@ ast_declarator_list::hir(exec_list *instructions,
*
* "[Opaque types] can only be declared as function
* parameters or uniform-qualified variables."
*
* From section 4.1.7 of the ARB_bindless_texture spec:
*
* "Samplers may be declared as shader inputs and outputs, as uniform
* variables, as temporary variables, and as function parameters."
*
* From section 4.1.X of the ARB_bindless_texture spec:
*
* "Images may be declared as shader inputs and outputs, as uniform
* variables, as temporary variables, and as function parameters."
*/
if (!this->type->qualifier.flags.q.uniform &&
(var_type->contains_atomic() ||
(!state->has_bindless() && var_type->contains_opaque()))) {
if (var_type->contains_opaque() &&
!this->type->qualifier.flags.q.uniform) {
_mesa_glsl_error(&loc, state,
"%s variables must be declared uniform",
state->has_bindless() ? "atomic" : "opaque");
"opaque variables must be declared uniform");
}
/* Process the initializer and add its instructions to a temporary
@@ -5666,23 +5438,11 @@ ast_parameter_declarator::hir(exec_list *instructions,
* "Opaque variables cannot be treated as l-values; hence cannot
* be used as out or inout function parameters, nor can they be
* assigned into."
*
* From section 4.1.7 of the ARB_bindless_texture spec:
*
* "Samplers can be used as l-values, so can be assigned into and used
* as "out" and "inout" function parameters."
*
* From section 4.1.X of the ARB_bindless_texture spec:
*
* "Images can be used as l-values, so can be assigned into and used as
* "out" and "inout" function parameters."
*/
if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out)
&& (type->contains_atomic() ||
(!state->has_bindless() && type->contains_opaque()))) {
&& type->contains_opaque()) {
_mesa_glsl_error(&loc, state, "out and inout parameters cannot "
"contain %s variables",
state->has_bindless() ? "atomic" : "opaque");
"contain opaque variables");
type = glsl_type::error_type;
}
@@ -5849,33 +5609,16 @@ ast_function::hir(exec_list *instructions,
"sized", name);
}
/* From Section 6.1 (Function Definitions) of the GLSL 1.00 spec:
*
* "Arrays are allowed as arguments, but not as the return type. [...]
* The return type can also be a structure if the structure does not
* contain an array."
*/
if (state->language_version == 100 && return_type->contains_array()) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(& loc, state,
"function `%s' return type contains an array", name);
}
/* From section 4.1.7 of the GLSL 4.40 spec:
*
* "[Opaque types] can only be declared as function parameters
* or uniform-qualified variables."
*
* The ARB_bindless_texture spec doesn't clearly state this, but as it says
* "Replace Section 4.1.7 (Samplers), p. 25" and, "Replace Section 4.1.X,
* (Images)", this should be allowed.
*/
if (return_type->contains_atomic() ||
(!state->has_bindless() && return_type->contains_opaque())) {
if (return_type->contains_opaque()) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(&loc, state,
"function `%s' return type can't contain an %s type",
name, state->has_bindless() ? "atomic" : "opaque");
"function `%s' return type can't contain an opaque type",
name);
}
/**/
@@ -5912,27 +5655,16 @@ ast_function::hir(exec_list *instructions,
* "User code can overload the built-in functions but cannot redefine
* them."
*/
if (state->es_shader) {
if (state->es_shader && state->language_version >= 300) {
/* Local shader has no exact candidates; check the built-ins. */
_mesa_glsl_initialize_builtin_functions();
if (state->language_version >= 300 &&
_mesa_glsl_has_builtin_function(name)) {
if (_mesa_glsl_has_builtin_function(name)) {
YYLTYPE loc = this->get_location();
_mesa_glsl_error(& loc, state,
"A shader cannot redefine or overload built-in "
"function `%s' in GLSL ES 3.00", name);
return NULL;
}
if (state->language_version == 100) {
ir_function_signature *sig =
_mesa_glsl_find_builtin_function(state, name, &hir_parameters);
if (sig && sig->is_builtin()) {
_mesa_glsl_error(& loc, state,
"A shader cannot redefine built-in "
"function `%s' in GLSL ES 1.00", name);
}
}
}
/* Verify that this function's signature either doesn't match a previously
@@ -5968,16 +5700,6 @@ ast_function::hir(exec_list *instructions,
*/
return NULL;
}
} else if (state->language_version == 100 && !is_definition) {
/* From the GLSL 1.00 spec, section 4.2.7:
*
* "A particular variable, structure or function declaration
* may occur at most once within a scope with the exception
* that a single function prototype plus the corresponding
* function definition are allowed."
*/
YYLTYPE loc = this->get_location();
_mesa_glsl_error(&loc, state, "function `%s' redeclared", name);
}
}
}
@@ -7087,19 +6809,9 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
assert(decl_type);
if (is_interface) {
/* From section 4.3.7 of the ARB_bindless_texture spec:
*
* "(remove the following bullet from the last list on p. 39,
* thereby permitting sampler types in interface blocks; image
* types are also permitted in blocks by this extension)"
*
* * sampler types are not allowed
*/
if (decl_type->contains_atomic() ||
(!state->has_bindless() && decl_type->contains_opaque())) {
if (decl_type->contains_opaque()) {
_mesa_glsl_error(&loc, state, "uniform/buffer in non-default "
"interface block contains %s variable",
state->has_bindless() ? "atomic" : "opaque");
"interface block contains opaque variable");
}
} else {
if (decl_type->contains_atomic()) {
@@ -7111,7 +6823,7 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
_mesa_glsl_error(&loc, state, "atomic counter in structure");
}
if (!state->has_bindless() && decl_type->contains_image()) {
if (decl_type->contains_image()) {
/* FINISHME: Same problem as with atomic counters.
* FINISHME: Request clarification from Khronos and add
* FINISHME: spec quotation here.
@@ -7164,9 +6876,6 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
"to struct or interface block members");
}
validate_memory_qualifier_for_type(state, &loc, qual, decl_type);
validate_image_format_qualifier_for_type(state, &loc, qual, decl_type);
/* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec:
*
* "A block member may be declared with a stream identifier, but
@@ -7409,56 +7118,33 @@ ast_process_struct_or_iface_block_members(exec_list *instructions,
|| fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
}
/* Memory qualifiers are allowed on buffer and image variables, while
* the format qualifier is only accepted for images.
/* Image qualifiers are allowed on buffer variables, which can only
* be defined inside shader storage buffer objects
*/
if (var_mode == ir_var_shader_storage ||
field_type->without_array()->is_image()) {
if (layout && var_mode == ir_var_shader_storage) {
/* For readonly and writeonly qualifiers the field definition,
* if set, overwrites the layout qualifier.
*/
if (qual->flags.q.read_only) {
fields[i].memory_read_only = true;
fields[i].memory_write_only = false;
fields[i].image_read_only = true;
fields[i].image_write_only = false;
} else if (qual->flags.q.write_only) {
fields[i].memory_read_only = false;
fields[i].memory_write_only = true;
fields[i].image_read_only = false;
fields[i].image_write_only = true;
} else {
fields[i].memory_read_only =
layout ? layout->flags.q.read_only : 0;
fields[i].memory_write_only =
layout ? layout->flags.q.write_only : 0;
fields[i].image_read_only = layout->flags.q.read_only;
fields[i].image_write_only = layout->flags.q.write_only;
}
/* For other qualifiers, we set the flag if either the layout
* qualifier or the field qualifier are set
*/
fields[i].memory_coherent = qual->flags.q.coherent ||
(layout && layout->flags.q.coherent);
fields[i].memory_volatile = qual->flags.q._volatile ||
(layout && layout->flags.q._volatile);
fields[i].memory_restrict = qual->flags.q.restrict_flag ||
(layout && layout->flags.q.restrict_flag);
if (field_type->without_array()->is_image()) {
if (qual->flags.q.explicit_image_format) {
if (qual->image_base_type !=
field_type->without_array()->sampled_type) {
_mesa_glsl_error(&loc, state, "format qualifier doesn't "
"match the base data type of the image");
}
fields[i].image_format = qual->image_format;
} else {
if (!qual->flags.q.write_only) {
_mesa_glsl_error(&loc, state, "image not qualified with "
"`writeonly' must have a format layout "
"qualifier");
}
fields[i].image_format = GL_NONE;
}
}
fields[i].image_coherent = qual->flags.q.coherent ||
layout->flags.q.coherent;
fields[i].image_volatile = qual->flags.q._volatile ||
layout->flags.q._volatile;
fields[i].image_restrict = qual->flags.q.restrict_flag ||
layout->flags.q.restrict_flag;
}
i++;
@@ -7507,12 +7193,13 @@ ast_struct_specifier::hir(exec_list *instructions,
validate_identifier(this->name, loc, state);
type = glsl_type::get_record_instance(fields, decl_count, this->name);
const glsl_type *t =
glsl_type::get_record_instance(fields, decl_count, this->name);
if (!type->is_anonymous() && !state->symbols->add_type(name, type)) {
if (!state->symbols->add_type(name, t)) {
const glsl_type *match = state->symbols->get_type(name);
/* allow struct matching for desktop GL - older UE4 does this */
if (match != NULL && state->is_version(130, 0) && match->record_compare(type, false))
if (match != NULL && state->is_version(130, 0) && match->record_compare(t, false))
_mesa_glsl_warning(& loc, state, "struct `%s' previously defined", name);
else
_mesa_glsl_error(& loc, state, "struct `%s' previously defined", name);
@@ -7521,7 +7208,7 @@ ast_struct_specifier::hir(exec_list *instructions,
const glsl_type *,
state->num_user_structures + 1);
if (s != NULL) {
s[state->num_user_structures] = type;
s[state->num_user_structures] = t;
state->user_structures = s;
state->num_user_structures++;
}
@@ -7581,11 +7268,11 @@ is_unsized_array_last_element(ir_variable *v)
static void
apply_memory_qualifiers(ir_variable *var, glsl_struct_field field)
{
var->data.memory_read_only = field.memory_read_only;
var->data.memory_write_only = field.memory_write_only;
var->data.memory_coherent = field.memory_coherent;
var->data.memory_volatile = field.memory_volatile;
var->data.memory_restrict = field.memory_restrict;
var->data.image_read_only = field.image_read_only;
var->data.image_write_only = field.image_write_only;
var->data.image_coherent = field.image_coherent;
var->data.image_volatile = field.image_volatile;
var->data.image_restrict = field.image_restrict;
}
ir_rvalue *


@@ -69,10 +69,6 @@ ast_type_qualifier::has_layout() const
|| this->flags.q.column_major
|| this->flags.q.row_major
|| this->flags.q.packed
|| this->flags.q.bindless_sampler
|| this->flags.q.bindless_image
|| this->flags.q.bound_sampler
|| this->flags.q.bound_image
|| this->flags.q.explicit_align
|| this->flags.q.explicit_component
|| this->flags.q.explicit_location
@@ -185,33 +181,6 @@ validate_point_mode(MAYBE_UNUSED const ast_type_qualifier &qualifier,
return true;
}
static void
merge_bindless_qualifier(YYLTYPE *loc,
_mesa_glsl_parse_state *state,
const ast_type_qualifier &qualifier,
const ast_type_qualifier &new_qualifier)
{
if (state->default_uniform_qualifier->flags.q.bindless_sampler) {
state->bindless_sampler_specified = true;
state->default_uniform_qualifier->flags.q.bindless_sampler = false;
}
if (state->default_uniform_qualifier->flags.q.bindless_image) {
state->bindless_image_specified = true;
state->default_uniform_qualifier->flags.q.bindless_image = false;
}
if (state->default_uniform_qualifier->flags.q.bound_sampler) {
state->bound_sampler_specified = true;
state->default_uniform_qualifier->flags.q.bound_sampler = false;
}
if (state->default_uniform_qualifier->flags.q.bound_image) {
state->bound_image_specified = true;
state->default_uniform_qualifier->flags.q.bound_image = false;
}
}
/**
* This function merges duplicate layout identifiers.
*
@@ -274,16 +243,6 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
input_layout_mask.flags.q.sample = 1;
input_layout_mask.flags.q.smooth = 1;
if (state->has_bindless()) {
/* Allow to use image qualifiers with shader inputs/outputs. */
input_layout_mask.flags.q.coherent = 1;
input_layout_mask.flags.q._volatile = 1;
input_layout_mask.flags.q.restrict_flag = 1;
input_layout_mask.flags.q.read_only = 1;
input_layout_mask.flags.q.write_only = 1;
input_layout_mask.flags.q.explicit_image_format = 1;
}
/* Uniform block layout qualifiers get to overwrite each
* other (rightmost having priority), while all other
* qualifiers currently don't allow duplicates.
@@ -434,18 +393,6 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
if (q.flags.q.local_size_variable)
this->flags.q.local_size_variable = true;
if (q.flags.q.bindless_sampler)
this->flags.q.bindless_sampler = true;
if (q.flags.q.bindless_image)
this->flags.q.bindless_image = true;
if (q.flags.q.bound_sampler)
this->flags.q.bound_sampler = true;
if (q.flags.q.bound_image)
this->flags.q.bound_image = true;
this->flags.i |= q.flags.i;
if (this->flags.q.in &&
@@ -480,12 +427,6 @@ ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
this->image_base_type = q.image_base_type;
}
if (q.flags.q.bindless_sampler ||
q.flags.q.bindless_image ||
q.flags.q.bound_sampler ||
q.flags.q.bound_image)
merge_bindless_qualifier(loc, state, *this, q);
return r;
}
@@ -837,10 +778,6 @@ ast_type_qualifier::validate_flags(YYLTYPE *loc,
bad.flags.q.subroutine ? " subroutine" : "",
bad.flags.q.blend_support ? " blend_support" : "",
bad.flags.q.inner_coverage ? " inner_coverage" : "",
bad.flags.q.bindless_sampler ? " bindless_sampler" : "",
bad.flags.q.bindless_image ? " bindless_image" : "",
bad.flags.q.bound_sampler ? " bound_sampler" : "",
bad.flags.q.bound_image ? " bound_image" : "",
bad.flags.q.post_depth_coverage ? " post_depth_coverage" : "");
return false;
}

Some files were not shown because too many files have changed in this diff.