Compare commits

64 Commits

mesa-17.3. ... chadv/wip/

| SHA1 |
|---|
| 3302281f00 |
| 54f07a7ebc |
| bf85c6b160 |
| 272fd36b24 |
| 2aaeab9fdd |
| 0d7eae5847 |
| 3e8d93e1ff |
| b239996965 |
| b16fbdb135 |
| c9bda01108 |
| 68ceff0712 |
| d7681cc943 |
| a0f76c6a3b |
| 7c5b8d163f |
| 667a7dfd55 |
| d9619b2fba |
| 26796ca5ca |
| f45c9bb5d6 |
| 9b34b2cee4 |
| 96f37fa7e0 |
| 730d7edbd7 |
| 64278fcd0c |
| c8541b9630 |
| 7f3baaf5c7 |
| 5c32cb2c08 |
| fd1aa710bc |
| 7d019c72d8 |
| 6613048d9a |
| 037026e90f |
| ec23d1a68a |
| 68a7077012 |
| 3519cdfcfa |
| a058539d21 |
| 6efb3d854f |
| d78b9b2232 |
| dafe2a86ab |
| 7152fe4723 |
| 838e746fc9 |
| d1d6bf7605 |
| 14c7f4783a |
| 69fae1186f |
| 26ae9a5650 |
| 00898cd71d |
| 4e763cb3c1 |
| ed61e74c4d |
| 8142ac25cc |
| 794d5bbee5 |
| 6d25795e51 |
| 0647d5800f |
| e07a838408 |
| b17ff37e6a |
| 42c96d393b |
| 1171bddb74 |
| a10faeae28 |
| 5ec83db3fc |
| 0a04b702a8 |
| dab369a824 |
| e96314b6cf |
| d226caef7a |
| 4bcb10cacc |
| 403ab71152 |
| b178753c0a |
| 8940a624ae |
| 7cc5c96a9a |
PRESUBMIT.cfg (new file, 10 lines)
@@ -0,0 +1,10 @@
# This sample config file disables all of the ChromiumOS source style checks.
# Comment out the disable-flags for any checks you want to leave enabled.

[Hook Overrides]
stray_whitespace_check: false
long_line_check: false
cros_license_check: false
tab_check: false
bug_field_check: false
test_field_check: false
@@ -383,9 +383,11 @@ if test "x$GCC_ATOMIC_BUILTINS_SUPPORTED" = x1; then
   AC_MSG_CHECKING(whether -latomic is needed)
   AC_LINK_IFELSE([AC_LANG_SOURCE([[
   #include <stdint.h>
   uint64_t v;
   struct {
      uint64_t* v;
   } x;
   int main() {
      return (int)__atomic_load_n(&v, __ATOMIC_ACQUIRE);
      return (int)__atomic_load_n(x.v, __ATOMIC_ACQUIRE);
   }]])], GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=no, GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC=yes)
   AC_MSG_RESULT($GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC)
   if test "x$GCC_ATOMIC_BUILTINS_NEED_LIBATOMIC" = xyes; then
@@ -2408,12 +2410,13 @@ dnl Surfaceless is an alternative for the last one.
dnl
require_basic_egl() {
   case "$with_platforms" in
   *drm*|*surfaceless*)
   *drm*|*surfaceless*|*android*)
      ;;
   *)
      AC_MSG_ERROR([$1 requires one of these:
                    1) --with-platforms=drm (X, Wayland, offscreen rendering based on DRM)
                    2) --with-platforms=surfaceless (offscreen only)
                    3) --with-platforms=android (Android only)
                    Recommended options: drm,x11])
      ;;
   esac
@@ -48,6 +48,7 @@ typedef unsigned int drm_drawable_t;
typedef struct drm_clip_rect drm_clip_rect_t;
#endif

#include <stdbool.h>
#include <stdint.h>

/**
@@ -704,7 +705,8 @@ struct __DRIuseInvalidateExtensionRec {
#define __DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS 46
#define __DRI_ATTRIB_YINVERTED 47
#define __DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE 48
#define __DRI_ATTRIB_MAX (__DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE + 1)
#define __DRI_ATTRIB_MUTABLE_RENDER_BUFFER 49 /* EGL_MUTABLE_RENDER_BUFFER_BIT_KHR */
#define __DRI_ATTRIB_MAX 50

/* __DRI_ATTRIB_RENDER_TYPE */
#define __DRI_ATTRIB_RGBA_BIT 0x01
@@ -1810,7 +1812,48 @@ struct __DRI2rendererQueryExtensionRec {

enum __DRIimageBufferMask {
   __DRI_IMAGE_BUFFER_BACK = (1 << 0),
   __DRI_IMAGE_BUFFER_FRONT = (1 << 1)
   __DRI_IMAGE_BUFFER_FRONT = (1 << 1),

   /**
    * A buffer shared between application and compositor. The buffer may be
    * simultaneously accessed by each.
    *
    * A shared buffer is equivalent to an EGLSurface whose EGLConfig contains
    * EGL_MUTABLE_RENDER_BUFFER_BIT_KHR and whose active EGL_RENDER_BUFFER (as
    * opposed to any pending, requested change to EGL_RENDER_BUFFER) is
    * EGL_SINGLE_BUFFER.
    *
    * If the loader returns __DRI_IMAGE_BUFFER_SHARED, then it is returned
    * alone, without an accompanying back or front buffer.
    *
    * The loader returns __DRI_IMAGE_BUFFER_SHARED if and only if:
    * - The loader supports __DRI_MUTABLE_RENDER_BUFFER_LOADER.
    * - The driver supports __DRI_MUTABLE_RENDER_BUFFER_DRIVER.
    * - The EGLConfig of the drawable EGLSurface contains
    *   EGL_MUTABLE_RENDER_BUFFER_BIT_KHR.
    * - The EGLContext's EGL_RENDER_BUFFER is EGL_SINGLE_BUFFER.
    *   Equivalently, the EGLSurface's active EGL_RENDER_BUFFER (as
    *   opposed to any pending, requested change to EGL_RENDER_BUFFER) is
    *   EGL_SINGLE_BUFFER.
    *
    * A shared buffer is similar to a front buffer in that all rendering to the
    * buffer should appear promptly on the screen. It is different from
    * a front buffer in that its behavior is independent from the
    * GL_DRAW_BUFFER state. Specifically, if GL_DRAW_FRAMEBUFFER is 0 and the
    * __DRIdrawable's current buffer mask is __DRI_IMAGE_BUFFER_SHARED, then
    * all rendering should appear promptly on the screen if GL_DRAW_BUFFER is
    * not GL_NONE.
    *
    * The difference between a shared buffer and a front buffer is motivated
    * by the constraints of Android and OpenGL ES. OpenGL ES does not support
    * front-buffer rendering. Android's SurfaceFlinger protocol provides the
    * EGL driver only a back buffer and no front buffer. The shared buffer
    * mode introduced by EGL_KHR_mutable_render_buffer is a backdoor through
    * EGL that allows Android OpenGL ES applications to render to what is
    * effectively the front buffer, a backdoor that required no change to the
    * OpenGL ES API and little change to the SurfaceFlinger API.
    */
   __DRI_IMAGE_BUFFER_SHARED = (1 << 2),
};

struct __DRIimageList {
@@ -1949,4 +1992,83 @@ struct __DRIbackgroundCallableExtensionRec {
   GLboolean (*isThreadSafe)(void *loaderPrivate);
};

/**
 * The driver portion of EGL_KHR_mutable_render_buffer.
 *
 * If the driver creates a __DRIconfig with
 * __DRI_ATTRIB_MUTABLE_RENDER_BUFFER, then it must support this extension.
 *
 * To support this extension:
 *
 * - The driver should create at least one __DRIconfig with
 *   __DRI_ATTRIB_MUTABLE_RENDER_BUFFER. This is strongly recommended but
 *   not required.
 *
 * - The driver must be able to handle __DRI_IMAGE_BUFFER_SHARED if
 *   returned by __DRIimageLoaderExtension::getBuffers().
 *
 * - When rendering to __DRI_IMAGE_BUFFER_SHARED, it must call
 *   __DRImutableRenderBufferLoaderExtension::displaySharedBuffer() on each
 *   application-initiated flush. This includes glFlush, glFinish,
 *   GL_SYNC_FLUSH_COMMANDS_BIT, EGL_SYNC_FLUSH_COMMANDS_BIT, and possibly
 *   more. (Android applications expect that glFlush will immediately
 *   display the buffer when in shared buffer mode because that is common
 *   behavior among Android drivers). It :may: call displaySharedBuffer()
 *   more often than required.
 *
 * - When rendering to __DRI_IMAGE_BUFFER_SHARED, it must ensure that the
 *   buffer is always in a format compatible for display because the
 *   display engine (usually SurfaceFlinger or hwcomposer) may display the
 *   image at any time, even concurrently with 3D rendering. For example,
 *   display hardware and the GL hardware may be able to access the buffer
 *   simultaneously. In particular, if the buffer is compressed then take
 *   care that SurfaceFlinger and hwcomposer can consume the compression
 *   format.
 *
 * \see __DRI_IMAGE_BUFFER_SHARED
 * \see __DRI_ATTRIB_MUTABLE_RENDER_BUFFER
 * \see __DRI_MUTABLE_RENDER_BUFFER_LOADER
 */
#define __DRI_MUTABLE_RENDER_BUFFER_DRIVER "DRI_MutableRenderBufferDriver"
#define __DRI_MUTABLE_RENDER_BUFFER_DRIVER_VERSION 1

typedef struct __DRImutableRenderBufferDriverExtensionRec __DRImutableRenderBufferDriverExtension;
struct __DRImutableRenderBufferDriverExtensionRec {
   __DRIextension base;
};

/**
 * The loader portion of EGL_KHR_mutable_render_buffer.
 *
 * Requires loader extension DRI_IMAGE_LOADER, through which the loader sends
 * __DRI_IMAGE_BUFFER_SHARED to the driver.
 *
 * \see __DRI_MUTABLE_RENDER_BUFFER_DRIVER
 */
#define __DRI_MUTABLE_RENDER_BUFFER_LOADER "DRI_MutableRenderBufferLoader"
#define __DRI_MUTABLE_RENDER_BUFFER_LOADER_VERSION 1

typedef struct __DRImutableRenderBufferLoaderExtensionRec __DRImutableRenderBufferLoaderExtension;
struct __DRImutableRenderBufferLoaderExtensionRec {
   __DRIextension base;

   /**
    * Inform the display engine (usually SurfaceFlinger or hwcomposer)
    * that the __DRIdrawable has new content. The display engine may ignore
    * this, for example, if it continually refreshes and displays the buffer
    * on every frame, as in EGL_ANDROID_front_buffer_auto_refresh. On the
    * other extreme, the display engine may refresh and display the buffer
    * only in frames in which the driver calls this.
    *
    * If the fence_fd is not -1, then the display engine will display the
    * buffer only after the fence signals.
    *
    * The drawable's current __DRIimageBufferMask, as returned by
    * __DRIimageLoaderExtension::getBuffers(), must contain
    * __DRI_IMAGE_BUFFER_SHARED.
    */
   void (*displaySharedBuffer)(__DRIdrawable *drawable, int fence_fd,
                               void *loaderPrivate);
};

#endif
@@ -269,6 +269,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
      vce.available_rings ? vce_version : 0;
   info->has_userptr = true;
   info->has_syncobj = has_syncobj(fd);
   info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
   info->has_sync_file = info->has_syncobj && info->drm_minor >= 21;
   info->has_ctx_priority = info->drm_minor >= 22;
   info->num_render_backends = amdinfo->rb_pipes;
@@ -81,6 +81,7 @@ struct radeon_info {
   uint32_t drm_patchlevel;
   bool has_userptr;
   bool has_syncobj;
   bool has_syncobj_wait_for_submit;
   bool has_sync_file;
   bool has_ctx_priority;

@@ -99,6 +99,13 @@ VULKAN_LIB_DEPS += \
   $(WAYLAND_CLIENT_LIBS)
endif

if HAVE_PLATFORM_ANDROID
AM_CPPFLAGS += $(ANDROID_CPPFLAGS)
AM_CFLAGS += $(ANDROID_CFLAGS)
VULKAN_LIB_DEPS += $(ANDROID_LIBS)
VULKAN_SOURCES += $(VULKAN_ANDROID_FILES)
endif

noinst_LTLIBRARIES = libvulkan_common.la
libvulkan_common_la_SOURCES = $(VULKAN_SOURCES)

@@ -106,11 +113,14 @@ nodist_EXTRA_libvulkan_radeon_la_SOURCES = dummy.cpp
libvulkan_radeon_la_SOURCES = $(VULKAN_GEM_FILES)

vulkan_api_xml = $(top_srcdir)/src/vulkan/registry/vk.xml
vk_android_native_buffer_xml = $(top_srcdir)/src/vulkan/registry/vk_android_native_buffer.xml

radv_entrypoints.c: radv_entrypoints_gen.py radv_extensions.py $(vulkan_api_xml)
	$(MKDIR_GEN)
	$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_entrypoints_gen.py \
		--xml $(vulkan_api_xml) --outdir $(builddir)
		--xml $(vulkan_api_xml) \
		--xml $(vk_android_native_buffer_xml) \
		--outdir $(builddir)
radv_entrypoints.h: radv_entrypoints.c

radv_extensions.c: radv_extensions.py \
@@ -118,6 +128,7 @@ radv_extensions.c: radv_extensions.py \
	$(MKDIR_GEN)
	$(AM_V_GEN)$(PYTHON2) $(srcdir)/radv_extensions.py \
		--xml $(vulkan_api_xml) \
		--xml $(vk_android_native_buffer_xml) \
		--out $@

vk_format_table.c: vk_format_table.py \
@@ -69,6 +69,9 @@ VULKAN_FILES := \
	vk_format.h \
	$(RADV_WS_AMDGPU_FILES)

VULKAN_ANDROID_FILES := \
	radv_android.c

VULKAN_WSI_WAYLAND_FILES := \
	radv_wsi_wayland.c

@@ -29,10 +29,11 @@ radv_entrypoints = custom_target(

radv_extensions_c = custom_target(
  'radv_extensions.c',
  input : ['radv_extensions.py', vk_api_xml],
  input : ['radv_extensions.py', vk_api_xml, vk_android_native_buffer_xml],
  output : ['radv_extensions.c'],
  command : [prog_python2, '@INPUT0@', '--xml', '@INPUT1@',
             '--out', '@OUTPUT@'],
  command : [
    prog_python2, '@INPUT0@', '--xml', '@INPUT1@', '--xml', '@INPUT2@', '--out', '@OUTPUT@',
  ],
)

vk_format_table_c = custom_target(
src/amd/vulkan/radv_android.c (new file, 366 lines)
@@ -0,0 +1,366 @@
|
||||
/*
|
||||
* Copyright © 2017, Google Inc.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <hardware/gralloc.h>
|
||||
#include <hardware/hardware.h>
|
||||
#include <hardware/hwvulkan.h>
|
||||
#include <vulkan/vk_android_native_buffer.h>
|
||||
#include <vulkan/vk_icd.h>
|
||||
#include <libsync.h>
|
||||
|
||||
#include "radv_private.h"
|
||||
|
||||
static int radv_hal_open(const struct hw_module_t* mod, const char* id, struct hw_device_t** dev);
|
||||
static int radv_hal_close(struct hw_device_t *dev);
|
||||
|
||||
static void UNUSED
|
||||
static_asserts(void)
|
||||
{
|
||||
STATIC_ASSERT(HWVULKAN_DISPATCH_MAGIC == ICD_LOADER_MAGIC);
|
||||
}
|
||||
|
||||
PUBLIC struct hwvulkan_module_t HAL_MODULE_INFO_SYM = {
|
||||
.common = {
|
||||
.tag = HARDWARE_MODULE_TAG,
|
||||
.module_api_version = HWVULKAN_MODULE_API_VERSION_0_1,
|
||||
.hal_api_version = HARDWARE_MAKE_API_VERSION(1, 0),
|
||||
.id = HWVULKAN_HARDWARE_MODULE_ID,
|
||||
.name = "AMD Vulkan HAL",
|
||||
.author = "Google",
|
||||
.methods = &(hw_module_methods_t) {
|
||||
.open = radv_hal_open,
|
||||
},
|
||||
},
|
||||
};
|
||||
|
||||
/* If any bits in test_mask are set, then unset them and return true. */
|
||||
static inline bool
|
||||
unmask32(uint32_t *inout_mask, uint32_t test_mask)
|
||||
{
|
||||
uint32_t orig_mask = *inout_mask;
|
||||
*inout_mask &= ~test_mask;
|
||||
return *inout_mask != orig_mask;
|
||||
}
|
||||
|
||||
static int
|
||||
radv_hal_open(const struct hw_module_t* mod, const char* id,
|
||||
struct hw_device_t** dev)
|
||||
{
|
||||
assert(mod == &HAL_MODULE_INFO_SYM.common);
|
||||
assert(strcmp(id, HWVULKAN_DEVICE_0) == 0);
|
||||
|
||||
hwvulkan_device_t *hal_dev = malloc(sizeof(*hal_dev));
|
||||
if (!hal_dev)
|
||||
return -1;
|
||||
|
||||
*hal_dev = (hwvulkan_device_t) {
|
||||
.common = {
|
||||
.tag = HARDWARE_DEVICE_TAG,
|
||||
.version = HWVULKAN_DEVICE_API_VERSION_0_1,
|
||||
.module = &HAL_MODULE_INFO_SYM.common,
|
||||
.close = radv_hal_close,
|
||||
},
|
||||
.EnumerateInstanceExtensionProperties = radv_EnumerateInstanceExtensionProperties,
|
||||
.CreateInstance = radv_CreateInstance,
|
||||
.GetInstanceProcAddr = radv_GetInstanceProcAddr,
|
||||
};
|
||||
|
||||
*dev = &hal_dev->common;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
radv_hal_close(struct hw_device_t *dev)
|
||||
{
|
||||
/* hwvulkan.h claims that hw_device_t::close() is never called. */
|
||||
return -1;
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_image_from_gralloc(VkDevice device_h,
|
||||
const VkImageCreateInfo *base_info,
|
||||
const VkNativeBufferANDROID *gralloc_info,
|
||||
const VkAllocationCallbacks *alloc,
|
||||
VkImage *out_image_h)
|
||||
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, device_h);
|
||||
VkImage image_h = VK_NULL_HANDLE;
|
||||
struct radv_image *image = NULL;
|
||||
struct radv_bo *bo = NULL;
|
||||
VkResult result;
|
||||
|
||||
result = radv_image_create(device_h,
|
||||
&(struct radv_image_create_info) {
|
||||
.vk_info = base_info,
|
||||
.scanout = true,
|
||||
.no_metadata_planes = true},
|
||||
alloc,
|
||||
&image_h);
|
||||
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
if (gralloc_info->handle->numFds != 1) {
|
||||
return vk_errorf(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR,
|
||||
"VkNativeBufferANDROID::handle::numFds is %d, "
|
||||
"expected 1", gralloc_info->handle->numFds);
|
||||
}
|
||||
|
||||
/* Do not close the gralloc handle's dma_buf. The lifetime of the dma_buf
|
||||
* must exceed that of the gralloc handle, and we do not own the gralloc
|
||||
* handle.
|
||||
*/
|
||||
int dma_buf = gralloc_info->handle->data[0];
|
||||
|
||||
image = radv_image_from_handle(image_h);
|
||||
|
||||
VkDeviceMemory memory_h;
|
||||
|
||||
const VkMemoryDedicatedAllocateInfoKHR ded_alloc = {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO_KHR,
|
||||
.pNext = NULL,
|
||||
.buffer = VK_NULL_HANDLE,
|
||||
.image = image_h
|
||||
};
|
||||
|
||||
const VkImportMemoryFdInfoKHR import_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR,
|
||||
.pNext = &ded_alloc,
|
||||
.handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR,
|
||||
.fd = dup(dma_buf),
|
||||
};
|
||||
/* Find the first VRAM memory type, or GART for PRIME images. */
|
||||
int memory_type_index = -1;
|
||||
for (int i = 0; i < device->physical_device->memory_properties.memoryTypeCount; ++i) {
|
||||
bool is_local = !!(device->physical_device->memory_properties.memoryTypes[i].propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
if (is_local) {
|
||||
memory_type_index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* fallback */
|
||||
if (memory_type_index == -1)
|
||||
memory_type_index = 0;
|
||||
|
||||
result = radv_AllocateMemory(device_h,
|
||||
&(VkMemoryAllocateInfo) {
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = &import_info,
|
||||
.allocationSize = image->size,
|
||||
.memoryTypeIndex = memory_type_index,
|
||||
},
|
||||
alloc,
|
||||
&memory_h);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_create_image;
|
||||
|
||||
radv_BindImageMemory(device_h, image_h, memory_h, 0);
|
||||
|
||||
image->owned_memory = memory_h;
|
||||
/* Don't clobber the out-parameter until success is certain. */
|
||||
*out_image_h = image_h;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_create_image:
|
||||
fail_size:
|
||||
radv_DestroyImage(device_h, image_h, alloc);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
VkResult radv_GetSwapchainGrallocUsageANDROID(
|
||||
VkDevice device_h,
|
||||
VkFormat format,
|
||||
VkImageUsageFlags imageUsage,
|
||||
int* grallocUsage)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, device_h);
|
||||
struct radv_physical_device *phys_dev = device->physical_device;
|
||||
VkPhysicalDevice phys_dev_h = radv_physical_device_to_handle(phys_dev);
|
||||
VkResult result;
|
||||
|
||||
*grallocUsage = 0;
|
||||
|
||||
/* WARNING: Android Nougat's libvulkan.so hardcodes the VkImageUsageFlags
 * returned to applications via VkSurfaceCapabilitiesKHR::supportedUsageFlags.
 * The relevant code in libvulkan/swapchain.cpp contains this fun comment:
 *
 *     TODO(jessehall): I think these are right, but haven't thought hard
 *     about it. Do we need to query the driver for support of any of
 *     these?
 *
 * Any disagreement between this function and the hardcoded
 * VkSurfaceCapabilitiesKHR::supportedUsageFlags causes tests
 * dEQP-VK.wsi.android.swapchain.*.image_usage to fail.
 */

const VkPhysicalDeviceImageFormatInfo2KHR image_format_info = {
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2_KHR,
|
||||
.format = format,
|
||||
.type = VK_IMAGE_TYPE_2D,
|
||||
.tiling = VK_IMAGE_TILING_OPTIMAL,
|
||||
.usage = imageUsage,
|
||||
};
|
||||
|
||||
VkImageFormatProperties2KHR image_format_props = {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2_KHR,
|
||||
};
|
||||
|
||||
/* Check that requested format and usage are supported. */
|
||||
result = radv_GetPhysicalDeviceImageFormatProperties2KHR(phys_dev_h,
|
||||
&image_format_info, &image_format_props);
|
||||
if (result != VK_SUCCESS) {
|
||||
return vk_errorf(result,
|
||||
"radv_GetPhysicalDeviceImageFormatProperties2KHR failed "
|
||||
"inside %s", __func__);
|
||||
}
|
||||
|
||||
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_DST_BIT |
|
||||
VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT))
|
||||
*grallocUsage |= GRALLOC_USAGE_HW_RENDER;
|
||||
|
||||
if (unmask32(&imageUsage, VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
|
||||
VK_IMAGE_USAGE_SAMPLED_BIT |
|
||||
VK_IMAGE_USAGE_STORAGE_BIT |
|
||||
VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT))
|
||||
*grallocUsage |= GRALLOC_USAGE_HW_TEXTURE;
|
||||
|
||||
/* All VkImageUsageFlags not explicitly checked here are unsupported for
|
||||
* gralloc swapchains.
|
||||
*/
|
||||
if (imageUsage != 0) {
|
||||
return vk_errorf(VK_ERROR_FORMAT_NOT_SUPPORTED,
|
||||
"unsupported VkImageUsageFlags(0x%x) for gralloc "
|
||||
"swapchain", imageUsage);
|
||||
}
|
||||
|
||||
/*
|
||||
* FINISHME: Advertise all display-supported formats. Mostly
|
||||
* DRM_FORMAT_ARGB2101010 and DRM_FORMAT_ABGR2101010, but need to check
|
||||
* what we need for 30-bit colors.
|
||||
*/
|
||||
if (format == VK_FORMAT_B8G8R8A8_UNORM ||
|
||||
format == VK_FORMAT_B5G6R5_UNORM_PACK16) {
|
||||
*grallocUsage |= GRALLOC_USAGE_HW_FB |
|
||||
GRALLOC_USAGE_HW_COMPOSER |
|
||||
GRALLOC_USAGE_EXTERNAL_DISP;
|
||||
}
|
||||
|
||||
if (*grallocUsage == 0)
|
||||
return VK_ERROR_FORMAT_NOT_SUPPORTED;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_AcquireImageANDROID(
|
||||
VkDevice device,
|
||||
VkImage image_h,
|
||||
int nativeFenceFd,
|
||||
VkSemaphore semaphore,
|
||||
VkFence fence)
|
||||
{
|
||||
VkResult semaphore_result = VK_SUCCESS, fence_result = VK_SUCCESS;
|
||||
|
||||
if (semaphore != VK_NULL_HANDLE) {
|
||||
int semaphore_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd;
|
||||
semaphore_result = radv_ImportSemaphoreFdKHR(device,
|
||||
&(VkImportSemaphoreFdInfoKHR) {
|
||||
.sType = VK_STRUCTURE_TYPE_IMPORT_SEMAPHORE_FD_INFO_KHR,
|
||||
.flags = VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR,
|
||||
.fd = semaphore_fd,
|
||||
.semaphore = semaphore,
|
||||
});
|
||||
}
|
||||
|
||||
if (fence != VK_NULL_HANDLE) {
|
||||
int fence_fd = nativeFenceFd >= 0 ? dup(nativeFenceFd) : nativeFenceFd;
|
||||
fence_result = radv_ImportFenceFdKHR(device,
|
||||
&(VkImportFenceFdInfoKHR) {
|
||||
.sType = VK_STRUCTURE_TYPE_IMPORT_FENCE_FD_INFO_KHR,
|
||||
.flags = VK_FENCE_IMPORT_TEMPORARY_BIT_KHR,
|
||||
.fd = fence_fd,
|
||||
.fence = fence,
|
||||
});
|
||||
}
|
||||
|
||||
close(nativeFenceFd);
|
||||
|
||||
if (semaphore_result != VK_SUCCESS)
|
||||
return semaphore_result;
|
||||
return fence_result;
|
||||
}
|
||||
|
||||
VkResult
|
||||
radv_QueueSignalReleaseImageANDROID(
|
||||
VkQueue _queue,
|
||||
uint32_t waitSemaphoreCount,
|
||||
const VkSemaphore* pWaitSemaphores,
|
||||
VkImage image,
|
||||
int* pNativeFenceFd)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_queue, queue, _queue);
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
if (waitSemaphoreCount == 0) {
|
||||
if (pNativeFenceFd)
|
||||
*pNativeFenceFd = -1;
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
int fd = -1;
|
||||
|
||||
for (uint32_t i = 0; i < waitSemaphoreCount; ++i) {
|
||||
int tmp_fd;
|
||||
result = radv_GetSemaphoreFdKHR(radv_device_to_handle(queue->device),
|
||||
&(VkSemaphoreGetFdInfoKHR) {
|
||||
.sType = VK_STRUCTURE_TYPE_SEMAPHORE_GET_FD_INFO_KHR,
|
||||
.handleType = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR,
|
||||
.semaphore = pWaitSemaphores[i],
|
||||
}, &tmp_fd);
|
||||
if (result != VK_SUCCESS) {
|
||||
if (fd >= 0)
|
||||
close (fd);
|
||||
return result;
|
||||
}
|
||||
|
||||
if (fd < 0)
|
||||
fd = tmp_fd;
|
||||
else if (tmp_fd >= 0) {
|
||||
sync_accumulate("radv", &fd, tmp_fd);
|
||||
close(tmp_fd);
|
||||
}
|
||||
}
|
||||
|
||||
if (pNativeFenceFd) {
|
||||
*pNativeFenceFd = fd;
|
||||
} else if (fd >= 0) {
|
||||
close(fd);
|
||||
/* We still need to do the exports, to reset the semaphores, but
|
||||
* otherwise we don't wait on them. */
|
||||
}
|
||||
return VK_SUCCESS;
|
||||
}
|
@@ -1038,6 +1038,10 @@ VkResult radv_CreateDevice(
      }
   }

#ifdef ANDROID
   device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
#endif

#if HAVE_LLVM < 0x0400
   device->llvm_supports_spill = false;
#else
@@ -1794,12 +1798,14 @@ fail:
|
||||
static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
|
||||
int num_sems,
|
||||
const VkSemaphore *sems,
|
||||
VkFence _fence,
|
||||
bool reset_temp)
|
||||
{
|
||||
int syncobj_idx = 0, sem_idx = 0;
|
||||
|
||||
if (num_sems == 0)
|
||||
if (num_sems == 0 && _fence == VK_NULL_HANDLE)
|
||||
return VK_SUCCESS;
|
||||
|
||||
for (uint32_t i = 0; i < num_sems; i++) {
|
||||
RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
|
||||
|
||||
@@ -1809,6 +1815,12 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
|
||||
counts->sem_count++;
|
||||
}
|
||||
|
||||
if (_fence != VK_NULL_HANDLE) {
|
||||
RADV_FROM_HANDLE(radv_fence, fence, _fence);
|
||||
if (fence->temp_syncobj || fence->syncobj)
|
||||
counts->syncobj_count++;
|
||||
}
|
||||
|
||||
if (counts->syncobj_count) {
|
||||
counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
|
||||
if (!counts->syncobj)
|
||||
@@ -1837,6 +1849,14 @@ static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
|
||||
}
|
||||
}
|
||||
|
||||
if (_fence != VK_NULL_HANDLE) {
|
||||
RADV_FROM_HANDLE(radv_fence, fence, _fence);
|
||||
if (fence->temp_syncobj)
|
||||
counts->syncobj[syncobj_idx++] = fence->temp_syncobj;
|
||||
else if (fence->syncobj)
|
||||
counts->syncobj[syncobj_idx++] = fence->syncobj;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
@@ -1867,15 +1887,16 @@ VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
|
||||
int num_wait_sems,
|
||||
const VkSemaphore *wait_sems,
|
||||
int num_signal_sems,
|
||||
const VkSemaphore *signal_sems)
|
||||
const VkSemaphore *signal_sems,
|
||||
VkFence fence)
|
||||
{
|
||||
VkResult ret;
|
||||
memset(sem_info, 0, sizeof(*sem_info));
|
||||
|
||||
ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
|
||||
ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, VK_NULL_HANDLE, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
|
||||
ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, fence, false);
|
||||
if (ret)
|
||||
radv_free_sem_info(sem_info);
|
||||
|
||||
@@ -1885,6 +1906,32 @@ VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Signals fence as soon as all the work currently put on queue is done. */
|
||||
static VkResult radv_signal_fence(struct radv_queue *queue,
|
||||
struct radv_fence *fence)
|
||||
{
|
||||
int ret;
|
||||
VkResult result;
|
||||
struct radv_winsys_sem_info sem_info;
|
||||
|
||||
result = radv_alloc_sem_info(&sem_info, 0, NULL, 0, NULL,
|
||||
radv_fence_to_handle(fence));
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
ret = queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
|
||||
&queue->device->empty_cs[queue->queue_family_index],
|
||||
1, NULL, NULL, &sem_info,
|
||||
false, fence->fence);
|
||||
radv_free_sem_info(&sem_info);
|
||||
|
||||
/* TODO: find a better error */
|
||||
if (ret)
|
||||
return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult radv_QueueSubmit(
|
||||
VkQueue _queue,
|
||||
uint32_t submitCount,
|
||||
@@ -1941,7 +1988,8 @@ VkResult radv_QueueSubmit(
|
||||
pSubmits[i].waitSemaphoreCount,
|
||||
pSubmits[i].pWaitSemaphores,
|
||||
pSubmits[i].signalSemaphoreCount,
|
||||
pSubmits[i].pSignalSemaphores);
|
||||
pSubmits[i].pSignalSemaphores,
|
||||
_fence);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -2010,11 +2058,7 @@ VkResult radv_QueueSubmit(
|
||||
|
||||
if (fence) {
|
||||
if (!fence_emitted) {
|
||||
struct radv_winsys_sem_info sem_info = {0};
|
||||
ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
|
||||
&queue->device->empty_cs[queue->queue_family_index],
|
||||
1, NULL, NULL, &sem_info,
|
||||
false, base_fence);
|
||||
radv_signal_fence(queue, fence);
|
||||
}
|
||||
fence->submitted = true;
|
||||
}
|
||||
@@ -2506,7 +2550,8 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
|
||||
pBindInfo[i].waitSemaphoreCount,
|
||||
pBindInfo[i].pWaitSemaphores,
|
||||
pBindInfo[i].signalSemaphoreCount,
|
||||
pBindInfo[i].pSignalSemaphores);
|
||||
pBindInfo[i].pSignalSemaphores,
|
||||
_fence);
|
||||
if (result != VK_SUCCESS)
|
||||
return result;
|
||||
|
||||
@@ -2525,8 +2570,11 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
|
||||
|
||||
}
|
||||
|
||||
if (fence && !fence_emitted) {
|
||||
fence->signalled = true;
|
||||
if (fence) {
|
||||
if (!fence_emitted) {
|
||||
radv_signal_fence(queue, fence);
|
||||
}
|
||||
fence->submitted = true;
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -2539,6 +2587,11 @@ VkResult radv_CreateFence(
|
||||
VkFence* pFence)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
const VkExportFenceCreateInfoKHR *export =
|
||||
vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO_KHR);
|
||||
VkExternalFenceHandleTypeFlagsKHR handleTypes =
|
||||
export ? export->handleTypes : 0;
|
||||
|
||||
struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
|
||||
sizeof(*fence), 8,
|
||||
VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
@@ -2549,10 +2602,24 @@ VkResult radv_CreateFence(
|
||||
memset(fence, 0, sizeof(*fence));
|
||||
fence->submitted = false;
|
||||
fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
|
||||
fence->fence = device->ws->create_fence();
|
||||
if (!fence->fence) {
|
||||
vk_free2(&device->alloc, pAllocator, fence);
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
fence->temp_syncobj = 0;
|
||||
if (device->always_use_syncobj || handleTypes) {
|
||||
int ret = device->ws->create_syncobj(device->ws, &fence->syncobj);
|
||||
if (ret) {
|
||||
vk_free2(&device->alloc, pAllocator, fence);
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
|
||||
device->ws->signal_syncobj(device->ws, fence->syncobj);
|
||||
}
|
||||
fence->fence = NULL;
|
||||
} else {
|
||||
fence->fence = device->ws->create_fence();
|
||||
if (!fence->fence) {
|
||||
vk_free2(&device->alloc, pAllocator, fence);
|
||||
return VK_ERROR_OUT_OF_HOST_MEMORY;
|
||||
}
|
||||
fence->syncobj = 0;
|
||||
}
|
||||
|
||||
*pFence = radv_fence_to_handle(fence);
|
||||
@@ -2570,7 +2637,13 @@ void radv_DestroyFence(
|
||||
|
||||
if (!fence)
|
||||
return;
|
||||
device->ws->destroy_fence(fence->fence);
|
||||
|
||||
if (fence->temp_syncobj)
|
||||
device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
|
||||
if (fence->syncobj)
|
||||
device->ws->destroy_syncobj(device->ws, fence->syncobj);
|
||||
if (fence->fence)
|
||||
device->ws->destroy_fence(fence->fence);
|
||||
vk_free2(&device->alloc, pAllocator, fence);
|
||||
}
|
||||
|
||||
@@ -2605,6 +2678,18 @@ VkResult radv_WaitForFences(
|
||||
RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
|
||||
bool expired = false;
|
||||
|
||||
if (fence->temp_syncobj) {
|
||||
if (!device->ws->wait_syncobj(device->ws, fence->temp_syncobj, timeout))
|
||||
return VK_TIMEOUT;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fence->syncobj) {
|
||||
if (!device->ws->wait_syncobj(device->ws, fence->syncobj, timeout))
|
||||
return VK_TIMEOUT;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (fence->signalled)
|
||||
continue;
|
||||
|
||||
@@ -2621,13 +2706,26 @@ VkResult radv_WaitForFences(
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult radv_ResetFences(VkDevice device,
|
||||
VkResult radv_ResetFences(VkDevice _device,
|
||||
uint32_t fenceCount,
|
||||
const VkFence *pFences)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
|
||||
for (unsigned i = 0; i < fenceCount; ++i) {
|
||||
RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
|
||||
fence->submitted = fence->signalled = false;
|
||||
|
||||
/* Per spec, we first restore the permanent payload, and then reset, so
|
||||
* having a temp syncobj should not skip resetting the permanent syncobj. */
|
||||
if (fence->temp_syncobj) {
|
||||
device->ws->destroy_syncobj(device->ws, fence->temp_syncobj);
|
||||
fence->temp_syncobj = 0;
|
||||
}
|
||||
|
||||
if (fence->syncobj) {
|
||||
device->ws->reset_syncobj(device->ws, fence->syncobj);
|
||||
}
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -2638,11 +2736,20 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_fence, fence, _fence);
|
||||
|
||||
if (fence->temp_syncobj) {
|
||||
bool success = device->ws->wait_syncobj(device->ws, fence->temp_syncobj, 0);
|
||||
return success ? VK_SUCCESS : VK_NOT_READY;
|
||||
}
|
||||
|
||||
if (fence->syncobj) {
|
||||
bool success = device->ws->wait_syncobj(device->ws, fence->syncobj, 0);
|
||||
return success ? VK_SUCCESS : VK_NOT_READY;
|
||||
}
|
||||
|
||||
if (fence->signalled)
|
||||
return VK_SUCCESS;
|
||||
if (!fence->submitted)
|
||||
return VK_NOT_READY;
|
||||
|
||||
if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
|
||||
return VK_NOT_READY;
|
||||
|
||||
@@ -2672,9 +2779,8 @@ VkResult radv_CreateSemaphore(
|
||||
|
||||
sem->temp_syncobj = 0;
|
||||
/* create a syncobject if we are going to export this semaphore */
|
||||
if (handleTypes) {
|
||||
if (device->always_use_syncobj || handleTypes) {
|
||||
assert (device->physical_device->rad_info.has_syncobj);
|
||||
assert (handleTypes == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
|
||||
int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
|
||||
if (ret) {
|
||||
vk_free2(&device->alloc, pAllocator, sem);
|
||||
@@ -3523,18 +3629,59 @@ VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
|
||||
return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
|
||||
}
|
||||
|
||||
static VkResult radv_import_opaque_fd(struct radv_device *device,
|
||||
int fd,
|
||||
uint32_t *syncobj)
|
||||
{
|
||||
uint32_t syncobj_handle = 0;
|
||||
int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
|
||||
if (ret != 0)
|
||||
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
|
||||
|
||||
if (*syncobj)
|
||||
device->ws->destroy_syncobj(device->ws, *syncobj);
|
||||
|
||||
*syncobj = syncobj_handle;
|
||||
close(fd);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
static VkResult radv_import_sync_fd(struct radv_device *device,
|
||||
int fd,
|
||||
uint32_t *syncobj)
|
||||
{
|
||||
/* If we create a syncobj we do it locally so that if we have an error, we don't
|
||||
* leave a syncobj in an undetermined state in the fence. */
|
||||
uint32_t syncobj_handle = *syncobj;
|
||||
if (!syncobj_handle) {
|
||||
int ret = device->ws->create_syncobj(device->ws, &syncobj_handle);
|
||||
if (ret) {
|
||||
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
|
||||
}
|
||||
}
|
||||
|
||||
if (fd == -1) {
|
||||
device->ws->signal_syncobj(device->ws, syncobj_handle);
|
||||
} else {
|
||||
int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
|
||||
if (ret != 0)
|
||||
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
|
||||
}
|
||||
|
||||
*syncobj = syncobj_handle;
|
||||
if (fd != -1)
|
||||
close(fd);
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
|
||||
const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
|
||||
uint32_t syncobj_handle = 0;
|
||||
uint32_t *syncobj_dst = NULL;
|
||||
assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
|
||||
|
||||
int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
|
||||
if (ret != 0)
|
||||
return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
|
||||
|
||||
if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
|
||||
syncobj_dst = &sem->temp_syncobj;
|
||||
@@ -3542,12 +3689,14 @@ VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
|
||||
syncobj_dst = &sem->syncobj;
|
||||
}
|
||||
|
||||
if (*syncobj_dst)
|
||||
device->ws->destroy_syncobj(device->ws, *syncobj_dst);
|
||||
|
||||
*syncobj_dst = syncobj_handle;
|
||||
close(pImportSemaphoreFdInfo->fd);
|
||||
return VK_SUCCESS;
|
||||
switch(pImportSemaphoreFdInfo->handleType) {
|
||||
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
|
||||
return radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
|
||||
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
|
||||
return radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, syncobj_dst);
|
||||
default:
|
||||
unreachable("Unhandled semaphore handle type");
|
||||
}
|
||||
}
|
||||
|
||||
VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
|
||||
@@ -3559,12 +3708,30 @@ VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
|
||||
int ret;
|
||||
uint32_t syncobj_handle;
|
||||
|
||||
assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
|
||||
if (sem->temp_syncobj)
|
||||
syncobj_handle = sem->temp_syncobj;
|
||||
else
|
||||
syncobj_handle = sem->syncobj;
|
||||
ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
|
||||
|
||||
switch(pGetFdInfo->handleType) {
|
||||
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
|
||||
ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
|
||||
break;
|
||||
case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
|
||||
ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
|
||||
if (!ret) {
|
||||
if (sem->temp_syncobj) {
|
||||
close (sem->temp_syncobj);
|
||||
sem->temp_syncobj = 0;
|
||||
} else {
|
||||
device->ws->reset_syncobj(device->ws, syncobj_handle);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled semaphore handle type");
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
|
||||
return VK_SUCCESS;
|
||||
@@ -3575,7 +3742,17 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
|
||||
const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
|
||||
VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
|
||||
{
|
||||
if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
|
||||
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
|
||||
|
||||
/* Require has_syncobj_wait_for_submit for the syncobj signal ioctl introduced at virtually the same time */
|
||||
if (pdevice->rad_info.has_syncobj_wait_for_submit &&
|
||||
(pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
|
||||
pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
|
||||
pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
|
||||
VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
|
||||
} else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR) {
|
||||
pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
|
||||
pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
|
||||
@@ -3586,3 +3763,86 @@ void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
|
||||
pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
|
||||
}
|
||||
}
|
||||
|
||||
VkResult radv_ImportFenceFdKHR(VkDevice _device,
|
||||
const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
|
||||
uint32_t *syncobj_dst = NULL;
|
||||
|
||||
|
||||
if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT_KHR) {
|
||||
syncobj_dst = &fence->temp_syncobj;
|
||||
} else {
|
||||
syncobj_dst = &fence->syncobj;
|
||||
}
|
||||
|
||||
switch(pImportFenceFdInfo->handleType) {
|
||||
case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
|
||||
return radv_import_opaque_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
|
||||
case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
|
||||
return radv_import_sync_fd(device, pImportFenceFdInfo->fd, syncobj_dst);
|
||||
default:
|
||||
unreachable("Unhandled fence handle type");
|
||||
}
|
||||
}
|
||||
|
||||
VkResult radv_GetFenceFdKHR(VkDevice _device,
|
||||
const VkFenceGetFdInfoKHR *pGetFdInfo,
|
||||
int *pFd)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
|
||||
int ret;
|
||||
uint32_t syncobj_handle;
|
||||
|
||||
if (fence->temp_syncobj)
|
||||
syncobj_handle = fence->temp_syncobj;
|
||||
else
|
||||
syncobj_handle = fence->syncobj;
|
||||
|
||||
switch(pGetFdInfo->handleType) {
|
||||
case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR:
|
||||
ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
|
||||
break;
|
||||
case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR:
|
||||
ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
|
||||
if (!ret) {
|
||||
if (fence->temp_syncobj) {
|
||||
close (fence->temp_syncobj);
|
||||
fence->temp_syncobj = 0;
|
||||
} else {
|
||||
device->ws->reset_syncobj(device->ws, syncobj_handle);
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
unreachable("Unhandled fence handle type");
|
||||
}
|
||||
|
||||
if (ret)
|
||||
return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
void radv_GetPhysicalDeviceExternalFencePropertiesKHR(
|
||||
VkPhysicalDevice physicalDevice,
|
||||
const VkPhysicalDeviceExternalFenceInfoKHR* pExternalFenceInfo,
|
||||
VkExternalFencePropertiesKHR* pExternalFenceProperties)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
|
||||
|
||||
if (pdevice->rad_info.has_syncobj_wait_for_submit &&
|
||||
(pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR ||
|
||||
pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR)) {
|
||||
pExternalFenceProperties->exportFromImportedHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
|
||||
pExternalFenceProperties->compatibleHandleTypes = VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT_KHR;
|
||||
pExternalFenceProperties->externalFenceFeatures = VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT_KHR |
|
||||
VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
|
||||
} else {
|
||||
pExternalFenceProperties->exportFromImportedHandleTypes = 0;
|
||||
pExternalFenceProperties->compatibleHandleTypes = 0;
|
||||
pExternalFenceProperties->externalFenceFeatures = 0;
|
||||
}
|
||||
}
|
||||
|
@@ -237,7 +237,9 @@ def get_entrypoints(doc, entrypoints_to_defines, start_index):
        if extension.attrib['name'] not in supported:
            continue

        assert extension.attrib['supported'] == 'vulkan'
        if extension.attrib['supported'] != 'vulkan':
            continue

        for command in extension.findall('./require/command'):
            enabled_commands.add(command.attrib['name'])

@@ -50,9 +50,13 @@ class Extension:
# the those extension strings, then tests dEQP-VK.api.info.instance.extensions
# and dEQP-VK.api.info.device fail due to the duplicated strings.
EXTENSIONS = [
    Extension('VK_ANDROID_native_buffer', 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
    Extension('VK_KHR_bind_memory2', 1, True),
    Extension('VK_KHR_dedicated_allocation', 1, True),
    Extension('VK_KHR_descriptor_update_template', 1, True),
    Extension('VK_KHR_external_fence', 1, 'device->rad_info.has_syncobj_wait_for_submit'),
    Extension('VK_KHR_external_fence_capabilities', 1, True),
    Extension('VK_KHR_external_fence_fd', 1, 'device->rad_info.has_syncobj_wait_for_submit'),
    Extension('VK_KHR_external_memory', 1, True),
    Extension('VK_KHR_external_memory_capabilities', 1, True),
    Extension('VK_KHR_external_memory_fd', 1, True),
@@ -77,7 +81,6 @@ EXTENSIONS = [
    Extension('VK_KHR_xcb_surface', 6, 'VK_USE_PLATFORM_XCB_KHR'),
    Extension('VK_KHR_xlib_surface', 6, 'VK_USE_PLATFORM_XLIB_KHR'),
    Extension('VK_KHX_multiview', 1, True),
    Extension('VK_EXT_debug_report', 8, True),
    Extension('VK_EXT_global_priority', 1, 'device->rad_info.has_ctx_priority'),
    Extension('VK_AMD_draw_indirect_count', 1, True),
    Extension('VK_AMD_rasterization_order', 1, 'device->rad_info.chip_class >= VI && device->rad_info.max_se >= 2'),
@@ -904,29 +904,34 @@ radv_image_create(VkDevice _device,
|
||||
image->size = image->surface.surf_size;
|
||||
image->alignment = image->surface.surf_alignment;
|
||||
|
||||
/* Try to enable DCC first. */
|
||||
if (radv_image_can_enable_dcc(image)) {
|
||||
radv_image_alloc_dcc(image);
|
||||
} else {
|
||||
/* When DCC cannot be enabled, try CMASK. */
|
||||
image->surface.dcc_size = 0;
|
||||
if (radv_image_can_enable_cmask(image)) {
|
||||
radv_image_alloc_cmask(device, image);
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to enable FMASK for multisampled images. */
|
||||
if (radv_image_can_enable_fmask(image)) {
|
||||
radv_image_alloc_fmask(device, image);
|
||||
} else {
|
||||
/* Otherwise, try to enable HTILE for depth surfaces. */
|
||||
if (radv_image_can_enable_htile(image) &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
|
||||
radv_image_alloc_htile(image);
|
||||
image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
|
||||
if (!create_info->no_metadata_planes) {
|
||||
/* Try to enable DCC first. */
|
||||
if (radv_image_can_enable_dcc(image)) {
|
||||
radv_image_alloc_dcc(image);
|
||||
} else {
|
||||
image->surface.htile_size = 0;
|
||||
/* When DCC cannot be enabled, try CMASK. */
|
||||
image->surface.dcc_size = 0;
|
||||
if (radv_image_can_enable_cmask(image)) {
|
||||
radv_image_alloc_cmask(device, image);
|
||||
}
|
||||
}
|
||||
|
||||
/* Try to enable FMASK for multisampled images. */
|
||||
if (radv_image_can_enable_fmask(image)) {
|
||||
radv_image_alloc_fmask(device, image);
|
||||
} else {
|
||||
/* Otherwise, try to enable HTILE for depth surfaces. */
|
||||
if (radv_image_can_enable_htile(image) &&
|
||||
!(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
|
||||
radv_image_alloc_htile(image);
|
||||
image->tc_compatible_htile = image->surface.flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
|
||||
} else {
|
||||
image->surface.htile_size = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
image->surface.dcc_size = 0;
|
||||
image->surface.htile_size = 0;
|
||||
}
|
||||
|
||||
if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
|
||||
@@ -1114,6 +1119,15 @@ radv_CreateImage(VkDevice device,
|
||||
const VkAllocationCallbacks *pAllocator,
|
||||
VkImage *pImage)
|
||||
{
|
||||
#ifdef ANDROID
|
||||
const VkNativeBufferANDROID *gralloc_info =
|
||||
vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);
|
||||
|
||||
if (gralloc_info)
|
||||
return radv_image_from_gralloc(device, pCreateInfo, gralloc_info,
|
||||
pAllocator, pImage);
|
||||
#endif
|
||||
|
||||
return radv_image_create(device,
|
||||
&(struct radv_image_create_info) {
|
||||
.vk_info = pCreateInfo,
|
||||
@@ -1136,6 +1150,9 @@ radv_DestroyImage(VkDevice _device, VkImage _image,
|
||||
if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT)
|
||||
device->ws->buffer_destroy(image->bo);
|
||||
|
||||
if (image->owned_memory != VK_NULL_HANDLE)
|
||||
radv_FreeMemory(_device, image->owned_memory, pAllocator);
|
||||
|
||||
vk_free2(&device->alloc, pAllocator, image);
|
||||
}
|
||||
|
||||
|
@@ -69,6 +69,7 @@ typedef uint32_t xcb_window_t;
|
||||
#include <vulkan/vulkan.h>
|
||||
#include <vulkan/vulkan_intel.h>
|
||||
#include <vulkan/vk_icd.h>
|
||||
#include <vulkan/vk_android_native_buffer.h>
|
||||
|
||||
#include "radv_entrypoints.h"
|
||||
|
||||
@@ -532,6 +533,7 @@ struct radv_device {
|
||||
int queue_count[RADV_MAX_QUEUE_FAMILIES];
|
||||
struct radeon_winsys_cs *empty_cs[RADV_MAX_QUEUE_FAMILIES];
|
||||
|
||||
bool always_use_syncobj;
|
||||
bool llvm_supports_spill;
|
||||
bool has_distributed_tess;
|
||||
uint32_t tess_offchip_block_dw_size;
|
||||
@@ -1249,6 +1251,9 @@ struct radv_image {
   struct radv_cmask_info cmask;
   uint64_t clear_value_offset;
   uint64_t dcc_pred_offset;

   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
   VkDeviceMemory owned_memory;
};

/* Whether the image has a htile that is known consistent with the contents of
|
||||
@@ -1333,6 +1338,7 @@ struct radv_image_view {
|
||||
struct radv_image_create_info {
|
||||
const VkImageCreateInfo *vk_info;
|
||||
bool scanout;
|
||||
bool no_metadata_planes;
|
||||
};
|
||||
|
||||
VkResult radv_image_create(VkDevice _device,
|
||||
@@ -1340,6 +1346,13 @@ VkResult radv_image_create(VkDevice _device,
|
||||
const VkAllocationCallbacks* alloc,
|
||||
VkImage *pImage);
|
||||
|
||||
VkResult
|
||||
radv_image_from_gralloc(VkDevice device_h,
|
||||
const VkImageCreateInfo *base_info,
|
||||
const VkNativeBufferANDROID *gralloc_info,
|
||||
const VkAllocationCallbacks *alloc,
|
||||
VkImage *out_image_h);
|
||||
|
||||
void radv_image_view_init(struct radv_image_view *view,
|
||||
struct radv_device *device,
|
||||
const VkImageViewCreateInfo* pCreateInfo);
|
||||
@@ -1521,7 +1534,8 @@ VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
|
||||
int num_wait_sems,
|
||||
const VkSemaphore *wait_sems,
|
||||
int num_signal_sems,
|
||||
const VkSemaphore *signal_sems);
|
||||
const VkSemaphore *signal_sems,
|
||||
VkFence fence);
|
||||
void radv_free_sem_info(struct radv_winsys_sem_info *sem_info);
|
||||
|
||||
void
|
||||
@@ -1556,6 +1570,9 @@ struct radv_fence {
   struct radeon_winsys_fence *fence;
   bool submitted;
   bool signalled;

   uint32_t syncobj;
   uint32_t temp_syncobj;
};

struct radeon_winsys_sem;

@@ -256,9 +256,18 @@ struct radeon_winsys {
   int (*create_syncobj)(struct radeon_winsys *ws, uint32_t *handle);
   void (*destroy_syncobj)(struct radeon_winsys *ws, uint32_t handle);

   void (*reset_syncobj)(struct radeon_winsys *ws, uint32_t handle);
   void (*signal_syncobj)(struct radeon_winsys *ws, uint32_t handle);
   bool (*wait_syncobj)(struct radeon_winsys *ws, uint32_t handle, uint64_t timeout);

   int (*export_syncobj)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);
   int (*import_syncobj)(struct radeon_winsys *ws, int fd, uint32_t *syncobj);

   int (*export_syncobj_to_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int *fd);

   /* Note that this, unlike the normal import, uses an existing syncobj. */
   int (*import_syncobj_from_sync_file)(struct radeon_winsys *ws, uint32_t syncobj, int fd);

};

static inline void radeon_emit(struct radeon_winsys_cs *cs, uint32_t value)

@@ -194,19 +194,22 @@ radv_shader_compile_to_nir(struct radv_device *device,
         spec_entries[i].data32 = *(const uint32_t *)data;
      }
   }
   const struct nir_spirv_supported_extensions supported_ext = {
      .draw_parameters = true,
      .float64 = true,
      .image_read_without_format = true,
      .image_write_without_format = true,
      .tessellation = true,
      .int64 = true,
      .multiview = true,
      .variable_pointers = true,
   const struct spirv_to_nir_options spirv_options = {
      .caps = {
         .draw_parameters = true,
         .float64 = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .tessellation = true,
         .int64 = true,
         .multiview = true,
         .variable_pointers = true,
      },
   };
   entry_point = spirv_to_nir(spirv, module->size / 4,
                              spec_entries, num_spec_entries,
                              stage, entrypoint_name, &supported_ext, &nir_options);
                              stage, entrypoint_name,
                              &spirv_options, &nir_options);
   nir = entry_point->shader;
   assert(nir->info.stage == stage);
   nir_validate_shader(nir);

@@ -445,13 +445,14 @@ VkResult radv_GetSwapchainImagesKHR(
|
||||
}
|
||||
|
||||
VkResult radv_AcquireNextImageKHR(
|
||||
VkDevice device,
|
||||
VkDevice _device,
|
||||
VkSwapchainKHR _swapchain,
|
||||
uint64_t timeout,
|
||||
VkSemaphore semaphore,
|
||||
VkFence _fence,
|
||||
uint32_t* pImageIndex)
|
||||
{
|
||||
RADV_FROM_HANDLE(radv_device, device, _device);
|
||||
RADV_FROM_HANDLE(wsi_swapchain, swapchain, _swapchain);
|
||||
RADV_FROM_HANDLE(radv_fence, fence, _fence);
|
||||
|
||||
@@ -461,6 +462,11 @@ VkResult radv_AcquireNextImageKHR(
|
||||
if (fence && (result == VK_SUCCESS || result == VK_SUBOPTIMAL_KHR)) {
|
||||
fence->submitted = true;
|
||||
fence->signalled = true;
|
||||
if (fence->temp_syncobj) {
|
||||
device->ws->signal_syncobj(device->ws, fence->temp_syncobj);
|
||||
} else if (fence->syncobj) {
|
||||
device->ws->signal_syncobj(device->ws, fence->syncobj);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@@ -479,20 +485,6 @@ VkResult radv_QueuePresentKHR(
struct radeon_winsys_cs *cs;
const VkPresentRegionKHR *region = NULL;
VkResult item_result;
struct radv_winsys_sem_info sem_info;

item_result = radv_alloc_sem_info(&sem_info,
pPresentInfo->waitSemaphoreCount,
pPresentInfo->pWaitSemaphores,
0,
NULL);
if (pPresentInfo->pResults != NULL)
pPresentInfo->pResults[i] = item_result;
result = result == VK_SUCCESS ? item_result : result;
if (item_result != VK_SUCCESS) {
radv_free_sem_info(&sem_info);
continue;
}

assert(radv_device_from_handle(swapchain->device) == queue->device);
if (swapchain->fences[0] == VK_NULL_HANDLE) {
@@ -505,7 +497,6 @@ VkResult radv_QueuePresentKHR(
pPresentInfo->pResults[i] = item_result;
result = result == VK_SUCCESS ? item_result : result;
if (item_result != VK_SUCCESS) {
radv_free_sem_info(&sem_info);
continue;
}
} else {
@@ -513,6 +504,22 @@ VkResult radv_QueuePresentKHR(
1, &swapchain->fences[0]);
}

struct radv_winsys_sem_info sem_info;

item_result = radv_alloc_sem_info(&sem_info,
pPresentInfo->waitSemaphoreCount,
pPresentInfo->pWaitSemaphores,
0,
NULL,
swapchain->fences[0]);
if (pPresentInfo->pResults != NULL)
pPresentInfo->pResults[i] = item_result;
result = result == VK_SUCCESS ? item_result : result;
if (item_result != VK_SUCCESS) {
radv_free_sem_info(&sem_info);
continue;
}

if (swapchain->needs_linear_copy) {
int idx = (queue->queue_family_index * swapchain->image_count) + pPresentInfo->pImageIndices[i];
cs = radv_cmd_buffer_from_handle(swapchain->cmd_buffers[idx])->cs;
@@ -1257,6 +1257,43 @@ static void radv_amdgpu_destroy_syncobj(struct radeon_winsys *_ws,
amdgpu_cs_destroy_syncobj(ws->dev, handle);
}

static void radv_amdgpu_reset_syncobj(struct radeon_winsys *_ws,
uint32_t handle)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
amdgpu_cs_syncobj_reset(ws->dev, &handle, 1);
}

static void radv_amdgpu_signal_syncobj(struct radeon_winsys *_ws,
uint32_t handle)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
amdgpu_cs_syncobj_signal(ws->dev, &handle, 1);
}

static bool radv_amdgpu_wait_syncobj(struct radeon_winsys *_ws,
uint32_t handle, uint64_t timeout)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
uint32_t tmp;

/* The timeouts are signed, while vulkan timeouts are unsigned. */
timeout = MIN2(timeout, INT64_MAX);

int ret = amdgpu_cs_syncobj_wait(ws->dev, &handle, 1, timeout,
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_FOR_SUBMIT |
DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL,
&tmp);
if (ret == 0) {
return true;
} else if (ret == -1 && errno == ETIME) {
return false;
} else {
fprintf(stderr, "amdgpu: radv_amdgpu_wait_syncobj failed!\nerrno: %d\n", errno);
return false;
}
}

static int radv_amdgpu_export_syncobj(struct radeon_winsys *_ws,
uint32_t syncobj,
int *fd)
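The MIN2 clamp in radv_amdgpu_wait_syncobj exists because Vulkan expresses timeouts as unsigned 64-bit nanoseconds (UINT64_MAX meaning "wait forever"), while the DRM syncobj wait interface takes a signed 64-bit timeout. A small standalone sketch of that conversion, with an illustrative helper name not taken from the diff:

```c
#include <stdint.h>
#include <stdio.h>

/* Illustrative helper: convert a Vulkan-style unsigned timeout into the
 * signed value the DRM syncobj interfaces expect. Anything at or above
 * INT64_MAX is effectively "wait forever". */
static int64_t vk_timeout_to_drm(uint64_t vk_timeout_ns)
{
   return vk_timeout_ns > INT64_MAX ? INT64_MAX : (int64_t)vk_timeout_ns;
}

int main(void)
{
   printf("%lld\n", (long long)vk_timeout_to_drm(UINT64_MAX)); /* prints INT64_MAX */
   printf("%lld\n", (long long)vk_timeout_to_drm(1000000));    /* prints 1000000 */
   return 0;
}
```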
@@ -1275,6 +1312,25 @@ static int radv_amdgpu_import_syncobj(struct radeon_winsys *_ws,
return amdgpu_cs_import_syncobj(ws->dev, fd, syncobj);
}


static int radv_amdgpu_export_syncobj_to_sync_file(struct radeon_winsys *_ws,
uint32_t syncobj,
int *fd)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);

return amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, fd);
}

static int radv_amdgpu_import_syncobj_from_sync_file(struct radeon_winsys *_ws,
uint32_t syncobj,
int fd)
{
struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);

return amdgpu_cs_syncobj_import_sync_file(ws->dev, syncobj, fd);
}

void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
{
ws->base.ctx_create = radv_amdgpu_ctx_create;
@@ -1295,7 +1351,12 @@ void radv_amdgpu_cs_init_functions(struct radv_amdgpu_winsys *ws)
ws->base.destroy_sem = radv_amdgpu_destroy_sem;
ws->base.create_syncobj = radv_amdgpu_create_syncobj;
ws->base.destroy_syncobj = radv_amdgpu_destroy_syncobj;
ws->base.reset_syncobj = radv_amdgpu_reset_syncobj;
ws->base.signal_syncobj = radv_amdgpu_signal_syncobj;
ws->base.wait_syncobj = radv_amdgpu_wait_syncobj;
ws->base.export_syncobj = radv_amdgpu_export_syncobj;
ws->base.import_syncobj = radv_amdgpu_import_syncobj;
ws->base.export_syncobj_to_sync_file = radv_amdgpu_export_syncobj_to_sync_file;
ws->base.import_syncobj_from_sync_file = radv_amdgpu_import_syncobj_from_sync_file;
ws->base.fence_wait = radv_amdgpu_fence_wait;
}
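The two new sync_file hooks are thin wrappers over libdrm calls that the diff itself uses (amdgpu_cs_syncobj_export_sync_file / amdgpu_cs_syncobj_import_sync_file). A hedged sketch of how a caller could copy one syncobj's current fence payload into another by round-tripping it through a sync_file fd; the helper and variable names are illustrative, not part of the diff:

```c
#include <unistd.h>
#include <amdgpu.h>

/* Illustrative only: move src's current fence payload into dst by
 * exporting it as a sync_file fd and importing that fd into dst. */
static int copy_syncobj_payload(amdgpu_device_handle dev,
                                uint32_t src, uint32_t dst)
{
   int fd = -1;
   int ret = amdgpu_cs_syncobj_export_sync_file(dev, src, &fd);
   if (ret)
      return ret;

   ret = amdgpu_cs_syncobj_import_sync_file(dev, dst, fd);
   close(fd);
   return ret;
}
```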
@@ -225,10 +225,12 @@ expanded_line:
glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
_glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value);
}
| LINE_EXPANDED integer_constant NEWLINE {
| LINE_EXPANDED expression NEWLINE {
if (parser->is_gles && $2.undefined_macro)
glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro);
parser->has_new_line_number = 1;
parser->new_line_number = $2;
_mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2);
parser->new_line_number = $2.value;
_mesa_string_buffer_printf(parser->output, "#line %" PRIiMAX "\n", $2.value);
}
| LINE_EXPANDED integer_constant integer_constant NEWLINE {
parser->has_new_line_number = 1;
@@ -239,6 +241,19 @@ expanded_line:
"#line %" PRIiMAX " %" PRIiMAX "\n",
$2, $3);
}
| LINE_EXPANDED '(' expression ')' '(' expression ')' NEWLINE {
if (parser->is_gles && $3.undefined_macro)
glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $3.undefined_macro);
if (parser->is_gles && $6.undefined_macro)
glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $6.undefined_macro);
parser->has_new_line_number = 1;
parser->new_line_number = $3.value;
parser->has_new_source_number = 1;
parser->new_source_number = $6.value;
_mesa_string_buffer_printf(parser->output,
"#line %" PRIiMAX " %" PRIiMAX "\n",
$3.value, $6.value);
}
;

define:
@@ -637,6 +637,9 @@ private:
this->record_next_sampler))
return;

/* Avoid overflowing the sampler array. (crbug.com/141901) */
this->next_sampler = MIN2(this->next_sampler, MAX_SAMPLERS);

for (unsigned i = uniform->opaque[shader_type].index;
i < MIN2(this->next_sampler, MAX_SAMPLERS);
i++) {
@@ -42,24 +42,34 @@ struct nir_spirv_specialization {
};
};

struct nir_spirv_supported_extensions {
bool float64;
bool image_ms_array;
bool tessellation;
bool draw_parameters;
bool image_read_without_format;
bool image_write_without_format;
bool int64;
bool multiview;
bool variable_pointers;
struct spirv_to_nir_options {
/* Whether or not to lower all workgroup variable access to offsets
* up-front. This means you will _shared intrinsics instead of _var
* for workgroup data access.
*
* This is currently required for full variable pointers support.
*/
bool lower_workgroup_access_to_offsets;

struct {
bool float64;
bool image_ms_array;
bool tessellation;
bool draw_parameters;
bool image_read_without_format;
bool image_write_without_format;
bool int64;
bool multiview;
bool variable_pointers;
} caps;
};

nir_function *spirv_to_nir(const uint32_t *words, size_t word_count,
struct nir_spirv_specialization *specializations,
unsigned num_specializations,
gl_shader_stage stage, const char *entry_point_name,
const struct nir_spirv_supported_extensions *ext,
const nir_shader_compiler_options *options);
const struct spirv_to_nir_options *options,
const nir_shader_compiler_options *nir_options);

#ifdef __cplusplus
}
@@ -117,7 +117,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,

load->value = constant->values[0];

nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
val->def = &load->def;
} else {
assert(glsl_type_is_matrix(type));
@@ -133,7 +133,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,

load->value = constant->values[i];

nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
nir_instr_insert_before_cf_list(&b->nb.impl->body, &load->instr);
col_val->def = &load->def;

val->elems[i] = col_val;
@@ -729,6 +729,64 @@ translate_image_format(SpvImageFormat format)
}
}

static struct vtn_type *
vtn_type_layout_std430(struct vtn_builder *b, struct vtn_type *type,
uint32_t *size_out, uint32_t *align_out)
{
switch (type->base_type) {
case vtn_base_type_scalar: {
uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
*size_out = comp_size;
*align_out = comp_size;
return type;
}

case vtn_base_type_vector: {
uint32_t comp_size = glsl_get_bit_size(type->type) / 8;
assert(type->length > 0 && type->length <= 4);
unsigned align_comps = type->length == 3 ? 4 : type->length;
*size_out = comp_size * type->length,
*align_out = comp_size * align_comps;
return type;
}

case vtn_base_type_matrix:
case vtn_base_type_array: {
/* We're going to add an array stride */
type = vtn_type_copy(b, type);
uint32_t elem_size, elem_align;
type->array_element = vtn_type_layout_std430(b, type->array_element,
&elem_size, &elem_align);
type->stride = vtn_align_u32(elem_size, elem_align);
*size_out = type->stride * type->length;
*align_out = elem_align;
return type;
}

case vtn_base_type_struct: {
/* We're going to add member offsets */
type = vtn_type_copy(b, type);
uint32_t offset = 0;
uint32_t align = 0;
for (unsigned i = 0; i < type->length; i++) {
uint32_t mem_size, mem_align;
type->members[i] = vtn_type_layout_std430(b, type->members[i],
&mem_size, &mem_align);
offset = vtn_align_u32(offset, mem_align);
type->offsets[i] = offset;
offset += mem_size;
align = MAX2(align, mem_align);
}
*size_out = offset;
*align_out = align;
return type;
}

default:
unreachable("Invalid SPIR-V type for std430");
}
}

static void
vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
const uint32_t *w, unsigned count)
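vtn_type_layout_std430() above assigns std430-style sizes and alignments so Workgroup variables can later be addressed by byte offset. A small self-contained sketch of the same rounding rules; the block layout used here is a made-up example, not taken from the diff:

```c
#include <stdint.h>
#include <stdio.h>

/* Same power-of-two rounding as vtn_align_u32() in the series. */
static uint32_t align_u32(uint32_t v, uint32_t a) { return (v + a - 1) & ~(a - 1); }

int main(void)
{
   /* Hypothetical shared block: { float f; vec3 v; float tail; } */
   uint32_t offset = 0;

   uint32_t f_off = offset;            /* float: size = align = 4 */
   offset += 4;

   offset = align_u32(offset, 16);     /* vec3: size 12, aligned like a vec4 */
   uint32_t v_off = offset;
   offset += 12;

   uint32_t tail_off = offset;         /* float packs into the remaining 4 bytes */
   offset += 4;

   /* Prints: f=0 v=16 tail=28 size=32 */
   printf("f=%u v=%u tail=%u size=%u\n", f_off, v_off, tail_off, offset);
   return 0;
}
```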
@@ -878,6 +936,19 @@ vtn_handle_type(struct vtn_builder *b, SpvOp opcode,
|
||||
*/
|
||||
val->type->type = glsl_vector_type(GLSL_TYPE_UINT, 2);
|
||||
}
|
||||
|
||||
if (storage_class == SpvStorageClassWorkgroup &&
|
||||
b->options->lower_workgroup_access_to_offsets) {
|
||||
uint32_t size, align;
|
||||
val->type->deref = vtn_type_layout_std430(b, val->type->deref,
|
||||
&size, &align);
|
||||
val->type->length = size;
|
||||
val->type->align = align;
|
||||
/* These can actually be stored to nir_variables and used as SSA
|
||||
* values so they need a real glsl_type.
|
||||
*/
|
||||
val->type->type = glsl_uint_type();
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -1394,8 +1465,11 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
|
||||
const uint32_t *w, unsigned count)
|
||||
{
|
||||
struct vtn_type *res_type = vtn_value(b, w[1], vtn_value_type_type)->type;
|
||||
struct nir_function *callee =
|
||||
vtn_value(b, w[3], vtn_value_type_function)->func->impl->function;
|
||||
struct vtn_function *vtn_callee =
|
||||
vtn_value(b, w[3], vtn_value_type_function)->func;
|
||||
struct nir_function *callee = vtn_callee->impl->function;
|
||||
|
||||
vtn_callee->referenced = true;
|
||||
|
||||
nir_call_instr *call = nir_call_instr_create(b->nb.shader, callee);
|
||||
for (unsigned i = 0; i < call->num_params; i++) {
|
||||
@@ -1410,7 +1484,7 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
|
||||
|
||||
/* Make a temporary to store the argument in */
|
||||
nir_variable *tmp =
|
||||
nir_local_variable_create(b->impl, arg_ssa->type, "arg_tmp");
|
||||
nir_local_variable_create(b->nb.impl, arg_ssa->type, "arg_tmp");
|
||||
call->params[i] = nir_deref_var_create(call, tmp);
|
||||
|
||||
vtn_local_store(b, arg_ssa, call->params[i]);
|
||||
@@ -1420,7 +1494,7 @@ vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode,
|
||||
nir_variable *out_tmp = NULL;
|
||||
assert(res_type->type == callee->return_type);
|
||||
if (!glsl_type_is_void(callee->return_type)) {
|
||||
out_tmp = nir_local_variable_create(b->impl, callee->return_type,
|
||||
out_tmp = nir_local_variable_create(b->nb.impl, callee->return_type,
|
||||
"out_tmp");
|
||||
call->return_deref = nir_deref_var_create(call, out_tmp);
|
||||
}
|
||||
@@ -2098,6 +2172,32 @@ get_ssbo_nir_atomic_op(SpvOp opcode)
|
||||
|
||||
static nir_intrinsic_op
|
||||
get_shared_nir_atomic_op(SpvOp opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case SpvOpAtomicLoad: return nir_intrinsic_load_shared;
|
||||
case SpvOpAtomicStore: return nir_intrinsic_store_shared;
|
||||
#define OP(S, N) case SpvOp##S: return nir_intrinsic_shared_##N;
|
||||
OP(AtomicExchange, atomic_exchange)
|
||||
OP(AtomicCompareExchange, atomic_comp_swap)
|
||||
OP(AtomicIIncrement, atomic_add)
|
||||
OP(AtomicIDecrement, atomic_add)
|
||||
OP(AtomicIAdd, atomic_add)
|
||||
OP(AtomicISub, atomic_add)
|
||||
OP(AtomicSMin, atomic_imin)
|
||||
OP(AtomicUMin, atomic_umin)
|
||||
OP(AtomicSMax, atomic_imax)
|
||||
OP(AtomicUMax, atomic_umax)
|
||||
OP(AtomicAnd, atomic_and)
|
||||
OP(AtomicOr, atomic_or)
|
||||
OP(AtomicXor, atomic_xor)
|
||||
#undef OP
|
||||
default:
|
||||
unreachable("Invalid shared atomic");
|
||||
}
|
||||
}
|
||||
|
||||
static nir_intrinsic_op
|
||||
get_var_nir_atomic_op(SpvOp opcode)
|
||||
{
|
||||
switch (opcode) {
|
||||
case SpvOpAtomicLoad: return nir_intrinsic_load_var;
|
||||
@@ -2161,10 +2261,11 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
|
||||
SpvMemorySemanticsMask semantics = w[5];
|
||||
*/
|
||||
|
||||
if (ptr->mode == vtn_variable_mode_workgroup) {
|
||||
if (ptr->mode == vtn_variable_mode_workgroup &&
|
||||
!b->options->lower_workgroup_access_to_offsets) {
|
||||
nir_deref_var *deref = vtn_pointer_to_deref(b, ptr);
|
||||
const struct glsl_type *deref_type = nir_deref_tail(&deref->deref)->type;
|
||||
nir_intrinsic_op op = get_shared_nir_atomic_op(opcode);
|
||||
nir_intrinsic_op op = get_var_nir_atomic_op(opcode);
|
||||
atomic = nir_intrinsic_instr_create(b->nb.shader, op);
|
||||
atomic->variables[0] = nir_deref_var_clone(deref, atomic);
|
||||
|
||||
@@ -2201,27 +2302,36 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
|
||||
|
||||
}
|
||||
} else {
|
||||
assert(ptr->mode == vtn_variable_mode_ssbo);
|
||||
nir_ssa_def *offset, *index;
|
||||
offset = vtn_pointer_to_offset(b, ptr, &index, NULL);
|
||||
|
||||
nir_intrinsic_op op = get_ssbo_nir_atomic_op(opcode);
|
||||
nir_intrinsic_op op;
|
||||
if (ptr->mode == vtn_variable_mode_ssbo) {
|
||||
op = get_ssbo_nir_atomic_op(opcode);
|
||||
} else {
|
||||
assert(ptr->mode == vtn_variable_mode_workgroup &&
|
||||
b->options->lower_workgroup_access_to_offsets);
|
||||
op = get_shared_nir_atomic_op(opcode);
|
||||
}
|
||||
|
||||
atomic = nir_intrinsic_instr_create(b->nb.shader, op);
|
||||
|
||||
int src = 0;
|
||||
switch (opcode) {
|
||||
case SpvOpAtomicLoad:
|
||||
atomic->num_components = glsl_get_vector_elements(ptr->type->type);
|
||||
atomic->src[0] = nir_src_for_ssa(index);
|
||||
atomic->src[1] = nir_src_for_ssa(offset);
|
||||
if (ptr->mode == vtn_variable_mode_ssbo)
|
||||
atomic->src[src++] = nir_src_for_ssa(index);
|
||||
atomic->src[src++] = nir_src_for_ssa(offset);
|
||||
break;
|
||||
|
||||
case SpvOpAtomicStore:
|
||||
atomic->num_components = glsl_get_vector_elements(ptr->type->type);
|
||||
nir_intrinsic_set_write_mask(atomic, (1 << atomic->num_components) - 1);
|
||||
atomic->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
|
||||
atomic->src[1] = nir_src_for_ssa(index);
|
||||
atomic->src[2] = nir_src_for_ssa(offset);
|
||||
atomic->src[src++] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
|
||||
if (ptr->mode == vtn_variable_mode_ssbo)
|
||||
atomic->src[src++] = nir_src_for_ssa(index);
|
||||
atomic->src[src++] = nir_src_for_ssa(offset);
|
||||
break;
|
||||
|
||||
case SpvOpAtomicExchange:
|
||||
@@ -2238,9 +2348,10 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
|
||||
case SpvOpAtomicAnd:
|
||||
case SpvOpAtomicOr:
|
||||
case SpvOpAtomicXor:
|
||||
atomic->src[0] = nir_src_for_ssa(index);
|
||||
atomic->src[1] = nir_src_for_ssa(offset);
|
||||
fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
|
||||
if (ptr->mode == vtn_variable_mode_ssbo)
|
||||
atomic->src[src++] = nir_src_for_ssa(index);
|
||||
atomic->src[src++] = nir_src_for_ssa(offset);
|
||||
fill_common_atomic_sources(b, opcode, w, &atomic->src[src]);
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -2672,7 +2783,7 @@ stage_for_execution_model(SpvExecutionModel model)
|
||||
}
|
||||
|
||||
#define spv_check_supported(name, cap) do { \
|
||||
if (!(b->ext && b->ext->name)) \
|
||||
if (!(b->options && b->options->caps.name)) \
|
||||
vtn_warn("Unsupported SPIR-V capability: %s", \
|
||||
spirv_capability_to_string(cap)); \
|
||||
} while(0)
|
||||
@@ -3313,8 +3424,8 @@ nir_function *
|
||||
spirv_to_nir(const uint32_t *words, size_t word_count,
|
||||
struct nir_spirv_specialization *spec, unsigned num_spec,
|
||||
gl_shader_stage stage, const char *entry_point_name,
|
||||
const struct nir_spirv_supported_extensions *ext,
|
||||
const nir_shader_compiler_options *options)
|
||||
const struct spirv_to_nir_options *options,
|
||||
const nir_shader_compiler_options *nir_options)
|
||||
{
|
||||
const uint32_t *word_end = words + word_count;
|
||||
|
||||
@@ -3336,7 +3447,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
|
||||
exec_list_make_empty(&b->functions);
|
||||
b->entry_point_stage = stage;
|
||||
b->entry_point_name = entry_point_name;
|
||||
b->ext = ext;
|
||||
b->options = options;
|
||||
|
||||
/* Handle all the preamble instructions */
|
||||
words = vtn_foreach_instruction(b, words, word_end,
|
||||
@@ -3348,7 +3459,7 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
b->shader = nir_shader_create(NULL, stage, options, NULL);
|
||||
b->shader = nir_shader_create(NULL, stage, nir_options, NULL);
|
||||
|
||||
/* Set shader info defaults */
|
||||
b->shader->info.gs.invocations = 1;
|
||||
@@ -3366,13 +3477,22 @@ spirv_to_nir(const uint32_t *words, size_t word_count,
|
||||
|
||||
vtn_build_cfg(b, words, word_end);
|
||||
|
||||
foreach_list_typed(struct vtn_function, func, node, &b->functions) {
|
||||
b->impl = func->impl;
|
||||
b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
assert(b->entry_point->value_type == vtn_value_type_function);
|
||||
b->entry_point->func->referenced = true;
|
||||
|
||||
vtn_function_emit(b, func, vtn_handle_body_instruction);
|
||||
}
|
||||
bool progress;
|
||||
do {
|
||||
progress = false;
|
||||
foreach_list_typed(struct vtn_function, func, node, &b->functions) {
|
||||
if (func->referenced && !func->emitted) {
|
||||
b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
|
||||
vtn_function_emit(b, func, vtn_handle_body_instruction);
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
} while (progress);
|
||||
|
||||
assert(b->entry_point->value_type == vtn_value_type_function);
|
||||
nir_function *entry_point = b->entry_point->func->impl->function;
|
||||
|
@@ -606,7 +606,7 @@ vtn_emit_cf_list(struct vtn_builder *b, struct list_head *cf_list,
|
||||
if ((*block->branch & SpvOpCodeMask) == SpvOpReturnValue) {
|
||||
struct vtn_ssa_value *src = vtn_ssa_value(b, block->branch[1]);
|
||||
vtn_local_store(b, src,
|
||||
nir_deref_var_create(b, b->impl->return_var));
|
||||
nir_deref_var_create(b, b->nb.impl->return_var));
|
||||
}
|
||||
|
||||
if (block->branch_type != vtn_branch_type_none) {
|
||||
@@ -783,4 +783,6 @@ vtn_function_emit(struct vtn_builder *b, struct vtn_function *func,
|
||||
*/
|
||||
if (b->has_loop_continue)
|
||||
nir_repair_ssa_impl(func->impl);
|
||||
|
||||
func->emitted = true;
|
||||
}
|
||||
|
@@ -159,6 +159,9 @@ struct vtn_block {
|
||||
struct vtn_function {
|
||||
struct exec_node node;
|
||||
|
||||
bool referenced;
|
||||
bool emitted;
|
||||
|
||||
nir_function_impl *impl;
|
||||
struct vtn_block *start_block;
|
||||
|
||||
@@ -217,7 +220,10 @@ struct vtn_type {
|
||||
/* The value that declares this type. Used for finding decorations */
|
||||
struct vtn_value *val;
|
||||
|
||||
/* Specifies the length of complex types. */
|
||||
/* Specifies the length of complex types.
|
||||
*
|
||||
* For Workgroup pointers, this is the size of the referenced type.
|
||||
*/
|
||||
unsigned length;
|
||||
|
||||
/* for arrays, matrices and pointers, the array stride */
|
||||
@@ -268,6 +274,9 @@ struct vtn_type {
|
||||
|
||||
/* Storage class for pointers */
|
||||
SpvStorageClass storage_class;
|
||||
|
||||
/* Required alignment for pointers */
|
||||
uint32_t align;
|
||||
};
|
||||
|
||||
/* Members for image types */
|
||||
@@ -369,13 +378,6 @@ struct vtn_pointer {
|
||||
struct nir_ssa_def *offset;
|
||||
};
|
||||
|
||||
static inline bool
|
||||
vtn_pointer_uses_ssa_offset(struct vtn_pointer *ptr)
|
||||
{
|
||||
return ptr->mode == vtn_variable_mode_ubo ||
|
||||
ptr->mode == vtn_variable_mode_ssbo;
|
||||
}
|
||||
|
||||
struct vtn_variable {
|
||||
enum vtn_variable_mode mode;
|
||||
|
||||
@@ -389,6 +391,8 @@ struct vtn_variable {
|
||||
nir_variable *var;
|
||||
nir_variable **members;
|
||||
|
||||
int shared_location;
|
||||
|
||||
/**
|
||||
* In some early released versions of GLSLang, it implemented all function
|
||||
* calls by making copies of all parameters into temporary variables and
|
||||
@@ -464,8 +468,7 @@ struct vtn_builder {
|
||||
nir_builder nb;
|
||||
|
||||
nir_shader *shader;
|
||||
nir_function_impl *impl;
|
||||
const struct nir_spirv_supported_extensions *ext;
|
||||
const struct spirv_to_nir_options *options;
|
||||
struct vtn_block *block;
|
||||
|
||||
/* Current file, line, and column. Useful for debugging. Set
|
||||
@@ -631,6 +634,13 @@ void vtn_handle_alu(struct vtn_builder *b, SpvOp opcode,
|
||||
bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
|
||||
const uint32_t *words, unsigned count);
|
||||
|
||||
static inline uint32_t
|
||||
vtn_align_u32(uint32_t v, uint32_t a)
|
||||
{
|
||||
assert(a != 0 && a == (a & -a));
|
||||
return (v + a - 1) & ~(a - 1);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
vtn_u64_literal(const uint32_t *w)
|
||||
{
|
||||
|
@@ -57,6 +57,27 @@ vtn_access_chain_extend(struct vtn_builder *b, struct vtn_access_chain *old,
|
||||
return chain;
|
||||
}
|
||||
|
||||
static bool
|
||||
vtn_pointer_uses_ssa_offset(struct vtn_builder *b,
|
||||
struct vtn_pointer *ptr)
|
||||
{
|
||||
return ptr->mode == vtn_variable_mode_ubo ||
|
||||
ptr->mode == vtn_variable_mode_ssbo ||
|
||||
(ptr->mode == vtn_variable_mode_workgroup &&
|
||||
b->options->lower_workgroup_access_to_offsets);
|
||||
}
|
||||
|
||||
static bool
|
||||
vtn_pointer_is_external_block(struct vtn_builder *b,
|
||||
struct vtn_pointer *ptr)
|
||||
{
|
||||
return ptr->mode == vtn_variable_mode_ssbo ||
|
||||
ptr->mode == vtn_variable_mode_ubo ||
|
||||
ptr->mode == vtn_variable_mode_push_constant ||
|
||||
(ptr->mode == vtn_variable_mode_workgroup &&
|
||||
b->options->lower_workgroup_access_to_offsets);
|
||||
}
|
||||
|
||||
/* Dereference the given base pointer by the access chain */
|
||||
static struct vtn_pointer *
|
||||
vtn_access_chain_pointer_dereference(struct vtn_builder *b,
|
||||
@@ -150,7 +171,8 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
|
||||
/* We need ptr_type for the stride */
|
||||
assert(base->ptr_type);
|
||||
/* This must be a pointer to an actual element somewhere */
|
||||
assert(block_index && offset);
|
||||
assert(offset);
|
||||
assert(block_index || base->mode == vtn_variable_mode_workgroup);
|
||||
/* We need at least one element in the chain */
|
||||
assert(deref_chain->length >= 1);
|
||||
|
||||
@@ -161,24 +183,49 @@ vtn_ssa_offset_pointer_dereference(struct vtn_builder *b,
|
||||
idx++;
|
||||
}
|
||||
|
||||
if (!block_index) {
|
||||
if (!offset) {
|
||||
/* This is the first access chain so we don't have a block index */
|
||||
assert(!block_index);
|
||||
|
||||
assert(base->var);
|
||||
if (glsl_type_is_array(type->type)) {
|
||||
/* We need at least one element in the chain */
|
||||
assert(deref_chain->length >= 1);
|
||||
assert(base->ptr_type);
|
||||
switch (base->mode) {
|
||||
case vtn_variable_mode_ubo:
|
||||
case vtn_variable_mode_ssbo:
|
||||
if (glsl_type_is_array(type->type)) {
|
||||
/* We need at least one element in the chain */
|
||||
assert(deref_chain->length >= 1);
|
||||
|
||||
nir_ssa_def *desc_arr_idx =
|
||||
vtn_access_link_as_ssa(b, deref_chain->link[0], 1);
|
||||
block_index = vtn_variable_resource_index(b, base->var, desc_arr_idx);
|
||||
type = type->array_element;
|
||||
idx++;
|
||||
} else {
|
||||
block_index = vtn_variable_resource_index(b, base->var, NULL);
|
||||
nir_ssa_def *desc_arr_idx =
|
||||
vtn_access_link_as_ssa(b, deref_chain->link[0], 1);
|
||||
block_index = vtn_variable_resource_index(b, base->var, desc_arr_idx);
|
||||
type = type->array_element;
|
||||
idx++;
|
||||
} else {
|
||||
block_index = vtn_variable_resource_index(b, base->var, NULL);
|
||||
}
|
||||
offset = nir_imm_int(&b->nb, 0);
|
||||
break;
|
||||
|
||||
case vtn_variable_mode_workgroup:
|
||||
/* Assign location on first use so that we don't end up bloating SLM
|
||||
* address space for variables which are never statically used.
|
||||
*/
|
||||
if (base->var->shared_location < 0) {
|
||||
assert(base->ptr_type->length > 0 && base->ptr_type->align > 0);
|
||||
b->shader->num_shared = vtn_align_u32(b->shader->num_shared,
|
||||
base->ptr_type->align);
|
||||
base->var->shared_location = b->shader->num_shared;
|
||||
b->shader->num_shared += base->ptr_type->length;
|
||||
}
|
||||
|
||||
block_index = NULL;
|
||||
offset = nir_imm_int(&b->nb, base->var->shared_location);
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Invalid offset pointer mode");
|
||||
}
|
||||
|
||||
/* This is the first access chain so we also need an offset */
|
||||
assert(!offset);
|
||||
offset = nir_imm_int(&b->nb, 0);
|
||||
}
|
||||
assert(offset);
|
||||
|
||||
@@ -228,7 +275,7 @@ vtn_pointer_dereference(struct vtn_builder *b,
|
||||
struct vtn_pointer *base,
|
||||
struct vtn_access_chain *deref_chain)
|
||||
{
|
||||
if (vtn_pointer_uses_ssa_offset(base)) {
|
||||
if (vtn_pointer_uses_ssa_offset(b, base)) {
|
||||
return vtn_ssa_offset_pointer_dereference(b, base, deref_chain);
|
||||
} else {
|
||||
return vtn_access_chain_pointer_dereference(b, base, deref_chain);
|
||||
@@ -478,45 +525,27 @@ vtn_local_store(struct vtn_builder *b, struct vtn_ssa_value *src,
|
||||
}
|
||||
}
|
||||
|
||||
static nir_ssa_def *
|
||||
get_vulkan_resource_index(struct vtn_builder *b, struct vtn_pointer *ptr,
|
||||
struct vtn_type **type, unsigned *chain_idx)
|
||||
{
|
||||
/* Push constants have no explicit binding */
|
||||
if (ptr->mode == vtn_variable_mode_push_constant) {
|
||||
*chain_idx = 0;
|
||||
*type = ptr->var->type;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (glsl_type_is_array(ptr->var->type->type)) {
|
||||
assert(ptr->chain->length > 0);
|
||||
nir_ssa_def *desc_array_index =
|
||||
vtn_access_link_as_ssa(b, ptr->chain->link[0], 1);
|
||||
*chain_idx = 1;
|
||||
*type = ptr->var->type->array_element;
|
||||
return vtn_variable_resource_index(b, ptr->var, desc_array_index);
|
||||
} else {
|
||||
*chain_idx = 0;
|
||||
*type = ptr->var->type;
|
||||
return vtn_variable_resource_index(b, ptr->var, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
nir_ssa_def *
|
||||
vtn_pointer_to_offset(struct vtn_builder *b, struct vtn_pointer *ptr,
|
||||
nir_ssa_def **index_out, unsigned *end_idx_out)
|
||||
{
|
||||
if (ptr->offset) {
|
||||
assert(ptr->block_index);
|
||||
if (vtn_pointer_uses_ssa_offset(b, ptr)) {
|
||||
if (!ptr->offset) {
|
||||
assert(ptr->mode == vtn_variable_mode_workgroup);
|
||||
struct vtn_access_chain chain = {
|
||||
.length = 0,
|
||||
};
|
||||
ptr = vtn_ssa_offset_pointer_dereference(b, ptr, &chain);
|
||||
}
|
||||
*index_out = ptr->block_index;
|
||||
return ptr->offset;
|
||||
}
|
||||
|
||||
unsigned idx = 0;
|
||||
struct vtn_type *type;
|
||||
*index_out = get_vulkan_resource_index(b, ptr, &type, &idx);
|
||||
assert(ptr->mode == vtn_variable_mode_push_constant);
|
||||
*index_out = NULL;
|
||||
|
||||
unsigned idx = 0;
|
||||
struct vtn_type *type = ptr->var->type;
|
||||
nir_ssa_def *offset = nir_imm_int(&b->nb, 0);
|
||||
for (; idx < ptr->chain->length; idx++) {
|
||||
enum glsl_base_type base_type = glsl_get_base_type(type->type);
|
||||
@@ -829,6 +858,9 @@ vtn_block_load(struct vtn_builder *b, struct vtn_pointer *src)
|
||||
vtn_access_chain_get_offset_size(src->chain, src->var->type,
|
||||
&access_offset, &access_size);
|
||||
break;
|
||||
case vtn_variable_mode_workgroup:
|
||||
op = nir_intrinsic_load_shared;
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid block variable mode");
|
||||
}
|
||||
@@ -848,22 +880,26 @@ static void
|
||||
vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src,
|
||||
struct vtn_pointer *dst)
|
||||
{
|
||||
nir_intrinsic_op op;
|
||||
switch (dst->mode) {
|
||||
case vtn_variable_mode_ssbo:
|
||||
op = nir_intrinsic_store_ssbo;
|
||||
break;
|
||||
case vtn_variable_mode_workgroup:
|
||||
op = nir_intrinsic_store_shared;
|
||||
break;
|
||||
default:
|
||||
unreachable("Invalid block variable mode");
|
||||
}
|
||||
|
||||
nir_ssa_def *offset, *index = NULL;
|
||||
unsigned chain_idx;
|
||||
offset = vtn_pointer_to_offset(b, dst, &index, &chain_idx);
|
||||
|
||||
_vtn_block_load_store(b, nir_intrinsic_store_ssbo, false, index, offset,
|
||||
_vtn_block_load_store(b, op, false, index, offset,
|
||||
0, 0, dst->chain, chain_idx, dst->type, &src);
|
||||
}
|
||||
|
||||
static bool
|
||||
vtn_pointer_is_external_block(struct vtn_pointer *ptr)
|
||||
{
|
||||
return ptr->mode == vtn_variable_mode_ssbo ||
|
||||
ptr->mode == vtn_variable_mode_ubo ||
|
||||
ptr->mode == vtn_variable_mode_push_constant;
|
||||
}
|
||||
|
||||
static void
|
||||
_vtn_variable_load_store(struct vtn_builder *b, bool load,
|
||||
struct vtn_pointer *ptr,
|
||||
@@ -923,7 +959,7 @@ _vtn_variable_load_store(struct vtn_builder *b, bool load,
|
||||
struct vtn_ssa_value *
|
||||
vtn_variable_load(struct vtn_builder *b, struct vtn_pointer *src)
|
||||
{
|
||||
if (vtn_pointer_is_external_block(src)) {
|
||||
if (vtn_pointer_is_external_block(b, src)) {
|
||||
return vtn_block_load(b, src);
|
||||
} else {
|
||||
struct vtn_ssa_value *val = NULL;
|
||||
@@ -936,8 +972,9 @@ void
|
||||
vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src,
|
||||
struct vtn_pointer *dest)
|
||||
{
|
||||
if (vtn_pointer_is_external_block(dest)) {
|
||||
assert(dest->mode == vtn_variable_mode_ssbo);
|
||||
if (vtn_pointer_is_external_block(b, dest)) {
|
||||
assert(dest->mode == vtn_variable_mode_ssbo ||
|
||||
dest->mode == vtn_variable_mode_workgroup);
|
||||
vtn_block_store(b, src, dest);
|
||||
} else {
|
||||
_vtn_variable_load_store(b, false, dest, &src);
|
||||
@@ -1492,11 +1529,9 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr)
|
||||
assert(ptr->ptr_type);
|
||||
assert(ptr->ptr_type->type);
|
||||
|
||||
if (ptr->offset && ptr->block_index) {
|
||||
return nir_vec2(&b->nb, ptr->block_index, ptr->offset);
|
||||
} else {
|
||||
/* If we don't have an offset or block index, then we must be a pointer
|
||||
* to the variable itself.
|
||||
if (!ptr->offset) {
|
||||
/* If we don't have an offset then we must be a pointer to the variable
|
||||
* itself.
|
||||
*/
|
||||
assert(!ptr->offset && !ptr->block_index);
|
||||
|
||||
@@ -1506,8 +1541,20 @@ vtn_pointer_to_ssa(struct vtn_builder *b, struct vtn_pointer *ptr)
|
||||
*/
|
||||
assert(ptr->var && ptr->var->type->base_type == vtn_base_type_struct);
|
||||
|
||||
return nir_vec2(&b->nb, vtn_variable_resource_index(b, ptr->var, NULL),
|
||||
nir_imm_int(&b->nb, 0));
|
||||
struct vtn_access_chain chain = {
|
||||
.length = 0,
|
||||
};
|
||||
ptr = vtn_ssa_offset_pointer_dereference(b, ptr, &chain);
|
||||
}
|
||||
|
||||
assert(ptr->offset);
|
||||
if (ptr->block_index) {
|
||||
assert(ptr->mode == vtn_variable_mode_ubo ||
|
||||
ptr->mode == vtn_variable_mode_ssbo);
|
||||
return nir_vec2(&b->nb, ptr->block_index, ptr->offset);
|
||||
} else {
|
||||
assert(ptr->mode == vtn_variable_mode_workgroup);
|
||||
return ptr->offset;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1515,7 +1562,7 @@ struct vtn_pointer *
|
||||
vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def *ssa,
|
||||
struct vtn_type *ptr_type)
|
||||
{
|
||||
assert(ssa->num_components == 2 && ssa->bit_size == 32);
|
||||
assert(ssa->num_components <= 2 && ssa->bit_size == 32);
|
||||
assert(ptr_type->base_type == vtn_base_type_pointer);
|
||||
assert(ptr_type->deref->base_type != vtn_base_type_pointer);
|
||||
/* This pointer type needs to have actual storage */
|
||||
@@ -1526,8 +1573,19 @@ vtn_pointer_from_ssa(struct vtn_builder *b, nir_ssa_def *ssa,
|
||||
ptr_type, NULL);
|
||||
ptr->type = ptr_type->deref;
|
||||
ptr->ptr_type = ptr_type;
|
||||
ptr->block_index = nir_channel(&b->nb, ssa, 0);
|
||||
ptr->offset = nir_channel(&b->nb, ssa, 1);
|
||||
|
||||
if (ssa->num_components > 1) {
|
||||
assert(ssa->num_components == 2);
|
||||
assert(ptr->mode == vtn_variable_mode_ubo ||
|
||||
ptr->mode == vtn_variable_mode_ssbo);
|
||||
ptr->block_index = nir_channel(&b->nb, ssa, 0);
|
||||
ptr->offset = nir_channel(&b->nb, ssa, 1);
|
||||
} else {
|
||||
assert(ssa->num_components == 1);
|
||||
assert(ptr->mode == vtn_variable_mode_workgroup);
|
||||
ptr->block_index = NULL;
|
||||
ptr->offset = ssa;
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
@@ -1599,7 +1657,6 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
|
||||
case vtn_variable_mode_global:
|
||||
case vtn_variable_mode_image:
|
||||
case vtn_variable_mode_sampler:
|
||||
case vtn_variable_mode_workgroup:
|
||||
/* For these, we create the variable normally */
|
||||
var->var = rzalloc(b->shader, nir_variable);
|
||||
var->var->name = ralloc_strdup(var->var, val->name);
|
||||
@@ -1617,6 +1674,18 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
|
||||
}
|
||||
break;
|
||||
|
||||
case vtn_variable_mode_workgroup:
|
||||
if (b->options->lower_workgroup_access_to_offsets) {
|
||||
var->shared_location = -1;
|
||||
} else {
|
||||
/* Create the variable normally */
|
||||
var->var = rzalloc(b->shader, nir_variable);
|
||||
var->var->name = ralloc_strdup(var->var, val->name);
|
||||
var->var->type = var->type->type;
|
||||
var->var->data.mode = nir_var_shared;
|
||||
}
|
||||
break;
|
||||
|
||||
case vtn_variable_mode_input:
|
||||
case vtn_variable_mode_output: {
|
||||
/* In order to know whether or not we're a per-vertex inout, we need
|
||||
@@ -1731,7 +1800,7 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
|
||||
|
||||
if (var->mode == vtn_variable_mode_local) {
|
||||
assert(var->members == NULL && var->var != NULL);
|
||||
nir_function_impl_add_variable(b->impl, var->var);
|
||||
nir_function_impl_add_variable(b->nb.impl, var->var);
|
||||
} else if (var->var) {
|
||||
nir_shader_add_variable(b->shader, var->var);
|
||||
} else if (var->members) {
|
||||
@@ -1741,9 +1810,7 @@ vtn_create_variable(struct vtn_builder *b, struct vtn_value *val,
|
||||
nir_shader_add_variable(b->shader, var->members[i]);
|
||||
}
|
||||
} else {
|
||||
assert(var->mode == vtn_variable_mode_ubo ||
|
||||
var->mode == vtn_variable_mode_ssbo ||
|
||||
var->mode == vtn_variable_mode_push_constant);
|
||||
assert(vtn_pointer_is_external_block(b, val->pointer));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1868,15 +1935,19 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
|
||||
const uint32_t offset = ptr->var->type->offsets[w[4]];
|
||||
const uint32_t stride = ptr->var->type->members[w[4]]->stride;
|
||||
|
||||
unsigned chain_idx;
|
||||
struct vtn_type *type;
|
||||
nir_ssa_def *index =
|
||||
get_vulkan_resource_index(b, ptr, &type, &chain_idx);
|
||||
if (!ptr->block_index) {
|
||||
assert(ptr->mode == vtn_variable_mode_workgroup);
|
||||
struct vtn_access_chain chain = {
|
||||
.length = 0,
|
||||
};
|
||||
ptr = vtn_ssa_offset_pointer_dereference(b, ptr, &chain);
|
||||
assert(ptr->block_index);
|
||||
}
|
||||
|
||||
nir_intrinsic_instr *instr =
|
||||
nir_intrinsic_instr_create(b->nb.shader,
|
||||
nir_intrinsic_get_buffer_size);
|
||||
instr->src[0] = nir_src_for_ssa(index);
|
||||
instr->src[0] = nir_src_for_ssa(ptr->block_index);
|
||||
nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
|
||||
nir_builder_instr_insert(&b->nb, &instr->instr);
|
||||
nir_ssa_def *buf_size = &instr->dest.ssa;
|
||||
|
@@ -41,6 +41,7 @@ LOCAL_SRC_FILES := \
|
||||
LOCAL_CFLAGS := \
|
||||
-D_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_ANDROID \
|
||||
-D_EGL_BUILT_IN_DRIVER_DRI2 \
|
||||
-DHAS_GRALLOC_DRM_HEADERS \
|
||||
-DHAVE_ANDROID_PLATFORM
|
||||
|
||||
LOCAL_C_INCLUDES := \
|
||||
|
@@ -105,7 +105,9 @@ endif
|
||||
if HAVE_PLATFORM_ANDROID
|
||||
AM_CFLAGS += $(ANDROID_CFLAGS)
|
||||
libEGL_common_la_LIBADD += $(ANDROID_LIBS)
|
||||
dri2_backend_FILES += drivers/dri2/platform_android.c
|
||||
dri2_backend_FILES += \
|
||||
drivers/dri2/platform_android.c \
|
||||
drivers/dri2/egl_dri2_drm_gralloc.h
|
||||
endif
|
||||
|
||||
AM_CFLAGS += \
|
||||
|
@@ -299,7 +299,10 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
|
||||
_eglSetConfigKey(&base, EGL_MAX_PBUFFER_HEIGHT,
|
||||
_EGL_MAX_PBUFFER_HEIGHT);
|
||||
break;
|
||||
|
||||
case __DRI_ATTRIB_MUTABLE_RENDER_BUFFER:
|
||||
if (disp->Extensions.KHR_mutable_render_buffer)
|
||||
surface_type |= EGL_MUTABLE_RENDER_BUFFER_BIT_KHR;
|
||||
break;
|
||||
default:
|
||||
key = dri2_to_egl_attribute_map[attrib];
|
||||
if (key != 0)
|
||||
@@ -457,6 +460,7 @@ static const struct dri2_extension_match optional_core_extensions[] = {
|
||||
{ __DRI2_RENDERER_QUERY, 1, offsetof(struct dri2_egl_display, rendererQuery) },
|
||||
{ __DRI2_INTEROP, 1, offsetof(struct dri2_egl_display, interop) },
|
||||
{ __DRI_IMAGE, 1, offsetof(struct dri2_egl_display, image) },
|
||||
{ __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1, offsetof(struct dri2_egl_display, mutable_render_buffer) },
|
||||
{ NULL, 0, 0 }
|
||||
};
|
||||
|
||||
@@ -904,10 +908,6 @@ dri2_initialize(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
return EGL_TRUE;
|
||||
}
|
||||
|
||||
/* not until swrast_dri is supported */
|
||||
if (disp->Options.UseFallback)
|
||||
return EGL_FALSE;
|
||||
|
||||
switch (disp->Platform) {
|
||||
#ifdef HAVE_SURFACELESS_PLATFORM
|
||||
case _EGL_PLATFORM_SURFACELESS:
|
||||
@@ -1329,12 +1329,6 @@ dri2_create_context(_EGLDriver *drv, _EGLDisplay *disp, _EGLConfig *conf,
|
||||
dri_config = dri2_config->dri_config[1][0];
|
||||
else
|
||||
dri_config = dri2_config->dri_config[0][0];
|
||||
|
||||
/* EGL_WINDOW_BIT is set only when there is a double-buffered dri_config.
|
||||
* This makes sure the back buffer will always be used.
|
||||
*/
|
||||
if (conf->SurfaceType & EGL_WINDOW_BIT)
|
||||
dri2_ctx->base.WindowRenderBuffer = EGL_BACK_BUFFER;
|
||||
}
|
||||
else
|
||||
dri_config = NULL;
|
||||
@@ -1525,6 +1519,8 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
|
||||
{
|
||||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
|
||||
struct dri2_egl_context *dri2_ctx = dri2_egl_context(ctx);
|
||||
_EGLDisplay *old_disp = NULL;
|
||||
struct dri2_egl_display *old_dri2_dpy = NULL;
|
||||
_EGLContext *old_ctx;
|
||||
_EGLSurface *old_dsurf, *old_rsurf;
|
||||
_EGLSurface *tmp_dsurf, *tmp_rsurf;
|
||||
@@ -1541,6 +1537,11 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
|
||||
return EGL_FALSE;
|
||||
}
|
||||
|
||||
if (old_ctx) {
|
||||
old_disp = old_ctx->Resource.Display;
|
||||
old_dri2_dpy = dri2_egl_display(old_disp);
|
||||
}
|
||||
|
||||
/* flush before context switch */
|
||||
if (old_ctx)
|
||||
dri2_gl_flush();
|
||||
@@ -1554,31 +1555,30 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
|
||||
|
||||
if (old_dsurf)
|
||||
dri2_surf_update_fence_fd(old_ctx, disp, old_dsurf);
|
||||
|
||||
/* Disable shared buffer mode */
|
||||
if (old_dsurf && _eglSurfaceInSharedBufferMode(old_dsurf) &&
|
||||
old_dri2_dpy->vtbl->set_shared_buffer_mode) {
|
||||
old_dri2_dpy->vtbl->set_shared_buffer_mode(old_disp, old_dsurf, false);
|
||||
}
|
||||
|
||||
dri2_dpy->core->unbindContext(old_cctx);
|
||||
}
|
||||
|
||||
unbind = (cctx == NULL && ddraw == NULL && rdraw == NULL);
|
||||
|
||||
if (unbind || dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
|
||||
dri2_destroy_surface(drv, disp, old_dsurf);
|
||||
dri2_destroy_surface(drv, disp, old_rsurf);
|
||||
|
||||
if (!unbind)
|
||||
dri2_dpy->ref_count++;
|
||||
if (old_ctx) {
|
||||
EGLDisplay old_disp = _eglGetDisplayHandle(old_ctx->Resource.Display);
|
||||
dri2_destroy_context(drv, disp, old_ctx);
|
||||
dri2_display_release(old_disp);
|
||||
}
|
||||
|
||||
return EGL_TRUE;
|
||||
} else {
|
||||
if (!unbind && !dri2_dpy->core->bindContext(cctx, ddraw, rdraw)) {
|
||||
/* undo the previous _eglBindContext */
|
||||
_eglBindContext(old_ctx, old_dsurf, old_rsurf, &ctx, &tmp_dsurf, &tmp_rsurf);
|
||||
assert(&dri2_ctx->base == ctx &&
|
||||
tmp_dsurf == dsurf &&
|
||||
tmp_rsurf == rsurf);
|
||||
|
||||
if (old_dsurf && _eglSurfaceInSharedBufferMode(old_dsurf) &&
|
||||
old_dri2_dpy->vtbl->set_shared_buffer_mode) {
|
||||
old_dri2_dpy->vtbl->set_shared_buffer_mode(old_disp, old_dsurf, true);
|
||||
}
|
||||
|
||||
_eglPutSurface(dsurf);
|
||||
_eglPutSurface(rsurf);
|
||||
_eglPutContext(ctx);
|
||||
@@ -1593,6 +1593,31 @@ dri2_make_current(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *dsurf,
|
||||
*/
|
||||
return _eglError(EGL_BAD_MATCH, "eglMakeCurrent");
|
||||
}
|
||||
|
||||
dri2_destroy_surface(drv, disp, old_dsurf);
|
||||
dri2_destroy_surface(drv, disp, old_rsurf);
|
||||
|
||||
if (!unbind)
|
||||
dri2_dpy->ref_count++;
|
||||
|
||||
if (old_ctx) {
|
||||
dri2_destroy_context(drv, disp, old_ctx);
|
||||
dri2_display_release(old_disp);
|
||||
}
|
||||
|
||||
if (dsurf && _eglSurfaceHasMutableRenderBuffer(dsurf) &&
|
||||
dri2_dpy->vtbl->set_shared_buffer_mode) {
|
||||
/* Always update the shared buffer mode. This is obviously needed when
|
||||
* the active EGL_RENDER_BUFFER is EGL_SINGLE_BUFFER. When
|
||||
* EGL_RENDER_BUFFER is EGL_BACK_BUFFER, the update protects us in the
|
||||
* case where external non-EGL API may have changed window's shared
|
||||
* buffer mode since we last saw it.
|
||||
*/
|
||||
bool mode = (dsurf->ActiveRenderBuffer == EGL_SINGLE_BUFFER);
|
||||
dri2_dpy->vtbl->set_shared_buffer_mode(disp, dsurf, mode);
|
||||
}
|
||||
|
||||
return EGL_TRUE;
|
||||
}
|
||||
|
||||
__DRIdrawable *
|
||||
|
@@ -61,7 +61,7 @@ struct zwp_linux_dmabuf_v1;
|
||||
|
||||
#include <system/window.h>
|
||||
#include <hardware/gralloc.h>
|
||||
#include <gralloc_drm_handle.h>
|
||||
#include "platform_android_gralloc_drm.h"
|
||||
|
||||
#endif /* HAVE_ANDROID_PLATFORM */
|
||||
|
||||
@@ -147,6 +147,12 @@ struct dri2_egl_display_vtbl {
|
||||
__DRIdrawable *(*get_dri_drawable)(_EGLSurface *surf);
|
||||
|
||||
void (*close_screen_notify)(_EGLDisplay *dpy);
|
||||
|
||||
/* Used in EGL_KHR_mutable_render_buffer to update the native window's
|
||||
* shared buffer mode.
|
||||
*/
|
||||
bool (*set_shared_buffer_mode)(_EGLDisplay *dpy, _EGLSurface *surf,
|
||||
bool mode);
|
||||
};
|
||||
|
||||
struct dri2_egl_display
|
||||
@@ -172,6 +178,7 @@ struct dri2_egl_display
|
||||
const __DRI2fenceExtension *fence;
|
||||
const __DRI2rendererQueryExtension *rendererQuery;
|
||||
const __DRI2interopExtension *interop;
|
||||
const __DRImutableRenderBufferDriverExtension *mutable_render_buffer;
|
||||
int fd;
|
||||
|
||||
/* dri2_initialize/dri2_terminate increment/decrement this count, so does
|
||||
|
@@ -37,7 +37,7 @@
|
||||
#include "loader.h"
|
||||
#include "egl_dri2.h"
|
||||
#include "egl_dri2_fallbacks.h"
|
||||
#include "gralloc_drm.h"
|
||||
#include "platform_android_gralloc_drm.h"
|
||||
|
||||
#define ALIGN(val, align) (((val) + (align) - 1) & ~((align) - 1))
|
||||
|
||||
@@ -59,6 +59,10 @@ static const struct droid_yuv_format droid_yuv_formats[] = {
|
||||
{ HAL_PIXEL_FORMAT_YCbCr_420_888, 0, 1, __DRI_IMAGE_FOURCC_YUV420 },
|
||||
{ HAL_PIXEL_FORMAT_YCbCr_420_888, 1, 1, __DRI_IMAGE_FOURCC_YVU420 },
|
||||
{ HAL_PIXEL_FORMAT_YV12, 1, 1, __DRI_IMAGE_FOURCC_YVU420 },
|
||||
/* HACK: See droid_create_image_from_prime_fd() and b/32077885. */
|
||||
{ HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, 0, 2, __DRI_IMAGE_FOURCC_NV12 },
|
||||
{ HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, 0, 1, __DRI_IMAGE_FOURCC_YUV420 },
|
||||
{ HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED, 1, 1, __DRI_IMAGE_FOURCC_YVU420 },
|
||||
};
|
||||
|
||||
static int
|
||||
@@ -90,6 +94,11 @@ get_format_bpp(int native)
|
||||
|
||||
switch (native) {
|
||||
case HAL_PIXEL_FORMAT_RGBA_8888:
|
||||
case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED:
|
||||
/*
|
||||
* HACK: Hardcode this to RGBX_8888 as per cros_gralloc hack.
|
||||
* TODO: Remove this once b/32077885 is fixed.
|
||||
*/
|
||||
case HAL_PIXEL_FORMAT_RGBX_8888:
|
||||
case HAL_PIXEL_FORMAT_BGRA_8888:
|
||||
bpp = 4;
|
||||
@@ -112,6 +121,11 @@ static int get_fourcc(int native)
|
||||
case HAL_PIXEL_FORMAT_RGB_565: return __DRI_IMAGE_FOURCC_RGB565;
|
||||
case HAL_PIXEL_FORMAT_BGRA_8888: return __DRI_IMAGE_FOURCC_ARGB8888;
|
||||
case HAL_PIXEL_FORMAT_RGBA_8888: return __DRI_IMAGE_FOURCC_ABGR8888;
|
||||
case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED:
|
||||
/*
|
||||
* HACK: Hardcode this to RGBX_8888 as per cros_gralloc hack.
|
||||
* TODO: Remove this once b/32077885 is fixed.
|
||||
*/
|
||||
case HAL_PIXEL_FORMAT_RGBX_8888: return __DRI_IMAGE_FOURCC_XBGR8888;
|
||||
default:
|
||||
_eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x", native);
|
||||
@@ -125,6 +139,11 @@ static int get_format(int format)
|
||||
case HAL_PIXEL_FORMAT_BGRA_8888: return __DRI_IMAGE_FORMAT_ARGB8888;
|
||||
case HAL_PIXEL_FORMAT_RGB_565: return __DRI_IMAGE_FORMAT_RGB565;
|
||||
case HAL_PIXEL_FORMAT_RGBA_8888: return __DRI_IMAGE_FORMAT_ABGR8888;
|
||||
case HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED:
|
||||
/*
|
||||
* HACK: Hardcode this to RGBX_8888 as per cros_gralloc hack.
|
||||
* TODO: Revert this once b/32077885 is fixed.
|
||||
*/
|
||||
case HAL_PIXEL_FORMAT_RGBX_8888: return __DRI_IMAGE_FORMAT_XBGR8888;
|
||||
default:
|
||||
_eglLog(_EGL_WARNING, "unsupported native buffer format 0x%x", format);
|
||||
@@ -273,6 +292,32 @@ droid_window_cancel_buffer(struct dri2_egl_surface *dri2_surf)
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
droid_set_shared_buffer_mode(_EGLDisplay *disp, _EGLSurface *surf, bool mode)
|
||||
{
|
||||
#if __ANDROID_API__ >= 24
|
||||
struct dri2_egl_display *dri2_dpy = dri2_egl_display(disp);
|
||||
struct dri2_egl_surface *dri2_surf = dri2_egl_surface(surf);
|
||||
struct ANativeWindow *window = dri2_surf->window;
|
||||
|
||||
assert(surf->Type == EGL_WINDOW_BIT);
|
||||
assert(_eglSurfaceHasMutableRenderBuffer(&dri2_surf->base));
|
||||
|
||||
_eglLog(_EGL_DEBUG, "%s: mode=%d", __func__, mode);
|
||||
|
||||
if (native_window_set_shared_buffer_mode(window, mode)) {
|
||||
_eglLog(_EGL_WARNING, "failed native_window_set_shared_buffer_mode"
|
||||
"(window=%p, mode=%d)", window, mode);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
#else
|
||||
_eglLog(_EGL_FATAL, "%s:%d: internal error: unreachable", __FILE__, __LINE__);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
static _EGLSurface *
|
||||
droid_create_surface(_EGLDriver *drv, _EGLDisplay *disp, EGLint type,
|
||||
_EGLConfig *conf, void *native_window,
|
||||
@@ -547,6 +592,21 @@ droid_image_get_buffers(__DRIdrawable *driDrawable,
|
||||
if (update_buffers(dri2_surf) < 0)
|
||||
return 0;
|
||||
|
||||
if (_eglSurfaceInSharedBufferMode(&dri2_surf->base)) {
|
||||
if (get_back_bo(dri2_surf) < 0)
|
||||
return 0;
|
||||
|
||||
/* We have dri_image_back because this is a window surface and
|
||||
* get_back_bo() succeeded.
|
||||
*/
|
||||
assert(dri2_surf->dri_image_back);
|
||||
images->back = dri2_surf->dri_image_back;
|
||||
images->image_mask |= __DRI_IMAGE_BUFFER_SHARED;
|
||||
|
||||
/* There exists no accompanying back nor front buffer. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (buffer_mask & __DRI_IMAGE_BUFFER_FRONT) {
|
||||
if (get_front_bo(dri2_surf, format) < 0)
|
||||
return 0;
|
||||
@@ -593,6 +653,21 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
|
||||
if (dri2_surf->base.Type != EGL_WINDOW_BIT)
|
||||
return EGL_TRUE;
|
||||
|
||||
const bool has_mutable_rb = _eglSurfaceHasMutableRenderBuffer(draw);
|
||||
|
||||
/* From the EGL_KHR_mutable_render_buffer spec (v12):
|
||||
*
|
||||
* If surface is a single-buffered window, pixmap, or pbuffer surface
|
||||
* for which there is no pending change to the EGL_RENDER_BUFFER
|
||||
* attribute, eglSwapBuffers has no effect.
|
||||
*/
|
||||
if (has_mutable_rb &&
|
||||
draw->RequestedRenderBuffer == EGL_SINGLE_BUFFER &&
|
||||
draw->ActiveRenderBuffer == EGL_SINGLE_BUFFER) {
|
||||
_eglLog(_EGL_DEBUG, "%s: remain in shared buffer mode", __func__);
|
||||
return EGL_TRUE;
|
||||
}
|
||||
|
||||
for (int i = 0; i < ARRAY_SIZE(dri2_surf->color_buffers); i++) {
|
||||
if (dri2_surf->color_buffers[i].age > 0)
|
||||
dri2_surf->color_buffers[i].age++;
|
||||
@@ -617,6 +692,18 @@ droid_swap_buffers(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *draw)
|
||||
|
||||
dri2_dpy->flush->invalidate(dri2_surf->dri_drawable);
|
||||
|
||||
/* Update the shared buffer mode */
|
||||
if (has_mutable_rb &&
|
||||
draw->ActiveRenderBuffer != draw->RequestedRenderBuffer) {
|
||||
bool mode = (draw->RequestedRenderBuffer == EGL_SINGLE_BUFFER);
|
||||
_eglLog(_EGL_DEBUG, "%s: change to shared buffer mode %d",
|
||||
__func__, mode);
|
||||
|
||||
if (!droid_set_shared_buffer_mode(disp, draw, mode))
|
||||
return EGL_FALSE;
|
||||
draw->ActiveRenderBuffer = draw->RequestedRenderBuffer;
|
||||
}
|
||||
|
||||
return EGL_TRUE;
|
||||
}
|
||||
|
||||
@@ -678,6 +765,10 @@ droid_create_image_from_prime_fd_yuv(_EGLDisplay *disp, _EGLContext *ctx,
|
||||
ret = dri2_dpy->gralloc->lock_ycbcr(dri2_dpy->gralloc, buf->handle,
|
||||
0, 0, 0, 0, 0, &ycbcr);
|
||||
if (ret) {
|
||||
/* HACK: See droid_create_image_from_prime_fd() and b/32077885. */
|
||||
if (buf->format == HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
|
||||
return NULL;
|
||||
|
||||
_eglLog(_EGL_WARNING, "gralloc->lock_ycbcr failed: %d", ret);
|
||||
return NULL;
|
||||
}
|
||||
@@ -757,8 +848,20 @@ droid_create_image_from_prime_fd(_EGLDisplay *disp, _EGLContext *ctx,
|
||||
{
|
||||
unsigned int pitch;
|
||||
|
||||
if (is_yuv(buf->format))
|
||||
return droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd);
|
||||
if (is_yuv(buf->format)) {
|
||||
_EGLImage *image;
|
||||
|
||||
image = droid_create_image_from_prime_fd_yuv(disp, ctx, buf, fd);
|
||||
/*
|
||||
* HACK: b/32077885
|
||||
* There is no API available to properly query the IMPLEMENTATION_DEFINED
|
||||
* format. As a workaround we rely here on gralloc allocating either
|
||||
* an arbitrary YCbCr 4:2:0 or RGBX_8888, with the latter being recognized
|
||||
* by lock_ycbcr failing.
|
||||
*/
|
||||
if (image || buf->format != HAL_PIXEL_FORMAT_IMPLEMENTATION_DEFINED)
|
||||
return image;
|
||||
}
|
||||
|
||||
const int fourcc = get_fourcc(buf->format);
|
||||
if (fourcc == -1) {
|
||||
@@ -1005,7 +1108,6 @@ droid_add_configs_for_visuals(_EGLDriver *drv, _EGLDisplay *dpy)
|
||||
{ HAL_PIXEL_FORMAT_RGBA_8888, { 0x000000ff, 0x0000ff00, 0x00ff0000, 0xff000000 } },
|
||||
{ HAL_PIXEL_FORMAT_RGBX_8888, { 0x000000ff, 0x0000ff00, 0x00ff0000, 0x00000000 } },
|
||||
{ HAL_PIXEL_FORMAT_RGB_565, { 0x0000f800, 0x000007e0, 0x0000001f, 0x00000000 } },
|
||||
{ HAL_PIXEL_FORMAT_BGRA_8888, { 0x00ff0000, 0x0000ff00, 0x000000ff, 0xff000000 } },
|
||||
};
|
||||
|
||||
unsigned int format_count[ARRAY_SIZE(visuals)] = { 0 };
|
||||
@@ -1073,7 +1175,7 @@ droid_open_device(struct dri2_egl_display *dri2_dpy)
|
||||
GRALLOC_MODULE_PERFORM_GET_DRM_FD,
|
||||
&fd);
|
||||
if (err || fd < 0) {
|
||||
_eglLog(_EGL_WARNING, "fail to get drm fd");
|
||||
_eglLog(_EGL_DEBUG, "fail to get drm fd");
|
||||
fd = -1;
|
||||
}
|
||||
|
||||
@@ -1102,6 +1204,7 @@ static const struct dri2_egl_display_vtbl droid_display_vtbl = {
|
||||
.create_wayland_buffer_from_image = dri2_fallback_create_wayland_buffer_from_image,
|
||||
.get_sync_values = dri2_fallback_get_sync_values,
|
||||
.get_dri_drawable = dri2_surface_get_dri_drawable,
|
||||
.set_shared_buffer_mode = droid_set_shared_buffer_mode,
|
||||
};
|
||||
|
||||
static const __DRIdri2LoaderExtension droid_dri2_loader_extension = {
|
||||
@@ -1121,10 +1224,89 @@ static const __DRIimageLoaderExtension droid_image_loader_extension = {
|
||||
.getCapability = droid_get_capability,
|
||||
};
|
||||
|
||||
static void
|
||||
droid_display_shared_buffer(__DRIdrawable *driDrawable, int fence_fd,
|
||||
void *loaderPrivate)
|
||||
{
|
||||
struct dri2_egl_surface *dri2_surf = loaderPrivate;
|
||||
struct ANativeWindowBuffer *old_buffer UNUSED = dri2_surf->buffer;
|
||||
|
||||
if (!_eglSurfaceInSharedBufferMode(&dri2_surf->base)) {
|
||||
_eglLog(_EGL_WARNING, "%s: internal error: buffer is not shared",
|
||||
__func__);
|
||||
return;
|
||||
}
|
||||
|
||||
if (fence_fd >= 0) {
|
||||
/* The driver's fence is more recent than the surface's out fence, if it
|
||||
* exists at all. So use the driver's fence.
|
||||
*/
|
||||
if (dri2_surf->out_fence_fd >= 0) {
|
||||
close(dri2_surf->out_fence_fd);
|
||||
dri2_surf->out_fence_fd = -1;
|
||||
}
|
||||
} else if (dri2_surf->out_fence_fd >= 0) {
|
||||
fence_fd = dri2_surf->out_fence_fd;
|
||||
dri2_surf->out_fence_fd = -1;
|
||||
}
|
||||
|
||||
if (dri2_surf->window->queueBuffer(dri2_surf->window, dri2_surf->buffer,
|
||||
fence_fd)) {
|
||||
_eglLog(_EGL_WARNING, "%s: ANativeWindow::queueBuffer failed", __func__);
|
||||
close(fence_fd);
|
||||
return;
|
||||
}
|
||||
|
||||
fence_fd = -1;
|
||||
|
||||
if (dri2_surf->window->dequeueBuffer(dri2_surf->window, &dri2_surf->buffer,
|
||||
&fence_fd)) {
|
||||
/* Tear down the surface because it no longer has a back buffer. */
|
||||
struct dri2_egl_display *dri2_dpy =
|
||||
dri2_egl_display(dri2_surf->base.Resource.Display);
|
||||
|
||||
_eglLog(_EGL_WARNING, "%s: ANativeWindow::dequeueBuffer failed", __func__);
|
||||
|
||||
dri2_surf->base.Lost = true;
|
||||
dri2_surf->buffer = NULL;
|
||||
dri2_surf->back = NULL;
|
||||
|
||||
if (dri2_surf->dri_image_back) {
|
||||
dri2_dpy->image->destroyImage(dri2_surf->dri_image_back);
|
||||
dri2_surf->dri_image_back = NULL;
|
||||
}
|
||||
|
||||
dri2_dpy->flush->invalidate(dri2_surf->dri_drawable);
|
||||
return;
|
||||
}
|
||||
|
||||
if (fence_fd < 0)
|
||||
return;
|
||||
|
||||
/* Access to the buffer is controlled by a sync fence. Block on it.
|
||||
*
|
||||
* Ideally, we would submit the fence to the driver, and the driver would
|
||||
* postpone command execution until it signalled. But DRI lacks API for
|
||||
* that (as of 2018-04-11).
|
||||
*
|
||||
* SYNC_IOC_WAIT waits forever if timeout < 0
|
||||
*/
|
||||
sync_wait(fence_fd, -1);
|
||||
close(fence_fd);
|
||||
}
|
||||
|
||||
static const __DRImutableRenderBufferLoaderExtension droid_mutable_render_buffer_extension = {
|
||||
.base = { __DRI_MUTABLE_RENDER_BUFFER_LOADER, 1 },
|
||||
.displaySharedBuffer = droid_display_shared_buffer,
|
||||
};
|
||||
|
||||
static const __DRIextension *droid_dri2_loader_extensions[] = {
|
||||
&droid_dri2_loader_extension.base,
|
||||
&image_lookup_extension.base,
|
||||
&use_invalidate.base,
|
||||
/* No __DRI_MUTABLE_RENDER_BUFFER_LOADER because it requires
|
||||
* __DRI_IMAGE_LOADER.
|
||||
*/
|
||||
NULL,
|
||||
};
|
||||
|
||||
@@ -1132,9 +1314,82 @@ static const __DRIextension *droid_image_loader_extensions[] = {
|
||||
&droid_image_loader_extension.base,
|
||||
&image_lookup_extension.base,
|
||||
&use_invalidate.base,
|
||||
&droid_mutable_render_buffer_extension.base,
|
||||
NULL,
|
||||
};
|
||||
|
||||
static bool
|
||||
droid_probe_device(_EGLDisplay *dpy, bool swrast)
|
||||
{
|
||||
struct dri2_egl_display *dri2_dpy = dpy->DriverData;
|
||||
bool loaded;
|
||||
|
||||
dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER;
|
||||
if (!dri2_dpy->is_render_node && !gralloc_supports_gem_names()) {
|
||||
_eglLog(_EGL_WARNING, "DRI2: control nodes not supported without GEM name suport in gralloc\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
if (swrast)
|
||||
dri2_dpy->driver_name = strdup("kms_swrast");
|
||||
else
|
||||
dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd);
|
||||
|
||||
if (dri2_dpy->driver_name == NULL) {
|
||||
_eglLog(_EGL_WARNING, "DRI2: failed to get driver name");
|
||||
return false;
|
||||
}
|
||||
|
||||
/* render nodes cannot use Gem names, and thus do not support
|
||||
* the __DRI_DRI2_LOADER extension */
|
||||
if (!dri2_dpy->is_render_node) {
|
||||
dri2_dpy->loader_extensions = droid_dri2_loader_extensions;
|
||||
loaded = dri2_load_driver(dpy);
|
||||
} else {
|
||||
dri2_dpy->loader_extensions = droid_image_loader_extensions;
|
||||
loaded = dri2_load_driver_dri3(dpy);
|
||||
}
|
||||
|
||||
if (!loaded) {
|
||||
_eglLog(_EGL_WARNING, "DRI2: failed to load driver");
|
||||
free(dri2_dpy->driver_name);
|
||||
dri2_dpy->driver_name = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool
|
||||
droid_probe_devices(_EGLDisplay *dpy, bool swrast)
|
||||
{
|
||||
struct dri2_egl_display *dri2_dpy = dpy->DriverData;
|
||||
const char *name_template = "%s/renderD%d";
|
||||
const int base = 128;
|
||||
const int limit = 64;
|
||||
int minor;
|
||||
|
||||
for (minor = base; minor < base + limit; ++minor) {
|
||||
char *card_path;
|
||||
|
||||
if (asprintf(&card_path, name_template, DRM_DIR_NAME, minor) < 0)
|
||||
continue;
|
||||
|
||||
dri2_dpy->fd = loader_open_device(card_path);
|
||||
free(card_path);
|
||||
if (dri2_dpy->fd < 0)
|
||||
continue;
|
||||
|
||||
if (droid_probe_device(dpy, swrast))
|
||||
return true;
|
||||
|
||||
close(dri2_dpy->fd);
|
||||
dri2_dpy->fd = -1;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
EGLBoolean
|
||||
dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
|
||||
{
|
||||
@@ -1159,35 +1414,17 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
|
||||
dpy->DriverData = (void *) dri2_dpy;
|
||||
|
||||
dri2_dpy->fd = droid_open_device(dri2_dpy);
|
||||
if (dri2_dpy->fd < 0) {
|
||||
err = "DRI2: failed to open device";
|
||||
if (dri2_dpy->fd >= 0 &&
|
||||
!droid_probe_device(dpy, dpy->Options.UseFallback)) {
|
||||
_eglLog(_EGL_WARNING, "DRI2: Failed to load %s driver",
|
||||
dpy->Options.UseFallback ? "software" : "hardware");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
dri2_dpy->driver_name = loader_get_driver_for_fd(dri2_dpy->fd);
|
||||
if (dri2_dpy->driver_name == NULL) {
|
||||
err = "DRI2: failed to get driver name";
|
||||
} else if (!droid_probe_devices(dpy, dpy->Options.UseFallback)) {
|
||||
_eglLog(_EGL_WARNING, "DRI2: Failed to load %s driver",
|
||||
dpy->Options.UseFallback ? "software" : "hardware");
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
dri2_dpy->is_render_node = drmGetNodeTypeFromFd(dri2_dpy->fd) == DRM_NODE_RENDER;
|
||||
|
||||
/* render nodes cannot use Gem names, and thus do not support
|
||||
* the __DRI_DRI2_LOADER extension */
|
||||
if (!dri2_dpy->is_render_node) {
|
||||
dri2_dpy->loader_extensions = droid_dri2_loader_extensions;
|
||||
if (!dri2_load_driver(dpy)) {
|
||||
err = "DRI2: failed to load driver";
|
||||
goto cleanup;
|
||||
}
|
||||
} else {
|
||||
dri2_dpy->loader_extensions = droid_image_loader_extensions;
|
||||
if (!dri2_load_driver_dri3(dpy)) {
|
||||
err = "DRI3: failed to load driver";
|
||||
goto cleanup;
|
||||
}
|
||||
}
|
||||
|
||||
if (!dri2_create_screen(dpy)) {
|
||||
err = "DRI2: failed to create screen";
|
||||
goto cleanup;
|
||||
@@ -1200,11 +1437,6 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
|
||||
|
||||
dri2_setup_screen(dpy);
|
||||
|
||||
if (!droid_add_configs_for_visuals(drv, dpy)) {
|
||||
err = "DRI2: failed to add configs";
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
dpy->Extensions.ANDROID_framebuffer_target = EGL_TRUE;
|
||||
dpy->Extensions.ANDROID_image_native_buffer = EGL_TRUE;
|
||||
dpy->Extensions.ANDROID_recordable = EGL_TRUE;
|
||||
@@ -1212,6 +1444,21 @@ dri2_initialize_android(_EGLDriver *drv, _EGLDisplay *dpy)
|
||||
#if ANDROID_API_LEVEL >= 23
|
||||
dpy->Extensions.KHR_partial_update = EGL_TRUE;
|
||||
#endif
|
||||
dpy->Extensions.KHR_image = EGL_TRUE;
|
||||
#if __ANDROID_API__ >= 24
|
||||
if (dri2_dpy->mutable_render_buffer &&
|
||||
dri2_dpy->loader_extensions == droid_image_loader_extensions) {
|
||||
dpy->Extensions.KHR_mutable_render_buffer = EGL_TRUE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Create configs *after* enabling extensions because presence of DRI
|
||||
* driver extensions can affect the capabilities of EGLConfigs.
|
||||
*/
|
||||
if (!droid_add_configs_for_visuals(drv, dpy)) {
|
||||
err = "DRI2: failed to add configs";
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
/* Fill vtbl last to prevent accidentally calling virtual function during
|
||||
* initialization.
|
||||
|
45
src/egl/drivers/dri2/platform_android_gralloc_drm.h
Normal file
@@ -0,0 +1,45 @@
/*
 * Copyright 2016 Google Inc. All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#pragma once

#ifdef HAS_GRALLOC_DRM_HEADERS

#include <gralloc_drm.h>
#include <gralloc_drm_handle.h>

static inline bool gralloc_supports_gem_names(void) { return true; }

#else

#define GRALLOC_MODULE_PERFORM_GET_DRM_FD 0x0FD4DEAD

static inline int gralloc_drm_get_gem_handle(buffer_handle_t handle)
{
   return 0; /* Not supported, return invalid handle. */
}

static inline bool gralloc_supports_gem_names(void) { return false; }

#endif
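
For reference, a hedged sketch of how the fd consumed earlier in this series is usually obtained when drm_gralloc is present: the gralloc module's perform() hook is called with GRALLOC_MODULE_PERFORM_GET_DRM_FD (the helper name below is made up for illustration).

#include <hardware/gralloc.h>

/* Illustrative helper (name made up): ask drm_gralloc for the DRM device fd
 * it already has open, or return -1 if the hook is not implemented. */
static int
get_gralloc_drm_fd(const gralloc_module_t *gralloc)
{
   int fd = -1;
   int err;

   if (gralloc->perform == NULL)
      return -1;

   err = gralloc->perform(gralloc, GRALLOC_MODULE_PERFORM_GET_DRM_FD, &fd);
   if (err || fd < 0)
      return -1;

   return fd;
}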
@@ -652,6 +652,10 @@ dri2_initialize_drm(_EGLDriver *drv, _EGLDisplay *disp)
|
||||
struct gbm_device *gbm;
|
||||
const char *err;
|
||||
|
||||
/* Not supported yet */
|
||||
if (disp->Options.UseFallback)
|
||||
return EGL_FALSE;
|
||||
|
||||
loader_set_logger(_eglLog);
|
||||
|
||||
dri2_dpy = calloc(1, sizeof *dri2_dpy);
|
||||
|
@@ -504,9 +504,11 @@ _eglCreateExtensionsString(_EGLDisplay *dpy)
|
||||
_EGL_CHECK_EXTENSION(KHR_gl_texture_3D_image);
|
||||
_EGL_CHECK_EXTENSION(KHR_gl_texture_cubemap_image);
|
||||
if (dpy->Extensions.KHR_image_base && dpy->Extensions.KHR_image_pixmap)
|
||||
_eglAppendExtension(&exts, "EGL_KHR_image");
|
||||
dpy->Extensions.KHR_image = EGL_TRUE;
|
||||
_EGL_CHECK_EXTENSION(KHR_image);
|
||||
_EGL_CHECK_EXTENSION(KHR_image_base);
|
||||
_EGL_CHECK_EXTENSION(KHR_image_pixmap);
|
||||
_EGL_CHECK_EXTENSION(KHR_mutable_render_buffer);
|
||||
_EGL_CHECK_EXTENSION(KHR_no_config_context);
|
||||
_EGL_CHECK_EXTENSION(KHR_partial_update);
|
||||
_EGL_CHECK_EXTENSION(KHR_reusable_sync);
|
||||
|
@@ -268,6 +268,7 @@ static const struct {
|
||||
EGLBoolean
|
||||
_eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
|
||||
{
|
||||
_EGLDisplay *disp = conf->Display;
|
||||
EGLint i, attr, val;
|
||||
EGLBoolean valid = EGL_TRUE;
|
||||
|
||||
@@ -331,6 +332,8 @@ _eglValidateConfig(const _EGLConfig *conf, EGLBoolean for_matching)
|
||||
EGL_VG_ALPHA_FORMAT_PRE_BIT |
|
||||
EGL_MULTISAMPLE_RESOLVE_BOX_BIT |
|
||||
EGL_SWAP_BEHAVIOR_PRESERVED_BIT;
|
||||
if (disp->Extensions.KHR_mutable_render_buffer)
|
||||
mask |= EGL_MUTABLE_RENDER_BUFFER_BIT_KHR;
|
||||
break;
|
||||
case EGL_RENDERABLE_TYPE:
|
||||
case EGL_CONFORMANT:
|
||||
|
@@ -579,7 +579,6 @@ _eglInitContext(_EGLContext *ctx, _EGLDisplay *dpy, _EGLConfig *conf,
|
||||
_eglInitResource(&ctx->Resource, sizeof(*ctx), dpy);
|
||||
ctx->ClientAPI = api;
|
||||
ctx->Config = conf;
|
||||
ctx->WindowRenderBuffer = EGL_NONE;
|
||||
ctx->Profile = EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT_KHR;
|
||||
|
||||
ctx->ClientMajorVersion = 1; /* the default, per EGL spec */
|
||||
@@ -611,15 +610,42 @@ static EGLint
|
||||
_eglQueryContextRenderBuffer(_EGLContext *ctx)
|
||||
{
|
||||
_EGLSurface *surf = ctx->DrawSurface;
|
||||
EGLint rb;
|
||||
|
||||
/* From the EGL 1.5 spec:
|
||||
*
|
||||
* - If the context is not bound to a surface, then EGL_NONE will be
|
||||
* returned.
|
||||
*/
|
||||
if (!surf)
|
||||
return EGL_NONE;
|
||||
if (surf->Type == EGL_WINDOW_BIT && ctx->WindowRenderBuffer != EGL_NONE)
|
||||
rb = ctx->WindowRenderBuffer;
|
||||
else
|
||||
rb = surf->RenderBuffer;
|
||||
return rb;
|
||||
|
||||
switch (surf->Type) {
|
||||
default:
|
||||
unreachable("bad EGLSurface type");
|
||||
case EGL_PIXMAP_BIT:
|
||||
/* - If the context is bound to a pixmap surface, then EGL_SINGLE_BUFFER
|
||||
* will be returned.
|
||||
*/
|
||||
return EGL_SINGLE_BUFFER;
|
||||
case EGL_PBUFFER_BIT:
|
||||
/* - If the context is bound to a pbuffer surface, then EGL_BACK_BUFFER
|
||||
* will be returned.
|
||||
*/
|
||||
return EGL_BACK_BUFFER;
|
||||
case EGL_WINDOW_BIT:
|
||||
/* - If the context is bound to a window surface, then either
|
||||
* EGL_BACK_BUFFER or EGL_SINGLE_BUFFER may be returned. The value
|
||||
* returned depends on both the buffer requested by the setting of the
|
||||
* EGL_RENDER_BUFFER property of the surface [...], and on the client
|
||||
* API (not all client APIs support single-buffer Rendering to window
|
||||
* surfaces). Some client APIs allow control of whether rendering goes
|
||||
* to the front or back buffer. This client API-specific choice is not
|
||||
* reflected in the returned value, which only describes the buffer
|
||||
* that will be rendered to by default if not overridden by the client
|
||||
* API.
|
||||
*/
|
||||
return surf->ActiveRenderBuffer;
|
||||
}
|
||||
}
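
As a usage note for the rewritten query above, a small sketch using only standard EGL 1.5 API: eglQueryContext(EGL_RENDER_BUFFER) reports EGL_BACK_BUFFER for pbuffer surfaces, EGL_SINGLE_BUFFER for pixmaps, the surface's active render buffer for windows, and EGL_NONE when no draw surface is bound.

#include <EGL/egl.h>

/* Returns the buffer the context renders to by default; EGL_NONE either when
 * no draw surface is bound or when the query itself fails. */
static EGLint
query_render_buffer(EGLDisplay dpy, EGLContext ctx)
{
   EGLint rb = EGL_NONE;

   if (!eglQueryContext(dpy, ctx, EGL_RENDER_BUFFER, &rb))
      return EGL_NONE; /* bad display/context: the query failed */

   return rb;
}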
|
||||
|
||||
|
||||
|
@@ -64,9 +64,6 @@ struct _egl_context
|
||||
EGLint ResetNotificationStrategy;
|
||||
EGLint ContextPriority;
|
||||
EGLBoolean NoError;
|
||||
|
||||
/* The real render buffer when a window surface is bound */
|
||||
EGLint WindowRenderBuffer;
|
||||
};
|
||||
|
||||
|
||||
|
@@ -120,8 +120,10 @@ struct _egl_extensions
|
||||
EGLBoolean KHR_gl_texture_2D_image;
|
||||
EGLBoolean KHR_gl_texture_3D_image;
|
||||
EGLBoolean KHR_gl_texture_cubemap_image;
|
||||
EGLBoolean KHR_image;
|
||||
EGLBoolean KHR_image_base;
|
||||
EGLBoolean KHR_image_pixmap;
|
||||
EGLBoolean KHR_mutable_render_buffer;
|
||||
EGLBoolean KHR_no_config_context;
|
||||
EGLBoolean KHR_partial_update;
|
||||
EGLBoolean KHR_reusable_sync;
|
||||
|
@@ -122,7 +122,13 @@ _eglParseSurfaceAttribList(_EGLSurface *surf, const EGLint *attrib_list)
|
||||
err = EGL_BAD_ATTRIBUTE;
|
||||
break;
|
||||
}
|
||||
surf->RenderBuffer = val;
|
||||
surf->RequestedRenderBuffer = val;
|
||||
if (surf->Config->SurfaceType & EGL_MUTABLE_RENDER_BUFFER_BIT_KHR) {
|
||||
/* Unlike normal EGLSurfaces, one with a mutable render buffer
|
||||
* uses the application-chosen render buffer.
|
||||
*/
|
||||
surf->ActiveRenderBuffer = val;
|
||||
}
|
||||
break;
|
||||
case EGL_POST_SUB_BUFFER_SUPPORTED_NV:
|
||||
if (!dpy->Extensions.NV_post_sub_buffer ||
|
||||
@@ -285,7 +291,8 @@ _eglInitSurface(_EGLSurface *surf, _EGLDisplay *dpy, EGLint type,
|
||||
surf->TextureTarget = EGL_NO_TEXTURE;
|
||||
surf->MipmapTexture = EGL_FALSE;
|
||||
surf->LargestPbuffer = EGL_FALSE;
|
||||
surf->RenderBuffer = renderBuffer;
|
||||
surf->RequestedRenderBuffer = renderBuffer;
|
||||
surf->ActiveRenderBuffer = renderBuffer;
|
||||
surf->VGAlphaFormat = EGL_VG_ALPHA_FORMAT_NONPRE;
|
||||
surf->VGColorspace = EGL_VG_COLORSPACE_sRGB;
|
||||
surf->GLColorspace = EGL_GL_COLORSPACE_LINEAR_KHR;
|
||||
@@ -358,7 +365,35 @@ _eglQuerySurface(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
|
||||
*value = surface->SwapBehavior;
|
||||
break;
|
||||
case EGL_RENDER_BUFFER:
|
||||
*value = surface->RenderBuffer;
|
||||
/* From the EGL_KHR_mutable_render_buffer spec (v12):
|
||||
*
|
||||
* Querying EGL_RENDER_BUFFER returns the buffer which client API
|
||||
* rendering is requested to use. For a window surface, this is the
|
||||
* attribute value specified when the surface was created or last set
|
||||
* via eglSurfaceAttrib.
|
||||
*
|
||||
* In other words, querying a window surface returns the value most
|
||||
* recently *requested* by the user.
|
||||
*
|
||||
* The paragraph continues in the EGL 1.5 spec (2014.08.27):
|
||||
*
|
||||
* For a pbuffer surface, it is always EGL_BACK_BUFFER . For a pixmap
|
||||
* surface, it is always EGL_SINGLE_BUFFER . To determine the actual
|
||||
* buffer being rendered to by a context, call eglQueryContext.
|
||||
*/
|
||||
switch (surface->Type) {
|
||||
default:
|
||||
unreachable("bad EGLSurface type");
|
||||
case EGL_WINDOW_BIT:
|
||||
*value = surface->RequestedRenderBuffer;
|
||||
break;
|
||||
case EGL_PBUFFER_BIT:
|
||||
*value = EGL_BACK_BUFFER;
|
||||
break;
|
||||
case EGL_PIXMAP_BIT:
|
||||
*value = EGL_SINGLE_BUFFER;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case EGL_PIXEL_ASPECT_RATIO:
|
||||
*value = surface->AspectRatio;
|
||||
@@ -450,6 +485,31 @@ _eglSurfaceAttrib(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surface,
|
||||
break;
|
||||
surface->MultisampleResolve = value;
|
||||
break;
|
||||
case EGL_RENDER_BUFFER:
|
||||
if (!dpy->Extensions.KHR_mutable_render_buffer) {
|
||||
err = EGL_BAD_ATTRIBUTE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (value != EGL_BACK_BUFFER && value != EGL_SINGLE_BUFFER) {
|
||||
err = EGL_BAD_PARAMETER;
|
||||
break;
|
||||
}
|
||||
|
||||
/* From the EGL_KHR_mutable_render_buffer spec (v12):
|
||||
*
|
||||
* If attribute is EGL_RENDER_BUFFER, and the EGL_SURFACE_TYPE
|
||||
* attribute of the EGLConfig used to create surface does not contain
|
||||
* EGL_MUTABLE_RENDER_BUFFER_BIT_KHR, [...] an EGL_BAD_MATCH error is
|
||||
* generated [...].
|
||||
*/
|
||||
if (!(surface->Config->SurfaceType & EGL_MUTABLE_RENDER_BUFFER_BIT_KHR)) {
|
||||
err = EGL_BAD_MATCH;
|
||||
break;
|
||||
}
|
||||
|
||||
surface->RequestedRenderBuffer = value;
|
||||
break;
|
||||
case EGL_SWAP_BEHAVIOR:
|
||||
switch (value) {
|
||||
case EGL_BUFFER_DESTROYED:
|
||||
@@ -551,3 +611,18 @@ _eglSwapInterval(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf,
|
||||
{
|
||||
return EGL_TRUE;
|
||||
}
|
||||
|
||||
EGLBoolean
|
||||
_eglSurfaceHasMutableRenderBuffer(_EGLSurface *surf)
|
||||
{
|
||||
return surf->Type == EGL_WINDOW_BIT &&
|
||||
surf->Config &&
|
||||
(surf->Config->SurfaceType & EGL_MUTABLE_RENDER_BUFFER_BIT_KHR);
|
||||
}
|
||||
|
||||
EGLBoolean
|
||||
_eglSurfaceInSharedBufferMode(_EGLSurface *surf)
|
||||
{
|
||||
return _eglSurfaceHasMutableRenderBuffer(surf) &&
|
||||
surf->ActiveRenderBuffer == EGL_SINGLE_BUFFER;
|
||||
}
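
To connect these helpers to their intended use, a hedged application-level sketch of EGL_KHR_mutable_render_buffer (based on the extension text quoted in the comments; creation of dpy, surf and the current context is assumed to happen elsewhere): the window surface must come from an EGLConfig whose EGL_SURFACE_TYPE contains EGL_MUTABLE_RENDER_BUFFER_BIT_KHR, the EGL_RENDER_BUFFER change is latched by the next eglSwapBuffers, and from then on each flush displays, which is when _eglSurfaceInSharedBufferMode() above starts returning true.

#include <EGL/egl.h>
#include <EGL/eglext.h>
#include <GLES2/gl2.h>

/* The surface must have been created from a config whose EGL_SURFACE_TYPE
 * includes EGL_MUTABLE_RENDER_BUFFER_BIT_KHR. */
static void
enter_shared_buffer_mode(EGLDisplay dpy, EGLSurface surf)
{
   /* Record the request; this updates RequestedRenderBuffer only... */
   eglSurfaceAttrib(dpy, surf, EGL_RENDER_BUFFER, EGL_SINGLE_BUFFER);

   /* ...and the next eglSwapBuffers latches it into ActiveRenderBuffer,
    * as the spec excerpts above require. */
   eglSwapBuffers(dpy, surf);
}

static void
draw_frame_shared(void)
{
   /* draw ... */

   /* In shared-buffer mode there is no per-frame eglSwapBuffers; each flush
    * is what makes the frame visible (via displaySharedBuffer on Android). */
   glFlush();
}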
|
||||
|
@@ -67,7 +67,59 @@ struct _egl_surface
|
||||
EGLenum TextureTarget;
|
||||
EGLBoolean MipmapTexture;
|
||||
EGLBoolean LargestPbuffer;
|
||||
EGLenum RenderBuffer;
|
||||
|
||||
/**
|
||||
* Value of EGL_RENDER_BUFFER selected at creation.
|
||||
*
|
||||
* The user may select, for window surfaces, the EGL_RENDER_BUFFER through
|
||||
* the attribute list of eglCreateWindowSurface(). The EGL spec allows the
|
||||
* implementation to ignore the request, though; that is why we maintain both
|
||||
* RequestedRenderBuffer and ActiveRenderBuffer. For pbuffer and pixmap
|
||||
* surfaces, the EGL spec hard-codes the EGL_RENDER_BUFFER value and the
|
||||
* user must not provide it in the attribute list.
|
||||
*
|
||||
* Normally, the attribute is immutable after surface creation.
|
||||
* However, EGL_KHR_mutable_render_buffer allows the user to change it in
|
||||
* window surfaces via eglSurfaceAttrib, in which case
|
||||
* eglQuerySurface(EGL_RENDER_BUFFER) will immediately afterwards return
|
||||
* the requested value but the actual render buffer used by the context
|
||||
* does not change until completion of the next eglSwapBuffers call.
|
||||
*
|
||||
* From the EGL_KHR_mutable_render_buffer spec (v12):
|
||||
*
|
||||
* Querying EGL_RENDER_BUFFER returns the buffer which client API
|
||||
* rendering is requested to use. For a window surface, this is the
|
||||
* attribute value specified when the surface was created or last set
|
||||
* via eglSurfaceAttrib.
|
||||
*
|
||||
* eglQueryContext(EGL_RENDER_BUFFER) ignores this.
|
||||
*/
|
||||
EGLenum RequestedRenderBuffer;
|
||||
|
||||
/**
|
||||
* The EGL_RENDER_BUFFER in use by the context.
|
||||
*
|
||||
* This is valid only when bound as the draw surface. This may differ from
|
||||
* the RequestedRenderBuffer.
|
||||
*
|
||||
* Refer to eglQueryContext(EGL_RENDER_BUFFER) in the EGL spec.
|
||||
* eglQuerySurface(EGL_RENDER_BUFFER) ignores this.
|
||||
*
|
||||
* If a window surface is bound as the draw surface and has a pending,
|
||||
* user-requested change to EGL_RENDER_BUFFER, then the next eglSwapBuffers
|
||||
* will flush the pending change. (The flush of EGL_RENDER_BUFFER state may
|
||||
* occur without the implicit glFlush induced by eglSwapBuffers). The spec
|
||||
* requires that the flush occur at that time and nowhere else. During the
|
||||
* state-flush, we copy RequestedRenderBuffer to ActiveRenderBuffer.
|
||||
*
|
||||
* From the EGL_KHR_mutable_render_buffer spec (v12):
|
||||
*
|
||||
* If [...] there is a pending change to the EGL_RENDER_BUFFER
|
||||
* attribute, eglSwapBuffers performs an implicit flush operation on the
|
||||
* context and effects the attribute change.
|
||||
*/
|
||||
EGLenum ActiveRenderBuffer;
|
||||
|
||||
EGLenum VGAlphaFormat;
|
||||
EGLenum VGColorspace;
|
||||
EGLenum GLColorspace;
|
||||
@@ -124,6 +176,11 @@ _eglReleaseTexImage(_EGLDriver *drv, _EGLDisplay *disp, _EGLSurface *surf, EGLin
|
||||
extern EGLBoolean
|
||||
_eglSwapInterval(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSurface *surf, EGLint interval);
|
||||
|
||||
extern EGLBoolean
|
||||
_eglSurfaceHasMutableRenderBuffer(_EGLSurface *surf);
|
||||
|
||||
extern EGLBoolean
|
||||
_eglSurfaceInSharedBufferMode(_EGLSurface *surf);
|
||||
|
||||
/**
|
||||
* Increment reference count for the surface.
|
||||
|
@@ -110,7 +110,7 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf)
|
||||
|
||||
if (pipe_reference_described(&(*ptr)->reference, &surf->reference,
|
||||
(debug_reference_descriptor)debug_describe_surface))
|
||||
old_surf->context->surface_destroy(old_surf->context, old_surf);
|
||||
old_surf->surface_destroy(old_surf->context, old_surf);
|
||||
*ptr = surf;
|
||||
}
|
||||
|
||||
@@ -156,7 +156,7 @@ pipe_sampler_view_reference(struct pipe_sampler_view **ptr, struct pipe_sampler_
|
||||
|
||||
if (pipe_reference_described(&(*ptr)->reference, &view->reference,
|
||||
(debug_reference_descriptor)debug_describe_sampler_view))
|
||||
old_view->context->sampler_view_destroy(old_view->context, old_view);
|
||||
old_view->sampler_view_destroy(old_view->context, old_view);
|
||||
*ptr = view;
|
||||
}
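
The driver hunks that follow all make the same adjustment; as a consolidated reference, a creation path is now expected to record the destroy hook on the object itself, roughly as in this sketch (the mydrv_ names are hypothetical):

/* Hypothetical driver: record the destroy callback on the sampler view at
 * creation time so pipe_sampler_view_reference() above can invoke it
 * directly instead of going through the context's vtable. */
static struct pipe_sampler_view *
mydrv_create_sampler_view(struct pipe_context *pipe,
                          struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ)
{
   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);

   if (view) {
      *view = *templ;
      pipe_reference_init(&view->reference, 1);
      view->texture = NULL;
      pipe_resource_reference(&view->texture, texture);
      view->context = pipe;
      view->sampler_view_destroy = pipe->sampler_view_destroy;
   }
   return view;
}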
|
||||
|
||||
|
@@ -44,6 +44,10 @@
|
||||
#include "i915_resource.h"
|
||||
#include "i915_state.h"
|
||||
|
||||
static void
|
||||
i915_sampler_view_destroy(struct pipe_context *pipe,
|
||||
struct pipe_sampler_view *view);
|
||||
|
||||
/* The i915 (and related graphics cores) do not support GL_CLAMP. The
|
||||
* Intel drivers for "other operating systems" implement GL_CLAMP as
|
||||
* GL_CLAMP_TO_EDGE, so the same is done here.
|
||||
@@ -827,6 +831,7 @@ i915_create_sampler_view_custom(struct pipe_context *pipe,
|
||||
view->texture = NULL;
|
||||
pipe_resource_reference(&view->texture, texture);
|
||||
view->context = pipe;
|
||||
view->sampler_view_destroy = i915_sampler_view_destroy;
|
||||
}
|
||||
|
||||
return view;
|
||||
@@ -845,6 +850,7 @@ i915_create_sampler_view(struct pipe_context *pipe,
|
||||
view->texture = NULL;
|
||||
pipe_resource_reference(&view->texture, texture);
|
||||
view->context = pipe;
|
||||
view->sampler_view_destroy = i915_sampler_view_destroy;
|
||||
}
|
||||
|
||||
return view;
|
||||
|
@@ -376,6 +376,7 @@ i915_create_surface_custom(struct pipe_context *ctx,
|
||||
ps->u.tex.first_layer = surf_tmpl->u.tex.first_layer;
|
||||
ps->u.tex.last_layer = surf_tmpl->u.tex.last_layer;
|
||||
ps->context = ctx;
|
||||
ps->surface_destroy = ctx->surface_destroy;
|
||||
}
|
||||
return ps;
|
||||
}
|
||||
|
@@ -40,6 +40,9 @@
|
||||
#include "lp_debug.h"
|
||||
#include "state_tracker/sw_winsys.h"
|
||||
|
||||
static void
|
||||
llvmpipe_sampler_view_destroy(struct pipe_context *pipe,
|
||||
struct pipe_sampler_view *view);
|
||||
|
||||
static void *
|
||||
llvmpipe_create_sampler_state(struct pipe_context *pipe,
|
||||
@@ -183,6 +186,7 @@ llvmpipe_create_sampler_view(struct pipe_context *pipe,
|
||||
view->texture = NULL;
|
||||
pipe_resource_reference(&view->texture, texture);
|
||||
view->context = pipe;
|
||||
view->sampler_view_destroy = llvmpipe_sampler_view_destroy;
|
||||
|
||||
#ifdef DEBUG
|
||||
/*
|
||||
|
@@ -147,6 +147,7 @@ llvmpipe_create_surface(struct pipe_context *pipe,
|
||||
pipe_resource_reference(&ps->texture, pt);
|
||||
ps->context = pipe;
|
||||
ps->format = surf_tmpl->format;
|
||||
ps->surface_destroy = pipe->surface_destroy;
|
||||
if (llvmpipe_resource_is_texture(pt)) {
|
||||
assert(surf_tmpl->u.tex.level <= pt->last_level);
|
||||
assert(surf_tmpl->u.tex.first_layer <= surf_tmpl->u.tex.last_layer);
|
||||
|
@@ -46,6 +46,10 @@
|
||||
#include "r300_texture.h"
|
||||
#include "r300_vs.h"
|
||||
|
||||
static void
|
||||
r300_sampler_view_destroy(struct pipe_context *pipe,
|
||||
struct pipe_sampler_view *view);
|
||||
|
||||
/* r300_state: Functions used to initialize state context by translating
|
||||
* Gallium state objects into semi-native r300 state objects. */
|
||||
|
||||
@@ -1609,6 +1613,7 @@ r300_create_sampler_view_custom(struct pipe_context *pipe,
|
||||
view->base.reference.count = 1;
|
||||
view->base.context = pipe;
|
||||
view->base.texture = NULL;
|
||||
view->base.sampler_view_destroy = r300_sampler_view_destroy;
|
||||
pipe_resource_reference(&view->base.texture, texture);
|
||||
|
||||
view->width0_override = width0_override;
|
||||
|
@@ -1227,6 +1227,7 @@ struct pipe_surface* r300_create_surface_custom(struct pipe_context * ctx,
|
||||
pipe_reference_init(&surface->base.reference, 1);
|
||||
pipe_resource_reference(&surface->base.texture, texture);
|
||||
surface->base.context = ctx;
|
||||
surface->base.surface_destroy = ctx->surface_destroy;
|
||||
surface->base.format = surf_tmpl->format;
|
||||
surface->base.width = u_minify(width0_override, level);
|
||||
surface->base.height = u_minify(height0_override, level);
|
||||
|
@@ -737,6 +737,8 @@ void r600_sampler_views_dirty(struct r600_context *rctx,
|
||||
struct r600_samplerview_state *state);
|
||||
void r600_sampler_states_dirty(struct r600_context *rctx,
|
||||
struct r600_sampler_states *state);
|
||||
void r600_sampler_view_destroy(struct pipe_context *ctx,
|
||||
struct pipe_sampler_view *state);
|
||||
void r600_constant_buffers_dirty(struct r600_context *rctx, struct r600_constbuf_state *state);
|
||||
void r600_set_sample_locations_constant_buffer(struct r600_context *rctx);
|
||||
uint32_t r600_translate_stencil_op(int s_op);
|
||||
|
@@ -682,6 +682,7 @@ r600_create_sampler_view_custom(struct pipe_context *ctx,
|
||||
view->base.texture = texture;
|
||||
view->base.reference.count = 1;
|
||||
view->base.context = ctx;
|
||||
view->base.sampler_view_destroy = r600_sampler_view_destroy;
|
||||
|
||||
if (texture->target == PIPE_BUFFER)
|
||||
return texture_buffer_sampler_view(view, texture->width0, 1);
|
||||
|
@@ -385,8 +385,8 @@ static void r600_delete_rs_state(struct pipe_context *ctx, void *state)
|
||||
FREE(rs);
|
||||
}
|
||||
|
||||
static void r600_sampler_view_destroy(struct pipe_context *ctx,
|
||||
struct pipe_sampler_view *state)
|
||||
void r600_sampler_view_destroy(struct pipe_context *ctx,
|
||||
struct pipe_sampler_view *state)
|
||||
{
|
||||
struct r600_pipe_sampler_view *view = (struct r600_pipe_sampler_view *)state;
|
||||
|
||||
|
@@ -410,6 +410,8 @@ static int r600_fence_get_fd(struct pipe_screen *screen,
|
||||
|
||||
/* If we don't have FDs at this point, it means we don't have fences
|
||||
* either. */
|
||||
if (sdma_fd == -1 && gfx_fd == -1)
|
||||
return ws->export_signalled_sync_file(ws);
|
||||
if (sdma_fd == -1)
|
||||
return gfx_fd;
|
||||
if (gfx_fd == -1)
|
||||
|
@@ -1928,6 +1928,7 @@ struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,
|
||||
pipe_reference_init(&surface->base.reference, 1);
|
||||
pipe_resource_reference(&surface->base.texture, texture);
|
||||
surface->base.context = pipe;
|
||||
surface->base.surface_destroy = pipe->surface_destroy;
|
||||
surface->base.format = templ->format;
|
||||
surface->base.width = width;
|
||||
surface->base.height = height;
|
||||
|
@@ -609,6 +609,11 @@ struct radeon_winsys {
|
||||
int (*fence_export_sync_file)(struct radeon_winsys *ws,
|
||||
struct pipe_fence_handle *fence);
|
||||
|
||||
/**
|
||||
* Return a sync file FD that is already signalled.
|
||||
*/
|
||||
int (*export_signalled_sync_file)(struct radeon_winsys *ws);
|
||||
|
||||
/**
|
||||
* Initialize surface
|
||||
*
|
||||
|
@@ -3786,6 +3786,15 @@ si_make_texture_descriptor(struct si_screen *screen,
|
||||
}
|
||||
}
|
||||
|
||||
static void si_sampler_view_destroy(struct pipe_context *ctx,
|
||||
struct pipe_sampler_view *state)
|
||||
{
|
||||
struct si_sampler_view *view = (struct si_sampler_view *)state;
|
||||
|
||||
pipe_resource_reference(&state->texture, NULL);
|
||||
FREE(view);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a sampler view.
|
||||
*
|
||||
@@ -3821,6 +3830,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
||||
view->base.texture = NULL;
|
||||
view->base.reference.count = 1;
|
||||
view->base.context = ctx;
|
||||
view->base.sampler_view_destroy = si_sampler_view_destroy;
|
||||
|
||||
assert(texture);
|
||||
pipe_resource_reference(&view->base.texture, texture);
|
||||
@@ -3956,15 +3966,6 @@ si_create_sampler_view(struct pipe_context *ctx,
|
||||
texture ? texture->height0 : 0, 0);
|
||||
}
|
||||
|
||||
static void si_sampler_view_destroy(struct pipe_context *ctx,
|
||||
struct pipe_sampler_view *state)
|
||||
{
|
||||
struct si_sampler_view *view = (struct si_sampler_view *)state;
|
||||
|
||||
pipe_resource_reference(&state->texture, NULL);
|
||||
FREE(view);
|
||||
}
|
||||
|
||||
static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter)
|
||||
{
|
||||
return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
|
||||
|
@@ -3467,6 +3467,7 @@ softpipe_create_sampler_view(struct pipe_context *pipe,
|
||||
view->texture = NULL;
|
||||
pipe_resource_reference(&view->texture, resource);
|
||||
view->context = pipe;
|
||||
view->sampler_view_destroy = pipe->sampler_view_destroy;
|
||||
|
||||
#ifdef DEBUG
|
||||
/*
|
||||
|
@@ -300,6 +300,7 @@ softpipe_create_surface(struct pipe_context *pipe,
|
||||
pipe_resource_reference(&ps->texture, pt);
|
||||
ps->context = pipe;
|
||||
ps->format = surf_tmpl->format;
|
||||
ps->surface_destroy = pipe->surface_destroy;
|
||||
if (pt->target != PIPE_BUFFER) {
|
||||
assert(surf_tmpl->u.tex.level <= pt->last_level);
|
||||
ps->width = u_minify(pt->width0, surf_tmpl->u.tex.level);
|
||||
|
@@ -206,6 +206,7 @@ static struct pipe_surface *virgl_create_surface(struct pipe_context *ctx,
|
||||
pipe_reference_init(&surf->base.reference, 1);
|
||||
pipe_resource_reference(&surf->base.texture, resource);
|
||||
surf->base.context = ctx;
|
||||
surf->base.surface_destroy = ctx->surface_destroy;
|
||||
surf->base.format = templ->format;
|
||||
if (resource->target != PIPE_BUFFER) {
|
||||
surf->base.width = u_minify(resource->width0, templ->u.tex.level);
|
||||
@@ -676,6 +677,7 @@ static struct pipe_sampler_view *virgl_create_sampler_view(struct pipe_context *
|
||||
|
||||
grview->base.texture = NULL;
|
||||
grview->base.context = ctx;
|
||||
grview->base.sampler_view_destroy = ctx->sampler_view_destroy;
|
||||
pipe_resource_reference(&grview->base.texture, texture);
|
||||
grview->handle = handle;
|
||||
return &grview->base;
|
||||
|
@@ -76,7 +76,6 @@ virgl_tgsi_transform_instruction(struct tgsi_transform_context *ctx,
|
||||
for (unsigned i = 0; i < inst->Instruction.NumSrcRegs; i++) {
|
||||
if (inst->Src[i].Register.File == TGSI_FILE_CONSTANT &&
|
||||
inst->Src[i].Register.Dimension &&
|
||||
!inst->Src[i].Register.Indirect &&
|
||||
inst->Src[i].Dimension.Index == 0)
|
||||
inst->Src[i].Register.Dimension = 0;
|
||||
}
|
||||
|
@@ -427,6 +427,9 @@ struct pipe_surface
|
||||
uint16_t height; /**< logical height in pixels */
|
||||
|
||||
union pipe_surface_desc u;
|
||||
|
||||
void (*surface_destroy)(struct pipe_context *ctx,
|
||||
struct pipe_surface *);
|
||||
};
|
||||
|
||||
|
||||
@@ -456,6 +459,9 @@ struct pipe_sampler_view
|
||||
unsigned size; /**< size of the readable sub-range in bytes */
|
||||
} buf;
|
||||
} u;
|
||||
|
||||
void (*sampler_view_destroy)(struct pipe_context *ctx,
|
||||
struct pipe_sampler_view *view);
|
||||
};
|
||||
|
||||
|
||||
|
@@ -249,7 +249,7 @@ dri_fill_in_modes(struct dri_screen *screen)
|
||||
depth_buffer_factor, back_buffer_modes,
|
||||
ARRAY_SIZE(back_buffer_modes),
|
||||
msaa_modes, 1,
|
||||
GL_TRUE, !mixed_color_depth);
|
||||
GL_TRUE, !mixed_color_depth, GL_FALSE);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
|
||||
/* Multi-sample configs without an accumulation buffer. */
|
||||
@@ -259,7 +259,7 @@ dri_fill_in_modes(struct dri_screen *screen)
|
||||
depth_buffer_factor, back_buffer_modes,
|
||||
ARRAY_SIZE(back_buffer_modes),
|
||||
msaa_modes+1, num_msaa_modes-1,
|
||||
GL_FALSE, !mixed_color_depth);
|
||||
GL_FALSE, !mixed_color_depth, GL_FALSE);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
}
|
||||
}
|
||||
|
@@ -113,6 +113,28 @@ static int amdgpu_fence_export_sync_file(struct radeon_winsys *rws,
|
||||
return fd;
|
||||
}
|
||||
|
||||
static int amdgpu_export_signalled_sync_file(struct radeon_winsys *rws)
|
||||
{
|
||||
struct amdgpu_winsys *ws = amdgpu_winsys(rws);
|
||||
uint32_t syncobj;
|
||||
int fd = -1;
|
||||
|
||||
int r = amdgpu_cs_create_syncobj2(ws->dev, DRM_SYNCOBJ_CREATE_SIGNALED,
|
||||
&syncobj);
|
||||
if (r) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
r = amdgpu_cs_syncobj_export_sync_file(ws->dev, syncobj, &fd);
|
||||
if (r) {
|
||||
fd = -1;
|
||||
}
|
||||
|
||||
amdgpu_cs_destroy_syncobj(ws->dev, syncobj);
|
||||
return fd;
|
||||
}
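
A short, illustrative sketch of what the caller of the returned fd can rely on, using plain POSIX only: the underlying syncobj is created already signalled, so a wait on the exported sync file completes immediately, and ownership of the fd passes to the receiver, who must close it.

#include <poll.h>
#include <unistd.h>
#include <stdbool.h>

/* A sync_file fd reports POLLIN once its fence has signalled; for the fd
 * produced above this is already the case, so the poll never blocks. */
static bool
sync_file_is_signalled(int fd)
{
   struct pollfd p = { .fd = fd, .events = POLLIN };

   return poll(&p, 1, 0 /* return immediately */) == 1 &&
          (p.revents & POLLIN);
}

/* Whoever ends up holding the fd is responsible for close(fd). */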
|
||||
|
||||
|
||||
static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
|
||||
uint64_t seq_no,
|
||||
uint64_t *user_fence_cpu_address)
|
||||
@@ -1552,4 +1574,5 @@ void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
|
||||
ws->base.fence_reference = amdgpu_fence_reference;
|
||||
ws->base.fence_import_sync_file = amdgpu_fence_import_sync_file;
|
||||
ws->base.fence_export_sync_file = amdgpu_fence_export_sync_file;
|
||||
ws->base.export_signalled_sync_file = amdgpu_export_signalled_sync_file;
|
||||
}
|
||||
|
@@ -59,20 +59,29 @@
|
||||
#define DEBUG_PRINT(msg, ...)
|
||||
#endif
|
||||
|
||||
struct kms_sw_displaytarget;
|
||||
|
||||
struct kms_sw_plane {
|
||||
unsigned width;
|
||||
unsigned height;
|
||||
unsigned stride;
|
||||
unsigned offset;
|
||||
struct kms_sw_displaytarget* dt;
|
||||
struct list_head link;
|
||||
};
|
||||
|
||||
struct kms_sw_displaytarget
|
||||
{
|
||||
enum pipe_format format;
|
||||
unsigned width;
|
||||
unsigned height;
|
||||
unsigned stride;
|
||||
unsigned size;
|
||||
|
||||
uint32_t handle;
|
||||
void *mapped;
|
||||
void *ro_mapped;
|
||||
|
||||
int ref_count;
|
||||
struct list_head link;
|
||||
struct list_head planes;
|
||||
};
|
||||
|
||||
struct kms_sw_winsys
|
||||
@@ -83,10 +92,10 @@ struct kms_sw_winsys
|
||||
struct list_head bo_list;
|
||||
};
|
||||
|
||||
static inline struct kms_sw_displaytarget *
|
||||
kms_sw_displaytarget( struct sw_displaytarget *dt )
|
||||
static inline struct kms_sw_plane *
|
||||
kms_sw_plane( struct sw_displaytarget *dt )
|
||||
{
|
||||
return (struct kms_sw_displaytarget *)dt;
|
||||
return (struct kms_sw_plane *)dt;
|
||||
}
|
||||
|
||||
static inline struct kms_sw_winsys *
|
||||
@@ -105,6 +114,42 @@ kms_sw_is_displaytarget_format_supported( struct sw_winsys *ws,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static struct kms_sw_plane *get_plane(struct kms_sw_displaytarget *kms_sw_dt,
|
||||
enum pipe_format format,
|
||||
unsigned width, unsigned height,
|
||||
unsigned stride, unsigned offset) {
|
||||
struct kms_sw_plane * tmp, * plane = NULL;
|
||||
if (offset + util_format_get_2d_size(format, stride, height) >
|
||||
kms_sw_dt->size) {
|
||||
DEBUG_PRINT("KMS-DEBUG: plane too big. format: %d stride: %d height: %d "
|
||||
"offset: %d size:%d\n", format, stride, height, offset,
|
||||
kms_sw_dt->size);
|
||||
return NULL;
|
||||
}
|
||||
LIST_FOR_EACH_ENTRY(tmp, &kms_sw_dt->planes, link) {
|
||||
if (tmp->offset == offset) {
|
||||
plane = tmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (plane) {
|
||||
assert(plane->width == width);
|
||||
assert(plane->height == height);
|
||||
assert(plane->stride == stride);
|
||||
assert(plane->dt == kms_sw_dt);
|
||||
} else {
|
||||
plane = CALLOC_STRUCT(kms_sw_plane);
|
||||
if (plane == NULL) return NULL;
|
||||
plane->width = width;
|
||||
plane->height = height;
|
||||
plane->stride = stride;
|
||||
plane->offset = offset;
|
||||
plane->dt = kms_sw_dt;
|
||||
list_add(&plane->link, &kms_sw_dt->planes);
|
||||
}
|
||||
return plane;
|
||||
}
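
An illustrative (assumed, not taken from the patch) example of why planes are keyed by byte offset: one dumb buffer can back a multi-planar image such as 640x480 NV12, where the Y and UV planes are the same BO at different offsets.

/* Hypothetical: look up the two planes of a 640x480 NV12 image living in one
 * dumb buffer.  The interleaved UV plane starts right after the Y plane, at
 * offset stride * height = 640 * 480 = 307200 bytes.  Both calls hit the same
 * kms_sw_displaytarget and just return two entries on its planes list. */
static void
import_nv12_planes(struct kms_sw_displaytarget *kms_sw_dt)
{
   struct kms_sw_plane *y_plane =
      get_plane(kms_sw_dt, PIPE_FORMAT_R8_UNORM, 640, 480, 640, 0);
   struct kms_sw_plane *uv_plane =
      get_plane(kms_sw_dt, PIPE_FORMAT_R8G8_UNORM, 320, 240, 640, 640 * 480);

   /* Both planes share kms_sw_dt->handle; only their offsets differ. */
   (void)y_plane;
   (void)uv_plane;
}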
|
||||
|
||||
static struct sw_displaytarget *
|
||||
kms_sw_displaytarget_create(struct sw_winsys *ws,
|
||||
unsigned tex_usage,
|
||||
@@ -124,11 +169,10 @@ kms_sw_displaytarget_create(struct sw_winsys *ws,
|
||||
if (!kms_sw_dt)
|
||||
goto no_dt;
|
||||
|
||||
list_inithead(&kms_sw_dt->planes);
|
||||
kms_sw_dt->ref_count = 1;
|
||||
|
||||
kms_sw_dt->format = format;
|
||||
kms_sw_dt->width = width;
|
||||
kms_sw_dt->height = height;
|
||||
|
||||
memset(&create_req, 0, sizeof(create_req));
|
||||
create_req.bpp = 32;
|
||||
@@ -138,17 +182,19 @@ kms_sw_displaytarget_create(struct sw_winsys *ws,
|
||||
if (ret)
|
||||
goto free_bo;
|
||||
|
||||
kms_sw_dt->stride = create_req.pitch;
|
||||
kms_sw_dt->size = create_req.size;
|
||||
kms_sw_dt->handle = create_req.handle;
|
||||
struct kms_sw_plane* plane = get_plane(kms_sw_dt, format, width, height,
|
||||
create_req.pitch, 0);
|
||||
if (plane == NULL)
|
||||
goto free_bo;
|
||||
|
||||
list_add(&kms_sw_dt->link, &kms_sw->bo_list);
|
||||
|
||||
DEBUG_PRINT("KMS-DEBUG: created buffer %u (size %u)\n", kms_sw_dt->handle, kms_sw_dt->size);
|
||||
|
||||
*stride = kms_sw_dt->stride;
|
||||
return (struct sw_displaytarget *)kms_sw_dt;
|
||||
|
||||
*stride = create_req.pitch;
|
||||
return (struct sw_displaytarget *) plane;
|
||||
free_bo:
|
||||
memset(&destroy_req, 0, sizeof destroy_req);
|
||||
destroy_req.handle = create_req.handle;
|
||||
@@ -163,13 +209,19 @@ kms_sw_displaytarget_destroy(struct sw_winsys *ws,
|
||||
struct sw_displaytarget *dt)
|
||||
{
|
||||
struct kms_sw_winsys *kms_sw = kms_sw_winsys(ws);
|
||||
struct kms_sw_displaytarget *kms_sw_dt = kms_sw_displaytarget(dt);
|
||||
struct kms_sw_plane *plane = kms_sw_plane(dt);
|
||||
struct kms_sw_displaytarget *kms_sw_dt = plane->dt;
|
||||
struct drm_mode_destroy_dumb destroy_req;
|
||||
|
||||
kms_sw_dt->ref_count --;
|
||||
if (kms_sw_dt->ref_count > 0)
|
||||
return;
|
||||
|
||||
if (kms_sw_dt->ro_mapped)
|
||||
munmap(kms_sw_dt->ro_mapped, kms_sw_dt->size);
|
||||
if (kms_sw_dt->mapped)
|
||||
munmap(kms_sw_dt->mapped, kms_sw_dt->size);
|
||||
|
||||
memset(&destroy_req, 0, sizeof destroy_req);
|
||||
destroy_req.handle = kms_sw_dt->handle;
|
||||
drmIoctl(kms_sw->fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_req);
|
||||
@@ -178,6 +230,10 @@ kms_sw_displaytarget_destroy(struct sw_winsys *ws,
|
||||
|
||||
DEBUG_PRINT("KMS-DEBUG: destroyed buffer %u\n", kms_sw_dt->handle);
|
||||
|
||||
struct kms_sw_plane * tmp;
|
||||
LIST_FOR_EACH_ENTRY_SAFE(plane, tmp, &kms_sw_dt->planes, link) {
|
||||
FREE(plane);
|
||||
}
|
||||
FREE(kms_sw_dt);
|
||||
}
|
||||
|
||||
@@ -187,7 +243,8 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,
|
||||
unsigned flags)
|
||||
{
|
||||
struct kms_sw_winsys *kms_sw = kms_sw_winsys(ws);
|
||||
struct kms_sw_displaytarget *kms_sw_dt = kms_sw_displaytarget(dt);
|
||||
struct kms_sw_plane *plane = kms_sw_plane(dt);
|
||||
struct kms_sw_displaytarget *kms_sw_dt = plane->dt;
|
||||
struct drm_mode_map_dumb map_req;
|
||||
int prot, ret;
|
||||
|
||||
@@ -198,16 +255,20 @@ kms_sw_displaytarget_map(struct sw_winsys *ws,
|
||||
return NULL;
|
||||
|
||||
prot = (flags == PIPE_TRANSFER_READ) ? PROT_READ : (PROT_READ | PROT_WRITE);
|
||||
kms_sw_dt->mapped = mmap(0, kms_sw_dt->size, prot, MAP_SHARED,
|
||||
kms_sw->fd, map_req.offset);
|
||||
void **ptr = (flags == PIPE_TRANSFER_READ) ? &kms_sw_dt->ro_mapped : &kms_sw_dt->mapped;
|
||||
if (*ptr == NULL) {
|
||||
void * tmp = mmap(0, kms_sw_dt->size, prot, MAP_SHARED,
|
||||
kms_sw->fd, map_req.offset);
|
||||
if (tmp == MAP_FAILED)
|
||||
return NULL;
|
||||
*ptr = tmp;
|
||||
}
|
||||
|
||||
if (kms_sw_dt->mapped == MAP_FAILED)
|
||||
return NULL;
|
||||
DEBUG_PRINT("KMS-DEBUG: mapped buffer %u (size %u) at %p %dx%d \n",
|
||||
kms_sw_dt->handle, kms_sw_dt->size, *ptr,
|
||||
plane->width, plane->height);
|
||||
|
||||
DEBUG_PRINT("KMS-DEBUG: mapped buffer %u (size %u) at %p\n",
|
||||
kms_sw_dt->handle, kms_sw_dt->size, kms_sw_dt->mapped);
|
||||
|
||||
return kms_sw_dt->mapped;
|
||||
return *ptr + plane->offset;
|
||||
}
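
A brief usage sketch against the sw_winsys interface (standard gallium API, nothing new introduced here): with this change, read-only and writable mappings are cached separately on the displaytarget, map() returns a pointer already offset to the plane, unmap() becomes a no-op, and the real munmap happens only when the target is destroyed.

static void
touch_displaytarget(struct sw_winsys *winsys, struct sw_displaytarget *dt)
{
   void *ro = winsys->displaytarget_map(winsys, dt, PIPE_TRANSFER_READ);
   void *rw = winsys->displaytarget_map(winsys, dt, PIPE_TRANSFER_WRITE);

   /* ro and rw point at plane->offset inside two separately cached mappings
    * of the same dumb buffer; repeated map calls reuse them. */
   (void)ro;
   (void)rw;

   winsys->displaytarget_unmap(winsys, dt);   /* no-op now; BO stays mapped */
   winsys->displaytarget_destroy(winsys, dt); /* munmap + DESTROY_DUMB here */
}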
|
||||
|
||||
static struct kms_sw_displaytarget *
|
||||
@@ -230,10 +291,11 @@ kms_sw_displaytarget_find_and_ref(struct kms_sw_winsys *kms_sw,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct kms_sw_displaytarget *
|
||||
static struct kms_sw_plane *
|
||||
kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
|
||||
enum pipe_format format,
|
||||
unsigned width, unsigned height,
|
||||
unsigned stride)
|
||||
unsigned stride, unsigned offset)
|
||||
{
|
||||
uint32_t handle = -1;
|
||||
struct kms_sw_displaytarget * kms_sw_dt;
|
||||
@@ -245,13 +307,19 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
|
||||
return NULL;
|
||||
|
||||
kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, handle);
|
||||
if (kms_sw_dt)
|
||||
return kms_sw_dt;
|
||||
struct kms_sw_plane * plane = NULL;
|
||||
if (kms_sw_dt) {
|
||||
plane = get_plane(kms_sw_dt, format, width, height, stride, offset);
|
||||
if (plane == NULL)
|
||||
kms_sw_dt->ref_count --;
|
||||
return plane;
|
||||
}
|
||||
|
||||
kms_sw_dt = CALLOC_STRUCT(kms_sw_displaytarget);
|
||||
if (!kms_sw_dt)
|
||||
return NULL;
|
||||
|
||||
list_inithead(&kms_sw_dt->planes);
|
||||
off_t lseek_ret = lseek(fd, 0, SEEK_END);
|
||||
if (lseek_ret == -1) {
|
||||
FREE(kms_sw_dt);
|
||||
@@ -260,27 +328,27 @@ kms_sw_displaytarget_add_from_prime(struct kms_sw_winsys *kms_sw, int fd,
|
||||
kms_sw_dt->size = lseek_ret;
|
||||
kms_sw_dt->ref_count = 1;
|
||||
kms_sw_dt->handle = handle;
|
||||
kms_sw_dt->width = width;
|
||||
kms_sw_dt->height = height;
|
||||
kms_sw_dt->stride = stride;
|
||||
|
||||
lseek(fd, 0, SEEK_SET);
|
||||
plane = get_plane(kms_sw_dt, format, width, height, stride, offset);
|
||||
if (plane == NULL) {
|
||||
FREE(kms_sw_dt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
list_add(&kms_sw_dt->link, &kms_sw->bo_list);
|
||||
|
||||
return kms_sw_dt;
|
||||
return plane;
|
||||
}
|
||||
|
||||
static void
|
||||
kms_sw_displaytarget_unmap(struct sw_winsys *ws,
|
||||
struct sw_displaytarget *dt)
|
||||
{
|
||||
struct kms_sw_displaytarget *kms_sw_dt = kms_sw_displaytarget(dt);
|
||||
struct kms_sw_plane * plane = kms_sw_plane(dt);
|
||||
struct kms_sw_displaytarget *kms_sw_dt = plane->dt;
|
||||
|
||||
DEBUG_PRINT("KMS-DEBUG: unmapped buffer %u (was %p)\n", kms_sw_dt->handle, kms_sw_dt->mapped);
|
||||
|
||||
munmap(kms_sw_dt->mapped, kms_sw_dt->size);
|
||||
kms_sw_dt->mapped = NULL;
|
||||
DEBUG_PRINT("KMS-DEBUG: ignore unmap buffer %u \n", kms_sw_dt->handle);
|
||||
}
|
||||
|
||||
static struct sw_displaytarget *
|
||||
@@ -291,30 +359,34 @@ kms_sw_displaytarget_from_handle(struct sw_winsys *ws,
|
||||
{
|
||||
struct kms_sw_winsys *kms_sw = kms_sw_winsys(ws);
|
||||
struct kms_sw_displaytarget *kms_sw_dt;
|
||||
struct kms_sw_plane *kms_sw_pl;
|
||||
|
||||
assert(whandle->type == DRM_API_HANDLE_TYPE_KMS ||
|
||||
whandle->type == DRM_API_HANDLE_TYPE_FD);
|
||||
|
||||
if (whandle->offset != 0) {
|
||||
DEBUG_PRINT("KMS-DEBUG: attempt to import unsupported winsys offset %d\n",
|
||||
whandle->offset);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
switch(whandle->type) {
|
||||
case DRM_API_HANDLE_TYPE_FD:
|
||||
kms_sw_dt = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle,
|
||||
kms_sw_pl = kms_sw_displaytarget_add_from_prime(kms_sw, whandle->handle,
|
||||
templ->format,
|
||||
templ->width0,
|
||||
templ->height0,
|
||||
whandle->stride);
|
||||
if (kms_sw_dt)
|
||||
*stride = kms_sw_dt->stride;
|
||||
return (struct sw_displaytarget *)kms_sw_dt;
|
||||
whandle->stride,
|
||||
whandle->offset);
|
||||
if (kms_sw_pl) {
|
||||
*stride = kms_sw_pl->stride;
|
||||
}
|
||||
return (struct sw_displaytarget *)kms_sw_pl;
|
||||
case DRM_API_HANDLE_TYPE_KMS:
|
||||
kms_sw_dt = kms_sw_displaytarget_find_and_ref(kms_sw, whandle->handle);
|
||||
if (kms_sw_dt) {
|
||||
*stride = kms_sw_dt->stride;
|
||||
return (struct sw_displaytarget *)kms_sw_dt;
|
||||
struct kms_sw_plane * plane;
|
||||
LIST_FOR_EACH_ENTRY(plane, &kms_sw_dt->planes, link) {
|
||||
if (whandle->offset == plane->offset) {
|
||||
*stride = plane->stride;
|
||||
return (struct sw_displaytarget *)plane;
|
||||
}
|
||||
}
|
||||
kms_sw_dt->ref_count --;
|
||||
}
|
||||
/* fallthrough */
|
||||
default:
|
||||
@@ -331,19 +403,20 @@ kms_sw_displaytarget_get_handle(struct sw_winsys *winsys,
|
||||
struct winsys_handle *whandle)
|
||||
{
|
||||
struct kms_sw_winsys *kms_sw = kms_sw_winsys(winsys);
|
||||
struct kms_sw_displaytarget *kms_sw_dt = kms_sw_displaytarget(dt);
|
||||
struct kms_sw_plane *plane = kms_sw_plane(dt);
|
||||
struct kms_sw_displaytarget *kms_sw_dt = plane->dt;
|
||||
|
||||
switch(whandle->type) {
|
||||
case DRM_API_HANDLE_TYPE_KMS:
|
||||
whandle->handle = kms_sw_dt->handle;
|
||||
whandle->stride = kms_sw_dt->stride;
|
||||
whandle->offset = 0;
|
||||
whandle->stride = plane->stride;
|
||||
whandle->offset = plane->offset;
|
||||
return TRUE;
|
||||
case DRM_API_HANDLE_TYPE_FD:
|
||||
if (!drmPrimeHandleToFD(kms_sw->fd, kms_sw_dt->handle,
|
||||
DRM_CLOEXEC, (int*)&whandle->handle)) {
|
||||
whandle->stride = kms_sw_dt->stride;
|
||||
whandle->offset = 0;
|
||||
whandle->stride = plane->stride;
|
||||
whandle->offset = plane->offset;
|
||||
return TRUE;
|
||||
}
|
||||
/* fallthrough */
|
||||
|
@@ -46,6 +46,7 @@ COMPILER_FILES = \
|
||||
compiler/brw_eu_util.c \
|
||||
compiler/brw_eu_validate.c \
|
||||
compiler/brw_fs_builder.h \
|
||||
compiler/brw_fs_bank_conflicts.cpp \
|
||||
compiler/brw_fs_cmod_propagation.cpp \
|
||||
compiler/brw_fs_combine_constants.cpp \
|
||||
compiler/brw_fs_copy_propagation.cpp \
|
||||
|
@@ -98,6 +98,7 @@ ends_block(const backend_instruction *inst)
|
||||
op == BRW_OPCODE_ELSE ||
|
||||
op == BRW_OPCODE_CONTINUE ||
|
||||
op == BRW_OPCODE_BREAK ||
|
||||
op == BRW_OPCODE_DO ||
|
||||
op == BRW_OPCODE_WHILE;
|
||||
}
|
||||
|
||||
@@ -268,13 +269,57 @@ cfg_t::cfg_t(exec_list *instructions)
|
||||
}
|
||||
|
||||
cur->instructions.push_tail(inst);
|
||||
|
||||
/* Represent divergent execution of the loop as a pair of alternative
|
||||
* edges coming out of the DO instruction: For any physical iteration
|
||||
* of the loop a given logical thread can either start off enabled
|
||||
* (which is represented as the "next" successor), or disabled (if it
|
||||
* has reached a non-uniform exit of the loop during a previous
|
||||
* iteration, which is represented as the "cur_while" successor).
|
||||
*
|
||||
* The disabled edge will be taken by the logical thread anytime we
|
||||
* arrive at the DO instruction through a back-edge coming from a
|
||||
* conditional exit of the loop where divergent control flow started.
|
||||
*
|
||||
* This guarantees that there is a control-flow path from any
|
||||
* divergence point of the loop into the convergence point
|
||||
* (immediately past the WHILE instruction) such that it overlaps the
|
||||
* whole IP region of divergent control flow (potentially the whole
|
||||
* loop) *and* doesn't imply the execution of any instructions part
|
||||
* of the loop (since the corresponding execution mask bit will be
|
||||
* disabled for a diverging thread).
|
||||
*
|
||||
* This way we make sure that any variables that are live throughout
|
||||
* the region of divergence for an inactive logical thread are also
|
||||
* considered to interfere with any other variables assigned by
|
||||
* active logical threads within the same physical region of the
|
||||
* program, since otherwise we would risk cross-channel data
|
||||
* corruption.
|
||||
*/
|
||||
next = new_block();
|
||||
cur->add_successor(mem_ctx, next);
|
||||
cur->add_successor(mem_ctx, cur_while);
|
||||
set_next_block(&cur, next, ip);
|
||||
break;
|
||||
|
||||
case BRW_OPCODE_CONTINUE:
|
||||
cur->instructions.push_tail(inst);
|
||||
|
||||
/* A conditional CONTINUE may start a region of divergent control
|
||||
* flow until the start of the next loop iteration (*not* until the
|
||||
* end of the loop which is why the successor is not the top-level
|
||||
* divergence point at cur_do). The live interval of any variable
|
||||
* extending through a CONTINUE edge is guaranteed to overlap the
|
||||
* whole region of divergent execution, because any variable live-out
|
||||
* at the CONTINUE instruction will also be live-in at the top of the
|
||||
* loop, and therefore also live-out at the bottom-most point of the
|
||||
* loop which is reachable from the top (since a control flow path
|
||||
* exists from a definition of the variable through this CONTINUE
|
||||
* instruction, the top of the loop, the (reachable) bottom of the
|
||||
* loop, the top of the loop again, into a use of the variable).
|
||||
*/
|
||||
assert(cur_do != NULL);
|
||||
cur->add_successor(mem_ctx, cur_do);
|
||||
cur->add_successor(mem_ctx, cur_do->next());
|
||||
|
||||
next = new_block();
|
||||
if (inst->predicate)
|
||||
@@ -286,8 +331,18 @@ cfg_t::cfg_t(exec_list *instructions)
|
||||
case BRW_OPCODE_BREAK:
|
||||
cur->instructions.push_tail(inst);
|
||||
|
||||
assert(cur_while != NULL);
|
||||
cur->add_successor(mem_ctx, cur_while);
|
||||
/* A conditional BREAK instruction may start a region of divergent
|
||||
* control flow until the end of the loop if the condition is
|
||||
* non-uniform, in which case the loop will execute additional
|
||||
* iterations with the present channel disabled. We model this as a
|
||||
* control flow path from the divergence point to the convergence
|
||||
* point that overlaps the whole IP range of the loop and skips over
|
||||
* the execution of any other instructions part of the loop.
|
||||
*
|
||||
* See the DO case for additional explanation.
|
||||
*/
|
||||
assert(cur_do != NULL);
|
||||
cur->add_successor(mem_ctx, cur_do);
|
||||
|
||||
next = new_block();
|
||||
if (inst->predicate)
|
||||
@@ -300,10 +355,18 @@ cfg_t::cfg_t(exec_list *instructions)
|
||||
cur->instructions.push_tail(inst);
|
||||
|
||||
assert(cur_do != NULL && cur_while != NULL);
|
||||
cur->add_successor(mem_ctx, cur_do);
|
||||
|
||||
if (inst->predicate)
|
||||
cur->add_successor(mem_ctx, cur_while);
|
||||
/* A conditional WHILE instruction may start a region of divergent
|
||||
* control flow until the end of the loop, just like the BREAK
|
||||
* instruction. See the BREAK case for more details. OTOH an
|
||||
* unconditional WHILE instruction is non-divergent (just like an
|
||||
* unconditional CONTINUE), and will necessarily lead to the
|
||||
* execution of an additional iteration of the loop for all enabled
|
||||
* channels, so we may skip over the divergence point at the top of
|
||||
* the loop to keep the CFG as unambiguous as possible.
|
||||
*/
|
||||
cur->add_successor(mem_ctx, inst->predicate ? cur_do :
|
||||
cur_do->next());
|
||||
|
||||
set_next_block(&cur, cur_while, ip);
|
||||
|
||||
|
@@ -5961,6 +5961,8 @@ fs_visitor::allocate_registers(bool allow_spilling)
|
||||
if (failed)
|
||||
return;
|
||||
|
||||
opt_bank_conflicts();
|
||||
|
||||
schedule_instructions(SCHEDULE_POST);
|
||||
|
||||
if (last_scratch > 0) {
|
||||
|
@@ -145,6 +145,8 @@ public:
|
||||
exec_list *acp);
|
||||
bool opt_drop_redundant_mov_to_flags();
|
||||
bool opt_register_renaming();
|
||||
bool opt_bank_conflicts();
|
||||
unsigned bank_conflict_cycles(const fs_inst *inst) const;
|
||||
bool register_coalesce();
|
||||
bool compute_to_mrf();
|
||||
bool eliminate_find_live_channel();
|
||||
|
912
src/intel/compiler/brw_fs_bank_conflicts.cpp
Normal file
@@ -0,0 +1,912 @@
|
||||
/*
|
||||
* Copyright © 2017 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/** @file brw_fs_bank_conflicts.cpp
|
||||
*
|
||||
* This file contains a GRF bank conflict mitigation pass. The pass is
|
||||
* intended to be run after register allocation and works by rearranging the
|
||||
* layout of the GRF space (without altering the semantics of the program) in
|
||||
* a way that minimizes the number of GRF bank conflicts incurred by ternary
|
||||
* instructions.
|
||||
*
|
||||
* Unfortunately there is close to no information about bank conflicts in the
|
||||
* hardware spec, but experimentally on Gen7-Gen9 ternary instructions seem to
|
||||
* incur an average bank conflict penalty of one cycle per SIMD8 op whenever
|
||||
* the second and third source are stored in the same GRF bank (\sa bank_of()
|
||||
* for the exact bank layout) which cannot be fetched during the same cycle by
|
||||
* the EU, unless the EU logic manages to optimize out the read cycle of a
|
||||
* duplicate source register (\sa is_conflict_optimized_out()).
|
||||
*
|
||||
* The asymptotic run-time of the algorithm is dominated by the
|
||||
* shader_conflict_weight_matrix() computation below, which is O(n) on the
|
||||
* number of instructions in the program, however for small and medium-sized
|
||||
* programs the run-time is likely to be dominated by
|
||||
* optimize_reg_permutation() which is O(m^3) on the number of GRF atoms of
|
||||
* the program (\sa partitioning), which is bounded (since the program uses a
|
||||
* bounded number of registers post-regalloc) and of the order of 100. For
|
||||
* that reason optimize_reg_permutation() is vectorized in order to keep the
|
||||
* cubic term within reasonable bounds for m close to its theoretical maximum.
|
||||
*/
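/* (Illustrative aside, not part of the original file: for a ternary
 * instruction such as
 *
 *    mad(8)  g10  g2  g4  g8
 *
 * the penalty described above is paid when the second and third sources --
 * g4 and g8 here -- happen to map to the same GRF bank; bank_of() further
 * down in this file gives the exact register-to-bank mapping, and
 * is_conflict_optimized_out() covers the duplicate-source case that the EU
 * reads for free.) */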
|
||||
|
||||
#include "brw_fs.h"
|
||||
#include "brw_cfg.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
|
||||
#include <emmintrin.h>
|
||||
|
||||
/**
|
||||
* Thin layer around vector intrinsics so they can be easily replaced with
|
||||
* e.g. the fall-back scalar path, an implementation with different vector
|
||||
* width or using different SIMD architectures (AVX-512?!).
|
||||
*
|
||||
* This implementation operates on pairs of independent SSE2 integer vectors à
|
||||
* la SIMD16 for somewhat improved throughput. SSE2 is supported by virtually
|
||||
* all platforms that care about bank conflicts, so this path should almost
|
||||
* always be available in practice.
|
||||
*/
|
||||
namespace {
|
||||
/**
|
||||
* SIMD integer vector data type.
|
||||
*/
|
||||
struct vector_type {
|
||||
__m128i v[2];
|
||||
};
|
||||
|
||||
/**
|
||||
* Scalar data type matching the representation of a single component of \p
|
||||
* vector_type.
|
||||
*/
|
||||
typedef int16_t scalar_type;
|
||||
|
||||
/**
|
||||
* Maximum integer value representable as a \p scalar_type.
|
||||
*/
|
||||
const scalar_type max_scalar = INT16_MAX;
|
||||
|
||||
/**
|
||||
* Number of components of a \p vector_type.
|
||||
*/
|
||||
const unsigned vector_width = 2 * sizeof(__m128i) / sizeof(scalar_type);
|
||||
|
||||
/**
|
||||
* Set the i-th component of vector \p v to \p x.
|
||||
*/
|
||||
void
|
||||
set(vector_type &v, unsigned i, scalar_type x)
|
||||
{
|
||||
assert(i < vector_width);
|
||||
memcpy((char *)v.v + i * sizeof(x), &x, sizeof(x));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the i-th component of vector \p v.
|
||||
*/
|
||||
scalar_type
|
||||
get(const vector_type &v, unsigned i)
|
||||
{
|
||||
assert(i < vector_width);
|
||||
scalar_type x;
|
||||
memcpy(&x, (char *)v.v + i * sizeof(x), sizeof(x));
|
||||
return x;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add two vectors with saturation.
|
||||
*/
|
||||
vector_type
|
||||
adds(const vector_type &v, const vector_type &w)
|
||||
{
|
||||
const vector_type u = {{
|
||||
_mm_adds_epi16(v.v[0], w.v[0]),
|
||||
_mm_adds_epi16(v.v[1], w.v[1])
|
||||
}};
|
||||
return u;
|
||||
}
|
||||
|
||||
/**
|
||||
* Subtract two vectors with saturation.
|
||||
*/
|
||||
vector_type
|
||||
subs(const vector_type &v, const vector_type &w)
|
||||
{
|
||||
const vector_type u = {{
|
||||
_mm_subs_epi16(v.v[0], w.v[0]),
|
||||
_mm_subs_epi16(v.v[1], w.v[1])
|
||||
}};
|
||||
return u;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the bitwise conjunction of two vectors.
|
||||
*/
|
||||
vector_type
|
||||
mask(const vector_type &v, const vector_type &w)
|
||||
{
|
||||
const vector_type u = {{
|
||||
_mm_and_si128(v.v[0], w.v[0]),
|
||||
_mm_and_si128(v.v[1], w.v[1])
|
||||
}};
|
||||
return u;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduce the components of a vector using saturating addition.
|
||||
*/
|
||||
scalar_type
|
||||
sums(const vector_type &v)
|
||||
{
|
||||
const __m128i v8 = _mm_adds_epi16(v.v[0], v.v[1]);
|
||||
const __m128i v4 = _mm_adds_epi16(v8, _mm_shuffle_epi32(v8, 0x4e));
|
||||
const __m128i v2 = _mm_adds_epi16(v4, _mm_shuffle_epi32(v4, 0xb1));
|
||||
const __m128i v1 = _mm_adds_epi16(v2, _mm_shufflelo_epi16(v2, 0xb1));
|
||||
return _mm_extract_epi16(v1, 0);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/**
* Thin layer around vector intrinsics so they can be easily replaced with
* e.g. the fall-back scalar path, an implementation with different vector
* width or using different SIMD architectures (AVX-512?!).
*
* This implementation operates on scalar values and doesn't rely on
* any vector extensions. This is mainly intended for debugging and
* to keep this file building on exotic platforms.
*/
|
||||
namespace {
|
||||
/**
|
||||
* SIMD integer vector data type.
|
||||
*/
|
||||
typedef int16_t vector_type;
|
||||
|
||||
/**
|
||||
* Scalar data type matching the representation of a single component of \p
|
||||
* vector_type.
|
||||
*/
|
||||
typedef int16_t scalar_type;
|
||||
|
||||
/**
|
||||
* Maximum integer value representable as a \p scalar_type.
|
||||
*/
|
||||
const scalar_type max_scalar = INT16_MAX;
|
||||
|
||||
/**
|
||||
* Number of components of a \p vector_type.
|
||||
*/
|
||||
const unsigned vector_width = 1;
|
||||
|
||||
/**
|
||||
* Set the i-th component of vector \p v to \p x.
|
||||
*/
|
||||
void
|
||||
set(vector_type &v, unsigned i, scalar_type x)
|
||||
{
|
||||
assert(i < vector_width);
|
||||
v = x;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the i-th component of vector \p v.
|
||||
*/
|
||||
scalar_type
|
||||
get(const vector_type &v, unsigned i)
|
||||
{
|
||||
assert(i < vector_width);
|
||||
return v;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add two vectors with saturation.
|
||||
*/
|
||||
vector_type
|
||||
adds(vector_type v, vector_type w)
|
||||
{
|
||||
return MAX2(INT16_MIN, MIN2(INT16_MAX, int(v) + w));
|
||||
}
|
||||
|
||||
/**
|
||||
* Subtract two vectors with saturation.
|
||||
*/
|
||||
vector_type
|
||||
subs(vector_type v, vector_type w)
|
||||
{
|
||||
return MAX2(INT16_MIN, MIN2(INT16_MAX, int(v) - w));
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the bitwise conjunction of two vectors.
|
||||
*/
|
||||
vector_type
|
||||
mask(vector_type v, vector_type w)
|
||||
{
|
||||
return v & w;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reduce the components of a vector using saturating addition.
|
||||
*/
|
||||
scalar_type
|
||||
sums(vector_type v)
|
||||
{
|
||||
return v;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
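
/* Hypothetical illustration, not part of this change: both paths above expose
 * the same set()/get()/adds()/subs()/mask()/sums() interface, so a caller can
 * be written once against the abstract API and build on either the SSE2 or
 * the scalar fall-back implementation.
 */
static scalar_type
saturating_sum_sketch(const scalar_type *x, unsigned n)
{
   vector_type acc = {};

   for (unsigned i = 0; i < n; i += vector_width) {
      vector_type tmp = {};

      /* Pack up to vector_width values, leaving unused lanes at zero. */
      for (unsigned j = 0; j < vector_width && i + j < n; j++)
         set(tmp, j, x[i + j]);

      /* Per-component saturating accumulation... */
      acc = adds(acc, tmp);
   }

   /* ...followed by a saturating horizontal reduction to a single scalar. */
   return sums(acc);
}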
|
||||
|
||||
/**
|
||||
* Swap \p x and \p y.
|
||||
*/
|
||||
#define SWAP(x, y) do { \
|
||||
__typeof(y) _swap_tmp = y; \
|
||||
y = x; \
|
||||
x = _swap_tmp; \
|
||||
} while (0)
|
||||
|
||||
namespace {
|
||||
/**
* Variable-length vector type intended to represent cycle-count costs for
* arbitrary atom-to-bank assignments. It's indexed by a pair of integers
* (i, p), where i is an atom index and p in {0, 1} indicates the parity of
* the conflict (respectively, whether the cost is incurred whenever the
* atoms are assigned the same bank b or opposite-parity banks b and b^1).
* \sa shader_conflict_weight_matrix()
*/
|
||||
struct weight_vector_type {
|
||||
weight_vector_type() : v(NULL), size(0) {}
|
||||
|
||||
weight_vector_type(unsigned n) :
|
||||
v(new vector_type[DIV_ROUND_UP(n, vector_width)]()),
|
||||
size(n) {}
|
||||
|
||||
weight_vector_type(const weight_vector_type &u) :
|
||||
v(new vector_type[DIV_ROUND_UP(u.size, vector_width)]()),
|
||||
size(u.size)
|
||||
{
|
||||
memcpy(v, u.v,
|
||||
DIV_ROUND_UP(u.size, vector_width) * sizeof(vector_type));
|
||||
}
|
||||
|
||||
~weight_vector_type()
|
||||
{
|
||||
delete[] v;
|
||||
}
|
||||
|
||||
weight_vector_type &
|
||||
operator=(weight_vector_type u)
|
||||
{
|
||||
SWAP(v, u.v);
|
||||
SWAP(size, u.size);
|
||||
return *this;
|
||||
}
|
||||
|
||||
vector_type *v;
|
||||
unsigned size;
|
||||
};
|
||||
|
||||
/**
|
||||
* Set the (i, p)-th component of weight vector \p v to \p x.
|
||||
*/
|
||||
void
|
||||
set(weight_vector_type &v, unsigned i, unsigned p, scalar_type x)
|
||||
{
|
||||
set(v.v[(2 * i + p) / vector_width], (2 * i + p) % vector_width, x);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the (i, p)-th component of weight vector \p v.
|
||||
*/
|
||||
scalar_type
|
||||
get(const weight_vector_type &v, unsigned i, unsigned p)
|
||||
{
|
||||
return get(v.v[(2 * i + p) / vector_width], (2 * i + p) % vector_width);
|
||||
}
|
||||
|
||||
/**
|
||||
* Swap the (i, p)-th and (j, q)-th components of weight vector \p v.
|
||||
*/
|
||||
void
|
||||
swap(weight_vector_type &v,
|
||||
unsigned i, unsigned p,
|
||||
unsigned j, unsigned q)
|
||||
{
|
||||
const scalar_type tmp = get(v, i, p);
|
||||
set(v, i, p, get(v, j, q));
|
||||
set(v, j, q, tmp);
|
||||
}
|
||||
}
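
/* Hypothetical sanity check, not part of this change: the accessors above
 * flatten the pair (i, p) to position 2 * i + p, which is then split into a
 * vector index and a lane index.
 */
static void
weight_index_sketch(void)
{
   weight_vector_type w(2 * 12);   /* room for 12 atoms, both parities */

   /* With the SSE2 path vector_width is 16, so (i, p) = (9, 0) maps to flat
    * index 18, i.e. lane 18 % 16 = 2 of vector 18 / 16 = 1.
    */
   set(w, 9, 0, 42);
   assert(get(w, 9, 0) == 42);
}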
|
||||
|
||||
namespace {
|
||||
/**
* Object that represents the partitioning of an arbitrary register space
* into indivisible units (referred to as atoms below) that can potentially
* be rearranged independently from other registers. The partitioning is
* inferred from a number of contiguity requirements specified using
* require_contiguous(). This allows efficient look-up of the atom index a
* given register address belongs to, or conversely the range of register
* addresses that belong to a given atom.
*/
|
||||
struct partitioning {
|
||||
/**
|
||||
* Create a (for the moment unrestricted) partitioning of a register
|
||||
* file of size \p n. The units are arbitrary.
|
||||
*/
|
||||
partitioning(unsigned n) :
|
||||
max_reg(n),
|
||||
offsets(new unsigned[n + num_terminator_atoms]),
|
||||
atoms(new unsigned[n + num_terminator_atoms])
|
||||
{
|
||||
for (unsigned i = 0; i < n + num_terminator_atoms; i++) {
|
||||
offsets[i] = i;
|
||||
atoms[i] = i;
|
||||
}
|
||||
}
|
||||
|
||||
partitioning(const partitioning &p) :
|
||||
max_reg(p.max_reg),
|
||||
offsets(new unsigned[p.num_atoms() + num_terminator_atoms]),
|
||||
atoms(new unsigned[p.max_reg + num_terminator_atoms])
|
||||
{
|
||||
memcpy(offsets, p.offsets,
|
||||
sizeof(unsigned) * (p.num_atoms() + num_terminator_atoms));
|
||||
memcpy(atoms, p.atoms,
|
||||
sizeof(unsigned) * (p.max_reg + num_terminator_atoms));
|
||||
}
|
||||
|
||||
~partitioning()
|
||||
{
|
||||
delete[] offsets;
|
||||
delete[] atoms;
|
||||
}
|
||||
|
||||
partitioning &
|
||||
operator=(partitioning p)
|
||||
{
|
||||
SWAP(max_reg, p.max_reg);
|
||||
SWAP(offsets, p.offsets);
|
||||
SWAP(atoms, p.atoms);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Require register range [reg, reg + n[ to be considered part of the
|
||||
* same atom.
|
||||
*/
|
||||
void
|
||||
require_contiguous(unsigned reg, unsigned n)
|
||||
{
|
||||
unsigned r = atoms[reg];
|
||||
|
||||
/* Renumber atoms[reg...] = { r... } and their offsets[r...] for the
|
||||
* case that the specified contiguity requirement leads to the fusion
|
||||
* (yay) of one or more existing atoms.
|
||||
*/
|
||||
for (unsigned reg1 = reg + 1; reg1 <= max_reg; reg1++) {
|
||||
if (offsets[atoms[reg1]] < reg + n) {
|
||||
atoms[reg1] = r;
|
||||
} else {
|
||||
if (offsets[atoms[reg1 - 1]] != offsets[atoms[reg1]])
|
||||
r++;
|
||||
|
||||
offsets[r] = offsets[atoms[reg1]];
|
||||
atoms[reg1] = r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the atom index register address \p reg belongs to.
|
||||
*/
|
||||
unsigned
|
||||
atom_of_reg(unsigned reg) const
|
||||
{
|
||||
return atoms[reg];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the base register address that belongs to atom \p r.
|
||||
*/
|
||||
unsigned
|
||||
reg_of_atom(unsigned r) const
|
||||
{
|
||||
return offsets[r];
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the size of atom \p r in register address units.
|
||||
*/
|
||||
unsigned
|
||||
size_of_atom(unsigned r) const
|
||||
{
|
||||
assert(r < num_atoms());
|
||||
return reg_of_atom(r + 1) - reg_of_atom(r);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of atoms the whole register space is partitioned into.
|
||||
*/
|
||||
unsigned
|
||||
num_atoms() const
|
||||
{
|
||||
return atoms[max_reg];
|
||||
}
|
||||
|
||||
private:
|
||||
/**
|
||||
* Number of trailing atoms inserted for convenience so among other
|
||||
* things we don't need to special-case the last element in
|
||||
* size_of_atom().
|
||||
*/
|
||||
static const unsigned num_terminator_atoms = 1;
|
||||
unsigned max_reg;
|
||||
unsigned *offsets;
|
||||
unsigned *atoms;
|
||||
};
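
/* Hypothetical example, not part of this change: contiguity requirements
 * fuse atoms, after which register-to-atom look-ups are constant-time.
 * The register numbers below are invented for illustration.
 */
static void
partitioning_sketch(void)
{
   partitioning p(8);           /* registers 0..7, initially one atom each */
   p.require_contiguous(2, 3);  /* registers 2, 3 and 4 must stay together */

   assert(p.atom_of_reg(2) == p.atom_of_reg(4));
   assert(p.size_of_atom(p.atom_of_reg(2)) == 3);
   assert(p.num_atoms() == 6);  /* {0}, {1}, {2,3,4}, {5}, {6}, {7} */
}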
|
||||
|
||||
/**
|
||||
* Only GRF sources (whether they have been register-allocated or not) can
|
||||
* possibly incur bank conflicts.
|
||||
*/
|
||||
bool
|
||||
is_grf(const fs_reg &r)
|
||||
{
|
||||
return r.file == VGRF || r.file == FIXED_GRF;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register offset of \p r in GRF units. Useful because the representation
|
||||
* of GRFs post-register allocation is somewhat inconsistent and depends on
|
||||
* whether the register already had a fixed GRF offset prior to register
|
||||
* allocation or whether it was part of a VGRF allocation.
|
||||
*/
|
||||
unsigned
|
||||
reg_of(const fs_reg &r)
|
||||
{
|
||||
assert(is_grf(r));
|
||||
if (r.file == VGRF)
|
||||
return r.nr + r.offset / REG_SIZE;
|
||||
else
|
||||
return reg_offset(r) / REG_SIZE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate the finest partitioning of the GRF space compatible with the
|
||||
* register contiguity requirements derived from all instructions part of
|
||||
* the program.
|
||||
*/
|
||||
partitioning
|
||||
shader_reg_partitioning(const fs_visitor *v)
|
||||
{
|
||||
partitioning p(BRW_MAX_GRF);
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||
if (is_grf(inst->dst))
|
||||
p.require_contiguous(reg_of(inst->dst), regs_written(inst));
|
||||
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (is_grf(inst->src[i]))
|
||||
p.require_contiguous(reg_of(inst->src[i]), regs_read(inst, i));
|
||||
}
|
||||
}
|
||||
|
||||
return p;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the set of GRF atoms that should be left untouched at their
|
||||
* original location to avoid violating hardware or software assumptions.
|
||||
*/
|
||||
bool *
|
||||
shader_reg_constraints(const fs_visitor *v, const partitioning &p)
|
||||
{
|
||||
bool *constrained = new bool[p.num_atoms()]();
|
||||
|
||||
/* These are read implicitly by some send-message instructions without
|
||||
* any indication at the IR level. Assume they are unsafe to move
|
||||
* around.
|
||||
*/
|
||||
for (unsigned reg = 0; reg < 2; reg++)
|
||||
constrained[p.atom_of_reg(reg)] = true;
|
||||
|
||||
/* Assume that anything referenced via fixed GRFs is baked into the
|
||||
* hardware's fixed-function logic and may be unsafe to move around.
|
||||
* Also take into account the source GRF restrictions of EOT
|
||||
* send-message instructions.
|
||||
*/
|
||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||
if (inst->dst.file == FIXED_GRF)
|
||||
constrained[p.atom_of_reg(reg_of(inst->dst))] = true;
|
||||
|
||||
for (int i = 0; i < inst->sources; i++) {
|
||||
if (inst->src[i].file == FIXED_GRF ||
|
||||
(is_grf(inst->src[i]) && inst->eot))
|
||||
constrained[p.atom_of_reg(reg_of(inst->src[i]))] = true;
|
||||
}
|
||||
}
|
||||
|
||||
return constrained;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return whether the hardware will be able to prevent a bank conflict by
|
||||
* optimizing out the read cycle of a source register. The formula was
|
||||
* found experimentally.
|
||||
*/
|
||||
bool
|
||||
is_conflict_optimized_out(const gen_device_info *devinfo, const fs_inst *inst)
|
||||
{
|
||||
return devinfo->gen >= 9 &&
|
||||
((is_grf(inst->src[0]) && (reg_of(inst->src[0]) == reg_of(inst->src[1]) ||
|
||||
reg_of(inst->src[0]) == reg_of(inst->src[2]))) ||
|
||||
reg_of(inst->src[1]) == reg_of(inst->src[2]));
|
||||
}
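
/* Hypothetical reading of the check above, not part of this change: on Gen9+
 * an instruction such as
 *
 *    mad(8)  g10  g20  g30  g20
 *
 * reads g20 through both src0 and src2, and the duplicated read cycle is
 * assumed to be optimized out by the hardware, so no bank-conflict penalty
 * is charged for that pair of sources.
 */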
|
||||
|
||||
/**
* Return a matrix that allows reasonably efficient computation of the
* cycle-count cost of bank conflicts incurred throughout the whole program
* for any given atom-to-bank assignment.
*
* More precisely, if C_r_s_p is the result of this function, the total
* cost of all bank conflicts involving any given atom r can be readily
* recovered as follows:
*
* S(B) = Sum_s_p(d_(p^B_r)_(B_s) * C_r_s_p)
*
* where d_i_j is the Kronecker delta, and B_r indicates the bank
* assignment of r. \sa delta_conflicts() for a vectorized implementation
* of the expression above.
*
* FINISHME: Teach this about the Gen10+ bank conflict rules, which are
* somewhat more relaxed than on previous generations. In the
* meantime optimizing based on Gen9 weights is likely to be more
* helpful than not optimizing at all.
*/
|
||||
weight_vector_type *
|
||||
shader_conflict_weight_matrix(const fs_visitor *v, const partitioning &p)
|
||||
{
|
||||
weight_vector_type *conflicts = new weight_vector_type[p.num_atoms()];
|
||||
for (unsigned r = 0; r < p.num_atoms(); r++)
|
||||
conflicts[r] = weight_vector_type(2 * p.num_atoms());
|
||||
|
||||
/* Crude approximation of the number of times the current basic block
|
||||
* will be executed at run-time.
|
||||
*/
|
||||
unsigned block_scale = 1;
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, v->cfg) {
|
||||
if (inst->opcode == BRW_OPCODE_DO) {
|
||||
block_scale *= 10;
|
||||
|
||||
} else if (inst->opcode == BRW_OPCODE_WHILE) {
|
||||
block_scale /= 10;
|
||||
|
||||
} else if (inst->is_3src(v->devinfo) &&
|
||||
is_grf(inst->src[1]) && is_grf(inst->src[2])) {
|
||||
const unsigned r = p.atom_of_reg(reg_of(inst->src[1]));
|
||||
const unsigned s = p.atom_of_reg(reg_of(inst->src[2]));
|
||||
|
||||
/* Estimate of the cycle-count cost of incurring a bank conflict
|
||||
* for this instruction. This is only true on the average, for a
|
||||
* sequence of back-to-back ternary instructions, since the EU
|
||||
* front-end only seems to be able to issue a new instruction at
|
||||
* an even cycle. The cost of a bank conflict incurred by an
|
||||
* isolated ternary instruction may be higher.
|
||||
*/
|
||||
const unsigned exec_size = inst->dst.component_size(inst->exec_size);
|
||||
const unsigned cycle_scale = block_scale * DIV_ROUND_UP(exec_size,
|
||||
REG_SIZE);
|
||||
|
||||
/* Neglect same-atom conflicts (since they're either trivial or
|
||||
* impossible to avoid without splitting the atom), and conflicts
|
||||
* known to be optimized out by the hardware.
|
||||
*/
|
||||
if (r != s && !is_conflict_optimized_out(v->devinfo, inst)) {
|
||||
/* Calculate the parity of the sources relative to the start of
|
||||
* their respective atoms. If their parity is the same (and
|
||||
* none of the atoms straddle the 2KB mark), the instruction
|
||||
* will incur a conflict iff both atoms are assigned the same
|
||||
* bank b. If their parity is opposite, the instruction will
|
||||
* incur a conflict iff they are assigned opposite banks (b and
|
||||
* b^1).
|
||||
*/
|
||||
const bool p_r = 1 & (reg_of(inst->src[1]) - p.reg_of_atom(r));
|
||||
const bool p_s = 1 & (reg_of(inst->src[2]) - p.reg_of_atom(s));
|
||||
const unsigned p = p_r ^ p_s;
|
||||
|
||||
/* Calculate the updated cost of a hypothetical conflict
|
||||
* between atoms r and s. Note that the weight matrix is
|
||||
* symmetric with respect to indices r and s by construction.
|
||||
*/
|
||||
const scalar_type w = MIN2(unsigned(max_scalar),
|
||||
get(conflicts[r], s, p) + cycle_scale);
|
||||
set(conflicts[r], s, p, w);
|
||||
set(conflicts[s], r, p, w);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return conflicts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the set of GRF atoms that could potentially lead to bank
|
||||
* conflicts if laid out unfavorably in the GRF space according to
|
||||
* the specified \p conflicts matrix (\sa
|
||||
* shader_conflict_weight_matrix()).
|
||||
*/
|
||||
bool *
|
||||
have_any_conflicts(const partitioning &p,
|
||||
const weight_vector_type *conflicts)
|
||||
{
|
||||
bool *any_conflicts = new bool[p.num_atoms()]();
|
||||
|
||||
for (unsigned r = 0; r < p.num_atoms(); r++) {
|
||||
const unsigned m = DIV_ROUND_UP(conflicts[r].size, vector_width);
|
||||
for (unsigned s = 0; s < m; s++)
|
||||
any_conflicts[r] |= sums(conflicts[r].v[s]);
|
||||
}
|
||||
|
||||
return any_conflicts;
|
||||
}
|
||||
|
||||
/**
* Calculate the difference between two S(B) cost estimates as defined
* above (\sa shader_conflict_weight_matrix()). This represents the
* (partial) cycle-count benefit from moving an atom r from bank p to n.
* The respective bank assignments Bp and Bn are encoded as the \p
* bank_mask_p and \p bank_mask_n bitmasks for efficient computation,
* according to the formula:
*
* bank_mask(B)_s_p = -d_(p^B_r)_(B_s)
*
* Notice the similarity with the delta function in the S(B) expression
* above, and how bank_mask(B) can be precomputed for every possible
* selection of r since bank_mask(B) only depends on it via B_r that may
* only assume one of four different values, so the caller can keep every
* possible bank_mask(B) vector in memory without much hassle (\sa
* bank_characteristics()).
*/
|
||||
int
|
||||
delta_conflicts(const weight_vector_type &bank_mask_p,
|
||||
const weight_vector_type &bank_mask_n,
|
||||
const weight_vector_type &conflicts)
|
||||
{
|
||||
const unsigned m = DIV_ROUND_UP(conflicts.size, vector_width);
|
||||
vector_type s_p = {}, s_n = {};
|
||||
|
||||
for (unsigned r = 0; r < m; r++) {
|
||||
s_p = adds(s_p, mask(bank_mask_p.v[r], conflicts.v[r]));
|
||||
s_n = adds(s_n, mask(bank_mask_n.v[r], conflicts.v[r]));
|
||||
}
|
||||
|
||||
return sums(subs(s_p, s_n));
|
||||
}
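
/* Hypothetical sanity check, not part of this change: the bank masks work
 * because every component is either 0 or -1 (all bits set), so the Kronecker
 * delta in S(B) reduces to a bitwise AND and no per-component branching is
 * needed in the loop above.
 */
static void
bank_mask_sketch(void)
{
   vector_type w = {}, m = {};

   set(w, 0, 42);
   set(m, 0, -1);               /* -1 selects the weight, 0 would drop it */
   assert(get(mask(m, w), 0) == 42);
}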
|
||||
|
||||
/**
|
||||
* Register atom permutation, represented as the start GRF offset each atom
|
||||
* is mapped into.
|
||||
*/
|
||||
struct permutation {
|
||||
permutation() : v(NULL), size(0) {}
|
||||
|
||||
permutation(unsigned n) :
|
||||
v(new unsigned[n]()), size(n) {}
|
||||
|
||||
permutation(const permutation &p) :
|
||||
v(new unsigned[p.size]), size(p.size)
|
||||
{
|
||||
memcpy(v, p.v, p.size * sizeof(unsigned));
|
||||
}
|
||||
|
||||
~permutation()
|
||||
{
|
||||
delete[] v;
|
||||
}
|
||||
|
||||
permutation &
|
||||
operator=(permutation p)
|
||||
{
|
||||
SWAP(v, p.v);
|
||||
SWAP(size, p.size);
|
||||
return *this;
|
||||
}
|
||||
|
||||
unsigned *v;
|
||||
unsigned size;
|
||||
};
|
||||
|
||||
/**
|
||||
* Return an identity permutation of GRF atoms.
|
||||
*/
|
||||
permutation
|
||||
identity_reg_permutation(const partitioning &p)
|
||||
{
|
||||
permutation map(p.num_atoms());
|
||||
|
||||
for (unsigned r = 0; r < map.size; r++)
|
||||
map.v[r] = p.reg_of_atom(r);
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
* Return the bank index of GRF address \p reg, numbered according to the
* table:
*            Even    Odd
*    Lo       0       1
*    Hi       2       3
*/
unsigned
bank_of(unsigned reg)
{
return (reg & 0x40) >> 5 | (reg & 1);
}
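
/* A few concrete values of the mapping above, not part of this change, with
 * the GRF numbers chosen for illustration:
 *
 *    bank_of(0)  == 0    g0:  even address, low half of the register file
 *    bank_of(1)  == 1    g1:  odd address, low half
 *    bank_of(64) == 2    g64: even address, high half (bit 6 set)
 *    bank_of(65) == 3    g65: odd address, high half
 */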
|
||||
|
||||
/**
|
||||
* Return bitmasks suitable for use as bank mask arguments for the
|
||||
* delta_conflicts() computation. Note that this is just the (negative)
|
||||
* characteristic function of each bank, if you regard it as a set
|
||||
* containing all atoms assigned to it according to the \p map array.
|
||||
*/
|
||||
weight_vector_type *
|
||||
bank_characteristics(const permutation &map)
|
||||
{
|
||||
weight_vector_type *banks = new weight_vector_type[4];
|
||||
|
||||
for (unsigned b = 0; b < 4; b++) {
|
||||
banks[b] = weight_vector_type(2 * map.size);
|
||||
|
||||
for (unsigned j = 0; j < map.size; j++) {
|
||||
for (unsigned p = 0; p < 2; p++)
|
||||
set(banks[b], j, p,
|
||||
(b ^ p) == bank_of(map.v[j]) ? -1 : 0);
|
||||
}
|
||||
}
|
||||
|
||||
return banks;
|
||||
}
|
||||
|
||||
/**
* Return an improved permutation of GRF atoms based on \p map attempting
* to reduce the total cycle-count cost of bank conflicts greedily.
*
* Note that this doesn't attempt to merge multiple atoms into one, which
* may allow it to do a better job in some cases -- It simply reorders
* existing atoms in the GRF space without affecting their identity.
*/
|
||||
permutation
|
||||
optimize_reg_permutation(const partitioning &p,
|
||||
const bool *constrained,
|
||||
const weight_vector_type *conflicts,
|
||||
permutation map)
|
||||
{
|
||||
const bool *any_conflicts = have_any_conflicts(p, conflicts);
|
||||
weight_vector_type *banks = bank_characteristics(map);
|
||||
|
||||
for (unsigned r = 0; r < map.size; r++) {
|
||||
const unsigned bank_r = bank_of(map.v[r]);
|
||||
|
||||
if (!constrained[r]) {
|
||||
unsigned best_s = r;
|
||||
int best_benefit = 0;
|
||||
|
||||
for (unsigned s = 0; s < map.size; s++) {
|
||||
const unsigned bank_s = bank_of(map.v[s]);
|
||||
|
||||
if (bank_r != bank_s && !constrained[s] &&
|
||||
p.size_of_atom(r) == p.size_of_atom(s) &&
|
||||
(any_conflicts[r] || any_conflicts[s])) {
|
||||
const int benefit =
|
||||
delta_conflicts(banks[bank_r], banks[bank_s], conflicts[r]) +
|
||||
delta_conflicts(banks[bank_s], banks[bank_r], conflicts[s]);
|
||||
|
||||
if (benefit > best_benefit) {
|
||||
best_s = s;
|
||||
best_benefit = benefit;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (best_s != r) {
|
||||
for (unsigned b = 0; b < 4; b++) {
|
||||
for (unsigned p = 0; p < 2; p++)
|
||||
swap(banks[b], r, p, best_s, p);
|
||||
}
|
||||
|
||||
SWAP(map.v[r], map.v[best_s]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete[] banks;
|
||||
delete[] any_conflicts;
|
||||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* Apply the GRF atom permutation given by \p map to register \p r and
|
||||
* return the result.
|
||||
*/
|
||||
fs_reg
|
||||
transform(const partitioning &p, const permutation &map, fs_reg r)
|
||||
{
|
||||
if (r.file == VGRF) {
|
||||
const unsigned reg = reg_of(r);
|
||||
const unsigned s = p.atom_of_reg(reg);
|
||||
r.nr = map.v[s] + reg - p.reg_of_atom(s);
|
||||
r.offset = r.offset % REG_SIZE;
|
||||
}
|
||||
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::opt_bank_conflicts()
|
||||
{
|
||||
assert(grf_used || !"Must be called after register allocation");
|
||||
|
||||
/* No ternary instructions -- No bank conflicts. */
|
||||
if (devinfo->gen < 6)
|
||||
return false;
|
||||
|
||||
const partitioning p = shader_reg_partitioning(this);
|
||||
const bool *constrained = shader_reg_constraints(this, p);
|
||||
const weight_vector_type *conflicts =
|
||||
shader_conflict_weight_matrix(this, p);
|
||||
const permutation map =
|
||||
optimize_reg_permutation(p, constrained, conflicts,
|
||||
identity_reg_permutation(p));
|
||||
|
||||
foreach_block_and_inst(block, fs_inst, inst, cfg) {
|
||||
inst->dst = transform(p, map, inst->dst);
|
||||
|
||||
for (int i = 0; i < inst->sources; i++)
|
||||
inst->src[i] = transform(p, map, inst->src[i]);
|
||||
}
|
||||
|
||||
delete[] conflicts;
|
||||
delete[] constrained;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Estimate the number of GRF bank conflict cycles incurred by an instruction.
|
||||
*
|
||||
* Note that this neglects conflict cycles prior to register allocation
|
||||
* because we don't know which bank each VGRF is going to end up aligned to.
|
||||
*/
|
||||
unsigned
|
||||
fs_visitor::bank_conflict_cycles(const fs_inst *inst) const
|
||||
{
|
||||
if (grf_used && inst->is_3src(devinfo) &&
|
||||
is_grf(inst->src[1]) && is_grf(inst->src[2]) &&
|
||||
bank_of(reg_of(inst->src[1])) == bank_of(reg_of(inst->src[2])) &&
|
||||
!is_conflict_optimized_out(devinfo, inst)) {
|
||||
return DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE);
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
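
/* Hypothetical sense of scale, not part of this change: a SIMD16 instruction
 * with a 32-bit-per-channel destination writes 16 * 4 = 64 bytes, so a
 * conflicting pair of sources is charged DIV_ROUND_UP(64, REG_SIZE) = 2
 * additional cycles by the estimate above.
 */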
|
@@ -36,9 +36,12 @@
|
||||
|
||||
#include "util/bitset.h"
|
||||
#include "brw_fs.h"
|
||||
#include "brw_fs_live_variables.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "brw_eu.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
namespace { /* avoid conflict with opt_copy_propagation_elements */
|
||||
struct acp_entry : public exec_node {
|
||||
fs_reg dst;
|
||||
@@ -77,12 +80,19 @@ struct block_data {
|
||||
* course of this block.
|
||||
*/
|
||||
BITSET_WORD *kill;
|
||||
|
||||
/**
|
||||
* Which entries in the fs_copy_prop_dataflow acp table are guaranteed to
|
||||
* have a fully uninitialized destination at the end of this block.
|
||||
*/
|
||||
BITSET_WORD *undef;
|
||||
};
|
||||
|
||||
class fs_copy_prop_dataflow
|
||||
{
|
||||
public:
|
||||
fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
|
||||
const fs_live_variables *live,
|
||||
exec_list *out_acp[ACP_HASH_SIZE]);
|
||||
|
||||
void setup_initial_values();
|
||||
@@ -92,6 +102,7 @@ public:
|
||||
|
||||
void *mem_ctx;
|
||||
cfg_t *cfg;
|
||||
const fs_live_variables *live;
|
||||
|
||||
acp_entry **acp;
|
||||
int num_acp;
|
||||
@@ -102,8 +113,9 @@ public:
|
||||
} /* anonymous namespace */
|
||||
|
||||
fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
|
||||
const fs_live_variables *live,
|
||||
exec_list *out_acp[ACP_HASH_SIZE])
|
||||
: mem_ctx(mem_ctx), cfg(cfg)
|
||||
: mem_ctx(mem_ctx), cfg(cfg), live(live)
|
||||
{
|
||||
bd = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);
|
||||
|
||||
@@ -124,6 +136,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
|
||||
bd[block->num].liveout = rzalloc_array(bd, BITSET_WORD, bitset_words);
|
||||
bd[block->num].copy = rzalloc_array(bd, BITSET_WORD, bitset_words);
|
||||
bd[block->num].kill = rzalloc_array(bd, BITSET_WORD, bitset_words);
|
||||
bd[block->num].undef = rzalloc_array(bd, BITSET_WORD, bitset_words);
|
||||
|
||||
for (int i = 0; i < ACP_HASH_SIZE; i++) {
|
||||
foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
|
||||
@@ -173,8 +186,7 @@ fs_copy_prop_dataflow::setup_initial_values()
|
||||
|
||||
/* Populate the initial values for the livein and liveout sets. For the
|
||||
* block at the start of the program, livein = 0 and liveout = copy.
|
||||
* For the others, set liveout to 0 (the empty set) and livein to ~0
|
||||
* (the universal set).
|
||||
* For the others, set liveout and livein to ~0 (the universal set).
|
||||
*/
|
||||
foreach_block (block, cfg) {
|
||||
if (block->parents.is_empty()) {
|
||||
@@ -184,11 +196,23 @@ fs_copy_prop_dataflow::setup_initial_values()
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < bitset_words; i++) {
|
||||
bd[block->num].liveout[i] = 0u;
|
||||
bd[block->num].liveout[i] = ~0u;
|
||||
bd[block->num].livein[i] = ~0u;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Initialize the undef set. */
|
||||
foreach_block (block, cfg) {
|
||||
for (int i = 0; i < num_acp; i++) {
|
||||
BITSET_SET(bd[block->num].undef, i);
|
||||
for (unsigned off = 0; off < acp[i]->size_written; off += REG_SIZE) {
|
||||
if (BITSET_TEST(live->block_data[block->num].defout,
|
||||
live->var_from_reg(byte_offset(acp[i]->dst, off))))
|
||||
BITSET_CLEAR(bd[block->num].undef, i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -203,14 +227,40 @@ fs_copy_prop_dataflow::run()
|
||||
do {
|
||||
progress = false;
|
||||
|
||||
/* Update liveout for all blocks. */
|
||||
foreach_block (block, cfg) {
|
||||
if (block->parents.is_empty())
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < bitset_words; i++) {
|
||||
const BITSET_WORD old_liveout = bd[block->num].liveout[i];
|
||||
BITSET_WORD livein_from_any_block = 0;
|
||||
|
||||
/* Update livein for this block. If a copy is live out of all
|
||||
* parent blocks, it's live coming in to this block.
|
||||
*/
|
||||
bd[block->num].livein[i] = ~0u;
|
||||
foreach_list_typed(bblock_link, parent_link, link, &block->parents) {
|
||||
bblock_t *parent = parent_link->block;
|
||||
/* Consider ACP entries with a known-undefined destination to
|
||||
* be available from the parent. This is valid because we're
|
||||
* free to set the undefined variable equal to the source of
|
||||
* the ACP entry without breaking the application's
|
||||
* expectations, since the variable is undefined.
|
||||
*/
|
||||
bd[block->num].livein[i] &= (bd[parent->num].liveout[i] |
|
||||
bd[parent->num].undef[i]);
|
||||
livein_from_any_block |= bd[parent->num].liveout[i];
|
||||
}
|
||||
|
||||
/* Limit to the set of ACP entries that can possibly be available
|
||||
* at the start of the block, since propagating from a variable
|
||||
* which is guaranteed to be undefined (rather than potentially
|
||||
* undefined for some dynamic control-flow paths) doesn't seem
|
||||
* particularly useful.
|
||||
*/
|
||||
bd[block->num].livein[i] &= livein_from_any_block;
|
||||
|
||||
/* Update liveout for this block. */
|
||||
bd[block->num].liveout[i] =
|
||||
bd[block->num].copy[i] | (bd[block->num].livein[i] &
|
||||
~bd[block->num].kill[i]);
|
||||
@@ -219,27 +269,6 @@ fs_copy_prop_dataflow::run()
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update livein for all blocks. If a copy is live out of all parent
|
||||
* blocks, it's live coming in to this block.
|
||||
*/
|
||||
foreach_block (block, cfg) {
|
||||
if (block->parents.is_empty())
|
||||
continue;
|
||||
|
||||
for (int i = 0; i < bitset_words; i++) {
|
||||
const BITSET_WORD old_livein = bd[block->num].livein[i];
|
||||
|
||||
bd[block->num].livein[i] = ~0u;
|
||||
foreach_list_typed(bblock_link, parent_link, link, &block->parents) {
|
||||
bblock_t *parent = parent_link->block;
|
||||
bd[block->num].livein[i] &= bd[parent->num].liveout[i];
|
||||
}
|
||||
|
||||
if (old_livein != bd[block->num].livein[i])
|
||||
progress = true;
|
||||
}
|
||||
}
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
@@ -830,6 +859,8 @@ fs_visitor::opt_copy_propagation()
|
||||
for (int i = 0; i < cfg->num_blocks; i++)
|
||||
out_acp[i] = new exec_list [ACP_HASH_SIZE];
|
||||
|
||||
calculate_live_intervals();
|
||||
|
||||
/* First, walk through each block doing local copy propagation and getting
|
||||
* the set of copies available at the end of the block.
|
||||
*/
|
||||
@@ -839,7 +870,7 @@ fs_visitor::opt_copy_propagation()
|
||||
}
|
||||
|
||||
/* Do dataflow analysis for those available copies. */
|
||||
fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, out_acp);
|
||||
fs_copy_prop_dataflow dataflow(copy_prop_ctx, cfg, live_intervals, out_acp);
|
||||
|
||||
/* Next, re-run local copy propagation, this time with the set of copies
|
||||
* provided by the dataflow analysis available at the start of a block.
|
||||
|
@@ -83,9 +83,11 @@ fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,
|
||||
/* The def[] bitset marks when an initialization in a block completely
|
||||
* screens off previous updates of that variable (VGRF channel).
|
||||
*/
|
||||
if (inst->dst.file == VGRF && !inst->is_partial_write()) {
|
||||
if (!BITSET_TEST(bd->use, var))
|
||||
if (inst->dst.file == VGRF) {
|
||||
if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))
|
||||
BITSET_SET(bd->def, var);
|
||||
|
||||
BITSET_SET(bd->defout, var);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -199,6 +201,28 @@ fs_live_variables::compute_live_variables()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Propagate defin and defout down the CFG to calculate the union of live
|
||||
* variables potentially defined along any possible control flow path.
|
||||
*/
|
||||
do {
|
||||
cont = false;
|
||||
|
||||
foreach_block (block, cfg) {
|
||||
const struct block_data *bd = &block_data[block->num];
|
||||
|
||||
foreach_list_typed(bblock_link, child_link, link, &block->children) {
|
||||
struct block_data *child_bd = &block_data[child_link->block->num];
|
||||
|
||||
for (int i = 0; i < bitset_words; i++) {
|
||||
const BITSET_WORD new_def = bd->defout[i] & ~child_bd->defin[i];
|
||||
child_bd->defin[i] |= new_def;
|
||||
child_bd->defout[i] |= new_def;
|
||||
cont |= new_def;
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (cont);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -212,12 +236,12 @@ fs_live_variables::compute_start_end()
|
||||
struct block_data *bd = &block_data[block->num];
|
||||
|
||||
for (int i = 0; i < num_vars; i++) {
|
||||
if (BITSET_TEST(bd->livein, i)) {
|
||||
if (BITSET_TEST(bd->livein, i) && BITSET_TEST(bd->defin, i)) {
|
||||
start[i] = MIN2(start[i], block->start_ip);
|
||||
end[i] = MAX2(end[i], block->start_ip);
|
||||
}
|
||||
|
||||
if (BITSET_TEST(bd->liveout, i)) {
|
||||
if (BITSET_TEST(bd->liveout, i) && BITSET_TEST(bd->defout, i)) {
|
||||
start[i] = MIN2(start[i], block->end_ip);
|
||||
end[i] = MAX2(end[i], block->end_ip);
|
||||
}
|
||||
@@ -260,6 +284,8 @@ fs_live_variables::fs_live_variables(fs_visitor *v, const cfg_t *cfg)
|
||||
block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
|
||||
block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
|
||||
block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
|
||||
block_data[i].defin = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
|
||||
block_data[i].defout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);
|
||||
|
||||
block_data[i].flag_def[0] = 0;
|
||||
block_data[i].flag_use[0] = 0;
|
||||
|
@@ -55,6 +55,18 @@ struct block_data {
|
||||
/** Which defs reach the exit point of the block. */
|
||||
BITSET_WORD *liveout;
|
||||
|
||||
/**
|
||||
* Variables such that the entry point of the block may be reached from any
|
||||
* of their definitions.
|
||||
*/
|
||||
BITSET_WORD *defin;
|
||||
|
||||
/**
|
||||
* Variables such that the exit point of the block may be reached from any
|
||||
* of their definitions.
|
||||
*/
|
||||
BITSET_WORD *defout;
|
||||
|
||||
BITSET_WORD flag_def[1];
|
||||
BITSET_WORD flag_use[1];
|
||||
BITSET_WORD flag_livein[1];
|
||||
|
@@ -1543,10 +1543,11 @@ vec4_instruction_scheduler::choose_instruction_to_schedule()
|
||||
int
|
||||
fs_instruction_scheduler::issue_time(backend_instruction *inst)
|
||||
{
|
||||
const unsigned overhead = v->bank_conflict_cycles((fs_inst *)inst);
|
||||
if (is_compressed((fs_inst *)inst))
|
||||
return 4;
|
||||
return 4 + overhead;
|
||||
else
|
||||
return 2;
|
||||
return 2 + overhead;
|
||||
}
|
||||
|
||||
int
|
||||
|
@@ -41,6 +41,7 @@ libintel_compiler_files = files(
|
||||
'brw_eu.h',
|
||||
'brw_eu_util.c',
|
||||
'brw_eu_validate.c',
|
||||
'brw_fs_bank_conflicts.cpp',
|
||||
'brw_fs_builder.h',
|
||||
'brw_fs_cmod_propagation.cpp',
|
||||
'brw_fs_combine_constants.cpp',
|
||||
|
@@ -701,7 +701,7 @@ void anv_GetPhysicalDeviceFeatures2KHR(
|
||||
case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTER_FEATURES_KHR: {
|
||||
VkPhysicalDeviceVariablePointerFeaturesKHR *features = (void *)ext;
|
||||
features->variablePointersStorageBuffer = true;
|
||||
features->variablePointers = false;
|
||||
features->variablePointers = true;
|
||||
break;
|
||||
}
|
||||
|
||||
|
@@ -123,20 +123,23 @@ anv_shader_compile_to_nir(struct anv_pipeline *pipeline,
|
||||
}
|
||||
}
|
||||
|
||||
const struct nir_spirv_supported_extensions supported_ext = {
|
||||
.float64 = device->instance->physicalDevice.info.gen >= 8,
|
||||
.int64 = device->instance->physicalDevice.info.gen >= 8,
|
||||
.tessellation = true,
|
||||
.draw_parameters = true,
|
||||
.image_write_without_format = true,
|
||||
.multiview = true,
|
||||
.variable_pointers = true,
|
||||
struct spirv_to_nir_options spirv_options = {
|
||||
.lower_workgroup_access_to_offsets = true,
|
||||
.caps = {
|
||||
.float64 = device->instance->physicalDevice.info.gen >= 8,
|
||||
.int64 = device->instance->physicalDevice.info.gen >= 8,
|
||||
.tessellation = true,
|
||||
.draw_parameters = true,
|
||||
.image_write_without_format = true,
|
||||
.multiview = true,
|
||||
.variable_pointers = true,
|
||||
},
|
||||
};
|
||||
|
||||
nir_function *entry_point =
|
||||
spirv_to_nir(spirv, module->size / 4,
|
||||
spec_entries, num_spec_entries,
|
||||
stage, entrypoint_name, &supported_ext, nir_options);
|
||||
stage, entrypoint_name, &spirv_options, nir_options);
|
||||
nir_shader *nir = entry_point->shader;
|
||||
assert(nir->info.stage == stage);
|
||||
nir_validate_shader(nir);
|
||||
@@ -385,10 +388,8 @@ anv_pipeline_compile(struct anv_pipeline *pipeline,
|
||||
if (stage != MESA_SHADER_COMPUTE)
|
||||
NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask);
|
||||
|
||||
if (stage == MESA_SHADER_COMPUTE) {
|
||||
NIR_PASS_V(nir, brw_nir_lower_cs_shared);
|
||||
if (stage == MESA_SHADER_COMPUTE)
|
||||
prog_data->total_shared = nir->num_shared;
|
||||
}
|
||||
|
||||
nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
|
||||
|
||||
|
@@ -81,6 +81,8 @@ setupLoaderExtensions(__DRIscreen *psp,
|
||||
psp->swrast_loader = (__DRIswrastLoaderExtension *) extensions[i];
|
||||
if (strcmp(extensions[i]->name, __DRI_IMAGE_LOADER) == 0)
|
||||
psp->image.loader = (__DRIimageLoaderExtension *) extensions[i];
|
||||
if (strcmp(extensions[i]->name, __DRI_MUTABLE_RENDER_BUFFER_LOADER) == 0)
|
||||
psp->mutableRenderBuffer.loader = (__DRImutableRenderBufferLoaderExtension *) extensions[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -180,6 +180,10 @@ struct __DRIscreenRec {
|
||||
const __DRIimageLoaderExtension *loader;
|
||||
} image;
|
||||
|
||||
struct {
|
||||
const __DRImutableRenderBufferLoaderExtension *loader;
|
||||
} mutableRenderBuffer;
|
||||
|
||||
driOptionCache optionInfo;
|
||||
driOptionCache optionCache;
|
||||
|
||||
|
@@ -147,7 +147,10 @@ driGetRendererString( char * buffer, const char * hardware_name,
|
||||
* \param color_depth_match Whether the color depth must match the zs depth
|
||||
* This forces 32-bit color to have 24-bit depth, and
|
||||
* 16-bit color to have 16-bit depth.
|
||||
*
|
||||
* \param mutable_render_buffer Enable __DRI_ATTRIB_MUTABLE_RENDER_BUFFER,
|
||||
* which translates to
|
||||
* EGL_MUTABLE_RENDER_BUFFER_BIT_KHR.
|
||||
*
|
||||
* \returns
|
||||
* Pointer to an array of pointers to the \c __DRIconfig structures created
|
||||
* for the specified formats. If there is an error, \c NULL is returned.
|
||||
@@ -160,7 +163,8 @@ driCreateConfigs(mesa_format format,
|
||||
unsigned num_depth_stencil_bits,
|
||||
const GLenum * db_modes, unsigned num_db_modes,
|
||||
const uint8_t * msaa_samples, unsigned num_msaa_modes,
|
||||
GLboolean enable_accum, GLboolean color_depth_match)
|
||||
GLboolean enable_accum, GLboolean color_depth_match,
|
||||
GLboolean mutable_render_buffer)
|
||||
{
|
||||
static const uint32_t masks_table[][4] = {
|
||||
/* MESA_FORMAT_B5G6R5_UNORM */
|
||||
@@ -314,6 +318,7 @@ driCreateConfigs(mesa_format format,
|
||||
|
||||
modes->yInverted = GL_TRUE;
|
||||
modes->sRGBCapable = is_srgb;
|
||||
modes->mutableRenderBuffer = mutable_render_buffer;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -398,6 +403,7 @@ static const struct { unsigned int attrib, offset; } attribMap[] = {
|
||||
__ATTRIB(__DRI_ATTRIB_BIND_TO_TEXTURE_TARGETS, bindToTextureTargets),
|
||||
__ATTRIB(__DRI_ATTRIB_YINVERTED, yInverted),
|
||||
__ATTRIB(__DRI_ATTRIB_FRAMEBUFFER_SRGB_CAPABLE, sRGBCapable),
|
||||
__ATTRIB(__DRI_ATTRIB_MUTABLE_RENDER_BUFFER, mutableRenderBuffer),
|
||||
|
||||
/* The struct field doesn't matter here, these are handled by the
|
||||
* switch in driGetConfigAttribIndex. We need them in the array
|
||||
|
@@ -45,7 +45,8 @@ driCreateConfigs(mesa_format format,
|
||||
unsigned num_depth_stencil_bits,
|
||||
const GLenum * db_modes, unsigned num_db_modes,
|
||||
const uint8_t * msaa_samples, unsigned num_msaa_modes,
|
||||
GLboolean enable_accum, GLboolean color_depth_match);
|
||||
GLboolean enable_accum, GLboolean color_depth_match,
|
||||
GLboolean mutable_render_buffer);
|
||||
|
||||
__DRIconfig **driConcatConfigs(__DRIconfig **a,
|
||||
__DRIconfig **b);
|
||||
|
@@ -1094,7 +1094,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
|
||||
num_depth_stencil_bits,
|
||||
back_buffer_modes, 2,
|
||||
singlesample_samples, 1,
|
||||
false, false);
|
||||
false, false, false);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
}
|
||||
|
||||
@@ -1116,7 +1116,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
|
||||
depth_bits, stencil_bits, 1,
|
||||
back_buffer_modes, 1,
|
||||
singlesample_samples, 1,
|
||||
true, false);
|
||||
true, false, false);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
}
|
||||
|
||||
|
@@ -235,6 +235,35 @@ intel_flush_front(struct gl_context *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
brw_display_shared_buffer(struct brw_context *brw)
|
||||
{
|
||||
__DRIcontext *dri_context = brw->driContext;
|
||||
__DRIdrawable *dri_drawable = dri_context->driDrawablePriv;
|
||||
__DRIscreen *dri_screen = brw->screen->driScrnPriv;
|
||||
int fence_fd = -1;
|
||||
|
||||
if (!brw->is_shared_buffer_bound)
|
||||
return;
|
||||
|
||||
if (!brw->is_shared_buffer_dirty)
|
||||
return;
|
||||
|
||||
if (brw->screen->has_exec_fence) {
|
||||
/* This function is always called during a flush operation, so there is
|
||||
* no need to flush again here. But we want to provide a fence_fd to the
|
||||
* loader, and a redundant flush is the easiest way to acquire one.
|
||||
*/
|
||||
if (intel_batchbuffer_flush_fence(brw, -1, &fence_fd))
|
||||
return;
|
||||
}
|
||||
|
||||
dri_screen->mutableRenderBuffer.loader
|
||||
->displaySharedBuffer(dri_drawable, fence_fd,
|
||||
dri_drawable->loaderPrivate);
|
||||
brw->is_shared_buffer_dirty = false;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_glFlush(struct gl_context *ctx)
|
||||
{
|
||||
@@ -242,7 +271,7 @@ intel_glFlush(struct gl_context *ctx)
|
||||
|
||||
intel_batchbuffer_flush(brw);
|
||||
intel_flush_front(ctx);
|
||||
|
||||
brw_display_shared_buffer(brw);
|
||||
brw->need_flush_throttle = true;
|
||||
}
|
||||
|
||||
@@ -862,7 +891,9 @@ brwCreateContext(gl_api api,
|
||||
brw->screen = screen;
|
||||
brw->bufmgr = screen->bufmgr;
|
||||
|
||||
brw->has_hiz = devinfo->has_hiz_and_separate_stencil;
|
||||
/* Braswell has hiz issues, disable it. */
|
||||
brw->has_hiz = devinfo->has_hiz_and_separate_stencil &&
|
||||
screen->deviceID != 0x22B1;
|
||||
brw->has_separate_stencil = devinfo->has_hiz_and_separate_stencil;
|
||||
|
||||
brw->has_swizzling = screen->hw_has_swizzling;
|
||||
@@ -942,7 +973,7 @@ brwCreateContext(gl_api api,
|
||||
|
||||
intel_batchbuffer_init(brw);
|
||||
|
||||
if (devinfo->gen >= 6) {
|
||||
if (devinfo->gen >= 7) {
|
||||
/* Create a new hardware context. Using a hardware context means that
|
||||
* our GPU state will be saved/restored on context switch, allowing us
|
||||
* to assume that the GPU is in the same state we left it in.
|
||||
@@ -1258,6 +1289,21 @@ intel_resolve_for_dri2_flush(struct brw_context *brw,
|
||||
intel_miptree_prepare_external(brw, rb->mt);
|
||||
} else {
|
||||
intel_renderbuffer_downsample(brw, rb);
|
||||
|
||||
/* Call prepare_external on the single-sample miptree to do any
|
||||
* needed resolves prior to handing it off to the window system.
|
||||
* This is needed in the case that rb->singlesample_mt is Y-tiled
|
||||
* with CCS_E enabled but without I915_FORMAT_MOD_Y_TILED_CCS_E. In
|
||||
* this case, the MSAA resolve above will write compressed data into
|
||||
* rb->singlesample_mt.
|
||||
*
|
||||
* TODO: Some day, if we decide to care about the tiny performance
|
||||
* hit we're taking by doing the MSAA resolve and then a CCS resolve,
|
||||
* we could detect this case and just allocate the single-sampled
|
||||
* miptree without aux. However, that would be a lot of plumbing and
|
||||
* this is a rather exotic case so it's not really worth it.
|
||||
*/
|
||||
intel_miptree_prepare_external(brw, rb->singlesample_mt);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1393,6 +1439,11 @@ intel_prepare_render(struct brw_context *brw)
|
||||
*/
|
||||
if (_mesa_is_front_buffer_drawing(ctx->DrawBuffer))
|
||||
brw->front_buffer_dirty = true;
|
||||
|
||||
if (brw->is_shared_buffer_bound) {
|
||||
/* Subsequent rendering will probably dirty the shared buffer. */
|
||||
brw->is_shared_buffer_dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1622,8 +1673,12 @@ intel_update_image_buffer(struct brw_context *intel,
|
||||
else
|
||||
last_mt = rb->singlesample_mt;
|
||||
|
||||
if (last_mt && last_mt->bo == buffer->bo)
|
||||
if (last_mt && last_mt->bo == buffer->bo) {
|
||||
if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
|
||||
intel_miptree_make_shareable(intel, last_mt);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
struct intel_mipmap_tree *mt =
|
||||
intel_miptree_create_for_dri_image(intel, buffer, GL_TEXTURE_2D,
|
||||
@@ -1643,6 +1698,35 @@ intel_update_image_buffer(struct brw_context *intel,
|
||||
rb->Base.Base.NumSamples > 1) {
|
||||
intel_renderbuffer_upsample(intel, rb);
|
||||
}
|
||||
|
||||
if (buffer_type == __DRI_IMAGE_BUFFER_SHARED) {
|
||||
/* The compositor and the application may access this image
|
||||
* concurrently. The display hardware may even scanout the image while
|
||||
* the GPU is rendering to it. Aux surfaces cause difficulty with
|
||||
* concurrent access, so permanently disable aux for this miptree.
|
||||
*
|
||||
* Perhaps we could improve overall application performance by
|
||||
* re-enabling the aux surface when EGL_RENDER_BUFFER transitions to
|
||||
* EGL_BACK_BUFFER, then disabling it again when EGL_RENDER_BUFFER
|
||||
* returns to EGL_SINGLE_BUFFER. I expect the wins and losses with this
|
||||
* approach to be highly dependent on the application's GL usage.
|
||||
*
|
||||
* I [chadv] expect clever disabling/reenabling to be counterproductive
|
||||
* in the use cases I care about: applications that render nearly
|
||||
* realtime handwriting to the surface while possibly undergoing
* simultaneous scanout as a display plane. The app requires low
|
||||
* render latency. Even though the app spends most of its time in
|
||||
* shared-buffer mode, it also frequently transitions between
|
||||
* shared-buffer (EGL_SINGLE_BUFFER) and double-buffer (EGL_BACK_BUFFER)
|
||||
* mode. Visual stutter during the transitions should be avoided.
|
||||
*
|
||||
* In this case, I [chadv] believe reducing the GPU workload at
|
||||
* shared-buffer/double-buffer transitions would offer a smoother app
|
||||
* experience than any savings due to aux compression. But I've
|
||||
* collected no data to prove my theory.
|
||||
*/
|
||||
intel_miptree_make_shareable(intel, mt);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -1703,4 +1787,19 @@ intel_update_image_buffers(struct brw_context *brw, __DRIdrawable *drawable)
|
||||
images.back,
|
||||
__DRI_IMAGE_BUFFER_BACK);
|
||||
}
|
||||
|
||||
if (images.image_mask & __DRI_IMAGE_BUFFER_SHARED) {
|
||||
assert(images.image_mask == __DRI_IMAGE_BUFFER_SHARED);
|
||||
drawable->w = images.back->width;
|
||||
drawable->h = images.back->height;
|
||||
intel_update_image_buffer(brw,
|
||||
drawable,
|
||||
back_rb,
|
||||
images.back,
|
||||
__DRI_IMAGE_BUFFER_SHARED);
|
||||
brw->is_shared_buffer_bound = true;
|
||||
} else {
|
||||
brw->is_shared_buffer_bound = false;
|
||||
brw->is_shared_buffer_dirty = false;
|
||||
}
|
||||
}
|
||||
|
@@ -711,6 +711,18 @@ struct brw_context
|
||||
*/
|
||||
bool front_buffer_dirty;
|
||||
|
||||
/**
|
||||
* True if the __DRIdrawable's current __DRIimageBufferMask is
|
||||
* __DRI_IMAGE_BUFFER_SHARED.
|
||||
*/
|
||||
bool is_shared_buffer_bound;
|
||||
|
||||
/**
|
||||
* True if a shared buffer is bound and it has received any rendering since
|
||||
* the previous __DRImutableRenderBufferLoaderExtension::displaySharedBuffer().
|
||||
*/
|
||||
bool is_shared_buffer_dirty;
|
||||
|
||||
/** Framerate throttling: @{ */
|
||||
struct brw_bo *throttle_batch[2];
|
||||
|
||||
|
@@ -127,6 +127,8 @@ get_stencil_miptree(struct intel_renderbuffer *irb)
|
||||
{
|
||||
if (!irb)
|
||||
return NULL;
|
||||
if (!irb->mt)
|
||||
return NULL;
|
||||
if (irb->mt->stencil_mt)
|
||||
return irb->mt->stencil_mt;
|
||||
return intel_renderbuffer_get_mt(irb);
|
||||
@@ -225,11 +227,11 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw,
|
||||
/* Check if depth buffer is in depth/stencil format. If so, then it's only
|
||||
* safe to invalidate it if we're also clearing stencil.
|
||||
*/
|
||||
if (depth_irb && invalidate_depth &&
|
||||
if (depth_irb && depth_mt && invalidate_depth &&
|
||||
_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL)
|
||||
invalidate_depth = invalidate_stencil && stencil_irb;
|
||||
|
||||
if (depth_irb) {
|
||||
if (depth_irb && depth_mt) {
|
||||
if (rebase_depth_stencil(brw, depth_irb, invalidate_depth)) {
|
||||
/* In the case of stencil_irb being the same packed depth/stencil
|
||||
* texture but not the same rb, make it point at our rebased mt, too.
|
||||
@@ -242,7 +244,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw,
|
||||
}
|
||||
}
|
||||
|
||||
if (stencil_irb) {
|
||||
if (stencil_irb && stencil_irb->mt) {
|
||||
assert(stencil_irb->mt == depth_irb->mt);
|
||||
assert(stencil_irb->mt_level == depth_irb->mt_level);
|
||||
assert(stencil_irb->mt_layer == depth_irb->mt_layer);
|
||||
@@ -250,7 +252,7 @@ brw_workaround_depthstencil_alignment(struct brw_context *brw,
|
||||
}
|
||||
|
||||
/* If there is no depth attachment, consider if stencil needs rebase. */
|
||||
if (!depth_irb && stencil_irb)
|
||||
if (!(depth_irb && depth_mt) && stencil_irb && stencil_irb->mt)
|
||||
rebase_depth_stencil(brw, stencil_irb, invalidate_stencil);
|
||||
}
|
||||
|
||||
|
@@ -2206,6 +2206,14 @@ const struct brw_tracked_state genX(cc_vp) = {
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
/* Clamp scissors to 16-bit unsigned values; otherwise, the compiler truncates
|
||||
* them to fit inside the bitfields, which is often not what is desired.
|
||||
* My reading of GL and GLES specs suggests that overly-large scissor values are
|
||||
* not an erroring condition and that the actual behavior is undefined, so
|
||||
* switching from truncation to clamping is probably not a problem. ~ C.
|
||||
*/
|
||||
#define CLAMP_SCISSOR(X) CLAMP(X, 0x0000, 0xffff)
|
||||
|
||||
static void
|
||||
set_scissor_bits(const struct gl_context *ctx, int i,
|
||||
bool render_to_fbo, unsigned fb_width, unsigned fb_height,
|
||||
@@ -2232,16 +2240,16 @@ set_scissor_bits(const struct gl_context *ctx, int i,
|
||||
sc->ScissorRectangleYMax = 0;
|
||||
} else if (render_to_fbo) {
|
||||
/* texmemory: Y=0=bottom */
|
||||
sc->ScissorRectangleXMin = bbox[0];
|
||||
sc->ScissorRectangleXMax = bbox[1] - 1;
|
||||
sc->ScissorRectangleYMin = bbox[2];
|
||||
sc->ScissorRectangleYMax = bbox[3] - 1;
|
||||
sc->ScissorRectangleXMin = CLAMP_SCISSOR(bbox[0]);
|
||||
sc->ScissorRectangleXMax = CLAMP_SCISSOR(bbox[1] - 1);
|
||||
sc->ScissorRectangleYMin = CLAMP_SCISSOR(bbox[2]);
|
||||
sc->ScissorRectangleYMax = CLAMP_SCISSOR(bbox[3] - 1);
|
||||
} else {
|
||||
/* memory: Y=0=top */
|
||||
sc->ScissorRectangleXMin = bbox[0];
|
||||
sc->ScissorRectangleXMax = bbox[1] - 1;
|
||||
sc->ScissorRectangleYMin = fb_height - bbox[3];
|
||||
sc->ScissorRectangleYMax = fb_height - bbox[2] - 1;
|
||||
sc->ScissorRectangleXMin = CLAMP_SCISSOR(bbox[0]);
|
||||
sc->ScissorRectangleXMax = CLAMP_SCISSOR(bbox[1] - 1);
|
||||
sc->ScissorRectangleYMin = CLAMP_SCISSOR(fb_height - bbox[3]);
|
||||
sc->ScissorRectangleYMax = CLAMP_SCISSOR(fb_height - bbox[2] - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1423,12 +1423,17 @@ static const __DRIrobustnessExtension dri2Robustness = {
|
||||
.base = { __DRI2_ROBUSTNESS, 1 }
|
||||
};
|
||||
|
||||
static const __DRImutableRenderBufferDriverExtension intelMutableRenderBufferExtension = {
|
||||
.base = { __DRI_MUTABLE_RENDER_BUFFER_DRIVER, 1 },
|
||||
};
|
||||
|
||||
static const __DRIextension *screenExtensions[] = {
|
||||
&intelTexBufferExtension.base,
|
||||
&intelFenceExtension.base,
|
||||
&intelFlushExtension.base,
|
||||
&intelImageExtension.base,
|
||||
&intelRendererQueryExtension.base,
|
||||
&intelMutableRenderBufferExtension.base,
|
||||
&dri2ConfigQueryExtension.base,
|
||||
&dri2NoErrorExtension.base,
|
||||
NULL
|
||||
@@ -1440,6 +1445,7 @@ static const __DRIextension *intelRobustScreenExtensions[] = {
|
||||
&intelFlushExtension.base,
|
||||
&intelImageExtension.base,
|
||||
&intelRendererQueryExtension.base,
|
||||
&intelMutableRenderBufferExtension.base,
|
||||
&dri2ConfigQueryExtension.base,
|
||||
&dri2Robustness.base,
|
||||
&dri2NoErrorExtension.base,
|
||||
@@ -1952,7 +1958,9 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
|
||||
else
|
||||
num_formats = 3;
|
||||
|
||||
/* Generate singlesample configs without accumulation buffer. */
|
||||
/* Generate singlesample configs, each without accumulation buffer
|
||||
* and with EGL_MUTABLE_RENDER_BUFFER_BIT_KHR.
|
||||
*/
|
||||
for (unsigned i = 0; i < num_formats; i++) {
|
||||
__DRIconfig **new_configs;
|
||||
int num_depth_stencil_bits = 2;
|
||||
@@ -1983,7 +1991,8 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
|
||||
num_depth_stencil_bits,
|
||||
back_buffer_modes, 2,
|
||||
singlesample_samples, 1,
|
||||
false, false);
|
||||
false, false,
|
||||
/*mutable_render_buffer*/ true);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
}
|
||||
|
||||
@@ -2005,7 +2014,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
|
||||
depth_bits, stencil_bits, 1,
|
||||
back_buffer_modes, 1,
|
||||
singlesample_samples, 1,
|
||||
true, false);
|
||||
true, false, false);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
}
|
||||
|
||||
@@ -2067,7 +2076,7 @@ intel_screen_make_configs(__DRIscreen *dri_screen)
|
||||
back_buffer_modes, 1,
|
||||
multisample_samples,
|
||||
num_msaa_modes,
|
||||
false, false);
|
||||
false, false, false);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
}
|
||||
|
||||
|
@@ -78,7 +78,7 @@ nouveau_get_configs(uint32_t chipset)
|
||||
ARRAY_SIZE(back_buffer_modes),
|
||||
msaa_samples,
|
||||
ARRAY_SIZE(msaa_samples),
|
||||
GL_TRUE, chipset < 0x10);
|
||||
GL_TRUE, chipset < 0x10, GL_FALSE);
|
||||
assert(config);
|
||||
|
||||
configs = driConcatConfigs(configs, config);
|
||||
|
@@ -804,7 +804,7 @@ __DRIconfig **radeonInitScreen2(__DRIscreen *psp)
|
||||
ARRAY_SIZE(back_buffer_modes),
|
||||
msaa_samples_array,
|
||||
ARRAY_SIZE(msaa_samples_array),
|
||||
GL_TRUE, GL_FALSE);
|
||||
GL_TRUE, GL_FALSE, GL_FALSE);
|
||||
configs = driConcatConfigs(configs, new_configs);
|
||||
}
|
||||
|
||||
|
@@ -275,7 +275,7 @@ swrastFillInModes(__DRIscreen *psp,
|
||||
depth_bits_array, stencil_bits_array,
|
||||
depth_buffer_factor, back_buffer_modes,
|
||||
back_buffer_factor, msaa_samples_array, 1,
|
||||
GL_TRUE, GL_FALSE);
|
||||
GL_TRUE, GL_FALSE, GL_FALSE);
|
||||
if (configs == NULL) {
|
||||
fprintf(stderr, "[%s:%u] Error creating FBConfig!\n", __func__,
|
||||
__LINE__);
|
||||
|
@@ -253,6 +253,9 @@ struct gl_config
|
||||
|
||||
/* EXT_framebuffer_sRGB */
|
||||
GLint sRGBCapable;
|
||||
|
||||
/* EGL_KHR_mutable_render_buffer */
|
||||
GLuint mutableRenderBuffer; /* bool */
|
||||
};
|
||||
|
||||
|
||||
|
@@ -642,9 +642,6 @@ st_context_flush(struct st_context_iface *stctxi, unsigned flags,
|
||||
st->pipe->screen->fence_reference(st->pipe->screen, fence, NULL);
|
||||
}
|
||||
|
||||
if (flags & ST_FLUSH_FRONT)
|
||||
st_manager_flush_frontbuffer(st);
|
||||
|
||||
/* DRI3 changes the framebuffer after SwapBuffers, but we need to invoke
|
||||
* st_manager_validate_framebuffers to notice that.
|
||||
*
|
||||