Compare commits
60 Commits
mesa-13.0.0 ... mesa-13.0.1
SHA1
---
f2f487ebbb
11b9cdfcf9
42d221723b
d6bcbfb36c
e19ed2971f
5fa2b384f0
e7de2510e5
1a47251da4
77dc3a5b7c
3bb0415ab9
a4bc03fdfe
d8eea63121
3e616f77bd
49e093a2f5
2bbf964af8
fa6c02787e
a65b6e12f3
ce555a7d1f
621b048734
039a03d8d2
c64f655408
d22958eecb
ab3aeab297
5bdd4fc273
c4643f5f1e
ceefe979c6
9eca84e052
6f55a66ad3
678b4f6372
8ab9842d2e
dba0abdc91
a31947fbf9
f7efc0f0fc
9c297c5487
aa947e7a63
d54699135f
a0d11b190a
aa60c7b1c1
b8f99c6b2f
2789bfdbb5
da1ac6bc46
996c20208f
9397899aed
bd3fde4068
2478cfe41d
4514ce8bc7
bc1d7a6ac4
c08a62a0b1
81df3f63cb
e016945bdd
78fbafedf1
5be463694b
88ebff8e25
9c722e8a2e
f622d33347
dd5e802d33
ea07a57fc0
620ef8e742
2f8b48d274
405dd26860
@@ -82,11 +82,13 @@ LOCAL_CFLAGS += \
-D__STDC_LIMIT_MACROS
endif

ifneq ($(LOCAL_IS_HOST_MODULE),true)
# add libdrm if there are hardware drivers
ifneq ($(filter-out swrast,$(MESA_GPU_DRIVERS)),)
LOCAL_CFLAGS += -DHAVE_LIBDRM
LOCAL_SHARED_LIBRARIES += libdrm
endif
endif

LOCAL_CPPFLAGS += \
$(if $(filter true,$(MESA_LOLLIPOP_BUILD)),-D_USING_LIBCXX) \
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.

<h2>SHA256 checksums</h2>
<pre>
TBD.
4a54d7cdc1a94a8dae05a75ccff48356406d51b0d6a64cbdc641c266e3e008eb mesa-13.0.0.tar.gz
94edb4ebff82066a68be79d9c2627f15995e1fe10f67ab3fc63deb842027d727 mesa-13.0.0.tar.xz
</pre>
docs/relnotes/13.0.1.html (new file, 187 lines)
@@ -0,0 +1,187 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html lang="en">
<head>
<meta http-equiv="content-type" content="text/html; charset=utf-8">
<title>Mesa Release Notes</title>
<link rel="stylesheet" type="text/css" href="../mesa.css">
</head>
<body>

<div class="header">
<h1>The Mesa 3D Graphics Library</h1>
</div>

<iframe src="../contents.html"></iframe>
<div class="content">

<h1>Mesa 13.0.1 Release Notes / November 14, 2016</h1>

<p>
Mesa 13.0.1 is a bug fix release which fixes bugs found since the 13.0.0 release.
</p>
<p>
Mesa 13.0.1 implements the OpenGL 4.4 API, but the version reported by
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
Some drivers don't support all the features required in OpenGL 4.4. OpenGL
4.4 is <strong>only</strong> available if requested at context creation
because compatibility contexts are not supported.
</p>

<h2>SHA256 checksums</h2>
<pre>
TBD
</pre>

<h2>New features</h2>
<p>None</p>

<h2>Bug fixes</h2>

<ul>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=97715">Bug 97715</a> - [ILK,G45,G965] piglit.spec.arb_separate_shader_objects.misc api error checks</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98012">Bug 98012</a> - [IVB] Segfault when running Dolphin twice with Vulkan</li>
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98512">Bug 98512</a> - radeon r600 vdpau: Invalid command stream: texture bo too small</li>
</ul>

<h2>Changes</h2>

<p>Adam Jackson (2):</p>
<ul>
<li>glx/glvnd: Don't modify the dummy slot in the dispatch table</li>
<li>glx/glvnd: Fix dispatch function names and indices</li>
</ul>

<p>Andreas Boll (1):</p>
<ul>
<li>glx/windows: Add wgl.h to the sources list</li>
</ul>

<p>Anuj Phogat (1):</p>
<ul>
<li>i965: Fix GPU hang related to multiple render targets and alpha testing</li>
</ul>

<p>Chih-Wei Huang (1):</p>
<ul>
<li>android: avoid using libdrm with host modules</li>
</ul>

<p>Darren Salt (1):</p>
<ul>
<li>radv/pipeline: Don't dereference NULL dynamic state pointers</li>
</ul>

<p>Dave Airlie (8):</p>
<ul>
<li>radv: expose xlib platform extension</li>
<li>radv: fix dual source blending</li>
<li>Revert "st/vdpau: use linear layout for output surfaces"</li>
<li>radv: emit correct last export when Z/stencil export is enabled</li>
<li>ac/nir: add support for discard_if intrinsic (v2)</li>
<li>nir: add conditional discard optimisation (v4)</li>
<li>radv: enable conditional discard optimisation on radv.</li>
<li>radv: fix GetFenceStatus for signaled fences</li>
</ul>

<p>Emil Velikov (6):</p>
<ul>
<li>docs: add sha256 checksums for 13.0.0</li>
<li>amd/addrlib: limit fastcall/regparm to GCC i386</li>
<li>anv: use correct .specVersion for extensions</li>
<li>radv: use correct .specVersion for extensions</li>
<li>radv: Suffix the radeon_icd file with the host CPU</li>
<li>Update version to 13.0.1</li>
</ul>

<p>Eric Anholt (1):</p>
<ul>
<li>vc4: Use Newton-Raphson on the 1/W write to fix glmark2 terrain.</li>
</ul>

<p>Francisco Jerez (1):</p>
<ul>
<li>nir: Flip gl_SamplePosition in nir_lower_wpos_ytransform().</li>
</ul>

<p>Fredrik Höglund (1):</p>
<ul>
<li>radv: add support for anisotropic filtering on VI+</li>
</ul>

<p>Jason Ekstrand (21):</p>
<ul>
<li>anv/device: Return DEVICE_LOST if execbuf2 fails</li>
<li>vulkan/wsi/x11: Better handle wsi_x11_connection_create failure</li>
<li>vulkan/wsi/x11: Clean up connections in finish_wsi</li>
<li>anv: Better handle return codes from anv_physical_device_init</li>
<li>intel/blorp: Use wm_prog_data instead of hand-rolling our own</li>
<li>intel/blorp: Pass a brw_stage_prog_data to upload_shader</li>
<li>anv/pipeline: Put actual pointers in anv_shader_bin</li>
<li>anv/pipeline: Properly cache prog_data::param</li>
<li>intel/blorp: Emit all the binding tables</li>
<li>anv/device: Add an execbuf wrapper</li>
<li>anv: Add a cmd_buffer_execbuf helper</li>
<li>anv: Don't presume to know what address is in a surface relocation</li>
<li>anv: Add a new bo_pool_init helper</li>
<li>anv/allocator: Simplify anv_scratch_pool</li>
<li>anv: Initialize anv_bo::offset to -1</li>
<li>anv/batch_chain: Improve write_reloc</li>
<li>anv: Add an anv_execbuf helper struct</li>
<li>anv/batch: Move last_ss_pool_bo_offset to the command buffer</li>
<li>anv: Move relocation handling from EndCommandBuffer to QueueSubmit</li>
<li>anv/cmd_buffer: Take a command buffer instead of a batch in two helpers</li>
<li>anv/cmd_buffer: Enable a CS stall workaround for Sky Lake gt4</li>
</ul>

<p>Kenneth Graunke (2):</p>
<ul>
<li>glsl: Update deref types when resizing implicitly sized arrays.</li>
<li>mesa: Fix pixel shader scratch space allocation on Gen9+ platforms.</li>
</ul>

<p>Kristian Høgsberg (1):</p>
<ul>
<li>anv: Do relocations in userspace before execbuf ioctl</li>
</ul>

<p>Marek Olšák (4):</p>
<ul>
<li>egl: use util/macros.h</li>
<li>egl: make interop ABI visible again</li>
<li>glx: make interop ABI visible again</li>
<li>radeonsi: fix an assertion failure in si_decompress_sampler_color_textures</li>
</ul>

<p>Nicolai Hähnle (4):</p>
<ul>
<li>radeonsi: fix BFE/BFI lowering for GLSL semantics</li>
<li>glsl: fix lowering of UBO references of named blocks</li>
<li>st/glsl_to_tgsi: fix dvec[34] loads from SSBO</li>
<li>st/mesa: fix the layer of VDPAU surface samplers</li>
</ul>

<p>Steven Toth (3):</p>
<ul>
<li>gallium/hud: fix a problem where objects are free'd while in use.</li>
<li>gallium/hud: close a previously opened handle</li>
<li>gallium/hud: protect against an initialization race</li>
</ul>

<p>Timothy Arceri (1):</p>
<ul>
<li>mesa/glsl: delete previously linked shaders earlier when linking</li>
</ul>

</div>
</body>
</html>
@@ -88,7 +88,11 @@ typedef int INT;

#ifndef ADDR_FASTCALL
#if defined(__GNUC__)
#define ADDR_FASTCALL __attribute__((regparm(0)))
#if defined(__i386__)
#define ADDR_FASTCALL __attribute__((regparm(0)))
#else
#define ADDR_FASTCALL
#endif
#else
#define ADDR_FASTCALL __fastcall
#endif
@@ -2609,6 +2609,24 @@ static void emit_barrier(struct nir_to_llvm_context *ctx)
ctx->voidt, NULL, 0, 0);
}

static void emit_discard_if(struct nir_to_llvm_context *ctx,
nir_intrinsic_instr *instr)
{
LLVMValueRef cond;
ctx->shader_info->fs.can_discard = true;

cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
get_src(ctx, instr->src[0]),
ctx->i32zero, "");

cond = LLVMBuildSelect(ctx->builder, cond,
LLVMConstReal(ctx->f32, -1.0f),
ctx->f32zero, "");
emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kill",
LLVMVoidTypeInContext(ctx->context),
&cond, 1, 0);
}

static LLVMValueRef
visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
{
@@ -2921,6 +2939,9 @@ static void visit_intrinsic(struct nir_to_llvm_context *ctx,
LLVMVoidTypeInContext(ctx->context),
NULL, 0, 0);
break;
case nir_intrinsic_discard_if:
emit_discard_if(ctx, instr);
break;
case nir_intrinsic_memory_barrier:
emit_waitcnt(ctx);
break;
@@ -4352,12 +4373,10 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx,

for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
LLVMValueRef values[4];
bool last;

if (!(ctx->output_mask & (1ull << i)))
continue;

last = ctx->output_mask <= ((1ull << (i + 1)) - 1);

if (i == FRAG_RESULT_DEPTH) {
ctx->shader_info->fs.writes_z = true;
depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
@@ -4367,10 +4386,14 @@ handle_fs_outputs_post(struct nir_to_llvm_context *ctx,
stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
} else {
bool last = false;
for (unsigned j = 0; j < 4; j++)
values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));

if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
last = ctx->output_mask <= ((1ull << (i + 1)) - 1);

si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
index++;
}
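The emit_discard_if() hunk above maps a NIR condition onto the operand convention of the llvm.AMDGPU.kill intrinsic, which (as used here) discards the fragment when its float operand is negative. A minimal scalar sketch of that mapping, written in plain C for illustration only (the helper name is hypothetical):

```c
#include <stdbool.h>

/* Scalar model of the value emit_discard_if() feeds to llvm.AMDGPU.kill:
 * a true NIR condition (any non-zero i32) becomes -1.0f, false becomes 0.0f,
 * so the kill fires exactly when the discard condition holds. */
static float kill_operand(unsigned nir_cond)
{
   bool discard = (nir_cond != 0);  /* LLVMBuildICmp(..., LLVMIntNE, src, i32zero) */
   return discard ? -1.0f : 0.0f;   /* LLVMBuildSelect(cond, -1.0f, 0.0f)          */
}
```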
src/amd/vulkan/.gitignore (vendored, 1 line changed)
@@ -4,3 +4,4 @@
/radv_timestamp.h
/dev_icd.json
/vk_format_table.c
/radeon_icd.*.json
@@ -131,11 +131,11 @@ vk_format_table.c: vk_format_table.py \
$(PYTHON2) $(srcdir)/vk_format_table.py $(srcdir)/vk_format_layout.csv > $@

BUILT_SOURCES = $(VULKAN_GENERATED_FILES)
CLEANFILES = $(BUILT_SOURCES) dev_icd.json radv_timestamp.h
CLEANFILES = $(BUILT_SOURCES) dev_icd.json radeon_icd.@host_cpu@.json
EXTRA_DIST = \
$(top_srcdir)/include/vulkan/vk_icd.h \
dev_icd.json.in \
radeon_icd.json \
radeon_icd.json.in \
radv_entrypoints_gen.py \
vk_format_layout.csv \
vk_format_parse.py \
@@ -155,7 +155,7 @@ libvulkan_radeon_la_LDFLAGS = \

icdconfdir = @VULKAN_ICD_INSTALL_DIR@
icdconf_DATA = radeon_icd.json
icdconf_DATA = radeon_icd.@host_cpu@.json
# The following is used for development purposes, by setting VK_ICD_FILENAMES.
noinst_DATA = dev_icd.json

@@ -164,4 +164,9 @@ dev_icd.json : dev_icd.json.in
-e "s#@build_libdir@#${abs_top_builddir}/${LIB_DIR}#" \
< $(srcdir)/dev_icd.json.in > $@

radeon_icd.@host_cpu@.json : radeon_icd.json.in
$(AM_V_GEN) $(SED) \
-e "s#@install_libdir@#${libdir}#" \
< $(srcdir)/radeon_icd.json.in > $@

include $(top_srcdir)/install-lib-links.mk
@@ -1,7 +1,7 @@
{
"file_format_version": "1.0.0",
"ICD": {
"library_path": "libvulkan_radeon.so",
"library_path": "@install_libdir@/libvulkan_radeon.so",
"api_version": "1.0.3"
}
}
@@ -113,13 +113,19 @@ static const VkExtensionProperties global_extensions[] = {
#ifdef VK_USE_PLATFORM_XCB_KHR
{
.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
.specVersion = 5,
.specVersion = 6,
},
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
{
.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
.specVersion = 6,
},
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
{
.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
.specVersion = 4,
.specVersion = 5,
},
#endif
};
@@ -127,7 +133,7 @@ static const VkExtensionProperties global_extensions[] = {
static const VkExtensionProperties device_extensions[] = {
{
.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
.specVersion = 67,
.specVersion = 68,
},
};

@@ -1166,6 +1172,8 @@ VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
RADV_FROM_HANDLE(radv_device, device, _device);
RADV_FROM_HANDLE(radv_fence, fence, _fence);

if (fence->signalled)
return VK_SUCCESS;
if (!fence->submitted)
return VK_NOT_READY;

@@ -1728,26 +1736,50 @@ radv_tex_bordercolor(VkBorderColor bcolor)
return 0;
}

static unsigned
radv_tex_aniso_filter(unsigned filter)
{
if (filter < 2)
return 0;
if (filter < 4)
return 1;
if (filter < 8)
return 2;
if (filter < 16)
return 3;
return 4;
}

static void
radv_init_sampler(struct radv_device *device,
struct radv_sampler *sampler,
const VkSamplerCreateInfo *pCreateInfo)
{
uint32_t max_aniso = 0;
uint32_t max_aniso_ratio = 0;//TODO
uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
(uint32_t) pCreateInfo->maxAnisotropy : 0;
uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
bool is_vi;
is_vi = (device->instance->physicalDevice.rad_info.chip_class >= VI);

if (!is_vi && max_aniso > 0) {
radv_finishme("Anisotropic filtering must be disabled manually "
"by the shader on SI-CI when BASE_LEVEL == LAST_LEVEL\n");
max_aniso = max_aniso_ratio = 0;
}

sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
S_008F30_ANISO_BIAS(max_aniso_ratio) |
S_008F30_DISABLE_CUBE_WRAP(0) |
S_008F30_COMPAT_MODE(is_vi));
sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)));
S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
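radv_tex_aniso_filter() above quantizes VkSamplerCreateInfo::maxAnisotropy into the MAX_ANISO_RATIO field; its threshold ladder amounts to floor(log2(maxAnisotropy)) clamped to [0, 4]. A small self-contained check of that equivalence (illustration only, not radv code):

```c
#include <assert.h>

/* Same mapping as radv_tex_aniso_filter(): 1x->0, 2-3x->1, 4-7x->2, 8-15x->3, 16x+->4. */
static unsigned aniso_ratio(unsigned filter)
{
   unsigned ratio = 0;
   while (ratio < 4 && (2u << ratio) <= filter)  /* floor(log2(filter)), capped at 4 */
      ratio++;
   return ratio;
}

int main(void)
{
   assert(aniso_ratio(1) == 0 && aniso_ratio(2) == 1 && aniso_ratio(3) == 1);
   assert(aniso_ratio(4) == 2 && aniso_ratio(8) == 3 && aniso_ratio(16) == 4);
   return 0;
}
```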
@@ -144,6 +144,7 @@ radv_optimize_nir(struct nir_shader *shader)
NIR_PASS(progress, shader, nir_opt_algebraic);
NIR_PASS(progress, shader, nir_opt_constant_folding);
NIR_PASS(progress, shader, nir_opt_undef);
NIR_PASS(progress, shader, nir_opt_conditional_discard);
} while (progress);
}

@@ -642,7 +643,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,
const VkGraphicsPipelineCreateInfo *pCreateInfo,
uint32_t blend_enable,
uint32_t blend_need_alpha,
bool single_cb_enable)
bool single_cb_enable,
bool blend_mrt0_is_dual_src)
{
RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass);
struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass;
@@ -664,6 +666,8 @@ radv_pipeline_compute_spi_color_formats(struct radv_pipeline *pipeline,

blend->cb_shader_mask = si_get_cb_shader_mask(col_format);

if (blend_mrt0_is_dual_src)
col_format |= (col_format & 0xf) << 4;
if (!col_format)
col_format |= V_028714_SPI_SHADER_32_R;
blend->spi_shader_col_format = col_format;
@@ -715,8 +719,13 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
struct radv_blend_state *blend = &pipeline->graphics.blend;
unsigned mode = V_028808_CB_NORMAL;
uint32_t blend_enable = 0, blend_need_alpha = 0;
bool blend_mrt0_is_dual_src = false;
int i;
bool single_cb_enable = false;

if (!vkblend)
return;

if (extra && extra->custom_blend_mode) {
single_cb_enable = true;
mode = extra->custom_blend_mode;
@@ -755,7 +764,9 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
}

if (is_dual_src(srcRGB) || is_dual_src(dstRGB) || is_dual_src(srcA) || is_dual_src(dstA))
radv_finishme("dual source blending");
if (i == 0)
blend_mrt0_is_dual_src = true;

if (eqRGB == VK_BLEND_OP_MIN || eqRGB == VK_BLEND_OP_MAX) {
srcRGB = VK_BLEND_FACTOR_ONE;
dstRGB = VK_BLEND_FACTOR_ONE;
@@ -797,7 +808,7 @@ radv_pipeline_init_blend_state(struct radv_pipeline *pipeline,
blend->cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE);

radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo,
blend_enable, blend_need_alpha, single_cb_enable);
blend_enable, blend_need_alpha, single_cb_enable, blend_mrt0_is_dual_src);
}

static uint32_t si_translate_stencil_op(enum VkStencilOp op)
@@ -1069,18 +1080,27 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,

struct radv_dynamic_state *dynamic = &pipeline->dynamic_state;

dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
typed_memcpy(dynamic->viewport.viewports,
pCreateInfo->pViewportState->pViewports,
pCreateInfo->pViewportState->viewportCount);
}
/* Section 9.2 of the Vulkan 1.0.15 spec says:
*
* pViewportState is [...] NULL if the pipeline
* has rasterization disabled.
*/
if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable) {
assert(pCreateInfo->pViewportState);

dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
typed_memcpy(dynamic->scissor.scissors,
pCreateInfo->pViewportState->pScissors,
pCreateInfo->pViewportState->scissorCount);
dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) {
typed_memcpy(dynamic->viewport.viewports,
pCreateInfo->pViewportState->pViewports,
pCreateInfo->pViewportState->viewportCount);
}

dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) {
typed_memcpy(dynamic->scissor.scissors,
pCreateInfo->pViewportState->pScissors,
pCreateInfo->pViewportState->scissorCount);
}
}

if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) {
@@ -1098,7 +1118,21 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
}

if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
/* Section 9.2 of the Vulkan 1.0.15 spec says:
*
* pColorBlendState is [...] NULL if the pipeline has rasterization
* disabled or if the subpass of the render pass the pipeline is
* created against does not use any color attachments.
*/
bool uses_color_att = false;
for (unsigned i = 0; i < subpass->color_count; ++i) {
if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
uses_color_att = true;
break;
}
}

if (uses_color_att && states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) {
assert(pCreateInfo->pColorBlendState);
typed_memcpy(dynamic->blend_constants,
pCreateInfo->pColorBlendState->blendConstants, 4);
@@ -1110,14 +1144,17 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
* no need to override the depthstencil defaults in
* radv_pipeline::dynamic_state when there is no depthstencil attachment.
*
* From the Vulkan spec (20 Oct 2015, git-aa308cb):
* Section 9.2 of the Vulkan 1.0.15 spec says:
*
* pDepthStencilState [...] may only be NULL if renderPass and subpass
* specify a subpass that has no depth/stencil attachment.
* pDepthStencilState is [...] NULL if the pipeline has rasterization
* disabled or if the subpass of the render pass the pipeline is created
* against does not use a depth/stencil attachment.
*/
if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
assert(pCreateInfo->pDepthStencilState);

if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) {
assert(pCreateInfo->pDepthStencilState);
dynamic->depth_bounds.min =
pCreateInfo->pDepthStencilState->minDepthBounds;
dynamic->depth_bounds.max =
@@ -1125,7 +1162,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
}

if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
assert(pCreateInfo->pDepthStencilState);
dynamic->stencil_compare_mask.front =
pCreateInfo->pDepthStencilState->front.compareMask;
dynamic->stencil_compare_mask.back =
@@ -1133,7 +1169,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
}

if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
assert(pCreateInfo->pDepthStencilState);
dynamic->stencil_write_mask.front =
pCreateInfo->pDepthStencilState->front.writeMask;
dynamic->stencil_write_mask.back =
@@ -1141,7 +1176,6 @@ radv_pipeline_init_dynamic_state(struct radv_pipeline *pipeline,
}

if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) {
assert(pCreateInfo->pDepthStencilState);
dynamic->stencil_reference.front =
pCreateInfo->pDepthStencilState->front.reference;
dynamic->stencil_reference.back =
@@ -227,6 +227,7 @@ NIR_FILES = \
nir/nir_metadata.c \
nir/nir_move_vec_src_uses_to_dest.c \
nir/nir_normalize_cubemap_coords.c \
nir/nir_opt_conditional_discard.c \
nir/nir_opt_constant_folding.c \
nir/nir_opt_copy_propagate.c \
nir/nir_opt_cse.c \
@@ -181,7 +181,43 @@ private:
};


class array_resize_visitor : public ir_hierarchical_visitor {
/**
* A visitor helper that provides methods for updating the types of
* ir_dereferences. Classes that update variable types (say, updating
* array sizes) will want to use this so that dereference types stay in sync.
*/
class deref_type_updater : public ir_hierarchical_visitor {
public:
virtual ir_visitor_status visit(ir_dereference_variable *ir)
{
ir->type = ir->var->type;
return visit_continue;
}

virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
{
const glsl_type *const vt = ir->array->type;
if (vt->is_array())
ir->type = vt->fields.array;
return visit_continue;
}

virtual ir_visitor_status visit_leave(ir_dereference_record *ir)
{
for (unsigned i = 0; i < ir->record->type->length; i++) {
const struct glsl_struct_field *field =
&ir->record->type->fields.structure[i];
if (strcmp(field->name, ir->field) == 0) {
ir->type = field->type;
break;
}
}
return visit_continue;
}
};


class array_resize_visitor : public deref_type_updater {
public:
unsigned num_vertices;
gl_shader_program *prog;
@@ -240,24 +276,6 @@ public:

return visit_continue;
}

/* Dereferences of input variables need to be updated so that their type
* matches the newly assigned type of the variable they are accessing. */
virtual ir_visitor_status visit(ir_dereference_variable *ir)
{
ir->type = ir->var->type;
return visit_continue;
}

/* Dereferences of 2D input arrays need to be updated so that their type
* matches the newly assigned type of the array they are accessing. */
virtual ir_visitor_status visit_leave(ir_dereference_array *ir)
{
const glsl_type *const vt = ir->array->type;
if (vt->is_array())
ir->type = vt->fields.array;
return visit_continue;
}
};

/**
@@ -1353,7 +1371,7 @@ move_non_declarations(exec_list *instructions, exec_node *last,
* it inside that function leads to compiler warnings with some versions of
* gcc.
*/
class array_sizing_visitor : public ir_hierarchical_visitor {
class array_sizing_visitor : public deref_type_updater {
public:
array_sizing_visitor()
: mem_ctx(ralloc_context(NULL)),
@@ -2273,6 +2291,8 @@ update_array_sizes(struct gl_shader_program *prog)
if (prog->_LinkedShaders[i] == NULL)
continue;

bool types_were_updated = false;

foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) {
ir_variable *const var = node->as_variable();

@@ -2328,11 +2348,15 @@ update_array_sizes(struct gl_shader_program *prog)

var->type = glsl_type::get_array_instance(var->type->fields.array,
size + 1);
/* FINISHME: We should update the types of array
* dereferences of this variable now.
*/
types_were_updated = true;
}
}

/* Update the types of dereferences in case we changed any. */
if (types_were_updated) {
deref_type_updater v;
v.run(prog->_LinkedShaders[i]->ir);
}
}
}

@@ -4785,14 +4809,6 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
"type of shader\n");
}

for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
if (prog->_LinkedShaders[i] != NULL) {
_mesa_delete_linked_shader(ctx, prog->_LinkedShaders[i]);
}

prog->_LinkedShaders[i] = NULL;
}

/* Link all shaders for a particular stage and validate the result.
*/
for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
@@ -107,7 +107,6 @@ public:

struct gl_linked_shader *shader;
bool clamp_block_indices;
struct gl_uniform_buffer_variable *ubo_var;
const struct glsl_struct_field *struct_field;
ir_variable *variable;
ir_rvalue *uniform_block;
@@ -308,8 +307,11 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
this->uniform_block = index;
}

this->ubo_var = var->is_interface_instance()
? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];
if (var->is_interface_instance()) {
*const_offset = 0;
} else {
*const_offset = blocks[i]->Uniforms[var->data.location].Offset;
}

break;
}
@@ -317,8 +319,6 @@ lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,

assert(this->uniform_block);

*const_offset = ubo_var->Offset;

this->struct_field = NULL;
setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
matrix_columns, &this->struct_field, packing);
@@ -421,7 +421,7 @@ standalone_compile_shader(const struct standalone_options *_options,
}

if ((status == EXIT_SUCCESS) && options->do_link) {
_mesa_clear_shader_program_data(whole_program);
_mesa_clear_shader_program_data(ctx, whole_program);

link_shaders(ctx, whole_program);
status = (whole_program->LinkStatus) ? EXIT_SUCCESS : EXIT_FAILURE;
@@ -123,8 +123,16 @@ _mesa_delete_linked_shader(struct gl_context *ctx,
}

void
_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
_mesa_clear_shader_program_data(struct gl_context *ctx,
struct gl_shader_program *shProg)
{
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
if (shProg->_LinkedShaders[i] != NULL) {
_mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[i]);
shProg->_LinkedShaders[i] = NULL;
}
}

shProg->NumUniformStorage = 0;
shProg->UniformStorage = NULL;
shProg->NumUniformRemapTable = 0;
@@ -56,7 +56,8 @@ _mesa_delete_linked_shader(struct gl_context *ctx,
struct gl_linked_shader *sh);

extern "C" void
_mesa_clear_shader_program_data(struct gl_shader_program *);
_mesa_clear_shader_program_data(struct gl_context *ctx,
struct gl_shader_program *);

extern "C" void
_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
@@ -2625,6 +2625,8 @@ bool nir_opt_remove_phis(nir_shader *shader);

bool nir_opt_undef(nir_shader *shader);

bool nir_opt_conditional_discard(nir_shader *shader);

void nir_sweep(nir_shader *shader);

nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val);
@@ -272,6 +272,26 @@ lower_interp_var_at_offset(lower_wpos_ytransform_state *state,
flip_y)));
}

static void
lower_load_sample_pos(lower_wpos_ytransform_state *state,
nir_intrinsic_instr *intr)
{
nir_builder *b = &state->b;
b->cursor = nir_after_instr(&intr->instr);

nir_ssa_def *pos = &intr->dest.ssa;
nir_ssa_def *scale = nir_channel(b, get_transform(state), 0);
nir_ssa_def *neg_scale = nir_channel(b, get_transform(state), 2);
/* Either y or 1-y for scale equal to 1 or -1 respectively. */
nir_ssa_def *flipped_y =
nir_fadd(b, nir_fmax(b, neg_scale, nir_imm_float(b, 0.0)),
nir_fmul(b, nir_channel(b, pos, 1), scale));
nir_ssa_def *flipped_pos = nir_vec2(b, nir_channel(b, pos, 0), flipped_y);

nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, nir_src_for_ssa(flipped_pos),
flipped_pos->parent_instr);
}

static void
lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block)
{
@@ -287,6 +307,10 @@ lower_wpos_ytransform_block(lower_wpos_ytransform_state *state, nir_block *block
/* gl_FragCoord should not have array/struct deref's: */
assert(dvar->deref.child == NULL);
lower_fragcoord(state, intr);
} else if (var->data.mode == nir_var_system_value &&
var->data.location == SYSTEM_VALUE_SAMPLE_POS) {
assert(dvar->deref.child == NULL);
lower_load_sample_pos(state, intr);
}
} else if (intr->intrinsic == nir_intrinsic_interp_var_at_offset) {
lower_interp_var_at_offset(state, intr);
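The flipped_y expression built above is nir_fmax(neg_scale, 0) + y * scale. Assuming the transform vector supplies (scale, neg_scale) = (1, -1) for an unflipped framebuffer and (-1, 1) for a flipped one (an assumption about get_transform(), which is not shown in this hunk), the arithmetic reduces to y or 1 - y, matching the in-code comment. A tiny C check:

```c
#include <assert.h>

/* Scalar model of nir_fadd(nir_fmax(neg_scale, 0), y * scale). */
static float flip_sample_pos_y(float y, float scale, float neg_scale)
{
   float bias = neg_scale > 0.0f ? neg_scale : 0.0f;
   return bias + y * scale;
}

int main(void)
{
   assert(flip_sample_pos_y(0.25f,  1.0f, -1.0f) == 0.25f);  /* unflipped: y     */
   assert(flip_sample_pos_y(0.25f, -1.0f,  1.0f) == 0.75f);  /* flipped:   1 - y */
   return 0;
}
```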
src/compiler/nir/nir_opt_conditional_discard.c (new file, 125 lines)
@@ -0,0 +1,125 @@
/*
* Copyright © 2016 Red Hat
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/

#include "nir.h"
#include "nir_builder.h"

/** @file nir_opt_conditional_discard.c
*
* Handles optimization of lowering if (cond) discard to discard_if(cond).
*/

static bool
nir_opt_conditional_discard_block(nir_block *block, void *mem_ctx)
{
nir_builder bld;

if (nir_cf_node_is_first(&block->cf_node))
return false;

nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node);
if (prev_node->type != nir_cf_node_if)
return false;

nir_if *if_stmt = nir_cf_node_as_if(prev_node);
nir_block *then_block = nir_if_first_then_block(if_stmt);
nir_block *else_block = nir_if_first_else_block(if_stmt);

/* check there is only one else block and it is empty */
if (nir_if_last_else_block(if_stmt) != else_block)
return false;
if (!exec_list_is_empty(&else_block->instr_list))
return false;

/* check there is only one then block and it has only one instruction in it */
if (nir_if_last_then_block(if_stmt) != then_block)
return false;
if (exec_list_is_empty(&then_block->instr_list))
return false;
if (exec_list_length(&then_block->instr_list) > 1)
return false;
/*
* make sure no subsequent phi nodes point at this if.
*/
nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
nir_foreach_instr_safe(instr, after) {
if (instr->type != nir_instr_type_phi)
break;
nir_phi_instr *phi = nir_instr_as_phi(instr);

nir_foreach_phi_src(phi_src, phi) {
if (phi_src->pred == then_block ||
phi_src->pred == else_block)
return false;
}
}

/* Get the first instruction in the then block and confirm it is
* a discard or a discard_if
*/
nir_instr *instr = nir_block_first_instr(then_block);
if (instr->type != nir_instr_type_intrinsic)
return false;

nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
if (intrin->intrinsic != nir_intrinsic_discard &&
intrin->intrinsic != nir_intrinsic_discard_if)
return false;

nir_src cond;

nir_builder_init(&bld, mem_ctx);
bld.cursor = nir_before_cf_node(prev_node);
if (intrin->intrinsic == nir_intrinsic_discard)
cond = if_stmt->condition;
else
cond = nir_src_for_ssa(nir_iand(&bld,
nir_ssa_for_src(&bld, if_stmt->condition, 1),
nir_ssa_for_src(&bld, intrin->src[0], 1)));

nir_intrinsic_instr *discard_if =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_discard_if);
nir_src_copy(&discard_if->src[0], &cond, discard_if);

nir_instr_insert_before_cf(prev_node, &discard_if->instr);
nir_instr_remove(&intrin->instr);
nir_cf_node_remove(&if_stmt->cf_node);

return true;
}

bool
nir_opt_conditional_discard(nir_shader *shader)
{
bool progress = false;

nir_foreach_function(function, shader) {
if (function->impl) {
void *mem_ctx = ralloc_parent(function->impl);
nir_foreach_block_safe(block, function->impl) {
progress |= nir_opt_conditional_discard_block(block, mem_ctx);
}
}
}
return progress;
}
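The pass above folds the two control-flow shapes it matches, `if (cond) discard;` and `if (cond) discard_if(inner);`, into a single discard_if whose condition is, respectively, cond or cond AND inner (the nir_iand it builds). A scalar C model of that predicate folding, for illustration only:

```c
#include <assert.h>
#include <stdbool.h>

/* Predicate fed to the new discard_if for the two shapes the pass accepts. */
static bool folded_cond(bool if_cond, bool has_inner, bool inner_cond)
{
   return has_inner ? (if_cond && inner_cond) : if_cond;
}

int main(void)
{
   assert(folded_cond(true,  false, false) == true);   /* if (c) discard;       -> discard_if(c)      */
   assert(folded_cond(true,  true,  true)  == true);    /* if (c) discard_if(d); -> discard_if(c && d) */
   assert(folded_cond(true,  true,  false) == false);
   assert(folded_cond(false, true,  true)  == false);
   return 0;
}
```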
@@ -80,8 +80,6 @@
#include "eglimage.h"
#include "eglsync.h"

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))

struct wl_buffer;

struct dri2_egl_driver
@@ -2384,7 +2384,7 @@ _eglLockDisplayInterop(EGLDisplay dpy, EGLContext context,
return MESA_GLINTEROP_SUCCESS;
}

int
PUBLIC int
MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
struct mesa_glinterop_device_info *out)
{
@@ -2406,7 +2406,7 @@ MesaGLInteropEGLQueryDeviceInfo(EGLDisplay dpy, EGLContext context,
return ret;
}

int
PUBLIC int
MesaGLInteropEGLExportObject(EGLDisplay dpy, EGLContext context,
struct mesa_glinterop_export_in *in,
struct mesa_glinterop_export_out *out)
@@ -34,6 +34,8 @@
#ifndef EGLDEFINES_INCLUDED
#define EGLDEFINES_INCLUDED

#include "util/macros.h"

#ifdef __cplusplus
extern "C" {
#endif
@@ -48,9 +50,6 @@ extern "C" {

#define _EGL_VENDOR_STRING "Mesa Project"

#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
#define MIN2(A, B) (((A) < (B)) ? (A) : (B))

#ifdef __cplusplus
}
#endif
@@ -36,6 +36,7 @@
#include "hud/hud_private.h"
#include "util/list.h"
#include "os/os_time.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
#include <stdio.h>
#include <unistd.h>
@@ -61,6 +62,7 @@ struct cpufreq_info

static int gcpufreq_count = 0;
static struct list_head gcpufreq_list;
pipe_static_mutex(gcpufreq_mutex);

static struct cpufreq_info *
find_cfi_by_index(int cpu_index, int mode)
@@ -112,14 +114,6 @@ query_cfi_load(struct hud_graph *gr)
}
}

static void
free_query_data(void *p)
{
struct cpufreq_info *cfi = (struct cpufreq_info *)p;
list_del(&cfi->list);
FREE(cfi);
}

/**
* Create and initialize a new object for a specific CPU.
* \param pane parent context.
@@ -162,11 +156,6 @@ hud_cpufreq_graph_install(struct hud_pane *pane, int cpu_index,
gr->query_data = cfi;
gr->query_new_value = query_cfi_load;

/* Don't use free() as our callback as that messes up Gallium's
* memory debugger. Use simple free_query_data() wrapper.
*/
gr->free_query_data = free_query_data;

hud_pane_add_graph(pane, gr);
hud_pane_set_max_value(pane, 3000000 /* 3 GHz */);
}
@@ -199,16 +188,21 @@ hud_get_num_cpufreq(bool displayhelp)
int cpu_index;

/* Return the number of CPU metrics we support. */
if (gcpufreq_count)
pipe_mutex_lock(gcpufreq_mutex);
if (gcpufreq_count) {
pipe_mutex_unlock(gcpufreq_mutex);
return gcpufreq_count;
}

/* Scan /sys/devices.../cpu, for every object type we support, create
* and persist an object to represent its different metrics.
*/
list_inithead(&gcpufreq_list);
DIR *dir = opendir("/sys/devices/system/cpu");
if (!dir)
if (!dir) {
pipe_mutex_unlock(gcpufreq_mutex);
return 0;
}

while ((dp = readdir(dir)) != NULL) {

@@ -238,6 +232,7 @@ hud_get_num_cpufreq(bool displayhelp)
snprintf(fn, sizeof(fn), "%s/cpufreq/scaling_max_freq", basename);
add_object(dp->d_name, fn, CPUFREQ_MAXIMUM, cpu_index);
}
closedir(dir);

if (displayhelp) {
list_for_each_entry(struct cpufreq_info, cfi, &gcpufreq_list, list) {
@@ -251,6 +246,7 @@ hud_get_num_cpufreq(bool displayhelp)
}
}

pipe_mutex_unlock(gcpufreq_mutex);
return gcpufreq_count;
}
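The three hunks that follow repeat the same fix applied above: the lazily built global list is now initialized under a mutex, with an early unlock-and-return when another thread has already done the work, and an unlock on every error path. A generic sketch of that shape in plain C, using pthreads in place of Gallium's pipe_mutex wrappers (an assumption made for the example):

```c
#include <pthread.h>

static pthread_mutex_t g_mutex = PTHREAD_MUTEX_INITIALIZER;
static int g_count;   /* 0 until the sysfs scan has populated the list */

int get_num_objects(void)
{
   pthread_mutex_lock(&g_mutex);
   if (g_count) {                      /* another caller already initialized */
      pthread_mutex_unlock(&g_mutex);
      return g_count;
   }
   /* ... scan sysfs and append entries to the global list, bumping g_count ... */
   if (0 /* scan failed, e.g. opendir() returned NULL */) {
      pthread_mutex_unlock(&g_mutex);  /* every early return must unlock */
      return 0;
   }
   pthread_mutex_unlock(&g_mutex);
   return g_count;
}
```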
@@ -35,6 +35,7 @@
#include "hud/hud_private.h"
#include "util/list.h"
#include "os/os_time.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
#include <stdio.h>
#include <unistd.h>
@@ -81,6 +82,7 @@ struct diskstat_info
*/
static int gdiskstat_count = 0;
static struct list_head gdiskstat_list;
pipe_static_mutex(gdiskstat_mutex);

static struct diskstat_info *
find_dsi_by_name(const char *n, int mode)
@@ -162,14 +164,6 @@ query_dsi_load(struct hud_graph *gr)
}
}

static void
free_query_data(void *p)
{
struct diskstat_info *nic = (struct diskstat_info *) p;
list_del(&nic->list);
FREE(nic);
}

/**
* Create and initialize a new object for a specific block I/O device.
* \param pane parent context.
@@ -208,11 +202,6 @@ hud_diskstat_graph_install(struct hud_pane *pane, const char *dev_name,
gr->query_data = dsi;
gr->query_new_value = query_dsi_load;

/* Don't use free() as our callback as that messes up Gallium's
* memory debugger. Use simple free_query_data() wrapper.
*/
gr->free_query_data = free_query_data;

hud_pane_add_graph(pane, gr);
hud_pane_set_max_value(pane, 100);
}
@@ -257,16 +246,21 @@ hud_get_num_disks(bool displayhelp)
char name[64];

/* Return the number of block devices and partitions. */
if (gdiskstat_count)
pipe_mutex_lock(gdiskstat_mutex);
if (gdiskstat_count) {
pipe_mutex_unlock(gdiskstat_mutex);
return gdiskstat_count;
}

/* Scan /sys/block, for every object type we support, create and
* persist an object to represent its different statistics.
*/
list_inithead(&gdiskstat_list);
DIR *dir = opendir("/sys/block/");
if (!dir)
if (!dir) {
pipe_mutex_unlock(gdiskstat_mutex);
return 0;
}

while ((dp = readdir(dir)) != NULL) {

@@ -290,8 +284,11 @@ hud_get_num_disks(bool displayhelp)
/* Add any partitions */
struct dirent *dpart;
DIR *pdir = opendir(basename);
if (!pdir)
if (!pdir) {
pipe_mutex_unlock(gdiskstat_mutex);
closedir(dir);
return 0;
}

while ((dpart = readdir(pdir)) != NULL) {
/* Avoid 'lo' and '..' and '.' */
@@ -311,6 +308,7 @@ hud_get_num_disks(bool displayhelp)
add_object_part(basename, dpart->d_name, DISKSTAT_WR);
}
}
closedir(dir);

if (displayhelp) {
list_for_each_entry(struct diskstat_info, dsi, &gdiskstat_list, list) {
@@ -322,6 +320,7 @@ hud_get_num_disks(bool displayhelp)
puts(line);
}
}
pipe_mutex_unlock(gdiskstat_mutex);

return gdiskstat_count;
}
@@ -35,6 +35,7 @@
#include "hud/hud_private.h"
#include "util/list.h"
#include "os/os_time.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
#include <stdio.h>
#include <unistd.h>
@@ -66,6 +67,7 @@ struct nic_info
*/
static int gnic_count = 0;
static struct list_head gnic_list;
pipe_static_mutex(gnic_mutex);

static struct nic_info *
find_nic_by_name(const char *n, int mode)
@@ -234,14 +236,6 @@ query_nic_load(struct hud_graph *gr)
}
}

static void
free_query_data(void *p)
{
struct nic_info *nic = (struct nic_info *) p;
list_del(&nic->list);
FREE(nic);
}

/**
* Create and initialize a new object for a specific network interface dev.
* \param pane parent context.
@@ -284,11 +278,6 @@ hud_nic_graph_install(struct hud_pane *pane, const char *nic_name,
gr->query_data = nic;
gr->query_new_value = query_nic_load;

/* Don't use free() as our callback as that messes up Gallium's
* memory debugger. Use simple free_query_data() wrapper.
*/
gr->free_query_data = free_query_data;

hud_pane_add_graph(pane, gr);
hud_pane_set_max_value(pane, 100);
}
@@ -342,16 +331,21 @@ hud_get_num_nics(bool displayhelp)
char name[64];

/* Return the number if network interfaces. */
if (gnic_count)
pipe_mutex_lock(gnic_mutex);
if (gnic_count) {
pipe_mutex_unlock(gnic_mutex);
return gnic_count;
}

/* Scan /sys/block, for every object type we support, create and
* persist an object to represent its different statistics.
*/
list_inithead(&gnic_list);
DIR *dir = opendir("/sys/class/net/");
if (!dir)
if (!dir) {
pipe_mutex_unlock(gnic_mutex);
return 0;
}

while ((dp = readdir(dir)) != NULL) {

@@ -412,6 +406,7 @@ hud_get_num_nics(bool displayhelp)
}

}
closedir(dir);

list_for_each_entry(struct nic_info, nic, &gnic_list, list) {
char line[64];
@@ -424,6 +419,7 @@ hud_get_num_nics(bool displayhelp)

}

pipe_mutex_unlock(gnic_mutex);
return gnic_count;
}
@@ -32,6 +32,7 @@
#include "hud/hud_private.h"
#include "util/list.h"
#include "os/os_time.h"
#include "os/os_thread.h"
#include "util/u_memory.h"
#include <stdio.h>
#include <unistd.h>
@@ -49,6 +50,7 @@
*/
static int gsensors_temp_count = 0;
static struct list_head gsensors_temp_list;
pipe_static_mutex(gsensor_temp_mutex);

struct sensors_temp_info
{
@@ -189,17 +191,6 @@ query_sti_load(struct hud_graph *gr)
}
}

static void
free_query_data(void *p)
{
struct sensors_temp_info *sti = (struct sensors_temp_info *) p;
list_del(&sti->list);
if (sti->chip)
sensors_free_chip_name(sti->chip);
FREE(sti);
sensors_cleanup();
}

/**
* Create and initialize a new object for a specific sensor interface dev.
* \param pane parent context.
@@ -237,11 +228,6 @@ hud_sensors_temp_graph_install(struct hud_pane *pane, const char *dev_name,
gr->query_data = sti;
gr->query_new_value = query_sti_load;

/* Don't use free() as our callback as that messes up Gallium's
* memory debugger. Use simple free_query_data() wrapper.
*/
gr->free_query_data = free_query_data;

hud_pane_add_graph(pane, gr);
switch (sti->mode) {
case SENSORS_TEMP_CURRENT:
@@ -338,12 +324,17 @@ int
hud_get_num_sensors(bool displayhelp)
{
/* Return the number of sensors detected. */
if (gsensors_temp_count)
pipe_mutex_lock(gsensor_temp_mutex);
if (gsensors_temp_count) {
pipe_mutex_unlock(gsensor_temp_mutex);
return gsensors_temp_count;
}

int ret = sensors_init(NULL);
if (ret)
if (ret) {
pipe_mutex_unlock(gsensor_temp_mutex);
return 0;
}

list_inithead(&gsensors_temp_list);

@@ -377,6 +368,7 @@ hud_get_num_sensors(bool displayhelp)
}
}

pipe_mutex_unlock(gsensor_temp_mutex);
return gsensors_temp_count;
}
@@ -487,7 +487,9 @@ si_decompress_sampler_color_textures(struct si_context *sctx,
assert(view);

tex = (struct r600_texture *)view->texture;
assert(tex->cmask.size || tex->fmask.size || tex->dcc_offset);
/* CMASK or DCC can be discarded and we can still end up here. */
if (!tex->cmask.size && !tex->fmask.size && !tex->dcc_offset)
continue;

si_blit_decompress_color(&sctx->b.b, tex,
view->u.tex.first_level, view->u.tex.last_level,
@@ -459,6 +459,8 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfi_args[3];
LLVMValueRef bfi_sm5;
LLVMValueRef cond;

// Calculate the bitmask: (((1 << src3) - 1) << src2
bfi_args[0] = LLVMBuildShl(builder,
@@ -478,11 +480,40 @@ static void emit_bfi(const struct lp_build_tgsi_action *action,
* (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
*/
emit_data->output[emit_data->chan] =
bfi_sm5 =
LLVMBuildXor(builder, bfi_args[2],
LLVMBuildAnd(builder, bfi_args[0],
LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
""), ""), "");

/* Since shifts of >= 32 bits are undefined in LLVM IR, the backend
* uses the convenient V_BFI lowering for the above, which follows SM5
* and disagrees with GLSL semantics when bits (src3) is 32.
*/
cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[3],
lp_build_const_int32(gallivm, 32), "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, emit_data->args[1], bfi_sm5, "");
}

static void emit_bfe(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct gallivm_state *gallivm = bld_base->base.gallivm;
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef bfe_sm5;
LLVMValueRef cond;

bfe_sm5 = lp_build_intrinsic(builder, action->intr_name,
emit_data->dst_type, emit_data->args,
emit_data->arg_count, LLVMReadNoneAttribute);

/* Correct for GLSL semantics. */
cond = LLVMBuildICmp(builder, LLVMIntUGE, emit_data->args[2],
lp_build_const_int32(gallivm, 32), "");
emit_data->output[emit_data->chan] =
LLVMBuildSelect(builder, cond, emit_data->args[0], bfe_sm5, "");
}

/* this is ffs in C */
@@ -783,7 +814,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_IBFE].emit = emit_bfe;
bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
@@ -835,7 +866,7 @@ void si_shader_context_init_alu(struct lp_build_tgsi_context *bld_base)
bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
bld_base->op_actions[TGSI_OPCODE_UBFE].emit = emit_bfe;
bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
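The guards added above exist because (((1 << bits) - 1) << offset) is undefined when bits is 32, while GLSL requires bitfieldInsert to return the insert value and bitfieldExtract to return the whole operand in that case (offset must be 0 for the operation to be valid then). A plain-C illustration of the GLSL-correct behaviour (hypothetical helpers, not Mesa code):

```c
#include <assert.h>
#include <stdint.h>

static uint32_t glsl_bitfield_insert(uint32_t base, uint32_t insert,
                                     unsigned offset, unsigned bits)
{
   if (bits >= 32)                     /* a shift by 32 would be undefined; GLSL says: result is insert */
      return insert;
   uint32_t mask = ((1u << bits) - 1u) << offset;
   return (base & ~mask) | ((insert << offset) & mask);
}

static uint32_t glsl_bitfield_extract(uint32_t value, unsigned offset, unsigned bits)
{
   if (bits >= 32)                     /* GLSL says: the whole value comes back */
      return value;
   return (value >> offset) & ((1u << bits) - 1u);
}

int main(void)
{
   assert(glsl_bitfield_insert(0xffffffffu, 0x12345678u, 0, 32) == 0x12345678u);
   assert(glsl_bitfield_extract(0xdeadbeefu, 0, 32) == 0xdeadbeefu);
   assert(glsl_bitfield_insert(0u, 0xfu, 4, 4) == 0xf0u);
   assert(glsl_bitfield_extract(0xf0u, 4, 4) == 0xfu);
   return 0;
}
```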
@@ -1370,7 +1370,7 @@ emit_vert_end(struct vc4_compile *c,
struct vc4_varying_slot *fs_inputs,
uint32_t num_fs_inputs)
{
struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);
struct qreg rcp_w = ntq_rcp(c, c->outputs[c->output_position_index + 3]);

emit_stub_vpm_read(c);
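The vc4 change swaps the raw hardware reciprocal for ntq_rcp() when producing 1/W. The commit title points at Newton-Raphson refinement; one step of the standard iteration x1 = x0 * (2 - w * x0) roughly doubles the accurate bits of an initial estimate x0 ≈ 1/w. A small C illustration of that refinement (not the vc4 implementation, which is not shown here):

```c
#include <assert.h>
#include <math.h>

/* One Newton-Raphson step for f(x) = 1/x - w, starting from estimate x0. */
static float rcp_refine(float w, float x0)
{
   return x0 * (2.0f - w * x0);
}

int main(void)
{
   float w  = 3.0f;
   float x0 = 0.34f;              /* coarse estimate of 1/3, about 2% off */
   float x1 = rcp_refine(w, x0);  /* one step shrinks the error to ~0.04% */
   assert(fabsf(x1 * w - 1.0f) < fabsf(x0 * w - 1.0f) * 0.1f);
   return 0;
}
```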
@@ -82,7 +82,7 @@ vlVdpOutputSurfaceCreate(VdpDevice device,
res_tmpl.depth0 = 1;
res_tmpl.array_size = 1;
res_tmpl.bind = PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_RENDER_TARGET |
PIPE_BIND_LINEAR | PIPE_BIND_SHARED;
PIPE_BIND_SHARED;
res_tmpl.usage = PIPE_USAGE_DEFAULT;

pipe_mutex_lock(dev->mutex);
@@ -17,16 +17,19 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
#define __ATTRIB(field) \
[DI_##field] = "glX"#field

__ATTRIB(BindSwapBarrierSGIX),
__ATTRIB(BindTexImageEXT),
// glXChooseFBConfig implemented by libglvnd
__ATTRIB(ChooseFBConfigSGIX),
// glXChooseVisual implemented by libglvnd
// glXCopyContext implemented by libglvnd
__ATTRIB(CopySubBufferMESA),
// glXCreateContext implemented by libglvnd
__ATTRIB(CreateContextAttribsARB),
__ATTRIB(CreateContextWithConfigSGIX),
__ATTRIB(CreateGLXPbufferSGIX),
// glXCreateGLXPixmap implemented by libglvnd
__ATTRIB(CreateGLXPixmapMESA),
__ATTRIB(CreateGLXPixmapWithConfigSGIX),
// glXCreateNewContext implemented by libglvnd
// glXCreatePbuffer implemented by libglvnd
@@ -51,54 +54,50 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
__ATTRIB(GetFBConfigAttribSGIX),
__ATTRIB(GetFBConfigFromVisualSGIX),
// glXGetFBConfigs implemented by libglvnd
__ATTRIB(GetMscRateOML),
// glXGetProcAddress implemented by libglvnd
// glXGetProcAddressARB implemented by libglvnd
__ATTRIB(GetScreenDriver),
// glXGetSelectedEvent implemented by libglvnd
__ATTRIB(GetSelectedEventSGIX),
__ATTRIB(GetSwapIntervalMESA),
__ATTRIB(GetSyncValuesOML),
__ATTRIB(GetVideoSyncSGI),
// glXGetVisualFromFBConfig implemented by libglvnd
__ATTRIB(GetVisualFromFBConfigSGIX),
// glXImportContextEXT implemented by libglvnd
// glXIsDirect implemented by libglvnd
__ATTRIB(JoinSwapGroupSGIX),
// glXMakeContextCurrent implemented by libglvnd
// glXMakeCurrent implemented by libglvnd
// glXQueryContext implemented by libglvnd
__ATTRIB(QueryContextInfoEXT),
__ATTRIB(QueryCurrentRendererIntegerMESA),
__ATTRIB(QueryCurrentRendererStringMESA),
// glXQueryDrawable implemented by libglvnd
// glXQueryExtension implemented by libglvnd
// glXQueryExtensionsString implemented by libglvnd
__ATTRIB(QueryGLXPbufferSGIX),
__ATTRIB(QueryMaxSwapBarriersSGIX),
__ATTRIB(QueryRendererIntegerMESA),
__ATTRIB(QueryRendererStringMESA),
// glXQueryServerString implemented by libglvnd
// glXQueryVersion implemented by libglvnd
__ATTRIB(ReleaseBuffersMESA),
__ATTRIB(ReleaseTexImageEXT),
// glXSelectEvent implemented by libglvnd
__ATTRIB(SelectEventSGIX),
// glXSwapBuffers implemented by libglvnd
__ATTRIB(SwapBuffersMscOML),
__ATTRIB(SwapIntervalMESA),
__ATTRIB(SwapIntervalSGI),
// glXUseXFont implemented by libglvnd
__ATTRIB(WaitForMscOML),
__ATTRIB(WaitForSbcOML),
// glXWaitGL implemented by libglvnd
__ATTRIB(WaitVideoSyncSGI),
// glXWaitX implemented by libglvnd

__ATTRIB(glXBindSwapBarrierSGIX),
__ATTRIB(glXCopySubBufferMESA),
__ATTRIB(glXCreateGLXPixmapMESA),
__ATTRIB(glXGetMscRateOML),
__ATTRIB(glXGetScreenDriver),
__ATTRIB(glXGetSwapIntervalMESA),
__ATTRIB(glXGetSyncValuesOML),
__ATTRIB(glXJoinSwapGroupSGIX),
__ATTRIB(glXQueryCurrentRendererIntegerMESA),
__ATTRIB(glXQueryCurrentRendererStringMESA),
__ATTRIB(glXQueryMaxSwapBarriersSGIX),
__ATTRIB(glXQueryRendererIntegerMESA),
__ATTRIB(glXQueryRendererStringMESA),
__ATTRIB(glXReleaseBuffersMESA),
__ATTRIB(glXSwapBuffersMscOML),
__ATTRIB(glXSwapIntervalMESA),
__ATTRIB(glXWaitForMscOML),
__ATTRIB(glXWaitForSbcOML),

#undef __ATTRIB
};
@@ -557,49 +556,49 @@ static int dispatch_WaitVideoSyncSGI(int divisor, int remainder,
|
||||
|
||||
|
||||
|
||||
static void dispatch_glXBindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable,
|
||||
static void dispatch_BindSwapBarrierSGIX(Display *dpy, GLXDrawable drawable,
|
||||
int barrier)
|
||||
{
|
||||
PFNGLXBINDSWAPBARRIERSGIXPROC pglXBindSwapBarrierSGIX;
|
||||
PFNGLXBINDSWAPBARRIERSGIXPROC pBindSwapBarrierSGIX;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXBindSwapBarrierSGIX);
|
||||
if (pglXBindSwapBarrierSGIX == NULL)
|
||||
__FETCH_FUNCTION_PTR(BindSwapBarrierSGIX);
|
||||
if (pBindSwapBarrierSGIX == NULL)
|
||||
return;
|
||||
|
||||
(*pglXBindSwapBarrierSGIX)(dpy, drawable, barrier);
|
||||
(*pBindSwapBarrierSGIX)(dpy, drawable, barrier);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void dispatch_glXCopySubBufferMESA(Display *dpy, GLXDrawable drawable,
|
||||
static void dispatch_CopySubBufferMESA(Display *dpy, GLXDrawable drawable,
|
||||
int x, int y, int width, int height)
|
||||
{
|
||||
PFNGLXCOPYSUBBUFFERMESAPROC pglXCopySubBufferMESA;
|
||||
PFNGLXCOPYSUBBUFFERMESAPROC pCopySubBufferMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXCopySubBufferMESA);
|
||||
if (pglXCopySubBufferMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(CopySubBufferMESA);
|
||||
if (pCopySubBufferMESA == NULL)
|
||||
return;
|
||||
|
||||
(*pglXCopySubBufferMESA)(dpy, drawable, x, y, width, height);
|
||||
(*pCopySubBufferMESA)(dpy, drawable, x, y, width, height);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy,
|
||||
static GLXPixmap dispatch_CreateGLXPixmapMESA(Display *dpy,
|
||||
XVisualInfo *visinfo,
|
||||
Pixmap pixmap, Colormap cmap)
|
||||
{
|
||||
PFNGLXCREATEGLXPIXMAPMESAPROC pglXCreateGLXPixmapMESA;
|
||||
PFNGLXCREATEGLXPIXMAPMESAPROC pCreateGLXPixmapMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
GLXPixmap ret;
|
||||
|
||||
@@ -607,11 +606,11 @@ static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy,
|
||||
if (dd == NULL)
|
||||
return None;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXCreateGLXPixmapMESA);
|
||||
if (pglXCreateGLXPixmapMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(CreateGLXPixmapMESA);
|
||||
if (pCreateGLXPixmapMESA == NULL)
|
||||
return None;
|
||||
|
||||
ret = (*pglXCreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap);
|
||||
ret = (*pCreateGLXPixmapMESA)(dpy, visinfo, pixmap, cmap);
|
||||
if (AddDrawableMapping(dpy, ret, dd)) {
|
||||
/* XXX: Call glXDestroyGLXPixmap which lives in libglvnd. If we're not
|
||||
* allowed to call it from here, should we extend __glXDispatchTableIndices ?
|
||||
@@ -624,47 +623,47 @@ static GLXPixmap dispatch_glXCreateGLXPixmapMESA(Display *dpy,
|
||||
|
||||
|
||||
|
||||
static GLboolean dispatch_glXGetMscRateOML(Display *dpy, GLXDrawable drawable,
|
||||
static GLboolean dispatch_GetMscRateOML(Display *dpy, GLXDrawable drawable,
|
||||
int32_t *numerator, int32_t *denominator)
|
||||
{
|
||||
PFNGLXGETMSCRATEOMLPROC pglXGetMscRateOML;
|
||||
PFNGLXGETMSCRATEOMLPROC pGetMscRateOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return GL_FALSE;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetMscRateOML);
|
||||
if (pglXGetMscRateOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetMscRateOML);
|
||||
if (pGetMscRateOML == NULL)
|
||||
return GL_FALSE;
|
||||
|
||||
return (*pglXGetMscRateOML)(dpy, drawable, numerator, denominator);
|
||||
return (*pGetMscRateOML)(dpy, drawable, numerator, denominator);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const char *dispatch_glXGetScreenDriver(Display *dpy, int scrNum)
|
||||
static const char *dispatch_GetScreenDriver(Display *dpy, int scrNum)
|
||||
{
|
||||
typedef const char *(*fn_glXGetScreenDriver_ptr)(Display *dpy, int scrNum);
|
||||
fn_glXGetScreenDriver_ptr pglXGetScreenDriver;
|
||||
fn_glXGetScreenDriver_ptr pGetScreenDriver;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, scrNum);
|
||||
if (dd == NULL)
|
||||
return NULL;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetScreenDriver);
|
||||
if (pglXGetScreenDriver == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetScreenDriver);
|
||||
if (pGetScreenDriver == NULL)
|
||||
return NULL;
|
||||
|
||||
return (*pglXGetScreenDriver)(dpy, scrNum);
|
||||
return (*pGetScreenDriver)(dpy, scrNum);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int dispatch_glXGetSwapIntervalMESA(void)
|
||||
static int dispatch_GetSwapIntervalMESA(void)
|
||||
{
|
||||
PFNGLXGETSWAPINTERVALMESAPROC pglXGetSwapIntervalMESA;
|
||||
PFNGLXGETSWAPINTERVALMESAPROC pGetSwapIntervalMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -674,57 +673,57 @@ static int dispatch_glXGetSwapIntervalMESA(void)
|
||||
if (dd == NULL)
|
||||
return 0;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetSwapIntervalMESA);
|
||||
if (pglXGetSwapIntervalMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetSwapIntervalMESA);
|
||||
if (pGetSwapIntervalMESA == NULL)
|
||||
return 0;
|
||||
|
||||
return (*pglXGetSwapIntervalMESA)();
|
||||
return (*pGetSwapIntervalMESA)();
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXGetSyncValuesOML(Display *dpy, GLXDrawable drawable,
|
||||
static Bool dispatch_GetSyncValuesOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t *ust, int64_t *msc, int64_t *sbc)
|
||||
{
|
||||
PFNGLXGETSYNCVALUESOMLPROC pglXGetSyncValuesOML;
|
||||
PFNGLXGETSYNCVALUESOMLPROC pGetSyncValuesOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXGetSyncValuesOML);
|
||||
if (pglXGetSyncValuesOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(GetSyncValuesOML);
|
||||
if (pGetSyncValuesOML == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXGetSyncValuesOML)(dpy, drawable, ust, msc, sbc);
|
||||
return (*pGetSyncValuesOML)(dpy, drawable, ust, msc, sbc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static void dispatch_glXJoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable,
|
||||
static void dispatch_JoinSwapGroupSGIX(Display *dpy, GLXDrawable drawable,
|
||||
GLXDrawable member)
|
||||
{
|
||||
PFNGLXJOINSWAPGROUPSGIXPROC pglXJoinSwapGroupSGIX;
|
||||
PFNGLXJOINSWAPGROUPSGIXPROC pJoinSwapGroupSGIX;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXJoinSwapGroupSGIX);
|
||||
if (pglXJoinSwapGroupSGIX == NULL)
|
||||
__FETCH_FUNCTION_PTR(JoinSwapGroupSGIX);
|
||||
if (pJoinSwapGroupSGIX == NULL)
|
||||
return;
|
||||
|
||||
(*pglXJoinSwapGroupSGIX)(dpy, drawable, member);
|
||||
(*pJoinSwapGroupSGIX)(dpy, drawable, member);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXQueryCurrentRendererIntegerMESA(int attribute,
|
||||
static Bool dispatch_QueryCurrentRendererIntegerMESA(int attribute,
|
||||
unsigned int *value)
|
||||
{
|
||||
PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC pglXQueryCurrentRendererIntegerMESA;
|
||||
PFNGLXQUERYCURRENTRENDERERINTEGERMESAPROC pQueryCurrentRendererIntegerMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -734,18 +733,18 @@ static Bool dispatch_glXQueryCurrentRendererIntegerMESA(int attribute,
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryCurrentRendererIntegerMESA);
|
||||
if (pglXQueryCurrentRendererIntegerMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryCurrentRendererIntegerMESA);
|
||||
if (pQueryCurrentRendererIntegerMESA == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXQueryCurrentRendererIntegerMESA)(attribute, value);
|
||||
return (*pQueryCurrentRendererIntegerMESA)(attribute, value);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const char *dispatch_glXQueryCurrentRendererStringMESA(int attribute)
|
||||
static const char *dispatch_QueryCurrentRendererStringMESA(int attribute)
|
||||
{
|
||||
PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC pglXQueryCurrentRendererStringMESA;
|
||||
PFNGLXQUERYCURRENTRENDERERSTRINGMESAPROC pQueryCurrentRendererStringMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -755,114 +754,114 @@ static const char *dispatch_glXQueryCurrentRendererStringMESA(int attribute)
|
||||
if (dd == NULL)
|
||||
return NULL;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryCurrentRendererStringMESA);
|
||||
if (pglXQueryCurrentRendererStringMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryCurrentRendererStringMESA);
|
||||
if (pQueryCurrentRendererStringMESA == NULL)
|
||||
return NULL;
|
||||
|
||||
return (*pglXQueryCurrentRendererStringMESA)(attribute);
|
||||
return (*pQueryCurrentRendererStringMESA)(attribute);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXQueryMaxSwapBarriersSGIX(Display *dpy, int screen,
|
||||
static Bool dispatch_QueryMaxSwapBarriersSGIX(Display *dpy, int screen,
|
||||
int *max)
|
||||
{
|
||||
PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC pglXQueryMaxSwapBarriersSGIX;
|
||||
PFNGLXQUERYMAXSWAPBARRIERSSGIXPROC pQueryMaxSwapBarriersSGIX;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, screen);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryMaxSwapBarriersSGIX);
|
||||
if (pglXQueryMaxSwapBarriersSGIX == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryMaxSwapBarriersSGIX);
|
||||
if (pQueryMaxSwapBarriersSGIX == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXQueryMaxSwapBarriersSGIX)(dpy, screen, max);
|
||||
return (*pQueryMaxSwapBarriersSGIX)(dpy, screen, max);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXQueryRendererIntegerMESA(Display *dpy, int screen,
|
||||
static Bool dispatch_QueryRendererIntegerMESA(Display *dpy, int screen,
|
||||
int renderer, int attribute,
|
||||
unsigned int *value)
|
||||
{
|
||||
PFNGLXQUERYRENDERERINTEGERMESAPROC pglXQueryRendererIntegerMESA;
|
||||
PFNGLXQUERYRENDERERINTEGERMESAPROC pQueryRendererIntegerMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, screen);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryRendererIntegerMESA);
|
||||
if (pglXQueryRendererIntegerMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryRendererIntegerMESA);
|
||||
if (pQueryRendererIntegerMESA == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXQueryRendererIntegerMESA)(dpy, screen, renderer, attribute, value);
|
||||
return (*pQueryRendererIntegerMESA)(dpy, screen, renderer, attribute, value);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static const char *dispatch_glXQueryRendererStringMESA(Display *dpy, int screen,
|
||||
static const char *dispatch_QueryRendererStringMESA(Display *dpy, int screen,
|
||||
int renderer, int attribute)
|
||||
{
|
||||
PFNGLXQUERYRENDERERSTRINGMESAPROC pglXQueryRendererStringMESA;
|
||||
PFNGLXQUERYRENDERERSTRINGMESAPROC pQueryRendererStringMESA;
|
||||
__GLXvendorInfo *dd = NULL;
|
||||
|
||||
dd = __VND->getDynDispatch(dpy, screen);
|
||||
if (dd == NULL)
|
||||
return NULL;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXQueryRendererStringMESA);
|
||||
if (pglXQueryRendererStringMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(QueryRendererStringMESA);
|
||||
if (pQueryRendererStringMESA == NULL)
|
||||
return NULL;
|
||||
|
||||
return (*pglXQueryRendererStringMESA)(dpy, screen, renderer, attribute);
|
||||
return (*pQueryRendererStringMESA)(dpy, screen, renderer, attribute);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXReleaseBuffersMESA(Display *dpy, GLXDrawable d)
|
||||
static Bool dispatch_ReleaseBuffersMESA(Display *dpy, GLXDrawable d)
|
||||
{
|
||||
PFNGLXRELEASEBUFFERSMESAPROC pglXReleaseBuffersMESA;
|
||||
PFNGLXRELEASEBUFFERSMESAPROC pReleaseBuffersMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, d);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXReleaseBuffersMESA);
|
||||
if (pglXReleaseBuffersMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(ReleaseBuffersMESA);
|
||||
if (pReleaseBuffersMESA == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXReleaseBuffersMESA)(dpy, d);
|
||||
return (*pReleaseBuffersMESA)(dpy, d);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int64_t dispatch_glXSwapBuffersMscOML(Display *dpy, GLXDrawable drawable,
|
||||
static int64_t dispatch_SwapBuffersMscOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t target_msc, int64_t divisor,
|
||||
int64_t remainder)
|
||||
{
|
||||
PFNGLXSWAPBUFFERSMSCOMLPROC pglXSwapBuffersMscOML;
|
||||
PFNGLXSWAPBUFFERSMSCOMLPROC pSwapBuffersMscOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return 0;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXSwapBuffersMscOML);
|
||||
if (pglXSwapBuffersMscOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(SwapBuffersMscOML);
|
||||
if (pSwapBuffersMscOML == NULL)
|
||||
return 0;
|
||||
|
||||
return (*pglXSwapBuffersMscOML)(dpy, drawable, target_msc, divisor, remainder);
|
||||
return (*pSwapBuffersMscOML)(dpy, drawable, target_msc, divisor, remainder);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static int dispatch_glXSwapIntervalMESA(unsigned int interval)
|
||||
static int dispatch_SwapIntervalMESA(unsigned int interval)
|
||||
{
|
||||
PFNGLXSWAPINTERVALMESAPROC pglXSwapIntervalMESA;
|
||||
PFNGLXSWAPINTERVALMESAPROC pSwapIntervalMESA;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
if (!__VND->getCurrentContext())
|
||||
@@ -872,52 +871,52 @@ static int dispatch_glXSwapIntervalMESA(unsigned int interval)
|
||||
if (dd == NULL)
|
||||
return 0;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXSwapIntervalMESA);
|
||||
if (pglXSwapIntervalMESA == NULL)
|
||||
__FETCH_FUNCTION_PTR(SwapIntervalMESA);
|
||||
if (pSwapIntervalMESA == NULL)
|
||||
return 0;
|
||||
|
||||
return (*pglXSwapIntervalMESA)(interval);
|
||||
return (*pSwapIntervalMESA)(interval);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXWaitForMscOML(Display *dpy, GLXDrawable drawable,
|
||||
static Bool dispatch_WaitForMscOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t target_msc, int64_t divisor,
|
||||
int64_t remainder, int64_t *ust,
|
||||
int64_t *msc, int64_t *sbc)
|
||||
{
|
||||
PFNGLXWAITFORMSCOMLPROC pglXWaitForMscOML;
|
||||
PFNGLXWAITFORMSCOMLPROC pWaitForMscOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXWaitForMscOML);
|
||||
if (pglXWaitForMscOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(WaitForMscOML);
|
||||
if (pWaitForMscOML == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXWaitForMscOML)(dpy, drawable, target_msc, divisor, remainder, ust, msc, sbc);
|
||||
return (*pWaitForMscOML)(dpy, drawable, target_msc, divisor, remainder, ust, msc, sbc);
|
||||
}
|
||||
|
||||
|
||||
|
||||
static Bool dispatch_glXWaitForSbcOML(Display *dpy, GLXDrawable drawable,
|
||||
static Bool dispatch_WaitForSbcOML(Display *dpy, GLXDrawable drawable,
|
||||
int64_t target_sbc, int64_t *ust,
|
||||
int64_t *msc, int64_t *sbc)
|
||||
{
|
||||
PFNGLXWAITFORSBCOMLPROC pglXWaitForSbcOML;
|
||||
PFNGLXWAITFORSBCOMLPROC pWaitForSbcOML;
|
||||
__GLXvendorInfo *dd;
|
||||
|
||||
dd = GetDispatchFromDrawable(dpy, drawable);
|
||||
if (dd == NULL)
|
||||
return False;
|
||||
|
||||
__FETCH_FUNCTION_PTR(glXWaitForSbcOML);
|
||||
if (pglXWaitForSbcOML == NULL)
|
||||
__FETCH_FUNCTION_PTR(WaitForSbcOML);
|
||||
if (pWaitForSbcOML == NULL)
|
||||
return False;
|
||||
|
||||
return (*pglXWaitForSbcOML)(dpy, drawable, target_sbc, ust, msc, sbc);
|
||||
return (*pWaitForSbcOML)(dpy, drawable, target_sbc, ust, msc, sbc);
|
||||
}
|
||||
|
||||
#undef __FETCH_FUNCTION_PTR
|
||||
@@ -928,45 +927,44 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = {
|
||||
#define __ATTRIB(field) \
|
||||
[DI_##field] = (void *)dispatch_##field
|
||||
|
||||
__ATTRIB(BindTexImageEXT),
|
||||
__ATTRIB(BindSwapBarrierSGIX),
|
||||
__ATTRIB(BindTexImageEXT),
|
||||
__ATTRIB(ChooseFBConfigSGIX),
|
||||
__ATTRIB(CopySubBufferMESA),
|
||||
__ATTRIB(CreateContextAttribsARB),
|
||||
__ATTRIB(CreateContextWithConfigSGIX),
|
||||
__ATTRIB(CreateGLXPbufferSGIX),
|
||||
__ATTRIB(CreateGLXPixmapMESA),
|
||||
__ATTRIB(CreateGLXPixmapWithConfigSGIX),
|
||||
__ATTRIB(DestroyGLXPbufferSGIX),
|
||||
__ATTRIB(GetContextIDEXT),
|
||||
__ATTRIB(GetCurrentDisplayEXT),
|
||||
__ATTRIB(GetFBConfigAttribSGIX),
|
||||
__ATTRIB(GetFBConfigFromVisualSGIX),
|
||||
__ATTRIB(GetMscRateOML),
|
||||
__ATTRIB(GetScreenDriver),
|
||||
__ATTRIB(GetSelectedEventSGIX),
|
||||
__ATTRIB(GetSwapIntervalMESA),
|
||||
__ATTRIB(GetSyncValuesOML),
|
||||
__ATTRIB(GetVideoSyncSGI),
|
||||
__ATTRIB(GetVisualFromFBConfigSGIX),
|
||||
__ATTRIB(JoinSwapGroupSGIX),
|
||||
__ATTRIB(QueryContextInfoEXT),
|
||||
__ATTRIB(QueryCurrentRendererIntegerMESA),
|
||||
__ATTRIB(QueryCurrentRendererStringMESA),
|
||||
__ATTRIB(QueryGLXPbufferSGIX),
|
||||
__ATTRIB(QueryMaxSwapBarriersSGIX),
|
||||
__ATTRIB(QueryRendererIntegerMESA),
|
||||
__ATTRIB(QueryRendererStringMESA),
|
||||
__ATTRIB(ReleaseBuffersMESA),
|
||||
__ATTRIB(ReleaseTexImageEXT),
|
||||
__ATTRIB(SelectEventSGIX),
|
||||
__ATTRIB(SwapBuffersMscOML),
|
||||
__ATTRIB(SwapIntervalMESA),
|
||||
__ATTRIB(SwapIntervalSGI),
|
||||
__ATTRIB(WaitForMscOML),
|
||||
__ATTRIB(WaitForSbcOML),
|
||||
__ATTRIB(WaitVideoSyncSGI),
|
||||
__ATTRIB(glXBindSwapBarrierSGIX),
|
||||
__ATTRIB(glXCopySubBufferMESA),
|
||||
__ATTRIB(glXCreateGLXPixmapMESA),
|
||||
__ATTRIB(glXGetMscRateOML),
|
||||
__ATTRIB(glXGetScreenDriver),
|
||||
__ATTRIB(glXGetSwapIntervalMESA),
|
||||
__ATTRIB(glXGetSyncValuesOML),
|
||||
__ATTRIB(glXJoinSwapGroupSGIX),
|
||||
__ATTRIB(glXQueryCurrentRendererIntegerMESA),
|
||||
__ATTRIB(glXQueryCurrentRendererStringMESA),
|
||||
__ATTRIB(glXQueryMaxSwapBarriersSGIX),
|
||||
__ATTRIB(glXQueryRendererIntegerMESA),
|
||||
__ATTRIB(glXQueryRendererStringMESA),
|
||||
__ATTRIB(glXReleaseBuffersMESA),
|
||||
__ATTRIB(glXSwapBuffersMscOML),
|
||||
__ATTRIB(glXSwapIntervalMESA),
|
||||
__ATTRIB(glXWaitForMscOML),
|
||||
__ATTRIB(glXWaitForSbcOML),
|
||||
|
||||
[DI_LAST_INDEX] = NULL,
|
||||
#undef __ATTRIB
|
||||
|
@@ -6,16 +6,19 @@
#define __glxlibglvnd_dispatchindex_h__

typedef enum __GLXdispatchIndex {
DI_BindSwapBarrierSGIX,
DI_BindTexImageEXT,
// ChooseFBConfig implemented by libglvnd
DI_ChooseFBConfigSGIX,
// ChooseVisual implemented by libglvnd
// CopyContext implemented by libglvnd
DI_CopySubBufferMESA,
// CreateContext implemented by libglvnd
DI_CreateContextAttribsARB,
DI_CreateContextWithConfigSGIX,
DI_CreateGLXPbufferSGIX,
// CreateGLXPixmap implemented by libglvnd
DI_CreateGLXPixmapMESA,
DI_CreateGLXPixmapWithConfigSGIX,
// CreateNewContext implemented by libglvnd
// CreatePbuffer implemented by libglvnd
@@ -40,6 +43,7 @@ typedef enum __GLXdispatchIndex {
DI_GetFBConfigAttribSGIX,
DI_GetFBConfigFromVisualSGIX,
// GetFBConfigs implemented by libglvnd
DI_GetMscRateOML,
// GetProcAddress implemented by libglvnd
// GetProcAddressARB implemented by libglvnd
// GetSelectedEvent implemented by libglvnd
@@ -47,45 +51,41 @@ typedef enum __GLXdispatchIndex {
DI_GetVideoSyncSGI,
// GetVisualFromFBConfig implemented by libglvnd
DI_GetVisualFromFBConfigSGIX,
DI_GetScreenDriver,
DI_GetSwapIntervalMESA,
DI_GetSyncValuesOML,
// ImportContextEXT implemented by libglvnd
// IsDirect implemented by libglvnd
DI_JoinSwapGroupSGIX,
// MakeContextCurrent implemented by libglvnd
// MakeCurrent implemented by libglvnd
// QueryContext implemented by libglvnd
DI_QueryContextInfoEXT,
DI_QueryCurrentRendererIntegerMESA,
DI_QueryCurrentRendererStringMESA,
// QueryDrawable implemented by libglvnd
// QueryExtension implemented by libglvnd
// QueryExtensionsString implemented by libglvnd
DI_QueryGLXPbufferSGIX,
DI_QueryMaxSwapBarriersSGIX,
DI_QueryRendererIntegerMESA,
DI_QueryRendererStringMESA,
// QueryServerString implemented by libglvnd
// QueryVersion implemented by libglvnd
DI_ReleaseBuffersMESA,
DI_ReleaseTexImageEXT,
// SelectEvent implemented by libglvnd
DI_SelectEventSGIX,
// SwapBuffers implemented by libglvnd
DI_SwapBuffersMscOML,
DI_SwapIntervalMESA,
DI_SwapIntervalSGI,
// UseXFont implemented by libglvnd
// WaitGL implemented by libglvnd
DI_WaitForMscOML,
DI_WaitForSbcOML,
DI_WaitVideoSyncSGI,
// WaitX implemented by libglvnd
DI_glXBindSwapBarrierSGIX,
DI_glXCopySubBufferMESA,
DI_glXCreateGLXPixmapMESA,
DI_glXGetMscRateOML,
DI_glXGetScreenDriver,
DI_glXGetSwapIntervalMESA,
DI_glXGetSyncValuesOML,
DI_glXJoinSwapGroupSGIX,
DI_glXQueryCurrentRendererIntegerMESA,
DI_glXQueryCurrentRendererStringMESA,
DI_glXQueryMaxSwapBarriersSGIX,
DI_glXQueryRendererIntegerMESA,
DI_glXQueryRendererStringMESA,
DI_glXReleaseBuffersMESA,
DI_glXSwapBuffersMscOML,
DI_glXSwapIntervalMESA,
DI_glXWaitForMscOML,
DI_glXWaitForSbcOML,
DI_LAST_INDEX
} __GLXdispatchIndex;
@@ -2713,7 +2713,7 @@ __glXGetUST(int64_t * ust)

#if defined(GLX_DIRECT_RENDERING) && !defined(GLX_USE_APPLEGL)

int
PUBLIC int
MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
struct mesa_glinterop_device_info *out)
{
@@ -2737,7 +2737,7 @@ MesaGLInteropGLXQueryDeviceInfo(Display *dpy, GLXContext context,
return ret;
}

int
PUBLIC int
MesaGLInteropGLXExportObject(Display *dpy, GLXContext context,
struct mesa_glinterop_export_in *in,
struct mesa_glinterop_export_out *out)
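The hunk above only changes the linkage annotation on the two interop entry points. As an assumption for illustration (the exact definition of Mesa's PUBLIC macro is not shown here), an export macro of this kind typically looks like the sketch below; the symbol names in it are hypothetical:

```c
/* Hedged sketch: with -fvisibility=hidden as the compiler default, only
 * symbols marked this way stay visible to users of the shared library. */
#if defined(__GNUC__)
#  define PUBLIC __attribute__((visibility("default")))
#else
#  define PUBLIC
#endif

PUBLIC int my_exported_entry_point(void);   /* visible to applications */
static int my_internal_helper(void);        /* library-internal only */
```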
@@ -50,6 +50,9 @@ static void __glXGLVNDSetDispatchIndex(const GLubyte *procName, int index)
{
unsigned internalIndex = FindGLXFunction(procName);

if (internalIndex == DI_FUNCTION_COUNT)
return; /* unknown or static dispatch */

__glXDispatchTableIndices[internalIndex] = index;
}
@@ -16,7 +16,8 @@ libwindowsglx_la_SOURCES = \
windowsgl.h \
windowsgl_internal.h \
windows_drawable.c \
wgl.c
wgl.c \
wgl.h

libwindowsglx_la_CFLAGS = \
-I$(top_srcdir)/include \
@@ -169,7 +169,7 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx,
struct nir_shader *nir,
const struct brw_wm_prog_key *wm_key,
bool use_repclear,
struct brw_blorp_prog_data *prog_data,
struct brw_wm_prog_data *wm_prog_data,
unsigned *program_size)
{
const struct brw_compiler *compiler = blorp->compiler;
@@ -177,15 +177,14 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx,
nir->options =
compiler->glsl_compiler_options[MESA_SHADER_FRAGMENT].NirOptions;

struct brw_wm_prog_data wm_prog_data;
memset(&wm_prog_data, 0, sizeof(wm_prog_data));
memset(wm_prog_data, 0, sizeof(*wm_prog_data));

wm_prog_data.base.nr_params = 0;
wm_prog_data.base.param = NULL;
wm_prog_data->base.nr_params = 0;
wm_prog_data->base.param = NULL;

/* BLORP always just uses the first two binding table entries */
wm_prog_data.binding_table.render_target_start = BLORP_RENDERBUFFER_BT_INDEX;
wm_prog_data.base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX;
wm_prog_data->binding_table.render_target_start = BLORP_RENDERBUFFER_BT_INDEX;
wm_prog_data->base.binding_table.texture_start = BLORP_TEXTURE_BT_INDEX;

nir = brw_preprocess_nir(compiler, nir);
nir_remove_dead_variables(nir, nir_var_shader_in);
@@ -206,22 +205,9 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx,

const unsigned *program =
brw_compile_fs(compiler, blorp->driver_ctx, mem_ctx,
wm_key, &wm_prog_data, nir,
wm_key, wm_prog_data, nir,
NULL, -1, -1, false, use_repclear, program_size, NULL);

/* Copy the relavent bits of wm_prog_data over into the blorp prog data */
prog_data->dispatch_8 = wm_prog_data.dispatch_8;
prog_data->dispatch_16 = wm_prog_data.dispatch_16;
prog_data->first_curbe_grf_0 = wm_prog_data.base.dispatch_grf_start_reg;
prog_data->first_curbe_grf_2 = wm_prog_data.dispatch_grf_start_reg_2;
prog_data->ksp_offset_2 = wm_prog_data.prog_offset_2;
prog_data->persample_msaa_dispatch = wm_prog_data.persample_dispatch;
prog_data->flat_inputs = wm_prog_data.flat_inputs;
prog_data->num_varying_inputs = wm_prog_data.num_varying_inputs;
prog_data->inputs_read = nir->info.inputs_read;

assert(wm_prog_data.base.nr_params == 0);

return program;
}
@@ -30,7 +30,7 @@
#include "isl/isl.h"

struct brw_context;
struct brw_wm_prog_key;
struct brw_stage_prog_data;

#ifdef __cplusplus
extern "C" {
@@ -58,7 +58,8 @@ struct blorp_context {
void (*upload_shader)(struct blorp_context *blorp,
const void *key, uint32_t key_size,
const void *kernel, uint32_t kernel_size,
const void *prog_data, uint32_t prog_data_size,
const struct brw_stage_prog_data *prog_data,
uint32_t prog_data_size,
uint32_t *kernel_out, void *prog_data_out);
void (*exec)(struct blorp_batch *batch, const struct blorp_params *params);
};
@@ -1237,7 +1237,7 @@ brw_blorp_get_blit_kernel(struct blorp_context *blorp,

const unsigned *program;
unsigned program_size;
struct brw_blorp_prog_data prog_data;
struct brw_wm_prog_data prog_data;

/* Try and compile with NIR first. If that fails, fall back to the old
* method of building shaders manually.
@@ -1255,7 +1255,7 @@ brw_blorp_get_blit_kernel(struct blorp_context *blorp,

blorp->upload_shader(blorp, prog_key, sizeof(*prog_key),
program, program_size,
&prog_data, sizeof(prog_data),
&prog_data.base, sizeof(prog_data),
&params->wm_prog_kernel, &params->wm_prog_data);

ralloc_free(mem_ctx);
@@ -74,7 +74,7 @@ blorp_params_get_clear_kernel(struct blorp_context *blorp,
struct brw_wm_prog_key wm_key;
brw_blorp_init_wm_prog_key(&wm_key);

struct brw_blorp_prog_data prog_data;
struct brw_wm_prog_data prog_data;
unsigned program_size;
const unsigned *program =
blorp_compile_fs(blorp, mem_ctx, b.shader, &wm_key, use_replicated_data,
@@ -82,7 +82,7 @@ blorp_params_get_clear_kernel(struct blorp_context *blorp,

blorp->upload_shader(blorp, &blorp_key, sizeof(blorp_key),
program, program_size,
&prog_data, sizeof(prog_data),
&prog_data.base, sizeof(prog_data),
&params->wm_prog_kernel, &params->wm_prog_data);

ralloc_free(mem_ctx);
@@ -207,7 +207,8 @@ blorp_emit_input_varying_data(struct blorp_batch *batch,
for (unsigned i = 0; i < max_num_varyings; i++) {
const gl_varying_slot attr = VARYING_SLOT_VAR0 + i;

if (!(params->wm_prog_data->inputs_read & (1ull << attr)))
const int input_index = params->wm_prog_data->urb_setup[attr];
if (input_index < 0)
continue;

memcpy(inputs, inputs_src + i * 4, vec4_size_in_bytes);
@@ -401,7 +402,7 @@ static void
blorp_emit_sf_config(struct blorp_batch *batch,
const struct blorp_params *params)
{
const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;

/* 3DSTATE_SF
*
@@ -502,7 +503,7 @@ static void
blorp_emit_ps_config(struct blorp_batch *batch,
const struct blorp_params *params)
{
const struct brw_blorp_prog_data *prog_data = params->wm_prog_data;
const struct brw_wm_prog_data *prog_data = params->wm_prog_data;

/* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
* nonzero to prevent the GPU from hanging. While the documentation doesn't
@@ -527,16 +528,16 @@ blorp_emit_ps_config(struct blorp_batch *batch,

if (prog_data) {
ps.DispatchGRFStartRegisterForConstantSetupData0 =
prog_data->first_curbe_grf_0;
prog_data->base.dispatch_grf_start_reg;
ps.DispatchGRFStartRegisterForConstantSetupData2 =
prog_data->first_curbe_grf_2;
prog_data->dispatch_grf_start_reg_2;

ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;

ps.KernelStartPointer0 = params->wm_prog_kernel;
ps.KernelStartPointer2 =
params->wm_prog_kernel + prog_data->ksp_offset_2;
params->wm_prog_kernel + prog_data->prog_offset_2;
}

/* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
@@ -577,7 +578,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
if (prog_data) {
psx.PixelShaderValid = true;
psx.AttributeEnable = prog_data->num_varying_inputs > 0;
psx.PixelShaderIsPerSample = prog_data->persample_msaa_dispatch;
psx.PixelShaderIsPerSample = prog_data->persample_dispatch;
}

if (params->src.enabled)
@@ -612,7 +613,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
if (params->dst.surf.samples > 1) {
wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
wm.MultisampleDispatchMode =
(prog_data && prog_data->persample_msaa_dispatch) ?
(prog_data && prog_data->persample_dispatch) ?
MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
} else {
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
@@ -630,13 +631,13 @@ blorp_emit_ps_config(struct blorp_batch *batch,

if (prog_data) {
ps.DispatchGRFStartRegisterforConstantSetupData0 =
prog_data->first_curbe_grf_0;
prog_data->base.dispatch_grf_start_reg;
ps.DispatchGRFStartRegisterforConstantSetupData2 =
prog_data->first_curbe_grf_2;
prog_data->dispatch_grf_start_reg_2;

ps.KernelStartPointer0 = params->wm_prog_kernel;
ps.KernelStartPointer2 =
params->wm_prog_kernel + prog_data->ksp_offset_2;
params->wm_prog_kernel + prog_data->prog_offset_2;

ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
@@ -692,13 +693,13 @@ blorp_emit_ps_config(struct blorp_batch *batch,
wm.ThreadDispatchEnable = true;

wm.DispatchGRFStartRegisterforConstantSetupData0 =
prog_data->first_curbe_grf_0;
prog_data->base.dispatch_grf_start_reg;
wm.DispatchGRFStartRegisterforConstantSetupData2 =
prog_data->first_curbe_grf_2;
prog_data->dispatch_grf_start_reg_2;

wm.KernelStartPointer0 = params->wm_prog_kernel;
wm.KernelStartPointer2 =
params->wm_prog_kernel + prog_data->ksp_offset_2;
params->wm_prog_kernel + prog_data->prog_offset_2;

wm._8PixelDispatchEnable = prog_data->dispatch_8;
wm._16PixelDispatchEnable = prog_data->dispatch_16;
@@ -714,7 +715,7 @@ blorp_emit_ps_config(struct blorp_batch *batch,
if (params->dst.surf.samples > 1) {
wm.MultisampleRasterizationMode = MSRASTMODE_ON_PATTERN;
wm.MultisampleDispatchMode =
(prog_data && prog_data->persample_msaa_dispatch) ?
(prog_data && prog_data->persample_dispatch) ?
MSDISPMODE_PERSAMPLE : MSDISPMODE_PERPIXEL;
} else {
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
@@ -1116,6 +1117,11 @@ blorp_emit_surface_states(struct blorp_batch *batch,
}

#if GEN_GEN >= 7
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), bt);
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_HS), bt);
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_DS), bt);
blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_GS), bt);

blorp_emit(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_PS), bt) {
bt.PointertoPSBindingTable = bind_offset;
}
@@ -138,33 +138,8 @@ struct brw_blorp_wm_inputs
uint32_t pad[1];
};

struct brw_blorp_prog_data
{
bool dispatch_8;
bool dispatch_16;

uint8_t first_curbe_grf_0;
uint8_t first_curbe_grf_2;

uint32_t ksp_offset_2;

/**
* True if the WM program should be run in MSDISPMODE_PERSAMPLE with more
* than one sample per pixel.
*/
bool persample_msaa_dispatch;

/**
* Mask of which FS inputs are marked flat by the shader source. This is
* needed for setting up 3DSTATE_SF/SBE.
*/
uint32_t flat_inputs;
unsigned num_varying_inputs;
uint64_t inputs_read;
};

static inline unsigned
brw_blorp_get_urb_length(const struct brw_blorp_prog_data *prog_data)
brw_blorp_get_urb_length(const struct brw_wm_prog_data *prog_data)
{
if (prog_data == NULL)
return 1;
@@ -197,7 +172,7 @@ struct blorp_params
unsigned num_draw_buffers;
unsigned num_layers;
uint32_t wm_prog_kernel;
struct brw_blorp_prog_data *wm_prog_data;
struct brw_wm_prog_data *wm_prog_data;
};

void blorp_params_init(struct blorp_params *params);
@@ -314,7 +289,7 @@ blorp_compile_fs(struct blorp_context *blorp, void *mem_ctx,
struct nir_shader *nir,
const struct brw_wm_prog_key *wm_key,
bool use_repclear,
struct brw_blorp_prog_data *prog_data,
struct brw_wm_prog_data *wm_prog_data,
unsigned *program_size);

/** \} */
@@ -335,7 +335,6 @@ static const struct gen_device_info gen_device_info_chv = {
.max_gs_threads = 336, \
.max_tcs_threads = 336, \
.max_tes_threads = 336, \
.max_wm_threads = 64 * 9, \
.max_cs_threads = 56, \
.urb = { \
.size = 384, \
@@ -388,7 +387,6 @@ static const struct gen_device_info gen_device_info_bxt = {
.max_tcs_threads = 112,
.max_tes_threads = 112,
.max_gs_threads = 112,
.max_wm_threads = 64 * 3,
.max_cs_threads = 6 * 6,
.urb = {
.size = 192,
@@ -411,7 +409,6 @@ static const struct gen_device_info gen_device_info_bxt_2x6 = {
.max_tcs_threads = 56, /* XXX: guess */
.max_tes_threads = 56,
.max_gs_threads = 56,
.max_wm_threads = 64 * 2,
.max_cs_threads = 6 * 6,
.urb = {
.size = 128,
@@ -427,18 +424,11 @@ static const struct gen_device_info gen_device_info_bxt_2x6 = {
* There's no KBL entry. Using the default SKL (GEN9) GS entries value.
*/

/*
* Both SKL and KBL support a maximum of 64 threads per
* Pixel Shader Dispatch (PSD) unit.
*/
#define KBL_MAX_THREADS_PER_PSD 64

static const struct gen_device_info gen_device_info_kbl_gt1 = {
GEN9_FEATURES,
.gt = 1,

.max_cs_threads = 7 * 6,
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 2,
.urb.size = 192,
.num_slices = 1,
};
@@ -448,7 +438,6 @@ static const struct gen_device_info gen_device_info_kbl_gt1_5 = {
.gt = 1,

.max_cs_threads = 7 * 6,
.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3,
.num_slices = 1,
};

@@ -456,7 +445,6 @@ static const struct gen_device_info gen_device_info_kbl_gt2 = {
GEN9_FEATURES,
.gt = 2,

.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 3,
.num_slices = 1,
};

@@ -464,7 +452,6 @@ static const struct gen_device_info gen_device_info_kbl_gt3 = {
GEN9_FEATURES,
.gt = 3,

.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 6,
.num_slices = 2,
};

@@ -472,7 +459,6 @@ static const struct gen_device_info gen_device_info_kbl_gt4 = {
GEN9_FEATURES,
.gt = 4,

.max_wm_threads = KBL_MAX_THREADS_PER_PSD * 9,
/*
* From the "L3 Allocation and Programming" documentation:
*
@@ -500,6 +486,25 @@ gen_get_device_info(int devid, struct gen_device_info *devinfo)
return false;
}

/* From the Skylake PRM, 3DSTATE_PS::Scratch Space Base Pointer:
*
* "Scratch Space per slice is computed based on 4 sub-slices. SW must
* allocate scratch space enough so that each slice has 4 slices allowed."
*
* The equivalent internal documentation says that this programming note
* applies to all Gen9+ platforms.
*
* The hardware typically calculates the scratch space pointer by taking
* the base address, and adding per-thread-scratch-space * thread ID.
* Extra padding can be necessary depending how the thread IDs are
* calculated for a particular shader stage.
*/
if (devinfo->gen >= 9) {
devinfo->max_wm_threads = 64 /* threads-per-PSD */
* devinfo->num_slices
* 4; /* effective subslices per slice */
}

return true;
}
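The hunk above drops the per-SKU .max_wm_threads values and derives the number at runtime instead. As a hedged illustration of just that arithmetic (not the driver's actual helper), the computed value for a few slice counts looks like this:

```c
/* Illustrative sketch of the Gen9+ rule quoted above: 64 threads per PSD,
 * times the slice count, times an assumed 4 subslices per slice used for
 * scratch-space sizing. */
static int gen9_max_wm_threads(int num_slices)
{
   return 64 /* threads per PSD */ * num_slices * 4 /* subslices per slice */;
}

/* e.g. 1 slice -> 256 threads, 2 slices -> 512, 3 slices -> 768. */
```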
@@ -253,10 +253,7 @@ anv_block_pool_init(struct anv_block_pool *pool,
assert(util_is_power_of_two(block_size));

pool->device = device;
pool->bo.gem_handle = 0;
pool->bo.offset = 0;
pool->bo.size = 0;
pool->bo.is_winsys_bo = false;
anv_bo_init(&pool->bo, 0, 0);
pool->block_size = block_size;
pool->free_list = ANV_FREE_LIST_EMPTY;
pool->back_free_list = ANV_FREE_LIST_EMPTY;
@@ -463,10 +460,8 @@ anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state)
* values back into pool. */
pool->map = map + center_bo_offset;
pool->center_bo_offset = center_bo_offset;
pool->bo.gem_handle = gem_handle;
pool->bo.size = size;
anv_bo_init(&pool->bo, gem_handle, size);
pool->bo.map = map;
pool->bo.index = 0;

done:
pthread_mutex_unlock(&pool->device->mutex);
@@ -892,9 +887,9 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
{
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
for (unsigned i = 0; i < 16; i++) {
struct anv_bo *bo = &pool->bos[i][s];
if (bo->size > 0)
anv_gem_close(device, bo->gem_handle);
struct anv_scratch_bo *bo = &pool->bos[i][s];
if (bo->exists > 0)
anv_gem_close(device, bo->bo.gem_handle);
}
}
}
|
||||
unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
|
||||
assert(scratch_size_log2 < 16);
|
||||
|
||||
struct anv_bo *bo = &pool->bos[scratch_size_log2][stage];
|
||||
struct anv_scratch_bo *bo = &pool->bos[scratch_size_log2][stage];
|
||||
|
||||
/* From now on, we go into a critical section. In order to remain
|
||||
* thread-safe, we use the bo size as a lock. A value of 0 means we don't
|
||||
* have a valid BO yet. A value of 1 means locked. A value greater than 1
|
||||
* means we have a bo of the given size.
|
||||
/* We can use "exists" to shortcut and ignore the critical section */
|
||||
if (bo->exists)
|
||||
return &bo->bo;
|
||||
|
||||
pthread_mutex_lock(&device->mutex);
|
||||
|
||||
__sync_synchronize();
|
||||
if (bo->exists)
|
||||
return &bo->bo;
|
||||
|
||||
const struct anv_physical_device *physical_device =
|
||||
&device->instance->physicalDevice;
|
||||
const struct gen_device_info *devinfo = &physical_device->info;
|
||||
|
||||
/* WaCSScratchSize:hsw
|
||||
*
|
||||
* Haswell's scratch space address calculation appears to be sparse
|
||||
* rather than tightly packed. The Thread ID has bits indicating which
|
||||
* subslice, EU within a subslice, and thread within an EU it is.
|
||||
* There's a maximum of two slices and two subslices, so these can be
|
||||
* stored with a single bit. Even though there are only 10 EUs per
|
||||
* subslice, this is stored in 4 bits, so there's an effective maximum
|
||||
* value of 16 EUs. Similarly, although there are only 7 threads per EU,
|
||||
* this is stored in a 3 bit number, giving an effective maximum value
|
||||
* of 8 threads per EU.
|
||||
*
|
||||
* This means that we need to use 16 * 8 instead of 10 * 7 for the
|
||||
* number of threads per subslice.
|
||||
*/
|
||||
const unsigned subslices = MAX2(physical_device->subslice_total, 1);
|
||||
const unsigned scratch_ids_per_subslice =
|
||||
device->info.is_haswell ? 16 * 8 : devinfo->max_cs_threads;
|
||||
|
||||
if (bo->size > 1)
|
||||
return bo;
|
||||
uint32_t max_threads[] = {
|
||||
[MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
|
||||
[MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
|
||||
[MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
|
||||
[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
|
||||
[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
|
||||
[MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslices,
|
||||
};
|
||||
|
||||
uint64_t size = __sync_val_compare_and_swap(&bo->size, 0, 1);
|
||||
if (size == 0) {
|
||||
/* We own the lock. Allocate a buffer */
|
||||
uint32_t size = per_thread_scratch * max_threads[stage];
|
||||
|
||||
const struct anv_physical_device *physical_device =
|
||||
&device->instance->physicalDevice;
|
||||
const struct gen_device_info *devinfo = &physical_device->info;
|
||||
anv_bo_init_new(&bo->bo, device, size);
|
||||
|
||||
/* WaCSScratchSize:hsw
|
||||
*
|
||||
* Haswell's scratch space address calculation appears to be sparse
|
||||
* rather than tightly packed. The Thread ID has bits indicating which
|
||||
* subslice, EU within a subslice, and thread within an EU it is.
|
||||
* There's a maximum of two slices and two subslices, so these can be
|
||||
* stored with a single bit. Even though there are only 10 EUs per
|
||||
* subslice, this is stored in 4 bits, so there's an effective maximum
|
||||
* value of 16 EUs. Similarly, although there are only 7 threads per EU,
|
||||
* this is stored in a 3 bit number, giving an effective maximum value
|
||||
* of 8 threads per EU.
|
||||
*
|
||||
* This means that we need to use 16 * 8 instead of 10 * 7 for the
|
||||
* number of threads per subslice.
|
||||
*/
|
||||
const unsigned subslices = MAX2(physical_device->subslice_total, 1);
|
||||
const unsigned scratch_ids_per_subslice =
|
||||
device->info.is_haswell ? 16 * 8 : devinfo->max_cs_threads;
|
||||
/* Set the exists last because it may be read by other threads */
|
||||
__sync_synchronize();
|
||||
bo->exists = true;
|
||||
|
||||
uint32_t max_threads[] = {
|
||||
[MESA_SHADER_VERTEX] = devinfo->max_vs_threads,
|
||||
[MESA_SHADER_TESS_CTRL] = devinfo->max_tcs_threads,
|
||||
[MESA_SHADER_TESS_EVAL] = devinfo->max_tes_threads,
|
||||
[MESA_SHADER_GEOMETRY] = devinfo->max_gs_threads,
|
||||
[MESA_SHADER_FRAGMENT] = devinfo->max_wm_threads,
|
||||
[MESA_SHADER_COMPUTE] = scratch_ids_per_subslice * subslices,
|
||||
};
|
||||
pthread_mutex_unlock(&device->mutex);
|
||||
|
||||
size = per_thread_scratch * max_threads[stage];
|
||||
|
||||
struct anv_bo new_bo;
|
||||
anv_bo_init_new(&new_bo, device, size);
|
||||
|
||||
bo->gem_handle = new_bo.gem_handle;
|
||||
|
||||
/* Set the size last because we use it as a lock */
|
||||
__sync_synchronize();
|
||||
bo->size = size;
|
||||
|
||||
futex_wake((uint32_t *)&bo->size, INT_MAX);
|
||||
} else {
|
||||
/* Someone else got here first */
|
||||
while (bo->size == 1)
|
||||
futex_wait((uint32_t *)&bo->size, 1);
|
||||
}
|
||||
|
||||
return bo;
|
||||
return &bo->bo;
|
||||
}
|
||||
|
@@ -32,6 +32,8 @@
|
||||
#include "genxml/gen7_pack.h"
|
||||
#include "genxml/gen8_pack.h"
|
||||
|
||||
#include "util/debug.h"
|
||||
|
||||
/** \file anv_batch_chain.c
|
||||
*
|
||||
* This file contains functions related to anv_cmd_buffer as a data
|
||||
@@ -297,8 +299,6 @@ anv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
|
||||
bbo->length = other_bbo->length;
|
||||
memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);
|
||||
|
||||
bbo->last_ss_pool_bo_offset = other_bbo->last_ss_pool_bo_offset;
|
||||
|
||||
*bbo_out = bbo;
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -318,7 +318,6 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
|
||||
batch->next = batch->start = bbo->bo.map;
|
||||
batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
|
||||
batch->relocs = &bbo->relocs;
|
||||
bbo->last_ss_pool_bo_offset = 0;
|
||||
bbo->relocs.num_relocs = 0;
|
||||
}
|
||||
|
||||
@@ -620,13 +619,10 @@ anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
|
||||
&cmd_buffer->pool->alloc);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail_bt_blocks;
|
||||
cmd_buffer->last_ss_pool_center = 0;
|
||||
|
||||
anv_cmd_buffer_new_binding_table_block(cmd_buffer);
|
||||
|
||||
cmd_buffer->execbuf2.objects = NULL;
|
||||
cmd_buffer->execbuf2.bos = NULL;
|
||||
cmd_buffer->execbuf2.array_length = 0;
|
||||
|
||||
return VK_SUCCESS;
|
||||
|
||||
fail_bt_blocks:
|
||||
@@ -658,9 +654,6 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
|
||||
&cmd_buffer->batch_bos, link) {
|
||||
anv_batch_bo_destroy(bbo, cmd_buffer);
|
||||
}
|
||||
|
||||
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.objects);
|
||||
vk_free(&cmd_buffer->pool->alloc, cmd_buffer->execbuf2.bos);
|
||||
}
|
||||
|
||||
void
|
||||
@@ -688,6 +681,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
|
||||
cmd_buffer->bt_next = 0;
|
||||
|
||||
cmd_buffer->surface_relocs.num_relocs = 0;
|
||||
cmd_buffer->last_ss_pool_center = 0;
|
||||
|
||||
/* Reset the list of seen buffers */
|
||||
cmd_buffer->seen_bbos.head = 0;
|
||||
@@ -857,56 +851,83 @@ anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
|
||||
&secondary->surface_relocs, 0);
|
||||
}
|
||||
|
||||
struct anv_execbuf {
|
||||
struct drm_i915_gem_execbuffer2 execbuf;
|
||||
|
||||
struct drm_i915_gem_exec_object2 * objects;
|
||||
uint32_t bo_count;
|
||||
struct anv_bo ** bos;
|
||||
|
||||
/* Allocated length of the 'objects' and 'bos' arrays */
|
||||
uint32_t array_length;
|
||||
};
|
||||
|
||||
static void
|
||||
anv_execbuf_init(struct anv_execbuf *exec)
|
||||
{
|
||||
memset(exec, 0, sizeof(*exec));
|
||||
}
|
||||
|
||||
static void
|
||||
anv_execbuf_finish(struct anv_execbuf *exec,
|
||||
const VkAllocationCallbacks *alloc)
|
||||
{
|
||||
vk_free(alloc, exec->objects);
|
||||
vk_free(alloc, exec->bos);
|
||||
}
|
||||
|
||||
static VkResult
|
||||
anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_bo *bo,
|
||||
struct anv_reloc_list *relocs)
|
||||
anv_execbuf_add_bo(struct anv_execbuf *exec,
|
||||
struct anv_bo *bo,
|
||||
struct anv_reloc_list *relocs,
|
||||
const VkAllocationCallbacks *alloc)
|
||||
{
|
||||
struct drm_i915_gem_exec_object2 *obj = NULL;
|
||||
|
||||
if (bo->index < cmd_buffer->execbuf2.bo_count &&
|
||||
cmd_buffer->execbuf2.bos[bo->index] == bo)
|
||||
obj = &cmd_buffer->execbuf2.objects[bo->index];
|
||||
if (bo->index < exec->bo_count && exec->bos[bo->index] == bo)
|
||||
obj = &exec->objects[bo->index];
|
||||
|
||||
if (obj == NULL) {
|
||||
/* We've never seen this one before. Add it to the list and assign
|
||||
* an id that we can use later.
|
||||
*/
|
||||
if (cmd_buffer->execbuf2.bo_count >= cmd_buffer->execbuf2.array_length) {
|
||||
uint32_t new_len = cmd_buffer->execbuf2.objects ?
|
||||
cmd_buffer->execbuf2.array_length * 2 : 64;
|
||||
if (exec->bo_count >= exec->array_length) {
|
||||
uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
|
||||
|
||||
struct drm_i915_gem_exec_object2 *new_objects =
|
||||
vk_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_objects),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
vk_alloc(alloc, new_len * sizeof(*new_objects),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (new_objects == NULL)
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
|
||||
struct anv_bo **new_bos =
|
||||
vk_alloc(&cmd_buffer->pool->alloc, new_len * sizeof(*new_bos),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
|
||||
vk_alloc(alloc, new_len * sizeof(*new_bos),
|
||||
8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
|
||||
if (new_bos == NULL) {
|
||||
vk_free(&cmd_buffer->pool->alloc, new_objects);
|
||||
vk_free(alloc, new_objects);
|
||||
return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
|
||||
}
|
||||
|
||||
if (cmd_buffer->execbuf2.objects) {
|
||||
memcpy(new_objects, cmd_buffer->execbuf2.objects,
|
||||
cmd_buffer->execbuf2.bo_count * sizeof(*new_objects));
|
||||
memcpy(new_bos, cmd_buffer->execbuf2.bos,
|
||||
cmd_buffer->execbuf2.bo_count * sizeof(*new_bos));
|
||||
if (exec->objects) {
|
||||
memcpy(new_objects, exec->objects,
|
||||
exec->bo_count * sizeof(*new_objects));
|
||||
memcpy(new_bos, exec->bos,
|
||||
exec->bo_count * sizeof(*new_bos));
|
||||
}
|
||||
|
||||
cmd_buffer->execbuf2.objects = new_objects;
|
||||
cmd_buffer->execbuf2.bos = new_bos;
|
||||
cmd_buffer->execbuf2.array_length = new_len;
|
||||
vk_free(alloc, exec->objects);
|
||||
vk_free(alloc, exec->bos);
|
||||
|
||||
exec->objects = new_objects;
|
||||
exec->bos = new_bos;
|
||||
exec->array_length = new_len;
|
||||
}
|
||||
|
||||
assert(cmd_buffer->execbuf2.bo_count < cmd_buffer->execbuf2.array_length);
|
||||
assert(exec->bo_count < exec->array_length);
|
||||
|
||||
bo->index = cmd_buffer->execbuf2.bo_count++;
|
||||
obj = &cmd_buffer->execbuf2.objects[bo->index];
|
||||
cmd_buffer->execbuf2.bos[bo->index] = bo;
|
||||
bo->index = exec->bo_count++;
|
||||
obj = &exec->objects[bo->index];
|
||||
exec->bos[bo->index] = bo;
|
||||
|
||||
obj->handle = bo->gem_handle;
|
||||
obj->relocation_count = 0;
|
||||
@@ -929,7 +950,7 @@ anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
|
||||
for (size_t i = 0; i < relocs->num_relocs; i++) {
|
||||
/* A quick sanity check on relocations */
|
||||
assert(relocs->relocs[i].offset < bo->size);
|
||||
anv_cmd_buffer_add_bo(cmd_buffer, relocs->reloc_bos[i], NULL);
|
||||
anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL, alloc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -940,82 +961,62 @@ static void
|
||||
anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_reloc_list *list)
|
||||
{
|
||||
struct anv_bo *bo;
|
||||
|
||||
/* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
|
||||
* struct drm_i915_gem_exec_object2 against the bos current offset and if
|
||||
* all bos haven't moved it will skip relocation processing alltogether.
|
||||
* If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
|
||||
* value of offset so we can set it either way. For that to work we need
|
||||
* to make sure all relocs use the same presumed offset.
|
||||
*/
|
||||
|
||||
for (size_t i = 0; i < list->num_relocs; i++) {
|
||||
bo = list->reloc_bos[i];
|
||||
if (bo->offset != list->relocs[i].presumed_offset)
|
||||
cmd_buffer->execbuf2.need_reloc = true;
|
||||
|
||||
list->relocs[i].target_handle = bo->index;
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
read_reloc(const struct anv_device *device, const void *p)
{
if (device->info.gen >= 8)
return *(uint64_t *)p;
else
return *(uint32_t *)p;
for (size_t i = 0; i < list->num_relocs; i++)
list->relocs[i].target_handle = list->reloc_bos[i]->index;
}

static void
write_reloc(const struct anv_device *device, void *p, uint64_t v)
write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
{
if (device->info.gen >= 8)
*(uint64_t *)p = v;
else
unsigned reloc_size = 0;
if (device->info.gen >= 8) {
/* From the Broadwell PRM Vol. 2a, MI_LOAD_REGISTER_MEM::MemoryAddress:
*
* "This field specifies the address of the memory location where the
* register value specified in the DWord above will read from. The
* address specifies the DWord location of the data. Range =
* GraphicsVirtualAddress[63:2] for a DWord register GraphicsAddress
* [63:48] are ignored by the HW and assumed to be in correct
* canonical form [63:48] == [47]."
*/
const int shift = 63 - 47;
reloc_size = sizeof(uint64_t);
*(uint64_t *)p = (((int64_t)v) << shift) >> shift;
} else {
reloc_size = sizeof(uint32_t);
*(uint32_t *)p = v;
}

if (flush && !device->info.has_llc)
anv_clflush_range(p, reloc_size);
}
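The shift pair in the new write_reloc implements the canonical-address rule quoted in the comment: bits [63:48] must equal bit 47, so the upper bits are produced by sign-extending bit 47. A small self-contained sketch of the same trick, for reference only:

```c
#include <stdint.h>

/* Sign-extend bit 47 into bits [63:48] to produce a canonical 48-bit
 * graphics virtual address, exactly as write_reloc does above. */
static uint64_t canonicalize_gfx_address(uint64_t addr)
{
   const int shift = 63 - 47;
   return (uint64_t)(((int64_t)addr << shift) >> shift);
}

/* canonicalize_gfx_address(0x0000800000000000ull) == 0xffff800000000000ull,
 * while addresses with bit 47 clear pass through unchanged. */
```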
|
||||
static void
|
||||
adjust_relocations_from_block_pool(struct anv_block_pool *pool,
|
||||
struct anv_reloc_list *relocs)
|
||||
adjust_relocations_from_state_pool(struct anv_block_pool *pool,
|
||||
struct anv_reloc_list *relocs,
|
||||
uint32_t last_pool_center_bo_offset)
|
||||
{
|
||||
assert(last_pool_center_bo_offset <= pool->center_bo_offset);
|
||||
uint32_t delta = pool->center_bo_offset - last_pool_center_bo_offset;
|
||||
|
||||
for (size_t i = 0; i < relocs->num_relocs; i++) {
|
||||
/* In general, we don't know how stale the relocated value is. It
|
||||
* may have been used last time or it may not. Since we don't want
|
||||
* to stomp it while the GPU may be accessing it, we haven't updated
|
||||
* it anywhere else in the code. Instead, we just set the presumed
|
||||
* offset to what it is now based on the delta and the data in the
|
||||
* block pool. Then the kernel will update it for us if needed.
|
||||
*/
|
||||
assert(relocs->relocs[i].offset < pool->state.end);
|
||||
const void *p = pool->map + relocs->relocs[i].offset;
|
||||
|
||||
/* We're reading back the relocated value from potentially incoherent
|
||||
* memory here. However, any change to the value will be from the kernel
|
||||
* writing out relocations, which will keep the CPU cache up to date.
|
||||
*/
|
||||
relocs->relocs[i].presumed_offset =
|
||||
read_reloc(pool->device, p) - relocs->relocs[i].delta;
|
||||
|
||||
/* All of the relocations from this block pool to other BO's should
|
||||
* have been emitted relative to the surface block pool center. We
|
||||
* need to add the center offset to make them relative to the
|
||||
* beginning of the actual GEM bo.
|
||||
*/
|
||||
relocs->relocs[i].offset += pool->center_bo_offset;
|
||||
relocs->relocs[i].offset += delta;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
adjust_relocations_to_block_pool(struct anv_block_pool *pool,
|
||||
adjust_relocations_to_state_pool(struct anv_block_pool *pool,
|
||||
struct anv_bo *from_bo,
|
||||
struct anv_reloc_list *relocs,
|
||||
uint32_t *last_pool_center_bo_offset)
|
||||
uint32_t last_pool_center_bo_offset)
|
||||
{
|
||||
assert(*last_pool_center_bo_offset <= pool->center_bo_offset);
|
||||
uint32_t delta = pool->center_bo_offset - *last_pool_center_bo_offset;
|
||||
assert(last_pool_center_bo_offset <= pool->center_bo_offset);
|
||||
uint32_t delta = pool->center_bo_offset - last_pool_center_bo_offset;
|
||||
|
||||
/* When we initially emit relocations into a block pool, we don't
|
||||
* actually know what the final center_bo_offset will be so we just emit
|
||||
@@ -1040,37 +1041,147 @@ adjust_relocations_to_block_pool(struct anv_block_pool *pool,
|
||||
assert(relocs->relocs[i].offset < from_bo->size);
|
||||
write_reloc(pool->device, from_bo->map + relocs->relocs[i].offset,
|
||||
relocs->relocs[i].presumed_offset +
|
||||
relocs->relocs[i].delta);
|
||||
relocs->relocs[i].delta, false);
|
||||
}
|
||||
}
|
||||
|
||||
*last_pool_center_bo_offset = pool->center_bo_offset;
|
||||
}
|
||||
|
||||
void
|
||||
anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
|
||||
static void
|
||||
anv_reloc_list_apply(struct anv_device *device,
|
||||
struct anv_reloc_list *list,
|
||||
struct anv_bo *bo,
|
||||
bool always_relocate)
|
||||
{
|
||||
for (size_t i = 0; i < list->num_relocs; i++) {
|
||||
struct anv_bo *target_bo = list->reloc_bos[i];
|
||||
if (list->relocs[i].presumed_offset == target_bo->offset &&
|
||||
!always_relocate)
|
||||
continue;
|
||||
|
||||
void *p = bo->map + list->relocs[i].offset;
|
||||
write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true);
|
||||
list->relocs[i].presumed_offset = target_bo->offset;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This function applies the relocation for a command buffer and writes the
|
||||
* actual addresses into the buffers as per what we were told by the kernel on
|
||||
* the previous execbuf2 call. This should be safe to do because, for each
|
||||
* relocated address, we have two cases:
|
||||
*
|
||||
* 1) The target BO is inactive (as seen by the kernel). In this case, it is
|
||||
* not in use by the GPU so updating the address is 100% ok. It won't be
|
||||
* in-use by the GPU (from our context) again until the next execbuf2
|
||||
* happens. If the kernel decides to move it in the next execbuf2, it
|
||||
* will have to do the relocations itself, but that's ok because it should
|
||||
* have all of the information needed to do so.
|
||||
*
|
||||
* 2) The target BO is active (as seen by the kernel). In this case, it
|
||||
* hasn't moved since the last execbuffer2 call because GTT shuffling
|
||||
* *only* happens when the BO is idle. (From our perspective, it only
|
||||
* happens inside the execbuffer2 ioctl, but the shuffling may be
|
||||
* triggered by another ioctl, with full-ppgtt this is limited to only
|
||||
* execbuffer2 ioctls on the same context, or memory pressure.) Since the
|
||||
* target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT
|
||||
* address and the relocated value we are writing into the BO will be the
|
||||
* same as the value that is already there.
|
||||
*
|
||||
* There is also a possibility that the target BO is active but the exact
|
||||
* RENDER_SURFACE_STATE object we are writing the relocation into isn't in
|
||||
* use. In this case, the address currently in the RENDER_SURFACE_STATE
|
||||
* may be stale but it's still safe to write the relocation because that
|
||||
* particular RENDER_SURFACE_STATE object isn't in-use by the GPU and
|
||||
* won't be until the next execbuf2 call.
|
||||
*
|
||||
* By doing relocations on the CPU, we can tell the kernel that it doesn't
|
||||
* need to bother. We want to do this because the surface state buffer is
|
||||
* used by every command buffer so, if the kernel does the relocations, it
|
||||
* will always be busy and the kernel will always stall. This is also
|
||||
* probably the fastest mechanism for doing relocations since the kernel would
|
||||
* have to make a full copy of all the relocations lists.
|
||||
*/
|
||||
static bool
|
||||
relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_execbuf *exec)
|
||||
{
|
||||
static int userspace_relocs = -1;
|
||||
if (userspace_relocs < 0)
|
||||
userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true);
|
||||
if (!userspace_relocs)
|
||||
return false;
|
||||
|
||||
/* First, we have to check to see whether or not we can even do the
|
||||
* relocation. New buffers which have never been submitted to the kernel
|
||||
* don't have a valid offset so we need to let the kernel do relocations so
|
||||
* that we can get offsets for them. On future execbuf2 calls, those
|
||||
* buffers will have offsets and we will be able to skip relocating.
|
||||
* Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1.
|
||||
*/
|
||||
for (uint32_t i = 0; i < exec->bo_count; i++) {
|
||||
if (exec->bos[i]->offset == (uint64_t)-1)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Since surface states are shared between command buffers and we don't
|
||||
* know what order they will be submitted to the kernel, we don't know
|
||||
* what address is actually written in the surface state object at any
|
||||
* given time. The only option is to always relocate them.
|
||||
*/
|
||||
anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
|
||||
&cmd_buffer->device->surface_state_block_pool.bo,
|
||||
true /* always relocate surface states */);
|
||||
|
||||
/* Since we own all of the batch buffers, we know what values are stored
|
||||
* in the relocated addresses and only have to update them if the offsets
|
||||
* have changed.
|
||||
*/
|
||||
struct anv_batch_bo **bbo;
|
||||
u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
|
||||
anv_reloc_list_apply(cmd_buffer->device,
|
||||
&(*bbo)->relocs, &(*bbo)->bo, false);
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < exec->bo_count; i++)
|
||||
exec->objects[i].offset = exec->bos[i]->offset;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_cmd_buffer_execbuf(struct anv_device *device,
|
||||
struct anv_cmd_buffer *cmd_buffer)
|
||||
{
|
||||
struct anv_batch *batch = &cmd_buffer->batch;
|
||||
struct anv_block_pool *ss_pool =
|
||||
&cmd_buffer->device->surface_state_block_pool;
|
||||
|
||||
cmd_buffer->execbuf2.bo_count = 0;
|
||||
cmd_buffer->execbuf2.need_reloc = false;
|
||||
struct anv_execbuf execbuf;
|
||||
anv_execbuf_init(&execbuf);
|
||||
|
||||
adjust_relocations_from_block_pool(ss_pool, &cmd_buffer->surface_relocs);
|
||||
anv_cmd_buffer_add_bo(cmd_buffer, &ss_pool->bo, &cmd_buffer->surface_relocs);
|
||||
adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
|
||||
cmd_buffer->last_ss_pool_center);
|
||||
anv_execbuf_add_bo(&execbuf, &ss_pool->bo, &cmd_buffer->surface_relocs,
|
||||
&cmd_buffer->pool->alloc);
|
||||
|
||||
/* First, we walk over all of the bos we've seen and add them and their
|
||||
* relocations to the validate list.
|
||||
*/
|
||||
struct anv_batch_bo **bbo;
|
||||
u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
|
||||
adjust_relocations_to_block_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
|
||||
&(*bbo)->last_ss_pool_bo_offset);
|
||||
adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
|
||||
cmd_buffer->last_ss_pool_center);
|
||||
|
||||
anv_cmd_buffer_add_bo(cmd_buffer, &(*bbo)->bo, &(*bbo)->relocs);
|
||||
anv_execbuf_add_bo(&execbuf, &(*bbo)->bo, &(*bbo)->relocs,
|
||||
&cmd_buffer->pool->alloc);
|
||||
}
|
||||
|
||||
/* Now that we've adjusted all of the surface state relocations, we need to
|
||||
* record the surface state pool center so future executions of the command
|
||||
* buffer can adjust correctly.
|
||||
*/
|
||||
cmd_buffer->last_ss_pool_center = ss_pool->center_bo_offset;
|
||||
|
||||
struct anv_batch_bo *first_batch_bo =
|
||||
list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);
|
||||
|
||||
@@ -1079,20 +1190,19 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
|
||||
* corresponding to the first batch_bo in the chain with the last
|
||||
* element in the list.
|
||||
*/
|
||||
if (first_batch_bo->bo.index != cmd_buffer->execbuf2.bo_count - 1) {
|
||||
if (first_batch_bo->bo.index != execbuf.bo_count - 1) {
|
||||
uint32_t idx = first_batch_bo->bo.index;
|
||||
uint32_t last_idx = cmd_buffer->execbuf2.bo_count - 1;
|
||||
uint32_t last_idx = execbuf.bo_count - 1;
|
||||
|
||||
struct drm_i915_gem_exec_object2 tmp_obj =
|
||||
cmd_buffer->execbuf2.objects[idx];
|
||||
assert(cmd_buffer->execbuf2.bos[idx] == &first_batch_bo->bo);
|
||||
struct drm_i915_gem_exec_object2 tmp_obj = execbuf.objects[idx];
|
||||
assert(execbuf.bos[idx] == &first_batch_bo->bo);
|
||||
|
||||
cmd_buffer->execbuf2.objects[idx] = cmd_buffer->execbuf2.objects[last_idx];
|
||||
cmd_buffer->execbuf2.bos[idx] = cmd_buffer->execbuf2.bos[last_idx];
|
||||
cmd_buffer->execbuf2.bos[idx]->index = idx;
|
||||
execbuf.objects[idx] = execbuf.objects[last_idx];
|
||||
execbuf.bos[idx] = execbuf.bos[last_idx];
|
||||
execbuf.bos[idx]->index = idx;
|
||||
|
||||
cmd_buffer->execbuf2.objects[last_idx] = tmp_obj;
|
||||
cmd_buffer->execbuf2.bos[last_idx] = &first_batch_bo->bo;
|
||||
execbuf.objects[last_idx] = tmp_obj;
|
||||
execbuf.bos[last_idx] = &first_batch_bo->bo;
|
||||
first_batch_bo->bo.index = last_idx;
|
||||
}
|
||||
|
||||
@@ -1113,9 +1223,9 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
|
||||
}
|
||||
}
|
||||
|
||||
cmd_buffer->execbuf2.execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||
.buffers_ptr = (uintptr_t) cmd_buffer->execbuf2.objects,
|
||||
.buffer_count = cmd_buffer->execbuf2.bo_count,
|
||||
execbuf.execbuf = (struct drm_i915_gem_execbuffer2) {
|
||||
.buffers_ptr = (uintptr_t) execbuf.objects,
|
||||
.buffer_count = execbuf.bo_count,
|
||||
.batch_start_offset = 0,
|
||||
.batch_len = batch->next - batch->start,
|
||||
.cliprects_ptr = 0,
|
||||
@@ -1128,6 +1238,49 @@ anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer)
|
||||
.rsvd2 = 0,
|
||||
};
|
||||
|
||||
if (!cmd_buffer->execbuf2.need_reloc)
|
||||
cmd_buffer->execbuf2.execbuf.flags |= I915_EXEC_NO_RELOC;
|
||||
if (relocate_cmd_buffer(cmd_buffer, &execbuf)) {
|
||||
/* If we were able to successfully relocate everything, tell the kernel
|
||||
* that it can skip doing relocations. The requirement for using
|
||||
* NO_RELOC is:
|
||||
*
|
||||
* 1) The addresses written in the objects must match the corresponding
|
||||
* reloc.presumed_offset which in turn must match the corresponding
|
||||
* execobject.offset.
|
||||
*
|
||||
* 2) To avoid stalling, execobject.offset should match the current
|
||||
* address of that object within the active context.
|
||||
*
|
||||
* In order to satisfy all of the invariants that make userspace
|
||||
* relocations to be safe (see relocate_cmd_buffer()), we need to
|
||||
* further ensure that the addresses we use match those used by the
|
||||
* kernel for the most recent execbuf2.
|
||||
*
|
||||
* The kernel may still choose to do relocations anyway if something has
|
||||
* moved in the GTT. In this case, the relocation list still needs to be
|
||||
* valid. All relocations on the batch buffers are already valid and
|
||||
* kept up-to-date. For surface state relocations, by applying the
|
||||
* relocations in relocate_cmd_buffer, we ensured that the address in
|
||||
* the RENDER_SURFACE_STATE matches presumed_offset, so it should be
|
||||
* safe for the kernel to relocate them as needed.
|
||||
*/
|
||||
execbuf.execbuf.flags |= I915_EXEC_NO_RELOC;
|
||||
} else {
|
||||
/* In the case where we fall back to doing kernel relocations, we need
|
||||
* to ensure that the relocation list is valid. All relocations on the
|
||||
* batch buffers are already valid and kept up-to-date. Since surface
|
||||
* states are shared between command buffers and we don't know what
|
||||
* order they will be submitted to the kernel, we don't know what
|
||||
* address is actually written in the surface state object at any given
|
||||
* time. The only option is to set a bogus presumed offset and let the
|
||||
* kernel relocate them.
|
||||
*/
|
||||
for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++)
|
||||
cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
|
||||
}
|
||||
|
||||
VkResult result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
|
||||
|
||||
anv_execbuf_finish(&execbuf, &cmd_buffer->pool->alloc);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@@ -44,8 +44,7 @@ lookup_blorp_shader(struct blorp_context *blorp,
|
||||
anv_shader_bin_unref(device, bin);
|
||||
|
||||
*kernel_out = bin->kernel.offset;
|
||||
*(const struct brw_stage_prog_data **)prog_data_out =
|
||||
anv_shader_bin_get_prog_data(bin);
|
||||
*(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -54,7 +53,8 @@ static void
|
||||
upload_blorp_shader(struct blorp_context *blorp,
|
||||
const void *key, uint32_t key_size,
|
||||
const void *kernel, uint32_t kernel_size,
|
||||
const void *prog_data, uint32_t prog_data_size,
|
||||
const struct brw_stage_prog_data *prog_data,
|
||||
uint32_t prog_data_size,
|
||||
uint32_t *kernel_out, void *prog_data_out)
|
||||
{
|
||||
struct anv_device *device = blorp->driver_ctx;
|
||||
@@ -78,8 +78,7 @@ upload_blorp_shader(struct blorp_context *blorp,
|
||||
anv_shader_bin_unref(device, bin);
|
||||
|
||||
*kernel_out = bin->kernel.offset;
|
||||
*(const struct brw_stage_prog_data **)prog_data_out =
|
||||
anv_shader_bin_get_prog_data(bin);
|
||||
*(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -658,7 +658,7 @@ anv_cmd_buffer_push_constants(struct anv_cmd_buffer *cmd_buffer,
|
||||
struct anv_push_constants *data =
|
||||
cmd_buffer->state.push_constants[stage];
|
||||
const struct brw_stage_prog_data *prog_data =
|
||||
anv_shader_bin_get_prog_data(cmd_buffer->state.pipeline->shaders[stage]);
|
||||
cmd_buffer->state.pipeline->shaders[stage]->prog_data;
|
||||
|
||||
/* If we don't actually have any push constants, bail. */
|
||||
if (data == NULL || prog_data == NULL || prog_data->nr_params == 0)
|
||||
|
@@ -203,19 +203,19 @@ static const VkExtensionProperties global_extensions[] = {
|
||||
#ifdef VK_USE_PLATFORM_XCB_KHR
|
||||
{
|
||||
.extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
|
||||
.specVersion = 5,
|
||||
.specVersion = 6,
|
||||
},
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_XLIB_KHR
|
||||
{
|
||||
.extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
|
||||
.specVersion = 5,
|
||||
.specVersion = 6,
|
||||
},
|
||||
#endif
|
||||
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
|
||||
{
|
||||
.extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
|
||||
.specVersion = 4,
|
||||
.specVersion = 5,
|
||||
},
|
||||
#endif
|
||||
};
|
||||
@@ -223,7 +223,7 @@ static const VkExtensionProperties global_extensions[] = {
|
||||
static const VkExtensionProperties device_extensions[] = {
|
||||
{
|
||||
.extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
|
||||
.specVersion = 67,
|
||||
.specVersion = 68,
|
||||
},
|
||||
};
|
||||
|
||||
@@ -350,7 +350,7 @@ VkResult anv_EnumeratePhysicalDevices(
|
||||
snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
|
||||
result = anv_physical_device_init(&instance->physicalDevice,
|
||||
instance, path);
|
||||
if (result == VK_SUCCESS)
|
||||
if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -770,7 +770,7 @@ anv_device_submit_simple_batch(struct anv_device *device,
|
||||
{
|
||||
struct drm_i915_gem_execbuffer2 execbuf;
|
||||
struct drm_i915_gem_exec_object2 exec2_objects[1];
|
||||
struct anv_bo bo;
|
||||
struct anv_bo bo, *exec_bos[1];
|
||||
VkResult result = VK_SUCCESS;
|
||||
uint32_t size;
|
||||
int64_t timeout;
|
||||
@@ -786,6 +786,7 @@ anv_device_submit_simple_batch(struct anv_device *device,
|
||||
if (!device->info.has_llc)
|
||||
anv_clflush_range(bo.map, size);
|
||||
|
||||
exec_bos[0] = &bo;
|
||||
exec2_objects[0].handle = bo.gem_handle;
|
||||
exec2_objects[0].relocation_count = 0;
|
||||
exec2_objects[0].relocs_ptr = 0;
|
||||
@@ -809,18 +810,15 @@ anv_device_submit_simple_batch(struct anv_device *device,
|
||||
execbuf.rsvd1 = device->context_id;
|
||||
execbuf.rsvd2 = 0;
|
||||
|
||||
ret = anv_gem_execbuffer(device, &execbuf);
|
||||
if (ret != 0) {
|
||||
/* We don't know the real error. */
|
||||
result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
|
||||
result = anv_device_execbuf(device, &execbuf, exec_bos);
|
||||
if (result != VK_SUCCESS)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
timeout = INT64_MAX;
|
||||
ret = anv_gem_wait(device, bo.gem_handle, &timeout);
|
||||
if (ret != 0) {
|
||||
/* We don't know the real error. */
|
||||
result = vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY, "execbuf2 failed: %m");
|
||||
result = vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@@ -1070,6 +1068,24 @@ void anv_GetDeviceQueue(
|
||||
*pQueue = anv_queue_to_handle(&device->queue);
|
||||
}
|
||||
|
||||
VkResult
|
||||
anv_device_execbuf(struct anv_device *device,
|
||||
struct drm_i915_gem_execbuffer2 *execbuf,
|
||||
struct anv_bo **execbuf_bos)
|
||||
{
|
||||
int ret = anv_gem_execbuffer(device, execbuf);
|
||||
if (ret != 0) {
|
||||
/* We don't know the real error. */
|
||||
return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
|
||||
}
|
||||
|
||||
struct drm_i915_gem_exec_object2 *objects = (void *)execbuf->buffers_ptr;
|
||||
for (uint32_t k = 0; k < execbuf->buffer_count; k++)
|
||||
execbuf_bos[k]->offset = objects[k].offset;
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
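anv_device_execbuf(), added above, both submits the request and writes the kernel-assigned offsets back into each anv_bo, which is what lets a later submission qualify for I915_EXEC_NO_RELOC. A hedged sketch of the calling convention follows; the wrapper name is illustrative and not from this change.

/* Sketch only: 'execbuf' and 'bos' must describe the same validation list,
 * in the same order, since offsets are copied back by array index.
 */
static VkResult
submit_and_refresh_offsets(struct anv_device *device,
                           struct drm_i915_gem_execbuffer2 *execbuf,
                           struct anv_bo **bos)
{
   VkResult result = anv_device_execbuf(device, execbuf, bos);
   if (result != VK_SUCCESS)
      return result;

   /* Each bos[i]->offset now matches the GTT placement the kernel chose,
    * so userspace relocations can be trusted on the next submit.
    */
   return VK_SUCCESS;
}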

VkResult anv_QueueSubmit(
    VkQueue                                     _queue,
    uint32_t                                    submitCount,
@@ -1079,7 +1095,34 @@ VkResult anv_QueueSubmit(
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);
   struct anv_device *device = queue->device;
   int ret;
   VkResult result = VK_SUCCESS;

   /* We lock around QueueSubmit for three main reasons:
    *
    *  1) When a block pool is resized, we create a new gem handle with a
    *     different size and, in the case of surface states, possibly a
    *     different center offset but we re-use the same anv_bo struct when
    *     we do so.  If this happens in the middle of setting up an execbuf,
    *     we could end up with our list of BOs out of sync with our list of
    *     gem handles.
    *
    *  2) The algorithm we use for building the list of unique buffers isn't
    *     thread-safe.  While the client is supposed to syncronize around
    *     QueueSubmit, this would be extremely difficult to debug if it ever
    *     came up in the wild due to a broken app.  It's better to play it
    *     safe and just lock around QueueSubmit.
    *
    *  3) The anv_cmd_buffer_execbuf function may perform relocations in
    *     userspace.  Due to the fact that the surface state buffer is shared
    *     between batches, we can't afford to have that happen from multiple
    *     threads at the same time.  Even though the user is supposed to
    *     ensure this doesn't happen, we play it safe as in (2) above.
    *
    * Since the only other things that ever take the device lock such as block
    * pool resize only rarely happen, this will almost never be contended so
    * taking a lock isn't really an expensive operation in this case.
    */
   pthread_mutex_lock(&device->mutex);

   for (uint32_t i = 0; i < submitCount; i++) {
      for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
@@ -1087,28 +1130,23 @@ VkResult anv_QueueSubmit(
                         pSubmits[i].pCommandBuffers[j]);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

         ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf);
         if (ret != 0) {
            /* We don't know the real error. */
            return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
                             "execbuf2 failed: %m");
         }

         for (uint32_t k = 0; k < cmd_buffer->execbuf2.bo_count; k++)
            cmd_buffer->execbuf2.bos[k]->offset = cmd_buffer->execbuf2.objects[k].offset;
         result = anv_cmd_buffer_execbuf(device, cmd_buffer);
         if (result != VK_SUCCESS)
            goto out;
      }
   }

   if (fence) {
      ret = anv_gem_execbuffer(device, &fence->execbuf);
      if (ret != 0) {
         /* We don't know the real error. */
         return vk_errorf(VK_ERROR_OUT_OF_DEVICE_MEMORY,
                          "execbuf2 failed: %m");
      }
      struct anv_bo *fence_bo = &fence->bo;
      result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
      if (result != VK_SUCCESS)
         goto out;
   }

   return VK_SUCCESS;
out:
   pthread_mutex_unlock(&device->mutex);

   return result;
}

VkResult anv_QueueWaitIdle(
@@ -1138,15 +1176,11 @@ VkResult anv_DeviceWaitIdle(
VkResult
anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
{
   bo->gem_handle = anv_gem_create(device, size);
   if (!bo->gem_handle)
   uint32_t gem_handle = anv_gem_create(device, size);
   if (!gem_handle)
      return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);

   bo->map = NULL;
   bo->index = 0;
   bo->offset = 0;
   bo->size = size;
   bo->is_winsys_bo = false;
   anv_bo_init(bo, gem_handle, size);

   return VK_SUCCESS;
}

@@ -49,16 +49,15 @@ VkResult anv_CreateDmaBufImageINTEL(
   if (mem == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   mem->bo.gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd);
   if (!mem->bo.gem_handle) {
   uint32_t gem_handle = anv_gem_fd_to_handle(device, pCreateInfo->fd);
   if (!gem_handle) {
      result = vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
      goto fail;
   }

   mem->bo.map = NULL;
   mem->bo.index = 0;
   mem->bo.offset = 0;
   mem->bo.size = pCreateInfo->strideInBytes * pCreateInfo->extent.height;
   uint64_t size = pCreateInfo->strideInBytes * pCreateInfo->extent.height;

   anv_bo_init(&mem->bo, gem_handle, size);

   anv_image_create(_device,
      &(struct anv_image_create_info) {

@@ -388,7 +388,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
                           struct anv_pipeline_cache *cache,
                           const void *key_data, uint32_t key_size,
                           const void *kernel_data, uint32_t kernel_size,
                           const void *prog_data, uint32_t prog_data_size,
                           const struct brw_stage_prog_data *prog_data,
                           uint32_t prog_data_size,
                           const struct anv_pipeline_bind_map *bind_map)
{
   if (cache) {
@@ -399,7 +400,8 @@ anv_pipeline_upload_kernel(struct anv_pipeline *pipeline,
   } else {
      return anv_shader_bin_create(pipeline->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size, bind_map);
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}

@@ -476,7 +478,8 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,

      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data, sizeof(prog_data), &map);
                                       &prog_data.base.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -486,7 +489,7 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline,
   }

   const struct brw_vs_prog_data *vs_prog_data =
      (const struct brw_vs_prog_data *)anv_shader_bin_get_prog_data(bin);
      (const struct brw_vs_prog_data *)bin->prog_data;

   if (vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) {
      pipeline->vs_simd8 = bin->kernel.offset;
@@ -563,7 +566,8 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline,
      /* TODO: SIMD8 GS */
      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data, sizeof(prog_data), &map);
                                       &prog_data.base.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -686,7 +690,8 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline,

      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data, sizeof(prog_data), &map);
                                       &prog_data.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
@@ -758,7 +763,8 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline,

      bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20,
                                       shader_code, code_size,
                                       &prog_data, sizeof(prog_data), &map);
                                       &prog_data.base, sizeof(prog_data),
                                       &map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

@@ -26,13 +26,9 @@
#include "util/debug.h"
#include "anv_private.h"

struct shader_bin_key {
   uint32_t size;
   uint8_t data[0];
};

static size_t
anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,
anv_shader_bin_size(uint32_t prog_data_size, uint32_t nr_params,
                    uint32_t key_size,
                    uint32_t surface_count, uint32_t sampler_count)
{
   const uint32_t binding_data_size =
@@ -40,28 +36,21 @@ anv_shader_bin_size(uint32_t prog_data_size, uint32_t key_size,

   return align_u32(sizeof(struct anv_shader_bin), 8) +
          align_u32(prog_data_size, 8) +
          align_u32(nr_params * sizeof(void *), 8) +
          align_u32(sizeof(uint32_t) + key_size, 8) +
          align_u32(binding_data_size, 8);
}

static inline const struct shader_bin_key *
anv_shader_bin_get_key(const struct anv_shader_bin *shader)
{
   const void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);
   data += align_u32(shader->prog_data_size, 8);
   return data;
}

struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key_data, uint32_t key_size,
                      const void *kernel_data, uint32_t kernel_size,
                      const void *prog_data, uint32_t prog_data_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map)
{
   const size_t size =
      anv_shader_bin_size(prog_data_size, key_size,
      anv_shader_bin_size(prog_data_size, prog_data->nr_params, key_size,
                          bind_map->surface_count, bind_map->sampler_count);

   struct anv_shader_bin *shader =
@@ -82,10 +71,20 @@ anv_shader_bin_create(struct anv_device *device,
   void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);

   shader->prog_data = data;
   struct brw_stage_prog_data *new_prog_data = data;
   memcpy(data, prog_data, prog_data_size);
   data += align_u32(prog_data_size, 8);

   struct shader_bin_key *key = data;
   assert(prog_data->nr_pull_params == 0);
   assert(prog_data->nr_image_params == 0);
   new_prog_data->param = data;
   uint32_t param_size = prog_data->nr_params * sizeof(void *);
   memcpy(data, prog_data_param, param_size);
   data += align_u32(param_size, 8);

   shader->key = data;
   struct anv_shader_bin_key *key = data;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);
   data += align_u32(sizeof(*key) + key_size, 8);
@@ -115,7 +114,7 @@ static size_t
anv_shader_bin_data_size(const struct anv_shader_bin *shader)
{
   return anv_shader_bin_size(shader->prog_data_size,
                              anv_shader_bin_get_key(shader)->size,
                              shader->prog_data->nr_params, shader->key->size,
                              shader->bind_map.surface_count,
                              shader->bind_map.sampler_count) +
          align_u32(shader->kernel_size, 8);
@@ -126,7 +125,7 @@ anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
{
   size_t struct_size =
      anv_shader_bin_size(shader->prog_data_size,
                          anv_shader_bin_get_key(shader)->size,
                          shader->prog_data->nr_params, shader->key->size,
                          shader->bind_map.surface_count,
                          shader->bind_map.sampler_count);

@@ -151,14 +150,14 @@ anv_shader_bin_write_data(const struct anv_shader_bin *shader, void *data)
static uint32_t
shader_bin_key_hash_func(const void *void_key)
{
   const struct shader_bin_key *key = void_key;
   const struct anv_shader_bin_key *key = void_key;
   return _mesa_hash_data(key->data, key->size);
}

static bool
shader_bin_key_compare_func(const void *void_a, const void *void_b)
{
   const struct shader_bin_key *a = void_a, *b = void_b;
   const struct anv_shader_bin_key *a = void_a, *b = void_b;
   if (a->size != b->size)
      return false;

@@ -230,7 +229,7 @@ anv_pipeline_cache_search_locked(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size)
{
   uint32_t vla[1 + DIV_ROUND_UP(key_size, sizeof(uint32_t))];
   struct shader_bin_key *key = (void *)vla;
   struct anv_shader_bin_key *key = (void *)vla;
   key->size = key_size;
   memcpy(key->data, key_data, key_size);

@@ -266,7 +265,9 @@ static struct anv_shader_bin *
anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
                              const void *key_data, uint32_t key_size,
                              const void *kernel_data, uint32_t kernel_size,
                              const void *prog_data, uint32_t prog_data_size,
                              const struct brw_stage_prog_data *prog_data,
                              uint32_t prog_data_size,
                              const void *prog_data_param,
                              const struct anv_pipeline_bind_map *bind_map)
{
   struct anv_shader_bin *shader =
@@ -277,11 +278,12 @@ anv_pipeline_cache_add_shader(struct anv_pipeline_cache *cache,
   struct anv_shader_bin *bin =
      anv_shader_bin_create(cache->device, key_data, key_size,
                            kernel_data, kernel_size,
                            prog_data, prog_data_size, bind_map);
                            prog_data, prog_data_size, prog_data_param,
                            bind_map);
   if (!bin)
      return NULL;

   _mesa_hash_table_insert(cache->cache, anv_shader_bin_get_key(bin), bin);
   _mesa_hash_table_insert(cache->cache, bin->key, bin);

   return bin;
}
@@ -290,7 +292,8 @@ struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const void *prog_data, uint32_t prog_data_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map)
{
   if (cache->cache) {
@@ -299,7 +302,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
      struct anv_shader_bin *bin =
         anv_pipeline_cache_add_shader(cache, key_data, key_size,
                                       kernel_data, kernel_size,
                                       prog_data, prog_data_size, bind_map);
                                       prog_data, prog_data_size,
                                       prog_data->param, bind_map);

      pthread_mutex_unlock(&cache->mutex);

@@ -311,7 +315,8 @@ anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
      /* In this case, we're not caching it so the caller owns it entirely */
      return anv_shader_bin_create(cache->device, key_data, key_size,
                                   kernel_data, kernel_size,
                                   prog_data, prog_data_size, bind_map);
                                   prog_data, prog_data_size,
                                   prog_data->param, bind_map);
   }
}

@@ -366,10 +371,16 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
      memcpy(&bin, p, sizeof(bin));
      p += align_u32(sizeof(struct anv_shader_bin), 8);

      const void *prog_data = p;
      const struct brw_stage_prog_data *prog_data = p;
      p += align_u32(bin.prog_data_size, 8);
      if (p > end)
         break;

      struct shader_bin_key key;
      uint32_t param_size = prog_data->nr_params * sizeof(void *);
      const void *prog_data_param = p;
      p += align_u32(param_size, 8);

      struct anv_shader_bin_key key;
      if (p + sizeof(key) > end)
         break;
      memcpy(&key, p, sizeof(key));
@@ -392,7 +403,7 @@ anv_pipeline_cache_load(struct anv_pipeline_cache *cache,
      anv_pipeline_cache_add_shader(cache, key_data, key.size,
                                    kernel_data, bin.kernel_size,
                                    prog_data, bin.prog_data_size,
                                    &bin.bind_map);
                                    prog_data_param, &bin.bind_map);
   }
}

@@ -532,11 +543,11 @@ VkResult anv_MergePipelineCaches(
      struct hash_entry *entry;
      hash_table_foreach(src->cache, entry) {
         struct anv_shader_bin *bin = entry->data;
         if (_mesa_hash_table_search(dst->cache, anv_shader_bin_get_key(bin)))
         if (_mesa_hash_table_search(dst->cache, bin->key))
            continue;

         anv_shader_bin_ref(bin);
         _mesa_hash_table_insert(dst->cache, anv_shader_bin_get_key(bin), bin);
         _mesa_hash_table_insert(dst->cache, bin->key, bin);
      }
   }

@@ -267,6 +267,17 @@ struct anv_bo {
   bool is_winsys_bo;
};

static inline void
anv_bo_init(struct anv_bo *bo, uint32_t gem_handle, uint64_t size)
{
   bo->gem_handle = gem_handle;
   bo->index = 0;
   bo->offset = -1;
   bo->size = size;
   bo->map = NULL;
   bo->is_winsys_bo = false;
}
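Note that anv_bo_init() above seeds offset with -1 rather than 0; relocate_cmd_buffer() treats (uint64_t)-1 as "no kernel-assigned offset yet" and falls back to kernel relocations in that case. A small hedged sketch of that check follows; the helper name is an assumption for illustration, only anv_bo_init() and the -1 sentinel come from the diff.

/* Sketch only: a freshly initialized BO has never been through execbuf2,
 * so its offset still holds the (uint64_t)-1 sentinel and must not be
 * trusted for userspace relocations.
 */
static inline bool
anv_bo_has_valid_offset(const struct anv_bo *bo)
{
   return bo->offset != (uint64_t)-1;
}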

/* Represents a lock-free linked list of "free" things.  This is used by
 * both the block pool and the state pools.  Unfortunately, in order to
 * solve the ABA problem, we can't use a single uint32_t head.
@@ -439,9 +450,14 @@ VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo,
                           uint32_t size);
void anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo);

struct anv_scratch_bo {
   bool exists;
   struct anv_bo bo;
};

struct anv_scratch_pool {
   /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
   struct anv_bo bos[16][MESA_SHADER_STAGES];
   struct anv_scratch_bo bos[16][MESA_SHADER_STAGES];
};

void anv_scratch_pool_init(struct anv_device *device,
@@ -518,7 +534,8 @@ struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const void *prog_data, uint32_t prog_data_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct anv_pipeline_bind_map *bind_map);

struct anv_device {
@@ -567,6 +584,10 @@ void anv_device_get_cache_uuid(void *uuid);
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);

VkResult anv_device_execbuf(struct anv_device *device,
                            struct drm_i915_gem_execbuffer2 *execbuf,
                            struct anv_bo **execbuf_bos);

void* anv_gem_mmap(struct anv_device *device,
                   uint32_t gem_handle, uint64_t offset, uint64_t size, uint32_t flags);
void anv_gem_munmap(void *p, uint64_t size);
@@ -617,9 +638,6 @@ struct anv_batch_bo {
   /* Bytes actually consumed in this batch BO */
   size_t length;

   /* Last seen surface state block pool bo offset */
   uint32_t last_ss_pool_bo_offset;

   struct anv_reloc_list relocs;
};

@@ -1153,24 +1171,10 @@ struct anv_cmd_buffer {
    */
   struct u_vector bt_blocks;
   uint32_t bt_next;

   struct anv_reloc_list surface_relocs;

   /* Information needed for execbuf
    *
    * These fields are generated by anv_cmd_buffer_prepare_execbuf().
    */
   struct {
      struct drm_i915_gem_execbuffer2 execbuf;

      struct drm_i915_gem_exec_object2 * objects;
      uint32_t bo_count;
      struct anv_bo ** bos;

      /* Allocated length of the 'objects' and 'bos' arrays */
      uint32_t array_length;

      bool need_reloc;
   } execbuf2;
   /** Last seen surface state block pool center bo offset */
   uint32_t last_ss_pool_center;

   /* Serial for tracking buffer completion */
   uint32_t serial;
@@ -1192,6 +1196,8 @@ void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                                  struct anv_cmd_buffer *secondary);
void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
VkResult anv_cmd_buffer_execbuf(struct anv_device *device,
                                struct anv_cmd_buffer *cmd_buffer);

VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);

@@ -1299,24 +1305,33 @@ struct anv_pipeline_bind_map {
   struct anv_pipeline_binding *                sampler_to_descriptor;
};

struct anv_shader_bin_key {
   uint32_t size;
   uint8_t data[0];
};

struct anv_shader_bin {
   uint32_t ref_cnt;

   const struct anv_shader_bin_key *key;

   struct anv_state kernel;
   uint32_t kernel_size;

   struct anv_pipeline_bind_map bind_map;

   const struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

   /* Prog data follows, then the key, both aligned to 8-bytes */
   struct anv_pipeline_bind_map bind_map;

   /* Prog data follows, then params, then the key, all aligned to 8-bytes */
};

struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      const void *key, uint32_t key_size,
                      const void *kernel, uint32_t kernel_size,
                      const void *prog_data, uint32_t prog_data_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size, const void *prog_data_param,
                      const struct anv_pipeline_bind_map *bind_map);

void
@@ -1337,14 +1352,6 @@ anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
      anv_shader_bin_destroy(device, shader);
}

static inline const struct brw_stage_prog_data *
anv_shader_bin_get_prog_data(const struct anv_shader_bin *shader)
{
   const void *data = shader;
   data += align_u32(sizeof(struct anv_shader_bin), 8);
   return data;
}

struct anv_pipeline {
   struct anv_device *                          device;
   struct anv_batch                             batch;
@@ -1411,7 +1418,7 @@ get_##prefix##_prog_data(struct anv_pipeline *pipeline) \
{ \
   if (anv_pipeline_has_stage(pipeline, stage)) { \
      return (const struct brw_##prefix##_prog_data *) \
         anv_shader_bin_get_prog_data(pipeline->shaders[stage]); \
         pipeline->shaders[stage]->prog_data; \
   } else { \
      return NULL; \
   } \

@@ -200,20 +200,9 @@ genX(EndCommandBuffer)(
    VkCommandBuffer                             commandBuffer)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   struct anv_device *device = cmd_buffer->device;

   anv_cmd_buffer_end_batch_buffer(cmd_buffer);

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      /* The algorithm used to compute the validate list is not threadsafe as
       * it uses the bo->index field.  We have to lock the device around it.
       * Fortunately, the chances for contention here are probably very low.
       */
      pthread_mutex_lock(&device->mutex);
      anv_cmd_buffer_prepare_execbuf(cmd_buffer);
      pthread_mutex_unlock(&device->mutex);
   }

   return VK_SUCCESS;
}

@@ -1883,22 +1872,25 @@ void genX(CmdEndRenderPass)(
}

static void
emit_ps_depth_count(struct anv_batch *batch,
emit_ps_depth_count(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WritePSDepthCount;
      pc.DepthStallEnable        = true;
      pc.Address                 = (struct anv_address) { bo, offset };

      if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
         pc.CommandStreamerStallEnable = true;
   }
}

static void
emit_query_availability(struct anv_batch *batch,
emit_query_availability(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_bo *bo, uint32_t offset)
{
   anv_batch_emit(batch, GENX(PIPE_CONTROL), pc) {
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DestinationAddressType  = DAT_PPGTT;
      pc.PostSyncOperation       = WriteImmediateData;
      pc.Address                 = (struct anv_address) { bo, offset };
@@ -1931,7 +1923,7 @@ void genX(CmdBeginQuery)(

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
      emit_ps_depth_count(cmd_buffer, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot));
      break;

@@ -1951,10 +1943,10 @@ void genX(CmdEndQuery)(

   switch (pool->type) {
   case VK_QUERY_TYPE_OCCLUSION:
      emit_ps_depth_count(&cmd_buffer->batch, &pool->bo,
      emit_ps_depth_count(cmd_buffer, &pool->bo,
                          query * sizeof(struct anv_query_pool_slot) + 8);

      emit_query_availability(&cmd_buffer->batch, &pool->bo,
      emit_query_availability(cmd_buffer, &pool->bo,
                              query * sizeof(struct anv_query_pool_slot) + 16);
      break;

@@ -1996,11 +1988,14 @@ void genX(CmdWriteTimestamp)(
         pc.DestinationAddressType  = DAT_PPGTT;
         pc.PostSyncOperation       = WriteTimestamp;
         pc.Address = (struct anv_address) { &pool->bo, offset };

         if (GEN_GEN == 9 && cmd_buffer->device->info.gt == 4)
            pc.CommandStreamerStallEnable = true;
      }
      break;
   }

   emit_query_availability(&cmd_buffer->batch, &pool->bo, query + 16);
   emit_query_availability(cmd_buffer, &pool->bo, query + 16);
}

#if GEN_GEN > 7 || GEN_IS_HASWELL

@@ -52,7 +52,8 @@ static void
brw_blorp_upload_shader(struct blorp_context *blorp,
                        const void *key, uint32_t key_size,
                        const void *kernel, uint32_t kernel_size,
                        const void *prog_data, uint32_t prog_data_size,
                        const struct brw_stage_prog_data *prog_data,
                        uint32_t prog_data_size,
                        uint32_t *kernel_out, void *prog_data_out)
{
   struct brw_context *brw = blorp->driver_ctx;

@@ -3673,6 +3673,12 @@ lower_fb_write_logical_send(const fs_builder &bld, fs_inst *inst,
       */
      setup_color_payload(bld, key, &sources[length], src0_alpha, 1);
      length++;
   } else if (key->replicate_alpha && inst->target != 0) {
      /* Handle the case when fragment shader doesn't write to draw buffer
       * zero. No need to call setup_color_payload() for src0_alpha because
       * alpha value will be undefined.
       */
      length++;
   }

   setup_color_payload(bld, key, &sources[length], color0, components);

@@ -291,12 +291,18 @@ _mesa_new_shader_program(GLuint name)
 * Clear (free) the shader program state that gets produced by linking.
 */
void
_mesa_clear_shader_program_data(struct gl_shader_program *shProg)
_mesa_clear_shader_program_data(struct gl_context *ctx,
                                struct gl_shader_program *shProg)
{
   unsigned i;
   for (gl_shader_stage sh = 0; sh < MESA_SHADER_STAGES; sh++) {
      if (shProg->_LinkedShaders[sh] != NULL) {
         _mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[sh]);
         shProg->_LinkedShaders[sh] = NULL;
      }
   }

   if (shProg->UniformStorage) {
      for (i = 0; i < shProg->NumUniformStorage; ++i)
      for (unsigned i = 0; i < shProg->NumUniformStorage; ++i)
         _mesa_uniform_detach_all_driver_storage(&shProg->UniformStorage[i]);
      ralloc_free(shProg->UniformStorage);
      shProg->NumUniformStorage = 0;
@@ -347,11 +353,10 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
                               struct gl_shader_program *shProg)
{
   GLuint i;
   gl_shader_stage sh;

   assert(shProg->Type == GL_SHADER_PROGRAM_MESA);

   _mesa_clear_shader_program_data(shProg);
   _mesa_clear_shader_program_data(ctx, shProg);

   if (shProg->AttributeBindings) {
      string_to_uint_map_dtor(shProg->AttributeBindings);
@@ -385,14 +390,6 @@ _mesa_free_shader_program_data(struct gl_context *ctx,
   shProg->TransformFeedback.VaryingNames = NULL;
   shProg->TransformFeedback.NumVarying = 0;

   for (sh = 0; sh < MESA_SHADER_STAGES; sh++) {
      if (shProg->_LinkedShaders[sh] != NULL) {
         _mesa_delete_linked_shader(ctx, shProg->_LinkedShaders[sh]);
         shProg->_LinkedShaders[sh] = NULL;
      }
   }

   free(shProg->Label);
   shProg->Label = NULL;
}

@@ -99,7 +99,8 @@ extern struct gl_shader_program *
_mesa_new_shader_program(GLuint name);

extern void
_mesa_clear_shader_program_data(struct gl_shader_program *shProg);
_mesa_clear_shader_program_data(struct gl_context *ctx,
                                struct gl_shader_program *shProg);

extern void
_mesa_free_shader_program_data(struct gl_context *ctx,

@@ -3052,7 +3052,7 @@ _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   unsigned int i;

   _mesa_clear_shader_program_data(prog);
   _mesa_clear_shader_program_data(ctx, prog);

   prog->LinkStatus = GL_TRUE;

@@ -772,9 +772,9 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,

         int i = u_bit_scan(&writemask);

         /* before emitting the instruction, see if we have to adjust store
         /* before emitting the instruction, see if we have to adjust load / store
          * address */
         if (i > 1 && inst->op == TGSI_OPCODE_STORE &&
         if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || inst->op == TGSI_OPCODE_STORE) &&
             addr.file == PROGRAM_UNDEFINED) {
            /* We have to advance the buffer address by 16 */
            addr = get_temp(glsl_type::uint_type);
@@ -782,7 +782,6 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
                     inst->src[0], st_src_reg_for_int(16));
         }

         /* first time use previous instruction */
         if (dinst == NULL) {
            dinst = inst;
@@ -802,11 +801,10 @@ glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, unsigned op,
               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
               dinst->dst[j].index = initial_dst_idx[j];
               if (i > 1) {
                  if (dinst->op == TGSI_OPCODE_STORE) {
                  if (dinst->op == TGSI_OPCODE_LOAD || dinst->op == TGSI_OPCODE_STORE)
                     dinst->src[0] = addr;
                  } else {
                  if (dinst->op != TGSI_OPCODE_STORE)
                     dinst->dst[j].index++;
                  }
               }
            } else {
               /* if we aren't writing to a double, just get the bit of the initial writemask

@@ -430,8 +430,12 @@ st_create_texture_sampler_view_from_stobj(struct st_context *st,
      templ.u.tex.first_level = stObj->base.MinLevel + stObj->base.BaseLevel;
      templ.u.tex.last_level = last_level(stObj);
      assert(templ.u.tex.first_level <= templ.u.tex.last_level);
      templ.u.tex.first_layer = stObj->base.MinLayer;
      templ.u.tex.last_layer = last_layer(stObj);
      if (stObj->layer_override) {
         templ.u.tex.first_layer = templ.u.tex.last_layer = stObj->layer_override;
      } else {
         templ.u.tex.first_layer = stObj->base.MinLayer;
         templ.u.tex.last_layer = last_layer(stObj);
      }
      assert(templ.u.tex.first_layer <= templ.u.tex.last_layer);
      templ.target = gl_target_to_pipe(stObj->base.Target);
   }
@@ -478,8 +482,11 @@ st_get_texture_sampler_view_from_stobj(struct st_context *st,
         assert(stObj->base.MinLevel + stObj->base.BaseLevel ==
                view->u.tex.first_level);
         assert(last_level(stObj) == view->u.tex.last_level);
         assert(stObj->base.MinLayer == view->u.tex.first_layer);
         assert(last_layer(stObj) == view->u.tex.last_layer);
         assert(stObj->layer_override || stObj->base.MinLayer == view->u.tex.first_layer);
         assert(stObj->layer_override || last_layer(stObj) == view->u.tex.last_layer);
         assert(!stObj->layer_override ||
                (stObj->layer_override == view->u.tex.first_layer &&
                 stObj->layer_override == view->u.tex.last_layer));
      }
   }
   else {

@@ -108,6 +108,15 @@ struct st_texture_object
    */
   enum pipe_format surface_format;

   /* When non-zero, samplers should use this layer instead of the one
    * specified by the GL state.
    *
    * This is used for VDPAU interop, where imported pipe_resources may be
    * array textures (containing layers with different fields) even though the
    * GL state describes one non-array texture per field.
    */
   uint layer_override;

   /** The glsl version of the shader seen during the previous validation */
   unsigned prev_glsl_version;
   /** The value of the sampler's sRGBDecode state at the previous validation */

@@ -189,8 +189,8 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
   struct st_texture_image *stImage = st_texture_image(texImage);

   struct pipe_resource *res;
   struct pipe_sampler_view templ, **sampler_view;
   mesa_format texFormat;
   uint layer_override = 0;

   if (output) {
      res = st_vdpau_output_surface_dma_buf(ctx, vdpSurface);
@@ -201,8 +201,10 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
   } else {
      res = st_vdpau_video_surface_dma_buf(ctx, vdpSurface, index);

      if (!res)
      if (!res) {
         res = st_vdpau_video_surface_gallium(ctx, vdpSurface, index);
         layer_override = index & 1;
      }
   }

   if (!res) {
@@ -233,18 +235,8 @@ st_vdpau_map_surface(struct gl_context *ctx, GLenum target, GLenum access,
   st_texture_release_all_sampler_views(st, stObj);
   pipe_resource_reference(&stImage->pt, res);

   u_sampler_view_default_template(&templ, res, res->format);
   templ.u.tex.first_layer = index & 1;
   templ.u.tex.last_layer = index & 1;
   templ.swizzle_r = GET_SWZ(stObj->base._Swizzle, 0);
   templ.swizzle_g = GET_SWZ(stObj->base._Swizzle, 1);
   templ.swizzle_b = GET_SWZ(stObj->base._Swizzle, 2);
   templ.swizzle_a = GET_SWZ(stObj->base._Swizzle, 3);

   sampler_view = st_texture_get_sampler_view(st, stObj);
   *sampler_view = st->pipe->create_sampler_view(st->pipe, res, &templ);

   stObj->surface_format = res->format;
   stObj->layer_override = layer_override;

   _mesa_dirty_texobj(ctx, texObj);
   pipe_resource_reference(&res, NULL);
@@ -264,6 +256,8 @@ st_vdpau_unmap_surface(struct gl_context *ctx, GLenum target, GLenum access,
   st_texture_release_all_sampler_views(st, stObj);
   pipe_resource_reference(&stImage->pt, NULL);

   stObj->layer_override = 0;

   _mesa_dirty_texobj(ctx, texObj);

   st_flush(st, NULL, 0);

@@ -117,6 +117,8 @@ wsi_x11_get_connection(struct wsi_device *wsi_dev,

      struct wsi_x11_connection *wsi_conn =
         wsi_x11_connection_create(alloc, conn);
      if (!wsi_conn)
         return NULL;

      pthread_mutex_lock(&wsi->mutex);

@@ -889,6 +891,10 @@ wsi_x11_finish_wsi(struct wsi_device *wsi_device,
      (struct wsi_x11 *)wsi_device->wsi[VK_ICD_WSI_PLATFORM_XCB];

   if (wsi) {
      struct hash_entry *entry;
      hash_table_foreach(wsi->connections, entry)
         wsi_x11_connection_destroy(alloc, entry->data);

      _mesa_hash_table_destroy(wsi->connections, NULL);

      pthread_mutex_destroy(&wsi->mutex);