Compare commits
40 Commits
mesa-19.0.1
...
mesa-19.0.3
Author | SHA1 | Date | |
---|---|---|---|
|
c8cdee5dc3 | ||
|
5cb685a3b8 | ||
|
44ddb884c8 | ||
|
ba1bf6c3ea | ||
|
f223fb98e9 | ||
|
91671ec1f4 | ||
|
b509068164 | ||
|
2397f5d99d | ||
|
ac1ffeab1d | ||
|
229c4abde3 | ||
|
b5ea4378c3 | ||
|
23abb7d310 | ||
|
32e08b2397 | ||
|
bde36e0736 | ||
|
3400359432 | ||
|
ce4b6974cd | ||
|
0ffd4c744d | ||
|
77dbb70e5c | ||
|
332da02f27 | ||
|
2e63686268 | ||
|
f9eaa873cf | ||
|
aacefed521 | ||
|
d41acb4c9e | ||
|
2964ee3ad0 | ||
|
349759165c | ||
|
20db3b0e46 | ||
|
57b7dbbb21 | ||
|
b493686860 | ||
|
73bc3248f4 | ||
|
d1f4c96919 | ||
|
b7769cdfb7 | ||
|
e46e3bfd13 | ||
|
a4d5161d42 | ||
|
a1c30b8b78 | ||
|
9987a3d448 | ||
|
891c4ff633 | ||
|
a175dffe84 | ||
|
29bfb1af10 | ||
|
dc6f00d53e | ||
|
ba3eb3c938 |
@@ -49,7 +49,6 @@ def main():
|
||||
if os.path.lexists(to):
|
||||
os.unlink(to)
|
||||
os.makedirs(to)
|
||||
shutil.copy(args.megadriver, master)
|
||||
|
||||
for driver in args.drivers:
|
||||
abs_driver = os.path.join(to, driver)
|
||||
@@ -71,7 +70,14 @@ def main():
|
||||
name, ext = os.path.splitext(name)
|
||||
finally:
|
||||
os.chdir(ret)
|
||||
|
||||
# Remove meson-created master .so and symlinks
|
||||
os.unlink(master)
|
||||
name, ext = os.path.splitext(master)
|
||||
while ext != '.so':
|
||||
if os.path.lexists(name):
|
||||
os.unlink(name)
|
||||
name, ext = os.path.splitext(name)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
@@ -31,7 +31,8 @@ Compatibility contexts may report a lower version depending on each driver.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
f1dd1980ed628edea3935eed7974fbc5d8353e9578c562728b880d63ac613dbd mesa-19.0.1.tar.gz
|
||||
6884163c0ea9e4c98378ab8fecd72fe7b5f437713a14471beda378df247999d4 mesa-19.0.1.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
122
docs/relnotes/19.0.2.html
Normal file
122
docs/relnotes/19.0.2.html
Normal file
@@ -0,0 +1,122 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 19.0.2 Release Notes / April 10, 2019</h1>
|
||||
|
||||
<p>
|
||||
Mesa 19.0.2 is a bug fix release which fixes bugs found since the 19.0.1 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 19.0.2 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation.
|
||||
Compatibility contexts may report a lower version depending on each driver.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
SHA256: eb972fc11d4e1261d34ec0b91a701f158d4870c0428fb108353ae7eab64b1118 mesa-19.0.2.tar.gz
|
||||
SHA256: 1a2edc3ce56906a676c91e6851298db45903df1f5cb9827395a922c1452db802 mesa-19.0.2.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108766">Bug 108766</a> - Mesa built with meson has RPATH entries</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=109648">Bug 109648</a> - AMD Raven hang during va-api decoding</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110257">Bug 110257</a> - Major artifacts in mpeg2 vaapi hw decoding</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110259">Bug 110259</a> - radv: Sampling depth-stencil image in GENERAL layout returns nothing but zero (regression, bisected)</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
|
||||
<p>Boyuan Zhang (1):</p>
|
||||
<ul>
|
||||
<li>st/va: reverse qt matrix back to its original order</li>
|
||||
</ul>
|
||||
|
||||
<p>Caio Marcelo de Oliveira Filho (1):</p>
|
||||
<ul>
|
||||
<li>nir: Take if_uses into account when repairing SSA</li>
|
||||
</ul>
|
||||
|
||||
<p>Dylan Baker (2):</p>
|
||||
<ul>
|
||||
<li>docs: Add SHA256 sums for mesa 19.0.1</li>
|
||||
<li>VERSION: bump version for 19.0.2</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (3):</p>
|
||||
<ul>
|
||||
<li>dri3: Return the current swap interval from glXGetSwapIntervalMESA().</li>
|
||||
<li>v3d: Bump the maximum texture size to 4k for V3D 4.x.</li>
|
||||
<li>v3d: Don't try to use the TFU blit path if a scissor is enabled.</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Engestrom (1):</p>
|
||||
<ul>
|
||||
<li>meson: strip rpath from megadrivers</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (1):</p>
|
||||
<ul>
|
||||
<li>Revert "anv/radv: release memory allocated by glsl types during spirv_to_nir"</li>
|
||||
</ul>
|
||||
|
||||
<p>Karol Herbst (1):</p>
|
||||
<ul>
|
||||
<li>nir/print: fix printing the image_array intrinsic index</li>
|
||||
</ul>
|
||||
|
||||
<p>Leo Liu (2):</p>
|
||||
<ul>
|
||||
<li>radeon/vcn: add H.264 constrained baseline support</li>
|
||||
<li>radeon/vcn/vp9: search the render target from the whole list</li>
|
||||
</ul>
|
||||
|
||||
<p>Lionel Landwerlin (1):</p>
|
||||
<ul>
|
||||
<li>intel: add dependency on genxml generated files</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: fix assertion failure by using the correct type</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (2):</p>
|
||||
<ul>
|
||||
<li>radv: skip updating depth/color metadata for conditional rendering</li>
|
||||
<li>radv: do not always initialize HTILE in compressed state</li>
|
||||
</ul>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
147
docs/relnotes/19.0.3.html
Normal file
147
docs/relnotes/19.0.3.html
Normal file
@@ -0,0 +1,147 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 19.0.3 Release Notes / April 24, 2019</h1>
|
||||
|
||||
<p>
|
||||
Mesa 19.0.3 is a bug fix release which fixes bugs found since the 19.0.2 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 19.0.3 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation.
|
||||
Compatibility contexts may report a lower version depending on each driver.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
|
||||
<p>N/A</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=108879">Bug 108879</a> - [CIK] [regression] All opencl apps hangs indefinitely in si_create_context</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110201">Bug 110201</a> - [ivb] mesa 19.0.0 breaks rendering in kitty</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110356">Bug 110356</a> - install_megadrivers.py creates new dangling symlink [bisected]</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=110441">Bug 110441</a> - [llvmpipe] complex-loop-analysis-bug regression</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Andres Gomez (1):</p>
|
||||
<ul>
|
||||
<li>glsl/linker: location aliasing requires types to have the same width</li>
|
||||
</ul>
|
||||
|
||||
<p>Bas Nieuwenhuizen (1):</p>
|
||||
<ul>
|
||||
<li>ac: Move has_local_buffers disable to radeonsi.</li>
|
||||
</ul>
|
||||
|
||||
<p>Chia-I Wu (1):</p>
|
||||
<ul>
|
||||
<li>virgl: fix fence fd version check</li>
|
||||
</ul>
|
||||
|
||||
<p>Danylo Piliaiev (1):</p>
|
||||
<ul>
|
||||
<li>intel/compiler: Do not reswizzle dst if instruction writes to flag register</li>
|
||||
</ul>
|
||||
|
||||
<p>Dylan Baker (2):</p>
|
||||
<ul>
|
||||
<li>docs: Add sha256 sums for 19.0.2</li>
|
||||
<li>Bump version for 19.0.3</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Anholt (1):</p>
|
||||
<ul>
|
||||
<li>nir: Fix deref offset calculation for structs.</li>
|
||||
</ul>
|
||||
|
||||
<p>Eric Engestrom (1):</p>
|
||||
<ul>
|
||||
<li>meson: remove meson-created megadrivers symlinks</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (2):</p>
|
||||
<ul>
|
||||
<li>anv/pipeline: Fix MEDIA_VFE_STATE::PerThreadScratchSpace on gen7</li>
|
||||
<li>anv: Add a #define for the max binding table size</li>
|
||||
</ul>
|
||||
|
||||
<p>Juan A. Suarez Romero (1):</p>
|
||||
<ul>
|
||||
<li>meson: Add dependency on genxml to anvil genfiles</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>glsl: Set location on structure-split sampler uniform variables</li>
|
||||
<li>Revert "glsl: Set location on structure-split sampler uniform variables"</li>
|
||||
</ul>
|
||||
|
||||
<p>Lionel Landwerlin (2):</p>
|
||||
<ul>
|
||||
<li>anv: fix uninitialized pthread cond clock domain</li>
|
||||
<li>intel/devinfo: fix missing num_thread_per_eu on ICL</li>
|
||||
</ul>
|
||||
|
||||
<p>Lubomir Rintel (2):</p>
|
||||
<ul>
|
||||
<li>gallivm: guess CPU features also on ARM</li>
|
||||
<li>gallivm: disable NEON instructions if they are not supported</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: use CP DMA for the null const buffer clear on CIK</li>
|
||||
</ul>
|
||||
|
||||
<p>Rhys Perry (1):</p>
|
||||
<ul>
|
||||
<li>nir,ac/nir: fix cube_face_coord</li>
|
||||
</ul>
|
||||
|
||||
<p>Roland Scheidegger (1):</p>
|
||||
<ul>
|
||||
<li>gallivm: fix bogus assert in get_indirect_index</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (2):</p>
|
||||
<ul>
|
||||
<li>ac/nir: only use the new raw/struct image atomic intrinsics with LLVM 9+</li>
|
||||
<li>radv: do not load vertex attributes that are not provided by the pipeline</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -367,9 +367,7 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
|
||||
info->has_syncobj_wait_for_submit = info->has_syncobj && info->drm_minor >= 20;
|
||||
info->has_fence_to_handle = info->has_syncobj && info->drm_minor >= 21;
|
||||
info->has_ctx_priority = info->drm_minor >= 22;
|
||||
/* TODO: Enable this once the kernel handles it efficiently. */
|
||||
info->has_local_buffers = info->drm_minor >= 20 &&
|
||||
!info->has_dedicated_vram;
|
||||
info->has_local_buffers = info->drm_minor >= 20;
|
||||
info->kernel_flushes_hdp_before_ib = true;
|
||||
info->htile_cmask_support_1d_tiling = true;
|
||||
info->si_TA_CS_BC_BASE_ADDR_allowed = true;
|
||||
|
@@ -1019,10 +1019,17 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
|
||||
LLVMValueRef in[3];
|
||||
for (unsigned chan = 0; chan < 3; chan++)
|
||||
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
|
||||
results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
|
||||
results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
|
||||
ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
|
||||
results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
|
||||
ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
|
||||
ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
|
||||
results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
|
||||
results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
|
||||
LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
|
||||
results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
|
||||
results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
|
||||
result = ac_build_gather_values(&ctx->ac, results, 2);
|
||||
break;
|
||||
}
|
||||
@@ -2532,7 +2539,10 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
|
||||
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
|
||||
ctx->ac.i32_0, ""); /* vindex */
|
||||
params[param_count++] = ctx->ac.i32_0; /* voffset */
|
||||
if (HAVE_LLVM >= 0x800) {
|
||||
if (HAVE_LLVM >= 0x900) {
|
||||
/* XXX: The new raw/struct atomic intrinsics are buggy
|
||||
* with LLVM 8, see r358579.
|
||||
*/
|
||||
params[param_count++] = ctx->ac.i32_0; /* soffset */
|
||||
params[param_count++] = ctx->ac.i32_0; /* slc */
|
||||
|
||||
|
@@ -1258,7 +1258,7 @@ radv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
|
||||
if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
|
||||
++reg_count;
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, 0));
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + reg_count, cmd_buffer->state.predicating));
|
||||
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
|
||||
S_370_WR_CONFIRM(1) |
|
||||
S_370_ENGINE_SEL(V_370_PFP));
|
||||
@@ -1282,7 +1282,7 @@ radv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
|
||||
uint64_t va = radv_buffer_get_va(image->bo);
|
||||
va += image->offset + image->tc_compat_zrange_offset;
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
|
||||
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
|
||||
S_370_WR_CONFIRM(1) |
|
||||
S_370_ENGINE_SEL(V_370_PFP));
|
||||
@@ -1476,7 +1476,7 @@ radv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
|
||||
|
||||
assert(radv_image_has_cmask(image) || radv_image_has_dcc(image));
|
||||
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, 0));
|
||||
radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 4, cmd_buffer->state.predicating));
|
||||
radeon_emit(cs, S_370_DST_SEL(V_370_MEM) |
|
||||
S_370_WR_CONFIRM(1) |
|
||||
S_370_ENGINE_SEL(V_370_PFP));
|
||||
@@ -4407,8 +4407,14 @@ static void radv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffe
|
||||
return;
|
||||
|
||||
if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
|
||||
/* TODO: merge with the clear if applicable */
|
||||
radv_initialize_htile(cmd_buffer, image, range, 0);
|
||||
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
|
||||
|
||||
if (radv_layout_is_htile_compressed(image, dst_layout,
|
||||
dst_queue_mask)) {
|
||||
clear_value = 0;
|
||||
}
|
||||
|
||||
radv_initialize_htile(cmd_buffer, image, range, clear_value);
|
||||
} else if (!radv_layout_is_htile_compressed(image, src_layout, src_queue_mask) &&
|
||||
radv_layout_is_htile_compressed(image, dst_layout, dst_queue_mask)) {
|
||||
uint32_t clear_value = vk_format_is_stencil(image->vk_format) ? 0xfffff30f : 0xfffc000f;
|
||||
|
@@ -48,7 +48,6 @@
|
||||
#include "util/build_id.h"
|
||||
#include "util/debug.h"
|
||||
#include "util/mesa-sha1.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
|
||||
static int
|
||||
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
|
||||
@@ -611,7 +610,6 @@ void radv_DestroyInstance(
|
||||
|
||||
VG(VALGRIND_DESTROY_MEMPOOL(instance));
|
||||
|
||||
_mesa_glsl_release_types();
|
||||
_mesa_locale_fini();
|
||||
|
||||
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
||||
|
@@ -2027,10 +2027,32 @@ handle_vs_input_decl(struct radv_shader_context *ctx,
|
||||
|
||||
t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
|
||||
|
||||
input = ac_build_buffer_load_format(&ctx->ac, t_list,
|
||||
buffer_index,
|
||||
ctx->ac.i32_0,
|
||||
num_channels, false, true);
|
||||
if (ctx->options->key.vs.vertex_attribute_provided & (1u << attrib_index)) {
|
||||
input = ac_build_buffer_load_format(&ctx->ac, t_list,
|
||||
buffer_index,
|
||||
ctx->ac.i32_0,
|
||||
num_channels, false, true);
|
||||
} else {
|
||||
/* Per the Vulkan spec, it's invalid to consume vertex
|
||||
* attributes that are not provided by the pipeline but
|
||||
* some (invalid) apps appear to do that. Fill the
|
||||
* input array with (eg. (0, 0, 0, 1)) to workaround
|
||||
* the problem and to avoid possible GPU hangs.
|
||||
*/
|
||||
LLVMValueRef chan[4];
|
||||
|
||||
/* The input_usage mask might be 0 if input variables
|
||||
* are not removed by the compiler.
|
||||
*/
|
||||
num_channels = CLAMP(num_channels, 1, 4);
|
||||
|
||||
for (unsigned i = 0; i < num_channels; i++) {
|
||||
chan[i] = i == 3 ? ctx->ac.f32_1 : ctx->ac.f32_0;
|
||||
chan[i] = ac_to_float(&ctx->ac, chan[i]);
|
||||
}
|
||||
|
||||
input = ac_build_gather_values(&ctx->ac, chan, num_channels);
|
||||
}
|
||||
|
||||
input = ac_build_expand_to_vec4(&ctx->ac, input, num_channels);
|
||||
|
||||
|
@@ -1922,6 +1922,8 @@ radv_generate_graphics_pipeline_key(struct radv_pipeline *pipeline,
|
||||
}
|
||||
key.vertex_alpha_adjust |= adjust << (2 * location);
|
||||
}
|
||||
|
||||
key.vertex_attribute_provided |= 1 << location;
|
||||
}
|
||||
|
||||
if (pCreateInfo->pTessellationState)
|
||||
@@ -1950,6 +1952,7 @@ radv_fill_shader_keys(struct radv_shader_variant_key *keys,
|
||||
{
|
||||
keys[MESA_SHADER_VERTEX].vs.instance_rate_inputs = key->instance_rate_inputs;
|
||||
keys[MESA_SHADER_VERTEX].vs.alpha_adjust = key->vertex_alpha_adjust;
|
||||
keys[MESA_SHADER_VERTEX].vs.vertex_attribute_provided = key->vertex_attribute_provided;
|
||||
for (unsigned i = 0; i < MAX_VERTEX_ATTRIBS; ++i)
|
||||
keys[MESA_SHADER_VERTEX].vs.instance_rate_divisors[i] = key->instance_rate_divisors[i];
|
||||
|
||||
|
@@ -365,6 +365,7 @@ struct radv_pipeline_cache {
|
||||
struct radv_pipeline_key {
|
||||
uint32_t instance_rate_inputs;
|
||||
uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
|
||||
uint32_t vertex_attribute_provided;
|
||||
uint64_t vertex_alpha_adjust;
|
||||
unsigned tess_input_vertices;
|
||||
uint32_t col_format;
|
||||
|
@@ -66,6 +66,9 @@ struct radv_vs_variant_key {
|
||||
uint32_t instance_rate_inputs;
|
||||
uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
|
||||
|
||||
/* Mask of vertex attributes that are provided by the pipeline. */
|
||||
uint32_t vertex_attribute_provided;
|
||||
|
||||
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
|
||||
* so we may need to fix it up. */
|
||||
uint64_t alpha_adjust;
|
||||
|
@@ -820,8 +820,8 @@
|
||||
|
||||
<packet code="120" name="Tile Binning Mode Cfg" min_ver="41">
|
||||
|
||||
<field name="Height (in pixels)" size="12" start="48" type="uint" minus_one="true"/>
|
||||
<field name="Width (in pixels)" size="12" start="32" type="uint" minus_one="true"/>
|
||||
<field name="Height (in pixels)" size="16" start="48" type="uint" minus_one="true"/>
|
||||
<field name="Width (in pixels)" size="16" start="32" type="uint" minus_one="true"/>
|
||||
|
||||
<field name="Double-buffer in non-ms mode" size="1" start="15" type="bool"/>
|
||||
<field name="Multisample Mode (4x)" size="1" start="14" type="bool"/>
|
||||
|
@@ -32,7 +32,8 @@
|
||||
*/
|
||||
#define V3D_MAX_TEXTURE_SAMPLERS 16
|
||||
|
||||
#define V3D_MAX_MIP_LEVELS 12
|
||||
/* The HW can do 16384 (15), but we run into hangs when we expose that. */
|
||||
#define V3D_MAX_MIP_LEVELS 13
|
||||
|
||||
#define V3D_MAX_SAMPLES 4
|
||||
|
||||
|
@@ -167,6 +167,14 @@ lower_deref(nir_builder *b, struct lower_samplers_as_deref_state *state,
|
||||
} else {
|
||||
var = nir_variable_create(state->shader, nir_var_uniform, type, name);
|
||||
var->data.binding = binding;
|
||||
|
||||
/* Don't set var->data.location. The old structure location could be
|
||||
* used to index into gl_uniform_storage, assuming the full structure
|
||||
* was walked in order. With the new split variables, this invariant
|
||||
* no longer holds and there's no meaningful way to start from a base
|
||||
* location and access a particular array element. Just leave it 0.
|
||||
*/
|
||||
|
||||
_mesa_hash_table_insert_pre_hashed(state->remap_table, hash, name, var);
|
||||
}
|
||||
|
||||
|
@@ -424,28 +424,14 @@ compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
|
||||
|
||||
struct explicit_location_info {
|
||||
ir_variable *var;
|
||||
unsigned numerical_type;
|
||||
bool base_type_is_integer;
|
||||
unsigned base_type_bit_size;
|
||||
unsigned interpolation;
|
||||
bool centroid;
|
||||
bool sample;
|
||||
bool patch;
|
||||
};
|
||||
|
||||
static inline unsigned
|
||||
get_numerical_type(const glsl_type *type)
|
||||
{
|
||||
/* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68,
|
||||
* (Location aliasing):
|
||||
*
|
||||
* "Further, when location aliasing, the aliases sharing the location
|
||||
* must have the same underlying numerical type (floating-point or
|
||||
* integer)
|
||||
*/
|
||||
if (type->is_float() || type->is_double())
|
||||
return GLSL_TYPE_FLOAT;
|
||||
return GLSL_TYPE_INT;
|
||||
}
|
||||
|
||||
static bool
|
||||
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
|
||||
ir_variable *var,
|
||||
@@ -461,14 +447,23 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
|
||||
gl_shader_stage stage)
|
||||
{
|
||||
unsigned last_comp;
|
||||
if (type->without_array()->is_record()) {
|
||||
/* The component qualifier can't be used on structs so just treat
|
||||
* all component slots as used.
|
||||
unsigned base_type_bit_size;
|
||||
const glsl_type *type_without_array = type->without_array();
|
||||
const bool base_type_is_integer =
|
||||
glsl_base_type_is_integer(type_without_array->base_type);
|
||||
const bool is_struct = type_without_array->is_record();
|
||||
if (is_struct) {
|
||||
/* structs don't have a defined underlying base type so just treat all
|
||||
* component slots as used and set the bit size to 0. If there is
|
||||
* location aliasing, we'll fail anyway later.
|
||||
*/
|
||||
last_comp = 4;
|
||||
base_type_bit_size = 0;
|
||||
} else {
|
||||
unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
|
||||
last_comp = component + type->without_array()->vector_elements * dmul;
|
||||
unsigned dmul = type_without_array->is_64bit() ? 2 : 1;
|
||||
last_comp = component + type_without_array->vector_elements * dmul;
|
||||
base_type_bit_size =
|
||||
glsl_base_type_get_bit_size(type_without_array->base_type);
|
||||
}
|
||||
|
||||
while (location < location_limit) {
|
||||
@@ -478,8 +473,22 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
|
||||
&explicit_locations[location][comp];
|
||||
|
||||
if (info->var) {
|
||||
/* Component aliasing is not alloed */
|
||||
if (comp >= component && comp < last_comp) {
|
||||
if (info->var->type->without_array()->is_record() || is_struct) {
|
||||
/* Structs cannot share location since they are incompatible
|
||||
* with any other underlying numerical type.
|
||||
*/
|
||||
linker_error(prog,
|
||||
"%s shader has multiple %sputs sharing the "
|
||||
"same location that don't have the same "
|
||||
"underlying numerical type. Struct variable '%s', "
|
||||
"location %u\n",
|
||||
_mesa_shader_stage_to_string(stage),
|
||||
var->data.mode == ir_var_shader_in ? "in" : "out",
|
||||
is_struct ? var->name : info->var->name,
|
||||
location);
|
||||
return false;
|
||||
} else if (comp >= component && comp < last_comp) {
|
||||
/* Component aliasing is not allowed */
|
||||
linker_error(prog,
|
||||
"%s shader has multiple %sputs explicitly "
|
||||
"assigned to location %d and component %d\n",
|
||||
@@ -488,27 +497,52 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
|
||||
location, comp);
|
||||
return false;
|
||||
} else {
|
||||
/* For all other used components we need to have matching
|
||||
* types, interpolation and auxiliary storage
|
||||
/* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
|
||||
* Qualifiers, Page 67, (Location aliasing):
|
||||
*
|
||||
* " Further, when location aliasing, the aliases sharing the
|
||||
* location must have the same underlying numerical type
|
||||
* and bit width (floating-point or integer, 32-bit versus
|
||||
* 64-bit, etc.) and the same auxiliary storage and
|
||||
* interpolation qualification."
|
||||
*/
|
||||
if (info->numerical_type !=
|
||||
get_numerical_type(type->without_array())) {
|
||||
|
||||
/* If the underlying numerical type isn't integer, implicitly
|
||||
* it will be float or else we would have failed by now.
|
||||
*/
|
||||
if (info->base_type_is_integer != base_type_is_integer) {
|
||||
linker_error(prog,
|
||||
"Varyings sharing the same location must "
|
||||
"have the same underlying numerical type. "
|
||||
"Location %u component %u\n",
|
||||
location, comp);
|
||||
"%s shader has multiple %sputs sharing the "
|
||||
"same location that don't have the same "
|
||||
"underlying numerical type. Location %u "
|
||||
"component %u.\n",
|
||||
_mesa_shader_stage_to_string(stage),
|
||||
var->data.mode == ir_var_shader_in ?
|
||||
"in" : "out", location, comp);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info->base_type_bit_size != base_type_bit_size) {
|
||||
linker_error(prog,
|
||||
"%s shader has multiple %sputs sharing the "
|
||||
"same location that don't have the same "
|
||||
"underlying numerical bit size. Location %u "
|
||||
"component %u.\n",
|
||||
_mesa_shader_stage_to_string(stage),
|
||||
var->data.mode == ir_var_shader_in ?
|
||||
"in" : "out", location, comp);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (info->interpolation != interpolation) {
|
||||
linker_error(prog,
|
||||
"%s shader has multiple %sputs at explicit "
|
||||
"location %u with different interpolation "
|
||||
"settings\n",
|
||||
"%s shader has multiple %sputs sharing the "
|
||||
"same location that don't have the same "
|
||||
"interpolation qualification. Location %u "
|
||||
"component %u.\n",
|
||||
_mesa_shader_stage_to_string(stage),
|
||||
var->data.mode == ir_var_shader_in ?
|
||||
"in" : "out", location);
|
||||
"in" : "out", location, comp);
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -516,17 +550,20 @@ check_location_aliasing(struct explicit_location_info explicit_locations[][4],
|
||||
info->sample != sample ||
|
||||
info->patch != patch) {
|
||||
linker_error(prog,
|
||||
"%s shader has multiple %sputs at explicit "
|
||||
"location %u with different aux storage\n",
|
||||
"%s shader has multiple %sputs sharing the "
|
||||
"same location that don't have the same "
|
||||
"auxiliary storage qualification. Location %u "
|
||||
"component %u.\n",
|
||||
_mesa_shader_stage_to_string(stage),
|
||||
var->data.mode == ir_var_shader_in ?
|
||||
"in" : "out", location);
|
||||
"in" : "out", location, comp);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if (comp >= component && comp < last_comp) {
|
||||
info->var = var;
|
||||
info->numerical_type = get_numerical_type(type->without_array());
|
||||
info->base_type_is_integer = base_type_is_integer;
|
||||
info->base_type_bit_size = base_type_bit_size;
|
||||
info->interpolation = interpolation;
|
||||
info->centroid = centroid;
|
||||
info->sample = sample;
|
||||
|
@@ -31,6 +31,7 @@
|
||||
#include "shader_enums.h"
|
||||
#include "blob.h"
|
||||
#include "c11/threads.h"
|
||||
#include "util/macros.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
#include "main/config.h"
|
||||
@@ -114,6 +115,42 @@ static inline bool glsl_base_type_is_integer(enum glsl_base_type type)
|
||||
type == GLSL_TYPE_IMAGE;
|
||||
}
|
||||
|
||||
static inline unsigned int
|
||||
glsl_base_type_get_bit_size(const enum glsl_base_type base_type)
|
||||
{
|
||||
switch (base_type) {
|
||||
case GLSL_TYPE_BOOL:
|
||||
return 1;
|
||||
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
return 32;
|
||||
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
case GLSL_TYPE_UINT16:
|
||||
case GLSL_TYPE_INT16:
|
||||
return 16;
|
||||
|
||||
case GLSL_TYPE_UINT8:
|
||||
case GLSL_TYPE_INT8:
|
||||
return 8;
|
||||
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
case GLSL_TYPE_INT64:
|
||||
case GLSL_TYPE_UINT64:
|
||||
case GLSL_TYPE_IMAGE:
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
return 64;
|
||||
|
||||
default:
|
||||
unreachable("unknown base type");
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
enum glsl_sampler_dim {
|
||||
GLSL_SAMPLER_DIM_1D = 0,
|
||||
GLSL_SAMPLER_DIM_2D,
|
||||
|
@@ -215,7 +215,7 @@ nir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
|
||||
unsigned field_offset =
|
||||
struct_type_get_field_offset(parent->type, size_align,
|
||||
(*p)->strct.index);
|
||||
nir_iadd(b, offset, nir_imm_int(b, field_offset));
|
||||
offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
|
||||
} else {
|
||||
unreachable("Unsupported deref type");
|
||||
}
|
||||
|
@@ -404,12 +404,21 @@ dst.x = dst.y = 0.0;
|
||||
float absX = fabs(src0.x);
|
||||
float absY = fabs(src0.y);
|
||||
float absZ = fabs(src0.z);
|
||||
if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.y; dst.y = -src0.z; }
|
||||
if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = -src0.y; dst.y = src0.z; }
|
||||
if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.z; dst.y = src0.x; }
|
||||
if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = -src0.z; dst.y = src0.x; }
|
||||
if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.y; dst.y = src0.x; }
|
||||
if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.y; dst.y = -src0.x; }
|
||||
|
||||
float ma = 0.0;
|
||||
if (absX >= absY && absX >= absZ) { ma = 2 * src0.x; }
|
||||
if (absY >= absX && absY >= absZ) { ma = 2 * src0.y; }
|
||||
if (absZ >= absX && absZ >= absY) { ma = 2 * src0.z; }
|
||||
|
||||
if (src0.x >= 0 && absX >= absY && absX >= absZ) { dst.x = -src0.z; dst.y = -src0.y; }
|
||||
if (src0.x < 0 && absX >= absY && absX >= absZ) { dst.x = src0.z; dst.y = -src0.y; }
|
||||
if (src0.y >= 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = src0.z; }
|
||||
if (src0.y < 0 && absY >= absX && absY >= absZ) { dst.x = src0.x; dst.y = -src0.z; }
|
||||
if (src0.z >= 0 && absZ >= absX && absZ >= absY) { dst.x = src0.x; dst.y = -src0.y; }
|
||||
if (src0.z < 0 && absZ >= absX && absZ >= absY) { dst.x = -src0.x; dst.y = -src0.y; }
|
||||
|
||||
dst.x = dst.x / ma + 0.5;
|
||||
dst.y = dst.y / ma + 0.5;
|
||||
""")
|
||||
|
||||
unop_horiz("cube_face_index", 1, tfloat32, 3, tfloat32, """
|
||||
|
@@ -812,8 +812,8 @@ print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state)
|
||||
assert(dim < ARRAY_SIZE(dim_name) && dim_name[dim]);
|
||||
fprintf(fp, " image_dim=%s", dim_name[dim]);
|
||||
} else if (idx == NIR_INTRINSIC_IMAGE_ARRAY) {
|
||||
bool array = nir_intrinsic_image_dim(instr);
|
||||
fprintf(fp, " image_dim=%s", array ? "true" : "false");
|
||||
bool array = nir_intrinsic_image_array(instr);
|
||||
fprintf(fp, " image_array=%s", array ? "true" : "false");
|
||||
} else if (idx == NIR_INTRINSIC_DESC_TYPE) {
|
||||
VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);
|
||||
fprintf(fp, " desc_type=%s", vulkan_descriptor_type_name(desc_type));
|
||||
|
@@ -77,6 +77,15 @@ repair_ssa_def(nir_ssa_def *def, void *void_state)
|
||||
}
|
||||
}
|
||||
|
||||
nir_foreach_if_use(src, def) {
|
||||
nir_block *block_before_if =
|
||||
nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
|
||||
if (!nir_block_dominates(def->parent_instr->block, block_before_if)) {
|
||||
is_valid = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (is_valid)
|
||||
return true;
|
||||
|
||||
@@ -98,6 +107,15 @@ repair_ssa_def(nir_ssa_def *def, void *void_state)
|
||||
}
|
||||
}
|
||||
|
||||
nir_foreach_if_use_safe(src, def) {
|
||||
nir_block *block_before_if =
|
||||
nir_cf_node_as_block(nir_cf_node_prev(&src->parent_if->cf_node));
|
||||
if (!nir_block_dominates(def->parent_instr->block, block_before_if)) {
|
||||
nir_if_rewrite_condition(src->parent_if, nir_src_for_ssa(
|
||||
nir_phi_builder_value_get_block_def(val, block_before_if)));
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@@ -97,37 +97,7 @@ unsigned glsl_atomic_size(const struct glsl_type *type);
|
||||
static inline unsigned
|
||||
glsl_get_bit_size(const struct glsl_type *type)
|
||||
{
|
||||
switch (glsl_get_base_type(type)) {
|
||||
case GLSL_TYPE_BOOL:
|
||||
return 1;
|
||||
|
||||
case GLSL_TYPE_INT:
|
||||
case GLSL_TYPE_UINT:
|
||||
case GLSL_TYPE_FLOAT: /* TODO handle mediump */
|
||||
case GLSL_TYPE_SUBROUTINE:
|
||||
return 32;
|
||||
|
||||
case GLSL_TYPE_FLOAT16:
|
||||
case GLSL_TYPE_UINT16:
|
||||
case GLSL_TYPE_INT16:
|
||||
return 16;
|
||||
|
||||
case GLSL_TYPE_UINT8:
|
||||
case GLSL_TYPE_INT8:
|
||||
return 8;
|
||||
|
||||
case GLSL_TYPE_DOUBLE:
|
||||
case GLSL_TYPE_INT64:
|
||||
case GLSL_TYPE_UINT64:
|
||||
case GLSL_TYPE_IMAGE:
|
||||
case GLSL_TYPE_SAMPLER:
|
||||
return 64;
|
||||
|
||||
default:
|
||||
unreachable("unknown base type");
|
||||
}
|
||||
|
||||
return 0;
|
||||
return glsl_base_type_get_bit_size(glsl_get_base_type(type));
|
||||
}
|
||||
|
||||
bool glsl_type_is_16bit(const struct glsl_type *type);
|
||||
|
@@ -556,11 +556,11 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
|
||||
|
||||
llvm::SmallVector<std::string, 16> MAttrs;
|
||||
|
||||
#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
#if HAVE_LLVM >= 0x0400
|
||||
/* llvm-3.7+ implements sys::getHostCPUFeatures for x86,
|
||||
* which allows us to enable/disable code generation based
|
||||
* on the results of cpuid.
|
||||
#if HAVE_LLVM >= 0x0400 && (defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) || defined(PIPE_ARCH_ARM))
|
||||
/* llvm-3.3+ implements sys::getHostCPUFeatures for Arm
|
||||
* and llvm-3.7+ for x86, which allows us to enable/disable
|
||||
* code generation based on the results of cpuid on these
|
||||
* architectures.
|
||||
*/
|
||||
llvm::StringMap<bool> features;
|
||||
llvm::sys::getHostCPUFeatures(features);
|
||||
@@ -570,7 +570,7 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
|
||||
++f) {
|
||||
MAttrs.push_back(((*f).second ? "+" : "-") + (*f).first().str());
|
||||
}
|
||||
#else
|
||||
#elif defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
|
||||
/*
|
||||
* We need to unset attributes because sometimes LLVM mistakenly assumes
|
||||
* certain features are present given the processor name.
|
||||
@@ -625,6 +625,12 @@ lp_build_create_jit_compiler_for_module(LLVMExecutionEngineRef *OutJIT,
|
||||
MAttrs.push_back("-avx512vl");
|
||||
#endif
|
||||
#endif
|
||||
#if defined(PIPE_ARCH_ARM)
|
||||
if (!util_cpu_caps.has_neon) {
|
||||
MAttrs.push_back("-neon");
|
||||
MAttrs.push_back("-crypto");
|
||||
MAttrs.push_back("-vfp2");
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(PIPE_ARCH_PPC)
|
||||
|
@@ -1108,7 +1108,7 @@ get_indirect_index(struct lp_build_tgsi_soa_context *bld,
|
||||
* larger than the declared size but smaller than the buffer size.
|
||||
*/
|
||||
if (reg_file != TGSI_FILE_CONSTANT) {
|
||||
assert(index_limit > 0);
|
||||
assert(index_limit >= 0);
|
||||
max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
|
||||
uint_bld->type, index_limit);
|
||||
|
||||
|
@@ -64,6 +64,7 @@ static rvcn_dec_message_avc_t get_h264_msg(struct radeon_decoder *dec,
|
||||
memset(&result, 0, sizeof(result));
|
||||
switch (pic->base.profile) {
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_BASELINE:
|
||||
case PIPE_VIDEO_PROFILE_MPEG4_AVC_CONSTRAINED_BASELINE:
|
||||
result.profile = RDECODE_H264_PROFILE_BASELINE;
|
||||
break;
|
||||
|
||||
@@ -490,7 +491,7 @@ static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec,
|
||||
|
||||
assert(dec->base.max_references + 1 <= 16);
|
||||
|
||||
for (i = 0 ; i < dec->base.max_references + 1 ; ++i) {
|
||||
for (i = 0 ; i < 16 ; ++i) {
|
||||
if (dec->render_pic_list[i] && dec->render_pic_list[i] == target) {
|
||||
result.curr_pic_idx =
|
||||
(uintptr_t)vl_video_buffer_get_associated_data(target, &dec->base);
|
||||
|
@@ -272,7 +272,7 @@ void vi_dcc_clear_level(struct si_context *sctx,
|
||||
}
|
||||
|
||||
si_clear_buffer(sctx, dcc_buffer, dcc_offset, clear_size,
|
||||
&clear_value, 4, SI_COHERENCY_CB_META);
|
||||
&clear_value, 4, SI_COHERENCY_CB_META, false);
|
||||
}
|
||||
|
||||
/* Set the same micro tile mode as the destination of the last MSAA resolve.
|
||||
@@ -505,7 +505,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
|
||||
uint32_t clear_value = 0xCCCCCCCC;
|
||||
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
|
||||
tex->cmask_offset, tex->surface.cmask_size,
|
||||
&clear_value, 4, SI_COHERENCY_CB_META);
|
||||
&clear_value, 4, SI_COHERENCY_CB_META, false);
|
||||
fmask_decompress_needed = true;
|
||||
}
|
||||
|
||||
@@ -533,7 +533,7 @@ static void si_do_fast_color_clear(struct si_context *sctx,
|
||||
uint32_t clear_value = 0;
|
||||
si_clear_buffer(sctx, &tex->cmask_buffer->b.b,
|
||||
tex->cmask_offset, tex->surface.cmask_size,
|
||||
&clear_value, 4, SI_COHERENCY_CB_META);
|
||||
&clear_value, 4, SI_COHERENCY_CB_META, false);
|
||||
eliminate_needed = true;
|
||||
}
|
||||
|
||||
|
@@ -177,7 +177,8 @@ static void si_compute_do_clear_or_copy(struct si_context *sctx,
|
||||
|
||||
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||
uint64_t offset, uint64_t size, uint32_t *clear_value,
|
||||
uint32_t clear_value_size, enum si_coherency coher)
|
||||
uint32_t clear_value_size, enum si_coherency coher,
|
||||
bool force_cpdma)
|
||||
{
|
||||
if (!size)
|
||||
return;
|
||||
@@ -241,7 +242,8 @@ void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||
* about buffer placements.
|
||||
*/
|
||||
if (clear_value_size > 4 ||
|
||||
(clear_value_size == 4 &&
|
||||
(!force_cpdma &&
|
||||
clear_value_size == 4 &&
|
||||
offset % 4 == 0 &&
|
||||
(size > 32*1024 || sctx->chip_class <= VI))) {
|
||||
si_compute_do_clear_or_copy(sctx, dst, offset, NULL, 0,
|
||||
@@ -282,7 +284,7 @@ static void si_pipe_clear_buffer(struct pipe_context *ctx,
|
||||
coher = SI_COHERENCY_SHADER;
|
||||
|
||||
si_clear_buffer((struct si_context*)ctx, dst, offset, size, (uint32_t*)clear_value,
|
||||
clear_value_size, coher);
|
||||
clear_value_size, coher, false);
|
||||
}
|
||||
|
||||
void si_copy_buffer(struct si_context *sctx,
|
||||
|
@@ -609,11 +609,14 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||
si_begin_new_gfx_cs(sctx);
|
||||
|
||||
if (sctx->chip_class == CIK) {
|
||||
/* Clear the NULL constant buffer, because loads should return zeros. */
|
||||
/* Clear the NULL constant buffer, because loads should return zeros.
|
||||
* Note that this forces CP DMA to be used, because clover deadlocks
|
||||
* for some reason when the compute codepath is used.
|
||||
*/
|
||||
uint32_t clear_value = 0;
|
||||
si_clear_buffer(sctx, sctx->null_const_buf.buffer, 0,
|
||||
sctx->null_const_buf.buffer->width0,
|
||||
&clear_value, 4, SI_COHERENCY_SHADER);
|
||||
&clear_value, 4, SI_COHERENCY_SHADER, true);
|
||||
}
|
||||
return &sctx->b;
|
||||
fail:
|
||||
|
@@ -1168,7 +1168,8 @@ unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,
|
||||
enum si_cache_policy cache_policy);
|
||||
void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,
|
||||
uint64_t offset, uint64_t size, uint32_t *clear_value,
|
||||
uint32_t clear_value_size, enum si_coherency coher);
|
||||
uint32_t clear_value_size, enum si_coherency coher,
|
||||
bool force_cpdma);
|
||||
void si_copy_buffer(struct si_context *sctx,
|
||||
struct pipe_resource *dst, struct pipe_resource *src,
|
||||
uint64_t dst_offset, uint64_t src_offset, unsigned size);
|
||||
|
@@ -186,7 +186,7 @@ static void si_emit_guardband(struct si_context *ctx)
|
||||
ctx->chip_class >= VI ? 16 : MAX2(ctx->screen->se_tile_repeat, 16);
|
||||
|
||||
/* Indexed by quantization modes */
|
||||
static unsigned max_viewport_size[] = {65535, 16383, 4095};
|
||||
static int max_viewport_size[] = {65535, 16383, 4095};
|
||||
|
||||
/* Ensure that the whole viewport stays representable in
|
||||
* absolute coordinates.
|
||||
|
@@ -309,7 +309,7 @@ void si_test_dma(struct si_screen *sscreen)
|
||||
/* clear dst pixels */
|
||||
uint32_t zero = 0;
|
||||
si_clear_buffer(sctx, dst, 0, sdst->surface.surf_size, &zero, 4,
|
||||
SI_COHERENCY_SHADER);
|
||||
SI_COHERENCY_SHADER, false);
|
||||
memset(dst_cpu.ptr, 0, dst_cpu.layer_stride * tdst.array_size);
|
||||
|
||||
/* preparation */
|
||||
|
@@ -491,7 +491,8 @@ v3d_tfu_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
|
||||
if ((info->mask & PIPE_MASK_RGBA) == 0)
|
||||
return false;
|
||||
|
||||
if (info->dst.box.x != 0 ||
|
||||
if (info->scissor_enable ||
|
||||
info->dst.box.x != 0 ||
|
||||
info->dst.box.y != 0 ||
|
||||
info->dst.box.width != dst_width ||
|
||||
info->dst.box.height != dst_height ||
|
||||
|
@@ -185,7 +185,10 @@ v3d_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
|
||||
case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
|
||||
return V3D_MAX_MIP_LEVELS;
|
||||
if (screen->devinfo.ver < 40)
|
||||
return 12;
|
||||
else
|
||||
return V3D_MAX_MIP_LEVELS;
|
||||
case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
|
||||
return 2048;
|
||||
|
||||
|
@@ -55,7 +55,28 @@ v3d_start_draw(struct v3d_context *v3d)
|
||||
job->submit.bcl_start = job->bcl.bo->offset;
|
||||
v3d_job_add_bo(job, job->bcl.bo);
|
||||
|
||||
job->tile_alloc = v3d_bo_alloc(v3d->screen, 1024 * 1024, "tile_alloc");
|
||||
/* The PTB will request the tile alloc initial size per tile at start
|
||||
* of tile binning.
|
||||
*/
|
||||
uint32_t tile_alloc_size = (job->draw_tiles_x *
|
||||
job->draw_tiles_y) * 64;
|
||||
/* The PTB allocates in aligned 4k chunks after the initial setup. */
|
||||
tile_alloc_size = align(tile_alloc_size, 4096);
|
||||
|
||||
/* Include the first two chunk allocations that the PTB does so that
|
||||
* we definitely clear the OOM condition before triggering one (the HW
|
||||
* won't trigger OOM during the first allocations).
|
||||
*/
|
||||
tile_alloc_size += 8192;
|
||||
|
||||
/* For performance, allocate some extra initial memory after the PTB's
|
||||
* minimal allocations, so that we hopefully don't have to block the
|
||||
* GPU on the kernel handling an OOM signal.
|
||||
*/
|
||||
tile_alloc_size += 512 * 1024;
|
||||
|
||||
job->tile_alloc = v3d_bo_alloc(v3d->screen, tile_alloc_size,
|
||||
"tile_alloc");
|
||||
uint32_t tsda_per_tile_size = v3d->screen->devinfo.ver >= 40 ? 256 : 64;
|
||||
job->tile_state = v3d_bo_alloc(v3d->screen,
|
||||
job->draw_tiles_y *
|
||||
|
@@ -846,6 +846,9 @@ v3d_setup_texture_shader_state(struct V3DX(TEXTURE_SHADER_STATE) *tex,
|
||||
prsc->target == PIPE_TEXTURE_1D_ARRAY) {
|
||||
tex->image_height = tex->image_width >> 14;
|
||||
}
|
||||
|
||||
tex->image_width &= (1 << 14) - 1;
|
||||
tex->image_height &= (1 << 14) - 1;
|
||||
#endif
|
||||
|
||||
if (prsc->target == PIPE_TEXTURE_3D) {
|
||||
|
@@ -27,6 +27,19 @@
|
||||
|
||||
#include "va_private.h"
|
||||
|
||||
const int reverse_inverse_zscan[] =
|
||||
{
|
||||
/* Reverse inverse z scan pattern */
|
||||
0, 2, 3, 9, 10, 20, 21, 35,
|
||||
1, 4, 8, 11, 19, 22, 34, 36,
|
||||
5, 7, 12, 18, 23, 33, 37, 48,
|
||||
6, 13, 17, 24, 32, 38, 47, 49,
|
||||
14, 16, 25, 31, 39, 46, 50, 57,
|
||||
15, 26, 30, 40, 45, 51, 56, 58,
|
||||
27, 29, 41, 44, 52, 55, 59, 62,
|
||||
28, 42, 43, 53, 54, 60, 61, 63,
|
||||
};
|
||||
|
||||
void vlVaHandlePictureParameterBufferMPEG12(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
|
||||
{
|
||||
VAPictureParameterBufferMPEG2 *mpeg2 = buf->data;
|
||||
@@ -66,16 +79,29 @@ void vlVaHandlePictureParameterBufferMPEG12(vlVaDriver *drv, vlVaContext *contex
|
||||
void vlVaHandleIQMatrixBufferMPEG12(vlVaContext *context, vlVaBuffer *buf)
|
||||
{
|
||||
VAIQMatrixBufferMPEG2 *mpeg2 = buf->data;
|
||||
static uint8_t temp_intra_matrix[64];
|
||||
static uint8_t temp_nonintra_matrix[64];
|
||||
|
||||
assert(buf->size >= sizeof(VAIQMatrixBufferMPEG2) && buf->num_elements == 1);
|
||||
if (mpeg2->load_intra_quantiser_matrix)
|
||||
context->desc.mpeg12.intra_matrix = mpeg2->intra_quantiser_matrix;
|
||||
else
|
||||
if (mpeg2->load_intra_quantiser_matrix) {
|
||||
/* The quantiser matrix that VAAPI provides has been applied
|
||||
with inverse z-scan. However, what we expect in MPEG2
|
||||
picture description is the original order. Therefore,
|
||||
we need to reverse it back to its original order.
|
||||
*/
|
||||
for (int i = 0; i < 64; i++)
|
||||
temp_intra_matrix[i] =
|
||||
mpeg2->intra_quantiser_matrix[reverse_inverse_zscan[i]];
|
||||
context->desc.mpeg12.intra_matrix = temp_intra_matrix;
|
||||
} else
|
||||
context->desc.mpeg12.intra_matrix = NULL;
|
||||
|
||||
if (mpeg2->load_non_intra_quantiser_matrix)
|
||||
context->desc.mpeg12.non_intra_matrix = mpeg2->non_intra_quantiser_matrix;
|
||||
else
|
||||
if (mpeg2->load_non_intra_quantiser_matrix) {
|
||||
for (int i = 0; i < 64; i++)
|
||||
temp_nonintra_matrix[i] =
|
||||
mpeg2->non_intra_quantiser_matrix[reverse_inverse_zscan[i]];
|
||||
context->desc.mpeg12.non_intra_matrix = temp_nonintra_matrix;
|
||||
} else
|
||||
context->desc.mpeg12.non_intra_matrix = NULL;
|
||||
}
|
||||
|
||||
|
@@ -60,6 +60,9 @@ libgallium_dri = shared_library(
|
||||
driver_tegra, driver_i915, driver_svga, driver_virgl,
|
||||
driver_swr,
|
||||
],
|
||||
# Will be deleted during installation, see install_megadrivers.py
|
||||
install : true,
|
||||
install_dir : dri_drivers_path,
|
||||
)
|
||||
|
||||
foreach d : [[with_gallium_kmsro, 'pl111_dri.so'],
|
||||
|
@@ -49,6 +49,7 @@ libva_gallium = shared_library(
|
||||
dep_libdrm, dep_thread, driver_r600, driver_radeonsi, driver_nouveau,
|
||||
],
|
||||
link_depends : va_link_depends,
|
||||
# Will be deleted during installation, see install_megadrivers.py
|
||||
install : true,
|
||||
install_dir : va_drivers_path,
|
||||
)
|
||||
|
@@ -55,6 +55,9 @@ libvdpau_gallium = shared_library(
|
||||
],
|
||||
link_depends : vdpau_link_depends,
|
||||
soversion : '@0@.@1@.0'.format(VDPAU_MAJOR, VDPAU_MINOR),
|
||||
# Will be deleted during installation, see install_megadrivers.py
|
||||
install : true,
|
||||
install_dir : vdpau_drivers_path,
|
||||
)
|
||||
foreach d : [[with_gallium_r300, 'r300'],
|
||||
[with_gallium_r600, 'r600'],
|
||||
|
@@ -47,6 +47,9 @@ libxvmc_gallium = shared_library(
|
||||
],
|
||||
dependencies : [dep_thread, driver_r600, driver_nouveau],
|
||||
link_depends : xvmc_link_depends,
|
||||
# Will be deleted during installation, see install_megadrivers.py
|
||||
install : true,
|
||||
install_dir : xvmc_drivers_path,
|
||||
)
|
||||
|
||||
foreach d : [[with_gallium_r600, 'r600'], [with_gallium_nouveau, 'nouveau']]
|
||||
|
@@ -92,6 +92,10 @@ static bool do_winsys_init(struct amdgpu_winsys *ws,
|
||||
if (!ac_query_gpu_info(fd, ws->dev, &ws->info, &ws->amdinfo))
|
||||
goto fail;
|
||||
|
||||
/* TODO: Enable this once the kernel handles it efficiently. */
|
||||
if (ws->info.has_dedicated_vram)
|
||||
ws->info.has_local_buffers = false;
|
||||
|
||||
handle_env_var_force_family(ws);
|
||||
|
||||
ws->addrlib = amdgpu_addr_create(&ws->info, &ws->amdinfo, &ws->info.max_alignment);
|
||||
|
@@ -46,7 +46,7 @@
|
||||
|
||||
|
||||
#define VIRGL_DRM_VERSION(major, minor) ((major) << 16 | (minor))
|
||||
#define VIRGL_DRM_VERSION_FENCE_FD VIRGL_DRM_VERSION(1, 0)
|
||||
#define VIRGL_DRM_VERSION_FENCE_FD VIRGL_DRM_VERSION(0, 1)
|
||||
|
||||
|
||||
static inline boolean can_cache_resource(struct virgl_hw_res *res)
|
||||
@@ -870,7 +870,7 @@ static int virgl_drm_get_version(int fd)
|
||||
else if (version->version_major != 0)
|
||||
ret = -EINVAL;
|
||||
else
|
||||
ret = version->version_minor;
|
||||
ret = VIRGL_DRM_VERSION(0, version->version_minor);
|
||||
|
||||
drmFreeVersion(version);
|
||||
|
||||
|
@@ -642,7 +642,6 @@ dri3_set_swap_interval(__GLXDRIdrawable *pdraw, int interval)
|
||||
break;
|
||||
}
|
||||
|
||||
priv->swap_interval = interval;
|
||||
loader_dri3_set_swap_interval(&priv->loader_drawable, interval);
|
||||
|
||||
return 0;
|
||||
@@ -659,7 +658,7 @@ dri3_get_swap_interval(__GLXDRIdrawable *pdraw)
|
||||
|
||||
struct dri3_drawable *priv = (struct dri3_drawable *) pdraw;
|
||||
|
||||
return priv->swap_interval;
|
||||
return priv->loader_drawable.swap_interval;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -117,7 +117,6 @@ struct dri3_context
|
||||
struct dri3_drawable {
|
||||
__GLXDRIdrawable base;
|
||||
struct loader_dri3_drawable loader_drawable;
|
||||
int swap_interval;
|
||||
|
||||
/* LIBGL_SHOW_FPS support */
|
||||
uint64_t previous_ust;
|
||||
|
@@ -33,5 +33,5 @@ libblorp = static_library(
|
||||
files_libblorp,
|
||||
include_directories : [inc_common, inc_intel],
|
||||
c_args : [c_vis_args, no_override_init_args],
|
||||
dependencies : idep_nir_headers,
|
||||
dependencies : [idep_nir_headers, idep_genxml],
|
||||
)
|
||||
|
@@ -43,5 +43,5 @@ libintel_common = static_library(
|
||||
include_directories : [inc_common, inc_intel],
|
||||
c_args : [c_vis_args, no_override_init_args],
|
||||
link_with : [libisl],
|
||||
dependencies : [dep_expat, dep_libdrm, dep_thread],
|
||||
dependencies : [dep_expat, dep_libdrm, dep_thread, idep_genxml],
|
||||
)
|
||||
|
@@ -1160,6 +1160,12 @@ vec4_instruction::can_reswizzle(const struct gen_device_info *devinfo,
|
||||
if (devinfo->gen == 6 && is_math() && swizzle != BRW_SWIZZLE_XYZW)
|
||||
return false;
|
||||
|
||||
/* If we write to the flag register changing the swizzle would change
|
||||
* what channels are written to the flag register.
|
||||
*/
|
||||
if (writes_flag())
|
||||
return false;
|
||||
|
||||
/* We can't swizzle implicit accumulator access. We'd have to
|
||||
* reswizzle the producer of the accumulator value in addition
|
||||
* to the consumer (i.e. both MUL and MACH). Just skip this.
|
||||
|
@@ -414,6 +414,7 @@ static const struct gen_device_info gen_device_info_hsw_gt3 = {
|
||||
.has_64bit_types = true, \
|
||||
.supports_simd16_3src = true, \
|
||||
.has_surface_tile_offset = true, \
|
||||
.num_thread_per_eu = 7, \
|
||||
.max_vs_threads = 504, \
|
||||
.max_tcs_threads = 504, \
|
||||
.max_tes_threads = 504, \
|
||||
@@ -427,7 +428,6 @@ static const struct gen_device_info gen_device_info_bdw_gt1 = {
|
||||
.num_slices = 1,
|
||||
.num_subslices = { 2, },
|
||||
.num_eu_per_subslice = 8,
|
||||
.num_thread_per_eu = 7,
|
||||
.l3_banks = 2,
|
||||
.max_cs_threads = 42,
|
||||
.urb = {
|
||||
@@ -452,7 +452,6 @@ static const struct gen_device_info gen_device_info_bdw_gt2 = {
|
||||
.num_slices = 1,
|
||||
.num_subslices = { 3, },
|
||||
.num_eu_per_subslice = 8,
|
||||
.num_thread_per_eu = 7,
|
||||
.l3_banks = 4,
|
||||
.max_cs_threads = 56,
|
||||
.urb = {
|
||||
@@ -477,7 +476,6 @@ static const struct gen_device_info gen_device_info_bdw_gt3 = {
|
||||
.num_slices = 2,
|
||||
.num_subslices = { 3, 3, },
|
||||
.num_eu_per_subslice = 8,
|
||||
.num_thread_per_eu = 7,
|
||||
.l3_banks = 8,
|
||||
.max_cs_threads = 56,
|
||||
.urb = {
|
||||
@@ -503,7 +501,6 @@ static const struct gen_device_info gen_device_info_chv = {
|
||||
.num_slices = 1,
|
||||
.num_subslices = { 2, },
|
||||
.num_eu_per_subslice = 8,
|
||||
.num_thread_per_eu = 7,
|
||||
.l3_banks = 2,
|
||||
.max_vs_threads = 80,
|
||||
.max_tcs_threads = 80,
|
||||
@@ -609,8 +606,7 @@ static const struct gen_device_info gen_device_info_chv = {
|
||||
#define GEN9_FEATURES \
|
||||
GEN8_FEATURES, \
|
||||
GEN9_HW_INFO, \
|
||||
.has_sample_with_hiz = true, \
|
||||
.num_thread_per_eu = 7
|
||||
.has_sample_with_hiz = true
|
||||
|
||||
static const struct gen_device_info gen_device_info_skl_gt1 = {
|
||||
GEN9_FEATURES, .gt = 1,
|
||||
|
@@ -57,3 +57,5 @@ foreach f : gen_xml_files
|
||||
capture : true,
|
||||
)
|
||||
endforeach
|
||||
|
||||
idep_genxml = declare_dependency(sources : [gen_xml_pack, genX_bits_h, genX_xml_h])
|
||||
|
@@ -21,9 +21,9 @@
|
||||
c_sse2_args = ['-msse2', '-mstackrealign']
|
||||
inc_intel = include_directories('.')
|
||||
|
||||
subdir('genxml')
|
||||
subdir('blorp')
|
||||
subdir('dev')
|
||||
subdir('genxml')
|
||||
subdir('isl')
|
||||
subdir('common')
|
||||
subdir('compiler')
|
||||
|
@@ -73,10 +73,10 @@ void anv_GetDescriptorSetLayoutSupport(
|
||||
|
||||
bool supported = true;
|
||||
for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
|
||||
/* Our maximum binding table size is 250 and we need to reserve 8 for
|
||||
* render targets. 240 is a nice round number.
|
||||
/* Our maximum binding table size is 240 and we need to reserve 8 for
|
||||
* render targets.
|
||||
*/
|
||||
if (surface_count[s] >= 240)
|
||||
if (surface_count[s] >= MAX_BINDING_TABLE_SIZE - MAX_RTS)
|
||||
supported = false;
|
||||
}
|
||||
|
||||
|
@@ -41,7 +41,6 @@
|
||||
#include "git_sha1.h"
|
||||
#include "vk_util.h"
|
||||
#include "common/gen_defines.h"
|
||||
#include "compiler/glsl_types.h"
|
||||
|
||||
#include "genxml/gen7_pack.h"
|
||||
|
||||
@@ -704,7 +703,6 @@ void anv_DestroyInstance(
|
||||
|
||||
vk_debug_report_instance_destroy(&instance->debug_report_callbacks);
|
||||
|
||||
_mesa_glsl_release_types();
|
||||
_mesa_locale_fini();
|
||||
|
||||
vk_free(&instance->alloc, instance);
|
||||
@@ -1031,7 +1029,7 @@ void anv_GetPhysicalDeviceProperties(
|
||||
.maxPerStageDescriptorSampledImages = max_samplers,
|
||||
.maxPerStageDescriptorStorageImages = max_images,
|
||||
.maxPerStageDescriptorInputAttachments = 64,
|
||||
.maxPerStageResources = 250,
|
||||
.maxPerStageResources = MAX_BINDING_TABLE_SIZE - MAX_RTS,
|
||||
.maxDescriptorSetSamplers = 6 * max_samplers, /* number of stages * maxPerStageDescriptorSamplers */
|
||||
.maxDescriptorSetUniformBuffers = 6 * 64, /* number of stages * maxPerStageDescriptorUniformBuffers */
|
||||
.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
|
||||
@@ -1868,7 +1866,7 @@ VkResult anv_CreateDevice(
|
||||
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
|
||||
goto fail_mutex;
|
||||
}
|
||||
if (pthread_cond_init(&device->queue_submit, NULL) != 0) {
|
||||
if (pthread_cond_init(&device->queue_submit, &condattr) != 0) {
|
||||
pthread_condattr_destroy(&condattr);
|
||||
result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
|
||||
goto fail_mutex;
|
||||
|
@@ -163,6 +163,18 @@ struct gen_l3_config;
|
||||
#define MAX_GEN8_IMAGES 8
|
||||
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
|
||||
|
||||
/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
|
||||
*
|
||||
* "The surface state model is used when a Binding Table Index (specified
|
||||
* in the message descriptor) of less than 240 is specified. In this model,
|
||||
* the Binding Table Index is used to index into the binding table, and the
|
||||
* binding table entry contains a pointer to the SURFACE_STATE."
|
||||
*
|
||||
* Binding table values above 240 are used for various things in the hardware
|
||||
* such as stateless, stateless with incoherent cache, SLM, and bindless.
|
||||
*/
|
||||
#define MAX_BINDING_TABLE_SIZE 240
|
||||
|
||||
/* The kernel relocation API has a limitation of a 32-bit delta value
|
||||
* applied to the address before it is written which, in spite of it being
|
||||
* unsigned, is treated as signed . Because of the way that this maps to
|
||||
|
@@ -2087,9 +2087,29 @@ compute_pipeline_create(
|
||||
vfe.URBEntryAllocationSize = GEN_GEN <= 7 ? 0 : 2;
|
||||
vfe.CURBEAllocationSize = vfe_curbe_allocation;
|
||||
|
||||
vfe.PerThreadScratchSpace = get_scratch_space(cs_bin);
|
||||
vfe.ScratchSpaceBasePointer =
|
||||
get_scratch_address(pipeline, MESA_SHADER_COMPUTE, cs_bin);
|
||||
if (cs_bin->prog_data->total_scratch) {
|
||||
if (GEN_GEN >= 8) {
|
||||
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
|
||||
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
|
||||
*/
|
||||
vfe.PerThreadScratchSpace =
|
||||
ffs(cs_bin->prog_data->total_scratch) - 11;
|
||||
} else if (GEN_IS_HASWELL) {
|
||||
/* Haswell's Per Thread Scratch Space is in the range [0, 10]
|
||||
* where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
|
||||
*/
|
||||
vfe.PerThreadScratchSpace =
|
||||
ffs(cs_bin->prog_data->total_scratch) - 12;
|
||||
} else {
|
||||
/* IVB and BYT use the range [0, 11] to mean [1kB, 12kB]
|
||||
* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
|
||||
*/
|
||||
vfe.PerThreadScratchSpace =
|
||||
cs_bin->prog_data->total_scratch / 1024 - 1;
|
||||
}
|
||||
vfe.ScratchSpaceBasePointer =
|
||||
get_scratch_address(pipeline, MESA_SHADER_COMPUTE, cs_bin);
|
||||
}
|
||||
}
|
||||
|
||||
struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
|
||||
|
@@ -105,7 +105,7 @@ foreach g : [['70', ['gen7_cmd_buffer.c']], ['75', ['gen7_cmd_buffer.c']],
|
||||
c_vis_args, no_override_init_args, c_sse2_args,
|
||||
'-DGEN_VERSIONx10=@0@'.format(_gen),
|
||||
],
|
||||
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
|
||||
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml],
|
||||
)
|
||||
endforeach
|
||||
|
||||
@@ -203,7 +203,7 @@ libvulkan_intel = shared_library(
|
||||
libvulkan_util, libvulkan_wsi, libmesa_util,
|
||||
],
|
||||
dependencies : [
|
||||
dep_thread, dep_dl, dep_m, anv_deps, idep_nir,
|
||||
dep_thread, dep_dl, dep_m, anv_deps, idep_nir, idep_genxml,
|
||||
],
|
||||
c_args : anv_flags,
|
||||
link_args : ['-Wl,--build-id=sha1', ld_args_bsymbolic, ld_args_gc_sections],
|
||||
|
@@ -187,7 +187,7 @@ libi965 = static_library(
|
||||
i965_gen_libs, libintel_common, libintel_dev, libisl, libintel_compiler,
|
||||
libblorp
|
||||
],
|
||||
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers],
|
||||
dependencies : [dep_libdrm, dep_valgrind, idep_nir_headers, idep_genxml],
|
||||
)
|
||||
|
||||
dri_drivers += libi965
|
||||
|
@@ -54,6 +54,9 @@ if dri_drivers != []
|
||||
dep_selinux, dep_libdrm, dep_expat, dep_m, dep_thread, dep_dl, idep_nir,
|
||||
],
|
||||
link_args : [ld_args_build_id, ld_args_bsymbolic, ld_args_gc_sections],
|
||||
# Will be deleted during installation, see install_megadrivers.py
|
||||
install : true,
|
||||
install_dir : dri_drivers_path,
|
||||
)
|
||||
|
||||
meson.add_install_script(
|
||||
|
Reference in New Issue
Block a user