Compare commits
22 Commits
mesa-18.0.
...
mesa-18.0.
Author | SHA1 | Date | |
---|---|---|---|
|
d38da7bd2d | ||
|
ff629ffcd3 | ||
|
53ff157c33 | ||
|
3b9b66560a | ||
|
60c5cf011d | ||
|
95d88ba0da | ||
|
6bd2fba19d | ||
|
ead5bf4f6a | ||
|
d45bb9f505 | ||
|
d75054d0d0 | ||
|
7673c72f3d | ||
|
264cda58ab | ||
|
40ed4b0285 | ||
|
251a36d629 | ||
|
b62b3eb259 | ||
|
f581dc608b | ||
|
e1b87631a9 | ||
|
279c628560 | ||
|
e7709adf7a | ||
|
cd52573fac | ||
|
5edd3192e7 | ||
|
a1c421c638 |
@@ -39,12 +39,12 @@ matrix:
|
||||
addons:
|
||||
apt:
|
||||
sources:
|
||||
- llvm-toolchain-trusty-3.9
|
||||
- llvm-toolchain-trusty-4.0
|
||||
packages:
|
||||
# LLVM packaging is broken and misses these dependencies
|
||||
- libedit-dev
|
||||
# From sources above
|
||||
- llvm-3.9-dev
|
||||
- llvm-4.0-dev
|
||||
# Common
|
||||
- xz-utils
|
||||
- libexpat1-dev
|
||||
|
@@ -21,3 +21,14 @@ ac4437b20b87c7285b89466f05b51518ae616873 automake: small cleanup after the meson
|
||||
# b2f2236dc565dd1460f0 and c62cf1f165919bc74296 which did not land in
|
||||
# branch.
|
||||
880c1718b6d14b33fe5ba918af70fea5be890c6b omx: always define ENABLE_ST_OMX_{BELLAGIO,TIZONIA}
|
||||
|
||||
# stable: There is a specific port for this patch for stable branch.
|
||||
d15fb766aa3c98ffbe16d050b2af4804e4b12c57 radeonsi/gfx9: fix a hang with an empty first IB
|
||||
|
||||
# stable: Explicit 18.1 only nominations
|
||||
0e945fdf23bac5a62c15edfcbfd9d6ac4eee592f nir: Do not use progress for unreachable code in return lowering.
|
||||
84fef802fb16cef68ec358cbfed1cac9c3bfa410 ac/nir: add missing round_slice for 1D arrays
|
||||
d136a5fad9c7e67c1362453388914ecc60420883 ac: fix the number of coordinates for ac_image_get_lod and arrays
|
||||
|
||||
# stable: There is a specific port for this patch for stable branch.
|
||||
fedd0a4215bcd387525000d76b77993ca38916ae radv/winsys: allow to submit up to 4 IBs for chips without chaining
|
||||
|
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python
|
||||
# encoding=utf-8
|
||||
# Copyright © 2017 Intel Corporation
|
||||
# Copyright © 2017-2018 Intel Corporation
|
||||
|
||||
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
# of this software and associated documentation files (the "Software"), to deal
|
||||
@@ -35,7 +35,11 @@ def main():
|
||||
parser.add_argument('drivers', nargs='+')
|
||||
args = parser.parse_args()
|
||||
|
||||
to = os.path.join(os.environ.get('MESON_INSTALL_DESTDIR_PREFIX'), args.libdir)
|
||||
if os.path.isabs(args.libdir):
|
||||
to = os.path.join(os.environ.get('DESTDIR', '/'), args.libdir[1:])
|
||||
else:
|
||||
to = os.path.join(os.environ['MESON_INSTALL_DESTDIR_PREFIX'], args.libdir)
|
||||
|
||||
master = os.path.join(to, os.path.basename(args.megadriver))
|
||||
|
||||
if not os.path.exists(to):
|
||||
|
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
0c93ba892c0610f5dd87f2e2673b9445187995c395b3ddb33fd4260bfb291e89 mesa-18.0.1.tar.gz
|
||||
b2d2f5b5dbaab13e15cb0dcb5ec81887467f55ebc9625945b303a3647cd87954 mesa-18.0.1.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
143
docs/relnotes/18.0.2.html
Normal file
143
docs/relnotes/18.0.2.html
Normal file
@@ -0,0 +1,143 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 18.0.2 Release Notes / April 28, 2018</h1>
|
||||
|
||||
<p>
|
||||
Mesa 18.0.2 is a bug fix release which fixes bugs found since the 18.0.1 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 18.0.2 implements the OpenGL 4.5 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.5. OpenGL
|
||||
4.5 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95009">Bug 95009</a> - [SNB] amd_shader_trinary_minmax.execution.built-in-functions.gs-mid3-ivec2-ivec2-ivec2 intermittent</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95012">Bug 95012</a> - [SNB] glsl-1_50.execution.built-in-functions.gs-op tests intermittent</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98281">Bug 98281</a> - 'message's in ctx->Debug.LogMessages[] seem to leak.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105320">Bug 105320</a> - Storage texel buffer access produces wrong results (RX Vega)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105775">Bug 105775</a> - SI reaches the maximum IB size in dwords and fail to submit</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=105994">Bug 105994</a> - surface state leak when creating and destroying image views with aspectMask depth and stencil</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106074">Bug 106074</a> - radv: si_scissor_from_viewport returns incorrect result when using half-pixel viewport offset</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=106126">Bug 106126</a> - eglMakeCurrent does not always ensure dri_drawable->update_drawable_info has been called for a new EGLSurface if another has been created and destroyed first</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Bas Nieuwenhuizen (2):</p>
|
||||
<ul>
|
||||
<li>ac/nir: Make the GFX9 buffer size fix apply to image loads/atomics too.</li>
|
||||
<li>radv: Mark GTT memory as device local for APUs.</li>
|
||||
</ul>
|
||||
|
||||
<p>Dylan Baker (2):</p>
|
||||
<ul>
|
||||
<li>bin/install_megadrivers: fix DESTDIR and -D*-path</li>
|
||||
<li>meson: don't build classic mesa tests without dri_drivers</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (1):</p>
|
||||
<ul>
|
||||
<li>intel/compiler: Add scheduler deps for instructions that implicitly read g0</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (1):</p>
|
||||
<ul>
|
||||
<li>i965/fs: Return mlen * 8 for size_read() for INTERPOLATE_AT_*</li>
|
||||
</ul>
|
||||
|
||||
<p>Johan Klokkhammer Helsing (1):</p>
|
||||
<ul>
|
||||
<li>st/dri: Fix dangling pointer to a destroyed dri_drawable</li>
|
||||
</ul>
|
||||
|
||||
<p>Juan A. Suarez Romero (4):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 18.0.1</li>
|
||||
<li>travis: radv needs LLVM 4.0</li>
|
||||
<li>cherry-ignore: add explicit 18.1 only nominations</li>
|
||||
<li>Update version to 18.0.2</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (1):</p>
|
||||
<ul>
|
||||
<li>i965: Fix shadow batches to be the same size as the real BO.</li>
|
||||
</ul>
|
||||
|
||||
<p>Lionel Landwerlin (1):</p>
|
||||
<ul>
|
||||
<li>anv: fix number of planes for depth &amp; stencil</li>
|
||||
</ul>
|
||||
|
||||
<p>Lucas Stach (1):</p>
|
||||
<ul>
|
||||
<li>etnaviv: fix texture_format_needs_swiz</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (3):</p>
|
||||
<ul>
|
||||
<li>radeonsi/gfx9: fix a hang with an empty first IB</li>
|
||||
<li>glsl_to_tgsi: try harder to lower unsupported ir_binop_vector_extract</li>
|
||||
<li>Revert "st/dri: Fix dangling pointer to a destroyed dri_drawable"</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (2):</p>
|
||||
<ul>
|
||||
<li>radv: fix scissor computation when using half-pixel viewport offset</li>
|
||||
<li>radv/winsys: allow to submit up to 4 IBs for chips without chaining</li>
|
||||
</ul>
|
||||
|
||||
<p>Thomas Hellstrom (1):</p>
|
||||
<ul>
|
||||
<li>svga: Fix incorrect advertizing of EGL_KHR_gl_colorspace</li>
|
||||
</ul>
|
||||
|
||||
<p>Timothy Arceri (1):</p>
|
||||
<ul>
|
||||
<li>mesa: free debug messages when destroying the debug state</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -3617,6 +3617,25 @@ static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
 * Fetch the buffer descriptor for the image variable referenced by a NIR
 * image intrinsic and, when the ABI requests it, patch the descriptor before
 * it is used.
 *
 * ctx    translation context; supplies the LLVM builder, the i32 type and
 *        the abi->gfx9_stride_size_workaround flag.
 * instr  image intrinsic; only instr->variables[0] is read here.
 * write  forwarded unchanged as the last argument of get_sampler_desc().
 *
 * Returns the descriptor obtained from get_sampler_desc(..., AC_DESC_BUFFER,
 * ...). When gfx9_stride_size_workaround is set, element 2 of that vector is
 * replaced by the unsigned maximum of its previous value and
 * (element 1 >> 16).
 *
 * NOTE(review): the local names suggest element 2 holds the element count and
 * the upper 16 bits of element 1 hold the stride; the descriptor layout is not
 * visible here -- confirm against the hardware documentation.
 */
static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
|
||||
const nir_intrinsic_instr *instr, bool write)
|
||||
{
|
||||
LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, write);
|
||||
if (ctx->abi->gfx9_stride_size_workaround) {
|
||||
/* Read vector elements 2 and 1 of the descriptor. */
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
|
||||
/* Keep only the bits above bit 15 of element 1 (logical shift right by 16). */
stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
|
||||
|
||||
/* Unsigned max: elem_count if it is greater than stride, otherwise stride. */
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
|
||||
LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
|
||||
elem_count, stride, "");
|
||||
|
||||
/* Write the adjusted value back into element 2 of the descriptor. */
rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
|
||||
LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||
}
|
||||
return rsrc;
|
||||
}
|
||||
|
||||
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
|
||||
const nir_intrinsic_instr *instr)
|
||||
{
|
||||
@@ -3631,7 +3650,7 @@ static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
|
||||
|
||||
type = glsl_without_array(type);
|
||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
||||
params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false);
|
||||
params[0] = get_image_buffer_descriptor(ctx, instr, false);
|
||||
params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
||||
ctx->ac.i32_0, ""); /* vindex */
|
||||
params[2] = ctx->ac.i32_0; /* voffset */
|
||||
@@ -3693,20 +3712,7 @@ static void visit_image_store(struct ac_nir_context *ctx,
|
||||
glc = ctx->ac.i1true;
|
||||
|
||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
||||
LLVMValueRef rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
|
||||
|
||||
if (ctx->abi->gfx9_stride_size_workaround) {
|
||||
LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
|
||||
stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
|
||||
|
||||
LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
|
||||
LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
|
||||
elem_count, stride, "");
|
||||
|
||||
rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
|
||||
LLVMConstInt(ctx->ac.i32, 2, 0), "");
|
||||
}
|
||||
LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
|
||||
|
||||
params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
|
||||
params[1] = rsrc;
|
||||
@@ -3801,8 +3807,7 @@ static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
|
||||
params[param_count++] = get_src(ctx, instr->src[2]);
|
||||
|
||||
if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
|
||||
params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER,
|
||||
NULL, true, true);
|
||||
params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
|
||||
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
|
||||
ctx->ac.i32_0, ""); /* vindex */
|
||||
params[param_count++] = ctx->ac.i32_0; /* voffset */
|
||||
|
@@ -141,7 +141,7 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
|
||||
gart_index = device->memory_properties.memoryHeapCount++;
|
||||
device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
|
||||
.size = device->rad_info.gart_size,
|
||||
.flags = 0,
|
||||
.flags = device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -158,7 +158,8 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
|
||||
device->mem_type_indices[type_count] = RADV_MEM_TYPE_GTT_WRITE_COMBINE;
|
||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
|
||||
.heapIndex = gart_index,
|
||||
};
|
||||
}
|
||||
@@ -176,7 +177,8 @@ radv_physical_device_init_mem_types(struct radv_physical_device *device)
|
||||
device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
|
||||
.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT |
|
||||
(device->rad_info.has_dedicated_vram ? 0 : VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT),
|
||||
.heapIndex = gart_index,
|
||||
};
|
||||
}
|
||||
|
@@ -647,10 +647,10 @@ static VkRect2D si_scissor_from_viewport(const VkViewport *viewport)
|
||||
|
||||
get_viewport_xform(viewport, scale, translate);
|
||||
|
||||
rect.offset.x = translate[0] - abs(scale[0]);
|
||||
rect.offset.y = translate[1] - abs(scale[1]);
|
||||
rect.extent.width = ceilf(translate[0] + abs(scale[0])) - rect.offset.x;
|
||||
rect.extent.height = ceilf(translate[1] + abs(scale[1])) - rect.offset.y;
|
||||
rect.offset.x = translate[0] - fabs(scale[0]);
|
||||
rect.offset.y = translate[1] - fabs(scale[1]);
|
||||
rect.extent.width = ceilf(translate[0] + fabs(scale[0])) - rect.offset.x;
|
||||
rect.extent.height = ceilf(translate[1] + fabs(scale[1])) - rect.offset.y;
|
||||
|
||||
return rect;
|
||||
}
|
||||
|
@@ -66,6 +66,10 @@ struct radv_amdgpu_cs {
|
||||
struct radeon_winsys_bo **virtual_buffers;
|
||||
uint8_t *virtual_buffer_priorities;
|
||||
int *virtual_buffer_hash_table;
|
||||
|
||||
/* For chips that don't support chaining. */
|
||||
struct radeon_winsys_cs *old_cs_buffers;
|
||||
unsigned num_old_cs_buffers;
|
||||
};
|
||||
|
||||
static inline struct radv_amdgpu_cs *
|
||||
@@ -166,6 +170,12 @@ static void radv_amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
|
||||
for (unsigned i = 0; i < cs->num_old_ib_buffers; ++i)
|
||||
cs->ws->base.buffer_destroy(cs->old_ib_buffers[i]);
|
||||
|
||||
for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
|
||||
struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
|
||||
free(rcs->buf);
|
||||
}
|
||||
|
||||
free(cs->old_cs_buffers);
|
||||
free(cs->old_ib_buffers);
|
||||
free(cs->virtual_buffers);
|
||||
free(cs->virtual_buffer_priorities);
|
||||
@@ -251,9 +261,46 @@ static void radv_amdgpu_cs_grow(struct radeon_winsys_cs *_cs, size_t min_size)
|
||||
/* The total ib size cannot exceed limit_dws dwords. */
|
||||
if (ib_dws > limit_dws)
|
||||
{
|
||||
cs->failed = true;
|
||||
/* The maximum size in dwords has been reached,
|
||||
* try to allocate a new one.
|
||||
*/
|
||||
if (cs->num_old_cs_buffers + 1 >= AMDGPU_CS_MAX_IBS_PER_SUBMIT) {
|
||||
/* TODO: Allow to submit more than 4 IBs. */
|
||||
fprintf(stderr, "amdgpu: Maximum number of IBs "
|
||||
"per submit reached.\n");
|
||||
cs->failed = true;
|
||||
cs->base.cdw = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
cs->old_cs_buffers =
|
||||
realloc(cs->old_cs_buffers,
|
||||
(cs->num_old_cs_buffers + 1) * sizeof(*cs->old_cs_buffers));
|
||||
if (!cs->old_cs_buffers) {
|
||||
cs->failed = true;
|
||||
cs->base.cdw = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* Store the current one for submitting it later. */
|
||||
cs->old_cs_buffers[cs->num_old_cs_buffers].cdw = cs->base.cdw;
|
||||
cs->old_cs_buffers[cs->num_old_cs_buffers].max_dw = cs->base.max_dw;
|
||||
cs->old_cs_buffers[cs->num_old_cs_buffers].buf = cs->base.buf;
|
||||
cs->num_old_cs_buffers++;
|
||||
|
||||
/* Reset the cs, it will be re-allocated below. */
|
||||
cs->base.cdw = 0;
|
||||
return;
|
||||
cs->base.buf = NULL;
|
||||
|
||||
/* Re-compute the number of dwords to allocate. */
|
||||
ib_dws = MAX2(cs->base.cdw + min_size,
|
||||
MIN2(cs->base.max_dw * 2, limit_dws));
|
||||
if (ib_dws > limit_dws) {
|
||||
fprintf(stderr, "amdgpu: Too high number of "
|
||||
"dwords to allocate\n");
|
||||
cs->failed = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t *new_buf = realloc(cs->base.buf, ib_dws * 4);
|
||||
@@ -365,6 +412,15 @@ static void radv_amdgpu_cs_reset(struct radeon_winsys_cs *_cs)
|
||||
cs->ib.ib_mc_address = radv_amdgpu_winsys_bo(cs->ib_buffer)->base.va;
|
||||
cs->ib_size_ptr = &cs->ib.size;
|
||||
cs->ib.size = 0;
|
||||
} else {
|
||||
for (unsigned i = 0; i < cs->num_old_cs_buffers; ++i) {
|
||||
struct radeon_winsys_cs *rcs = &cs->old_cs_buffers[i];
|
||||
free(rcs->buf);
|
||||
}
|
||||
|
||||
free(cs->old_cs_buffers);
|
||||
cs->old_cs_buffers = NULL;
|
||||
cs->num_old_cs_buffers = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -515,7 +571,8 @@ static void radv_amdgpu_cs_execute_secondary(struct radeon_winsys_cs *_parent,
|
||||
static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
||||
struct radeon_winsys_cs **cs_array,
|
||||
unsigned count,
|
||||
struct radv_amdgpu_winsys_bo *extra_bo,
|
||||
struct radv_amdgpu_winsys_bo **extra_bo_array,
|
||||
unsigned num_extra_bo,
|
||||
struct radeon_winsys_cs *extra_cs,
|
||||
amdgpu_bo_list_handle *bo_list)
|
||||
{
|
||||
@@ -544,7 +601,7 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
||||
bo_list);
|
||||
free(handles);
|
||||
pthread_mutex_unlock(&ws->global_bo_list_lock);
|
||||
} else if (count == 1 && !extra_bo && !extra_cs &&
|
||||
} else if (count == 1 && !num_extra_bo && !extra_cs &&
|
||||
!radv_amdgpu_cs(cs_array[0])->num_virtual_buffers) {
|
||||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[0];
|
||||
if (cs->num_buffers == 0) {
|
||||
@@ -554,8 +611,8 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
||||
r = amdgpu_bo_list_create(ws->dev, cs->num_buffers, cs->handles,
|
||||
cs->priorities, bo_list);
|
||||
} else {
|
||||
unsigned total_buffer_count = !!extra_bo;
|
||||
unsigned unique_bo_count = !!extra_bo;
|
||||
unsigned total_buffer_count = num_extra_bo;
|
||||
unsigned unique_bo_count = num_extra_bo;
|
||||
for (unsigned i = 0; i < count; ++i) {
|
||||
struct radv_amdgpu_cs *cs = (struct radv_amdgpu_cs*)cs_array[i];
|
||||
total_buffer_count += cs->num_buffers;
|
||||
@@ -578,9 +635,9 @@ static int radv_amdgpu_create_bo_list(struct radv_amdgpu_winsys *ws,
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
if (extra_bo) {
|
||||
handles[0] = extra_bo->bo;
|
||||
priorities[0] = 8;
|
||||
for (unsigned i = 0; i < num_extra_bo; i++) {
|
||||
handles[i] = extra_bo_array[i]->bo;
|
||||
priorities[i] = 8;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < count + !!extra_cs; ++i) {
|
||||
@@ -710,7 +767,8 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
|
||||
}
|
||||
}
|
||||
|
||||
r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, initial_preamble_cs, &bo_list);
|
||||
r = radv_amdgpu_create_bo_list(cs0->ws, cs_array, cs_count, NULL, 0, initial_preamble_cs,
|
||||
&bo_list);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: buffer list creation failed for the "
|
||||
"chained submission(%d)\n", r);
|
||||
@@ -777,7 +835,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
|
||||
|
||||
memset(&request, 0, sizeof(request));
|
||||
|
||||
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL,
|
||||
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt, NULL, 0,
|
||||
preamble_cs, &bo_list);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: buffer list creation failed "
|
||||
@@ -857,68 +915,127 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
|
||||
assert(cs_count);
|
||||
|
||||
for (unsigned i = 0; i < cs_count;) {
|
||||
struct amdgpu_cs_ib_info ib = {0};
|
||||
struct radeon_winsys_bo *bo = NULL;
|
||||
struct amdgpu_cs_ib_info ibs[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
|
||||
unsigned number_of_ibs = 1;
|
||||
struct radeon_winsys_bo *bos[AMDGPU_CS_MAX_IBS_PER_SUBMIT] = {0};
|
||||
struct radeon_winsys_cs *preamble_cs = i ? continue_preamble_cs : initial_preamble_cs;
|
||||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i]);
|
||||
uint32_t *ptr;
|
||||
unsigned cnt = 0;
|
||||
unsigned size = 0;
|
||||
unsigned pad_words = 0;
|
||||
if (preamble_cs)
|
||||
size += preamble_cs->cdw;
|
||||
|
||||
while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
|
||||
size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
|
||||
++cnt;
|
||||
if (cs->num_old_cs_buffers > 0) {
|
||||
/* Special path when the maximum size in dwords has
|
||||
* been reached because we need to handle more than one
|
||||
* IB per submit.
|
||||
*/
|
||||
unsigned new_cs_count = cs->num_old_cs_buffers + 1;
|
||||
struct radeon_winsys_cs *new_cs_array[AMDGPU_CS_MAX_IBS_PER_SUBMIT];
|
||||
unsigned idx = 0;
|
||||
|
||||
for (unsigned j = 0; j < cs->num_old_cs_buffers; j++)
|
||||
new_cs_array[idx++] = &cs->old_cs_buffers[j];
|
||||
new_cs_array[idx++] = cs_array[i];
|
||||
|
||||
for (unsigned j = 0; j < new_cs_count; j++) {
|
||||
struct radeon_winsys_cs *rcs = new_cs_array[j];
|
||||
bool needs_preamble = preamble_cs && j == 0;
|
||||
unsigned size = 0;
|
||||
|
||||
if (needs_preamble)
|
||||
size += preamble_cs->cdw;
|
||||
size += rcs->cdw;
|
||||
|
||||
assert(size < 0xffff8);
|
||||
|
||||
while (!size || (size & 7)) {
|
||||
size++;
|
||||
pad_words++;
|
||||
}
|
||||
|
||||
bos[j] = ws->buffer_create(ws, 4 * size, 4096,
|
||||
RADEON_DOMAIN_GTT,
|
||||
RADEON_FLAG_CPU_ACCESS |
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||
RADEON_FLAG_READ_ONLY);
|
||||
ptr = ws->buffer_map(bos[j]);
|
||||
|
||||
if (needs_preamble) {
|
||||
memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
|
||||
ptr += preamble_cs->cdw;
|
||||
}
|
||||
|
||||
memcpy(ptr, rcs->buf, 4 * rcs->cdw);
|
||||
ptr += rcs->cdw;
|
||||
|
||||
for (unsigned k = 0; k < pad_words; ++k)
|
||||
*ptr++ = pad_word;
|
||||
|
||||
ibs[j].size = size;
|
||||
ibs[j].ib_mc_address = radv_buffer_get_va(bos[j]);
|
||||
}
|
||||
|
||||
number_of_ibs = new_cs_count;
|
||||
cnt++;
|
||||
} else {
|
||||
if (preamble_cs)
|
||||
size += preamble_cs->cdw;
|
||||
|
||||
while (i + cnt < cs_count && 0xffff8 - size >= radv_amdgpu_cs(cs_array[i + cnt])->base.cdw) {
|
||||
size += radv_amdgpu_cs(cs_array[i + cnt])->base.cdw;
|
||||
++cnt;
|
||||
}
|
||||
|
||||
while (!size || (size & 7)) {
|
||||
size++;
|
||||
pad_words++;
|
||||
}
|
||||
assert(cnt);
|
||||
|
||||
bos[0] = ws->buffer_create(ws, 4 * size, 4096,
|
||||
RADEON_DOMAIN_GTT,
|
||||
RADEON_FLAG_CPU_ACCESS |
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||
RADEON_FLAG_READ_ONLY);
|
||||
ptr = ws->buffer_map(bos[0]);
|
||||
|
||||
if (preamble_cs) {
|
||||
memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
|
||||
ptr += preamble_cs->cdw;
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < cnt; ++j) {
|
||||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
|
||||
memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
|
||||
ptr += cs->base.cdw;
|
||||
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < pad_words; ++j)
|
||||
*ptr++ = pad_word;
|
||||
|
||||
ibs[0].size = size;
|
||||
ibs[0].ib_mc_address = radv_buffer_get_va(bos[0]);
|
||||
}
|
||||
|
||||
while(!size || (size & 7)) {
|
||||
size++;
|
||||
pad_words++;
|
||||
}
|
||||
assert(cnt);
|
||||
|
||||
bo = ws->buffer_create(ws, 4 * size, 4096, RADEON_DOMAIN_GTT,
|
||||
RADEON_FLAG_CPU_ACCESS |
|
||||
RADEON_FLAG_NO_INTERPROCESS_SHARING |
|
||||
RADEON_FLAG_READ_ONLY);
|
||||
ptr = ws->buffer_map(bo);
|
||||
|
||||
if (preamble_cs) {
|
||||
memcpy(ptr, preamble_cs->buf, preamble_cs->cdw * 4);
|
||||
ptr += preamble_cs->cdw;
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < cnt; ++j) {
|
||||
struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[i + j]);
|
||||
memcpy(ptr, cs->base.buf, 4 * cs->base.cdw);
|
||||
ptr += cs->base.cdw;
|
||||
|
||||
}
|
||||
|
||||
for (unsigned j = 0; j < pad_words; ++j)
|
||||
*ptr++ = pad_word;
|
||||
|
||||
memset(&request, 0, sizeof(request));
|
||||
|
||||
|
||||
r = radv_amdgpu_create_bo_list(cs0->ws, &cs_array[i], cnt,
|
||||
(struct radv_amdgpu_winsys_bo*)bo,
|
||||
preamble_cs, &bo_list);
|
||||
(struct radv_amdgpu_winsys_bo **)bos,
|
||||
number_of_ibs, preamble_cs,
|
||||
&bo_list);
|
||||
if (r) {
|
||||
fprintf(stderr, "amdgpu: buffer list creation failed "
|
||||
"for the sysmem submission (%d)\n", r);
|
||||
return r;
|
||||
}
|
||||
|
||||
ib.size = size;
|
||||
ib.ib_mc_address = radv_buffer_get_va(bo);
|
||||
memset(&request, 0, sizeof(request));
|
||||
|
||||
request.ip_type = cs0->hw_ip;
|
||||
request.ring = queue_idx;
|
||||
request.resources = bo_list;
|
||||
request.number_of_ibs = 1;
|
||||
request.ibs = &ib;
|
||||
request.number_of_ibs = number_of_ibs;
|
||||
request.ibs = ibs;
|
||||
request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
|
||||
|
||||
sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
|
||||
@@ -934,9 +1051,11 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
|
||||
if (bo_list)
|
||||
amdgpu_bo_list_destroy(bo_list);
|
||||
|
||||
ws->buffer_destroy(bo);
|
||||
if (r)
|
||||
return r;
|
||||
for (unsigned j = 0; j < number_of_ibs; j++) {
|
||||
ws->buffer_destroy(bos[j]);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
|
||||
i += cnt;
|
||||
}
|
||||
|
@@ -302,7 +302,7 @@ texture_format_needs_swiz(enum pipe_format fmt)
|
||||
bool swiz = false;
|
||||
|
||||
if (formats[fmt].present)
|
||||
swiz = !memcmp(def, formats[fmt].tex_swiz, sizeof(formats[fmt].tex_swiz));
|
||||
swiz = !!memcmp(def, formats[fmt].tex_swiz, sizeof(formats[fmt].tex_swiz));
|
||||
|
||||
return swiz;
|
||||
}
|
||||
|
@@ -26,6 +26,7 @@
|
||||
#include "si_shader_internal.h"
|
||||
#include "sid.h"
|
||||
|
||||
#include "radeon/r600_cs.h"
|
||||
#include "radeon/radeon_uvd.h"
|
||||
#include "util/hash_table.h"
|
||||
#include "util/u_log.h"
|
||||
@@ -333,9 +334,6 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||
|
||||
sctx->sample_mask.sample_mask = 0xffff;
|
||||
|
||||
/* these must be last */
|
||||
si_begin_new_cs(sctx);
|
||||
|
||||
if (sctx->b.chip_class >= GFX9) {
|
||||
sctx->wait_mem_scratch = (struct r600_resource*)
|
||||
pipe_buffer_create(screen, 0, PIPE_USAGE_DEFAULT, 4);
|
||||
@@ -351,6 +349,9 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address);
|
||||
radeon_emit(cs, sctx->wait_mem_scratch->gpu_address >> 32);
|
||||
radeon_emit(cs, sctx->wait_mem_number);
|
||||
radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
|
||||
sctx->wait_mem_scratch,
|
||||
RADEON_USAGE_WRITE, RADEON_PRIO_FENCE);
|
||||
}
|
||||
|
||||
/* CIK cannot unbind a constant buffer (S_BUFFER_LOAD doesn't skip loads
|
||||
@@ -423,6 +424,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||
util_dynarray_init(&sctx->resident_img_needs_color_decompress, NULL);
|
||||
util_dynarray_init(&sctx->resident_tex_needs_depth_decompress, NULL);
|
||||
|
||||
/* this must be last */
|
||||
si_begin_new_cs(sctx);
|
||||
return &sctx->b.b;
|
||||
fail:
|
||||
fprintf(stderr, "radeonsi: Failed to create a context.\n");
|
||||
|
@@ -2106,7 +2106,7 @@ svga_is_format_supported(struct pipe_screen *screen,
|
||||
|
||||
if (!ss->sws->have_vgpu10 &&
|
||||
util_format_is_srgb(format) &&
|
||||
(bindings & PIPE_BIND_DISPLAY_TARGET)) {
|
||||
(bindings & (PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_RENDER_TARGET))) {
|
||||
/* We only support sRGB rendering with vgpu10 */
|
||||
return FALSE;
|
||||
}
|
||||
|
@@ -816,6 +816,8 @@ fs_inst::size_read(int arg) const
|
||||
case SHADER_OPCODE_TYPED_ATOMIC:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_READ:
|
||||
case SHADER_OPCODE_TYPED_SURFACE_WRITE:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SAMPLE:
|
||||
case FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET:
|
||||
case FS_OPCODE_INTERPOLATE_AT_PER_SLOT_OFFSET:
|
||||
case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
|
||||
case SHADER_OPCODE_BYTE_SCATTERED_READ:
|
||||
|
@@ -332,6 +332,31 @@ public:
|
||||
opcode != BRW_OPCODE_IF &&
|
||||
opcode != BRW_OPCODE_WHILE));
|
||||
}
|
||||
|
||||
/**
 * Opcode classification helper: returns true when this instruction's opcode
 * is one of the cases listed below, false for every other opcode.
 *
 * Per its name, the listed opcodes are the ones treated as reading register
 * g0 without naming it as an explicit source.
 * NOTE(review): the list covers texturing/sampling opcodes, the VS pull
 * constant load, two GS opcodes and the GEN4 scratch read/write; whether it
 * is exhaustive cannot be verified from this view.
 */
bool reads_g0_implicitly() const
|
||||
{
|
||||
switch (opcode) {
|
||||
case SHADER_OPCODE_TEX:
|
||||
case SHADER_OPCODE_TXL:
|
||||
case SHADER_OPCODE_TXD:
|
||||
case SHADER_OPCODE_TXF:
|
||||
case SHADER_OPCODE_TXF_CMS_W:
|
||||
case SHADER_OPCODE_TXF_CMS:
|
||||
case SHADER_OPCODE_TXF_MCS:
|
||||
case SHADER_OPCODE_TXS:
|
||||
case SHADER_OPCODE_TG4:
|
||||
case SHADER_OPCODE_TG4_OFFSET:
|
||||
case SHADER_OPCODE_SAMPLEINFO:
|
||||
case VS_OPCODE_PULL_CONSTANT_LOAD:
|
||||
case GS_OPCODE_SET_PRIMITIVE_ID:
|
||||
case GS_OPCODE_GET_INSTANCE_ID:
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_READ:
|
||||
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -1267,6 +1267,9 @@ vec4_instruction_scheduler::calculate_deps()
|
||||
}
|
||||
}
|
||||
|
||||
if (inst->reads_g0_implicitly())
|
||||
add_dep(last_fixed_grf_write, n);
|
||||
|
||||
if (!inst->is_send_from_grf()) {
|
||||
for (int i = 0; i < inst->mlen; i++) {
|
||||
/* It looks like the MRF regs are released in the send
|
||||
|
@@ -2282,6 +2282,10 @@ anv_image_aspect_get_planes(VkImageAspectFlags aspect_mask)
|
||||
if (aspect_mask & VK_IMAGE_ASPECT_PLANE_2_BIT_KHR)
|
||||
planes++;
|
||||
|
||||
if ((aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) != 0 &&
|
||||
(aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0)
|
||||
planes++;
|
||||
|
||||
return planes;
|
||||
}
|
||||
|
||||
|
@@ -875,10 +875,10 @@ genX(cmd_buffer_setup_attachments)(struct anv_cmd_buffer *cmd_buffer,
|
||||
|
||||
struct anv_image_view *iview = framebuffer->attachments[i];
|
||||
anv_assert(iview->vk_format == att->format);
|
||||
anv_assert(iview->n_planes == 1);
|
||||
|
||||
union isl_color_value clear_color = { .u32 = { 0, } };
|
||||
if (att_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
|
||||
anv_assert(iview->n_planes == 1);
|
||||
assert(att_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
|
||||
color_attachment_compute_aux_usage(cmd_buffer->device,
|
||||
state, i, begin->renderArea,
|
||||
|
@@ -339,8 +339,11 @@ grow_buffer(struct brw_context *brw,
|
||||
/* We can't safely use realloc, as it may move the existing buffer,
|
||||
* breaking existing pointers the caller may still be using. Just
|
||||
* malloc a new copy and memcpy it like the normal BO path.
|
||||
*
|
||||
* Use bo->size rather than new_size because the bufmgr may have
|
||||
* rounded up the size, and we want the shadow size to match.
|
||||
*/
|
||||
grow->map = malloc(new_size);
|
||||
grow->map = malloc(new_bo->size);
|
||||
} else {
|
||||
grow->map = brw_bo_map(brw, new_bo, MAP_READ | MAP_WRITE);
|
||||
}
|
||||
|
@@ -501,6 +501,28 @@ debug_clear_group(struct gl_debug_state *debug)
|
||||
debug->Groups[gstack] = NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the oldest debug messages out of the log.
*
* \param debug  debug state whose Log ring is trimmed
* \param count  number of messages to remove; values larger than
*               Log.NumMessages are clamped to Log.NumMessages
*
* Each removed entry is released with debug_message_clear(), NumMessages is
* decremented and NextMessage advances, wrapping at
* MAX_DEBUG_LOGGED_MESSAGES.
*
* NOTE(review): a negative count is not clamped and is not guarded against;
* callers are presumably expected to pass count >= 0.
|
||||
*/
|
||||
static void
|
||||
debug_delete_messages(struct gl_debug_state *debug, int count)
|
||||
{
|
||||
struct gl_debug_log *log = &debug->Log;
|
||||
|
||||
/* Never remove more entries than the log currently holds. */
if (count > log->NumMessages)
|
||||
count = log->NumMessages;
|
||||
|
||||
while (count--) {
|
||||
/* NextMessage indexes the entry removed on this iteration. */
struct gl_debug_message *msg = &log->Messages[log->NextMessage];
|
||||
|
||||
debug_message_clear(msg);
|
||||
|
||||
log->NumMessages--;
|
||||
log->NextMessage++;
|
||||
/* Wrap the index so Messages[] is used as a circular buffer. */
log->NextMessage %= MAX_DEBUG_LOGGED_MESSAGES;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Loop through debug group stack tearing down states for
|
||||
* filtering debug messages. Then free debug output state.
|
||||
@@ -514,6 +536,7 @@ debug_destroy(struct gl_debug_state *debug)
|
||||
}
|
||||
|
||||
debug_clear_group(debug);
|
||||
debug_delete_messages(debug, debug->Log.NumMessages);
|
||||
free(debug);
|
||||
}
|
||||
|
||||
@@ -648,28 +671,6 @@ debug_fetch_message(const struct gl_debug_state *debug)
|
||||
return (log->NumMessages) ? &log->Messages[log->NextMessage] : NULL;
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete the oldest debug messages out of the log.
*
* \param debug  debug state whose Log ring is trimmed
* \param count  number of messages to remove; values larger than
*               Log.NumMessages are clamped to Log.NumMessages
*
* Each removed entry is released with debug_message_clear(), NumMessages is
* decremented and NextMessage advances, wrapping at
* MAX_DEBUG_LOGGED_MESSAGES.
*
* NOTE(review): a negative count is not clamped and is not guarded against;
* callers are presumably expected to pass count >= 0.
|
||||
*/
|
||||
static void
|
||||
debug_delete_messages(struct gl_debug_state *debug, int count)
|
||||
{
|
||||
struct gl_debug_log *log = &debug->Log;
|
||||
|
||||
/* Never remove more entries than the log currently holds. */
if (count > log->NumMessages)
|
||||
count = log->NumMessages;
|
||||
|
||||
while (count--) {
|
||||
/* NextMessage indexes the entry removed on this iteration. */
struct gl_debug_message *msg = &log->Messages[log->NextMessage];
|
||||
|
||||
debug_message_clear(msg);
|
||||
|
||||
log->NumMessages--;
|
||||
log->NextMessage++;
|
||||
/* Wrap the index so Messages[] is used as a circular buffer. */
log->NextMessage %= MAX_DEBUG_LOGGED_MESSAGES;
|
||||
}
|
||||
}
|
||||
|
||||
static struct gl_debug_message *
|
||||
debug_get_group_message(struct gl_debug_state *debug)
|
||||
{
|
||||
|
@@ -732,6 +732,6 @@ endif
|
||||
if with_glx == 'xlib'
|
||||
subdir('drivers/x11')
|
||||
endif
|
||||
if with_tests
|
||||
if with_tests and dri_drivers != []
|
||||
subdir('main/tests')
|
||||
endif
|
||||
|
@@ -7049,6 +7049,11 @@ st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
} while (progress);
|
||||
}
|
||||
|
||||
/* Do this again to lower ir_binop_vector_extract introduced
|
||||
* by optimization passes.
|
||||
*/
|
||||
do_vec_index_to_cond_assign(ir);
|
||||
|
||||
validate_ir_tree(ir);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user