Compare commits
43 Commits
mesa-12.0.5
...
mesa-12.0.6
Author | SHA1 | Date | |
---|---|---|---|
|
555885a0bf | ||
|
ab62405953 | ||
|
806de4a224 | ||
|
2b87bb9b90 | ||
|
689ca381b5 | ||
|
cc2894d376 | ||
|
febf22ff55 | ||
|
3c7b53bba3 | ||
|
c880deef41 | ||
|
09973d9a99 | ||
|
36a54c27fd | ||
|
57708155d2 | ||
|
cf18ee4fcc | ||
|
76816e70a9 | ||
|
c0934035a5 | ||
|
08a9f69a8b | ||
|
d780f89966 | ||
|
a02edabb67 | ||
|
70bb67febc | ||
|
9126479017 | ||
|
f76da483a2 | ||
|
4ac5633618 | ||
|
32d7a060fa | ||
|
eb96145c74 | ||
|
ddd048bbf5 | ||
|
236ecd3c4e | ||
|
81e78ee65c | ||
|
6ebb536800 | ||
|
89a8fd71af | ||
|
8a293e6a0c | ||
|
231ace7eec | ||
|
c07386e2c8 | ||
|
bb4195ca26 | ||
|
0386f956b3 | ||
|
eb9127d224 | ||
|
d37d8d81d5 | ||
|
630c41e2aa | ||
|
d278c15a17 | ||
|
ce56dfca9a | ||
|
3197612a1a | ||
|
6d919a6fc6 | ||
|
f71c3734ce | ||
|
6b1c3c3aa0 |
@@ -40,7 +40,7 @@ AM_DISTCHECK_CONFIGURE_FLAGS = \
|
||||
--enable-vdpau \
|
||||
--enable-xa \
|
||||
--enable-xvmc \
|
||||
--disable-llvm-shared-libs \
|
||||
--enable-llvm-shared-libs \
|
||||
--with-egl-platforms=x11,wayland,drm,surfaceless \
|
||||
--with-dri-drivers=i915,i965,nouveau,radeon,r200,swrast \
|
||||
--with-gallium-drivers=i915,ilo,nouveau,r300,r600,radeonsi,freedreno,svga,swrast,vc4,virgl,swr \
|
||||
|
39
bin/get-typod-pick-list.sh
Executable file
39
bin/get-typod-pick-list.sh
Executable file
@@ -0,0 +1,39 @@
|
||||
#!/bin/sh
|
||||
|
||||
# Script for generating a list of candidates which have typos in the nomination line
|
||||
#
|
||||
# Usage examples:
|
||||
#
|
||||
# $ bin/get-typod-pick-list.sh
|
||||
# $ bin/get-typod-pick-list.sh > picklist
|
||||
# $ bin/get-typod-pick-list.sh | tee picklist
|
||||
|
||||
# NB:
|
||||
# This script intentionally _never_ checks for a specific version tag.
|
||||
# Should we consider folding it with the original get-pick-list.sh?
|
||||
|
||||
# Grep for commits with "cherry picked from commit" in the commit message.
|
||||
git log --reverse --grep="cherry picked from commit" origin/master..HEAD |\
|
||||
grep "cherry picked from commit" |\
|
||||
sed -e 's/^[[:space:]]*(cherry picked from commit[[:space:]]*//' -e 's/)//' > already_picked
|
||||
|
||||
# Grep for commits whose CC: line mentions mesa-dev, i.e. a likely typo'd stable nomination.
|
||||
git log --reverse --pretty=%H -i --grep='^CC:.*mesa-dev' HEAD..origin/master |\
|
||||
while read sha
|
||||
do
|
||||
# Check to see whether the patch is on the ignore list.
|
||||
if [ -f bin/.cherry-ignore ] ; then
|
||||
if grep -q ^$sha bin/.cherry-ignore ; then
|
||||
continue
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check to see if it has already been picked over.
|
||||
if grep -q ^$sha already_picked ; then
|
||||
continue
|
||||
fi
|
||||
|
||||
git log -n1 --pretty=oneline $sha | cat
|
||||
done
|
||||
|
||||
rm -f already_picked
|
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
44d08a27d98bfeacd864381189e434d98afbf451689d01f80380dc1d66450e5b mesa-12.0.5.tar.gz
|
||||
2b0a972d8282860a11291c09c3ef01ac45171405951eb21a83c45ed2b4321924 mesa-12.0.5.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
147
docs/relnotes/12.0.6.html
Normal file
147
docs/relnotes/12.0.6.html
Normal file
@@ -0,0 +1,147 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 12.0.6 Release Notes / January 23, 2017</h1>
|
||||
|
||||
<p>
|
||||
Mesa 12.0.6 is a bug fix release which fixes bugs found since the 12.0.5 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 12.0.6 implements the OpenGL 4.3 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.3. OpenGL
|
||||
4.3 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92234">Bug 92234</a> - [BDW] GPU hang in Shogun2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=95130">Bug 95130</a> - Derivatives of gl_Color wrong when helper pixels used</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=98329">Bug 98329</a> - [dEQP, EGL, SKL, BDW, BSW] dEQP-EGL.functional.image.render_multiple_contexts.gles2_renderbuffer_depth16_depth_buffer</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99030">Bug 99030</a> - [HSW, regression] transform feedback fails on Linux 4.8</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=99354">Bug 99354</a> - [G71] "Assertion `bkref' failed" reproducible with glmark2</li>
|
||||
|
||||
</ul>
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Chad Versace (3):</p>
|
||||
<ul>
|
||||
<li>i965/mt: Disable aux surfaces after making miptree shareable</li>
|
||||
<li>i965/mt: Disable HiZ when sharing depth buffer externally (v2)</li>
|
||||
<li>anv: Handle vkGetPhysicalDeviceQueueFamilyProperties with count == 0</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (5):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 12.0.5</li>
|
||||
<li>get-typod-pick-list.sh: add new script</li>
|
||||
<li>automake: use shared llvm libs for make distcheck</li>
|
||||
<li>egl/wayland: use the destroy_window_callback for swrast</li>
|
||||
<li>Update version to 12.0.6</li>
|
||||
</ul>
|
||||
|
||||
<p>Fredrik Höglund (1):</p>
|
||||
<ul>
|
||||
<li>dri3: Fix MakeCurrent without a default framebuffer</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (1):</p>
|
||||
<ul>
|
||||
<li>nouveau: take extra push space into account for pushbuf_space calls</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (19):</p>
|
||||
<ul>
|
||||
<li>spirv/nir: Fix some texture opcode asserts</li>
|
||||
<li>spirv/nir: Add support for shadow samplers that return vec4</li>
|
||||
<li>spirv/nir: Properly handle gather components</li>
|
||||
<li>anv/pipeline: Set binding_table.gather_texture_start</li>
|
||||
<li>nir: Add a helper for determining the type of a texture source</li>
|
||||
<li>nir/lower_tex: Add some helpers for working with tex sources</li>
|
||||
<li>nir/lower_tex: Add support for lowering coordinate offsets</li>
|
||||
<li>i965/nir: Enable NIR lowering of txf and rect offsets</li>
|
||||
<li>i965: Get rid of the do_lower_unnormalized_offsets pass</li>
|
||||
<li>spirv/nir: Don't increment coord_components for array lod queries</li>
|
||||
<li>anv/image: Assert that the image format is actually supported</li>
|
||||
<li>spirv/nir: Move opcode selection higher up in handle_texture</li>
|
||||
<li>spirv/nir: Refactor type handling in handle_texture</li>
|
||||
<li>nir/spirv: Refactor coordinate handling in handle_texture</li>
|
||||
<li>spirv/nir: Handle texture projectors</li>
|
||||
<li>spirv/nir: Add support for ImageQuerySamples</li>
|
||||
<li>anv/device: Return the right error for failed maps</li>
|
||||
<li>anv/device: Implicitly unmap memory objects in FreeMemory</li>
|
||||
<li>anv/descriptor_set: Write the state offset in the surface state free list.</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>spirv: Move cursor before calling vtn_ssa_value() in phi 2nd pass.</li>
|
||||
<li>i965: Properly flush in hsw_pause_transform_feedback().</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (6):</p>
|
||||
<ul>
|
||||
<li>cso: don't release sampler states that are bound</li>
|
||||
<li>radeonsi: always restore sampler states when unbinding sampler views</li>
|
||||
<li>radeonsi: fix incorrect FMASK checking in bind_sampler_states</li>
|
||||
<li>radeonsi: disable CE on SI + AMDGPU</li>
|
||||
<li>radeonsi: disable the constant engine (CE) on Carrizo and Stoney</li>
|
||||
<li>gallium/radeon: fix the draw-calls HUD query</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Turner (3):</p>
|
||||
<ul>
|
||||
<li>i965/fs: Rename opt_copy_propagate -> opt_copy_propagation.</li>
|
||||
<li>i965/fs: Add unit tests for copy propagation pass.</li>
|
||||
<li>i965/fs: Reject copy propagation into SEL if not min/max.</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (1):</p>
|
||||
<ul>
|
||||
<li>cso: Don't restore nr_samplers in cso_restore_fragment_samplers</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (1):</p>
|
||||
<ul>
|
||||
<li>radeonsi: enable WQM in PS prolog when needed</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -1234,6 +1234,50 @@ nir_tex_instr_is_query(nir_tex_instr *instr)
|
||||
}
|
||||
}
|
||||
|
||||
static inline nir_alu_type
|
||||
nir_tex_instr_src_type(nir_tex_instr *instr, unsigned src)
|
||||
{
|
||||
switch (instr->src[src].src_type) {
|
||||
case nir_tex_src_coord:
|
||||
switch (instr->op) {
|
||||
case nir_texop_txf:
|
||||
case nir_texop_txf_ms:
|
||||
case nir_texop_txf_ms_mcs:
|
||||
case nir_texop_samples_identical:
|
||||
return nir_type_int;
|
||||
|
||||
default:
|
||||
return nir_type_float;
|
||||
}
|
||||
|
||||
case nir_tex_src_lod:
|
||||
switch (instr->op) {
|
||||
case nir_texop_txs:
|
||||
case nir_texop_txf:
|
||||
return nir_type_int;
|
||||
|
||||
default:
|
||||
return nir_type_float;
|
||||
}
|
||||
|
||||
case nir_tex_src_projector:
|
||||
case nir_tex_src_comparitor:
|
||||
case nir_tex_src_bias:
|
||||
case nir_tex_src_ddx:
|
||||
case nir_tex_src_ddy:
|
||||
return nir_type_float;
|
||||
|
||||
case nir_tex_src_offset:
|
||||
case nir_tex_src_ms_index:
|
||||
case nir_tex_src_texture_offset:
|
||||
case nir_tex_src_sampler_offset:
|
||||
return nir_type_int;
|
||||
|
||||
default:
|
||||
unreachable("Invalid texture source type");
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned
|
||||
nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
|
||||
{
|
||||
@@ -2344,6 +2388,16 @@ typedef struct nir_lower_tex_options {
|
||||
*/
|
||||
unsigned lower_txp;
|
||||
|
||||
/**
|
||||
* If true, lower away nir_tex_src_offset for all texelfetch instructions.
|
||||
*/
|
||||
bool lower_txf_offset;
|
||||
|
||||
/**
|
||||
* If true, lower away nir_tex_src_offset for all rect textures.
|
||||
*/
|
||||
bool lower_rect_offset;
|
||||
|
||||
/**
|
||||
* If true, lower rect textures to 2D, using txs to fetch the
|
||||
* texture dimensions and dividing the texture coords by the
|
||||
|
@@ -38,16 +38,39 @@
|
||||
#include "nir.h"
|
||||
#include "nir_builder.h"
|
||||
|
||||
static int
|
||||
tex_instr_find_src(nir_tex_instr *tex, nir_tex_src_type src_type)
|
||||
{
|
||||
for (unsigned i = 0; i < tex->num_srcs; i++) {
|
||||
if (tex->src[i].src_type == src_type)
|
||||
return i;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void
|
||||
tex_instr_remove_src(nir_tex_instr *tex, unsigned src_idx)
|
||||
{
|
||||
assert(src_idx < tex->num_srcs);
|
||||
|
||||
/* First rewrite the source to NIR_SRC_INIT */
|
||||
nir_instr_rewrite_src(&tex->instr, &tex->src[src_idx].src, NIR_SRC_INIT);
|
||||
|
||||
/* Now, move all of the other sources down */
|
||||
for (unsigned i = src_idx + 1; i < tex->num_srcs; i++) {
|
||||
tex->src[i-1].src_type = tex->src[i].src_type;
|
||||
nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
|
||||
}
|
||||
tex->num_srcs--;
|
||||
}
|
||||
|
||||
static void
|
||||
project_src(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
/* Find the projector in the srcs list, if present. */
|
||||
unsigned proj_index;
|
||||
for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
|
||||
if (tex->src[proj_index].src_type == nir_tex_src_projector)
|
||||
break;
|
||||
}
|
||||
if (proj_index == tex->num_srcs)
|
||||
int proj_index = tex_instr_find_src(tex, nir_tex_src_projector);
|
||||
if (proj_index < 0)
|
||||
return;
|
||||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
@@ -102,18 +125,57 @@ project_src(nir_builder *b, nir_tex_instr *tex)
|
||||
nir_src_for_ssa(projected));
|
||||
}
|
||||
|
||||
/* Now move the later tex sources down the array so that the projector
|
||||
* disappears.
|
||||
*/
|
||||
nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src,
|
||||
NIR_SRC_INIT);
|
||||
for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) {
|
||||
tex->src[i-1].src_type = tex->src[i].src_type;
|
||||
nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src);
|
||||
}
|
||||
tex->num_srcs--;
|
||||
tex_instr_remove_src(tex, proj_index);
|
||||
}
|
||||
|
||||
static bool
|
||||
lower_offset(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
int offset_index = tex_instr_find_src(tex, nir_tex_src_offset);
|
||||
if (offset_index < 0)
|
||||
return false;
|
||||
|
||||
int coord_index = tex_instr_find_src(tex, nir_tex_src_coord);
|
||||
assert(coord_index >= 0);
|
||||
|
||||
assert(tex->src[offset_index].src.is_ssa);
|
||||
assert(tex->src[coord_index].src.is_ssa);
|
||||
nir_ssa_def *offset = tex->src[offset_index].src.ssa;
|
||||
nir_ssa_def *coord = tex->src[coord_index].src.ssa;
|
||||
|
||||
b->cursor = nir_before_instr(&tex->instr);
|
||||
|
||||
nir_ssa_def *offset_coord;
|
||||
if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
|
||||
assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT);
|
||||
offset_coord = nir_fadd(b, coord, nir_i2f(b, offset));
|
||||
} else {
|
||||
offset_coord = nir_iadd(b, coord, offset);
|
||||
}
|
||||
|
||||
if (tex->is_array) {
|
||||
/* The offset is not applied to the array index */
|
||||
if (tex->coord_components == 2) {
|
||||
offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
|
||||
nir_channel(b, coord, 1));
|
||||
} else if (tex->coord_components == 3) {
|
||||
offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
|
||||
nir_channel(b, offset_coord, 1),
|
||||
nir_channel(b, coord, 2));
|
||||
} else {
|
||||
unreachable("Invalid number of components");
|
||||
}
|
||||
}
|
||||
|
||||
nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
|
||||
nir_src_for_ssa(offset_coord));
|
||||
|
||||
tex_instr_remove_src(tex, offset_index);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static nir_ssa_def *
|
||||
get_texture_size(nir_builder *b, nir_tex_instr *tex)
|
||||
{
|
||||
@@ -444,6 +506,12 @@ nir_lower_tex_block(nir_block *block, nir_builder *b,
|
||||
progress = true;
|
||||
}
|
||||
|
||||
if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
|
||||
(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT &&
|
||||
options->lower_rect_offset)) {
|
||||
progress = lower_offset(b, tex) || progress;
|
||||
}
|
||||
|
||||
if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && options->lower_rect) {
|
||||
lower_rect(b, tex);
|
||||
progress = true;
|
||||
|
@@ -1335,54 +1335,9 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
} else {
|
||||
image_type = sampled.sampler->var->var->interface_type;
|
||||
}
|
||||
|
||||
nir_tex_src srcs[8]; /* 8 should be enough */
|
||||
nir_tex_src *p = srcs;
|
||||
|
||||
unsigned idx = 4;
|
||||
|
||||
bool has_coord = false;
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleImplicitLod:
|
||||
case SpvOpImageSampleExplicitLod:
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjImplicitLod:
|
||||
case SpvOpImageSampleProjExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
case SpvOpImageFetch:
|
||||
case SpvOpImageGather:
|
||||
case SpvOpImageDrefGather:
|
||||
case SpvOpImageQueryLod: {
|
||||
/* All these types have the coordinate as their first real argument */
|
||||
struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]);
|
||||
has_coord = true;
|
||||
p->src = nir_src_for_ssa(coord->def);
|
||||
p->src_type = nir_tex_src_coord;
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* These all have an explicit depth value as their next source */
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* For OpImageQuerySizeLod, we always have an LOD */
|
||||
if (opcode == SpvOpImageQuerySizeLod)
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
|
||||
const enum glsl_sampler_dim sampler_dim = glsl_get_sampler_dim(image_type);
|
||||
const bool is_array = glsl_sampler_type_is_array(image_type);
|
||||
const bool is_shadow = glsl_sampler_type_is_shadow(image_type);
|
||||
|
||||
/* Figure out the base texture operation */
|
||||
nir_texop texop;
|
||||
@@ -1428,10 +1383,108 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
break;
|
||||
|
||||
case SpvOpImageQuerySamples:
|
||||
texop = nir_texop_texture_samples;
|
||||
break;
|
||||
|
||||
default:
|
||||
unreachable("Unhandled opcode");
|
||||
}
|
||||
|
||||
nir_tex_src srcs[8]; /* 8 should be enough */
|
||||
nir_tex_src *p = srcs;
|
||||
|
||||
unsigned idx = 4;
|
||||
|
||||
struct nir_ssa_def *coord;
|
||||
unsigned coord_components;
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleImplicitLod:
|
||||
case SpvOpImageSampleExplicitLod:
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjImplicitLod:
|
||||
case SpvOpImageSampleProjExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
case SpvOpImageFetch:
|
||||
case SpvOpImageGather:
|
||||
case SpvOpImageDrefGather:
|
||||
case SpvOpImageQueryLod: {
|
||||
/* All these types have the coordinate as their first real argument */
|
||||
switch (sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
case GLSL_SAMPLER_DIM_BUF:
|
||||
coord_components = 1;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
coord_components = 2;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
coord_components = 3;
|
||||
break;
|
||||
default:
|
||||
assert("Invalid sampler type");
|
||||
}
|
||||
|
||||
if (is_array && texop != nir_texop_lod)
|
||||
coord_components++;
|
||||
|
||||
coord = vtn_ssa_value(b, w[idx++])->def;
|
||||
p->src = nir_src_for_ssa(coord);
|
||||
p->src_type = nir_tex_src_coord;
|
||||
p++;
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
coord = NULL;
|
||||
coord_components = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleProjImplicitLod:
|
||||
case SpvOpImageSampleProjExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
/* These have the projector as the last coordinate component */
|
||||
p->src = nir_src_for_ssa(nir_channel(&b->nb, coord, coord_components));
|
||||
p->src_type = nir_tex_src_projector;
|
||||
p++;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
unsigned gather_component = 0;
|
||||
switch (opcode) {
|
||||
case SpvOpImageSampleDrefImplicitLod:
|
||||
case SpvOpImageSampleDrefExplicitLod:
|
||||
case SpvOpImageSampleProjDrefImplicitLod:
|
||||
case SpvOpImageSampleProjDrefExplicitLod:
|
||||
case SpvOpImageDrefGather:
|
||||
/* These all have an explicit depth value as their next source */
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor);
|
||||
break;
|
||||
|
||||
case SpvOpImageGather:
|
||||
/* This has a component as its next source */
|
||||
gather_component =
|
||||
vtn_value(b, w[idx++], vtn_value_type_constant)->constant->value.u[0];
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
/* For OpImageQuerySizeLod, we always have an LOD */
|
||||
if (opcode == SpvOpImageQuerySizeLod)
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
|
||||
|
||||
/* Now we need to handle some number of optional arguments */
|
||||
if (idx < count) {
|
||||
uint32_t operands = w[idx++];
|
||||
@@ -1444,12 +1497,12 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
|
||||
if (operands & SpvImageOperandsLodMask) {
|
||||
assert(texop == nir_texop_txl || texop == nir_texop_txf ||
|
||||
texop == nir_texop_txf_ms || texop == nir_texop_txs);
|
||||
texop == nir_texop_txs);
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod);
|
||||
}
|
||||
|
||||
if (operands & SpvImageOperandsGradMask) {
|
||||
assert(texop == nir_texop_tex);
|
||||
assert(texop == nir_texop_txl);
|
||||
texop = nir_texop_txd;
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx);
|
||||
(*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy);
|
||||
@@ -1476,35 +1529,13 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
|
||||
|
||||
memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src));
|
||||
|
||||
instr->sampler_dim = glsl_get_sampler_dim(image_type);
|
||||
instr->is_array = glsl_sampler_type_is_array(image_type);
|
||||
instr->is_shadow = glsl_sampler_type_is_shadow(image_type);
|
||||
instr->is_new_style_shadow = instr->is_shadow;
|
||||
|
||||
if (has_coord) {
|
||||
switch (instr->sampler_dim) {
|
||||
case GLSL_SAMPLER_DIM_1D:
|
||||
case GLSL_SAMPLER_DIM_BUF:
|
||||
instr->coord_components = 1;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_2D:
|
||||
case GLSL_SAMPLER_DIM_RECT:
|
||||
case GLSL_SAMPLER_DIM_MS:
|
||||
instr->coord_components = 2;
|
||||
break;
|
||||
case GLSL_SAMPLER_DIM_3D:
|
||||
case GLSL_SAMPLER_DIM_CUBE:
|
||||
instr->coord_components = 3;
|
||||
break;
|
||||
default:
|
||||
assert("Invalid sampler type");
|
||||
}
|
||||
|
||||
if (instr->is_array)
|
||||
instr->coord_components++;
|
||||
} else {
|
||||
instr->coord_components = 0;
|
||||
}
|
||||
instr->coord_components = coord_components;
|
||||
instr->sampler_dim = sampler_dim;
|
||||
instr->is_array = is_array;
|
||||
instr->is_shadow = is_shadow;
|
||||
instr->is_new_style_shadow =
|
||||
is_shadow && glsl_get_components(ret_type->type) == 1;
|
||||
instr->component = gather_component;
|
||||
|
||||
switch (glsl_get_sampler_result_type(image_type)) {
|
||||
case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break;
|
||||
|
@@ -527,12 +527,13 @@ vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode,
|
||||
nir_variable *phi_var = phi_entry->data;
|
||||
|
||||
for (unsigned i = 3; i < count; i += 2) {
|
||||
struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
|
||||
struct vtn_block *pred =
|
||||
vtn_value(b, w[i + 1], vtn_value_type_block)->block;
|
||||
|
||||
b->nb.cursor = nir_after_instr(&pred->end_nop->instr);
|
||||
|
||||
struct vtn_ssa_value *src = vtn_ssa_value(b, w[i]);
|
||||
|
||||
vtn_local_store(b, src, nir_deref_var_create(b, phi_var));
|
||||
}
|
||||
|
||||
|
@@ -1706,6 +1706,8 @@ dri2_wl_swrast_create_window_surface(_EGLDriver *drv, _EGLDisplay *disp,
|
||||
dri2_surf->format = WL_SHM_FORMAT_ARGB8888;
|
||||
|
||||
dri2_surf->wl_win = window;
|
||||
dri2_surf->wl_win->private = dri2_surf;
|
||||
dri2_surf->wl_win->destroy_window_callback = destroy_window_callback;
|
||||
|
||||
dri2_surf->base.Width = -1;
|
||||
dri2_surf->base.Height = -1;
|
||||
|
@@ -188,7 +188,9 @@ cso_insert_state(struct cso_cache *sc,
|
||||
void *state)
|
||||
{
|
||||
struct cso_hash *hash = _cso_hash_for_type(sc, type);
|
||||
sanitize_hash(sc, hash, type, sc->max_size);
|
||||
|
||||
if (type != CSO_SAMPLER)
|
||||
sanitize_hash(sc, hash, type, sc->max_size);
|
||||
|
||||
return cso_hash_insert(hash, hash_key, state);
|
||||
}
|
||||
|
@@ -1268,7 +1268,6 @@ cso_restore_fragment_samplers(struct cso_context *ctx)
|
||||
{
|
||||
struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
|
||||
|
||||
info->nr_samplers = ctx->nr_fragment_samplers_saved;
|
||||
memcpy(info->samplers, ctx->fragment_samplers_saved,
|
||||
sizeof(info->samplers));
|
||||
cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
|
||||
|
@@ -73,7 +73,7 @@ nouveau_vpe_fini(struct nouveau_decoder *dec) {
|
||||
if (!dec->cmds)
|
||||
return;
|
||||
|
||||
nouveau_pushbuf_space(push, 8, 2, 0);
|
||||
nouveau_pushbuf_space(push, 16, 2, 0);
|
||||
nouveau_bufctx_reset(dec->bufctx, NV31_VIDEO_BIND_CMD);
|
||||
|
||||
#define BCTX_ARGS dec->bufctx, NV31_VIDEO_BIND_CMD, NOUVEAU_BO_RD
|
||||
|
@@ -127,7 +127,7 @@ nv30_clear_render_target(struct pipe_context *pipe, struct pipe_surface *ps,
|
||||
|
||||
refn.bo = mt->base.bo;
|
||||
refn.flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_WR;
|
||||
if (nouveau_pushbuf_space(push, 16, 1, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 1, 0) ||
|
||||
nouveau_pushbuf_refn (push, &refn, 1))
|
||||
return;
|
||||
|
||||
|
@@ -431,7 +431,7 @@ nv30_transfer_rect_sifm(XFER_ARGS)
|
||||
si_arg |= NV03_SIFM_FORMAT_FILTER_BILINEAR;
|
||||
}
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32, 6, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 64, 6, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -516,7 +516,7 @@ nv30_transfer_rect_m2mf(XFER_ARGS)
|
||||
while (h) {
|
||||
unsigned lines = (h > 2047) ? 2047 : h;
|
||||
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -709,7 +709,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
|
||||
lines = (pages > 2047) ? 2047 : pages;
|
||||
pages -= lines;
|
||||
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
@@ -733,7 +733,7 @@ nv30_transfer_copy_data(struct nouveau_context *nv,
|
||||
}
|
||||
|
||||
if (size) {
|
||||
if (nouveau_pushbuf_space(push, 13, 2, 0) ||
|
||||
if (nouveau_pushbuf_space(push, 32, 2, 0) ||
|
||||
nouveau_pushbuf_refn (push, refs, 2))
|
||||
return;
|
||||
|
||||
|
@@ -294,7 +294,7 @@ nv50_clear_render_target(struct pipe_context *pipe,
|
||||
PUSH_DATAf(push, color->f[2]);
|
||||
PUSH_DATAf(push, color->f[3]);
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
|
||||
@@ -388,7 +388,7 @@ nv50_clear_depth_stencil(struct pipe_context *pipe,
|
||||
mode |= NV50_3D_CLEAR_BUFFERS_S;
|
||||
}
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32 + sf->depth, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64 + sf->depth, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, bo, mt->base.domain | NOUVEAU_BO_WR);
|
||||
@@ -742,7 +742,7 @@ nv50_clear_buffer(struct pipe_context *pipe,
|
||||
PUSH_DATAf(push, color.f[2]);
|
||||
PUSH_DATAf(push, color.f[3]);
|
||||
|
||||
if (nouveau_pushbuf_space(push, 32, 1, 0))
|
||||
if (nouveau_pushbuf_space(push, 64, 1, 0))
|
||||
return;
|
||||
|
||||
PUSH_REFN(push, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
|
@@ -636,7 +636,7 @@ nv50_draw_elements(struct nv50_context *nv50, bool shorten,
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (push, prim);
|
||||
|
||||
nouveau_pushbuf_space(push, 8, 0, 1);
|
||||
nouveau_pushbuf_space(push, 16, 0, 1);
|
||||
PUSH_REFN(push, buf->bo, NOUVEAU_BO_RD | buf->domain);
|
||||
|
||||
switch (index_size) {
|
||||
|
@@ -273,7 +273,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
|
||||
|
||||
/* So lets test if the fence is working? */
|
||||
nouveau_pushbuf_space(push[0], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[0], 16, 1, 0);
|
||||
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[0], SUBC_BSP(0x240), 3);
|
||||
PUSH_DATAh(push[0], dec->fence_bo->offset);
|
||||
@@ -284,7 +284,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[0], 0);
|
||||
PUSH_KICK (push[0]);
|
||||
|
||||
nouveau_pushbuf_space(push[1], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[1], 16, 1, 0);
|
||||
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[1], SUBC_VP(0x240), 3);
|
||||
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
|
||||
@@ -295,7 +295,7 @@ nv98_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[1], 0);
|
||||
PUSH_KICK (push[1]);
|
||||
|
||||
nouveau_pushbuf_space(push[2], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[2], 16, 1, 0);
|
||||
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NV04(push[2], SUBC_PPP(0x240), 3);
|
||||
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
|
||||
|
@@ -47,7 +47,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
int ret;
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
unsigned fence_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -61,10 +60,6 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!dec->bitplane_bo)
|
||||
num_refs--;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
|
||||
for (i = 0; i < num_buffers; i++)
|
||||
bsp_size += num_bytes[i];
|
||||
@@ -112,7 +107,7 @@ nv98_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
|
||||
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
|
||||
|
||||
nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 8) + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32, num_refs, 0);
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
bsp_addr = bsp_bo->offset >> 8;
|
||||
|
@@ -93,13 +93,8 @@ nv98_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_pushbuf *push = dec->pushbuf[2];
|
||||
unsigned ppp_caps = 0x10;
|
||||
unsigned fence_extra = 0;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
|
||||
nouveau_pushbuf_space(push, 32, 4, 0);
|
||||
|
||||
switch (codec) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG12: {
|
||||
|
@@ -76,7 +76,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
u32 fence_extra = 0, codec_extra = 0;
|
||||
u32 codec_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -88,10 +88,6 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
};
|
||||
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
|
||||
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
|
||||
codec_extra += 2;
|
||||
@@ -115,8 +111,7 @@ nv98_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
|
||||
nv98_decoder_kick_ref(dec, target);
|
||||
|
||||
nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
|
||||
6 + codec_extra + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
|
||||
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
|
@@ -403,7 +403,7 @@ nvc0_hw_get_query_result_resource(struct nvc0_context *nvc0,
|
||||
if (wait && hq->state != NVC0_HW_QUERY_STATE_READY)
|
||||
nvc0_hw_query_fifo_wait(nvc0, q);
|
||||
|
||||
nouveau_pushbuf_space(push, 16, 2, 0);
|
||||
nouveau_pushbuf_space(push, 32, 2, 0);
|
||||
PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
|
||||
PUSH_REFN (push, buf->bo, buf->domain | NOUVEAU_BO_WR);
|
||||
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 2);
|
||||
|
@@ -799,7 +799,7 @@ nvc0_draw_stream_output(struct nvc0_context *nvc0,
|
||||
}
|
||||
|
||||
while (num_instances--) {
|
||||
nouveau_pushbuf_space(push, 9, 0, 1);
|
||||
nouveau_pushbuf_space(push, 16, 0, 1);
|
||||
BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
|
||||
PUSH_DATA (push, mode);
|
||||
BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
|
||||
|
@@ -297,7 +297,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
|
||||
|
||||
/* So lets test if the fence is working? */
|
||||
nouveau_pushbuf_space(push[0], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[0], 16, 1, 0);
|
||||
PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
|
||||
PUSH_DATAh(push[0], dec->fence_bo->offset);
|
||||
@@ -308,7 +308,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[0], 0);
|
||||
PUSH_KICK (push[0]);
|
||||
|
||||
nouveau_pushbuf_space(push[1], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[1], 16, 1, 0);
|
||||
PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
|
||||
PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
|
||||
@@ -319,7 +319,7 @@ nvc0_create_decoder(struct pipe_context *context,
|
||||
PUSH_DATA (push[1], 0);
|
||||
PUSH_KICK (push[1]);
|
||||
|
||||
nouveau_pushbuf_space(push[2], 6, 1, 0);
|
||||
nouveau_pushbuf_space(push[2], 16, 1, 0);
|
||||
PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
|
||||
BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
|
||||
PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
|
||||
|
@@ -143,7 +143,6 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
uint32_t caps;
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
unsigned fence_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -157,15 +156,11 @@ nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!dec->bitplane_bo)
|
||||
num_refs--;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
caps = nouveau_vp3_bsp_end(dec, desc);
|
||||
|
||||
nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
|
||||
|
||||
nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32, num_refs, 0);
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
bsp_addr = bsp_bo->offset >> 8;
|
||||
|
@@ -93,13 +93,8 @@ nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct n
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_pushbuf *push = dec->pushbuf[2];
|
||||
unsigned ppp_caps = 0x10;
|
||||
unsigned fence_extra = 0;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
|
||||
nouveau_pushbuf_space(push, 32, 4, 0);
|
||||
|
||||
switch (codec) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG12: {
|
||||
|
@@ -76,7 +76,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
|
||||
struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
|
||||
struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
|
||||
u32 fence_extra = 0, codec_extra = 0;
|
||||
u32 codec_extra = 0;
|
||||
struct nouveau_pushbuf_refn bo_refs[] = {
|
||||
{ inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
{ dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
|
||||
@@ -88,10 +88,6 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
};
|
||||
int num_refs = ARRAY_SIZE(bo_refs) - !dec->fw_bo;
|
||||
|
||||
#if NOUVEAU_VP3_DEBUG_FENCE
|
||||
fence_extra = 4;
|
||||
#endif
|
||||
|
||||
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
|
||||
nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
|
||||
codec_extra += 2;
|
||||
@@ -115,8 +111,7 @@ nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
|
||||
if (!is_ref && (dec->refs[target->valid_ref].decoded_top && dec->refs[target->valid_ref].decoded_bottom))
|
||||
nvc0_decoder_kick_ref(dec, target);
|
||||
|
||||
nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
|
||||
6 + codec_extra + fence_extra + 2, num_refs, 0);
|
||||
nouveau_pushbuf_space(push, 32 + codec_extra, num_refs, 0);
|
||||
|
||||
nouveau_pushbuf_refn(push, bo_refs, num_refs);
|
||||
|
||||
|
@@ -1139,7 +1139,7 @@ err:
|
||||
static struct pipe_driver_query_info r600_driver_query_list[] = {
|
||||
X("num-compilations", NUM_COMPILATIONS, UINT64, CUMULATIVE),
|
||||
X("num-shaders-created", NUM_SHADERS_CREATED, UINT64, CUMULATIVE),
|
||||
X("draw-calls", DRAW_CALLS, UINT64, CUMULATIVE),
|
||||
X("draw-calls", DRAW_CALLS, UINT64, AVERAGE),
|
||||
X("requested-VRAM", REQUESTED_VRAM, BYTES, AVERAGE),
|
||||
X("requested-GTT", REQUESTED_GTT, BYTES, AVERAGE),
|
||||
X("buffer-wait-time", BUFFER_WAIT_TIME, MICROSECONDS, CUMULATIVE),
|
||||
|
@@ -311,6 +311,7 @@ static void si_set_sampler_view(struct si_context *sctx,
|
||||
unsigned slot, struct pipe_sampler_view *view)
|
||||
{
|
||||
struct si_sampler_view *rview = (struct si_sampler_view*)view;
|
||||
uint32_t *desc = views->desc.list + slot * 16;
|
||||
|
||||
if (view && view->texture && view->texture->target != PIPE_BUFFER &&
|
||||
G_008F28_COMPRESSION_EN(rview->state[6]) &&
|
||||
@@ -346,9 +347,14 @@ static void si_set_sampler_view(struct si_context *sctx,
|
||||
views->desc.enabled_mask |= 1u << slot;
|
||||
} else {
|
||||
pipe_sampler_view_reference(&views->views[slot], NULL);
|
||||
memcpy(views->desc.list + slot*16, null_texture_descriptor, 8*4);
|
||||
memcpy(desc, null_texture_descriptor, 8*4);
|
||||
/* Only clear the lower dwords of FMASK. */
|
||||
memcpy(views->desc.list + slot*16 + 8, null_texture_descriptor, 4*4);
|
||||
memcpy(desc + 8, null_texture_descriptor, 4*4);
|
||||
/* Re-set the sampler state if we are transitioning from FMASK. */
|
||||
if (views->sampler_states[slot])
|
||||
memcpy(desc + 12,
|
||||
views->sampler_states[slot], 4*4);
|
||||
|
||||
views->desc.enabled_mask &= ~(1u << slot);
|
||||
}
|
||||
|
||||
@@ -631,10 +637,10 @@ static void si_bind_sampler_states(struct pipe_context *ctx, unsigned shader,
|
||||
/* If FMASK is bound, don't overwrite it.
|
||||
* The sampler state will be set after FMASK is unbound.
|
||||
*/
|
||||
if (samplers->views.views[i] &&
|
||||
samplers->views.views[i]->texture &&
|
||||
samplers->views.views[i]->texture->target != PIPE_BUFFER &&
|
||||
((struct r600_texture*)samplers->views.views[i]->texture)->fmask.size)
|
||||
if (samplers->views.views[slot] &&
|
||||
samplers->views.views[slot]->texture &&
|
||||
samplers->views.views[slot]->texture->target != PIPE_BUFFER &&
|
||||
((struct r600_texture*)samplers->views.views[slot]->texture)->fmask.size)
|
||||
continue;
|
||||
|
||||
memcpy(desc->list + slot * 16 + 12, sstates[i]->val, 4*4);
|
||||
|
@@ -147,7 +147,12 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
|
||||
sctx->b.gfx.cs = ws->cs_create(sctx->b.ctx, RING_GFX,
|
||||
si_context_gfx_flush, sctx);
|
||||
|
||||
if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib) {
|
||||
/* SI + AMDGPU + CE = GPU hang */
|
||||
if (!(sscreen->b.debug_flags & DBG_NO_CE) && ws->cs_add_const_ib &&
|
||||
sscreen->b.chip_class != SI &&
|
||||
/* These can't use CE due to a power gating bug in the kernel. */
|
||||
sscreen->b.family != CHIP_CARRIZO &&
|
||||
sscreen->b.family != CHIP_STONEY) {
|
||||
sctx->ce_ib = ws->cs_add_const_ib(sctx->b.gfx.cs);
|
||||
if (!sctx->ce_ib)
|
||||
goto fail;
|
||||
|
@@ -7238,6 +7238,12 @@ static bool si_compile_ps_prolog(struct si_screen *sscreen,
|
||||
linear_sample[i], base + 10 + i, "");
|
||||
}
|
||||
|
||||
/* Tell LLVM to insert WQM instruction sequence when needed. */
|
||||
if (key->ps_prolog.wqm) {
|
||||
LLVMAddTargetDependentFunctionAttr(func,
|
||||
"amdgpu-ps-wqm-outputs", "");
|
||||
}
|
||||
|
||||
/* Compile. */
|
||||
LLVMBuildRet(gallivm->builder, ret);
|
||||
radeon_llvm_finalize_module(&ctx.radeon_bld);
|
||||
@@ -7388,6 +7394,9 @@ static bool si_shader_select_ps_parts(struct si_screen *sscreen,
|
||||
prolog_key.ps_prolog.colors_read = info->colors_read;
|
||||
prolog_key.ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
|
||||
prolog_key.ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
|
||||
prolog_key.ps_prolog.wqm = info->uses_derivatives &&
|
||||
(prolog_key.ps_prolog.colors_read ||
|
||||
prolog_key.ps_prolog.states.force_persample_interp);
|
||||
|
||||
if (info->colors_read) {
|
||||
unsigned *color = shader->selector->color_attr_index;
|
||||
|
@@ -355,6 +355,7 @@ union si_shader_part_key {
|
||||
unsigned colors_read:8; /* color input components read */
|
||||
unsigned num_interp_inputs:5; /* BCOLOR is at this location */
|
||||
unsigned face_vgpr_index:5;
|
||||
unsigned wqm:1;
|
||||
char color_attr_index[2];
|
||||
char color_interp_vgpr_index[2]; /* -1 == constant */
|
||||
} ps_prolog;
|
||||
|
@@ -209,18 +209,24 @@ dri3_bind_context(struct glx_context *context, struct glx_context *old,
|
||||
struct dri3_context *pcp = (struct dri3_context *) context;
|
||||
struct dri3_screen *psc = (struct dri3_screen *) pcp->base.psc;
|
||||
struct dri3_drawable *pdraw, *pread;
|
||||
__DRIdrawable *dri_draw = NULL, *dri_read = NULL;
|
||||
|
||||
pdraw = (struct dri3_drawable *) driFetchDrawable(context, draw);
|
||||
pread = (struct dri3_drawable *) driFetchDrawable(context, read);
|
||||
|
||||
driReleaseDrawables(&pcp->base);
|
||||
|
||||
if (pdraw == NULL || pread == NULL)
|
||||
if (pdraw)
|
||||
dri_draw = pdraw->loader_drawable.dri_drawable;
|
||||
else if (draw != None)
|
||||
return GLXBadDrawable;
|
||||
|
||||
if (!(*psc->core->bindContext) (pcp->driContext,
|
||||
pdraw->loader_drawable.dri_drawable,
|
||||
pread->loader_drawable.dri_drawable))
|
||||
if (pread)
|
||||
dri_read = pread->loader_drawable.dri_drawable;
|
||||
else if (read != None)
|
||||
return GLXBadDrawable;
|
||||
|
||||
if (!(*psc->core->bindContext) (pcp->driContext, dri_draw, dri_read))
|
||||
return GLXBadContext;
|
||||
|
||||
return Success;
|
||||
|
@@ -489,6 +489,7 @@ anv_descriptor_set_destroy(struct anv_device *device,
|
||||
struct surface_state_free_list_entry *entry =
|
||||
set->buffer_views[b].surface_state.map;
|
||||
entry->next = pool->surface_state_free_list;
|
||||
entry->offset = set->buffer_views[b].surface_state.offset;
|
||||
pool->surface_state_free_list = entry;
|
||||
}
|
||||
|
||||
|
@@ -24,6 +24,7 @@
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
#include <sys/mman.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
@@ -582,7 +583,14 @@ void anv_GetPhysicalDeviceQueueFamilyProperties(
|
||||
return;
|
||||
}
|
||||
|
||||
assert(*pCount >= 1);
|
||||
/* The spec implicitly allows the incoming count to be 0. From the Vulkan
|
||||
* 1.0.38 spec, Section 4.1 Physical Devices:
|
||||
*
|
||||
* If the value referenced by pQueueFamilyPropertyCount is not 0 [then
|
||||
* do stuff].
|
||||
*/
|
||||
if (*pCount == 0)
|
||||
return;
|
||||
|
||||
*pQueueFamilyProperties = (VkQueueFamilyProperties) {
|
||||
.queueFlags = VK_QUEUE_GRAPHICS_BIT |
|
||||
@@ -1160,6 +1168,9 @@ VkResult anv_AllocateMemory(
|
||||
|
||||
mem->type_index = pAllocateInfo->memoryTypeIndex;
|
||||
|
||||
mem->map = NULL;
|
||||
mem->map_size = 0;
|
||||
|
||||
*pMem = anv_device_memory_to_handle(mem);
|
||||
|
||||
return VK_SUCCESS;
|
||||
@@ -1181,6 +1192,9 @@ void anv_FreeMemory(
|
||||
if (mem == NULL)
|
||||
return;
|
||||
|
||||
if (mem->map)
|
||||
anv_UnmapMemory(_device, _mem);
|
||||
|
||||
if (mem->bo.map)
|
||||
anv_gem_munmap(mem->bo.map, mem->bo.size);
|
||||
|
||||
@@ -1227,8 +1241,12 @@ VkResult anv_MapMemory(
|
||||
/* Let's map whole pages */
|
||||
map_size = align_u64(map_size, 4096);
|
||||
|
||||
mem->map = anv_gem_mmap(device, mem->bo.gem_handle,
|
||||
map_offset, map_size, gem_flags);
|
||||
void *map = anv_gem_mmap(device, mem->bo.gem_handle,
|
||||
map_offset, map_size, gem_flags);
|
||||
if (map == MAP_FAILED)
|
||||
return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
|
||||
|
||||
mem->map = map;
|
||||
mem->map_size = map_size;
|
||||
|
||||
*ppData = mem->map + (offset - map_offset);
|
||||
@@ -1246,6 +1264,9 @@ void anv_UnmapMemory(
|
||||
return;
|
||||
|
||||
anv_gem_munmap(mem->map, mem->map_size);
|
||||
|
||||
mem->map = NULL;
|
||||
mem->map_size = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -88,10 +88,8 @@ anv_gem_mmap(struct anv_device *device, uint32_t gem_handle,
|
||||
};
|
||||
|
||||
int ret = anv_ioctl(device->fd, DRM_IOCTL_I915_GEM_MMAP, &gem_mmap);
|
||||
if (ret != 0) {
|
||||
/* FIXME: Is NULL the right error return? Cf MAP_INVALID */
|
||||
return NULL;
|
||||
}
|
||||
if (ret != 0)
|
||||
return MAP_FAILED;
|
||||
|
||||
VG(VALGRIND_MALLOCLIKE_BLOCK(gem_mmap.addr_ptr, gem_mmap.size, 0, 1));
|
||||
return (void *)(uintptr_t) gem_mmap.addr_ptr;
|
||||
|
@@ -129,10 +129,13 @@ make_surface(const struct anv_device *dev,
|
||||
image->extent = anv_sanitize_image_extent(vk_info->imageType,
|
||||
vk_info->extent);
|
||||
|
||||
enum isl_format format = anv_get_isl_format(&dev->info, vk_info->format,
|
||||
aspect, vk_info->tiling);
|
||||
assert(format != ISL_FORMAT_UNSUPPORTED);
|
||||
|
||||
ok = isl_surf_init(&dev->isl_dev, &anv_surf->isl,
|
||||
.dim = vk_to_isl_surf_dim[vk_info->imageType],
|
||||
.format = anv_get_isl_format(&dev->info, vk_info->format,
|
||||
aspect, vk_info->tiling),
|
||||
.format = format,
|
||||
.width = image->extent.width,
|
||||
.height = image->extent.height,
|
||||
.depth = image->extent.depth,
|
||||
|
@@ -392,6 +392,7 @@ anv_fill_binding_table(struct brw_stage_prog_data *prog_data, unsigned bias)
|
||||
{
|
||||
prog_data->binding_table.size_bytes = 0;
|
||||
prog_data->binding_table.texture_start = bias;
|
||||
prog_data->binding_table.gather_texture_start = bias;
|
||||
prog_data->binding_table.ubo_start = bias;
|
||||
prog_data->binding_table.ssbo_start = bias;
|
||||
prog_data->binding_table.image_start = bias;
|
||||
|
@@ -74,6 +74,7 @@ TEST_LIBS = \
|
||||
|
||||
TESTS = \
|
||||
test_fs_cmod_propagation \
|
||||
test_fs_copy_propagation \
|
||||
test_fs_saturate_propagation \
|
||||
test_eu_compact \
|
||||
test_vf_float_conversions \
|
||||
@@ -89,6 +90,12 @@ test_fs_cmod_propagation_LDADD = \
|
||||
$(top_builddir)/src/gtest/libgtest.la \
|
||||
$(TEST_LIBS)
|
||||
|
||||
test_fs_copy_propagation_SOURCES = \
|
||||
test_fs_copy_propagation.cpp
|
||||
test_fs_copy_propagation_LDADD = \
|
||||
$(top_builddir)/src/gtest/libgtest.la \
|
||||
$(TEST_LIBS)
|
||||
|
||||
test_fs_saturate_propagation_SOURCES = \
|
||||
test_fs_saturate_propagation.cpp
|
||||
test_fs_saturate_propagation_LDADD = \
|
||||
|
@@ -134,7 +134,6 @@ i965_FILES = \
|
||||
brw_gs_surface_state.c \
|
||||
brw_link.cpp \
|
||||
brw_lower_texture_gradients.cpp \
|
||||
brw_lower_unnormalized_offset.cpp \
|
||||
brw_meta_util.c \
|
||||
brw_meta_util.h \
|
||||
brw_misc_state.c \
|
||||
|
@@ -1824,7 +1824,6 @@ brw_program_reloc(struct brw_context *brw, uint32_t state_offset,
|
||||
bool brw_do_cubemap_normalize(struct exec_list *instructions);
|
||||
bool brw_lower_texture_gradients(struct brw_context *brw,
|
||||
struct exec_list *instructions);
|
||||
bool brw_do_lower_unnormalized_offset(struct exec_list *instructions);
|
||||
|
||||
extern const char * const conditional_modifier[16];
|
||||
extern const char *const pred_ctrl_align16[16];
|
||||
|
@@ -5829,7 +5829,7 @@ fs_visitor::optimize()
|
||||
|
||||
OPT(opt_algebraic);
|
||||
OPT(opt_cse);
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
OPT(opt_predicated_break, this);
|
||||
OPT(opt_cmod_propagation);
|
||||
OPT(dead_code_eliminate);
|
||||
@@ -5855,12 +5855,12 @@ fs_visitor::optimize()
|
||||
OPT(lower_logical_sends);
|
||||
|
||||
if (progress) {
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
/* Only run after logical send lowering because it's easier to implement
|
||||
* in terms of physical sends.
|
||||
*/
|
||||
if (OPT(opt_zero_samples))
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
/* Run after logical send lowering to give it a chance to CSE the
|
||||
* LOAD_PAYLOAD instructions created to construct the payloads of
|
||||
* e.g. texturing messages in cases where it wasn't possible to CSE the
|
||||
@@ -5889,7 +5889,7 @@ fs_visitor::optimize()
|
||||
}
|
||||
|
||||
if (OPT(lower_d2x)) {
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
OPT(dead_code_eliminate);
|
||||
}
|
||||
|
||||
@@ -5899,7 +5899,7 @@ fs_visitor::optimize()
|
||||
if (devinfo->gen <= 5 && OPT(lower_minmax)) {
|
||||
OPT(opt_cmod_propagation);
|
||||
OPT(opt_cse);
|
||||
OPT(opt_copy_propagate);
|
||||
OPT(opt_copy_propagation);
|
||||
OPT(dead_code_eliminate);
|
||||
}
|
||||
|
||||
|
@@ -133,11 +133,11 @@ public:
|
||||
bool opt_redundant_discard_jumps();
|
||||
bool opt_cse();
|
||||
bool opt_cse_local(bblock_t *block);
|
||||
bool opt_copy_propagate();
|
||||
bool opt_copy_propagation();
|
||||
bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry);
|
||||
bool try_constant_propagate(fs_inst *inst, acp_entry *entry);
|
||||
bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block,
|
||||
exec_list *acp);
|
||||
bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block,
|
||||
exec_list *acp);
|
||||
bool opt_drop_redundant_mov_to_flags();
|
||||
bool opt_register_renaming();
|
||||
bool register_coalesce();
|
||||
|
@@ -129,7 +129,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg,
|
||||
foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) {
|
||||
acp[next_acp] = entry;
|
||||
|
||||
/* opt_copy_propagate_local populates out_acp with copies created
|
||||
/* opt_copy_propagation_local populates out_acp with copies created
|
||||
* in a block which are still live at the end of the block. This
|
||||
* is exactly what we want in the COPY set.
|
||||
*/
|
||||
@@ -445,7 +445,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry)
|
||||
if (entry->saturate) {
|
||||
switch(inst->opcode) {
|
||||
case BRW_OPCODE_SEL:
|
||||
if (inst->src[1].file != IMM ||
|
||||
if ((inst->conditional_mod != BRW_CONDITIONAL_GE &&
|
||||
inst->conditional_mod != BRW_CONDITIONAL_L) ||
|
||||
inst->src[1].file != IMM ||
|
||||
inst->src[1].f < 0.0 ||
|
||||
inst->src[1].f > 1.0) {
|
||||
return false;
|
||||
@@ -759,8 +761,8 @@ can_propagate_from(fs_inst *inst)
|
||||
* list.
|
||||
*/
|
||||
bool
|
||||
fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
|
||||
exec_list *acp)
|
||||
fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block,
|
||||
exec_list *acp)
|
||||
{
|
||||
bool progress = false;
|
||||
|
||||
@@ -844,7 +846,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block,
|
||||
}
|
||||
|
||||
bool
|
||||
fs_visitor::opt_copy_propagate()
|
||||
fs_visitor::opt_copy_propagation()
|
||||
{
|
||||
bool progress = false;
|
||||
void *copy_prop_ctx = ralloc_context(NULL);
|
||||
@@ -857,8 +859,8 @@ fs_visitor::opt_copy_propagate()
|
||||
* the set of copies available at the end of the block.
|
||||
*/
|
||||
foreach_block (block, cfg) {
|
||||
progress = opt_copy_propagate_local(copy_prop_ctx, block,
|
||||
out_acp[block->num]) || progress;
|
||||
progress = opt_copy_propagation_local(copy_prop_ctx, block,
|
||||
out_acp[block->num]) || progress;
|
||||
}
|
||||
|
||||
/* Do dataflow analysis for those available copies. */
|
||||
@@ -877,7 +879,8 @@ fs_visitor::opt_copy_propagate()
|
||||
}
|
||||
}
|
||||
|
||||
progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress;
|
||||
progress = opt_copy_propagation_local(copy_prop_ctx, block, in_acp) ||
|
||||
progress;
|
||||
}
|
||||
|
||||
for (int i = 0; i < cfg->num_blocks; i++)
|
||||
|
@@ -126,7 +126,6 @@ process_glsl_ir(gl_shader_stage stage,
|
||||
do_vec_index_to_cond_assign(shader->ir);
|
||||
lower_vector_insert(shader->ir, true);
|
||||
lower_offset_arrays(shader->ir);
|
||||
brw_do_lower_unnormalized_offset(shader->ir);
|
||||
lower_noise(shader->ir);
|
||||
lower_quadop_vector(shader->ir, false);
|
||||
|
||||
|
@@ -1,106 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2013 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
/**
|
||||
* \file brw_lower_unnormalized_offset.cpp
|
||||
*
|
||||
* IR lower pass to convert a texture offset into an adjusted coordinate,
|
||||
* for use with unnormalized coordinates. At least the gather4* messages
|
||||
* on Ivybridge and Haswell make a mess with nonzero offsets.
|
||||
*
|
||||
* \author Chris Forbes <chrisf@ijw.co.nz>
|
||||
*/
|
||||
|
||||
#include "compiler/glsl_types.h"
|
||||
#include "compiler/glsl/ir.h"
|
||||
#include "compiler/glsl/ir_builder.h"
|
||||
|
||||
using namespace ir_builder;
|
||||
|
||||
class brw_lower_unnormalized_offset_visitor : public ir_hierarchical_visitor {
|
||||
public:
|
||||
brw_lower_unnormalized_offset_visitor()
|
||||
{
|
||||
progress = false;
|
||||
}
|
||||
|
||||
ir_visitor_status visit_leave(ir_texture *ir);
|
||||
|
||||
bool progress;
|
||||
};
|
||||
|
||||
ir_visitor_status
|
||||
brw_lower_unnormalized_offset_visitor::visit_leave(ir_texture *ir)
|
||||
{
|
||||
if (!ir->offset)
|
||||
return visit_continue;
|
||||
|
||||
if (ir->op == ir_tg4 || ir->op == ir_tex) {
|
||||
if (ir->sampler->type->sampler_dimensionality != GLSL_SAMPLER_DIM_RECT)
|
||||
return visit_continue;
|
||||
}
|
||||
else if (ir->op != ir_txf) {
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
void *mem_ctx = ralloc_parent(ir);
|
||||
|
||||
if (ir->op == ir_txf) {
|
||||
/* It appears that the ld instruction used for txf does its
|
||||
* address bounds check before adding in the offset. To work
|
||||
* around this, just add the integer offset to the integer texel
|
||||
* coordinate, and don't put the offset in the header.
|
||||
*/
|
||||
ir_variable *var = new(mem_ctx) ir_variable(ir->coordinate->type,
|
||||
"coordinate",
|
||||
ir_var_temporary);
|
||||
base_ir->insert_before(var);
|
||||
base_ir->insert_before(assign(var, ir->coordinate));
|
||||
base_ir->insert_before(assign(var,
|
||||
add(swizzle_for_size(var, ir->offset->type->vector_elements), ir->offset),
|
||||
(1 << ir->offset->type->vector_elements) - 1));
|
||||
|
||||
ir->coordinate = new(mem_ctx) ir_dereference_variable(var);
|
||||
} else {
|
||||
ir->coordinate = add(ir->coordinate, i2f(ir->offset));
|
||||
}
|
||||
|
||||
ir->offset = NULL;
|
||||
|
||||
progress = true;
|
||||
return visit_continue;
|
||||
}
|
||||
|
||||
extern "C" {
|
||||
|
||||
bool
|
||||
brw_do_lower_unnormalized_offset(exec_list *instructions)
|
||||
{
|
||||
brw_lower_unnormalized_offset_visitor v;
|
||||
|
||||
visit_list_elements(&v, instructions);
|
||||
|
||||
return v.progress;
|
||||
}
|
||||
|
||||
}
|
@@ -419,6 +419,8 @@ brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
|
||||
|
||||
static const nir_lower_tex_options tex_options = {
|
||||
.lower_txp = ~0,
|
||||
.lower_txf_offset = true,
|
||||
.lower_rect_offset = true,
|
||||
};
|
||||
|
||||
OPT(nir_lower_tex, &tex_options);
|
||||
|
@@ -201,6 +201,9 @@ hsw_pause_transform_feedback(struct gl_context *ctx,
|
||||
(struct brw_transform_feedback_object *) obj;
|
||||
|
||||
if (brw->is_haswell) {
|
||||
/* Flush any drawing so that the counters have the right values. */
|
||||
brw_emit_mi_flush(brw);
|
||||
|
||||
/* Save the SOL buffer offset register values. */
|
||||
for (int i = 0; i < BRW_MAX_XFB_STREAMS; i++) {
|
||||
BEGIN_BATCH(3);
|
||||
|
@@ -984,6 +984,19 @@ intel_miptree_reference(struct intel_mipmap_tree **dst,
|
||||
*dst = src;
|
||||
}
|
||||
|
||||
static void
|
||||
intel_miptree_hiz_buffer_free(struct intel_miptree_aux_buffer *hiz_buf)
|
||||
{
|
||||
if (hiz_buf == NULL)
|
||||
return;
|
||||
|
||||
if (hiz_buf->mt)
|
||||
intel_miptree_release(&hiz_buf->mt);
|
||||
else
|
||||
drm_intel_bo_unreference(hiz_buf->bo);
|
||||
|
||||
free(hiz_buf);
|
||||
}
|
||||
|
||||
void
|
||||
intel_miptree_release(struct intel_mipmap_tree **mt)
|
||||
@@ -999,13 +1012,7 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
|
||||
|
||||
drm_intel_bo_unreference((*mt)->bo);
|
||||
intel_miptree_release(&(*mt)->stencil_mt);
|
||||
if ((*mt)->hiz_buf) {
|
||||
if ((*mt)->hiz_buf->mt)
|
||||
intel_miptree_release(&(*mt)->hiz_buf->mt);
|
||||
else
|
||||
drm_intel_bo_unreference((*mt)->hiz_buf->bo);
|
||||
free((*mt)->hiz_buf);
|
||||
}
|
||||
intel_miptree_hiz_buffer_free((*mt)->hiz_buf);
|
||||
intel_miptree_release(&(*mt)->mcs_mt);
|
||||
intel_resolve_map_clear(&(*mt)->hiz_map);
|
||||
|
||||
@@ -2184,6 +2191,8 @@ intel_miptree_resolve_color(struct brw_context *brw,
|
||||
* then discard the MCS buffer, if present. We also set the fast_clear_state
|
||||
* to INTEL_FAST_CLEAR_STATE_NO_MCS to ensure that no MCS buffer gets
|
||||
* allocated in the future.
|
||||
*
|
||||
* HiZ is similarly unsafe with shared buffers.
|
||||
*/
|
||||
void
|
||||
intel_miptree_make_shareable(struct brw_context *brw,
|
||||
@@ -2201,6 +2210,14 @@ intel_miptree_make_shareable(struct brw_context *brw,
|
||||
intel_miptree_release(&mt->mcs_mt);
|
||||
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS;
|
||||
}
|
||||
|
||||
if (mt->hiz_buf) {
|
||||
intel_miptree_all_slices_resolve_depth(brw, mt);
|
||||
intel_miptree_hiz_buffer_free(mt->hiz_buf);
|
||||
mt->hiz_buf = NULL;
|
||||
}
|
||||
|
||||
mt->disable_aux_buffers = true;
|
||||
}
|
||||
|
||||
|
||||
|
213
src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp
Normal file
213
src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp
Normal file
@@ -0,0 +1,213 @@
|
||||
/*
|
||||
* Copyright © 2016 Intel Corporation
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice (including the next
|
||||
* paragraph) shall be included in all copies or substantial portions of the
|
||||
* Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
||||
* IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include "brw_fs.h"
|
||||
#include "brw_cfg.h"
|
||||
#include "program/program.h"
|
||||
|
||||
using namespace brw;
|
||||
|
||||
class copy_propagation_test : public ::testing::Test {
|
||||
virtual void SetUp();
|
||||
|
||||
public:
|
||||
struct brw_compiler *compiler;
|
||||
struct brw_device_info *devinfo;
|
||||
struct gl_context *ctx;
|
||||
struct brw_wm_prog_data *prog_data;
|
||||
struct gl_shader_program *shader_prog;
|
||||
fs_visitor *v;
|
||||
};
|
||||
|
||||
class copy_propagation_fs_visitor : public fs_visitor
|
||||
{
|
||||
public:
|
||||
copy_propagation_fs_visitor(struct brw_compiler *compiler,
|
||||
struct brw_wm_prog_data *prog_data,
|
||||
nir_shader *shader)
|
||||
: fs_visitor(compiler, NULL, NULL, NULL,
|
||||
&prog_data->base, (struct gl_program *) NULL,
|
||||
shader, 8, -1) {}
|
||||
};
|
||||
|
||||
|
||||
void copy_propagation_test::SetUp()
|
||||
{
|
||||
ctx = (struct gl_context *)calloc(1, sizeof(*ctx));
|
||||
compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler));
|
||||
devinfo = (struct brw_device_info *)calloc(1, sizeof(*devinfo));
|
||||
compiler->devinfo = devinfo;
|
||||
|
||||
prog_data = ralloc(NULL, struct brw_wm_prog_data);
|
||||
nir_shader *shader =
|
||||
nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
|
||||
|
||||
v = new copy_propagation_fs_visitor(compiler, prog_data, shader);
|
||||
|
||||
devinfo->gen = 4;
|
||||
}
|
||||
|
||||
/**
 * Return the instruction reached by starting at block->start() and following
 * the `next` link \p num times.  No bounds checking is performed.
 */
static fs_inst *
instruction(bblock_t *block, int num)
{
   fs_inst *cursor = (fs_inst *)block->start();

   int remaining = num;
   while (remaining > 0) {
      cursor = (fs_inst *)cursor->next;
      remaining--;
   }

   return cursor;
}
|
||||
|
||||
static bool
|
||||
copy_propagation(fs_visitor *v)
|
||||
{
|
||||
const bool print = getenv("TEST_DEBUG");
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "= Before =\n");
|
||||
v->cfg->dump(v);
|
||||
}
|
||||
|
||||
bool ret = v->opt_copy_propagation();
|
||||
|
||||
if (print) {
|
||||
fprintf(stderr, "\n= After =\n");
|
||||
v->cfg->dump(v);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* A MOV followed by an ADD that reads the MOV's destination: after the pass
 * the ADD is expected to read the MOV's source directly.
 */
TEST_F(copy_propagation_test, basic)
{
   const fs_builder &builder = v->bld;

   /* Registers are requested in this fixed order. */
   fs_reg vgrf0 = v->vgrf(glsl_type::float_type);
   fs_reg vgrf1 = v->vgrf(glsl_type::float_type);
   fs_reg vgrf2 = v->vgrf(glsl_type::float_type);
   fs_reg vgrf3 = v->vgrf(glsl_type::float_type);

   builder.MOV(vgrf0, vgrf2);
   builder.ADD(vgrf1, vgrf0, vgrf3);

   /* = Before =
    *
    * 0: mov(8)        vgrf0  vgrf2
    * 1: add(8)        vgrf1  vgrf0  vgrf3
    *
    * = After =
    * 0: mov(8)        vgrf0  vgrf2
    * 1: add(8)        vgrf1  vgrf2  vgrf3
    */

   v->calculate_cfg();
   bblock_t *block0 = v->cfg->blocks[0];

   /* Both instructions sit in the first block before the pass... */
   EXPECT_EQ(0, block0->start_ip);
   EXPECT_EQ(1, block0->end_ip);

   /* ...the pass must report progress, and the block extent must not move. */
   EXPECT_TRUE(copy_propagation(v));
   EXPECT_EQ(0, block0->start_ip);
   EXPECT_EQ(1, block0->end_ip);

   /* The MOV itself is left untouched. */
   fs_inst *mov = instruction(block0, 0);
   EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
   EXPECT_TRUE(mov->dst.equals(vgrf0));
   EXPECT_TRUE(mov->src[0].equals(vgrf2));

   /* The ADD's first source is now vgrf2 instead of vgrf0. */
   fs_inst *add = instruction(block0, 1);
   EXPECT_EQ(BRW_OPCODE_ADD, add->opcode);
   EXPECT_TRUE(add->dst.equals(vgrf1));
   EXPECT_TRUE(add->src[0].equals(vgrf2));
   EXPECT_TRUE(add->src[1].equals(vgrf3));
}
|
||||
|
||||
/* Table-driven test: a saturating MOV feeds a SEL against an immediate.  For
 * each row the pass result, the SEL's saturate flag and the SEL's first
 * source are compared against the row's expected_result.
 */
TEST_F(copy_propagation_test, maxmax_sat_imm)
{
   const fs_builder &builder = v->bld;

   fs_reg vgrf0 = v->vgrf(glsl_type::float_type);
   fs_reg vgrf1 = v->vgrf(glsl_type::float_type);
   fs_reg vgrf2 = v->vgrf(glsl_type::float_type);

   static const struct {
      enum brw_conditional_mod conditional_mod;
      float immediate;
      bool expected_result;
   } test[] = {
      /*   conditional mod,     imm, expected_result */
      { BRW_CONDITIONAL_GE  ,  0.1f, true  },
      { BRW_CONDITIONAL_L   ,  0.1f, true  },
      { BRW_CONDITIONAL_GE  ,  0.5f, true  },
      { BRW_CONDITIONAL_L   ,  0.5f, true  },
      { BRW_CONDITIONAL_GE  ,  0.9f, true  },
      { BRW_CONDITIONAL_L   ,  0.9f, true  },
      { BRW_CONDITIONAL_GE  , -1.5f, false },
      { BRW_CONDITIONAL_L   , -1.5f, false },
      { BRW_CONDITIONAL_GE  ,  1.5f, false },
      { BRW_CONDITIONAL_L   ,  1.5f, false },

      { BRW_CONDITIONAL_NONE,  0.5f, false },
      { BRW_CONDITIONAL_Z   ,  0.5f, false },
      { BRW_CONDITIONAL_NZ  ,  0.5f, false },
      { BRW_CONDITIONAL_G   ,  0.5f, false },
      { BRW_CONDITIONAL_LE  ,  0.5f, false },
      { BRW_CONDITIONAL_R   ,  0.5f, false },
      { BRW_CONDITIONAL_O   ,  0.5f, false },
      { BRW_CONDITIONAL_U   ,  0.5f, false },
   };

   const unsigned num_tests = sizeof(test) / sizeof(test[0]);

   for (unsigned i = 0; i < num_tests; ++i) {
      /* Emit the two-instruction sequence for this row. */
      fs_inst *mov = set_saturate(true, builder.MOV(vgrf0, vgrf1));
      fs_inst *sel = set_condmod(test[i].conditional_mod,
                                 builder.SEL(vgrf2, vgrf0,
                                             brw_imm_f(test[i].immediate)));

      v->calculate_cfg();

      bblock_t *block0 = v->cfg->blocks[0];

      EXPECT_EQ(0, block0->start_ip);
      EXPECT_EQ(1, block0->end_ip);

      /* The pass must report progress exactly when the row says so, and
       * must leave the block extent unchanged either way.
       */
      EXPECT_EQ(test[i].expected_result, copy_propagation(v));
      EXPECT_EQ(0, block0->start_ip);
      EXPECT_EQ(1, block0->end_ip);

      /* The MOV is never modified. */
      EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode);
      EXPECT_TRUE(mov->saturate);
      EXPECT_TRUE(mov->dst.equals(vgrf0));
      EXPECT_TRUE(mov->src[0].equals(vgrf1));

      /* The SEL becomes saturating and reads vgrf1 only on expected rows. */
      EXPECT_EQ(BRW_OPCODE_SEL, sel->opcode);
      EXPECT_EQ(test[i].conditional_mod, sel->conditional_mod);
      EXPECT_EQ(test[i].expected_result, sel->saturate);
      EXPECT_TRUE(sel->dst.equals(vgrf2));
      if (!test[i].expected_result) {
         EXPECT_TRUE(sel->src[0].equals(vgrf0));
      } else {
         EXPECT_TRUE(sel->src[0].equals(vgrf1));
      }
      EXPECT_TRUE(sel->src[1].equals(brw_imm_f(test[i].immediate)));

      /* Discard this row's CFG; the next iteration calls calculate_cfg()
       * again.
       */
      delete v->cfg;
      v->cfg = NULL;
   }
}
|
Reference in New Issue
Block a user