Compare commits
65 Commits
mesa-11.0.7
...
mesa-11.0.8
Author | SHA1 | Date | |
---|---|---|---|
|
261daab6b4 | ||
|
9f3bb782c6 | ||
|
94ac4b3e84 | ||
|
d126fffe9d | ||
|
4b4ca9ca38 | ||
|
9c9e843733 | ||
|
0f98683c83 | ||
|
eff2eea145 | ||
|
0452dcd92d | ||
|
86f18de1c0 | ||
|
494da6217b | ||
|
b8bac2f99b | ||
|
32a7c9c9fb | ||
|
a82422f4a4 | ||
|
6777c64548 | ||
|
b26945c2ed | ||
|
646be4a262 | ||
|
62060f0809 | ||
|
b6aeef5e43 | ||
|
3e1e68f2e1 | ||
|
6367271f75 | ||
|
e1a5b7a863 | ||
|
e9ebfebb79 | ||
|
58f914c506 | ||
|
00fec0e4e1 | ||
|
276bd08900 | ||
|
97df531987 | ||
|
0eff2eb2ae | ||
|
ac72425a1e | ||
|
fd7be2d07c | ||
|
09c4907da0 | ||
|
e7960ad194 | ||
|
e86c612691 | ||
|
10773ed249 | ||
|
ea8d4b0f4e | ||
|
f35a84ba31 | ||
|
4acb394f45 | ||
|
683d65dae3 | ||
|
147c3fbdb3 | ||
|
0ae22b3ebd | ||
|
34cbde2e63 | ||
|
0b82519b48 | ||
|
accb4cdb3b | ||
|
c18d27b720 | ||
|
b33f009557 | ||
|
9c049c3ff2 | ||
|
52aa4cc42d | ||
|
a322e3b115 | ||
|
fd2cf11ba8 | ||
|
65f8299459 | ||
|
538c06282f | ||
|
49eab2dfaf | ||
|
1d35278829 | ||
|
b8e398d4eb | ||
|
837f316ec7 | ||
|
75d5558cc3 | ||
|
d5420e7545 | ||
|
c124cda443 | ||
|
b21a5a37b8 | ||
|
b9dbe20910 | ||
|
3965a21e95 | ||
|
1ee592b095 | ||
|
ad43f5a524 | ||
|
9aee0ceb3a | ||
|
f9715bc449 |
@@ -3,3 +3,6 @@
|
||||
|
||||
# Somewhat of a mixed feature/bugfix patch, causing some 200 piglit regressions
|
||||
2b676570960277d47477822ffeccc672613f9142 gallium/swrast: fix front buffer blitting. (v2)
|
||||
|
||||
# causes regression in xwayland, kde/plasma, mpv, steam ... fdo#92759
|
||||
839793680f99b8387bee9489733d5071c10f3ace i965: Use MESA_FORMAT_B8G8R8X8_SRGB for RGB visuals
|
||||
|
19
configure.ac
19
configure.ac
@@ -97,7 +97,7 @@ AC_PROG_CXX
|
||||
AM_PROG_CC_C_O
|
||||
AM_PROG_AS
|
||||
AX_CHECK_GNU_MAKE
|
||||
AC_CHECK_PROGS([PYTHON2], [python2 python])
|
||||
AC_CHECK_PROGS([PYTHON2], [python2.7 python2 python])
|
||||
AC_PROG_SED
|
||||
AC_PROG_MKDIR_P
|
||||
|
||||
@@ -375,10 +375,11 @@ save_CFLAGS="$CFLAGS"
|
||||
CFLAGS="$SSE41_CFLAGS $CFLAGS"
|
||||
AC_COMPILE_IFELSE([AC_LANG_SOURCE([[
|
||||
#include <smmintrin.h>
|
||||
int param;
|
||||
int main () {
|
||||
__m128i a = _mm_set1_epi32 (0), b = _mm_set1_epi32 (0), c;
|
||||
__m128i a = _mm_set1_epi32 (param), b = _mm_set1_epi32 (param + 1), c;
|
||||
c = _mm_max_epu32(a, b);
|
||||
return 0;
|
||||
return _mm_cvtsi128_si32(c);
|
||||
}]])], SSE41_SUPPORTED=1)
|
||||
CFLAGS="$save_CFLAGS"
|
||||
if test "x$SSE41_SUPPORTED" = x1; then
|
||||
@@ -1529,7 +1530,15 @@ AC_ARG_WITH([clang-libdir],
|
||||
[CLANG_LIBDIR=''])
|
||||
|
||||
PKG_CHECK_EXISTS([libclc], [have_libclc=yes], [have_libclc=no])
|
||||
AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;ELF_LIB=-lelf])
|
||||
PKG_CHECK_MODULES([LIBELF], [libelf], [have_libelf=yes], [have_libelf=no])
|
||||
|
||||
if test "x$have_libelf" = xno; then
|
||||
LIBELF_LIBS=''
|
||||
LIBELF_CFLAGS=''
|
||||
AC_CHECK_LIB([elf], [elf_memory], [have_libelf=yes;LIBELF_LIBS=-lelf], [have_libelf=no])
|
||||
AC_SUBST([LIBELF_LIBS])
|
||||
AC_SUBST([LIBELF_CFLAGS])
|
||||
fi
|
||||
|
||||
if test "x$enable_opencl" = xyes; then
|
||||
if test -z "$with_gallium_drivers"; then
|
||||
@@ -2127,8 +2136,6 @@ if test "x$USE_VC4_SIMULATOR" = xyes -a "x$HAVE_GALLIUM_ILO" = xyes; then
|
||||
AC_MSG_ERROR([VC4 simulator on x86 replaces i965 driver build, so ilo must be disabled.])
|
||||
fi
|
||||
|
||||
AC_SUBST([ELF_LIB])
|
||||
|
||||
AM_CONDITIONAL(HAVE_LIBDRM, test "x$have_libdrm" = xyes)
|
||||
AM_CONDITIONAL(HAVE_X11_DRIVER, test "x$enable_xlib_glx" = xyes)
|
||||
AM_CONDITIONAL(HAVE_OSMESA, test "x$enable_osmesa" = xyes)
|
||||
|
@@ -31,7 +31,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
07c27004ff68b288097d17b2faa7bdf15ec73c96b7e6c9835266e544adf0a62f mesa-11.0.7.tar.gz
|
||||
e7e90a332ede6c8fd08eff90786a3fd1605a4e62ebf3a9b514047838194538cb mesa-11.0.7.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
199
docs/relnotes/11.0.8.html
Normal file
199
docs/relnotes/11.0.8.html
Normal file
@@ -0,0 +1,199 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.8 Release Notes / December 9, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.8 is a bug fix release which fixes bugs found since the 11.0.7 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.8 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91806">Bug 91806</a> - configure does not test whether assembler supports sse4.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92849">Bug 92849</a> - [IVB HSW BDW] piglit image load/store load-from-cleared-image.shader_test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92909">Bug 92909</a> - Offset/alignment issue with layout std140 and vec3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93004">Bug 93004</a> - Guild Wars 2 crash on nouveau DX11 cards</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93215">Bug 93215</a> - [Regression bisected] Ogles1conform Automatic mipmap generation test is fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=93266">Bug 93266</a> - gl_arb_shading_language_420pack does not allow binding of image variables</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Boyuan Zhang (1):</p>
|
||||
<ul>
|
||||
<li>radeon/uvd: uv pitch separation for stoney</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (9):</p>
|
||||
<ul>
|
||||
<li>r600: do SQ flush ES ring rolling workaround</li>
|
||||
<li>r600: SMX returns CONTEXT_DONE early workaround</li>
|
||||
<li>r600/shader: split address get out to a function.</li>
|
||||
<li>r600/shader: add utility functions to do single slot arithmatic</li>
|
||||
<li>r600g: fix geom shader input indirect indexing.</li>
|
||||
<li>r600: handle geometry dynamic input array index</li>
|
||||
<li>radeonsi: handle doubles in lds load path.</li>
|
||||
<li>mesa/varray: set double arrays to non-normalised.</li>
|
||||
<li>mesa/shader: return correct attribute location for double matrix arrays</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (8):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.7</li>
|
||||
<li>cherry-ignore: don't pick a specific i965 formats patch</li>
|
||||
<li>Revert "i965/nir: Remove unused indirect handling"</li>
|
||||
<li>Revert "i965/state: Get rid of dword_pitch arguments to buffer functions"</li>
|
||||
<li>Revert "i965/vec4: Use a stride of 1 and byte offsets for UBOs"</li>
|
||||
<li>Revert "i965/fs: Use a stride of 1 and byte offsets for UBOs"</li>
|
||||
<li>Revert "i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge"</li>
|
||||
<li>Update version to 11.0.8</li>
|
||||
</ul>
|
||||
|
||||
<p>Francisco Jerez (1):</p>
|
||||
<ul>
|
||||
<li>i965: Resolve color and flush for all active shader images in intel_update_state().</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (1):</p>
|
||||
<ul>
|
||||
<li>meta/generate_mipmap: Work-around GLES 1.x problem with GL_DRAW_FRAMEBUFFER</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (17):</p>
|
||||
<ul>
|
||||
<li>freedreno/a4xx: support lod_bias</li>
|
||||
<li>freedreno/a4xx: fix 5_5_5_1 texture sampler format</li>
|
||||
<li>freedreno/a4xx: point regid to "red" even for alpha-only rb formats</li>
|
||||
<li>nvc0/ir: fold postfactor into immediate</li>
|
||||
<li>nv50/ir: deal with loops with no breaks</li>
|
||||
<li>nv50/ir: the mad source might not have a defining instruction</li>
|
||||
<li>nv50/ir: fix instruction permutation logic</li>
|
||||
<li>nv50/ir: don't forget to mark flagsDef on cvt in txb lowering</li>
|
||||
<li>nv50/ir: fix DCE to not generate 96-bit loads</li>
|
||||
<li>nv50/ir: avoid looking at uninitialized srcMods entries</li>
|
||||
<li>gk110/ir: fix imul hi emission with limm arg</li>
|
||||
<li>gk104/ir: sampler doesn't matter for txf</li>
|
||||
<li>gk110/ir: fix imad sat/hi flag emission for immediate args</li>
|
||||
<li>nv50/ir: fix cutoff for using r63 vs r127 when replacing zero</li>
|
||||
<li>nv50/ir: can't have predication and immediates</li>
|
||||
<li>glsl: assign varying locations to tess shaders when doing SSO</li>
|
||||
<li>ttn: add TEX2 support</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (5):</p>
|
||||
<ul>
|
||||
<li>i965/vec4: Use byte offsets for UBO pulls on Sandy Bridge</li>
|
||||
<li>i965/fs: Use a stride of 1 and byte offsets for UBOs</li>
|
||||
<li>i965/vec4: Use a stride of 1 and byte offsets for UBOs</li>
|
||||
<li>i965/state: Get rid of dword_pitch arguments to buffer functions</li>
|
||||
<li>i965/nir: Remove unused indirect handling</li>
|
||||
</ul>
|
||||
|
||||
<p>Jonathan Gray (2):</p>
|
||||
<ul>
|
||||
<li>configure.ac: use pkg-config for libelf</li>
|
||||
<li>configure: check for python2.7 for PYTHON2</li>
|
||||
</ul>
|
||||
|
||||
<p>Kenneth Graunke (2):</p>
|
||||
<ul>
|
||||
<li>i965: Fix fragment shader struct inputs.</li>
|
||||
<li>i965: Fix scalar vertex shader struct outputs.</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (8):</p>
|
||||
<ul>
|
||||
<li>radeonsi: fix occlusion queries on Fiji</li>
|
||||
<li>radeonsi: fix a hang due to uninitialized border color registers</li>
|
||||
<li>radeonsi: fix Fiji for LLVM &lt;= 3.7</li>
|
||||
<li>radeonsi: don't call of u_prims_for_vertices for patches and rectangles</li>
|
||||
<li>radeonsi: apply the streamout workaround to Fiji as well</li>
|
||||
<li>gallium/radeon: fix Hyper-Z hangs by programming PA_SC_MODE_CNTL_1 correctly</li>
|
||||
<li>tgsi/scan: add flag colors_written</li>
|
||||
<li>r600g: write all MRTs only if there is exactly one output (fixes a hang)</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Turner (1):</p>
|
||||
<ul>
|
||||
<li>glsl: Allow binding of image variables with 420pack.</li>
|
||||
</ul>
|
||||
|
||||
<p>Neil Roberts (2):</p>
|
||||
<ul>
|
||||
<li>i965: Add MESA_FORMAT_B8G8R8X8_SRGB to brw_format_for_mesa_format</li>
|
||||
<li>i965: Add B8G8R8X8_SRGB to the alpha format override</li>
|
||||
</ul>
|
||||
|
||||
<p>Oded Gabbay (1):</p>
|
||||
<ul>
|
||||
<li>configura.ac: fix test for SSE4.1 assembler support</li>
|
||||
</ul>
|
||||
|
||||
<p>Patrick Rudolph (2):</p>
|
||||
<ul>
|
||||
<li>nv50,nvc0: fix use-after-free when vertex buffers are unbound</li>
|
||||
<li>gallium/util: return correct number of bound vertex buffers</li>
|
||||
</ul>
|
||||
|
||||
<p>Samuel Pitoiset (1):</p>
|
||||
<ul>
|
||||
<li>nvc0: free memory allocated by the prog which reads MP perf counters</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (1):</p>
|
||||
<ul>
|
||||
<li>i965: use _Shader to get fragment program when updating surface state</li>
|
||||
</ul>
|
||||
|
||||
<p>Tom Stellard (2):</p>
|
||||
<ul>
|
||||
<li>radeonsi: Rename si_shader::ls_rsrc{1,2} to si_shader::rsrc{1,2}</li>
|
||||
<li>radeonsi/compute: Use the compiler's COMPUTE_PGM_RSRC* register values</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -1087,6 +1087,11 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
|
||||
op = nir_texop_tex;
|
||||
num_srcs = 1;
|
||||
break;
|
||||
case TGSI_OPCODE_TEX2:
|
||||
op = nir_texop_tex;
|
||||
num_srcs = 1;
|
||||
samp = 2;
|
||||
break;
|
||||
case TGSI_OPCODE_TXP:
|
||||
op = nir_texop_tex;
|
||||
num_srcs = 2;
|
||||
@@ -1242,10 +1247,12 @@ ttn_tex(struct ttn_compile *c, nir_alu_dest dest, nir_ssa_def **src)
|
||||
}
|
||||
|
||||
if (instr->is_shadow) {
|
||||
if (instr->coord_components < 3)
|
||||
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
|
||||
else
|
||||
if (instr->coord_components == 4)
|
||||
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[1], X));
|
||||
else if (instr->coord_components == 3)
|
||||
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], W));
|
||||
else
|
||||
instr->src[src_number].src = nir_src_for_ssa(ttn_channel(b, src[0], Z));
|
||||
|
||||
instr->src[src_number].src_type = nir_tex_src_comparitor;
|
||||
src_number++;
|
||||
@@ -1651,6 +1658,7 @@ ttn_emit_instruction(struct ttn_compile *c)
|
||||
case TGSI_OPCODE_TXL:
|
||||
case TGSI_OPCODE_TXB:
|
||||
case TGSI_OPCODE_TXD:
|
||||
case TGSI_OPCODE_TEX2:
|
||||
case TGSI_OPCODE_TXL2:
|
||||
case TGSI_OPCODE_TXB2:
|
||||
case TGSI_OPCODE_TXQ_LZ:
|
||||
|
@@ -258,6 +258,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens,
|
||||
info->output_semantic_index[reg] = (ubyte) semIndex;
|
||||
info->num_outputs++;
|
||||
|
||||
if (semName == TGSI_SEMANTIC_COLOR)
|
||||
info->colors_written |= 1 << semIndex;
|
||||
|
||||
if (procType == TGSI_PROCESSOR_VERTEX ||
|
||||
procType == TGSI_PROCESSOR_GEOMETRY ||
|
||||
procType == TGSI_PROCESSOR_TESS_CTRL ||
|
||||
|
@@ -76,6 +76,7 @@ struct tgsi_shader_info
|
||||
|
||||
uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */
|
||||
|
||||
ubyte colors_written;
|
||||
boolean reads_position; /**< does fragment shader read position? */
|
||||
boolean reads_z; /**< does fragment shader read depth? */
|
||||
boolean writes_z; /**< does fragment shader write Z value? */
|
||||
|
@@ -81,7 +81,13 @@ void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
|
||||
const struct pipe_vertex_buffer *src,
|
||||
unsigned start_slot, unsigned count)
|
||||
{
|
||||
uint32_t enabled_buffers = (1ull << *dst_count) - 1;
|
||||
unsigned i;
|
||||
uint32_t enabled_buffers = 0;
|
||||
|
||||
for (i = 0; i < *dst_count; i++) {
|
||||
if (dst[i].buffer || dst[i].user_buffer)
|
||||
enabled_buffers |= (1ull << i);
|
||||
}
|
||||
|
||||
util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
|
||||
count);
|
||||
|
@@ -153,7 +153,7 @@ enum a4xx_vtx_fmt {
|
||||
|
||||
enum a4xx_tex_fmt {
|
||||
TFMT4_5_6_5_UNORM = 11,
|
||||
TFMT4_5_5_5_1_UNORM = 10,
|
||||
TFMT4_5_5_5_1_UNORM = 9,
|
||||
TFMT4_4_4_4_4_UNORM = 8,
|
||||
TFMT4_X8Z24_UNORM = 71,
|
||||
TFMT4_10_10_10_2_UNORM = 33,
|
||||
@@ -2718,6 +2718,12 @@ static inline uint32_t A4XX_TEX_SAMP_0_ANISO(enum a4xx_tex_aniso val)
|
||||
{
|
||||
return ((val) << A4XX_TEX_SAMP_0_ANISO__SHIFT) & A4XX_TEX_SAMP_0_ANISO__MASK;
|
||||
}
|
||||
#define A4XX_TEX_SAMP_0_LOD_BIAS__MASK 0xfff80000
|
||||
#define A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT 19
|
||||
static inline uint32_t A4XX_TEX_SAMP_0_LOD_BIAS(float val)
|
||||
{
|
||||
return ((((int32_t)(val * 256.0))) << A4XX_TEX_SAMP_0_LOD_BIAS__SHIFT) & A4XX_TEX_SAMP_0_LOD_BIAS__MASK;
|
||||
}
|
||||
|
||||
#define REG_A4XX_TEX_SAMP_1 0x00000001
|
||||
#define A4XX_TEX_SAMP_1_COMPARE_FUNC__MASK 0x0000000e
|
||||
|
@@ -250,14 +250,6 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
|
||||
}
|
||||
}
|
||||
|
||||
/* adjust regids for alpha output formats. there is no alpha render
|
||||
* format, so it's just treated like red
|
||||
*/
|
||||
for (i = 0; i < nr; i++)
|
||||
if (util_format_is_alpha(pipe_surface_format(bufs[i])))
|
||||
color_regid[i] += 3;
|
||||
|
||||
|
||||
/* TODO get these dynamically: */
|
||||
face_regid = s[FS].v->frag_face ? regid(0,0) : regid(63,0);
|
||||
coord_regid = s[FS].v->frag_coord ? regid(0,0) : regid(63,0);
|
||||
|
@@ -111,6 +111,7 @@ fd4_sampler_state_create(struct pipe_context *pctx,
|
||||
COND(!cso->normalized_coords, A4XX_TEX_SAMP_1_UNNORM_COORDS);
|
||||
|
||||
if (cso->min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
|
||||
so->texsamp0 |= A4XX_TEX_SAMP_0_LOD_BIAS(cso->lod_bias);
|
||||
so->texsamp1 |=
|
||||
A4XX_TEX_SAMP_1_MIN_LOD(cso->min_lod) |
|
||||
A4XX_TEX_SAMP_1_MAX_LOD(cso->max_lod);
|
||||
|
@@ -291,7 +291,7 @@ void BasicBlock::permuteAdjacent(Instruction *a, Instruction *b)
|
||||
|
||||
if (b->prev)
|
||||
b->prev->next = b;
|
||||
if (a->prev)
|
||||
if (a->next)
|
||||
a->next->prev = a;
|
||||
}
|
||||
|
||||
|
@@ -575,8 +575,8 @@ CodeEmitterGK110::emitIMUL(const Instruction *i)
|
||||
if (isLIMM(i->src(1), TYPE_S32)) {
|
||||
emitForm_L(i, 0x280, 2, Modifier(0));
|
||||
|
||||
assert(i->subOp != NV50_IR_SUBOP_MUL_HIGH);
|
||||
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
code[1] |= 1 << 24;
|
||||
if (i->sType == TYPE_S32)
|
||||
code[1] |= 3 << 25;
|
||||
} else {
|
||||
@@ -695,14 +695,9 @@ CodeEmitterGK110::emitIMAD(const Instruction *i)
|
||||
if (i->sType == TYPE_S32)
|
||||
code[1] |= (1 << 19) | (1 << 24);
|
||||
|
||||
if (code[0] & 0x1) {
|
||||
assert(!i->subOp);
|
||||
SAT_(39);
|
||||
} else {
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
code[1] |= 1 << 25;
|
||||
SAT_(35);
|
||||
}
|
||||
if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
|
||||
code[1] |= 1 << 25;
|
||||
SAT_(35);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -2870,6 +2870,12 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
|
||||
bb->cfg.attach(&loopBB->cfg, Graph::Edge::BACK);
|
||||
}
|
||||
setPosition(reinterpret_cast<BasicBlock *>(breakBBs.pop().u.p), true);
|
||||
|
||||
// If the loop never breaks (e.g. only has RET's inside), then there
|
||||
// will be no way to get to the break bb. However BGNLOOP will have
|
||||
// already made a PREBREAK to it, so it must be in the CFG.
|
||||
if (getBB()->cfg.incidentCount() == 0)
|
||||
loopBB->cfg.attach(&getBB()->cfg, Graph::Edge::TREE);
|
||||
}
|
||||
break;
|
||||
case TGSI_OPCODE_BRK:
|
||||
|
@@ -202,7 +202,8 @@ NV50LegalizePostRA::visit(Function *fn)
|
||||
Program *prog = fn->getProgram();
|
||||
|
||||
r63 = new_LValue(fn, FILE_GPR);
|
||||
if (prog->maxGPR < 63)
|
||||
// GPR units on nv50 are in half-regs
|
||||
if (prog->maxGPR < 126)
|
||||
r63->reg.data.id = 63;
|
||||
else
|
||||
r63->reg.data.id = 127;
|
||||
@@ -831,7 +832,7 @@ NV50LoweringPreSSA::handleTXB(TexInstruction *i)
|
||||
}
|
||||
Value *flags = bld.getScratch(1, FILE_FLAGS);
|
||||
bld.setPosition(cond, true);
|
||||
bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0));
|
||||
bld.mkCvt(OP_CVT, TYPE_U8, flags, TYPE_U32, cond->getDef(0))->flagsDef = 0;
|
||||
|
||||
Instruction *tex[4];
|
||||
for (l = 0; l < 4; ++l) {
|
||||
|
@@ -686,7 +686,7 @@ NVC0LoweringPass::handleTEX(TexInstruction *i)
|
||||
i->tex.s = 0x1f;
|
||||
i->setIndirectR(hnd);
|
||||
i->setIndirectS(NULL);
|
||||
} else if (i->tex.r == i->tex.s) {
|
||||
} else if (i->tex.r == i->tex.s || i->op == OP_TXF) {
|
||||
i->tex.r += prog->driver->io.texBindBase / 4;
|
||||
i->tex.s = 0; // only a single cX[] value possible here
|
||||
} else {
|
||||
|
@@ -842,6 +842,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
|
||||
i->src(0).mod = i->src(t).mod;
|
||||
i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
|
||||
i->src(1).mod = 0;
|
||||
} else
|
||||
if (i->postFactor && i->sType == TYPE_F32) {
|
||||
/* Can't emit a postfactor with an immediate, have to fold it in */
|
||||
i->setSrc(s, new_ImmediateValue(
|
||||
prog, imm0.reg.data.f32 * exp2f(i->postFactor)));
|
||||
i->postFactor = 0;
|
||||
}
|
||||
break;
|
||||
case OP_MAD:
|
||||
@@ -2606,8 +2612,11 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
|
||||
i->getSrc(0)->reg.data.id >= 64)
|
||||
break;
|
||||
|
||||
if (i->getPredicate())
|
||||
break;
|
||||
|
||||
def = i->getSrc(1)->getInsn();
|
||||
if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
|
||||
if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
|
||||
vtmp = i->getSrc(1);
|
||||
i->setSrc(1, def->getSrc(0));
|
||||
|
||||
@@ -2909,6 +2918,16 @@ DeadCodeElim::visit(BasicBlock *bb)
|
||||
return true;
|
||||
}
|
||||
|
||||
// Each load can go into up to 4 destinations, any of which might potentially
|
||||
// be dead (i.e. a hole). These can always be split into 2 loads, independent
|
||||
// of where the holes are. We find the first contiguous region, put it into
|
||||
// the first load, and then put the second contiguous region into the second
|
||||
// load. There can be at most 2 contiguous regions.
|
||||
//
|
||||
// Note that there are some restrictions, for example it's not possible to do
|
||||
// a 64-bit load that's not 64-bit aligned, so such a load has to be split
|
||||
// up. Also hardware doesn't support 96-bit loads, so those also have to be
|
||||
// split into a 64-bit and 32-bit load.
|
||||
void
|
||||
DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
||||
{
|
||||
@@ -2929,6 +2948,8 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
||||
addr1 = ld1->getSrc(0)->reg.data.offset;
|
||||
n1 = n2 = 0;
|
||||
size1 = size2 = 0;
|
||||
|
||||
// Compute address/width for first load
|
||||
for (d = 0; ld1->defExists(d); ++d) {
|
||||
if (mask & (1 << d)) {
|
||||
if (size1 && (addr1 & 0x7))
|
||||
@@ -2942,16 +2963,34 @@ DeadCodeElim::checkSplitLoad(Instruction *ld1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Scale back the size of the first load until it can be loaded. This
|
||||
// typically happens for TYPE_B96 loads.
|
||||
while (n1 &&
|
||||
!prog->getTarget()->isAccessSupported(ld1->getSrc(0)->reg.file,
|
||||
typeOfSize(size1))) {
|
||||
size1 -= def1[--n1]->reg.size;
|
||||
d--;
|
||||
}
|
||||
|
||||
// Compute address/width for second load
|
||||
for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
|
||||
if (mask & (1 << d)) {
|
||||
assert(!size2 || !(addr2 & 0x7));
|
||||
def2[n2] = ld1->getDef(d);
|
||||
size2 += def2[n2++]->reg.size;
|
||||
} else {
|
||||
} else if (!n2) {
|
||||
assert(!n2);
|
||||
addr2 += ld1->getDef(d)->reg.size;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Make sure that we've processed all the values
|
||||
for (; ld1->defExists(d); ++d)
|
||||
assert(!(mask & (1 << d)));
|
||||
|
||||
updateLdStOffset(ld1, addr1, func);
|
||||
ld1->setType(typeOfSize(size1));
|
||||
for (d = 0; d < 4; ++d)
|
||||
|
@@ -454,7 +454,7 @@ TargetNV50::isModSupported(const Instruction *insn, int s, Modifier mod) const
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (s >= 3)
|
||||
if (s >= opInfo[insn->op].srcNr || s >= 3)
|
||||
return false;
|
||||
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
|
||||
}
|
||||
|
@@ -426,7 +426,7 @@ TargetNVC0::isModSupported(const Instruction *insn, int s, Modifier mod) const
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (s >= 3)
|
||||
if (s >= opInfo[insn->op].srcNr || s >= 3)
|
||||
return false;
|
||||
return (mod & Modifier(opInfo[insn->op].srcMods[s])) == mod;
|
||||
}
|
||||
|
@@ -960,6 +960,9 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
unsigned i;
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
|
||||
nv50->dirty |= NV50_NEW_ARRAYS;
|
||||
|
||||
util_set_vertex_buffers_count(nv50->vtxbuf, &nv50->num_vtxbufs, vb,
|
||||
start_slot, count);
|
||||
|
||||
@@ -983,10 +986,6 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
|
||||
nv50->vbo_constant &= ~(1 << dst_index);
|
||||
}
|
||||
}
|
||||
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_VERTEX);
|
||||
|
||||
nv50->dirty |= NV50_NEW_ARRAYS;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -417,6 +417,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
|
||||
if (screen->pm.prog) {
|
||||
screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
|
||||
nvc0_program_destroy(NULL, screen->pm.prog);
|
||||
FREE(screen->pm.prog);
|
||||
}
|
||||
|
||||
nouveau_bo_ref(NULL, &screen->text);
|
||||
|
@@ -998,6 +998,9 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
|
||||
struct nvc0_context *nvc0 = nvc0_context(pipe);
|
||||
unsigned i;
|
||||
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
|
||||
nvc0->dirty |= NVC0_NEW_ARRAYS;
|
||||
|
||||
util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb,
|
||||
start_slot, count);
|
||||
|
||||
@@ -1021,9 +1024,6 @@ nvc0_set_vertex_buffers(struct pipe_context *pipe,
|
||||
nvc0->constant_vbos &= ~(1 << dst_index);
|
||||
}
|
||||
}
|
||||
|
||||
nvc0->dirty |= NVC0_NEW_ARRAYS;
|
||||
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -1527,12 +1527,17 @@ static void evergreen_emit_msaa_state(struct r600_context *rctx, int nr_samples,
|
||||
S_028C00_EXPAND_LINE_WIDTH(1)); /* R_028C00_PA_SC_LINE_CNTL */
|
||||
radeon_emit(cs, S_028C04_MSAA_NUM_SAMPLES(util_logbase2(nr_samples)) |
|
||||
S_028C04_MAX_SAMPLE_DIST(max_dist)); /* R_028C04_PA_SC_AA_CONFIG */
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
} else {
|
||||
r600_write_context_reg_seq(cs, R_028C00_PA_SC_LINE_CNTL, 2);
|
||||
radeon_emit(cs, S_028C00_LAST_PIXEL(1)); /* R_028C00_PA_SC_LINE_CNTL */
|
||||
radeon_emit(cs, 0); /* R_028C04_PA_SC_AA_CONFIG */
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -57,7 +57,7 @@
|
||||
|
||||
/* the number of CS dwords for flushing and drawing */
|
||||
#define R600_MAX_FLUSH_CS_DWORDS 16
|
||||
#define R600_MAX_DRAW_CS_DWORDS 47
|
||||
#define R600_MAX_DRAW_CS_DWORDS 52
|
||||
#define R600_TRACE_CS_DWORDS 7
|
||||
|
||||
#define R600_MAX_USER_CONST_BUFFERS 13
|
||||
|
@@ -598,6 +598,106 @@ static int select_twoside_color(struct r600_shader_ctx *ctx, int front, int back
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* execute a single slot ALU calculation */
|
||||
static int single_alu_op2(struct r600_shader_ctx *ctx, int op,
|
||||
int dst_sel, int dst_chan,
|
||||
int src0_sel, unsigned src0_chan_val,
|
||||
int src1_sel, unsigned src1_chan_val)
|
||||
{
|
||||
struct r600_bytecode_alu alu;
|
||||
int r, i;
|
||||
|
||||
if (ctx->bc->chip_class == CAYMAN && op == ALU_OP2_MULLO_INT) {
|
||||
for (i = 0; i < 4; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = op;
|
||||
alu.src[0].sel = src0_sel;
|
||||
if (src0_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[0].value = src0_chan_val;
|
||||
else
|
||||
alu.src[0].chan = src0_chan_val;
|
||||
alu.src[1].sel = src1_sel;
|
||||
if (src1_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[1].value = src1_chan_val;
|
||||
else
|
||||
alu.src[1].chan = src1_chan_val;
|
||||
alu.dst.sel = dst_sel;
|
||||
alu.dst.chan = i;
|
||||
alu.dst.write = i == dst_chan;
|
||||
alu.last = (i == 3);
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = op;
|
||||
alu.src[0].sel = src0_sel;
|
||||
if (src0_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[0].value = src0_chan_val;
|
||||
else
|
||||
alu.src[0].chan = src0_chan_val;
|
||||
alu.src[1].sel = src1_sel;
|
||||
if (src1_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[1].value = src1_chan_val;
|
||||
else
|
||||
alu.src[1].chan = src1_chan_val;
|
||||
alu.dst.sel = dst_sel;
|
||||
alu.dst.chan = dst_chan;
|
||||
alu.dst.write = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* execute a single slot ALU calculation */
|
||||
static int single_alu_op3(struct r600_shader_ctx *ctx, int op,
|
||||
int dst_sel, int dst_chan,
|
||||
int src0_sel, unsigned src0_chan_val,
|
||||
int src1_sel, unsigned src1_chan_val,
|
||||
int src2_sel, unsigned src2_chan_val)
|
||||
{
|
||||
struct r600_bytecode_alu alu;
|
||||
int r;
|
||||
|
||||
/* validate this for other ops */
|
||||
assert(op == ALU_OP3_MULADD_UINT24);
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = op;
|
||||
alu.src[0].sel = src0_sel;
|
||||
if (src0_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[0].value = src0_chan_val;
|
||||
else
|
||||
alu.src[0].chan = src0_chan_val;
|
||||
alu.src[1].sel = src1_sel;
|
||||
if (src1_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[1].value = src1_chan_val;
|
||||
else
|
||||
alu.src[1].chan = src1_chan_val;
|
||||
alu.src[2].sel = src2_sel;
|
||||
if (src2_sel == V_SQ_ALU_SRC_LITERAL)
|
||||
alu.src[2].value = src2_chan_val;
|
||||
else
|
||||
alu.src[2].chan = src2_chan_val;
|
||||
alu.dst.sel = dst_sel;
|
||||
alu.dst.chan = dst_chan;
|
||||
alu.is_op3 = 1;
|
||||
alu.last = 1;
|
||||
r = r600_bytecode_add_alu(ctx->bc, &alu);
|
||||
if (r)
|
||||
return r;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int get_address_file_reg(struct r600_shader_ctx *ctx, int index)
|
||||
{
|
||||
return index > 0 ? ctx->bc->index_reg[index - 1] : ctx->bc->ar_reg;
|
||||
}
|
||||
|
||||
static int vs_add_primid_output(struct r600_shader_ctx *ctx, int prim_id_sid)
|
||||
{
|
||||
int i;
|
||||
@@ -1129,6 +1229,7 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
unsigned vtx_id = src->Dimension.Index;
|
||||
int offset_reg = vtx_id / 3;
|
||||
int offset_chan = vtx_id % 3;
|
||||
int t2 = 0;
|
||||
|
||||
/* offsets of per-vertex data in ESGS ring are passed to GS in R0.x, R0.y,
|
||||
* R0.w, R1.x, R1.y, R1.z (it seems R0.z is used for PrimitiveID) */
|
||||
@@ -1136,13 +1237,24 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
if (offset_reg == 0 && offset_chan == 2)
|
||||
offset_chan = 3;
|
||||
|
||||
if (src->Dimension.Indirect || src->Register.Indirect)
|
||||
t2 = r600_get_temp(ctx);
|
||||
|
||||
if (src->Dimension.Indirect) {
|
||||
int treg[3];
|
||||
int t2;
|
||||
struct r600_bytecode_alu alu;
|
||||
int r, i;
|
||||
|
||||
/* you have got to be shitting me -
|
||||
unsigned addr_reg;
|
||||
addr_reg = get_address_file_reg(ctx, src->DimIndirect.Index);
|
||||
if (src->DimIndirect.Index > 0) {
|
||||
r = single_alu_op2(ctx, ALU_OP1_MOV,
|
||||
ctx->bc->ar_reg, 0,
|
||||
addr_reg, 0,
|
||||
0, 0);
|
||||
if (r)
|
||||
return r;
|
||||
}
|
||||
/*
|
||||
we have to put the R0.x/y/w into Rt.x Rt+1.x Rt+2.x then index reg from Rt.
|
||||
at least this is what fglrx seems to do. */
|
||||
for (i = 0; i < 3; i++) {
|
||||
@@ -1150,7 +1262,6 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
}
|
||||
r600_add_gpr_array(ctx->shader, treg[0], 3, 0x0F);
|
||||
|
||||
t2 = r600_get_temp(ctx);
|
||||
for (i = 0; i < 3; i++) {
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
alu.op = ALU_OP1_MOV;
|
||||
@@ -1175,8 +1286,33 @@ static int fetch_gs_input(struct r600_shader_ctx *ctx, struct tgsi_full_src_regi
|
||||
if (r)
|
||||
return r;
|
||||
offset_reg = t2;
|
||||
offset_chan = 0;
|
||||
}
|
||||
|
||||
if (src->Register.Indirect) {
|
||||
int addr_reg;
|
||||
unsigned first = ctx->info.input_array_first[src->Indirect.ArrayID];
|
||||
|
||||
addr_reg = get_address_file_reg(ctx, src->Indirect.Index);
|
||||
|
||||
/* pull the value from index_reg */
|
||||
r = single_alu_op2(ctx, ALU_OP2_ADD_INT,
|
||||
t2, 1,
|
||||
addr_reg, 0,
|
||||
V_SQ_ALU_SRC_LITERAL, first);
|
||||
if (r)
|
||||
return r;
|
||||
r = single_alu_op3(ctx, ALU_OP3_MULADD_UINT24,
|
||||
t2, 0,
|
||||
t2, 1,
|
||||
V_SQ_ALU_SRC_LITERAL, 4,
|
||||
offset_reg, offset_chan);
|
||||
if (r)
|
||||
return r;
|
||||
offset_reg = t2;
|
||||
offset_chan = 0;
|
||||
index = src->Register.Index - first;
|
||||
}
|
||||
|
||||
memset(&vtx, 0, sizeof(vtx));
|
||||
vtx.buffer_id = R600_GS_RING_CONST_BUFFER;
|
||||
@@ -1222,6 +1358,7 @@ static int tgsi_split_gs_inputs(struct r600_shader_ctx *ctx)
|
||||
|
||||
fetch_gs_input(ctx, src, treg);
|
||||
ctx->src[i].sel = treg;
|
||||
ctx->src[i].rel = 0;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
@@ -1972,7 +2109,9 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
|
||||
ctx.nliterals = 0;
|
||||
ctx.literals = NULL;
|
||||
shader->fs_write_all = FALSE;
|
||||
|
||||
shader->fs_write_all = ctx.info.properties[TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS] &&
|
||||
ctx.info.colors_written == 1;
|
||||
|
||||
if (shader->vs_as_gs_a)
|
||||
vs_add_primid_output(&ctx, key.vs.prim_id_out);
|
||||
@@ -2003,10 +2142,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
case TGSI_TOKEN_TYPE_PROPERTY:
|
||||
property = &ctx.parse.FullToken.FullProperty;
|
||||
switch (property->Property.PropertyName) {
|
||||
case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
|
||||
if (property->u[0].Data == 1)
|
||||
shader->fs_write_all = TRUE;
|
||||
break;
|
||||
case TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION:
|
||||
if (property->u[0].Data == 1)
|
||||
shader->vs_position_window_space = TRUE;
|
||||
@@ -6675,7 +6810,7 @@ static int tgsi_eg_arl(struct r600_shader_ctx *ctx)
|
||||
struct r600_bytecode_alu alu;
|
||||
int r;
|
||||
int i, lasti = tgsi_last_instruction(inst->Dst[0].Register.WriteMask);
|
||||
unsigned reg = inst->Dst[0].Register.Index > 0 ? ctx->bc->index_reg[inst->Dst[0].Register.Index - 1] : ctx->bc->ar_reg;
|
||||
unsigned reg = get_address_file_reg(ctx, inst->Dst[0].Register.Index);
|
||||
|
||||
assert(inst->Dst[0].Register.Index < 3);
|
||||
memset(&alu, 0, sizeof(struct r600_bytecode_alu));
|
||||
|
@@ -1691,6 +1691,24 @@ static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info
|
||||
(info.count_from_stream_output ? S_0287F0_USE_OPAQUE(1) : 0);
|
||||
}
|
||||
|
||||
/* SMX returns CONTEXT_DONE too early workaround */
|
||||
if (rctx->b.family == CHIP_R600 ||
|
||||
rctx->b.family == CHIP_RV610 ||
|
||||
rctx->b.family == CHIP_RV630 ||
|
||||
rctx->b.family == CHIP_RV635) {
|
||||
/* if we have gs shader or streamout
|
||||
we need to do a wait idle after every draw */
|
||||
if (rctx->gs_shader || rctx->b.streamout.streamout_enabled) {
|
||||
r600_write_config_reg(cs, R_008040_WAIT_UNTIL, S_008040_WAIT_3D_IDLE(1));
|
||||
}
|
||||
}
|
||||
|
||||
/* ES ring rolling over at EOP - workaround */
|
||||
if (rctx->b.chip_class == R600) {
|
||||
cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
|
||||
cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SQ_NON_EVENT);
|
||||
}
|
||||
|
||||
if (rctx->screen->b.trace_bo) {
|
||||
r600_trace_emit(rctx);
|
||||
}
|
||||
|
@@ -130,6 +130,7 @@
|
||||
#define EVENT_TYPE_SAMPLE_STREAMOUTSTATS 0x20
|
||||
#define EVENT_TYPE_FLUSH_AND_INV_DB_META 0x2c /* supported on r700+ */
|
||||
#define EVENT_TYPE_VGT_FLUSH 0x24
|
||||
#define EVENT_TYPE_SQ_NON_EVENT 0x26
|
||||
#define EVENT_TYPE_FLUSH_AND_INV_CB_META 46 /* supported on r700+ */
|
||||
#define EVENT_TYPE(x) ((x) << 0)
|
||||
#define EVENT_INDEX(x) ((x) << 8)
|
||||
|
@@ -16,7 +16,8 @@ libradeon_la_SOURCES = \
|
||||
if NEED_RADEON_LLVM
|
||||
|
||||
AM_CFLAGS += \
|
||||
$(LLVM_CFLAGS)
|
||||
$(LLVM_CFLAGS) \
|
||||
$(LIBELF_CFLAGS)
|
||||
|
||||
libradeon_la_SOURCES += \
|
||||
$(LLVM_C_FILES)
|
||||
@@ -24,7 +25,7 @@ libradeon_la_SOURCES += \
|
||||
libradeon_la_LIBADD = \
|
||||
$(CLOCK_LIB) \
|
||||
$(LLVM_LIBS) \
|
||||
$(ELF_LIB)
|
||||
$(LIBELF_LIBS)
|
||||
|
||||
libradeon_la_LDFLAGS = \
|
||||
$(LLVM_LDFLAGS)
|
||||
|
@@ -229,13 +229,17 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
||||
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
|
||||
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1));
|
||||
EG_S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1) |
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
} else if (overrast_samples > 1) {
|
||||
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
|
||||
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
|
||||
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1) |
|
||||
S_028804_OVERRASTERIZATION_AMOUNT(log_samples));
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
}
|
||||
} else {
|
||||
r600_write_context_reg_seq(cs, CM_R_028BDC_PA_SC_LINE_CNTL, 2);
|
||||
@@ -245,6 +249,8 @@ void cayman_emit_msaa_config(struct radeon_winsys_cs *cs, int nr_samples,
|
||||
r600_write_context_reg(cs, CM_R_028804_DB_EQAA,
|
||||
S_028804_HIGH_QUALITY_INTERSECTIONS(1) |
|
||||
S_028804_STATIC_ANCHOR_ASSOCIATIONS(1));
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1, 0);
|
||||
r600_write_context_reg(cs, EG_R_028A4C_PA_SC_MODE_CNTL_1,
|
||||
EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) |
|
||||
EG_S_028A4C_FORCE_EOV_REZ_ENABLE(1));
|
||||
}
|
||||
}
|
||||
|
@@ -226,8 +226,8 @@ bool r600_common_context_init(struct r600_common_context *rctx,
|
||||
rctx->family = rscreen->family;
|
||||
rctx->chip_class = rscreen->chip_class;
|
||||
|
||||
if (rscreen->family == CHIP_HAWAII)
|
||||
rctx->max_db = 16;
|
||||
if (rscreen->chip_class >= CIK)
|
||||
rctx->max_db = MAX2(8, rscreen->info.r600_num_backends);
|
||||
else if (rscreen->chip_class >= EVERGREEN)
|
||||
rctx->max_db = 8;
|
||||
else
|
||||
@@ -543,10 +543,11 @@ const char *r600_get_llvm_processor_name(enum radeon_family family)
|
||||
case CHIP_TONGA: return "tonga";
|
||||
case CHIP_ICELAND: return "iceland";
|
||||
case CHIP_CARRIZO: return "carrizo";
|
||||
case CHIP_FIJI: return "fiji";
|
||||
#if HAVE_LLVM <= 0x0307
|
||||
case CHIP_FIJI: return "tonga";
|
||||
case CHIP_STONEY: return "carrizo";
|
||||
#else
|
||||
case CHIP_FIJI: return "fiji";
|
||||
case CHIP_STONEY: return "stoney";
|
||||
#endif
|
||||
default: return "";
|
||||
|
@@ -168,6 +168,8 @@
|
||||
|
||||
#define EG_R_028A4C_PA_SC_MODE_CNTL_1 0x028A4C
|
||||
#define EG_S_028A4C_PS_ITER_SAMPLE(x) (((x) & 0x1) << 16)
|
||||
#define EG_S_028A4C_FORCE_EOV_CNTDWN_ENABLE(x) (((x) & 0x1) << 25)
|
||||
#define EG_S_028A4C_FORCE_EOV_REZ_ENABLE(x) (((x) & 0x1) << 26)
|
||||
|
||||
#define CM_R_028804_DB_EQAA 0x00028804
|
||||
#define S_028804_MAX_ANCHOR_SAMPLES(x) (((x) & 0x7) << 0)
|
||||
|
@@ -951,6 +951,8 @@ static void ruvd_end_frame(struct pipe_video_codec *decoder,
|
||||
dec->msg->body.decode.db_pitch = dec->base.width;
|
||||
|
||||
dt = dec->set_dtb(dec->msg, (struct vl_video_buffer *)target);
|
||||
if (((struct r600_common_screen*)dec->screen)->family >= CHIP_STONEY)
|
||||
dec->msg->body.decode.dt_wa_chroma_top_offset = dec->msg->body.decode.dt_pitch / 2;
|
||||
|
||||
switch (u_reduce_video_profile(picture->profile)) {
|
||||
case PIPE_VIDEO_FORMAT_MPEG4_AVC:
|
||||
|
@@ -385,7 +385,10 @@ struct ruvd_msg {
|
||||
uint32_t dt_chroma_top_offset;
|
||||
uint32_t dt_chroma_bottom_offset;
|
||||
uint32_t dt_surf_tile_config;
|
||||
uint32_t dt_reserved[3];
|
||||
uint32_t dt_uv_surf_tile_config;
|
||||
// re-use dt_wa_chroma_top_offset as dt_ext_info for UV pitch in stoney
|
||||
uint32_t dt_wa_chroma_top_offset;
|
||||
uint32_t dt_wa_chroma_bottom_offset;
|
||||
|
||||
uint32_t reserved[16];
|
||||
|
||||
|
@@ -33,14 +33,6 @@
|
||||
#include "sid.h"
|
||||
|
||||
#define MAX_GLOBAL_BUFFERS 20
|
||||
#if HAVE_LLVM < 0x0305
|
||||
#define NUM_USER_SGPRS 2
|
||||
#else
|
||||
/* XXX: Even though we don't pass the scratch buffer via user sgprs any more
|
||||
* LLVM still expects that we specify 4 USER_SGPRS so it can remain compatible
|
||||
* with older mesa. */
|
||||
#define NUM_USER_SGPRS 4
|
||||
#endif
|
||||
|
||||
struct si_compute {
|
||||
struct si_context *ctx;
|
||||
@@ -241,7 +233,6 @@ static void si_launch_grid(
|
||||
uint64_t kernel_args_va;
|
||||
uint64_t scratch_buffer_va = 0;
|
||||
uint64_t shader_va;
|
||||
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
|
||||
unsigned i;
|
||||
struct si_shader *shader = &program->shader;
|
||||
unsigned lds_blocks;
|
||||
@@ -365,20 +356,7 @@ static void si_launch_grid(
|
||||
si_pm4_set_reg(pm4, R_00B830_COMPUTE_PGM_LO, (shader_va >> 8) & 0xffffffff);
|
||||
si_pm4_set_reg(pm4, R_00B834_COMPUTE_PGM_HI, shader_va >> 40);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1,
|
||||
/* We always use at least 3 VGPRS, these come from
|
||||
* TIDIG_COMP_CNT.
|
||||
* XXX: The compiler should account for this.
|
||||
*/
|
||||
S_00B848_VGPRS((MAX2(3, shader->num_vgprs) - 1) / 4)
|
||||
/* We always use at least 4 + arg_user_sgpr_count. The 4 extra
|
||||
* sgprs are from TGID_X_EN, TGID_Y_EN, TGID_Z_EN, TG_SIZE_EN
|
||||
* XXX: The compiler should account for this.
|
||||
*/
|
||||
| S_00B848_SGPRS(((MAX2(4 + arg_user_sgpr_count,
|
||||
shader->num_sgprs)) - 1) / 8)
|
||||
| S_00B028_FLOAT_MODE(shader->float_mode))
|
||||
;
|
||||
si_pm4_set_reg(pm4, R_00B848_COMPUTE_PGM_RSRC1, shader->rsrc1);
|
||||
|
||||
lds_blocks = shader->lds_size;
|
||||
/* XXX: We are over allocating LDS. For SI, the shader reports LDS in
|
||||
@@ -394,17 +372,10 @@ static void si_launch_grid(
|
||||
|
||||
assert(lds_blocks <= 0xFF);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2,
|
||||
S_00B84C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0)
|
||||
| S_00B84C_USER_SGPR(arg_user_sgpr_count)
|
||||
| S_00B84C_TGID_X_EN(1)
|
||||
| S_00B84C_TGID_Y_EN(1)
|
||||
| S_00B84C_TGID_Z_EN(1)
|
||||
| S_00B84C_TG_SIZE_EN(1)
|
||||
| S_00B84C_TIDIG_COMP_CNT(2)
|
||||
| S_00B84C_LDS_SIZE(lds_blocks)
|
||||
| S_00B84C_EXCP_EN(0))
|
||||
;
|
||||
shader->rsrc2 &= C_00B84C_LDS_SIZE;
|
||||
shader->rsrc2 |= S_00B84C_LDS_SIZE(lds_blocks);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B84C_COMPUTE_PGM_RSRC2, shader->rsrc2);
|
||||
si_pm4_set_reg(pm4, R_00B854_COMPUTE_RESOURCE_LIMITS, 0);
|
||||
|
||||
si_pm4_set_reg(pm4, R_00B858_COMPUTE_STATIC_THREAD_MGMT_SE0,
|
||||
|
@@ -637,6 +637,14 @@ static LLVMValueRef lds_load(struct lp_build_tgsi_context *bld_base,
|
||||
lp_build_const_int32(gallivm, swizzle));
|
||||
|
||||
value = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
|
||||
if (type == TGSI_TYPE_DOUBLE) {
|
||||
LLVMValueRef value2;
|
||||
dw_addr = lp_build_add(&bld_base->uint_bld, dw_addr,
|
||||
lp_build_const_int32(gallivm, swizzle + 1));
|
||||
value2 = build_indexed_load(si_shader_ctx, si_shader_ctx->lds, dw_addr);
|
||||
return radeon_llvm_emit_fetch_double(bld_base, value, value2);
|
||||
}
|
||||
|
||||
return LLVMBuildBitCast(gallivm->builder, value,
|
||||
tgsi2llvmtype(bld_base, type), "");
|
||||
}
|
||||
@@ -3752,12 +3760,14 @@ void si_shader_binary_read_config(const struct si_screen *sscreen,
|
||||
shader->num_sgprs = MAX2(shader->num_sgprs, (G_00B028_SGPRS(value) + 1) * 8);
|
||||
shader->num_vgprs = MAX2(shader->num_vgprs, (G_00B028_VGPRS(value) + 1) * 4);
|
||||
shader->float_mode = G_00B028_FLOAT_MODE(value);
|
||||
shader->rsrc1 = value;
|
||||
break;
|
||||
case R_00B02C_SPI_SHADER_PGM_RSRC2_PS:
|
||||
shader->lds_size = MAX2(shader->lds_size, G_00B02C_EXTRA_LDS_SIZE(value));
|
||||
break;
|
||||
case R_00B84C_COMPUTE_PGM_RSRC2:
|
||||
shader->lds_size = MAX2(shader->lds_size, G_00B84C_LDS_SIZE(value));
|
||||
shader->rsrc2 = value;
|
||||
break;
|
||||
case R_0286CC_SPI_PS_INPUT_ENA:
|
||||
shader->spi_ps_input_ena = value;
|
||||
|
@@ -268,8 +268,8 @@ struct si_shader {
|
||||
bool is_gs_copy_shader;
|
||||
bool dx10_clamp_mode; /* convert NaNs to 0 */
|
||||
|
||||
unsigned ls_rsrc1;
|
||||
unsigned ls_rsrc2;
|
||||
unsigned rsrc1;
|
||||
unsigned rsrc2;
|
||||
};
|
||||
|
||||
static inline struct tgsi_shader_info *si_get_vs_info(struct si_context *sctx)
|
||||
|
@@ -2979,6 +2979,28 @@ static void si_need_gfx_cs_space(struct pipe_context *ctx, unsigned num_dw,
|
||||
si_need_cs_space((struct si_context*)ctx, num_dw, include_draw_vbo);
|
||||
}
|
||||
|
||||
static void si_init_border_color_buffer(struct si_context *sctx)
|
||||
{
|
||||
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
if (!pm4)
|
||||
return;
|
||||
|
||||
assert(sctx->scratch_buffer == NULL);
|
||||
r600_resource_reference(&sctx->scratch_buffer, NULL);
|
||||
sctx->scratch_buffer = si_resource_create_custom(&sctx->screen->b.b,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
4096 * 16);
|
||||
|
||||
uint64_t va = sctx->scratch_buffer->gpu_address;
|
||||
|
||||
si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, va >> 8);
|
||||
if (sctx->b.chip_class >= CIK)
|
||||
si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, va >> 40);
|
||||
si_pm4_add_bo(pm4, sctx->scratch_buffer, RADEON_USAGE_READ,
|
||||
RADEON_PRIO_SHADER_DATA);
|
||||
si_pm4_set_state(sctx, ta_bordercolor_base, pm4);
|
||||
}
|
||||
|
||||
static void si_init_config(struct si_context *sctx);
|
||||
|
||||
void si_init_state_functions(struct si_context *sctx)
|
||||
@@ -3045,6 +3067,7 @@ void si_init_state_functions(struct si_context *sctx)
|
||||
}
|
||||
|
||||
si_init_config(sctx);
|
||||
si_init_border_color_buffer(sctx);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -163,7 +163,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||
perpatch_output_offset = output_patch0_offset + pervertex_output_patch_size;
|
||||
|
||||
lds_size = output_patch0_offset + output_patch_size * *num_patches;
|
||||
ls_rsrc2 = ls->current->ls_rsrc2;
|
||||
ls_rsrc2 = ls->current->rsrc2;
|
||||
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
assert(lds_size <= 65536);
|
||||
@@ -178,7 +178,7 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||
if (sctx->b.chip_class == CIK && sctx->b.family != CHIP_HAWAII)
|
||||
si_write_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, ls_rsrc2);
|
||||
si_write_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2);
|
||||
radeon_emit(cs, ls->current->ls_rsrc1);
|
||||
radeon_emit(cs, ls->current->rsrc1);
|
||||
radeon_emit(cs, ls_rsrc2);
|
||||
|
||||
/* Compute userdata SGPRs. */
|
||||
@@ -216,6 +216,18 @@ static void si_emit_derived_tess_state(struct si_context *sctx,
|
||||
radeon_emit(cs, tcs_out_layout | (num_tcs_output_cp << 26));
|
||||
}
|
||||
|
||||
static unsigned si_num_prims_for_vertices(const struct pipe_draw_info *info)
|
||||
{
|
||||
switch (info->mode) {
|
||||
case PIPE_PRIM_PATCHES:
|
||||
return info->count / info->vertices_per_patch;
|
||||
case R600_PRIM_RECTANGLE_LIST:
|
||||
return info->count / 3;
|
||||
default:
|
||||
return u_prims_for_vertices(info->mode, info->count);
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
||||
const struct pipe_draw_info *info,
|
||||
unsigned num_patches)
|
||||
@@ -305,7 +317,7 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
|
||||
if (sctx->b.screen->info.max_se >= 2 && ia_switch_on_eoi &&
|
||||
(info->indirect ||
|
||||
(info->instance_count > 1 &&
|
||||
u_prims_for_vertices(info->mode, info->count) <= 1)))
|
||||
si_num_prims_for_vertices(info) <= 1)))
|
||||
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
|
||||
/* Instancing bug on 2 SE chips. */
|
||||
@@ -849,7 +861,9 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
|
||||
/* Workaround for a VGT hang when streamout is enabled.
|
||||
* It must be done after drawing. */
|
||||
if ((sctx->b.family == CHIP_HAWAII || sctx->b.family == CHIP_TONGA) &&
|
||||
if ((sctx->b.family == CHIP_HAWAII ||
|
||||
sctx->b.family == CHIP_TONGA ||
|
||||
sctx->b.family == CHIP_FIJI) &&
|
||||
(sctx->b.streamout.streamout_enabled ||
|
||||
sctx->b.streamout.prims_gen_query_enabled)) {
|
||||
sctx->b.flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
|
||||
|
@@ -119,10 +119,10 @@ static void si_shader_ls(struct si_shader *shader)
|
||||
si_pm4_set_reg(pm4, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8);
|
||||
si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, va >> 40);
|
||||
|
||||
shader->ls_rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
|
||||
shader->rsrc1 = S_00B528_VGPRS((shader->num_vgprs - 1) / 4) |
|
||||
S_00B528_SGPRS((num_sgprs - 1) / 8) |
|
||||
S_00B528_VGPR_COMP_CNT(vgpr_comp_cnt);
|
||||
shader->ls_rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
|
||||
shader->rsrc2 = S_00B52C_USER_SGPR(num_user_sgprs) |
|
||||
S_00B52C_SCRATCH_EN(shader->scratch_bytes_per_wave > 0);
|
||||
}
|
||||
|
||||
|
@@ -2,6 +2,9 @@ include $(top_srcdir)/src/gallium/Automake.inc
|
||||
|
||||
lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
|
||||
|
||||
AM_CPPFLAGS = \
|
||||
$(LIBELF_CFLAGS)
|
||||
|
||||
lib@OPENCL_LIBNAME@_la_LDFLAGS = \
|
||||
$(LLVM_LDFLAGS) \
|
||||
-no-undefined \
|
||||
@@ -20,7 +23,7 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
|
||||
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
|
||||
$(top_builddir)/src/util/libmesautil.la \
|
||||
$(GALLIUM_PIPE_LOADER_WINSYS_LIBS) \
|
||||
$(ELF_LIB) \
|
||||
$(LIBELF_LIBS) \
|
||||
$(DLOPEN_LIBS) \
|
||||
-lclangCodeGen \
|
||||
-lclangFrontendTool \
|
||||
|
@@ -2281,7 +2281,9 @@ validate_binding_qualifier(struct _mesa_glsl_parse_state *state,
|
||||
|
||||
return false;
|
||||
}
|
||||
} else if (state->is_version(420, 310) && base_type->is_image()) {
|
||||
} else if ((state->is_version(420, 310) ||
|
||||
state->ARB_shading_language_420pack_enable) &&
|
||||
base_type->is_image()) {
|
||||
assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS);
|
||||
if (max_index >= ctx->Const.MaxImageUnits) {
|
||||
_mesa_glsl_error(loc, state, "Image binding %d exceeds the "
|
||||
|
@@ -3754,13 +3754,13 @@ link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
|
||||
if (first < MESA_SHADER_FRAGMENT) {
|
||||
gl_shader *const sh = prog->_LinkedShaders[last];
|
||||
|
||||
if (first == MESA_SHADER_GEOMETRY) {
|
||||
if (first != MESA_SHADER_VERTEX) {
|
||||
/* There was no vertex shader, but we still have to assign varying
|
||||
* locations for use by geometry shader inputs in SSO.
|
||||
* locations for use by tessellation/geometry shader inputs in SSO.
|
||||
*
|
||||
* If the shader is not separable (i.e., prog->SeparateShader is
|
||||
* false), linking will have already failed when first is
|
||||
* MESA_SHADER_GEOMETRY.
|
||||
* false), linking will have already failed when first is not
|
||||
* MESA_SHADER_VERTEX.
|
||||
*/
|
||||
if (!assign_varying_locations(ctx, mem_ctx, prog,
|
||||
NULL, prog->_LinkedShaders[first],
|
||||
|
@@ -62,6 +62,15 @@ fallback_required(struct gl_context *ctx, GLenum target,
|
||||
GLuint srcLevel;
|
||||
GLenum status;
|
||||
|
||||
/* GL_DRAW_FRAMEBUFFER does not exist in OpenGL ES 1.x, and since
|
||||
* _mesa_meta_begin hasn't been called yet, we have to work-around API
|
||||
* difficulties. The whole reason that GL_DRAW_FRAMEBUFFER is used instead
|
||||
* of GL_FRAMEBUFFER is that the read framebuffer may be different. This
|
||||
* is moot in OpenGL ES 1.x.
|
||||
*/
|
||||
const GLenum fbo_target = ctx->API == API_OPENGLES
|
||||
? GL_FRAMEBUFFER : GL_DRAW_FRAMEBUFFER;
|
||||
|
||||
/* check for fallbacks */
|
||||
if (target == GL_TEXTURE_3D) {
|
||||
_mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH,
|
||||
@@ -102,13 +111,13 @@ fallback_required(struct gl_context *ctx, GLenum target,
|
||||
*/
|
||||
if (!mipmap->FBO)
|
||||
_mesa_GenFramebuffers(1, &mipmap->FBO);
|
||||
_mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, mipmap->FBO);
|
||||
_mesa_BindFramebuffer(fbo_target, mipmap->FBO);
|
||||
|
||||
_mesa_meta_bind_fbo_image(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, baseImage, 0);
|
||||
_mesa_meta_bind_fbo_image(fbo_target, GL_COLOR_ATTACHMENT0, baseImage, 0);
|
||||
|
||||
status = _mesa_CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER);
|
||||
status = _mesa_CheckFramebufferStatus(fbo_target);
|
||||
|
||||
_mesa_BindFramebuffer(GL_DRAW_FRAMEBUFFER, fboSave);
|
||||
_mesa_BindFramebuffer(fbo_target, fboSave);
|
||||
|
||||
if (status != GL_FRAMEBUFFER_COMPLETE_EXT) {
|
||||
_mesa_perf_debug(ctx, MESA_DEBUG_SEVERITY_HIGH,
|
||||
|
@@ -189,6 +189,24 @@ intel_update_state(struct gl_context * ctx, GLuint new_state)
|
||||
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
|
||||
}
|
||||
|
||||
/* Resolve color for each active shader image. */
|
||||
for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
|
||||
const struct gl_shader *shader = ctx->_Shader->CurrentProgram[i] ?
|
||||
ctx->_Shader->CurrentProgram[i]->_LinkedShaders[i] : NULL;
|
||||
|
||||
if (unlikely(shader && shader->NumImages)) {
|
||||
for (unsigned j = 0; j < shader->NumImages; j++) {
|
||||
struct gl_image_unit *u = &ctx->ImageUnits[shader->ImageUnits[j]];
|
||||
tex_obj = intel_texture_object(u->TexObj);
|
||||
|
||||
if (tex_obj && tex_obj->mt) {
|
||||
intel_miptree_resolve_color(brw, tex_obj->mt);
|
||||
brw_render_cache_set_check_flush(brw, tex_obj->mt->bo);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_mesa_lock_context_textures(ctx);
|
||||
}
|
||||
|
||||
|
@@ -1039,33 +1039,19 @@ fs_visitor::emit_linterp(const fs_reg &attr, const fs_reg &interp,
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
|
||||
fs_visitor::emit_general_interpolation(fs_reg *attr, const char *name,
|
||||
const glsl_type *type,
|
||||
glsl_interp_qualifier interpolation_mode,
|
||||
int location, bool mod_centroid,
|
||||
int *location, bool mod_centroid,
|
||||
bool mod_sample)
|
||||
{
|
||||
attr.type = brw_type_for_base_type(type->get_scalar_type());
|
||||
|
||||
assert(stage == MESA_SHADER_FRAGMENT);
|
||||
brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
|
||||
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
|
||||
|
||||
unsigned int array_elements;
|
||||
|
||||
if (type->is_array()) {
|
||||
array_elements = type->length;
|
||||
if (array_elements == 0) {
|
||||
fail("dereferenced array '%s' has length 0\n", name);
|
||||
}
|
||||
type = type->fields.array;
|
||||
} else {
|
||||
array_elements = 1;
|
||||
}
|
||||
|
||||
if (interpolation_mode == INTERP_QUALIFIER_NONE) {
|
||||
bool is_gl_Color =
|
||||
location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1;
|
||||
*location == VARYING_SLOT_COL0 || *location == VARYING_SLOT_COL1;
|
||||
if (key->flat_shade && is_gl_Color) {
|
||||
interpolation_mode = INTERP_QUALIFIER_FLAT;
|
||||
} else {
|
||||
@@ -1073,71 +1059,86 @@ fs_visitor::emit_general_interpolation(fs_reg attr, const char *name,
|
||||
}
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < array_elements; i++) {
|
||||
for (unsigned int j = 0; j < type->matrix_columns; j++) {
|
||||
if (prog_data->urb_setup[location] == -1) {
|
||||
/* If there's no incoming setup data for this slot, don't
|
||||
* emit interpolation for it.
|
||||
*/
|
||||
attr = offset(attr, bld, type->vector_elements);
|
||||
location++;
|
||||
continue;
|
||||
}
|
||||
if (type->is_array() || type->is_matrix()) {
|
||||
const glsl_type *elem_type = glsl_get_array_element(type);
|
||||
const unsigned length = glsl_get_length(type);
|
||||
|
||||
if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
|
||||
/* Constant interpolation (flat shading) case. The SF has
|
||||
* handed us defined values in only the constant offset
|
||||
* field of the setup reg.
|
||||
*/
|
||||
for (unsigned int k = 0; k < type->vector_elements; k++) {
|
||||
struct brw_reg interp = interp_reg(location, k);
|
||||
interp = suboffset(interp, 3);
|
||||
interp.type = attr.type;
|
||||
bld.emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
|
||||
attr = offset(attr, bld, 1);
|
||||
}
|
||||
} else {
|
||||
/* Smooth/noperspective interpolation case. */
|
||||
for (unsigned int k = 0; k < type->vector_elements; k++) {
|
||||
struct brw_reg interp = interp_reg(location, k);
|
||||
if (devinfo->needs_unlit_centroid_workaround && mod_centroid) {
|
||||
/* Get the pixel/sample mask into f0 so that we know
|
||||
* which pixels are lit. Then, for each channel that is
|
||||
* unlit, replace the centroid data with non-centroid
|
||||
* data.
|
||||
*/
|
||||
bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
|
||||
|
||||
fs_inst *inst;
|
||||
inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
|
||||
false, false);
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->predicate_inverse = true;
|
||||
if (devinfo->has_pln)
|
||||
inst->no_dd_clear = true;
|
||||
|
||||
inst = emit_linterp(attr, fs_reg(interp), interpolation_mode,
|
||||
mod_centroid && !key->persample_shading,
|
||||
mod_sample || key->persample_shading);
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->predicate_inverse = false;
|
||||
if (devinfo->has_pln)
|
||||
inst->no_dd_check = true;
|
||||
|
||||
} else {
|
||||
emit_linterp(attr, fs_reg(interp), interpolation_mode,
|
||||
mod_centroid && !key->persample_shading,
|
||||
mod_sample || key->persample_shading);
|
||||
}
|
||||
if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
|
||||
bld.MUL(attr, attr, this->pixel_w);
|
||||
}
|
||||
attr = offset(attr, bld, 1);
|
||||
}
|
||||
|
||||
}
|
||||
location++;
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
emit_general_interpolation(attr, name, elem_type, interpolation_mode,
|
||||
location, mod_centroid, mod_sample);
|
||||
}
|
||||
} else if (type->is_record()) {
|
||||
for (unsigned i = 0; i < type->length; i++) {
|
||||
const glsl_type *field_type = type->fields.structure[i].type;
|
||||
emit_general_interpolation(attr, name, field_type, interpolation_mode,
|
||||
location, mod_centroid, mod_sample);
|
||||
}
|
||||
} else {
|
||||
assert(type->is_scalar() || type->is_vector());
|
||||
|
||||
if (prog_data->urb_setup[*location] == -1) {
|
||||
/* If there's no incoming setup data for this slot, don't
|
||||
* emit interpolation for it.
|
||||
*/
|
||||
*attr = offset(*attr, bld, type->vector_elements);
|
||||
(*location)++;
|
||||
return;
|
||||
}
|
||||
|
||||
attr->type = brw_type_for_base_type(type->get_scalar_type());
|
||||
|
||||
if (interpolation_mode == INTERP_QUALIFIER_FLAT) {
|
||||
/* Constant interpolation (flat shading) case. The SF has
|
||||
* handed us defined values in only the constant offset
|
||||
* field of the setup reg.
|
||||
*/
|
||||
for (unsigned int i = 0; i < type->vector_elements; i++) {
|
||||
struct brw_reg interp = interp_reg(*location, i);
|
||||
interp = suboffset(interp, 3);
|
||||
interp.type = attr->type;
|
||||
bld.emit(FS_OPCODE_CINTERP, *attr, fs_reg(interp));
|
||||
*attr = offset(*attr, bld, 1);
|
||||
}
|
||||
} else {
|
||||
/* Smooth/noperspective interpolation case. */
|
||||
for (unsigned int i = 0; i < type->vector_elements; i++) {
|
||||
struct brw_reg interp = interp_reg(*location, i);
|
||||
if (devinfo->needs_unlit_centroid_workaround && mod_centroid) {
|
||||
/* Get the pixel/sample mask into f0 so that we know
|
||||
* which pixels are lit. Then, for each channel that is
|
||||
* unlit, replace the centroid data with non-centroid
|
||||
* data.
|
||||
*/
|
||||
bld.emit(FS_OPCODE_MOV_DISPATCH_TO_FLAGS);
|
||||
|
||||
fs_inst *inst;
|
||||
inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode,
|
||||
false, false);
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->predicate_inverse = true;
|
||||
if (devinfo->has_pln)
|
||||
inst->no_dd_clear = true;
|
||||
|
||||
inst = emit_linterp(*attr, fs_reg(interp), interpolation_mode,
|
||||
mod_centroid && !key->persample_shading,
|
||||
mod_sample || key->persample_shading);
|
||||
inst->predicate = BRW_PREDICATE_NORMAL;
|
||||
inst->predicate_inverse = false;
|
||||
if (devinfo->has_pln)
|
||||
inst->no_dd_check = true;
|
||||
|
||||
} else {
|
||||
emit_linterp(*attr, fs_reg(interp), interpolation_mode,
|
||||
mod_centroid && !key->persample_shading,
|
||||
mod_sample || key->persample_shading);
|
||||
}
|
||||
if (devinfo->gen < 6 && interpolation_mode == INTERP_QUALIFIER_SMOOTH) {
|
||||
bld.MUL(*attr, *attr, this->pixel_w);
|
||||
}
|
||||
*attr = offset(*attr, bld, 1);
|
||||
}
|
||||
}
|
||||
(*location)++;
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -197,10 +197,10 @@ public:
|
||||
fs_reg *emit_frontfacing_interpolation();
|
||||
fs_reg *emit_samplepos_setup();
|
||||
fs_reg *emit_sampleid_setup();
|
||||
void emit_general_interpolation(fs_reg attr, const char *name,
|
||||
void emit_general_interpolation(fs_reg *attr, const char *name,
|
||||
const glsl_type *type,
|
||||
glsl_interp_qualifier interpolation_mode,
|
||||
int location, bool mod_centroid,
|
||||
int *location, bool mod_centroid,
|
||||
bool mod_sample);
|
||||
fs_reg *emit_vs_system_value(int location);
|
||||
void emit_interpolation_setup_gen4();
|
||||
@@ -240,6 +240,8 @@ public:
|
||||
|
||||
void emit_nir_code();
|
||||
void nir_setup_inputs(nir_shader *shader);
|
||||
void nir_setup_single_output_varying(fs_reg ®, const glsl_type *type,
|
||||
unsigned &location);
|
||||
void nir_setup_outputs(nir_shader *shader);
|
||||
void nir_setup_uniforms(nir_shader *shader);
|
||||
void nir_setup_uniform(nir_variable *var);
|
||||
|
@@ -105,9 +105,10 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
|
||||
emit_percomp(bld, fs_inst(BRW_OPCODE_MOV, bld.dispatch_width(),
|
||||
input, reg), 0xF);
|
||||
} else {
|
||||
emit_general_interpolation(input, var->name, var->type,
|
||||
int location = var->data.location;
|
||||
emit_general_interpolation(&input, var->name, var->type,
|
||||
(glsl_interp_qualifier) var->data.interpolation,
|
||||
var->data.location, var->data.centroid,
|
||||
&location, var->data.centroid,
|
||||
var->data.sample);
|
||||
}
|
||||
break;
|
||||
@@ -115,6 +116,32 @@ fs_visitor::nir_setup_inputs(nir_shader *shader)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_setup_single_output_varying(fs_reg ®,
|
||||
const glsl_type *type,
|
||||
unsigned &location)
|
||||
{
|
||||
if (type->is_array() || type->is_matrix()) {
|
||||
const struct glsl_type *elem_type = glsl_get_array_element(type);
|
||||
const unsigned length = glsl_get_length(type);
|
||||
|
||||
for (unsigned i = 0; i < length; i++) {
|
||||
nir_setup_single_output_varying(reg, elem_type, location);
|
||||
}
|
||||
} else if (type->is_record()) {
|
||||
for (unsigned i = 0; i < type->length; i++) {
|
||||
const struct glsl_type *field_type = type->fields.structure[i].type;
|
||||
nir_setup_single_output_varying(reg, field_type, location);
|
||||
}
|
||||
} else {
|
||||
assert(type->is_scalar() || type->is_vector());
|
||||
this->outputs[location] = reg;
|
||||
this->output_components[location] = type->vector_elements;
|
||||
reg = offset(reg, bld, 4);
|
||||
location++;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
fs_visitor::nir_setup_outputs(nir_shader *shader)
|
||||
{
|
||||
@@ -130,13 +157,11 @@ fs_visitor::nir_setup_outputs(nir_shader *shader)
|
||||
: var->type->vector_elements;
|
||||
|
||||
switch (stage) {
|
||||
case MESA_SHADER_VERTEX:
|
||||
for (int i = 0; i < ALIGN(type_size(var->type), 4) / 4; i++) {
|
||||
int output = var->data.location + i;
|
||||
this->outputs[output] = offset(reg, bld, 4 * i);
|
||||
this->output_components[output] = vector_elements;
|
||||
}
|
||||
case MESA_SHADER_VERTEX: {
|
||||
unsigned location = var->data.location;
|
||||
nir_setup_single_output_varying(reg, var->type, location);
|
||||
break;
|
||||
}
|
||||
case MESA_SHADER_FRAGMENT:
|
||||
if (var->data.index > 0) {
|
||||
assert(var->data.location == FRAG_RESULT_DATA0);
|
||||
|
@@ -379,6 +379,7 @@ brw_format_for_mesa_format(mesa_format mesa_format)
|
||||
[MESA_FORMAT_A8R8G8B8_SRGB] = 0,
|
||||
[MESA_FORMAT_R8G8B8A8_SRGB] = BRW_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB,
|
||||
[MESA_FORMAT_X8R8G8B8_SRGB] = 0,
|
||||
[MESA_FORMAT_B8G8R8X8_SRGB] = BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB,
|
||||
[MESA_FORMAT_L_SRGB8] = BRW_SURFACEFORMAT_L8_UNORM_SRGB,
|
||||
[MESA_FORMAT_L8A8_SRGB] = BRW_SURFACEFORMAT_L8A8_UNORM_SRGB,
|
||||
[MESA_FORMAT_A8L8_SRGB] = 0,
|
||||
@@ -614,6 +615,10 @@ brw_init_surface_formats(struct brw_context *brw)
|
||||
*/
|
||||
render = BRW_SURFACEFORMAT_B8G8R8A8_UNORM;
|
||||
break;
|
||||
case BRW_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB:
|
||||
if (gen < tinfo->render_target)
|
||||
render = BRW_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB;
|
||||
break;
|
||||
case BRW_SURFACEFORMAT_R8G8B8X8_UNORM:
|
||||
render = BRW_SURFACEFORMAT_R8G8B8A8_UNORM;
|
||||
break;
|
||||
|
@@ -981,7 +981,7 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* _NEW_PROGRAM */
|
||||
struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
|
||||
struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
|
||||
|
||||
if (prog) {
|
||||
/* BRW_NEW_FS_PROG_DATA */
|
||||
@@ -1257,7 +1257,7 @@ brw_upload_wm_image_surfaces(struct brw_context *brw)
|
||||
{
|
||||
struct gl_context *ctx = &brw->ctx;
|
||||
/* BRW_NEW_FRAGMENT_PROGRAM */
|
||||
struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
|
||||
struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
|
||||
|
||||
if (prog) {
|
||||
/* BRW_NEW_FS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
|
||||
|
@@ -776,13 +776,18 @@ program_resource_location(struct gl_shader_program *shProg,
|
||||
* and user-defined attributes.
|
||||
*/
|
||||
switch (res->Type) {
|
||||
case GL_PROGRAM_INPUT:
|
||||
case GL_PROGRAM_INPUT: {
|
||||
const ir_variable *var = RESOURCE_VAR(res);
|
||||
|
||||
/* If the input is an array, fail if the index is out of bounds. */
|
||||
if (array_index > 0
|
||||
&& array_index >= RESOURCE_VAR(res)->type->length) {
|
||||
&& array_index >= var->type->length) {
|
||||
return -1;
|
||||
}
|
||||
return RESOURCE_VAR(res)->data.location + array_index - VERT_ATTRIB_GENERIC0;
|
||||
return (var->data.location +
|
||||
(array_index * var->type->without_array()->matrix_columns) -
|
||||
VERT_ATTRIB_GENERIC0);
|
||||
}
|
||||
case GL_PROGRAM_OUTPUT:
|
||||
/* If the output is an array, fail if the index is out of bounds. */
|
||||
if (array_index > 0
|
||||
|
@@ -766,7 +766,7 @@ _mesa_VertexAttribLPointer(GLuint index, GLint size, GLenum type,
|
||||
|
||||
update_array(ctx, "glVertexAttribLPointer", VERT_ATTRIB_GENERIC(index),
|
||||
legalTypes, 1, 4,
|
||||
size, type, stride, GL_TRUE, GL_FALSE, GL_TRUE, ptr);
|
||||
size, type, stride, GL_FALSE, GL_FALSE, GL_TRUE, ptr);
|
||||
}
|
||||
|
||||
|
||||
|
Reference in New Issue
Block a user