Compare commits
121 Commits
mesa-11.0.
...
mesa-11.0.
Author | SHA1 | Date | |
---|---|---|---|
|
31bf247031 | ||
|
b530dccbff | ||
|
6d6a4d7c76 | ||
|
13276962c7 | ||
|
03ab39fa70 | ||
|
4d215a25d5 | ||
|
6766a36e19 | ||
|
42364b33d1 | ||
|
54a30ed94f | ||
|
6f48b8957e | ||
|
b91ed628c1 | ||
|
141109cc52 | ||
|
5d41a78769 | ||
|
da1d57faf3 | ||
|
0d87f75763 | ||
|
009890a0de | ||
|
938df905ea | ||
|
9b561ed2d1 | ||
|
b0b31397e2 | ||
|
c0b85c5a4c | ||
|
a9da1ead7b | ||
|
dab0c565d3 | ||
|
96931dbf14 | ||
|
2d5b8efd7d | ||
|
0c6b210749 | ||
|
8e9b698c24 | ||
|
3b238aa08f | ||
|
cd6ff70856 | ||
|
2ee32ffe7c | ||
|
25e1e90937 | ||
|
22aae69aa5 | ||
|
4779eb04a4 | ||
|
df361e2311 | ||
|
7259f17eca | ||
|
37b647b979 | ||
|
41cc0965bb | ||
|
3f802ebaf8 | ||
|
b4bfea0094 | ||
|
914966befc | ||
|
3c86315ca3 | ||
|
d0c22560a1 | ||
|
1a866b3e49 | ||
|
b1230e3e01 | ||
|
d09b37e7d5 | ||
|
30570b2629 | ||
|
f114967ca9 | ||
|
39a3871b1e | ||
|
28373c75ba | ||
|
eabc656324 | ||
|
1f2d007e49 | ||
|
00425de657 | ||
|
16d9e62107 | ||
|
776bcb2042 | ||
|
ac75afff88 | ||
|
de936892db | ||
|
b2a04cfcc2 | ||
|
dca86265a2 | ||
|
d0684f3d58 | ||
|
7d78578b06 | ||
|
88ed45b033 | ||
|
fbcd36ddb6 | ||
|
531309a5f0 | ||
|
0ae914f65d | ||
|
b2c8b0e546 | ||
|
d6ee06e9fe | ||
|
7b8b044ee4 | ||
|
ec7cda29b6 | ||
|
ab68081ffb | ||
|
46dc4946a2 | ||
|
01e197c21a | ||
|
0c5aacf446 | ||
|
fb5dd33166 | ||
|
b2d3012e35 | ||
|
154573e427 | ||
|
7e64e887f0 | ||
|
10382380f0 | ||
|
815b595b5f | ||
|
4e0ae01588 | ||
|
33ed153214 | ||
|
3cd7493f11 | ||
|
dacccf8e22 | ||
|
288d9a06cc | ||
|
62ac723a34 | ||
|
766a0b4661 | ||
|
f2e8b94f84 | ||
|
02a631bfbc | ||
|
94e9c52b62 | ||
|
4c0b484612 | ||
|
51e0b06d99 | ||
|
f2bfaa8271 | ||
|
f15a7f3c6e | ||
|
cfddc456ae | ||
|
25e2a4136b | ||
|
ead4ce53f7 | ||
|
dace17bfd4 | ||
|
7f1a77ae66 | ||
|
bcb9e1d26b | ||
|
de1637c7fe | ||
|
cf716563a8 | ||
|
2c65e64881 | ||
|
8be6b32d65 | ||
|
0e0d008b2b | ||
|
007aae740e | ||
|
575f5a94c3 | ||
|
b1203ec9f3 | ||
|
c29e3f1bca | ||
|
c98217178b | ||
|
278739eb01 | ||
|
ae6dcfee56 | ||
|
9fcf28bb14 | ||
|
5fe09ffe6a | ||
|
395cd23690 | ||
|
d04024cffa | ||
|
370c2b344b | ||
|
bcb3bfd510 | ||
|
ebfa2ea34f | ||
|
3736ef3a17 | ||
|
d9e4a3ae6a | ||
|
1afea31ad8 | ||
|
d9b54a01be | ||
|
c4bae5792b |
12
configure.ac
12
configure.ac
@@ -106,6 +106,8 @@ AC_SYS_LARGEFILE
|
||||
LT_PREREQ([2.2])
|
||||
LT_INIT([disable-static])
|
||||
|
||||
AC_CHECK_PROG(RM, rm, [rm -f])
|
||||
|
||||
AX_PROG_BISON([],
|
||||
AS_IF([test ! -f "$srcdir/src/glsl/glcpp/glcpp-parse.c"],
|
||||
[AC_MSG_ERROR([bison not found - unable to compile glcpp-parse.y])]))
|
||||
@@ -1150,6 +1152,16 @@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
|
||||
AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
|
||||
[DEFINES="${DEFINES} -DGLX_USE_TLS"])
|
||||
|
||||
dnl Read-only text section on x86 hardened platforms
|
||||
AC_ARG_ENABLE([glx-read-only-text],
|
||||
[AS_HELP_STRING([--enable-glx-read-only-text],
|
||||
[Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
|
||||
[enable_glx_read_only_text="$enableval"],
|
||||
[enable_glx_read_only_text=no])
|
||||
if test "x$enable_glx_read_only_text" = xyes; then
|
||||
DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
|
||||
fi
|
||||
|
||||
dnl
|
||||
dnl More DRI setup
|
||||
dnl
|
||||
|
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD.
|
||||
7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3 mesa-11.0.0.tar.gz
|
||||
e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32 mesa-11.0.0.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
|
134
docs/relnotes/11.0.1.html
Normal file
134
docs/relnotes/11.0.1.html
Normal file
@@ -0,0 +1,134 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.1 Release Notes / September 26, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.1 is a bug fix release which fixes bugs found since the 11.0.0 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.1 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
6dab262877e12c0546a0e2970c6835a0f217e6d4026ccecb3cd5dd733d1ce867 mesa-11.0.1.tar.gz
|
||||
43d0dfcd1f1e36f07f8228cd76d90175d3fc74c1ed25d7071794a100a98ef2a6 mesa-11.0.1.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Antia Puentes (2):</p>
|
||||
<ul>
|
||||
<li>i965/vec4: Fix saturation errors when coalescing registers</li>
|
||||
<li>i965/vec4_nir: Load constants as integers</li>
|
||||
</ul>
|
||||
|
||||
<p>Anuj Phogat (1):</p>
|
||||
<ul>
|
||||
<li>meta: Abort meta pbo path if TexSubImage need signed unsigned conversion</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (2):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.0</li>
|
||||
<li>Update version to 11.0.1</li>
|
||||
</ul>
|
||||
|
||||
<p>Iago Toral Quiroga (1):</p>
|
||||
<ul>
|
||||
<li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (5):</p>
|
||||
<ul>
|
||||
<li>t_dd_dmatmp: Make "count" actually be the count</li>
|
||||
<li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
|
||||
<li>t_dd_dmatmp: Use '& 3' instead of '% 4' everywhere</li>
|
||||
<li>t_dd_dmatmp: Pull out common 'count -= count & 3' code</li>
|
||||
<li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (6):</p>
|
||||
<ul>
|
||||
<li>st/mesa: avoid integer overflows with buffers >= 512MB</li>
|
||||
<li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
|
||||
<li>freedreno/a3xx: fix blending of L8 format</li>
|
||||
<li>nv50,nvc0: detect underlying resource changes and update tic</li>
|
||||
<li>nv50,nvc0: flush texture cache in presence of coherent bufs</li>
|
||||
<li>radeonsi: load fmask ptr relative to the resources array</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (2):</p>
|
||||
<ul>
|
||||
<li>nir: Fix a bunch of ralloc parenting errors</li>
|
||||
<li>i965/vec4: Don't reswizzle hardware registers</li>
|
||||
</ul>
|
||||
|
||||
<p>Jeremy Huddleston (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac: Add support to enable read-only text segment on x86.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ray Strode (1):</p>
|
||||
<ul>
|
||||
<li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (2):</p>
|
||||
<ul>
|
||||
<li>mesa: fix errors when reading depth with glReadPixels</li>
|
||||
<li>i965: fix textureGrad for cubemaps</li>
|
||||
</ul>
|
||||
|
||||
<p>Ulrich Weigand (1):</p>
|
||||
<ul>
|
||||
<li>mesa: Fix texture compression on big-endian systems</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
85
docs/relnotes/11.0.2.html
Normal file
85
docs/relnotes/11.0.2.html
Normal file
@@ -0,0 +1,85 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.2 Release Notes / September 28, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.2 is a bug fix release which fixes bugs found since the 11.0.1 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.2 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
45170773500d6ae2f9eb93fc85efee69f7c97084411ada4eddf92f78bca56d20 mesa-11.0.2.tar.gz
|
||||
fce11fb27eb87adf1e620a76455d635c6136dfa49ae58c53b34ef8d0c7b7eae4 mesa-11.0.2.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91582">Bug 91582</a> - [bisected] Regression in DEQP gles2.functional.negative_api.texture.texsubimage2d_neg_offset</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91970">Bug 91970</a> - [BSW regression] dEQP-GLES3.functional.shaders.precision.int.highp_mul_vertex</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92095">Bug 92095</a> - [Regression, bisected] arb_shader_atomic_counters.compiler.builtins.frag</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Eduardo Lima Mitev (3):</p>
|
||||
<ul>
|
||||
<li>mesa: Fix order of format+type and internal format checks for glTexImageXD ops</li>
|
||||
<li>mesa: Move _mesa_base_tex_format() from teximage to glformats files</li>
|
||||
<li>mesa: Use the effective internal format instead for validation</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (2):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.1</li>
|
||||
<li>Update version to 11.0.2</li>
|
||||
</ul>
|
||||
|
||||
<p>Kristian Høgsberg Kristensen (1):</p>
|
||||
<ul>
|
||||
<li>i965: Respect stride and subreg_offset for ATTR registers</li>
|
||||
</ul>
|
||||
|
||||
<p>Matt Turner (1):</p>
|
||||
<ul>
|
||||
<li>glsl: Expose gl_MaxTess{Control,Evaluation}AtomicCounters.</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
185
docs/relnotes/11.0.3.html
Normal file
185
docs/relnotes/11.0.3.html
Normal file
@@ -0,0 +1,185 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.3 Release Notes / October 10, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.3 is a bug fix release which fixes bugs found since the 11.0.2 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.3 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
c2210e3daecc10ed9fdcea500327652ed6effc2f47c4b9cee63fb08f560d7117 mesa-11.0.3.tar.gz
|
||||
ab2992eece21adc23c398720ef8c6933cb69ea42e1b2611dc09d031e17e033d6 mesa-11.0.3.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=55552">Bug 55552</a> - Compile errors with --enable-mangling</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=71789">Bug 71789</a> - [r300g] Visuals not found in (default) depth = 24</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91044">Bug 91044</a> - piglit spec/egl_khr_create_context/valid debug flag gles* fail</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91342">Bug 91342</a> - Very dark textures on some objects in indoors environments in Postal 2</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91596">Bug 91596</a> - EGL_KHR_gl_colorspace (v2) causes problem with Android-x86 GUI</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91718">Bug 91718</a> - piglit.spec.arb_shader_image_load_store.invalid causes intermittent GPU HANG</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92072">Bug 92072</a> - Wine breakage since d082c5324 (st/mesa: don't call st_validate_state in BlitFramebuffer)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92265">Bug 92265</a> - Black windows in weston after update mesa to 11.0.2-1</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Brian Paul (1):</p>
|
||||
<ul>
|
||||
<li>st/mesa: try PIPE_BIND_RENDER_TARGET when choosing float texture formats</li>
|
||||
</ul>
|
||||
|
||||
<p>Daniel Scharrer (1):</p>
|
||||
<ul>
|
||||
<li>mesa: Add abs input modifier to base for POW in ffvertex_prog</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (3):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.2</li>
|
||||
<li>Revert "nouveau: make sure there's always room to emit a fence"</li>
|
||||
<li>Update version to 11.0.3</li>
|
||||
</ul>
|
||||
|
||||
<p>Francisco Jerez (1):</p>
|
||||
<ul>
|
||||
<li>i965/fs: Fix hang on IVB and VLV with image format mismatch.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (1):</p>
|
||||
<ul>
|
||||
<li>meta: Handle array textures in scaled MSAA blits</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (6):</p>
|
||||
<ul>
|
||||
<li>nouveau: be more careful about freeing temporary transfer buffers</li>
|
||||
<li>nouveau: delay deleting buffer with unflushed fence</li>
|
||||
<li>nouveau: wait to unref the transfer's bo until it's no longer used</li>
|
||||
<li>nv30: pretend to have packed texture/surface formats</li>
|
||||
<li>nv30: always go through translate module on big-endian</li>
|
||||
<li>nouveau: make sure there's always room to emit a fence</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (1):</p>
|
||||
<ul>
|
||||
<li>mesa: Correctly handle GL_BGRA_EXT in ES3 format_and_type checks</li>
|
||||
</ul>
|
||||
|
||||
<p>Kyle Brenneman (3):</p>
|
||||
<ul>
|
||||
<li>glx: Fix build errors with --enable-mangling (v2)</li>
|
||||
<li>mapi: Make _glapi_get_stub work with "gl" or "mgl" prefix.</li>
|
||||
<li>glx: Don't hard-code the name "libGL.so.1" in driOpenDriver (v3)</li>
|
||||
</ul>
|
||||
|
||||
<p>Leo Liu (1):</p>
|
||||
<ul>
|
||||
<li>radeon/vce: fix vui time_scale zero error</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (21):</p>
|
||||
<ul>
|
||||
<li>st/mesa: fix front buffer regression after dropping st_validate_state in Blit</li>
|
||||
<li>radeonsi: handle index buffer alloc failures</li>
|
||||
<li>radeonsi: handle constant buffer alloc failures</li>
|
||||
<li>gallium/radeon: handle buffer_map staging buffer failures better</li>
|
||||
<li>gallium/radeon: handle buffer alloc failures in r600_draw_rectangle</li>
|
||||
<li>gallium/radeon: add a fail path for depth MSAA texture readback</li>
|
||||
<li>radeonsi: report alloc failure from si_shader_binary_read</li>
|
||||
<li>radeonsi: add malloc fail paths to si_create_shader_state</li>
|
||||
<li>radeonsi: skip drawing if the tess factor ring allocation fails</li>
|
||||
<li>radeonsi: skip drawing if GS ring allocations fail</li>
|
||||
<li>radeonsi: handle shader precompile failures</li>
|
||||
<li>radeonsi: handle fixed-func TCS shader create failure</li>
|
||||
<li>radeonsi: skip drawing if VS, TCS, TES, GS fail to compile or upload</li>
|
||||
<li>radeonsi: skip drawing if PS fails to compile or upload</li>
|
||||
<li>radeonsi: skip drawing if updating the scratch buffer fails</li>
|
||||
<li>radeonsi: don't forget to update scratch relocations for LS, HS, ES shaders</li>
|
||||
<li>radeonsi: handle dummy constant buffer allocation failure</li>
|
||||
<li>gallium/u_blitter: handle allocation failures</li>
|
||||
<li>radeonsi: add scratch buffer to the buffer list when it's re-allocated</li>
|
||||
<li>st/dri: don't use _ctx in client_wait_sync</li>
|
||||
<li>egl/dri2: don't require a context for ClientWaitSync (v2)</li>
|
||||
</ul>
|
||||
|
||||
<p>Matthew Waters (1):</p>
|
||||
<ul>
|
||||
<li>egl: rework handling EGL_CONTEXT_FLAGS</li>
|
||||
</ul>
|
||||
|
||||
<p>Michel Dänzer (1):</p>
|
||||
<ul>
|
||||
<li>st/dri: Use packed RGB formats</li>
|
||||
</ul>
|
||||
|
||||
<p>Roland Scheidegger (1):</p>
|
||||
<ul>
|
||||
<li>mesa: fix mipmap generation for immutable, compressed textures</li>
|
||||
</ul>
|
||||
|
||||
<p>Tom Stellard (3):</p>
|
||||
<ul>
|
||||
<li>gallium/radeon: Use call_once() when initializing LLVM targets</li>
|
||||
<li>gallivm: Allow drivers and state trackers to initialize gallivm LLVM targets v2</li>
|
||||
<li>radeon/llvm: Initialize gallivm targets when initializing the AMDGPU target v2</li>
|
||||
</ul>
|
||||
|
||||
<p>Varad Gautam (1):</p>
|
||||
<ul>
|
||||
<li>egl: restore surface type before linking config to its display</li>
|
||||
</ul>
|
||||
|
||||
<p>Ville Syrjälä (3):</p>
|
||||
<ul>
|
||||
<li>i830: Fix collision between I830_UPLOAD_RASTER_RULES and I830_UPLOAD_TEX(0)</li>
|
||||
<li>i915: Fix texcoord vs. varying collision in fragment programs</li>
|
||||
<li>i915: Remember to call intel_prepare_render() before blitting</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
167
docs/relnotes/11.0.4.html
Normal file
167
docs/relnotes/11.0.4.html
Normal file
@@ -0,0 +1,167 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.4 Release Notes / October 24, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.4 is a bug fix release which fixes bugs found since the 11.0.3 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.4 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86281">Bug 86281</a> - brw_meta_fast_clear (brw=brw@entry=0x7fffd4097a08, fb=fb@entry=0x7fffd40fa900, buffers=buffers@entry=2, partial_clear=partial_clear@entry=false)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=86720">Bug 86720</a> - [radeon] Europa Universalis 4 freezing during game start (10.3.3+, still broken on 11.0.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91788">Bug 91788</a> - [HSW Regression] Synmark2_v6 Multithread performance case FPS reduced by 36%</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92304">Bug 92304</a> - [cts] cts.shaders.negative conformance tests fail</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Alejandro Piñeiro (2):</p>
|
||||
<ul>
|
||||
<li>i965/vec4: check writemask when bailing out at register coalesce</li>
|
||||
<li>i965/vec4: fill src_reg type using the constructor type parameter</li>
|
||||
</ul>
|
||||
|
||||
<p>Brian Paul (2):</p>
|
||||
<ul>
|
||||
<li>vbo: fix incorrect switch statement in init_mat_currval()</li>
|
||||
<li>mesa: fix incorrect opcode in save_BlendFunci()</li>
|
||||
</ul>
|
||||
|
||||
<p>Chih-Wei Huang (3):</p>
|
||||
<ul>
|
||||
<li>mesa: android: Fix the incorrect path of sse_minmax.c</li>
|
||||
<li>nv50/ir: use C++11 standard std::unordered_map if possible</li>
|
||||
<li>nv30: include the header of ffs prototype</li>
|
||||
</ul>
|
||||
|
||||
<p>Chris Wilson (1):</p>
|
||||
<ul>
|
||||
<li>i965: Remove early release of DRI2 miptree</li>
|
||||
</ul>
|
||||
|
||||
<p>Dave Airlie (1):</p>
|
||||
<ul>
|
||||
<li>mesa/uniforms: fix get_uniform for doubles (v2)</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (1):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.3</li>
|
||||
</ul>
|
||||
|
||||
<p>Francisco Jerez (5):</p>
|
||||
<ul>
|
||||
<li>i965: Don't tell the hardware about our UAV access.</li>
|
||||
<li>mesa: Expose function to calculate whether a shader image unit is valid.</li>
|
||||
<li>mesa: Skip redundant texture completeness checking during image validation.</li>
|
||||
<li>i965: Use _mesa_is_image_unit_valid() instead of gl_image_unit::_Valid.</li>
|
||||
<li>mesa: Get rid of texture-dependent image unit derived state.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (8):</p>
|
||||
<ul>
|
||||
<li>glsl: Allow built-in functions as constant expressions in OpenGL ES 1.00</li>
|
||||
<li>ff_fragment_shader: Use binding to set the sampler unit</li>
|
||||
<li>glsl/linker: Use constant_initializer instead of constant_value to initialize uniforms</li>
|
||||
<li>glsl: Use constant_initializer instead of constant_value to determine whether to keep an unused uniform</li>
|
||||
<li>glsl: Only set ir_variable::constant_value for const-decorated variables</li>
|
||||
<li>glsl: Restrict initializers for global variables to constant expression in ES</li>
|
||||
<li>glsl: Add method to determine whether an expression contains the sequence operator</li>
|
||||
<li>glsl: In later GLSL versions, sequence operator cannot be a constant expression</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (1):</p>
|
||||
<ul>
|
||||
<li>nouveau: make sure there's always room to emit a fence</li>
|
||||
</ul>
|
||||
|
||||
<p>Indrajit Das (1):</p>
|
||||
<ul>
|
||||
<li>st/va: Used correct parameter to derive the value of the "h" variable in vlVaCreateImage</li>
|
||||
</ul>
|
||||
|
||||
<p>Jonathan Gray (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac: ensure RM is set</li>
|
||||
</ul>
|
||||
|
||||
<p>Krzysztof Sobiecki (1):</p>
|
||||
<ul>
|
||||
<li>st/fbo: use pipe_surface_release instead of pipe_surface_reference</li>
|
||||
</ul>
|
||||
|
||||
<p>Leo Liu (1):</p>
|
||||
<ul>
|
||||
<li>st/omx/dec/h264: fix field picture type 0 poc disorder</li>
|
||||
</ul>
|
||||
|
||||
<p>Marek Olšák (3):</p>
|
||||
<ul>
|
||||
<li>st/mesa: fix clip state dependencies</li>
|
||||
<li>radeonsi: fix a GS copy shader leak</li>
|
||||
<li>gallium: add PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT</li>
|
||||
</ul>
|
||||
|
||||
<p>Nicolai Hähnle (1):</p>
|
||||
<ul>
|
||||
<li>u_vbuf: fix vb slot assignment for translated buffers</li>
|
||||
</ul>
|
||||
|
||||
<p>Rob Clark (1):</p>
|
||||
<ul>
|
||||
<li>freedreno/a3xx: cache-flush is needed after MEM_WRITE</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (3):</p>
|
||||
<ul>
|
||||
<li>mesa: add GL_UNSIGNED_INT_24_8 to _mesa_pack_depth_span</li>
|
||||
<li>mesa: Set api prefix to version string when overriding version</li>
|
||||
<li>mesa: fix ARRAY_SIZE query for GetProgramResourceiv</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -312,6 +312,8 @@ dri2_add_config(_EGLDisplay *disp, const __DRIconfig *dri_config, int id,
|
||||
else
|
||||
conf->dri_single_config = dri_config;
|
||||
}
|
||||
|
||||
conf->base.SurfaceType = 0;
|
||||
conf->base.ConfigID = config_id;
|
||||
|
||||
_eglLinkConfig(&conf->base);
|
||||
@@ -2384,13 +2386,18 @@ dri2_client_wait_sync(_EGLDriver *drv, _EGLDisplay *dpy, _EGLSync *sync,
|
||||
unsigned wait_flags = 0;
|
||||
EGLint ret = EGL_CONDITION_SATISFIED_KHR;
|
||||
|
||||
if (flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
|
||||
/* The EGL_KHR_fence_sync spec states:
|
||||
*
|
||||
* "If no context is current for the bound API,
|
||||
* the EGL_SYNC_FLUSH_COMMANDS_BIT_KHR bit is ignored."
|
||||
*/
|
||||
if (dri2_ctx && flags & EGL_SYNC_FLUSH_COMMANDS_BIT_KHR)
|
||||
wait_flags |= __DRI2_FENCE_FLAG_FLUSH_COMMANDS;
|
||||
|
||||
/* the sync object should take a reference while waiting */
|
||||
dri2_egl_ref_sync(dri2_sync);
|
||||
|
||||
if (dri2_dpy->fence->client_wait_sync(dri2_ctx->dri_context,
|
||||
if (dri2_dpy->fence->client_wait_sync(dri2_ctx ? dri2_ctx->dri_context : NULL,
|
||||
dri2_sync->fence, wait_flags,
|
||||
timeout))
|
||||
dri2_sync->base.SyncStatus = EGL_SIGNALED_KHR;
|
||||
|
@@ -152,12 +152,51 @@ _eglParseContextAttribList(_EGLContext *ctx, _EGLDisplay *dpy,
|
||||
|
||||
/* The EGL_KHR_create_context spec says:
|
||||
*
|
||||
* "Flags are only defined for OpenGL context creation, and
|
||||
* specifying a flags value other than zero for other types of
|
||||
* contexts, including OpenGL ES contexts, will generate an
|
||||
* error."
|
||||
* "If the EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR flag bit is set in
|
||||
* EGL_CONTEXT_FLAGS_KHR, then a <debug context> will be created.
|
||||
* [...]
|
||||
* In some cases a debug context may be identical to a non-debug
|
||||
* context. This bit is supported for OpenGL and OpenGL ES
|
||||
* contexts."
|
||||
*/
|
||||
if (api != EGL_OPENGL_API && val != 0) {
|
||||
if ((val & EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR) &&
|
||||
(api != EGL_OPENGL_API && api != EGL_OPENGL_ES_API)) {
|
||||
err = EGL_BAD_ATTRIBUTE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The EGL_KHR_create_context spec says:
|
||||
*
|
||||
* "If the EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR flag bit
|
||||
* is set in EGL_CONTEXT_FLAGS_KHR, then a <forward-compatible>
|
||||
* context will be created. Forward-compatible contexts are
|
||||
* defined only for OpenGL versions 3.0 and later. They must not
|
||||
* support functionality marked as <deprecated> by that version of
|
||||
* the API, while a non-forward-compatible context must support
|
||||
* all functionality in that version, deprecated or not. This bit
|
||||
* is supported for OpenGL contexts, and requesting a
|
||||
* forward-compatible context for OpenGL versions less than 3.0
|
||||
* will generate an error."
|
||||
*/
|
||||
if ((val & EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE_BIT_KHR) &&
|
||||
(api != EGL_OPENGL_API || ctx->ClientMajorVersion < 3)) {
|
||||
err = EGL_BAD_ATTRIBUTE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* The EGL_KHR_create_context spec says:
|
||||
*
|
||||
* "If the EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR bit is set in
|
||||
* EGL_CONTEXT_FLAGS_KHR, then a context supporting <robust buffer
|
||||
* access> will be created. Robust buffer access is defined in the
|
||||
* GL_ARB_robustness extension specification, and the resulting
|
||||
* context must also support either the GL_ARB_robustness
|
||||
* extension, or a version of OpenGL incorporating equivalent
|
||||
* functionality. This bit is supported for OpenGL contexts."
|
||||
*/
|
||||
if ((val & EGL_CONTEXT_OPENGL_ROBUST_ACCESS_BIT_KHR) &&
|
||||
(api != EGL_OPENGL_API ||
|
||||
!dpy->Extensions.EXT_create_context_robustness)) {
|
||||
err = EGL_BAD_ATTRIBUTE;
|
||||
break;
|
||||
}
|
||||
|
@@ -137,6 +137,8 @@ gallivm_get_shader_param(enum pipe_shader_cap param)
|
||||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* if we get here, we missed a shader cap above (and should have seen
|
||||
* a compiler warning.)
|
||||
|
@@ -81,6 +81,8 @@
|
||||
# pragma pop_macro("DEBUG")
|
||||
#endif
|
||||
|
||||
#include "c11/threads.h"
|
||||
#include "os/os_thread.h"
|
||||
#include "pipe/p_config.h"
|
||||
#include "util/u_debug.h"
|
||||
#include "util/u_cpu_detect.h"
|
||||
@@ -103,6 +105,33 @@ static LLVMEnsureMultithreaded lLVMEnsureMultithreaded;
|
||||
|
||||
}
|
||||
|
||||
static once_flag init_native_targets_once_flag;
|
||||
|
||||
static void init_native_targets()
|
||||
{
|
||||
// If we have a native target, initialize it to ensure it is linked in and
|
||||
// usable by the JIT.
|
||||
llvm::InitializeNativeTarget();
|
||||
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
|
||||
llvm::InitializeNativeTargetDisassembler();
|
||||
}
|
||||
|
||||
/**
|
||||
* The llvm target registry is not thread-safe, so drivers and state-trackers
|
||||
* that want to initialize targets should use the gallivm_init_llvm_targets()
|
||||
* function to safely initialize targets.
|
||||
*
|
||||
* LLVM targets should be initialized before the driver or state-tracker tries
|
||||
* to access the registry.
|
||||
*/
|
||||
extern "C" void
|
||||
gallivm_init_llvm_targets(void)
|
||||
{
|
||||
call_once(&init_native_targets_once_flag, init_native_targets);
|
||||
}
|
||||
|
||||
extern "C" void
|
||||
lp_set_target_options(void)
|
||||
{
|
||||
@@ -115,13 +144,7 @@ lp_set_target_options(void)
|
||||
llvm::DisablePrettyStackTrace = true;
|
||||
#endif
|
||||
|
||||
// If we have a native target, initialize it to ensure it is linked in and
|
||||
// usable by the JIT.
|
||||
llvm::InitializeNativeTarget();
|
||||
|
||||
llvm::InitializeNativeTargetAsmPrinter();
|
||||
|
||||
llvm::InitializeNativeTargetDisassembler();
|
||||
gallivm_init_llvm_targets();
|
||||
}
|
||||
|
||||
|
||||
|
@@ -41,6 +41,8 @@ extern "C" {
|
||||
|
||||
struct lp_generated_code;
|
||||
|
||||
extern void
|
||||
gallivm_init_llvm_targets(void);
|
||||
|
||||
extern void
|
||||
lp_set_target_options(void);
|
||||
|
@@ -463,6 +463,8 @@ tgsi_exec_get_shader_param(enum pipe_shader_cap param)
|
||||
case PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* if we get here, we missed a shader cap above (and should have seen
|
||||
* a compiler warning.)
|
||||
|
@@ -1190,6 +1190,8 @@ static void blitter_draw(struct blitter_context_priv *ctx,
|
||||
|
||||
u_upload_data(ctx->upload, 0, sizeof(ctx->vertices), ctx->vertices,
|
||||
&vb.buffer_offset, &vb.buffer);
|
||||
if (!vb.buffer)
|
||||
return;
|
||||
u_upload_unmap(ctx->upload);
|
||||
|
||||
pipe->set_vertex_buffers(pipe, ctx->base.vb_slot, 1, &vb);
|
||||
@@ -2089,6 +2091,9 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
|
||||
|
||||
u_upload_data(ctx->upload, 0, num_channels*4, clear_value,
|
||||
&vb.buffer_offset, &vb.buffer);
|
||||
if (!vb.buffer)
|
||||
goto out;
|
||||
|
||||
vb.stride = 0;
|
||||
|
||||
blitter_set_running_flag(ctx);
|
||||
@@ -2112,6 +2117,7 @@ void util_blitter_clear_buffer(struct blitter_context *blitter,
|
||||
|
||||
util_draw_arrays(pipe, PIPE_PRIM_POINTS, 0, size / 4);
|
||||
|
||||
out:
|
||||
blitter_restore_vertex_states(ctx);
|
||||
blitter_restore_render_cond(ctx);
|
||||
blitter_unset_running_flag(ctx);
|
||||
|
@@ -545,6 +545,7 @@ u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
|
||||
|
||||
index = ffs(unused_vb_mask) - 1;
|
||||
fallback_vbs[type] = index;
|
||||
unused_vb_mask &= ~(1 << index);
|
||||
/*printf("found slot=%i for type=%i\n", index, type);*/
|
||||
}
|
||||
}
|
||||
|
@@ -355,6 +355,10 @@ to be 0.
|
||||
are supported.
|
||||
* ``PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE``: Whether the driver doesn't
|
||||
ignore tgsi_declaration_range::Last for shader inputs and outputs.
|
||||
* ``PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT``: This is the maximum number
|
||||
of iterations that loops are allowed to have to be unrolled. It is only
|
||||
a hint to state trackers. Whether any loops will be unrolled is not
|
||||
guaranteed.
|
||||
|
||||
|
||||
.. _pipe_compute_cap:
|
||||
|
@@ -828,11 +828,7 @@ fd3_emit_restore(struct fd_context *ctx)
|
||||
OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
|
||||
A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
|
||||
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
|
||||
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
|
||||
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
|
||||
A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
|
||||
fd3_emit_cache_flush(ctx, ring);
|
||||
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
|
||||
OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */
|
||||
|
@@ -90,4 +90,15 @@ void fd3_emit_restore(struct fd_context *ctx);
|
||||
|
||||
void fd3_emit_init(struct pipe_context *pctx);
|
||||
|
||||
/* Emit a UCHE cache invalidate into @ring.
 *
 * Calls fd_wfi() first, then writes the two consecutive
 * UCHE_CACHE_INVALIDATE registers: address 0 in both, opcode INVALIDATE and
 * the ENTIRE_CACHE flag in the second.
 *
 * NOTE(review): fd_wfi() is presumably a wait-for-idle so the invalidate is
 * ordered after earlier work -- confirm against its definition.
 */
static inline void
|
||||
fd3_emit_cache_flush(struct fd_context *ctx, struct fd_ringbuffer *ring)
|
||||
{
|
||||
fd_wfi(ctx, ring);
|
||||
/* One type-0 packet covering two registers, starting at INVALIDATE0. */
OUT_PKT0(ring, REG_A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
|
||||
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE0_REG_ADDR(0));
|
||||
OUT_RING(ring, A3XX_UCHE_CACHE_INVALIDATE1_REG_ADDR(0) |
|
||||
A3XX_UCHE_CACHE_INVALIDATE1_REG_OPCODE(INVALIDATE) |
|
||||
A3XX_UCHE_CACHE_INVALIDATE1_REG_ENTIRE_CACHE);
|
||||
}
|
||||
|
||||
#endif /* FD3_EMIT_H */
|
||||
|
@@ -355,6 +355,8 @@ fd3_fs_output_format(enum pipe_format format)
|
||||
case PIPE_FORMAT_R16G16_FLOAT:
|
||||
case PIPE_FORMAT_R11G11B10_FLOAT:
|
||||
return RB_R16G16B16A16_FLOAT;
|
||||
case PIPE_FORMAT_L8_UNORM:
|
||||
return RB_R8G8B8A8_UNORM;
|
||||
default:
|
||||
return fd3_pipe2color(format);
|
||||
}
|
||||
|
@@ -558,6 +558,8 @@ fd3_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
|
||||
OUT_RING(ring, fui(x1));
|
||||
OUT_RING(ring, fui(y1));
|
||||
|
||||
fd3_emit_cache_flush(ctx, ring);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
|
||||
OUT_RING(ring, A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY) |
|
||||
|
@@ -407,6 +407,8 @@ fd_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
return 16;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
debug_printf("unknown shader param %d\n", param);
|
||||
return 0;
|
||||
|
@@ -167,6 +167,8 @@ i915_get_shader_param(struct pipe_screen *screen, unsigned shader, enum pipe_sha
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
debug_printf("%s: Unknown cap %u.\n", __FUNCTION__, cap);
|
||||
return 0;
|
||||
|
@@ -138,6 +138,8 @@ ilo_get_shader_param(struct pipe_screen *screen, unsigned shader,
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
|
||||
default:
|
||||
return 0;
|
||||
|
@@ -25,10 +25,24 @@
|
||||
|
||||
#include <stack>
|
||||
#include <limits>
|
||||
#if __cplusplus >= 201103L
|
||||
#include <unordered_map>
|
||||
#else
|
||||
#include <tr1/unordered_map>
|
||||
#endif
|
||||
|
||||
namespace nv50_ir {
|
||||
|
||||
#if __cplusplus >= 201103L
|
||||
using std::hash;
|
||||
using std::unordered_map;
|
||||
#elif !defined(ANDROID)
|
||||
using std::tr1::hash;
|
||||
using std::tr1::unordered_map;
|
||||
#else
|
||||
#error Android release before Lollipop is not supported!
|
||||
#endif
|
||||
|
||||
#define MAX_REGISTER_FILE_SIZE 256
|
||||
|
||||
class RegisterSet
|
||||
@@ -349,12 +363,12 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
|
||||
|
||||
struct PhiMapHash {
|
||||
size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
|
||||
return std::tr1::hash<Instruction*>()(val.first) * 31 +
|
||||
std::tr1::hash<BasicBlock*>()(val.second);
|
||||
return hash<Instruction*>()(val.first) * 31 +
|
||||
hash<BasicBlock*>()(val.second);
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::tr1::unordered_map<
|
||||
typedef unordered_map<
|
||||
std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
|
||||
|
||||
// Critical edges need to be split up so that work can be inserted along
|
||||
|
@@ -80,7 +80,12 @@ release_allocation(struct nouveau_mm_allocation **mm,
|
||||
inline void
|
||||
nouveau_buffer_release_gpu_storage(struct nv04_resource *buf)
|
||||
{
|
||||
nouveau_bo_ref(NULL, &buf->bo);
|
||||
if (buf->fence && buf->fence->state < NOUVEAU_FENCE_STATE_FLUSHED) {
|
||||
nouveau_fence_work(buf->fence, nouveau_fence_unref_bo, buf->bo);
|
||||
buf->bo = NULL;
|
||||
} else {
|
||||
nouveau_bo_ref(NULL, &buf->bo);
|
||||
}
|
||||
|
||||
if (buf->mm)
|
||||
release_allocation(&buf->mm, buf->fence);
|
||||
@@ -281,7 +286,8 @@ nouveau_buffer_transfer_del(struct nouveau_context *nv,
|
||||
{
|
||||
if (tx->map) {
|
||||
if (likely(tx->bo)) {
|
||||
nouveau_bo_ref(NULL, &tx->bo);
|
||||
nouveau_fence_work(nv->screen->fence.current,
|
||||
nouveau_fence_unref_bo, tx->bo);
|
||||
if (tx->mm)
|
||||
release_allocation(&tx->mm, nv->screen->fence.current);
|
||||
} else {
|
||||
@@ -782,7 +788,7 @@ nouveau_buffer_migrate(struct nouveau_context *nv,
|
||||
nv->copy_data(nv, buf->bo, buf->offset, new_domain,
|
||||
bo, offset, old_domain, buf->base.width0);
|
||||
|
||||
nouveau_bo_ref(NULL, &bo);
|
||||
nouveau_fence_work(screen->fence.current, nouveau_fence_unref_bo, bo);
|
||||
if (mm)
|
||||
release_allocation(&mm, screen->fence.current);
|
||||
} else
|
||||
|
@@ -190,8 +190,14 @@ nouveau_fence_wait(struct nouveau_fence *fence)
|
||||
/* wtf, someone is waiting on a fence in flush_notify handler? */
|
||||
assert(fence->state != NOUVEAU_FENCE_STATE_EMITTING);
|
||||
|
||||
if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
|
||||
nouveau_fence_emit(fence);
|
||||
if (fence->state < NOUVEAU_FENCE_STATE_EMITTED) {
|
||||
PUSH_SPACE(screen->pushbuf, 8);
|
||||
/* The space allocation might trigger a flush, which could emit the
|
||||
* current fence. So check again.
|
||||
*/
|
||||
if (fence->state < NOUVEAU_FENCE_STATE_EMITTED)
|
||||
nouveau_fence_emit(fence);
|
||||
}
|
||||
|
||||
if (fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
|
||||
if (nouveau_pushbuf_kick(screen->pushbuf, screen->pushbuf->channel))
|
||||
@@ -224,10 +230,22 @@ nouveau_fence_wait(struct nouveau_fence *fence)
|
||||
void
|
||||
nouveau_fence_next(struct nouveau_screen *screen)
|
||||
{
|
||||
if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING)
|
||||
nouveau_fence_emit(screen->fence.current);
|
||||
if (screen->fence.current->state < NOUVEAU_FENCE_STATE_EMITTING) {
|
||||
if (screen->fence.current->ref > 1)
|
||||
nouveau_fence_emit(screen->fence.current);
|
||||
else
|
||||
return;
|
||||
}
|
||||
|
||||
nouveau_fence_ref(NULL, &screen->fence.current);
|
||||
|
||||
nouveau_fence_new(screen, &screen->fence.current, false);
|
||||
}
|
||||
|
||||
/* Generic callback that drops one reference on a buffer object.
 *
 * @data is interpreted as a struct nouveau_bo pointer.  The signature
 * (void *) matches what nouveau_fence_work() is given elsewhere, so the
 * unreference can be deferred until a fence has been reached.
 */
void
|
||||
nouveau_fence_unref_bo(void *data)
|
||||
{
|
||||
struct nouveau_bo *bo = data;
|
||||
|
||||
/* Passing NULL as the new reference releases the one held in bo. */
nouveau_bo_ref(NULL, &bo);
|
||||
}
|
||||
|
@@ -37,6 +37,9 @@ void nouveau_fence_next(struct nouveau_screen *);
|
||||
bool nouveau_fence_wait(struct nouveau_fence *);
|
||||
bool nouveau_fence_signalled(struct nouveau_fence *);
|
||||
|
||||
void nouveau_fence_unref_bo(void *data); /* generic unref bo callback */
|
||||
|
||||
|
||||
static inline void
|
||||
nouveau_fence_ref(struct nouveau_fence *fence, struct nouveau_fence **ref)
|
||||
{
|
||||
|
@@ -24,6 +24,8 @@ PUSH_AVAIL(struct nouveau_pushbuf *push)
|
||||
static inline bool
|
||||
PUSH_SPACE(struct nouveau_pushbuf *push, uint32_t size)
|
||||
{
|
||||
/* Provide a buffer so that fences always have room to be emitted */
|
||||
size += 8;
|
||||
if (PUSH_AVAIL(push) < size)
|
||||
return nouveau_pushbuf_space(push, size, 0, 0) == 0;
|
||||
return true;
|
||||
|
@@ -78,12 +78,12 @@ nv30_format_info_table[PIPE_FORMAT_COUNT] = {
|
||||
_(B4G4R4X4_UNORM , S___),
|
||||
_(B4G4R4A4_UNORM , S___),
|
||||
_(B5G6R5_UNORM , SB__),
|
||||
_(B8G8R8X8_UNORM , SB__),
|
||||
_(B8G8R8X8_SRGB , S___),
|
||||
_(B8G8R8A8_UNORM , SB__),
|
||||
_(B8G8R8A8_SRGB , S___),
|
||||
_(BGRX8888_UNORM , SB__),
|
||||
_(BGRX8888_SRGB , S___),
|
||||
_(BGRA8888_UNORM , SB__),
|
||||
_(BGRA8888_SRGB , S___),
|
||||
_(R8G8B8A8_UNORM , __V_),
|
||||
_(R8G8B8A8_SNORM , S___),
|
||||
_(RGBA8888_SNORM , S___),
|
||||
_(DXT1_RGB , S___),
|
||||
_(DXT1_SRGB , S___),
|
||||
_(DXT1_RGBA , S___),
|
||||
@@ -138,8 +138,8 @@ const struct nv30_format
|
||||
nv30_format_table[PIPE_FORMAT_COUNT] = {
|
||||
R_(B5G5R5X1_UNORM , X1R5G5B5 ),
|
||||
R_(B5G6R5_UNORM , R5G6B5 ),
|
||||
R_(B8G8R8X8_UNORM , X8R8G8B8 ),
|
||||
R_(B8G8R8A8_UNORM , A8R8G8B8 ),
|
||||
R_(BGRX8888_UNORM , X8R8G8B8 ),
|
||||
R_(BGRA8888_UNORM , A8R8G8B8 ),
|
||||
Z_(Z16_UNORM , Z16 ),
|
||||
Z_(X8Z24_UNORM , Z24S8 ),
|
||||
Z_(S8_UINT_Z24_UNORM , Z24S8 ),
|
||||
@@ -223,11 +223,11 @@ nv30_texfmt_table[PIPE_FORMAT_COUNT] = {
|
||||
_(B4G4R4X4_UNORM , A4R4G4B4, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(B4G4R4A4_UNORM , A4R4G4B4, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
_(B5G6R5_UNORM , R5G6B5 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(B8G8R8X8_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(B8G8R8X8_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
|
||||
_(B8G8R8A8_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
_(B8G8R8A8_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
|
||||
_(R8G8B8A8_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
|
||||
_(BGRX8888_UNORM , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(BGRX8888_SRGB , A8R8G8B8, 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
|
||||
_(BGRA8888_UNORM , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
_(BGRA8888_SRGB , A8R8G8B8, 0, C, C, C, C, 2, 1, 0, 3, SRGB, ____),
|
||||
_(RGBA8888_SNORM , A8R8G8B8, 0, C, C, C, C, 0, 1, 2, 3, NONE, SSSS),
|
||||
_(DXT1_RGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, NONE, ____),
|
||||
_(DXT1_SRGB , DXT1 , 0, C, C, C, 1, 2, 1, 0, x, SRGB, ____),
|
||||
_(DXT1_RGBA , DXT1 , 0, C, C, C, C, 2, 1, 0, 3, NONE, ____),
|
||||
|
@@ -339,10 +339,15 @@ nv30_miptree_transfer_unmap(struct pipe_context *pipe,
|
||||
struct nv30_context *nv30 = nv30_context(pipe);
|
||||
struct nv30_transfer *tx = nv30_transfer(ptx);
|
||||
|
||||
if (ptx->usage & PIPE_TRANSFER_WRITE)
|
||||
if (ptx->usage & PIPE_TRANSFER_WRITE) {
|
||||
nv30_transfer_rect(nv30, NEAREST, &tx->tmp, &tx->img);
|
||||
|
||||
nouveau_bo_ref(NULL, &tx->tmp.bo);
|
||||
/* Allow the copies above to finish executing before freeing the source */
|
||||
nouveau_fence_work(nv30->screen->base.fence.current,
|
||||
nouveau_fence_unref_bo, tx->tmp.bo);
|
||||
} else {
|
||||
nouveau_bo_ref(NULL, &tx->tmp.bo);
|
||||
}
|
||||
pipe_resource_reference(&ptx->resource, NULL);
|
||||
FREE(tx);
|
||||
}
|
||||
|
@@ -261,6 +261,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
debug_printf("unknown vertex shader param %d\n", param);
|
||||
return 0;
|
||||
@@ -302,6 +304,8 @@ nv30_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
debug_printf("unknown fragment shader param %d\n", param);
|
||||
return 0;
|
||||
@@ -345,7 +349,9 @@ nv30_screen_fence_emit(struct pipe_screen *pscreen, uint32_t *sequence)
|
||||
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
BEGIN_NV04(push, NV30_3D(FENCE_OFFSET), 2);
|
||||
assert(PUSH_AVAIL(push) >= 3);
|
||||
PUSH_DATA (push, NV30_3D_FENCE_OFFSET |
|
||||
(2 /* size */ << 18) | (7 /* subchan */ << 13));
|
||||
PUSH_DATA (push, 0);
|
||||
PUSH_DATA (push, *sequence);
|
||||
}
|
||||
|
@@ -191,7 +191,11 @@ nv30_vbo_validate(struct nv30_context *nv30)
|
||||
if (!nv30->vertex || nv30->draw_flags)
|
||||
return;
|
||||
|
||||
#ifdef PIPE_ARCH_BIG_ENDIAN
|
||||
if (1) { /* Figure out where the buffers are getting messed up */
|
||||
#else
|
||||
if (unlikely(vertex->need_conversion)) {
|
||||
#endif
|
||||
nv30->vbo_fifo = ~0;
|
||||
nv30->vbo_user = 0;
|
||||
} else {
|
||||
|
@@ -1,3 +1,4 @@
|
||||
#include <strings.h>
|
||||
#include "pipe/p_context.h"
|
||||
#include "pipe/p_defines.h"
|
||||
#include "pipe/p_state.h"
|
||||
|
@@ -163,7 +163,10 @@ nv50_miptree_destroy(struct pipe_screen *pscreen, struct pipe_resource *pt)
|
||||
{
|
||||
struct nv50_miptree *mt = nv50_miptree(pt);
|
||||
|
||||
nouveau_bo_ref(NULL, &mt->base.bo);
|
||||
if (mt->base.fence && mt->base.fence->state < NOUVEAU_FENCE_STATE_FLUSHED)
|
||||
nouveau_fence_work(mt->base.fence, nouveau_fence_unref_bo, mt->base.bo);
|
||||
else
|
||||
nouveau_bo_ref(NULL, &mt->base.bo);
|
||||
|
||||
nouveau_fence_ref(NULL, &mt->base.fence);
|
||||
nouveau_fence_ref(NULL, &mt->base.fence_wr);
|
||||
|
@@ -100,7 +100,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_MAX_TEXEL_OFFSET:
|
||||
return 7;
|
||||
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
|
||||
return 65536;
|
||||
return 128 * 1024 * 1024;
|
||||
case PIPE_CAP_GLSL_FEATURE_LEVEL:
|
||||
return 330;
|
||||
case PIPE_CAP_MAX_RENDER_TARGETS:
|
||||
@@ -297,6 +297,8 @@ nv50_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
|
||||
return 0;
|
||||
@@ -386,6 +388,7 @@ nv50_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
|
||||
/* we need to do it after possible flush in MARK_RING */
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
assert(PUSH_AVAIL(push) >= 5);
|
||||
PUSH_DATA (push, NV50_FIFO_PKHDR(NV50_3D(QUERY_ADDRESS_HIGH), 4));
|
||||
PUSH_DATAh(push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, screen->fence.bo->offset);
|
||||
|
@@ -65,14 +65,9 @@ nv50_constbufs_validate(struct nv50_context *nv50)
|
||||
PUSH_DATA (push, (b << 12) | (i << 8) | p | 1);
|
||||
}
|
||||
while (words) {
|
||||
unsigned nr;
|
||||
|
||||
if (!PUSH_SPACE(push, 16))
|
||||
break;
|
||||
nr = PUSH_AVAIL(push);
|
||||
assert(nr >= 16);
|
||||
nr = MIN2(MIN2(nr - 3, words), NV04_PFIFO_MAX_PACKET_LEN);
|
||||
unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
|
||||
|
||||
PUSH_SPACE(push, nr + 3);
|
||||
BEGIN_NV04(push, NV50_3D(CB_ADDR), 1);
|
||||
PUSH_DATA (push, (start << 8) | b);
|
||||
BEGIN_NI04(push, NV50_3D(CB_DATA(0)), nr);
|
||||
|
@@ -221,6 +221,26 @@ nv50_create_texture_view(struct pipe_context *pipe,
|
||||
return &view->pipe;
|
||||
}
|
||||
|
||||
/* Re-point a buffer texture view's TIC entry at the resource's current
 * address.
 *
 * Does nothing for non-buffer resources, or when the address already stored
 * in the entry matches.  Otherwise the entry is unlocked, its id is reset to
 * -1 and the address words are patched; nv50_validate_tic() allocates a new
 * slot for entries whose id is negative.
 */
static void
|
||||
nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
|
||||
struct nv04_resource *res)
|
||||
{
|
||||
uint64_t address = res->address;
|
||||
if (res->base.target != PIPE_BUFFER)
|
||||
return;
|
||||
/* Advance to the first element covered by the view.
 * NOTE(review): the product is evaluated in the operands' own type before
 * being added to the 64-bit address -- confirm it cannot overflow. */
address += tic->pipe.u.buf.first_element *
|
||||
util_format_get_blocksize(tic->pipe.format);
|
||||
/* tic[1] carries address bits 0..31, the low byte of tic[2] the bits above
 * 32; if both already match there is nothing to update. */
if (tic->tic[1] == (uint32_t)address &&
|
||||
(tic->tic[2] & 0xff) == address >> 32)
|
||||
return;
|
||||
|
||||
/* NOTE(review): presumably releases the slot this entry occupied so it can
 * be re-uploaded -- confirm against nv50_screen_tic_unlock(). */
nv50_screen_tic_unlock(nv50->screen, tic);
|
||||
tic->id = -1;
|
||||
tic->tic[1] = address;
|
||||
tic->tic[2] &= 0xffffff00;
|
||||
tic->tic[2] |= address >> 32;
|
||||
}
|
||||
|
||||
static bool
|
||||
nv50_validate_tic(struct nv50_context *nv50, int s)
|
||||
{
|
||||
@@ -240,6 +260,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
|
||||
continue;
|
||||
}
|
||||
res = &nv50_miptree(tic->pipe.texture)->base;
|
||||
nv50_update_tic(nv50, tic, res);
|
||||
|
||||
if (tic->id < 0) {
|
||||
tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
|
||||
|
@@ -187,14 +187,7 @@ nv50_sifc_linear_u8(struct nouveau_context *nv,
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
while (count) {
|
||||
unsigned nr;
|
||||
|
||||
if (!PUSH_SPACE(push, 16))
|
||||
break;
|
||||
nr = PUSH_AVAIL(push);
|
||||
assert(nr >= 16);
|
||||
nr = MIN2(count, nr - 1);
|
||||
nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
|
||||
unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
|
||||
|
||||
BEGIN_NI04(push, NV50_2D(SIFC_DATA), nr);
|
||||
PUSH_DATAp(push, src, nr);
|
||||
@@ -365,9 +358,14 @@ nv50_miptree_transfer_unmap(struct pipe_context *pctx,
|
||||
tx->rect[0].base += mt->layer_stride;
|
||||
tx->rect[1].base += tx->nblocksy * tx->base.stride;
|
||||
}
|
||||
|
||||
/* Allow the copies above to finish executing before freeing the source */
|
||||
nouveau_fence_work(nv50->screen->base.fence.current,
|
||||
nouveau_fence_unref_bo, tx->rect[1].bo);
|
||||
} else {
|
||||
nouveau_bo_ref(NULL, &tx->rect[1].bo);
|
||||
}
|
||||
|
||||
nouveau_bo_ref(NULL, &tx->rect[1].bo);
|
||||
pipe_resource_reference(&transfer->resource, NULL);
|
||||
|
||||
FREE(tx);
|
||||
@@ -390,12 +388,9 @@ nv50_cb_push(struct nouveau_context *nv,
|
||||
nouveau_pushbuf_validate(push);
|
||||
|
||||
while (words) {
|
||||
unsigned nr;
|
||||
|
||||
nr = PUSH_AVAIL(push);
|
||||
nr = MIN2(nr - 7, words);
|
||||
nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
|
||||
unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN);
|
||||
|
||||
PUSH_SPACE(push, nr + 7);
|
||||
BEGIN_NV04(push, NV50_3D(CB_DEF_ADDRESS_HIGH), 3);
|
||||
PUSH_DATAh(push, bo->offset + base);
|
||||
PUSH_DATA (push, bo->offset + base);
|
||||
|
@@ -768,6 +768,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
bool tex_dirty = false;
|
||||
int i, s;
|
||||
|
||||
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
|
||||
@@ -797,6 +798,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
|
||||
push->kick_notify = nv50_draw_vbo_kick_notify;
|
||||
|
||||
/* TODO: Instead of iterating over all the buffer resources looking for
|
||||
* coherent buffers, keep track of a context-wide count.
|
||||
*/
|
||||
for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
|
||||
uint32_t valid = nv50->constbuf_valid[s];
|
||||
|
||||
@@ -824,6 +828,21 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
nv50->cb_dirty = false;
|
||||
}
|
||||
|
||||
for (s = 0; s < 3 && !tex_dirty; ++s) {
|
||||
for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
|
||||
if (!nv50->textures[s][i] ||
|
||||
nv50->textures[s][i]->texture->target != PIPE_BUFFER)
|
||||
continue;
|
||||
if (nv50->textures[s][i]->texture->flags &
|
||||
PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
tex_dirty = true;
|
||||
}
|
||||
}
|
||||
if (tex_dirty) {
|
||||
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
|
||||
PUSH_DATA (push, 0x20);
|
||||
}
|
||||
|
||||
if (nv50->vbo_fifo) {
|
||||
nv50_push_vbo(nv50, info);
|
||||
push->kick_notify = nv50_default_kick_notify;
|
||||
|
@@ -87,7 +87,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
|
||||
return 31;
|
||||
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
|
||||
return 65536;
|
||||
return 128 * 1024 * 1024;
|
||||
case PIPE_CAP_GLSL_FEATURE_LEVEL:
|
||||
return 410;
|
||||
case PIPE_CAP_MAX_RENDER_TARGETS:
|
||||
@@ -310,6 +310,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
return 16; /* would be 32 in linked (OpenGL-style) mode */
|
||||
case PIPE_SHADER_CAP_MAX_SAMPLER_VIEWS:
|
||||
return 16; /* XXX not sure if more are really safe */
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
|
||||
return 0;
|
||||
@@ -535,7 +537,8 @@ nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
|
||||
/* we need to do it after possible flush in MARK_RING */
|
||||
*sequence = ++screen->base.fence.sequence;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
|
||||
assert(PUSH_AVAIL(push) >= 5);
|
||||
PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(NVC0_3D(QUERY_ADDRESS_HIGH), 4));
|
||||
PUSH_DATAh(push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, screen->fence.bo->offset);
|
||||
PUSH_DATA (push, *sequence);
|
||||
|
@@ -226,6 +226,26 @@ nvc0_create_texture_view(struct pipe_context *pipe,
|
||||
return &view->pipe;
|
||||
}
|
||||
|
||||
/* Re-point a buffer texture view's TIC entry at the resource's current
 * address.
 *
 * Does nothing for non-buffer resources, or when the address already stored
 * in the entry matches.  Otherwise the entry is unlocked, its id is reset to
 * -1 and the address words are patched; the validate_tic functions allocate
 * a new slot for entries whose id is negative.
 */
static void
|
||||
nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
|
||||
struct nv04_resource *res)
|
||||
{
|
||||
uint64_t address = res->address;
|
||||
if (res->base.target != PIPE_BUFFER)
|
||||
return;
|
||||
/* Advance to the first element covered by the view.
 * NOTE(review): the product is evaluated in the operands' own type before
 * being added to the 64-bit address -- confirm it cannot overflow. */
address += tic->pipe.u.buf.first_element *
|
||||
util_format_get_blocksize(tic->pipe.format);
|
||||
/* tic[1] carries address bits 0..31, the low byte of tic[2] the bits above
 * 32; if both already match there is nothing to update. */
if (tic->tic[1] == (uint32_t)address &&
|
||||
(tic->tic[2] & 0xff) == address >> 32)
|
||||
return;
|
||||
|
||||
/* NOTE(review): presumably releases the slot this entry occupied so it can
 * be re-uploaded -- confirm against nvc0_screen_tic_unlock(). */
nvc0_screen_tic_unlock(nvc0->screen, tic);
|
||||
tic->id = -1;
|
||||
tic->tic[1] = address;
|
||||
tic->tic[2] &= 0xffffff00;
|
||||
tic->tic[2] |= address >> 32;
|
||||
}
|
||||
|
||||
static bool
|
||||
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
|
||||
{
|
||||
@@ -247,6 +267,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
|
||||
continue;
|
||||
}
|
||||
res = nv04_resource(tic->pipe.texture);
|
||||
nvc0_update_tic(nvc0, tic, res);
|
||||
|
||||
if (tic->id < 0) {
|
||||
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
|
||||
@@ -313,6 +334,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
|
||||
continue;
|
||||
}
|
||||
res = nv04_resource(tic->pipe.texture);
|
||||
nvc0_update_tic(nvc0, tic, res);
|
||||
|
||||
if (tic->id < 0) {
|
||||
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
|
||||
|
@@ -188,14 +188,10 @@ nvc0_m2mf_push_linear(struct nouveau_context *nv,
|
||||
nouveau_pushbuf_validate(push);
|
||||
|
||||
while (count) {
|
||||
unsigned nr;
|
||||
unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
|
||||
|
||||
if (!PUSH_SPACE(push, 16))
|
||||
if (!PUSH_SPACE(push, nr + 9))
|
||||
break;
|
||||
nr = PUSH_AVAIL(push);
|
||||
assert(nr >= 16);
|
||||
nr = MIN2(count, nr - 9);
|
||||
nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
|
||||
|
||||
BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
|
||||
PUSH_DATAh(push, dst->offset + offset);
|
||||
@@ -234,14 +230,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
|
||||
nouveau_pushbuf_validate(push);
|
||||
|
||||
while (count) {
|
||||
unsigned nr;
|
||||
unsigned nr = MIN2(count, (NV04_PFIFO_MAX_PACKET_LEN - 1));
|
||||
|
||||
if (!PUSH_SPACE(push, 16))
|
||||
if (!PUSH_SPACE(push, nr + 10))
|
||||
break;
|
||||
nr = PUSH_AVAIL(push);
|
||||
assert(nr >= 16);
|
||||
nr = MIN2(count, nr - 8);
|
||||
nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
|
||||
|
||||
BEGIN_NVC0(push, NVE4_P2MF(UPLOAD_DST_ADDRESS_HIGH), 2);
|
||||
PUSH_DATAh(push, dst->offset + offset);
|
||||
@@ -495,11 +487,16 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
|
||||
tx->rect[1].base += tx->nblocksy * tx->base.stride;
|
||||
}
|
||||
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
|
||||
|
||||
/* Allow the copies above to finish executing before freeing the source */
|
||||
nouveau_fence_work(nvc0->screen->base.fence.current,
|
||||
nouveau_fence_unref_bo, tx->rect[1].bo);
|
||||
} else {
|
||||
nouveau_bo_ref(NULL, &tx->rect[1].bo);
|
||||
}
|
||||
if (tx->base.usage & PIPE_TRANSFER_READ)
|
||||
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);
|
||||
|
||||
nouveau_bo_ref(NULL, &tx->rect[1].bo);
|
||||
pipe_resource_reference(&transfer->resource, NULL);
|
||||
|
||||
FREE(tx);
|
||||
@@ -566,9 +563,7 @@ nvc0_cb_bo_push(struct nouveau_context *nv,
|
||||
PUSH_DATA (push, bo->offset + base);
|
||||
|
||||
while (words) {
|
||||
unsigned nr = PUSH_AVAIL(push);
|
||||
nr = MIN2(nr, words);
|
||||
nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
|
||||
unsigned nr = MIN2(words, NV04_PFIFO_MAX_PACKET_LEN - 1);
|
||||
|
||||
PUSH_SPACE(push, nr + 2);
|
||||
PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
|
||||
|
@@ -899,6 +899,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
|
||||
push->kick_notify = nvc0_draw_vbo_kick_notify;
|
||||
|
||||
/* TODO: Instead of iterating over all the buffer resources looking for
|
||||
* coherent buffers, keep track of a context-wide count.
|
||||
*/
|
||||
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
|
||||
uint32_t valid = nvc0->constbuf_valid[s];
|
||||
|
||||
@@ -924,6 +927,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
nvc0->cb_dirty = false;
|
||||
}
|
||||
|
||||
for (s = 0; s < 5; ++s) {
|
||||
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
struct pipe_resource *res;
|
||||
if (!tic)
|
||||
continue;
|
||||
res = nvc0->textures[s][i]->texture;
|
||||
if (res->target != PIPE_BUFFER ||
|
||||
!(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
|
||||
continue;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
|
||||
PUSH_DATA (push, (tic->id << 4) | 1);
|
||||
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nvc0->state.vbo_mode) {
|
||||
nvc0_push_vbo(nvc0, info);
|
||||
push->kick_notify = nvc0_default_kick_notify;
|
||||
|
@@ -300,6 +300,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
|
||||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
}
|
||||
@@ -356,6 +358,8 @@ static int r300_get_shader_param(struct pipe_screen *pscreen, unsigned shader, e
|
||||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
}
|
||||
|
@@ -504,6 +504,12 @@ static int r600_get_shader_param(struct pipe_screen* pscreen, unsigned shader, e
|
||||
case PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
/* due to a bug in the shader compiler, some loops hang
|
||||
* if they are not unrolled, see:
|
||||
* https://bugs.freedesktop.org/show_bug.cgi?id=86720
|
||||
*/
|
||||
return 255;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@@ -305,12 +305,11 @@ static void *r600_buffer_transfer_map(struct pipe_context *ctx,
|
||||
data += box->x % R600_MAP_BUFFER_ALIGNMENT;
|
||||
return r600_buffer_get_transfer(ctx, resource, level, usage, box,
|
||||
ptransfer, data, staging, offset);
|
||||
} else {
|
||||
return NULL; /* error, shouldn't occur though */
|
||||
}
|
||||
} else {
|
||||
/* At this point, the buffer is always idle (we checked it above). */
|
||||
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||||
}
|
||||
/* At this point, the buffer is always idle (we checked it above). */
|
||||
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
|
||||
}
|
||||
/* Using a staging buffer in GTT for larger reads is much faster. */
|
||||
else if ((usage & PIPE_TRANSFER_READ) &&
|
||||
|
@@ -78,6 +78,9 @@ void r600_draw_rectangle(struct blitter_context *blitter,
|
||||
* I guess the 4th one is derived from the first 3.
|
||||
* The vertex specification should match u_blitter's vertex element state. */
|
||||
u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, &offset, &buf, (void**)&vb);
|
||||
if (!buf)
|
||||
return;
|
||||
|
||||
vb[0] = x1;
|
||||
vb[1] = y1;
|
||||
vb[2] = depth;
|
||||
|
@@ -989,6 +989,11 @@ static void *r600_texture_transfer_map(struct pipe_context *ctx,
|
||||
|
||||
if (usage & PIPE_TRANSFER_READ) {
|
||||
struct pipe_resource *temp = ctx->screen->resource_create(ctx->screen, &resource);
|
||||
if (!temp) {
|
||||
R600_ERR("failed to create a temporary depth texture\n");
|
||||
FREE(trans);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
r600_copy_region_with_blit(ctx, temp, 0, 0, 0, 0, texture, level, box);
|
||||
rctx->blit_decompress_depth(ctx, (struct r600_texture*)temp, staging_depth,
|
||||
|
@@ -25,6 +25,8 @@
|
||||
*/
|
||||
#include "radeon_llvm_emit.h"
|
||||
#include "radeon_elf_util.h"
|
||||
#include "c11/threads.h"
|
||||
#include "gallivm/lp_bld_misc.h"
|
||||
#include "util/u_memory.h"
|
||||
#include "pipe/p_shader_tokens.h"
|
||||
|
||||
@@ -86,30 +88,29 @@ void radeon_llvm_shader_type(LLVMValueRef F, unsigned type)
|
||||
|
||||
static void init_r600_target()
|
||||
{
|
||||
static unsigned initialized = 0;
|
||||
if (!initialized) {
|
||||
gallivm_init_llvm_targets();
|
||||
#if HAVE_LLVM < 0x0307
|
||||
LLVMInitializeR600TargetInfo();
|
||||
LLVMInitializeR600Target();
|
||||
LLVMInitializeR600TargetMC();
|
||||
LLVMInitializeR600AsmPrinter();
|
||||
LLVMInitializeR600TargetInfo();
|
||||
LLVMInitializeR600Target();
|
||||
LLVMInitializeR600TargetMC();
|
||||
LLVMInitializeR600AsmPrinter();
|
||||
#else
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUAsmPrinter();
|
||||
LLVMInitializeAMDGPUTargetInfo();
|
||||
LLVMInitializeAMDGPUTarget();
|
||||
LLVMInitializeAMDGPUTargetMC();
|
||||
LLVMInitializeAMDGPUAsmPrinter();
|
||||
|
||||
#endif
|
||||
initialized = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static once_flag init_r600_target_once_flag = ONCE_FLAG_INIT;
|
||||
|
||||
LLVMTargetRef radeon_llvm_get_r600_target(const char *triple)
|
||||
{
|
||||
LLVMTargetRef target = NULL;
|
||||
char *err_message = NULL;
|
||||
|
||||
init_r600_target();
|
||||
call_once(&init_r600_target_once_flag, init_r600_target);
|
||||
|
||||
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
|
||||
fprintf(stderr, "Cannot find target for triple %s ", triple);
|
||||
|
@@ -233,6 +233,9 @@ static void vui(struct rvce_encoder *enc)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (!enc->pic.rate_ctrl.frame_rate_num)
|
||||
return;
|
||||
|
||||
RVCE_BEGIN(0x04000009); // vui
|
||||
RVCE_CS(0x00000000); //aspectRatioInfoPresentFlag
|
||||
RVCE_CS(0x00000000); //aspectRatioInfo.aspectRatioIdc
|
||||
|
@@ -468,7 +468,8 @@ void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuf
|
||||
|
||||
u_upload_alloc(sctx->b.uploader, 0, size, const_offset,
|
||||
(struct pipe_resource**)rbuffer, &tmp);
|
||||
util_memcpy_cpu_to_le32(tmp, ptr, size);
|
||||
if (rbuffer)
|
||||
util_memcpy_cpu_to_le32(tmp, ptr, size);
|
||||
}
|
||||
|
||||
static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint slot,
|
||||
@@ -500,6 +501,11 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
|
||||
si_upload_const_buffer(sctx,
|
||||
(struct r600_resource**)&buffer, input->user_buffer,
|
||||
input->buffer_size, &buffer_offset);
|
||||
if (!buffer) {
|
||||
/* Just unbind on failure. */
|
||||
si_set_constant_buffer(ctx, shader, slot, NULL);
|
||||
return;
|
||||
}
|
||||
va = r600_resource(buffer)->gpu_address + buffer_offset;
|
||||
} else {
|
||||
pipe_resource_reference(&buffer, input->buffer);
|
||||
|
@@ -170,6 +170,8 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
|
||||
if (sctx->b.chip_class == CIK) {
|
||||
sctx->null_const_buf.buffer = pipe_buffer_create(screen, PIPE_BIND_CONSTANT_BUFFER,
|
||||
PIPE_USAGE_DEFAULT, 16);
|
||||
if (!sctx->null_const_buf.buffer)
|
||||
goto fail;
|
||||
sctx->null_const_buf.buffer_size = sctx->null_const_buf.buffer->width0;
|
||||
|
||||
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
|
||||
@@ -487,6 +489,8 @@ static int si_get_shader_param(struct pipe_screen* pscreen, unsigned shader, enu
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 1;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
@@ -2300,7 +2300,7 @@ static void tex_fetch_args(
|
||||
lp_build_const_int32(gallivm,
|
||||
SI_FMASK_TEX_OFFSET), "");
|
||||
fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
|
||||
fmask_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
|
||||
fmask_ptr = build_indexed_load_const(si_shader_ctx, fmask_ptr, ind_index);
|
||||
}
|
||||
} else {
|
||||
res_ptr = si_shader_ctx->resources[sampler_index];
|
||||
@@ -3829,11 +3829,14 @@ int si_shader_binary_read(struct si_screen *sscreen, struct si_shader *shader)
|
||||
{
|
||||
const struct radeon_shader_binary *binary = &shader->binary;
|
||||
unsigned i;
|
||||
int r;
|
||||
bool dump = r600_can_dump_shader(&sscreen->b,
|
||||
shader->selector ? shader->selector->tokens : NULL);
|
||||
|
||||
si_shader_binary_read_config(sscreen, shader, 0);
|
||||
si_shader_binary_upload(sscreen, shader);
|
||||
r = si_shader_binary_upload(sscreen, shader);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
if (dump) {
|
||||
if (!(sscreen->b.debug_flags & DBG_NO_ASM)) {
|
||||
@@ -4198,8 +4201,10 @@ out:
|
||||
|
||||
void si_shader_destroy(struct pipe_context *ctx, struct si_shader *shader)
|
||||
{
|
||||
if (shader->gs_copy_shader)
|
||||
if (shader->gs_copy_shader) {
|
||||
si_shader_destroy(ctx, shader->gs_copy_shader);
|
||||
FREE(shader->gs_copy_shader);
|
||||
}
|
||||
|
||||
if (shader->scratch_bo)
|
||||
r600_resource_reference(&shader->scratch_bo, NULL);
|
||||
|
@@ -274,7 +274,7 @@ si_create_sampler_view_custom(struct pipe_context *ctx,
|
||||
unsigned force_level);
|
||||
|
||||
/* si_state_shader.c */
|
||||
void si_update_shaders(struct si_context *sctx);
|
||||
bool si_update_shaders(struct si_context *sctx);
|
||||
void si_init_shader_functions(struct si_context *sctx);
|
||||
|
||||
/* si_state_draw.c */
|
||||
|
@@ -760,8 +760,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
else
|
||||
sctx->current_rast_prim = info->mode;
|
||||
|
||||
si_update_shaders(sctx);
|
||||
if (!si_upload_shader_descriptors(sctx))
|
||||
if (!si_update_shaders(sctx) ||
|
||||
!si_upload_shader_descriptors(sctx))
|
||||
return;
|
||||
|
||||
if (info->indexed) {
|
||||
@@ -783,6 +783,10 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
|
||||
u_upload_alloc(sctx->b.uploader, start_offset, count * 2,
|
||||
&out_offset, &out_buffer, &ptr);
|
||||
if (!out_buffer) {
|
||||
pipe_resource_reference(&ib.buffer, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
util_shorten_ubyte_elts_to_userptr(&sctx->b.b, &ib, 0,
|
||||
ib.offset + start_offset,
|
||||
@@ -803,6 +807,8 @@ void si_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
|
||||
u_upload_data(sctx->b.uploader, start_offset, count * ib.index_size,
|
||||
(char*)ib.user_buffer + start_offset,
|
||||
&ib.offset, &ib.buffer);
|
||||
if (!ib.buffer)
|
||||
return;
|
||||
/* info->start will be added by the drawing code */
|
||||
ib.offset -= start_offset;
|
||||
}
|
||||
|
@@ -665,8 +665,16 @@ static void *si_create_shader_state(struct pipe_context *ctx,
|
||||
struct si_shader_selector *sel = CALLOC_STRUCT(si_shader_selector);
|
||||
int i;
|
||||
|
||||
if (!sel)
|
||||
return NULL;
|
||||
|
||||
sel->type = pipe_shader_type;
|
||||
sel->tokens = tgsi_dup_tokens(state->tokens);
|
||||
if (!sel->tokens) {
|
||||
FREE(sel);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sel->so = state->stream_output;
|
||||
tgsi_scan_shader(state->tokens, &sel->info);
|
||||
p_atomic_inc(&sscreen->b.num_shaders_created);
|
||||
@@ -725,7 +733,12 @@ static void *si_create_shader_state(struct pipe_context *ctx,
|
||||
}
|
||||
|
||||
if (sscreen->b.debug_flags & DBG_PRECOMPILE)
|
||||
si_shader_select(ctx, sel);
|
||||
if (si_shader_select(ctx, sel)) {
|
||||
fprintf(stderr, "radeonsi: can't create a shader\n");
|
||||
tgsi_free_tokens(sel->tokens);
|
||||
FREE(sel);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return sel;
|
||||
}
|
||||
@@ -1031,11 +1044,23 @@ static void si_init_gs_rings(struct si_context *sctx)
|
||||
assert(!sctx->gs_rings);
|
||||
sctx->gs_rings = CALLOC_STRUCT(si_pm4_state);
|
||||
|
||||
if (!sctx->gs_rings)
|
||||
return;
|
||||
|
||||
sctx->esgs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT, esgs_ring_size);
|
||||
if (!sctx->esgs_ring) {
|
||||
FREE(sctx->gs_rings);
|
||||
return;
|
||||
}
|
||||
|
||||
sctx->gsvs_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT, gsvs_ring_size);
|
||||
if (!sctx->gsvs_ring) {
|
||||
pipe_resource_reference(&sctx->esgs_ring, NULL);
|
||||
FREE(sctx->gs_rings);
|
||||
return;
|
||||
}
|
||||
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
if (sctx->b.chip_class >= VI) {
|
||||
@@ -1094,14 +1119,16 @@ static void si_update_gs_rings(struct si_context *sctx)
|
||||
|
||||
}
|
||||
/**
|
||||
* @returns 1 if \p sel has been updated to use a new scratch buffer and 0
|
||||
* otherwise.
|
||||
* @returns 1 if \p sel has been updated to use a new scratch buffer
|
||||
* 0 if not
|
||||
* < 0 if there was a failure
|
||||
*/
|
||||
static unsigned si_update_scratch_buffer(struct si_context *sctx,
|
||||
static int si_update_scratch_buffer(struct si_context *sctx,
|
||||
struct si_shader_selector *sel)
|
||||
{
|
||||
struct si_shader *shader;
|
||||
uint64_t scratch_va = sctx->scratch_buffer->gpu_address;
|
||||
int r;
|
||||
|
||||
if (!sel)
|
||||
return 0;
|
||||
@@ -1122,7 +1149,9 @@ static unsigned si_update_scratch_buffer(struct si_context *sctx,
|
||||
si_shader_apply_scratch_relocs(sctx, shader, scratch_va);
|
||||
|
||||
/* Replace the shader bo with a new bo that has the relocs applied. */
|
||||
si_shader_binary_upload(sctx->screen, shader);
|
||||
r = si_shader_binary_upload(sctx->screen, shader);
|
||||
if (r)
|
||||
return r;
|
||||
|
||||
/* Update the shader state to use the new shader bo. */
|
||||
si_shader_init_pm4_state(shader);
|
||||
@@ -1161,7 +1190,7 @@ static unsigned si_get_max_scratch_bytes_per_wave(struct si_context *sctx)
|
||||
return bytes;
|
||||
}
|
||||
|
||||
static void si_update_spi_tmpring_size(struct si_context *sctx)
|
||||
static bool si_update_spi_tmpring_size(struct si_context *sctx)
|
||||
{
|
||||
unsigned current_scratch_buffer_size =
|
||||
si_get_current_scratch_buffer_size(sctx);
|
||||
@@ -1169,6 +1198,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
|
||||
si_get_max_scratch_bytes_per_wave(sctx);
|
||||
unsigned scratch_needed_size = scratch_bytes_per_wave *
|
||||
sctx->scratch_waves;
|
||||
int r;
|
||||
|
||||
if (scratch_needed_size > 0) {
|
||||
|
||||
@@ -1181,6 +1211,9 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
|
||||
sctx->scratch_buffer =
|
||||
si_resource_create_custom(&sctx->screen->b.b,
|
||||
PIPE_USAGE_DEFAULT, scratch_needed_size);
|
||||
if (!sctx->scratch_buffer)
|
||||
return false;
|
||||
sctx->emit_scratch_reloc = true;
|
||||
}
|
||||
|
||||
/* Update the shaders, so they are using the latest scratch. The
|
||||
@@ -1188,31 +1221,57 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
|
||||
* last used, so we still need to try to update them, even if
|
||||
* they require scratch buffers smaller than the current size.
|
||||
*/
|
||||
if (si_update_scratch_buffer(sctx, sctx->ps_shader))
|
||||
r = si_update_scratch_buffer(sctx, sctx->ps_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
|
||||
if (si_update_scratch_buffer(sctx, sctx->gs_shader))
|
||||
|
||||
r = si_update_scratch_buffer(sctx, sctx->gs_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
|
||||
if (si_update_scratch_buffer(sctx, sctx->tcs_shader))
|
||||
|
||||
r = si_update_scratch_buffer(sctx, sctx->tcs_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
|
||||
|
||||
/* VS can be bound as LS, ES, or VS. */
|
||||
if (sctx->tes_shader) {
|
||||
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
|
||||
r = si_update_scratch_buffer(sctx, sctx->vs_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
|
||||
} else if (sctx->gs_shader) {
|
||||
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
|
||||
r = si_update_scratch_buffer(sctx, sctx->vs_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
|
||||
} else {
|
||||
if (si_update_scratch_buffer(sctx, sctx->vs_shader))
|
||||
r = si_update_scratch_buffer(sctx, sctx->vs_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
|
||||
}
|
||||
|
||||
/* TES can be bound as ES or VS. */
|
||||
if (sctx->gs_shader) {
|
||||
if (si_update_scratch_buffer(sctx, sctx->tes_shader))
|
||||
r = si_update_scratch_buffer(sctx, sctx->tes_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
|
||||
} else {
|
||||
if (si_update_scratch_buffer(sctx, sctx->tes_shader))
|
||||
r = si_update_scratch_buffer(sctx, sctx->tes_shader);
|
||||
if (r < 0)
|
||||
return false;
|
||||
if (r == 1)
|
||||
si_pm4_bind_state(sctx, vs, sctx->tes_shader->current->pm4);
|
||||
}
|
||||
}
|
||||
@@ -1223,6 +1282,7 @@ static void si_update_spi_tmpring_size(struct si_context *sctx)
|
||||
|
||||
sctx->spi_tmpring_size = S_0286E8_WAVES(sctx->scratch_waves) |
|
||||
S_0286E8_WAVESIZE(scratch_bytes_per_wave >> 10);
|
||||
return true;
|
||||
}
|
||||
|
||||
static void si_init_tess_factor_ring(struct si_context *sctx)
|
||||
@@ -1230,11 +1290,20 @@ static void si_init_tess_factor_ring(struct si_context *sctx)
|
||||
assert(!sctx->tf_state);
|
||||
sctx->tf_state = CALLOC_STRUCT(si_pm4_state);
|
||||
|
||||
if (!sctx->tf_state)
|
||||
return;
|
||||
|
||||
sctx->tf_ring = pipe_buffer_create(sctx->b.b.screen, PIPE_BIND_CUSTOM,
|
||||
PIPE_USAGE_DEFAULT,
|
||||
32768 * sctx->screen->b.info.max_se);
|
||||
if (!sctx->tf_ring) {
|
||||
FREE(sctx->tf_state);
|
||||
return;
|
||||
}
|
||||
|
||||
sctx->b.clear_buffer(&sctx->b.b, sctx->tf_ring, 0,
|
||||
sctx->tf_ring->width0, fui(0), false);
|
||||
|
||||
assert(((sctx->tf_ring->width0 / 4) & C_030938_SIZE) == 0);
|
||||
|
||||
if (sctx->b.chip_class >= CIK) {
|
||||
@@ -1290,7 +1359,6 @@ static void si_generate_fixed_func_tcs(struct si_context *sctx)
|
||||
|
||||
sctx->fixed_func_tcs_shader =
|
||||
ureg_create_shader_and_destroy(ureg, &sctx->b.b);
|
||||
assert(sctx->fixed_func_tcs_shader);
|
||||
}
|
||||
|
||||
static void si_update_vgt_shader_config(struct si_context *sctx)
|
||||
@@ -1338,32 +1406,49 @@ static void si_update_so(struct si_context *sctx, struct si_shader_selector *sha
|
||||
sctx->b.streamout.stride_in_dw = shader->so.stride;
|
||||
}
|
||||
|
||||
void si_update_shaders(struct si_context *sctx)
|
||||
bool si_update_shaders(struct si_context *sctx)
|
||||
{
|
||||
struct pipe_context *ctx = (struct pipe_context*)sctx;
|
||||
struct si_state_rasterizer *rs = sctx->queued.named.rasterizer;
|
||||
int r;
|
||||
|
||||
/* Update stages before GS. */
|
||||
if (sctx->tes_shader) {
|
||||
if (!sctx->tf_state)
|
||||
if (!sctx->tf_state) {
|
||||
si_init_tess_factor_ring(sctx);
|
||||
if (!sctx->tf_state)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* VS as LS */
|
||||
si_shader_select(ctx, sctx->vs_shader);
|
||||
r = si_shader_select(ctx, sctx->vs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, ls, sctx->vs_shader->current->pm4);
|
||||
|
||||
if (sctx->tcs_shader) {
|
||||
si_shader_select(ctx, sctx->tcs_shader);
|
||||
r = si_shader_select(ctx, sctx->tcs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, hs, sctx->tcs_shader->current->pm4);
|
||||
} else {
|
||||
if (!sctx->fixed_func_tcs_shader)
|
||||
if (!sctx->fixed_func_tcs_shader) {
|
||||
si_generate_fixed_func_tcs(sctx);
|
||||
si_shader_select(ctx, sctx->fixed_func_tcs_shader);
|
||||
if (!sctx->fixed_func_tcs_shader)
|
||||
return false;
|
||||
}
|
||||
|
||||
r = si_shader_select(ctx, sctx->fixed_func_tcs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, hs,
|
||||
sctx->fixed_func_tcs_shader->current->pm4);
|
||||
}
|
||||
|
||||
si_shader_select(ctx, sctx->tes_shader);
|
||||
r = si_shader_select(ctx, sctx->tes_shader);
|
||||
if (r)
|
||||
return false;
|
||||
|
||||
if (sctx->gs_shader) {
|
||||
/* TES as ES */
|
||||
si_pm4_bind_state(sctx, es, sctx->tes_shader->current->pm4);
|
||||
@@ -1374,24 +1459,33 @@ void si_update_shaders(struct si_context *sctx)
|
||||
}
|
||||
} else if (sctx->gs_shader) {
|
||||
/* VS as ES */
|
||||
si_shader_select(ctx, sctx->vs_shader);
|
||||
r = si_shader_select(ctx, sctx->vs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, es, sctx->vs_shader->current->pm4);
|
||||
} else {
|
||||
/* VS as VS */
|
||||
si_shader_select(ctx, sctx->vs_shader);
|
||||
r = si_shader_select(ctx, sctx->vs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, vs, sctx->vs_shader->current->pm4);
|
||||
si_update_so(sctx, sctx->vs_shader);
|
||||
}
|
||||
|
||||
/* Update GS. */
|
||||
if (sctx->gs_shader) {
|
||||
si_shader_select(ctx, sctx->gs_shader);
|
||||
r = si_shader_select(ctx, sctx->gs_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, gs, sctx->gs_shader->current->pm4);
|
||||
si_pm4_bind_state(sctx, vs, sctx->gs_shader->current->gs_copy_shader->pm4);
|
||||
si_update_so(sctx, sctx->gs_shader);
|
||||
|
||||
if (!sctx->gs_rings)
|
||||
if (!sctx->gs_rings) {
|
||||
si_init_gs_rings(sctx);
|
||||
if (!sctx->gs_rings)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sctx->emitted.named.gs_rings != sctx->gs_rings)
|
||||
sctx->b.flags |= SI_CONTEXT_VGT_FLUSH;
|
||||
@@ -1406,18 +1500,9 @@ void si_update_shaders(struct si_context *sctx)
|
||||
|
||||
si_update_vgt_shader_config(sctx);
|
||||
|
||||
si_shader_select(ctx, sctx->ps_shader);
|
||||
|
||||
if (!sctx->ps_shader->current) {
|
||||
struct si_shader_selector *sel;
|
||||
|
||||
/* use a dummy shader if compiling the shader (variant) failed */
|
||||
si_make_dummy_ps(sctx);
|
||||
sel = sctx->dummy_pixel_shader;
|
||||
si_shader_select(ctx, sel);
|
||||
sctx->ps_shader->current = sel->current;
|
||||
}
|
||||
|
||||
r = si_shader_select(ctx, sctx->ps_shader);
|
||||
if (r)
|
||||
return false;
|
||||
si_pm4_bind_state(sctx, ps, sctx->ps_shader->current->pm4);
|
||||
|
||||
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
|
||||
@@ -1428,9 +1513,14 @@ void si_update_shaders(struct si_context *sctx)
|
||||
si_update_spi_map(sctx);
|
||||
}
|
||||
|
||||
if (si_pm4_state_changed(sctx, ps) || si_pm4_state_changed(sctx, vs) ||
|
||||
si_pm4_state_changed(sctx, gs)) {
|
||||
si_update_spi_tmpring_size(sctx);
|
||||
if (si_pm4_state_changed(sctx, ls) ||
|
||||
si_pm4_state_changed(sctx, hs) ||
|
||||
si_pm4_state_changed(sctx, es) ||
|
||||
si_pm4_state_changed(sctx, gs) ||
|
||||
si_pm4_state_changed(sctx, vs) ||
|
||||
si_pm4_state_changed(sctx, ps)) {
|
||||
if (!si_update_spi_tmpring_size(sctx))
|
||||
return false;
|
||||
}
|
||||
|
||||
if (sctx->ps_db_shader_control != sctx->ps_shader->current->db_shader_control) {
|
||||
@@ -1445,6 +1535,7 @@ void si_update_shaders(struct si_context *sctx)
|
||||
if (sctx->b.chip_class == SI)
|
||||
si_mark_atom_dirty(sctx, &sctx->db_render_state);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
void si_init_shader_functions(struct si_context *sctx)
|
||||
|
@@ -383,6 +383,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* If we get here, we failed to handle a cap above */
|
||||
debug_printf("Unexpected fragment shader query %u\n", param);
|
||||
@@ -441,6 +443,8 @@ static int svga_get_shader_param(struct pipe_screen *screen, unsigned shader, en
|
||||
case PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED:
|
||||
case PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE:
|
||||
return 0;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
}
|
||||
/* If we get here, we failed to handle a cap above */
|
||||
debug_printf("Unexpected vertex shader query %u\n", param);
|
||||
|
@@ -334,6 +334,8 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
|
||||
return VC4_MAX_TEXTURE_SAMPLERS;
|
||||
case PIPE_SHADER_CAP_PREFERRED_IR:
|
||||
return PIPE_SHADER_IR_TGSI;
|
||||
case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
|
||||
return 32;
|
||||
default:
|
||||
fprintf(stderr, "unknown shader param %d\n", param);
|
||||
return 0;
|
||||
|
@@ -674,7 +674,8 @@ enum pipe_shader_cap
|
||||
PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED, /* all rounding modes */
|
||||
PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED,
|
||||
PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED,
|
||||
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE
|
||||
PIPE_SHADER_CAP_TGSI_ANY_INOUT_DECL_RANGE,
|
||||
PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT,
|
||||
};
|
||||
|
||||
/**
|
||||
|
@@ -188,10 +188,10 @@ dri2_drawable_get_buffers(struct dri_drawable *drawable,
|
||||
* may occur as the stvis->color_format.
|
||||
*/
|
||||
switch(format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
depth = 32;
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_BGRX8888_UNORM:
|
||||
depth = 24;
|
||||
break;
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
@@ -261,13 +261,13 @@ dri_image_drawable_get_buffers(struct dri_drawable *drawable,
|
||||
case PIPE_FORMAT_B5G6R5_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_RGB565;
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8X8_UNORM:
|
||||
case PIPE_FORMAT_BGRX8888_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_XRGB8888;
|
||||
break;
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_ARGB8888;
|
||||
break;
|
||||
case PIPE_FORMAT_R8G8B8A8_UNORM:
|
||||
case PIPE_FORMAT_RGBA8888_UNORM:
|
||||
image_format = __DRI_IMAGE_FORMAT_ABGR8888;
|
||||
break;
|
||||
default:
|
||||
@@ -314,10 +314,10 @@ dri2_allocate_buffer(__DRIscreen *sPriv,
|
||||
|
||||
switch (format) {
|
||||
case 32:
|
||||
pf = PIPE_FORMAT_B8G8R8A8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRA8888_UNORM;
|
||||
break;
|
||||
case 24:
|
||||
pf = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case 16:
|
||||
pf = PIPE_FORMAT_Z16_UNORM;
|
||||
@@ -724,13 +724,13 @@ dri2_create_image_from_winsys(__DRIscreen *_screen,
|
||||
pf = PIPE_FORMAT_B5G6R5_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_XRGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ARGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8A8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRA8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ABGR8888:
|
||||
pf = PIPE_FORMAT_R8G8B8A8_UNORM;
|
||||
pf = PIPE_FORMAT_RGBA8888_UNORM;
|
||||
break;
|
||||
default:
|
||||
pf = PIPE_FORMAT_NONE;
|
||||
@@ -845,13 +845,13 @@ dri2_create_image(__DRIscreen *_screen,
|
||||
pf = PIPE_FORMAT_B5G6R5_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_XRGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ARGB8888:
|
||||
pf = PIPE_FORMAT_B8G8R8A8_UNORM;
|
||||
pf = PIPE_FORMAT_BGRA8888_UNORM;
|
||||
break;
|
||||
case __DRI_IMAGE_FORMAT_ABGR8888:
|
||||
pf = PIPE_FORMAT_R8G8B8A8_UNORM;
|
||||
pf = PIPE_FORMAT_RGBA8888_UNORM;
|
||||
break;
|
||||
default:
|
||||
pf = PIPE_FORMAT_NONE;
|
||||
@@ -1293,6 +1293,7 @@ dri2_load_opencl_interop(struct dri_screen *screen)
|
||||
}
|
||||
|
||||
struct dri2_fence {
|
||||
struct dri_screen *driscreen;
|
||||
struct pipe_fence_handle *pipe_fence;
|
||||
void *cl_event;
|
||||
};
|
||||
@@ -1313,6 +1314,7 @@ dri2_create_fence(__DRIcontext *_ctx)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fence->driscreen = dri_screen(_ctx->driScreenPriv);
|
||||
return fence;
|
||||
}
|
||||
|
||||
@@ -1336,6 +1338,7 @@ dri2_get_fence_from_cl_event(__DRIscreen *_screen, intptr_t cl_event)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
fence->driscreen = driscreen;
|
||||
return fence;
|
||||
}
|
||||
|
||||
@@ -1360,9 +1363,9 @@ static GLboolean
|
||||
dri2_client_wait_sync(__DRIcontext *_ctx, void *_fence, unsigned flags,
|
||||
uint64_t timeout)
|
||||
{
|
||||
struct dri_screen *driscreen = dri_screen(_ctx->driScreenPriv);
|
||||
struct pipe_screen *screen = driscreen->base.screen;
|
||||
struct dri2_fence *fence = (struct dri2_fence*)_fence;
|
||||
struct dri_screen *driscreen = fence->driscreen;
|
||||
struct pipe_screen *screen = driscreen->base.screen;
|
||||
|
||||
/* No need to flush. The context was flushed when the fence was created. */
|
||||
|
||||
|
@@ -231,11 +231,11 @@ dri_set_tex_buffer2(__DRIcontext *pDRICtx, GLint target,
|
||||
if (format == __DRI_TEXTURE_FORMAT_RGB) {
|
||||
/* only need to cover the formats recognized by dri_fill_st_visual */
|
||||
switch (internal_format) {
|
||||
case PIPE_FORMAT_B8G8R8A8_UNORM:
|
||||
internal_format = PIPE_FORMAT_B8G8R8X8_UNORM;
|
||||
case PIPE_FORMAT_BGRA8888_UNORM:
|
||||
internal_format = PIPE_FORMAT_BGRX8888_UNORM;
|
||||
break;
|
||||
case PIPE_FORMAT_A8R8G8B8_UNORM:
|
||||
internal_format = PIPE_FORMAT_X8R8G8B8_UNORM;
|
||||
case PIPE_FORMAT_ARGB8888_UNORM:
|
||||
internal_format = PIPE_FORMAT_XRGB8888_UNORM;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@@ -753,10 +753,14 @@ static void slice_header(vid_dec_PrivateType *priv, struct vl_rbsp *rbsp,
|
||||
priv->codec_data.h264.delta_pic_order_cnt_bottom = delta_pic_order_cnt_bottom;
|
||||
}
|
||||
|
||||
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
if (!priv->picture.h264.field_pic_flag)
|
||||
priv->picture.h264.field_order_cnt[1] += priv->codec_data.h264.delta_pic_order_cnt_bottom;
|
||||
if (!priv->picture.h264.field_pic_flag) {
|
||||
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
priv->picture.h264.field_order_cnt[1] = priv->picture.h264.field_order_cnt [0] +
|
||||
priv->codec_data.h264.delta_pic_order_cnt_bottom;
|
||||
} else if (!priv->picture.h264.bottom_field_flag)
|
||||
priv->picture.h264.field_order_cnt[0] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
else
|
||||
priv->picture.h264.field_order_cnt[1] = pic_order_cnt_msb + pic_order_cnt_lsb;
|
||||
|
||||
} else if (sps->pic_order_cnt_type == 1) {
|
||||
unsigned MaxFrameNum = 1 << (sps->log2_max_frame_num_minus4 + 4);
|
||||
|
@@ -116,7 +116,7 @@ vlVaCreateImage(VADriverContextP ctx, VAImageFormat *format, int width, int heig
|
||||
img->width = width;
|
||||
img->height = height;
|
||||
w = align(width, 2);
|
||||
h = align(width, 2);
|
||||
h = align(height, 2);
|
||||
|
||||
switch (format->fourcc) {
|
||||
case VA_FOURCC('N','V','1','2'):
|
||||
|
@@ -35,7 +35,8 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
|
||||
-lclangEdit \
|
||||
-lclangLex \
|
||||
-lclangBasic \
|
||||
$(LLVM_LIBS)
|
||||
$(LLVM_LIBS) \
|
||||
$(PTHREAD_LIBS)
|
||||
|
||||
nodist_EXTRA_lib@OPENCL_LIBNAME@_la_SOURCES = dummy.cpp
|
||||
lib@OPENCL_LIBNAME@_la_SOURCES =
|
||||
|
@@ -706,14 +706,30 @@ gbm_dri_bo_import(struct gbm_device *gbm,
|
||||
{
|
||||
struct gbm_import_fd_data *fd_data = buffer;
|
||||
int stride = fd_data->stride, offset = 0;
|
||||
int dri_format;
|
||||
|
||||
switch (fd_data->format) {
|
||||
case GBM_BO_FORMAT_XRGB8888:
|
||||
dri_format = GBM_FORMAT_XRGB8888;
|
||||
break;
|
||||
case GBM_BO_FORMAT_ARGB8888:
|
||||
dri_format = GBM_FORMAT_ARGB8888;
|
||||
break;
|
||||
default:
|
||||
dri_format = fd_data->format;
|
||||
}
|
||||
|
||||
image = dri->image->createImageFromFds(dri->screen,
|
||||
fd_data->width,
|
||||
fd_data->height,
|
||||
fd_data->format,
|
||||
dri_format,
|
||||
&fd_data->fd, 1,
|
||||
&stride, &offset,
|
||||
NULL);
|
||||
if (image == NULL) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
gbm_format = fd_data->format;
|
||||
break;
|
||||
}
|
||||
|
@@ -62,6 +62,8 @@ public:
|
||||
virtual ir_rvalue *hir(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state);
|
||||
|
||||
virtual bool has_sequence_subexpression() const;
|
||||
|
||||
/**
|
||||
* Retrieve the source location of an AST node
|
||||
*
|
||||
@@ -221,6 +223,8 @@ public:
|
||||
virtual void hir_no_rvalue(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state);
|
||||
|
||||
virtual bool has_sequence_subexpression() const;
|
||||
|
||||
ir_rvalue *do_hir(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state,
|
||||
bool needs_rvalue);
|
||||
@@ -299,6 +303,8 @@ public:
|
||||
virtual void hir_no_rvalue(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state);
|
||||
|
||||
virtual bool has_sequence_subexpression() const;
|
||||
|
||||
private:
|
||||
/**
|
||||
* Is this function call actually a constructor?
|
||||
|
@@ -395,13 +395,54 @@ generate_call(exec_list *instructions, ir_function_signature *sig,
|
||||
}
|
||||
}
|
||||
|
||||
/* If the function call is a constant expression, don't generate any
|
||||
* instructions; just generate an ir_constant.
|
||||
/* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says:
|
||||
*
|
||||
* Function calls were first allowed to be constant expressions in GLSL
|
||||
* 1.20 and GLSL ES 3.00.
|
||||
* "Initializers for const declarations must be formed from literal
|
||||
* values, other const variables (not including function call
|
||||
* paramaters), or expressions of these.
|
||||
*
|
||||
* Constructors may be used in such expressions, but function calls may
|
||||
* not."
|
||||
*
|
||||
* Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says:
|
||||
*
|
||||
* "A constant expression is one of
|
||||
*
|
||||
* ...
|
||||
*
|
||||
* - a built-in function call whose arguments are all constant
|
||||
* expressions, with the exception of the texture lookup
|
||||
* functions, the noise functions, and ftransform. The built-in
|
||||
* functions dFdx, dFdy, and fwidth must return 0 when evaluated
|
||||
* inside an initializer with an argument that is a constant
|
||||
* expression."
|
||||
*
|
||||
* Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says:
|
||||
*
|
||||
* "A constant expression is one of
|
||||
*
|
||||
* ...
|
||||
*
|
||||
* - a built-in function call whose arguments are all constant
|
||||
* expressions, with the exception of the texture lookup
|
||||
* functions."
|
||||
*
|
||||
* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says:
|
||||
*
|
||||
* "A constant expression is one of
|
||||
*
|
||||
* ...
|
||||
*
|
||||
* - a built-in function call whose arguments are all constant
|
||||
* expressions, with the exception of the texture lookup
|
||||
* functions. The built-in functions dFdx, dFdy, and fwidth must
|
||||
* return 0 when evaluated inside an initializer with an argument
|
||||
* that is a constant expression."
|
||||
*
|
||||
* If the function call is a constant expression, don't generate any
|
||||
* instructions; just generate an ir_constant.
|
||||
*/
|
||||
if (state->is_version(120, 300)) {
|
||||
if (state->is_version(120, 100)) {
|
||||
ir_constant *value = sig->constant_expression_value(actual_parameters, NULL);
|
||||
if (value != NULL) {
|
||||
return value;
|
||||
@@ -1911,6 +1952,17 @@ ast_function_expression::hir(exec_list *instructions,
|
||||
unreachable("not reached");
|
||||
}
|
||||
|
||||
bool
|
||||
ast_function_expression::has_sequence_subexpression() const
|
||||
{
|
||||
foreach_list_typed(const ast_node, ast, link, &this->expressions) {
|
||||
if (ast->has_sequence_subexpression())
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ir_rvalue *
|
||||
ast_aggregate_initializer::hir(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state)
|
||||
|
@@ -939,6 +939,12 @@ ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool
|
||||
ast_node::has_sequence_subexpression() const
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
void
|
||||
ast_function_expression::hir_no_rvalue(exec_list *instructions,
|
||||
struct _mesa_glsl_parse_state *state)
|
||||
@@ -1850,6 +1856,80 @@ ast_expression::do_hir(exec_list *instructions,
|
||||
return result;
|
||||
}
|
||||
|
||||
bool
|
||||
ast_expression::has_sequence_subexpression() const
|
||||
{
|
||||
switch (this->oper) {
|
||||
case ast_plus:
|
||||
case ast_neg:
|
||||
case ast_bit_not:
|
||||
case ast_logic_not:
|
||||
case ast_pre_inc:
|
||||
case ast_pre_dec:
|
||||
case ast_post_inc:
|
||||
case ast_post_dec:
|
||||
return this->subexpressions[0]->has_sequence_subexpression();
|
||||
|
||||
case ast_assign:
|
||||
case ast_add:
|
||||
case ast_sub:
|
||||
case ast_mul:
|
||||
case ast_div:
|
||||
case ast_mod:
|
||||
case ast_lshift:
|
||||
case ast_rshift:
|
||||
case ast_less:
|
||||
case ast_greater:
|
||||
case ast_lequal:
|
||||
case ast_gequal:
|
||||
case ast_nequal:
|
||||
case ast_equal:
|
||||
case ast_bit_and:
|
||||
case ast_bit_xor:
|
||||
case ast_bit_or:
|
||||
case ast_logic_and:
|
||||
case ast_logic_or:
|
||||
case ast_logic_xor:
|
||||
case ast_array_index:
|
||||
case ast_mul_assign:
|
||||
case ast_div_assign:
|
||||
case ast_add_assign:
|
||||
case ast_sub_assign:
|
||||
case ast_mod_assign:
|
||||
case ast_ls_assign:
|
||||
case ast_rs_assign:
|
||||
case ast_and_assign:
|
||||
case ast_xor_assign:
|
||||
case ast_or_assign:
|
||||
return this->subexpressions[0]->has_sequence_subexpression() ||
|
||||
this->subexpressions[1]->has_sequence_subexpression();
|
||||
|
||||
case ast_conditional:
|
||||
return this->subexpressions[0]->has_sequence_subexpression() ||
|
||||
this->subexpressions[1]->has_sequence_subexpression() ||
|
||||
this->subexpressions[2]->has_sequence_subexpression();
|
||||
|
||||
case ast_sequence:
|
||||
return true;
|
||||
|
||||
case ast_field_selection:
|
||||
case ast_identifier:
|
||||
case ast_int_constant:
|
||||
case ast_uint_constant:
|
||||
case ast_float_constant:
|
||||
case ast_bool_constant:
|
||||
case ast_double_constant:
|
||||
return false;
|
||||
|
||||
case ast_aggregate:
|
||||
unreachable("ast_aggregate: Should never get here.");
|
||||
|
||||
case ast_function_call:
|
||||
unreachable("should be handled by ast_function_expression::hir");
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
ir_rvalue *
|
||||
ast_expression_statement::hir(exec_list *instructions,
|
||||
@@ -3146,16 +3226,72 @@ process_initializer(ir_variable *var, ast_declaration *decl,
|
||||
|
||||
/* Calculate the constant value if this is a const or uniform
|
||||
* declaration.
|
||||
*
|
||||
* Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says:
|
||||
*
|
||||
* "Declarations of globals without a storage qualifier, or with
|
||||
* just the const qualifier, may include initializers, in which case
|
||||
* they will be initialized before the first line of main() is
|
||||
* executed. Such initializers must be a constant expression."
|
||||
*
|
||||
* The same section of the GLSL ES 3.00.4 spec has similar language.
|
||||
*/
|
||||
if (type->qualifier.flags.q.constant
|
||||
|| type->qualifier.flags.q.uniform) {
|
||||
|| type->qualifier.flags.q.uniform
|
||||
|| (state->es_shader && state->current_function == NULL)) {
|
||||
ir_rvalue *new_rhs = validate_assignment(state, initializer_loc,
|
||||
lhs, rhs, true);
|
||||
if (new_rhs != NULL) {
|
||||
rhs = new_rhs;
|
||||
|
||||
/* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec
|
||||
* says:
|
||||
*
|
||||
* "A constant expression is one of
|
||||
*
|
||||
* ...
|
||||
*
|
||||
* - an expression formed by an operator on operands that are
|
||||
* all constant expressions, including getting an element of
|
||||
* a constant array, or a field of a constant structure, or
|
||||
* components of a constant vector. However, the sequence
|
||||
* operator ( , ) and the assignment operators ( =, +=, ...)
|
||||
* are not included in the operators that can create a
|
||||
* constant expression."
|
||||
*
|
||||
* Section 12.43 (Sequence operator and constant expressions) says:
|
||||
*
|
||||
* "Should the following construct be allowed?
|
||||
*
|
||||
* float a[2,3];
|
||||
*
|
||||
* The expression within the brackets uses the sequence operator
|
||||
* (',') and returns the integer 3 so the construct is declaring
|
||||
* a single-dimensional array of size 3. In some languages, the
|
||||
* construct declares a two-dimensional array. It would be
|
||||
* preferable to make this construct illegal to avoid confusion.
|
||||
*
|
||||
* One possibility is to change the definition of the sequence
|
||||
* operator so that it does not return a constant-expression and
|
||||
* hence cannot be used to declare an array size.
|
||||
*
|
||||
* RESOLUTION: The result of a sequence operator is not a
|
||||
* constant-expression."
|
||||
*
|
||||
* Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec
|
||||
* contains language almost identical to the section 4.3.3 in the
|
||||
* GLSL ES 3.00.4 spec. This is a new limitation for these GLSL
|
||||
* versions.
|
||||
*/
|
||||
ir_constant *constant_value = rhs->constant_expression_value();
|
||||
if (!constant_value) {
|
||||
if (!constant_value ||
|
||||
(state->is_version(430, 300) &&
|
||||
decl->initializer->has_sequence_subexpression())) {
|
||||
const char *const variable_mode =
|
||||
(type->qualifier.flags.q.constant)
|
||||
? "const"
|
||||
: ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
|
||||
|
||||
/* If ARB_shading_language_420pack is enabled, initializers of
|
||||
* const-qualified local variables do not have to be constant
|
||||
* expressions. Const-qualified global variables must still be
|
||||
@@ -3166,22 +3302,24 @@ process_initializer(ir_variable *var, ast_declaration *decl,
|
||||
_mesa_glsl_error(& initializer_loc, state,
|
||||
"initializer of %s variable `%s' must be a "
|
||||
"constant expression",
|
||||
(type->qualifier.flags.q.constant)
|
||||
? "const" : "uniform",
|
||||
variable_mode,
|
||||
decl->identifier);
|
||||
if (var->type->is_numeric()) {
|
||||
/* Reduce cascading errors. */
|
||||
var->constant_value = ir_constant::zero(state, var->type);
|
||||
var->constant_value = type->qualifier.flags.q.constant
|
||||
? ir_constant::zero(state, var->type) : NULL;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
rhs = constant_value;
|
||||
var->constant_value = constant_value;
|
||||
var->constant_value = type->qualifier.flags.q.constant
|
||||
? constant_value : NULL;
|
||||
}
|
||||
} else {
|
||||
if (var->type->is_numeric()) {
|
||||
/* Reduce cascading errors. */
|
||||
var->constant_value = ir_constant::zero(state, var->type);
|
||||
var->constant_value = type->qualifier.flags.q.constant
|
||||
? ir_constant::zero(state, var->type) : NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -673,14 +673,10 @@ builtin_variable_generator::generate_constants()
|
||||
if (!state->es_shader) {
|
||||
add_const("gl_MaxGeometryAtomicCounters",
|
||||
state->Const.MaxGeometryAtomicCounters);
|
||||
|
||||
if (state->is_version(400, 0) ||
|
||||
state->ARB_tessellation_shader_enable) {
|
||||
add_const("gl_MaxTessControlAtomicCounters",
|
||||
state->Const.MaxTessControlAtomicCounters);
|
||||
add_const("gl_MaxTessEvaluationAtomicCounters",
|
||||
state->Const.MaxTessEvaluationAtomicCounters);
|
||||
}
|
||||
add_const("gl_MaxTessControlAtomicCounters",
|
||||
state->Const.MaxTessControlAtomicCounters);
|
||||
add_const("gl_MaxTessEvaluationAtomicCounters",
|
||||
state->Const.MaxTessEvaluationAtomicCounters);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -326,9 +326,9 @@ link_set_uniform_initializers(struct gl_shader_program *prog,
|
||||
} else {
|
||||
assert(!"Explicit binding not on a sampler, UBO or atomic.");
|
||||
}
|
||||
} else if (var->constant_value) {
|
||||
} else if (var->constant_initializer) {
|
||||
linker::set_uniform_initializer(mem_ctx, prog, var->name,
|
||||
var->type, var->constant_value,
|
||||
var->type, var->constant_initializer,
|
||||
boolean_true);
|
||||
}
|
||||
}
|
||||
|
@@ -145,7 +145,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
|
||||
}
|
||||
}
|
||||
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
|
||||
{
|
||||
/* Copying an SSA definition makes no sense whatsoever. */
|
||||
assert(!src->is_ssa);
|
||||
@@ -155,17 +155,18 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
|
||||
dest->reg.base_offset = src->reg.base_offset;
|
||||
dest->reg.reg = src->reg.reg;
|
||||
if (src->reg.indirect) {
|
||||
dest->reg.indirect = ralloc(mem_ctx, nir_src);
|
||||
nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
|
||||
dest->reg.indirect = ralloc(instr, nir_src);
|
||||
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
|
||||
} else {
|
||||
dest->reg.indirect = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
|
||||
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
|
||||
nir_alu_instr *instr)
|
||||
{
|
||||
nir_src_copy(&dest->src, &src->src, mem_ctx);
|
||||
nir_src_copy(&dest->src, &src->src, &instr->instr);
|
||||
dest->abs = src->abs;
|
||||
dest->negate = src->negate;
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
@@ -173,9 +174,10 @@ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
|
||||
}
|
||||
|
||||
void
|
||||
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
|
||||
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
|
||||
nir_alu_instr *instr)
|
||||
{
|
||||
nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
|
||||
nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
|
||||
dest->write_mask = src->write_mask;
|
||||
dest->saturate = src->saturate;
|
||||
}
|
||||
@@ -1921,14 +1923,14 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
|
||||
nir_foreach_use_safe(def, use_src) {
|
||||
nir_instr *src_parent_instr = use_src->parent_instr;
|
||||
list_del(&use_src->use_link);
|
||||
nir_src_copy(use_src, &new_src, mem_ctx);
|
||||
nir_src_copy(use_src, &new_src, src_parent_instr);
|
||||
src_add_all_uses(use_src, src_parent_instr, NULL);
|
||||
}
|
||||
|
||||
nir_foreach_if_use_safe(def, use_src) {
|
||||
nir_if *src_parent_if = use_src->parent_if;
|
||||
list_del(&use_src->use_link);
|
||||
nir_src_copy(use_src, &new_src, mem_ctx);
|
||||
nir_src_copy(use_src, &new_src, src_parent_if);
|
||||
src_add_all_uses(use_src, NULL, src_parent_if);
|
||||
}
|
||||
}
|
||||
|
@@ -580,8 +580,8 @@ nir_dest_for_reg(nir_register *reg)
|
||||
return dest;
|
||||
}
|
||||
|
||||
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
|
||||
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
|
||||
|
||||
typedef struct {
|
||||
nir_src src;
|
||||
@@ -630,10 +630,6 @@ typedef struct {
|
||||
unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
|
||||
} nir_alu_dest;
|
||||
|
||||
void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
|
||||
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
|
||||
void *mem_ctx);
|
||||
|
||||
typedef enum {
|
||||
nir_type_invalid = 0, /* Not a valid type */
|
||||
nir_type_float,
|
||||
@@ -702,6 +698,11 @@ typedef struct nir_alu_instr {
|
||||
nir_alu_src src[];
|
||||
} nir_alu_instr;
|
||||
|
||||
void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
|
||||
nir_alu_instr *instr);
|
||||
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
|
||||
nir_alu_instr *instr);
|
||||
|
||||
/* is this source channel used? */
|
||||
static inline bool
|
||||
nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
|
||||
|
@@ -561,7 +561,7 @@ emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
|
||||
assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
|
||||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
|
||||
nir_src_copy(&mov->src[0].src, &src, mem_ctx);
|
||||
nir_src_copy(&mov->src[0].src, &src, mov);
|
||||
mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
|
||||
mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
|
||||
|
||||
|
@@ -46,11 +46,11 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
|
||||
nir_alu_ssa_dest_init(chan, 1);
|
||||
nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
|
||||
nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
|
||||
chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
|
||||
if (nir_op_infos[chan_op].num_inputs > 1) {
|
||||
assert(nir_op_infos[chan_op].num_inputs == 2);
|
||||
nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
|
||||
nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
|
||||
chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
|
||||
}
|
||||
|
||||
@@ -153,7 +153,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
|
||||
unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
|
||||
0 : chan);
|
||||
|
||||
nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
|
||||
nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
|
||||
for (int j = 0; j < 4; j++)
|
||||
lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
|
||||
}
|
||||
|
@@ -91,7 +91,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
|
||||
nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
|
||||
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
|
||||
mul->dest.write_mask = 0x1;
|
||||
nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
|
||||
nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
|
||||
mul->src[1].src.is_ssa = true;
|
||||
mul->src[1].src.ssa = &atomic_counter_size->def;
|
||||
nir_instr_insert_before(&instr->instr, &mul->instr);
|
||||
|
@@ -376,7 +376,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
||||
|
||||
store->const_index[0] = offset;
|
||||
|
||||
nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
|
||||
nir_src_copy(&store->src[0], &intrin->src[0], store);
|
||||
|
||||
if (has_indirect)
|
||||
store->src[1] = indirect;
|
||||
|
@@ -183,8 +183,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
|
||||
nir_alu_instr *add = nir_alu_instr_create(state->shader,
|
||||
nir_op_iadd);
|
||||
add->src[0].src = *src.reg.indirect;
|
||||
nir_src_copy(&add->src[1].src, &deref_array->indirect,
|
||||
state->shader);
|
||||
nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
|
||||
add->dest.write_mask = 1;
|
||||
nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
|
||||
nir_instr_insert_before(instr, &add->instr);
|
||||
@@ -225,7 +224,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
|
||||
nir_src_for_ssa(&mov->dest.dest.ssa),
|
||||
state->shader);
|
||||
} else {
|
||||
nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader);
|
||||
nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
|
||||
}
|
||||
nir_instr_insert_before(&intrin->instr, &mov->instr);
|
||||
|
||||
@@ -241,7 +240,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
|
||||
&intrin->instr, state);
|
||||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
|
||||
nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader);
|
||||
nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
|
||||
mov->dest.write_mask = (1 << intrin->num_components) - 1;
|
||||
mov->dest.dest.is_ssa = false;
|
||||
mov->dest.dest.reg.reg = reg_src.reg.reg;
|
||||
|
@@ -60,8 +60,8 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
|
||||
assert(src_idx < nir_op_infos[vec->op].num_inputs);
|
||||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
|
||||
nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
|
||||
nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
|
||||
nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
|
||||
nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
|
||||
|
||||
mov->dest.write_mask = (1u << start_channel);
|
||||
mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
|
||||
|
@@ -216,8 +216,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
|
||||
for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
|
||||
ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
|
||||
}
|
||||
nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
|
||||
state->mem_ctx);
|
||||
nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma);
|
||||
|
||||
assert(add->dest.dest.is_ssa);
|
||||
|
||||
|
@@ -195,7 +195,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
|
||||
|
||||
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
||||
nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
|
||||
nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
|
||||
nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
|
||||
/* Splat the condition to all channels */
|
||||
memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
|
||||
|
||||
@@ -205,7 +205,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
|
||||
assert(src->src.is_ssa);
|
||||
|
||||
unsigned idx = src->pred == then_block ? 1 : 2;
|
||||
nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
|
||||
nir_src_copy(&sel->src[idx].src, &src->src, sel);
|
||||
}
|
||||
|
||||
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
|
||||
|
@@ -103,7 +103,7 @@ do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
|
||||
*/
|
||||
if (entry->var->data.mode == ir_var_uniform ||
|
||||
entry->var->data.mode == ir_var_shader_storage) {
|
||||
if (uniform_locations_assigned || entry->var->constant_value)
|
||||
if (uniform_locations_assigned || entry->var->constant_initializer)
|
||||
continue;
|
||||
|
||||
/* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec
|
||||
|
@@ -46,6 +46,7 @@ AM_CFLAGS = \
|
||||
$(EXTRA_DEFINES_XF86VIDMODE) \
|
||||
-D_REENTRANT \
|
||||
-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
|
||||
-DGL_LIB_NAME=\"lib@GL_LIB@.so.1\" \
|
||||
$(DEFINES) \
|
||||
$(LIBDRM_CFLAGS) \
|
||||
$(DRI2PROTO_CFLAGS) \
|
||||
|
@@ -73,6 +73,10 @@ dri_message(int level, const char *f, ...)
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef GL_LIB_NAME
|
||||
#define GL_LIB_NAME "libGL.so.1"
|
||||
#endif
|
||||
|
||||
#ifndef DEFAULT_DRIVER_DIR
|
||||
/* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */
|
||||
#define DEFAULT_DRIVER_DIR "/usr/local/lib/dri"
|
||||
@@ -99,7 +103,7 @@ driOpenDriver(const char *driverName)
|
||||
int len;
|
||||
|
||||
/* Attempt to make sure libGL symbols will be visible to the driver */
|
||||
glhandle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL);
|
||||
glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL);
|
||||
|
||||
libPaths = NULL;
|
||||
if (geteuid() == getuid()) {
|
||||
|
@@ -2646,7 +2646,11 @@ _X_EXPORT void (*glXGetProcAddressARB(const GLubyte * procName)) (void)
|
||||
*/
|
||||
_X_EXPORT void (*glXGetProcAddress(const GLubyte * procName)) (void)
|
||||
#if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
|
||||
# if defined(USE_MGL_NAMESPACE)
|
||||
__attribute__ ((alias("mglXGetProcAddressARB")));
|
||||
# else
|
||||
__attribute__ ((alias("glXGetProcAddressARB")));
|
||||
# endif
|
||||
#else
|
||||
{
|
||||
return glXGetProcAddressARB(procName);
|
||||
|
@@ -281,11 +281,17 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char *name);
|
||||
# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func)
|
||||
#else
|
||||
# if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
|
||||
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
|
||||
/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
|
||||
* extra expansion means that the name mangling macros in glx_mangle.h will
|
||||
* apply before stringification, so the alias attribute will have a string like
|
||||
* "mglXFoo" instead of "glXFoo". */
|
||||
# define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
|
||||
return_type real_func proto_args \
|
||||
__attribute__ ((alias( # aliased_func ) ));
|
||||
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
|
||||
GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
|
||||
# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
|
||||
GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
|
||||
GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
|
||||
# else
|
||||
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
|
||||
return_type real_func proto_args \
|
||||
|
@@ -175,7 +175,7 @@ _glapi_get_stub(const char *name, int generate)
|
||||
const struct mapi_stub *stub;
|
||||
|
||||
#ifdef USE_MGL_NAMESPACE
|
||||
if (name)
|
||||
if (name && name[0] == 'm')
|
||||
name++;
|
||||
#endif
|
||||
|
||||
|
@@ -50,7 +50,7 @@ endif # MESA_ENABLE_ASM
|
||||
ifeq ($(ARCH_X86_HAVE_SSE4_1),true)
|
||||
LOCAL_SRC_FILES += \
|
||||
main/streaming-load-memcpy.c \
|
||||
mesa/main/sse_minmax.c
|
||||
main/sse_minmax.c
|
||||
LOCAL_CFLAGS := \
|
||||
-msse4.1 \
|
||||
-DUSE_SSE41
|
||||
|
@@ -71,9 +71,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
||||
char *sample_map_str = rzalloc_size(mem_ctx, 1);
|
||||
char *sample_map_expr = rzalloc_size(mem_ctx, 1);
|
||||
char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
|
||||
const char *vs_source;
|
||||
const char *sampler_array_suffix = "";
|
||||
const char *texcoord_type = "vec2";
|
||||
float y_scale;
|
||||
enum blit_msaa_shader shader_index;
|
||||
|
||||
@@ -99,7 +97,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
||||
shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
|
||||
BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
|
||||
sampler_array_suffix = "Array";
|
||||
texcoord_type = "vec3";
|
||||
}
|
||||
|
||||
if (blit->msaa_shaders[shader_index]) {
|
||||
@@ -150,28 +147,37 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
||||
" const int sample_map[%d] = int[%d](%s);\n",
|
||||
samples, samples, sample_map_str);
|
||||
|
||||
ralloc_asprintf_append(&texel_fetch_macro,
|
||||
"#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n",
|
||||
texcoord_type, sample_number);
|
||||
if (target == GL_TEXTURE_2D_MULTISAMPLE) {
|
||||
ralloc_asprintf_append(&texel_fetch_macro,
|
||||
"#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n",
|
||||
sample_number);
|
||||
} else {
|
||||
ralloc_asprintf_append(&texel_fetch_macro,
|
||||
"#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n",
|
||||
sample_number);
|
||||
}
|
||||
|
||||
vs_source = ralloc_asprintf(mem_ctx,
|
||||
static const char vs_source[] =
|
||||
"#version 130\n"
|
||||
"in vec2 position;\n"
|
||||
"in %s textureCoords;\n"
|
||||
"out %s texCoords;\n"
|
||||
"in vec3 textureCoords;\n"
|
||||
"out vec2 texCoords;\n"
|
||||
"flat out int layer;\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" texCoords = textureCoords;\n"
|
||||
" texCoords = textureCoords.xy;\n"
|
||||
" layer = int(textureCoords.z);\n"
|
||||
" gl_Position = vec4(position, 0.0, 1.0);\n"
|
||||
"}\n",
|
||||
texcoord_type,
|
||||
texcoord_type);
|
||||
"}\n"
|
||||
;
|
||||
|
||||
fs_source = ralloc_asprintf(mem_ctx,
|
||||
"#version 130\n"
|
||||
"#extension GL_ARB_texture_multisample : enable\n"
|
||||
"uniform sampler2DMS%s texSampler;\n"
|
||||
"uniform float src_width, src_height;\n"
|
||||
"in %s texCoords;\n"
|
||||
"in vec2 texCoords;\n"
|
||||
"flat in int layer;\n"
|
||||
"out vec4 out_color;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
@@ -212,7 +218,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx,
|
||||
" out_color = mix(x_0_color, x_1_color, interp.y);\n"
|
||||
"}\n",
|
||||
sampler_array_suffix,
|
||||
texcoord_type,
|
||||
sample_map_expr,
|
||||
y_scale,
|
||||
1.0f / y_scale,
|
||||
|
@@ -45,6 +45,24 @@
|
||||
#include "uniforms.h"
|
||||
#include "varray.h"
|
||||
|
||||
static bool
|
||||
need_signed_unsigned_int_conversion(mesa_format mesaFormat,
|
||||
GLenum format, GLenum type)
|
||||
{
|
||||
const GLenum mesaFormatType = _mesa_get_format_datatype(mesaFormat);
|
||||
const bool is_format_integer = _mesa_is_enum_format_integer(format);
|
||||
return (mesaFormatType == GL_INT &&
|
||||
is_format_integer &&
|
||||
(type == GL_UNSIGNED_INT ||
|
||||
type == GL_UNSIGNED_SHORT ||
|
||||
type == GL_UNSIGNED_BYTE)) ||
|
||||
(mesaFormatType == GL_UNSIGNED_INT &&
|
||||
is_format_integer &&
|
||||
(type == GL_INT ||
|
||||
type == GL_SHORT ||
|
||||
type == GL_BYTE));
|
||||
}
|
||||
|
||||
static struct gl_texture_image *
|
||||
create_texture_for_pbo(struct gl_context *ctx, bool create_pbo,
|
||||
GLenum pbo_target, int width, int height,
|
||||
@@ -166,6 +184,13 @@ _mesa_meta_pbo_TexSubImage(struct gl_context *ctx, GLuint dims,
|
||||
if (ctx->_ImageTransferState)
|
||||
return false;
|
||||
|
||||
/* This function relies on BlitFramebuffer to fill in the pixel data for
|
||||
* glTex[Sub]Image*D. But, BlitFrameBuffer doesn't support signed to
|
||||
* unsigned or unsigned to signed integer conversions.
|
||||
*/
|
||||
if (need_signed_unsigned_int_conversion(tex_image->TexFormat, format, type))
|
||||
return false;
|
||||
|
||||
/* For arrays, use a tall (height * depth) 2D texture but taking into
|
||||
* account the inter-image padding specified with the image height packing
|
||||
* property.
|
||||
@@ -250,24 +275,6 @@ fail:
|
||||
return success;
|
||||
}
|
||||
|
||||
static bool
|
||||
need_signed_unsigned_int_conversion(mesa_format rbFormat,
|
||||
GLenum format, GLenum type)
|
||||
{
|
||||
const GLenum srcType = _mesa_get_format_datatype(rbFormat);
|
||||
const bool is_dst_format_integer = _mesa_is_enum_format_integer(format);
|
||||
return (srcType == GL_INT &&
|
||||
is_dst_format_integer &&
|
||||
(type == GL_UNSIGNED_INT ||
|
||||
type == GL_UNSIGNED_SHORT ||
|
||||
type == GL_UNSIGNED_BYTE)) ||
|
||||
(srcType == GL_UNSIGNED_INT &&
|
||||
is_dst_format_integer &&
|
||||
(type == GL_INT ||
|
||||
type == GL_SHORT ||
|
||||
type == GL_BYTE));
|
||||
}
|
||||
|
||||
bool
|
||||
_mesa_meta_pbo_GetTexSubImage(struct gl_context *ctx, GLuint dims,
|
||||
struct gl_texture_image *tex_image,
|
||||
|
@@ -42,10 +42,10 @@
|
||||
#define I830_UPLOAD_STIPPLE 0x4
|
||||
#define I830_UPLOAD_INVARIENT 0x8
|
||||
#define I830_UPLOAD_RASTER_RULES 0x10
|
||||
#define I830_UPLOAD_TEX(i) (0x10<<(i))
|
||||
#define I830_UPLOAD_TEXBLEND(i) (0x100<<(i))
|
||||
#define I830_UPLOAD_TEX_ALL (0x0f0)
|
||||
#define I830_UPLOAD_TEXBLEND_ALL (0xf00)
|
||||
#define I830_UPLOAD_TEX(i) (0x0100<<(i))
|
||||
#define I830_UPLOAD_TEXBLEND(i) (0x1000<<(i))
|
||||
#define I830_UPLOAD_TEX_ALL (0x0f00)
|
||||
#define I830_UPLOAD_TEXBLEND_ALL (0xf000)
|
||||
|
||||
/* State structure offsets - these will probably disappear.
|
||||
*/
|
||||
|
@@ -115,6 +115,8 @@ enum {
|
||||
I915_RASTER_RULES_SETUP_SIZE,
|
||||
};
|
||||
|
||||
#define I915_TEX_UNITS 8
|
||||
|
||||
#define I915_MAX_CONSTANT 32
|
||||
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
|
||||
|
||||
@@ -194,7 +196,8 @@ struct i915_fragment_program
|
||||
|
||||
/* Helpers for i915_fragprog.c:
|
||||
*/
|
||||
GLuint wpos_tex;
|
||||
uint8_t texcoord_mapping[I915_TEX_UNITS];
|
||||
uint8_t wpos_tex;
|
||||
bool depth_written;
|
||||
|
||||
struct
|
||||
@@ -205,15 +208,6 @@ struct i915_fragment_program
|
||||
GLuint nr_params;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#define I915_TEX_UNITS 8
|
||||
|
||||
|
||||
struct i915_hw_state
|
||||
{
|
||||
GLuint Ctx[I915_CTX_SETUP_SIZE];
|
||||
|
@@ -72,6 +72,22 @@ static const GLfloat cos_constants[4] = { 1.0,
|
||||
-1.0 / (6 * 5 * 4 * 3 * 2 * 1)
|
||||
};
|
||||
|
||||
/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */
|
||||
#define TEXCOORD_TEX (0<<7)
|
||||
#define TEXCOORD_VAR (1<<7)
|
||||
|
||||
static unsigned
|
||||
get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
|
||||
{
|
||||
for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (p->texcoord_mapping[i] == texcoord)
|
||||
return i;
|
||||
}
|
||||
|
||||
/* blah */
|
||||
return p->ctx->Const.MaxTextureCoordUnits - 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieve a ureg for the given source register. Will emit
|
||||
* constants, apply swizzling and negation as needed.
|
||||
@@ -82,6 +98,7 @@ src_vector(struct i915_fragment_program *p,
|
||||
const struct gl_fragment_program *program)
|
||||
{
|
||||
GLuint src;
|
||||
unsigned unit;
|
||||
|
||||
switch (source->File) {
|
||||
|
||||
@@ -119,8 +136,10 @@ src_vector(struct i915_fragment_program *p,
|
||||
case VARYING_SLOT_TEX5:
|
||||
case VARYING_SLOT_TEX6:
|
||||
case VARYING_SLOT_TEX7:
|
||||
unit = get_texcoord_mapping(p, (source->Index -
|
||||
VARYING_SLOT_TEX0) | TEXCOORD_TEX);
|
||||
src = i915_emit_decl(p, REG_TYPE_T,
|
||||
T_TEX0 + (source->Index - VARYING_SLOT_TEX0),
|
||||
T_TEX0 + unit,
|
||||
D0_CHANNEL_ALL);
|
||||
break;
|
||||
|
||||
@@ -132,8 +151,10 @@ src_vector(struct i915_fragment_program *p,
|
||||
case VARYING_SLOT_VAR0 + 5:
|
||||
case VARYING_SLOT_VAR0 + 6:
|
||||
case VARYING_SLOT_VAR0 + 7:
|
||||
unit = get_texcoord_mapping(p, (source->Index -
|
||||
VARYING_SLOT_VAR0) | TEXCOORD_VAR);
|
||||
src = i915_emit_decl(p, REG_TYPE_T,
|
||||
T_TEX0 + (source->Index - VARYING_SLOT_VAR0),
|
||||
T_TEX0 + unit,
|
||||
D0_CHANNEL_ALL);
|
||||
break;
|
||||
|
||||
@@ -1176,27 +1197,54 @@ fixup_depth_write(struct i915_fragment_program *p)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
check_texcoord_mapping(struct i915_fragment_program *p)
|
||||
{
|
||||
GLbitfield64 inputs = p->FragProg.Base.InputsRead;
|
||||
unsigned unit = 0;
|
||||
|
||||
for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (inputs & VARYING_BIT_TEX(i)) {
|
||||
if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
|
||||
unit++;
|
||||
break;
|
||||
}
|
||||
p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
|
||||
}
|
||||
if (inputs & VARYING_BIT_VAR(i)) {
|
||||
if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
|
||||
unit++;
|
||||
break;
|
||||
}
|
||||
p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
|
||||
}
|
||||
}
|
||||
|
||||
if (unit > p->ctx->Const.MaxTextureCoordUnits)
|
||||
i915_program_error(p, "Too many texcoord units");
|
||||
}
|
||||
|
||||
static void
|
||||
check_wpos(struct i915_fragment_program *p)
|
||||
{
|
||||
GLbitfield64 inputs = p->FragProg.Base.InputsRead;
|
||||
GLint i;
|
||||
unsigned unit = 0;
|
||||
|
||||
p->wpos_tex = -1;
|
||||
|
||||
if ((inputs & VARYING_BIT_POS) == 0)
|
||||
return;
|
||||
|
||||
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i)))
|
||||
continue;
|
||||
else if (inputs & VARYING_BIT_POS) {
|
||||
p->wpos_tex = i;
|
||||
inputs &= ~VARYING_BIT_POS;
|
||||
}
|
||||
unit += !!(inputs & VARYING_BIT_TEX(i));
|
||||
unit += !!(inputs & VARYING_BIT_VAR(i));
|
||||
}
|
||||
|
||||
if (inputs & VARYING_BIT_POS) {
|
||||
if (unit < p->ctx->Const.MaxTextureCoordUnits)
|
||||
p->wpos_tex = unit;
|
||||
else
|
||||
i915_program_error(p, "No free texcoord for wpos value");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1212,6 +1260,7 @@ translate_program(struct i915_fragment_program *p)
|
||||
}
|
||||
|
||||
i915_init_program(i915, p);
|
||||
check_texcoord_mapping(p);
|
||||
check_wpos(p);
|
||||
upload_program(p);
|
||||
fixup_depth_write(p);
|
||||
@@ -1420,22 +1469,24 @@ i915ValidateFragmentProgram(struct i915_context *i915)
|
||||
|
||||
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
|
||||
if (inputsRead & VARYING_BIT_TEX(i)) {
|
||||
int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
|
||||
int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
|
||||
|
||||
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
|
||||
s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
|
||||
|
||||
EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
|
||||
}
|
||||
else if (inputsRead & VARYING_BIT_VAR(i)) {
|
||||
if (inputsRead & VARYING_BIT_VAR(i)) {
|
||||
int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
|
||||
int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
|
||||
|
||||
s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
|
||||
s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
|
||||
s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
|
||||
|
||||
EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
|
||||
}
|
||||
else if (i == p->wpos_tex) {
|
||||
if (i == p->wpos_tex) {
|
||||
int wpos_size = 4 * sizeof(float);
|
||||
/* If WPOS is required, duplicate the XYZ position data in an
|
||||
* unused texture coordinate:
|
||||
|
@@ -658,6 +658,11 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx,
|
||||
{
|
||||
struct intel_context *intel = intel_context(ctx);
|
||||
|
||||
/* Sync up the state of window system buffers. We need to do this before
|
||||
* we go looking for the buffers.
|
||||
*/
|
||||
intel_prepare_render(intel);
|
||||
|
||||
if (mask & GL_COLOR_BUFFER_BIT) {
|
||||
GLint i;
|
||||
struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
|
||||
|
@@ -251,7 +251,7 @@ intel_run_render(struct gl_context * ctx, struct tnl_pipeline_stage *stage)
|
||||
continue;
|
||||
|
||||
intel_render_tab_verts[prim & PRIM_MODE_MASK] (ctx, start,
|
||||
start + length, prim);
|
||||
length, prim);
|
||||
}
|
||||
|
||||
tnl->Driver.Render.Finish(ctx);
|
||||
|
@@ -1412,7 +1412,6 @@ intel_process_dri2_buffer(struct brw_context *brw,
|
||||
buffer->cpp, buffer->pitch);
|
||||
}
|
||||
|
||||
intel_miptree_release(&rb->mt);
|
||||
bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
|
||||
buffer->name);
|
||||
if (!bo) {
|
||||
|
@@ -1556,7 +1556,10 @@ fs_visitor::assign_vs_urb_setup()
|
||||
|
||||
inst->src[i].file = HW_REG;
|
||||
inst->src[i].fixed_hw_reg =
|
||||
retype(brw_vec8_grf(grf, 0), inst->src[i].type);
|
||||
stride(byte_offset(retype(brw_vec8_grf(grf, 0), inst->src[i].type),
|
||||
inst->src[i].subreg_offset),
|
||||
inst->exec_size * inst->src[i].stride,
|
||||
inst->exec_size, inst->src[i].stride);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -312,13 +312,43 @@ namespace {
|
||||
}
|
||||
|
||||
namespace image_validity {
|
||||
/**
|
||||
* Check whether the bound image is suitable for untyped access.
|
||||
*/
|
||||
brw_predicate
|
||||
emit_untyped_image_check(const fs_builder &bld, const fs_reg &image,
|
||||
brw_predicate pred)
|
||||
{
|
||||
const brw_device_info *devinfo = bld.shader->devinfo;
|
||||
const fs_reg stride = offset(image, bld, BRW_IMAGE_PARAM_STRIDE_OFFSET);
|
||||
|
||||
if (devinfo->gen == 7 && !devinfo->is_haswell) {
|
||||
/* Check whether the first stride component (i.e. the Bpp value)
|
||||
* is greater than four, which on Gen7 indicates that a surface of
|
||||
* type RAW has been bound for untyped access. Reading or writing
|
||||
* to a surface of type other than RAW using untyped surface
|
||||
* messages causes a hang on IVB and VLV.
|
||||
*/
|
||||
set_predicate(pred,
|
||||
bld.CMP(bld.null_reg_ud(), stride, fs_reg(4),
|
||||
BRW_CONDITIONAL_G));
|
||||
|
||||
return BRW_PREDICATE_NORMAL;
|
||||
} else {
|
||||
/* More recent generations handle the format mismatch
|
||||
* gracefully.
|
||||
*/
|
||||
return pred;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether there is an image bound at the given index and write
|
||||
* the comparison result to f0.0. Returns an appropriate predication
|
||||
* mode to use on subsequent image operations.
|
||||
*/
|
||||
brw_predicate
|
||||
emit_surface_check(const fs_builder &bld, const fs_reg &image)
|
||||
emit_typed_atomic_check(const fs_builder &bld, const fs_reg &image)
|
||||
{
|
||||
const brw_device_info *devinfo = bld.shader->devinfo;
|
||||
const fs_reg size = offset(image, bld, BRW_IMAGE_PARAM_SIZE_OFFSET);
|
||||
@@ -895,7 +925,9 @@ namespace brw {
|
||||
* surface read on the result,
|
||||
*/
|
||||
const brw_predicate pred =
|
||||
emit_bounds_check(bld, image, saddr, dims);
|
||||
emit_untyped_image_check(bld, image,
|
||||
emit_bounds_check(bld, image,
|
||||
saddr, dims));
|
||||
|
||||
/* and they don't know about surface coordinates, we need to
|
||||
* convert them to a raw memory offset.
|
||||
@@ -1041,7 +1073,9 @@ namespace brw {
|
||||
* the surface write on the result,
|
||||
*/
|
||||
const brw_predicate pred =
|
||||
emit_bounds_check(bld, image, saddr, dims);
|
||||
emit_untyped_image_check(bld, image,
|
||||
emit_bounds_check(bld, image,
|
||||
saddr, dims));
|
||||
|
||||
/* and, phew, they don't know about surface coordinates, we
|
||||
* need to convert them to a raw memory offset.
|
||||
@@ -1072,7 +1106,7 @@ namespace brw {
|
||||
using namespace image_coordinates;
|
||||
using namespace surface_access;
|
||||
/* Avoid performing an atomic operation on an unbound surface. */
|
||||
const brw_predicate pred = emit_surface_check(bld, image);
|
||||
const brw_predicate pred = emit_typed_atomic_check(bld, image);
|
||||
|
||||
/* Transform the image coordinates into actual surface coordinates. */
|
||||
const fs_reg saddr =
|
||||
|
@@ -129,7 +129,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)
|
||||
ctx->_Shader->CurrentProgram[MESA_SHADER_GEOMETRY];
|
||||
|
||||
if (prog) {
|
||||
/* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS */
|
||||
/* BRW_NEW_GS_PROG_DATA, BRW_NEW_IMAGE_UNITS, _NEW_TEXTURE */
|
||||
brw_upload_image_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
|
||||
&brw->gs.base, &brw->gs.prog_data->base.base);
|
||||
}
|
||||
@@ -137,6 +137,7 @@ brw_upload_gs_image_surfaces(struct brw_context *brw)
|
||||
|
||||
const struct brw_tracked_state brw_gs_image_surfaces = {
|
||||
.dirty = {
|
||||
.mesa = _NEW_TEXTURE,
|
||||
.brw = BRW_NEW_BATCH |
|
||||
BRW_NEW_GEOMETRY_PROGRAM |
|
||||
BRW_NEW_GS_PROG_DATA |
|
||||
|
@@ -48,6 +48,7 @@ public:
|
||||
|
||||
private:
|
||||
void emit(ir_variable *, ir_rvalue *);
|
||||
ir_variable *temp(void *ctx, const glsl_type *type, const char *name);
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -60,6 +61,17 @@ lower_texture_grad_visitor::emit(ir_variable *var, ir_rvalue *value)
|
||||
base_ir->insert_before(assign(var, value));
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit a temporary variable declaration
|
||||
*/
|
||||
ir_variable *
|
||||
lower_texture_grad_visitor::temp(void *ctx, const glsl_type *type, const char *name)
|
||||
{
|
||||
ir_variable *var = new(ctx) ir_variable(type, name, ir_var_temporary);
|
||||
base_ir->insert_before(var);
|
||||
return var;
|
||||
}
|
||||
|
||||
static const glsl_type *
|
||||
txs_type(const glsl_type *type)
|
||||
{
|
||||
@@ -144,28 +156,179 @@ lower_texture_grad_visitor::visit_leave(ir_texture *ir)
|
||||
new(mem_ctx) ir_variable(grad_type, "dPdy", ir_var_temporary);
|
||||
emit(dPdy, mul(size, ir->lod_info.grad.dPdy));
|
||||
|
||||
/* Calculate rho from equation 3.20 of the GL 3.0 specification. */
|
||||
ir_rvalue *rho;
|
||||
if (dPdx->type->is_scalar()) {
|
||||
rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
|
||||
expr(ir_unop_abs, dPdy));
|
||||
} else {
|
||||
rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
|
||||
expr(ir_unop_sqrt, dot(dPdy, dPdy)));
|
||||
}
|
||||
|
||||
/* lambda_base = log2(rho). We're ignoring GL state biases for now.
|
||||
*
|
||||
* For cube maps the result of these formulas is giving us a value of rho
|
||||
* that is twice the value we should use, so divide it by 2 or,
|
||||
* alternatively, remove one unit from the result of the log2 computation.
|
||||
*/
|
||||
ir->op = ir_txl;
|
||||
if (ir->sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
|
||||
ir->lod_info.lod = expr(ir_binop_add,
|
||||
expr(ir_unop_log2, rho),
|
||||
new(mem_ctx) ir_constant(-1.0f));
|
||||
/* Cubemap texture lookups first generate a texture coordinate normalized
|
||||
* to [-1, 1] on the appropriate face. The appropriate face is determined
|
||||
* by which component has largest magnitude and its sign. The texture
|
||||
* coordinate is the quotient of the remaining texture coordinates against
|
||||
* the absolute value of the component of largest magnitude. This
|
||||
* division requires that the computing of the derivative of the texel
|
||||
* coordinate must use the quotient rule. The high level GLSL code is as
|
||||
* follows:
|
||||
*
|
||||
* Step 1: selection
|
||||
*
|
||||
* vec3 abs_p, Q, dQdx, dQdy;
|
||||
* abs_p = abs(ir->coordinate);
|
||||
* if (abs_p.x >= max(abs_p.y, abs_p.z)) {
|
||||
* Q = ir->coordinate.yzx;
|
||||
* dQdx = ir->lod_info.grad.dPdx.yzx;
|
||||
* dQdy = ir->lod_info.grad.dPdy.yzx;
|
||||
* }
|
||||
* if (abs_p.y >= max(abs_p.x, abs_p.z)) {
|
||||
* Q = ir->coordinate.xzy;
|
||||
* dQdx = ir->lod_info.grad.dPdx.xzy;
|
||||
* dQdy = ir->lod_info.grad.dPdy.xzy;
|
||||
* }
|
||||
* if (abs_p.z >= max(abs_p.x, abs_p.y)) {
|
||||
* Q = ir->coordinate;
|
||||
* dQdx = ir->lod_info.grad.dPdx;
|
||||
* dQdy = ir->lod_info.grad.dPdy;
|
||||
* }
|
||||
*
|
||||
* Step 2: use quotient rule to compute derivative. The normalized to
|
||||
* [-1, 1] texel coordinate is given by Q.xy / (sign(Q.z) * Q.z). We are
|
||||
* only concerned with the magnitudes of the derivatives whose values are
|
||||
* not affected by the sign. We drop the sign from the computation.
|
||||
*
|
||||
* vec2 dx, dy;
|
||||
* float recip;
|
||||
*
|
||||
* recip = 1.0 / Q.z;
|
||||
* dx = recip * ( dQdx.xy - Q.xy * (dQdx.z * recip) );
|
||||
* dy = recip * ( dQdy.xy - Q.xy * (dQdy.z * recip) );
|
||||
*
|
||||
* Step 3: compute LOD. At this point we have the derivatives of the
|
||||
* texture coordinates normalized to [-1,1]. We take the LOD to be
|
||||
* result = log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * 0.5 * L)
|
||||
* = -1.0 + log2(max(sqrt(dot(dx, dx)), sqrt(dot(dy, dy))) * L)
|
||||
* = -1.0 + log2(sqrt(max(dot(dx, dx), dot(dy,dy))) * L)
|
||||
* = -1.0 + log2(sqrt(L * L * max(dot(dx, dx), dot(dy,dy))))
|
||||
* = -1.0 + 0.5 * log2(L * L * max(dot(dx, dx), dot(dy,dy)))
|
||||
* where L is the dimension of the cubemap. The code is:
|
||||
*
|
||||
* float M, result;
|
||||
* M = max(dot(dx, dx), dot(dy, dy));
|
||||
* L = textureSize(sampler, 0).x;
|
||||
* result = -1.0 + 0.5 * log2(L * L * M);
|
||||
*/
|
||||
|
||||
/* Helpers to make code more human readable. */
|
||||
#define EMIT(instr) base_ir->insert_before(instr)
|
||||
#define THEN(irif, instr) irif->then_instructions.push_tail(instr)
|
||||
#define CLONE(x) x->clone(mem_ctx, NULL)
|
||||
|
||||
ir_variable *abs_p = temp(mem_ctx, glsl_type::vec3_type, "abs_p");
|
||||
|
||||
EMIT(assign(abs_p, swizzle_for_size(abs(CLONE(ir->coordinate)), 3)));
|
||||
|
||||
ir_variable *Q = temp(mem_ctx, glsl_type::vec3_type, "Q");
|
||||
ir_variable *dQdx = temp(mem_ctx, glsl_type::vec3_type, "dQdx");
|
||||
ir_variable *dQdy = temp(mem_ctx, glsl_type::vec3_type, "dQdy");
|
||||
|
||||
/* unmodified dPdx, dPdy values */
|
||||
ir_rvalue *dPdx = ir->lod_info.grad.dPdx;
|
||||
ir_rvalue *dPdy = ir->lod_info.grad.dPdy;
|
||||
|
||||
/* 1. compute selector */
|
||||
|
||||
/* if (abs_p.x >= max(abs_p.y, abs_p.z)) ... */
|
||||
ir_if *branch_x =
|
||||
new(mem_ctx) ir_if(gequal(swizzle_x(abs_p),
|
||||
max2(swizzle_y(abs_p), swizzle_z(abs_p))));
|
||||
|
||||
/* Q = p.yzx;
|
||||
* dQdx = dPdx.yzx;
|
||||
* dQdy = dPdy.yzx;
|
||||
*/
|
||||
int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0);
|
||||
THEN(branch_x, assign(Q, swizzle(CLONE(ir->coordinate), yzx, 3)));
|
||||
THEN(branch_x, assign(dQdx, swizzle(CLONE(dPdx), yzx, 3)));
|
||||
THEN(branch_x, assign(dQdy, swizzle(CLONE(dPdy), yzx, 3)));
|
||||
EMIT(branch_x);
|
||||
|
||||
/* if (abs_p.y >= max(abs_p.x, abs_p.z)) */
|
||||
ir_if *branch_y =
|
||||
new(mem_ctx) ir_if(gequal(swizzle_y(abs_p),
|
||||
max2(swizzle_x(abs_p), swizzle_z(abs_p))));
|
||||
|
||||
/* Q = p.xzy;
|
||||
* dQdx = dPdx.xzy;
|
||||
* dQdy = dPdy.xzy;
|
||||
*/
|
||||
int xzy = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Z, SWIZZLE_Y, 0);
|
||||
THEN(branch_y, assign(Q, swizzle(CLONE(ir->coordinate), xzy, 3)));
|
||||
THEN(branch_y, assign(dQdx, swizzle(CLONE(dPdx), xzy, 3)));
|
||||
THEN(branch_y, assign(dQdy, swizzle(CLONE(dPdy), xzy, 3)));
|
||||
EMIT(branch_y);
|
||||
|
||||
/* if (abs_p.z >= max(abs_p.x, abs_p.y)) */
|
||||
ir_if *branch_z =
|
||||
new(mem_ctx) ir_if(gequal(swizzle_z(abs_p),
|
||||
max2(swizzle_x(abs_p), swizzle_y(abs_p))));
|
||||
|
||||
/* Q = p;
|
||||
* dQdx = dPdx;
|
||||
* dQdy = dPdy;
|
||||
*/
|
||||
THEN(branch_z, assign(Q, swizzle_for_size(CLONE(ir->coordinate), 3)));
|
||||
THEN(branch_z, assign(dQdx, CLONE(dPdx)));
|
||||
THEN(branch_z, assign(dQdy, CLONE(dPdy)));
|
||||
EMIT(branch_z);
|
||||
|
||||
/* 2. quotient rule */
|
||||
ir_variable *recip = temp(mem_ctx, glsl_type::float_type, "recip");
|
||||
EMIT(assign(recip, div(new(mem_ctx) ir_constant(1.0f), swizzle_z(Q))));
|
||||
|
||||
ir_variable *dx = temp(mem_ctx, glsl_type::vec2_type, "dx");
|
||||
ir_variable *dy = temp(mem_ctx, glsl_type::vec2_type, "dy");
|
||||
|
||||
/* tmp = Q.xy * recip;
|
||||
* dx = recip * ( dQdx.xy - (tmp * dQdx.z) );
|
||||
* dy = recip * ( dQdy.xy - (tmp * dQdy.z) );
|
||||
*/
|
||||
ir_variable *tmp = temp(mem_ctx, glsl_type::vec2_type, "tmp");
|
||||
EMIT(assign(tmp, mul(swizzle_xy(Q), recip)));
|
||||
EMIT(assign(dx, mul(recip, sub(swizzle_xy(dQdx),
|
||||
mul(tmp, swizzle_z(dQdx))))));
|
||||
EMIT(assign(dy, mul(recip, sub(swizzle_xy(dQdy),
|
||||
mul(tmp, swizzle_z(dQdy))))));
|
||||
|
||||
/* M = max(dot(dx, dx), dot(dy, dy)); */
|
||||
ir_variable *M = temp(mem_ctx, glsl_type::float_type, "M");
|
||||
EMIT(assign(M, max2(dot(dx, dx), dot(dy, dy))));
|
||||
|
||||
/* size has textureSize() of LOD 0 */
|
||||
ir_variable *L = temp(mem_ctx, glsl_type::float_type, "L");
|
||||
EMIT(assign(L, swizzle_x(size)));
|
||||
|
||||
ir_variable *result = temp(mem_ctx, glsl_type::float_type, "result");
|
||||
|
||||
/* result = -1.0 + 0.5 * log2(L * L * M); */
|
||||
EMIT(assign(result,
|
||||
add(new(mem_ctx)ir_constant(-1.0f),
|
||||
mul(new(mem_ctx)ir_constant(0.5f),
|
||||
expr(ir_unop_log2, mul(mul(L, L), M))))));
|
||||
|
||||
/* 3. final assignment of parameters to textureLod call */
|
||||
ir->lod_info.lod = new (mem_ctx) ir_dereference_variable(result);
|
||||
|
||||
#undef THEN
|
||||
#undef EMIT
|
||||
|
||||
} else {
|
||||
/* Calculate rho from equation 3.20 of the GL 3.0 specification. */
|
||||
ir_rvalue *rho;
|
||||
if (dPdx->type->is_scalar()) {
|
||||
rho = expr(ir_binop_max, expr(ir_unop_abs, dPdx),
|
||||
expr(ir_unop_abs, dPdy));
|
||||
} else {
|
||||
rho = expr(ir_binop_max, expr(ir_unop_sqrt, dot(dPdx, dPdx)),
|
||||
expr(ir_unop_sqrt, dot(dPdy, dPdy)));
|
||||
}
|
||||
|
||||
/* lambda_base = log2(rho). We're ignoring GL state biases for now. */
|
||||
ir->lod_info.lod = expr(ir_unop_log2, rho);
|
||||
}
|
||||
|
||||
|
@@ -61,6 +61,8 @@ src_reg::src_reg(register_file file, int reg, const glsl_type *type)
|
||||
this->swizzle = brw_swizzle_for_size(type->vector_elements);
|
||||
else
|
||||
this->swizzle = BRW_SWIZZLE_XYZW;
|
||||
if (type)
|
||||
this->type = brw_type_for_base_type(type);
|
||||
}
|
||||
|
||||
/** Generic unset register constructor. */
|
||||
@@ -950,6 +952,14 @@ vec4_instruction::can_reswizzle(int dst_writemask,
|
||||
if (mlen > 0)
|
||||
return false;
|
||||
|
||||
/* We can't use swizzles on the accumulator and that's really the only
|
||||
* HW_REG we would care to reswizzle so just disallow them all.
|
||||
*/
|
||||
for (int i = 0; i < 3; i++) {
|
||||
if (src[i].file == HW_REG)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -1053,6 +1063,17 @@ vec4_visitor::opt_register_coalesce()
|
||||
}
|
||||
}
|
||||
|
||||
/* This doesn't handle saturation on the instruction we
|
||||
* want to coalesce away if the register types do not match.
|
||||
* But if scan_inst is a non type-converting 'mov', we can fix
|
||||
* the types later.
|
||||
*/
|
||||
if (inst->saturate &&
|
||||
inst->dst.type != scan_inst->dst.type &&
|
||||
!(scan_inst->opcode == BRW_OPCODE_MOV &&
|
||||
scan_inst->dst.type == scan_inst->src[0].type))
|
||||
break;
|
||||
|
||||
/* If we can't handle the swizzle, bail. */
|
||||
if (!scan_inst->can_reswizzle(inst->dst.writemask,
|
||||
inst->src[0].swizzle,
|
||||
@@ -1086,11 +1107,13 @@ vec4_visitor::opt_register_coalesce()
|
||||
if (interfered)
|
||||
break;
|
||||
|
||||
/* If somebody else writes our destination here, we can't coalesce
|
||||
* before that.
|
||||
/* If somebody else writes the same channels of our destination here,
|
||||
* we can't coalesce before that.
|
||||
*/
|
||||
if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written))
|
||||
break;
|
||||
if (inst->dst.in_range(scan_inst->dst, scan_inst->regs_written) &&
|
||||
(inst->dst.writemask & scan_inst->dst.writemask) != 0) {
|
||||
break;
|
||||
}
|
||||
|
||||
/* Check for reads of the register we're trying to coalesce into. We
|
||||
* can't go rewriting instructions above that to put some other value
|
||||
@@ -1128,6 +1151,16 @@ vec4_visitor::opt_register_coalesce()
|
||||
scan_inst->dst.file = inst->dst.file;
|
||||
scan_inst->dst.reg = inst->dst.reg;
|
||||
scan_inst->dst.reg_offset = inst->dst.reg_offset;
|
||||
if (inst->saturate &&
|
||||
inst->dst.type != scan_inst->dst.type) {
|
||||
/* If we have reached this point, scan_inst is a non
|
||||
* type-converting 'mov' and we can modify its register types
|
||||
* to match the ones in inst. Otherwise, we could have an
|
||||
* incorrect saturation result.
|
||||
*/
|
||||
scan_inst->dst.type = inst->dst.type;
|
||||
scan_inst->src[0].type = inst->src[0].type;
|
||||
}
|
||||
scan_inst->saturate |= inst->saturate;
|
||||
}
|
||||
scan_inst = (vec4_instruction *)scan_inst->next;
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user