Compare commits
125 Commits
24.1
...
mesa-11.0.
Author | SHA1 | Date | |
---|---|---|---|
|
bcb9e1d26b | ||
|
de1637c7fe | ||
|
cf716563a8 | ||
|
2c65e64881 | ||
|
8be6b32d65 | ||
|
0e0d008b2b | ||
|
007aae740e | ||
|
575f5a94c3 | ||
|
b1203ec9f3 | ||
|
c29e3f1bca | ||
|
c98217178b | ||
|
278739eb01 | ||
|
ae6dcfee56 | ||
|
9fcf28bb14 | ||
|
5fe09ffe6a | ||
|
395cd23690 | ||
|
d04024cffa | ||
|
370c2b344b | ||
|
bcb3bfd510 | ||
|
ebfa2ea34f | ||
|
3736ef3a17 | ||
|
d9e4a3ae6a | ||
|
1afea31ad8 | ||
|
d9b54a01be | ||
|
c4bae5792b | ||
|
4f1e500150 | ||
|
bd46093103 | ||
|
766d11e8f0 | ||
|
5923bd6d78 | ||
|
d0cf5100b5 | ||
|
e36ca8c2bb | ||
|
901744b2ff | ||
|
c62f82980c | ||
|
151f84f2db | ||
|
7d79ad95fd | ||
|
2becc98645 | ||
|
7cca7f71da | ||
|
94b8f60146 | ||
|
94bf2e2e05 | ||
|
78612aba51 | ||
|
0878187488 | ||
|
4ae2ffbff1 | ||
|
b0578c0061 | ||
|
b3dfd67feb | ||
|
017085efaf | ||
|
9e3528a844 | ||
|
84060d35bb | ||
|
2c581d04cc | ||
|
a1ac93fc4b | ||
|
1f2b601f8b | ||
|
4ca5756766 | ||
|
7023899ab9 | ||
|
2190f218ad | ||
|
2c27775a44 | ||
|
b7b8d4982d | ||
|
0d1f600c94 | ||
|
0c9f66829c | ||
|
11dc43424d | ||
|
ec9bafda70 | ||
|
6654483bc6 | ||
|
4b1ef5e842 | ||
|
95bc059c50 | ||
|
254a07841d | ||
|
271290f077 | ||
|
7bf27c2393 | ||
|
7f80a2383e | ||
|
3e1fde76b6 | ||
|
747e1b03bf | ||
|
b85ec1e34b | ||
|
acb822f1bd | ||
|
ddf459492d | ||
|
fcdaa190e5 | ||
|
0abcd9c8fc | ||
|
0b14d35863 | ||
|
a6710090af | ||
|
0c98ba7abf | ||
|
eef8258a86 | ||
|
747cd2c273 | ||
|
ecdd69cd05 | ||
|
74fa106932 | ||
|
1153420017 | ||
|
5704d473c8 | ||
|
eb2b88c44b | ||
|
5c08afc894 | ||
|
5fb758a418 | ||
|
bb37824959 | ||
|
8fc2cbb00e | ||
|
b497b88dbe | ||
|
dcb220f2f7 | ||
|
d9534e4785 | ||
|
63b4e6bfc9 | ||
|
a5dee22767 | ||
|
1aea7812b0 | ||
|
f0180a37d7 | ||
|
fe77d714f2 | ||
|
fb119b2260 | ||
|
50306a33b4 | ||
|
cf007af859 | ||
|
7d576419b2 | ||
|
893caebf44 | ||
|
3f8d44210c | ||
|
579ca506ae | ||
|
94205d0aa2 | ||
|
1b40221850 | ||
|
2fe87a1b68 | ||
|
b83b452eea | ||
|
68bd2ddda0 | ||
|
9db5c2ca2e | ||
|
08c41221d7 | ||
|
896ef5cb95 | ||
|
594388e577 | ||
|
812f2855dd | ||
|
5d8ce45d90 | ||
|
33b0f6e5e1 | ||
|
6659fba2c0 | ||
|
adae777f24 | ||
|
0b690e39dc | ||
|
67fc4b417a | ||
|
7a8d2048bc | ||
|
bf84c85130 | ||
|
aab6075613 | ||
|
2ef3434328 | ||
|
3d58fea2e3 | ||
|
ab94875352 | ||
|
f077632030 |
@@ -42,6 +42,7 @@ LOCAL_CFLAGS += \
|
||||
-DANDROID_VERSION=0x0$(MESA_ANDROID_MAJOR_VERSION)0$(MESA_ANDROID_MINOR_VERSION)
|
||||
|
||||
LOCAL_CFLAGS += \
|
||||
-D__STDC_LIMIT_MACROS \
|
||||
-DHAVE___BUILTIN_EXPECT \
|
||||
-DHAVE___BUILTIN_FFS \
|
||||
-DHAVE___BUILTIN_FFSLL \
|
||||
@@ -70,7 +71,7 @@ endif
|
||||
|
||||
ifeq ($(MESA_ENABLE_LLVM),true)
|
||||
LOCAL_CFLAGS += \
|
||||
-DHAVE_LLVM=0x0305 -DLLVM_VERSION_PATCH=2 \
|
||||
-DHAVE_LLVM=0x0305 -DMESA_LLVM_VERSION_PATCH=2 \
|
||||
-D__STDC_CONSTANT_MACROS \
|
||||
-D__STDC_FORMAT_MACROS \
|
||||
-D__STDC_LIMIT_MACROS
|
||||
|
155
configure.ac
155
configure.ac
@@ -9,7 +9,6 @@ dnl Copyright © 2009-2014 Jon TURNEY
|
||||
dnl Copyright © 2011-2012 Benjamin Franzke
|
||||
dnl Copyright © 2008-2014 David Airlie
|
||||
dnl Copyright © 2009-2013 Brian Paul
|
||||
dnl Copyright © 2003-2007 Keith Packard, Daniel Stone
|
||||
dnl
|
||||
dnl Permission is hereby granted, free of charge, to any person obtaining a
|
||||
dnl copy of this software and associated documentation files (the "Software"),
|
||||
@@ -988,144 +987,6 @@ fi
|
||||
|
||||
AC_SUBST([MESA_LLVM])
|
||||
|
||||
# SHA1 hashing
|
||||
AC_ARG_WITH([sha1],
|
||||
[AS_HELP_STRING([--with-sha1=libc|libmd|libnettle|libgcrypt|libcrypto|libsha1|CommonCrypto|CryptoAPI],
|
||||
[choose SHA1 implementation])])
|
||||
case "x$with_sha1" in
|
||||
x | xlibc | xlibmd | xlibnettle | xlibgcrypt | xlibcrypto | xlibsha1 | xCommonCrypto | xCryptoAPI)
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([Illegal value for --with-sha1: $with_sha1])
|
||||
esac
|
||||
|
||||
AC_CHECK_FUNC([SHA1Init], [HAVE_SHA1_IN_LIBC=yes])
|
||||
if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_LIBC" = xyes; then
|
||||
with_sha1=libc
|
||||
fi
|
||||
if test "x$with_sha1" = xlibc && test "x$HAVE_SHA1_IN_LIBC" != xyes; then
|
||||
AC_MSG_ERROR([sha1 in libc requested but not found])
|
||||
fi
|
||||
if test "x$with_sha1" = xlibc; then
|
||||
AC_DEFINE([HAVE_SHA1_IN_LIBC], [1],
|
||||
[Use libc SHA1 functions])
|
||||
SHA1_LIBS=""
|
||||
fi
|
||||
AC_CHECK_FUNC([CC_SHA1_Init], [HAVE_SHA1_IN_COMMONCRYPTO=yes])
|
||||
if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_COMMONCRYPTO" = xyes; then
|
||||
with_sha1=CommonCrypto
|
||||
fi
|
||||
if test "x$with_sha1" = xCommonCrypto && test "x$HAVE_SHA1_IN_COMMONCRYPTO" != xyes; then
|
||||
AC_MSG_ERROR([CommonCrypto requested but not found])
|
||||
fi
|
||||
if test "x$with_sha1" = xCommonCrypto; then
|
||||
AC_DEFINE([HAVE_SHA1_IN_COMMONCRYPTO], [1],
|
||||
[Use CommonCrypto SHA1 functions])
|
||||
SHA1_LIBS=""
|
||||
fi
|
||||
dnl stdcall functions cannot be tested with AC_CHECK_LIB
|
||||
AC_CHECK_HEADER([wincrypt.h], [HAVE_SHA1_IN_CRYPTOAPI=yes], [], [#include <windows.h>])
|
||||
if test "x$with_sha1" = x && test "x$HAVE_SHA1_IN_CRYPTOAPI" = xyes; then
|
||||
with_sha1=CryptoAPI
|
||||
fi
|
||||
if test "x$with_sha1" = xCryptoAPI && test "x$HAVE_SHA1_IN_CRYPTOAPI" != xyes; then
|
||||
AC_MSG_ERROR([CryptoAPI requested but not found])
|
||||
fi
|
||||
if test "x$with_sha1" = xCryptoAPI; then
|
||||
AC_DEFINE([HAVE_SHA1_IN_CRYPTOAPI], [1],
|
||||
[Use CryptoAPI SHA1 functions])
|
||||
SHA1_LIBS=""
|
||||
fi
|
||||
AC_CHECK_LIB([md], [SHA1Init], [HAVE_LIBMD=yes])
|
||||
if test "x$with_sha1" = x && test "x$HAVE_LIBMD" = xyes; then
|
||||
with_sha1=libmd
|
||||
fi
|
||||
if test "x$with_sha1" = xlibmd && test "x$HAVE_LIBMD" != xyes; then
|
||||
AC_MSG_ERROR([libmd requested but not found])
|
||||
fi
|
||||
if test "x$with_sha1" = xlibmd; then
|
||||
AC_DEFINE([HAVE_SHA1_IN_LIBMD], [1],
|
||||
[Use libmd SHA1 functions])
|
||||
SHA1_LIBS=-lmd
|
||||
fi
|
||||
PKG_CHECK_MODULES([LIBSHA1], [libsha1], [HAVE_LIBSHA1=yes], [HAVE_LIBSHA1=no])
|
||||
if test "x$with_sha1" = x && test "x$HAVE_LIBSHA1" = xyes; then
|
||||
with_sha1=libsha1
|
||||
fi
|
||||
if test "x$with_sha1" = xlibsha1 && test "x$HAVE_LIBSHA1" != xyes; then
|
||||
AC_MSG_ERROR([libsha1 requested but not found])
|
||||
fi
|
||||
if test "x$with_sha1" = xlibsha1; then
|
||||
AC_DEFINE([HAVE_SHA1_IN_LIBSHA1], [1],
|
||||
[Use libsha1 for SHA1])
|
||||
SHA1_LIBS=-lsha1
|
||||
fi
|
||||
AC_CHECK_LIB([nettle], [nettle_sha1_init], [HAVE_LIBNETTLE=yes])
|
||||
if test "x$with_sha1" = x && test "x$HAVE_LIBNETTLE" = xyes; then
|
||||
with_sha1=libnettle
|
||||
fi
|
||||
if test "x$with_sha1" = xlibnettle && test "x$HAVE_LIBNETTLE" != xyes; then
|
||||
AC_MSG_ERROR([libnettle requested but not found])
|
||||
fi
|
||||
if test "x$with_sha1" = xlibnettle; then
|
||||
AC_DEFINE([HAVE_SHA1_IN_LIBNETTLE], [1],
|
||||
[Use libnettle SHA1 functions])
|
||||
SHA1_LIBS=-lnettle
|
||||
fi
|
||||
AC_CHECK_LIB([gcrypt], [gcry_md_open], [HAVE_LIBGCRYPT=yes])
|
||||
if test "x$with_sha1" = x && test "x$HAVE_LIBGCRYPT" = xyes; then
|
||||
with_sha1=libgcrypt
|
||||
fi
|
||||
if test "x$with_sha1" = xlibgcrypt && test "x$HAVE_LIBGCRYPT" != xyes; then
|
||||
AC_MSG_ERROR([libgcrypt requested but not found])
|
||||
fi
|
||||
if test "x$with_sha1" = xlibgcrypt; then
|
||||
AC_DEFINE([HAVE_SHA1_IN_LIBGCRYPT], [1],
|
||||
[Use libgcrypt SHA1 functions])
|
||||
SHA1_LIBS=-lgcrypt
|
||||
fi
|
||||
# We don't need all of the OpenSSL libraries, just libcrypto
|
||||
AC_CHECK_LIB([crypto], [SHA1_Init], [HAVE_LIBCRYPTO=yes])
|
||||
PKG_CHECK_MODULES([OPENSSL], [openssl], [HAVE_OPENSSL_PKC=yes],
|
||||
[HAVE_OPENSSL_PKC=no])
|
||||
if test "x$HAVE_LIBCRYPTO" = xyes || test "x$HAVE_OPENSSL_PKC" = xyes; then
|
||||
if test "x$with_sha1" = x; then
|
||||
with_sha1=libcrypto
|
||||
fi
|
||||
else
|
||||
if test "x$with_sha1" = xlibcrypto; then
|
||||
AC_MSG_ERROR([OpenSSL libcrypto requested but not found])
|
||||
fi
|
||||
fi
|
||||
if test "x$with_sha1" = xlibcrypto; then
|
||||
if test "x$HAVE_LIBCRYPTO" = xyes; then
|
||||
SHA1_LIBS=-lcrypto
|
||||
else
|
||||
SHA1_LIBS="$OPENSSL_LIBS"
|
||||
SHA1_CFLAGS="$OPENSSL_CFLAGS"
|
||||
fi
|
||||
fi
|
||||
AC_MSG_CHECKING([for SHA1 implementation])
|
||||
AC_MSG_RESULT([$with_sha1])
|
||||
AC_SUBST(SHA1_LIBS)
|
||||
AC_SUBST(SHA1_CFLAGS)
|
||||
|
||||
# Allow user to configure out the shader-cache feature
|
||||
AC_ARG_ENABLE([shader-cache],
|
||||
AS_HELP_STRING([--disable-shader-cache], [Disable binary shader cache]),
|
||||
[enable_shader_cache="$enableval"],
|
||||
[if test "x$with_sha1" != "x"; then
|
||||
enable_shader_cache=yes
|
||||
else
|
||||
enable_shader_cache=no
|
||||
fi])
|
||||
if test "x$with_sha1" = "x"; then
|
||||
if test "x$enable_shader_cache" = "xyes"; then
|
||||
AC_MSG_ERROR([Cannot enable shader cache (no SHA-1 implementation found)])
|
||||
fi
|
||||
fi
|
||||
AM_CONDITIONAL([ENABLE_SHADER_CACHE], [test x$enable_shader_cache = xyes])
|
||||
|
||||
case "$host_os" in
|
||||
linux*)
|
||||
need_pci_id=yes ;;
|
||||
@@ -1289,6 +1150,16 @@ AC_SUBST(GLX_TLS, ${GLX_USE_TLS})
|
||||
AS_IF([test "x$GLX_USE_TLS" = xyes -a "x$ax_pthread_ok" = xyes],
|
||||
[DEFINES="${DEFINES} -DGLX_USE_TLS"])
|
||||
|
||||
dnl Read-only text section on x86 hardened platforms
|
||||
AC_ARG_ENABLE([glx-read-only-text],
|
||||
[AS_HELP_STRING([--enable-glx-read-only-text],
|
||||
[Disable writable .text section on x86 (decreases performance) @<:@default=disabled@:>@])],
|
||||
[enable_glx_read_only_text="$enableval"],
|
||||
[enable_glx_read_only_text=no])
|
||||
if test "x$enable_glx_read_only_text" = xyes; then
|
||||
DEFINES="$DEFINES -DGLX_X86_READONLY_TEXT"
|
||||
fi
|
||||
|
||||
dnl
|
||||
dnl More DRI setup
|
||||
dnl
|
||||
@@ -2484,12 +2355,6 @@ else
|
||||
echo " Gallium: no"
|
||||
fi
|
||||
|
||||
dnl Shader cache
|
||||
echo ""
|
||||
echo " Shader cache: $enable_shader_cache"
|
||||
if test "x$enable_shader_cache" = "xyes"; then
|
||||
echo " With SHA1 from: $with_sha1"
|
||||
fi
|
||||
|
||||
dnl Libraries
|
||||
echo ""
|
||||
|
@@ -14,7 +14,7 @@
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.0 Release Notes / TBD</h1>
|
||||
<h1>Mesa 11.0.0 Release Notes / September 12, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.0 is a new development release.
|
||||
@@ -33,7 +33,8 @@ because compatibility contexts are not supported.
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD.
|
||||
7d7e4ddffa3b162506efa01e2cc41e329caa4995336b92e5cc21f2e1fb36c1b3 mesa-11.0.0.tar.gz
|
||||
e095a3eb2eca9dfde7efca8946527c8ae20a0cc938a8c78debc7f158ad44af32 mesa-11.0.0.tar.xz
|
||||
</pre>
|
||||
|
||||
|
||||
@@ -83,13 +84,175 @@ Note: some of the new features are only available with certain drivers.
|
||||
<li>EGL 1.5 on r600, radeonsi, nv50, nvc0</li>
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
TBD.
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=51658">Bug 51658</a> - r200 (&amp; possibly radeon) DRI fixes for gnome shell on Mesa 8.0.3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=65525">Bug 65525</a> - [llvmpipe] lp_scene.h:210:lp_scene_alloc: Assertion `size &lt;= (64 * 1024)' failed.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=66346">Bug 66346</a> - shader_query.cpp:49: error: invalid conversion from 'void*' to 'GLuint'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73512">Bug 73512</a> - [clover] mesa.icd. should contain full path</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=73528">Bug 73528</a> - Deferred lighting in Second Life causes system hiccups and screen flickering</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=74329">Bug 74329</a> - Please expose OES_texture_float and OES_texture_half_float on the ES3 context</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=80500">Bug 80500</a> - Flickering shadows in unreleased title trace</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=82186">Bug 82186</a> - [r600g] BARTS GPU lockup with minecraft shaders</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84225">Bug 84225</a> - Allow constant-index-expression sampler array indexing with GLSL-ES &lt; 300</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=84677">Bug 84677</a> - Triangle disappears with glPolygonMode GL_LINE</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=85252">Bug 85252</a> - Segfault in compiler while processing ternary operator with void arguments</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=89131">Bug 89131</a> - [Bisected] Graphical corruption in Weston, shows old framebuffer pieces</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90000">Bug 90000</a> - [i965 Bisected NIR] Piglit/glean_fragprog1-z-write_test fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90073">Bug 90073</a> - Leaks in xcb_dri3_open_reply_fds() and get_render_node_from_id_path_tag</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90249">Bug 90249</a> - Fails to build egl_dri2 on osx</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90310">Bug 90310</a> - Fails to build gallium_dri.so at linking stage with clang because of multiple redefinitions</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90347">Bug 90347</a> - [NVE0+] Failure to insert texbar under some circumstances (causing bad colors in Terasology)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90466">Bug 90466</a> - arm: linker error undefined reference to `nir_metadata_preserve'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90520">Bug 90520</a> - Register spilling clobbers registers used elsewhere in the shader</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90537">Bug 90537</a> - radeonsi bo/va conflict on RADEON_GEM_VA (rscreen->ws->buffer_from_handle returns NULL)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90547">Bug 90547</a> - [BDW/BSW/SKL Bisected]Piglit/glean@vertprog1-rsq_test_2_(reciprocal_square_root_of_negative_value) fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90580">Bug 90580</a> - [HSW bisected] integer multiplication bug</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90600">Bug 90600</a> - IOError: [Errno 2] No such file or directory: 'gl_API.xml'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90621">Bug 90621</a> - Mesa fail to build from git</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90629">Bug 90629</a> - [i965] SIMD16 dual_source_blend assertion `src[i].file != GRF || src[i].width == dst.width' failed</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90691">Bug 90691</a> - [BSW]Piglit/spec/nv_conditional_render/dlist fails intermittently</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90728">Bug 90728</a> - dvd playback with vlc and vdpau causes segmentation fault</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90734">Bug 90734</a> - glBufferSubData is corrupting data when buffer is > 32k</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90748">Bug 90748</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.depth.rg_half_float_oes fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90749">Bug 90749</a> - [BDW Bisected]dEQP-GLES3.functional.rasterization.fbo.rbo_multisample_max.primitives.lines_wide fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90751">Bug 90751</a> - [BDW Bisected]dEQP-GLES3.functional.fbo.completeness.renderable.texture.stencil.stencil_index8 fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90797">Bug 90797</a> - [ALL bisected] Mesa change cause performance case manhattan fail.</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90817">Bug 90817</a> - swrast fails to load with certain remote X servers</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90830">Bug 90830</a> - [bsw bisected regression] GPU hang for spec.arb_gpu_shader5.execution.sampler_array_indexing.vs-nonzero-base</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90839">Bug 90839</a> - [10.5.5/10.6 regression, bisected] PBO glDrawPixels no longer using blit fastpath</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90873">Bug 90873</a> - Kernel hang, TearFree On, Mate desktop environment</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90887">Bug 90887</a> - PhiMovesPass in register allocator broken</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90895">Bug 90895</a> - [IVB/HSW/BDW/BSW Bisected] GLB2.7 Egypt, GfxBench3.0 T-Rex &amp; ALU and many SynMark cases performance reduced by 10-23%</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90902">Bug 90902</a> - [bsw][regression] dEQP: "Found invalid pixel values"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90903">Bug 90903</a> - egl_dri2.c:dri2_load fails to load libglapi on osx</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90904">Bug 90904</a> - OSX: EXC_BAD_ACCESS when using translate_sse + gallium + softpipe/llvmpipe</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90905">Bug 90905</a> - mesa: Finish subdir-objects transition</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=90925">Bug 90925</a> - "high fidelity": Segfault in _mesa_program_resource_find_name</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91022">Bug 91022</a> - [g45 g965 bisected] assertions generated from textureGrad cube samplers fix</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91047">Bug 91047</a> - [SNB Bisected] Messed up Fog in Super Smash Bros. Melee in Dolphin</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91056">Bug 91056</a> - The Bard's Tale (2005, native) has rendering issues</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91077">Bug 91077</a> - dri2_glx.c:1186: undefined reference to `loader_open_device'</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91099">Bug 91099</a> - [llvmpipe] piglit glsl-max-varyings >max_varying_components regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91101">Bug 91101</a> - [softpipe] piglit glsl-1.50@execution@geometry@max-input-components regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91117">Bug 91117</a> - Nimbus (running in wine) has rendering issues, objects are semi-transparent</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91124">Bug 91124</a> - Civilization V (in Wine) has rendering issues: text missing, menu bar corrupted</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91173">Bug 91173</a> - Oddworld: Stranger's Wrath HD: disfigured models in wrong colors</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91193">Bug 91193</a> - [290x] Dota2 reborn ingame rendering breaks with git-af4b9c7</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91222">Bug 91222</a> - lp_test_format regression on CentOS 7</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91226">Bug 91226</a> - Crash in glLinkProgram (NEW)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91231">Bug 91231</a> - [NV92] Psychonauts (native) segfaults on start when DRI3 enabled</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91254">Bug 91254</a> - (regression) video using VA-API on Intel slow and freeze system with mesa 10.6 or 10.6.1</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91290">Bug 91290</a> - SIGSEGV glcpp/glcpp-parse.y:1077</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91292">Bug 91292</a> - [BDW+] glVertexAttribDivisor not working in combination with glPolygonMode</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91337">Bug 91337</a> - OSMesaGetProcAddress("OSMesaPixelStore") returns nil</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91418">Bug 91418</a> - Visual Studio 2015 vsnprintf build error</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91425">Bug 91425</a> - [regression, bisected] Piglit spec/ext_packed_float/ getteximage-invalid-format-for-packed-type fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91441">Bug 91441</a> - make check DispatchSanity_test.GL30 regression</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91444">Bug 91444</a> - regression bisected radeonsi: don't change pipe_resource in resource_copy_region</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91461">Bug 91461</a> - gl_TessLevel* writes have no effect for all but the last TCS invocation</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91513">Bug 91513</a> - [IVB/HSW/BDW/SKL Bisected] Lightsmark performance reduced by 7%-10%</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91526">Bug 91526</a> - World of Warcraft (on Wine) has UI corruption with nouveau</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91544">Bug 91544</a> - [i965, regression, bisected] regression of several tests in 93977d3a151675946c03e</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91551">Bug 91551</a> - DXTn compressed normal maps produce severe artifacts on all NV5x and NVDx chipsets</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91570">Bug 91570</a> - Upgrading mesa to 10.6 causes segfault in OpenGL applications with GeForce4 MX 440 / AGP 8X</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91591">Bug 91591</a> - rounding.h:102:2: error: #error "Unsupported or undefined LONG_BIT"</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91610">Bug 91610</a> - [BSW] GPU hang for spec.shaders.point-vertex-id gl_instanceid divisor</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91673">Bug 91673</a> - Segfault when calling glTexSubImage2D on storage texture to bound FBO</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91726">Bug 91726</a> - R600 asserts in tgsi_cmp/make_src_for_op3</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91847">Bug 91847</a> - glGenerateTextureMipmap not working (no errors) unless glActiveTexture(GL_TEXTURE1) is called before</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91857">Bug 91857</a> - Mesa 10.6.3 linker is slow</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91881">Bug 91881</a> - regression: GPU lockups since mesa-11.0.0_rc1 on RV620 (r600) driver</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91890">Bug 91890</a> - [nve7] witcher2: blurry image &amp; DATA_ERRORs (class 0xa097 mthd 0x2380/0x238c)</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
TBD.
|
||||
<ul>
<li>Removed the EGL loader from the Linux SCons build.</li>
</ul>
|
||||
|
||||
</div>
|
||||
</body>
|
||||
|
133
docs/relnotes/11.0.1.html
Normal file
133
docs/relnotes/11.0.1.html
Normal file
@@ -0,0 +1,133 @@
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
||||
<title>Mesa Release Notes</title>
|
||||
<link rel="stylesheet" type="text/css" href="../mesa.css">
|
||||
</head>
|
||||
<body>
|
||||
|
||||
<div class="header">
|
||||
<h1>The Mesa 3D Graphics Library</h1>
|
||||
</div>
|
||||
|
||||
<iframe src="../contents.html"></iframe>
|
||||
<div class="content">
|
||||
|
||||
<h1>Mesa 11.0.1 Release Notes / September 26, 2015</h1>
|
||||
|
||||
<p>
|
||||
Mesa 11.0.1 is a bug fix release which fixes bugs found since the 11.0.0 release.
|
||||
</p>
|
||||
<p>
|
||||
Mesa 11.0.1 implements the OpenGL 4.1 API, but the version reported by
|
||||
glGetString(GL_VERSION) or glGetIntegerv(GL_MAJOR_VERSION) /
|
||||
glGetIntegerv(GL_MINOR_VERSION) depends on the particular driver being used.
|
||||
Some drivers don't support all the features required in OpenGL 4.1. OpenGL
|
||||
4.1 is <strong>only</strong> available if requested at context creation
|
||||
because compatibility contexts are not supported.
|
||||
</p>
|
||||
|
||||
|
||||
<h2>SHA256 checksums</h2>
|
||||
<pre>
|
||||
TBD
|
||||
</pre>
|
||||
|
||||
|
||||
<h2>New features</h2>
|
||||
<p>None</p>
|
||||
|
||||
<h2>Bug fixes</h2>
|
||||
|
||||
<p>This list is likely incomplete.</p>
|
||||
|
||||
<ul>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=38109">Bug 38109</a> - i915 driver crashes if too few vertices are submitted (Mesa 7.10.2)</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91114">Bug 91114</a> - ES3-CTS.gtf.GL3Tests.shadow.shadow_execution_vert fails</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91716">Bug 91716</a> - [bisected] piglit.shaders.glsl-vs-int-attrib regresses on 32 bit BYT, HSW, IVB, SNB</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=91719">Bug 91719</a> - [SNB,HSW,BYT] dEQP regressions associated with using NIR for vertex shaders</li>
|
||||
|
||||
<li><a href="https://bugs.freedesktop.org/show_bug.cgi?id=92009">Bug 92009</a> - ES3-CTS.gtf.GL3Tests.packed_pixels.packed_pixels fails</li>
|
||||
|
||||
</ul>
|
||||
|
||||
|
||||
<h2>Changes</h2>
|
||||
|
||||
<p>Antia Puentes (2):</p>
|
||||
<ul>
|
||||
<li>i965/vec4: Fix saturation errors when coalescing registers</li>
|
||||
<li>i965/vec4_nir: Load constants as integers</li>
|
||||
</ul>
|
||||
|
||||
<p>Anuj Phogat (1):</p>
|
||||
<ul>
|
||||
<li>meta: Abort meta pbo path if TexSubImage need signed unsigned conversion</li>
|
||||
</ul>
|
||||
|
||||
<p>Emil Velikov (2):</p>
|
||||
<ul>
|
||||
<li>docs: add sha256 checksums for 11.0.0</li>
|
||||
<li>Update version to 11.0.1</li>
|
||||
</ul>
|
||||
|
||||
<p>Iago Toral Quiroga (1):</p>
|
||||
<ul>
|
||||
<li>mesa: Fix GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE for default framebuffer.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ian Romanick (5):</p>
|
||||
<ul>
|
||||
<li>t_dd_dmatmp: Make "count" actually be the count</li>
|
||||
<li>t_dd_dmatmp: Clean up improper code formatting from previous patch</li>
|
||||
<li>t_dd_dmatmp: Use '&amp; 3' instead of '% 4' everywhere</li>
|
||||
<li>t_dd_dmatmp: Pull out common 'count -= count &amp; 3' code</li>
|
||||
<li>t_dd_dmatmp: Use addition instead of subtraction in loop bounds</li>
|
||||
</ul>
|
||||
|
||||
<p>Ilia Mirkin (6):</p>
|
||||
<ul>
|
||||
<li>st/mesa: avoid integer overflows with buffers >= 512MB</li>
|
||||
<li>nv50, nvc0: fix max texture buffer size to 128M elements</li>
|
||||
<li>freedreno/a3xx: fix blending of L8 format</li>
|
||||
<li>nv50,nvc0: detect underlying resource changes and update tic</li>
|
||||
<li>nv50,nvc0: flush texture cache in presence of coherent bufs</li>
|
||||
<li>radeonsi: load fmask ptr relative to the resources array</li>
|
||||
</ul>
|
||||
|
||||
<p>Jason Ekstrand (2):</p>
|
||||
<ul>
|
||||
<li>nir: Fix a bunch of ralloc parenting errors</li>
|
||||
<li>i965/vec4: Don't reswizzle hardware registers</li>
|
||||
</ul>
|
||||
|
||||
<p>Jeremy Huddleston (1):</p>
|
||||
<ul>
|
||||
<li>configure.ac: Add support to enable read-only text segment on x86.</li>
|
||||
</ul>
|
||||
|
||||
<p>Ray Strode (1):</p>
|
||||
<ul>
|
||||
<li>gbm: convert gbm bo format to fourcc format on dma-buf import</li>
|
||||
</ul>
|
||||
|
||||
<p>Tapani Pälli (2):</p>
|
||||
<ul>
|
||||
<li>mesa: fix errors when reading depth with glReadPixels</li>
|
||||
<li>i965: fix textureGrad for cubemaps</li>
|
||||
</ul>
|
||||
|
||||
<p>Ulrich Weigand (1):</p>
|
||||
<ul>
|
||||
<li>mesa: Fix texture compression on big-endian systems</li>
|
||||
</ul>
|
||||
|
||||
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
@@ -15,7 +15,6 @@ env.Append(CPPPATH = [
|
||||
|
||||
# parse Makefile.sources
|
||||
egl_sources = env.ParseSourceList('Makefile.sources', 'LIBEGL_C_FILES')
|
||||
egl_sources.append(env.ParseSourceList('Makefile.sources', 'dri2_backend_core_FILES'))
|
||||
|
||||
env.Append(CPPDEFINES = [
|
||||
'_EGL_NATIVE_PLATFORM=_EGL_PLATFORM_HAIKU',
|
||||
|
@@ -38,18 +38,23 @@ libgallium_la_SOURCES += \
|
||||
|
||||
endif
|
||||
|
||||
indices/u_indices_gen.c: $(srcdir)/indices/u_indices_gen.py
|
||||
$(AM_V_at)$(MKDIR_P) indices
|
||||
$(AM_V_GEN) $(PYTHON2) $< > $@
|
||||
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
|
||||
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
|
||||
|
||||
indices/u_unfilled_gen.c: $(srcdir)/indices/u_unfilled_gen.py
|
||||
$(AM_V_at)$(MKDIR_P) indices
|
||||
$(AM_V_GEN) $(PYTHON2) $< > $@
|
||||
indices/u_indices_gen.c: indices/u_indices_gen.py
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/indices/u_indices_gen.py > $@
|
||||
|
||||
util/u_format_table.c: $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format_pack.py $(srcdir)/util/u_format_parse.py $(srcdir)/util/u_format.csv
|
||||
$(AM_V_at)$(MKDIR_P) util
|
||||
$(AM_V_GEN) $(PYTHON2) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
|
||||
indices/u_unfilled_gen.c: indices/u_unfilled_gen.py
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/indices/u_unfilled_gen.py > $@
|
||||
|
||||
util/u_format_table.c: util/u_format_table.py \
|
||||
util/u_format_pack.py \
|
||||
util/u_format_parse.py \
|
||||
util/u_format.csv
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/util/u_format_table.py $(srcdir)/util/u_format.csv > $@
|
||||
|
||||
noinst_LTLIBRARIES += libgalliumvl_stub.la
|
||||
libgalliumvl_stub_la_SOURCES = \
|
||||
|
@@ -311,7 +311,7 @@ lp_build_const_elem(struct gallivm_state *gallivm,
|
||||
else {
|
||||
double dscale = lp_const_scale(type);
|
||||
|
||||
elem = LLVMConstInt(elem_type, round(val*dscale), 0);
|
||||
elem = LLVMConstInt(elem_type, (long long) round(val*dscale), 0);
|
||||
}
|
||||
|
||||
return elem;
|
||||
|
@@ -166,6 +166,11 @@ pb_cache_manager_create(struct pb_manager *provider,
|
||||
unsigned bypass_usage,
|
||||
uint64_t maximum_cache_size);
|
||||
|
||||
/**
|
||||
* Remove a buffer from the cache, but keep it alive.
|
||||
*/
|
||||
void
|
||||
pb_cache_manager_remove_buffer(struct pb_buffer *buf);
|
||||
|
||||
struct pb_fence_ops;
|
||||
|
||||
|
@@ -104,18 +104,42 @@ pb_cache_manager(struct pb_manager *mgr)
|
||||
}
|
||||
|
||||
|
||||
/**
 * Detach a cache buffer from its manager.
 *
 * If the buffer is still linked into a list (head.next is non-NULL) it is
 * unlinked and the manager's numDelayed / cache_size accounting is reduced
 * to match. buf->mgr is cleared in every case.
 *
 * NOTE(review): the _locked suffix suggests the caller must already hold
 * mgr->mutex; pb_cache_manager_remove_buffer() does -- confirm for any
 * other caller.
 */
static void
|
||||
_pb_cache_manager_remove_buffer_locked(struct pb_cache_buffer *buf)
|
||||
{
|
||||
struct pb_cache_manager *mgr = buf->mgr;
|
||||
|
||||
/* A non-NULL head.next is treated as "still linked into a list". */
if (buf->head.next) {
|
||||
LIST_DEL(&buf->head);
|
||||
assert(mgr->numDelayed);
|
||||
--mgr->numDelayed;
|
||||
mgr->cache_size -= buf->base.size;
|
||||
}
|
||||
/* Mark the buffer as no longer owned by any cache manager. */
buf->mgr = NULL;
|
||||
}
|
||||
|
||||
/**
 * Remove a buffer from the cache, but keep it alive.
 *
 * Does nothing when the buffer has no manager (buf->mgr is NULL).
 * Otherwise the manager's mutex is held around
 * _pb_cache_manager_remove_buffer_locked(), which unlinks the buffer,
 * fixes up the accounting and clears buf->mgr.
 */
void
|
||||
pb_cache_manager_remove_buffer(struct pb_buffer *pb_buf)
|
||||
{
|
||||
/* NOTE(review): plain cast rather than the pb_cache_buffer() helper used
 * by pb_cache_buffer_destroy() -- presumably callers guarantee the type;
 * confirm. */
struct pb_cache_buffer *buf = (struct pb_cache_buffer*)pb_buf;
|
||||
/* Local copy: the helper below clears buf->mgr, but the mutex must still
 * be released afterwards. NOTE(review): this read happens before the
 * lock is taken. */
struct pb_cache_manager *mgr = buf->mgr;
|
||||
|
||||
if (!mgr)
|
||||
return;
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
_pb_cache_manager_remove_buffer_locked(buf);
|
||||
pipe_mutex_unlock(mgr->mutex);
|
||||
}
|
||||
|
||||
/**
|
||||
* Actually destroy the buffer.
|
||||
*/
|
||||
static inline void
|
||||
_pb_cache_buffer_destroy(struct pb_cache_buffer *buf)
|
||||
{
|
||||
struct pb_cache_manager *mgr = buf->mgr;
|
||||
|
||||
LIST_DEL(&buf->head);
|
||||
assert(mgr->numDelayed);
|
||||
--mgr->numDelayed;
|
||||
mgr->cache_size -= buf->base.size;
|
||||
if (buf->mgr)
|
||||
_pb_cache_manager_remove_buffer_locked(buf);
|
||||
assert(!pipe_is_referenced(&buf->base.reference));
|
||||
pb_reference(&buf->buffer, NULL);
|
||||
FREE(buf);
|
||||
@@ -156,6 +180,12 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf)
|
||||
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
|
||||
struct pb_cache_manager *mgr = buf->mgr;
|
||||
|
||||
if (!mgr) {
|
||||
pb_reference(&buf->buffer, NULL);
|
||||
FREE(buf);
|
||||
return;
|
||||
}
|
||||
|
||||
pipe_mutex_lock(mgr->mutex);
|
||||
assert(!pipe_is_referenced(&buf->base.reference));
|
||||
|
||||
|
@@ -41,6 +41,7 @@
|
||||
#include "util/u_tile.h"
|
||||
#include "util/u_prim.h"
|
||||
#include "util/u_surface.h"
|
||||
#include <inttypes.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <limits.h> /* CHAR_BIT */
|
||||
@@ -275,7 +276,7 @@ debug_get_flags_option(const char *name,
|
||||
for (; flags->name; ++flags)
|
||||
namealign = MAX2(namealign, strlen(flags->name));
|
||||
for (flags = orig; flags->name; ++flags)
|
||||
_debug_printf("| %*s [0x%0*lx]%s%s\n", namealign, flags->name,
|
||||
_debug_printf("| %*s [0x%0*"PRIu64"]%s%s\n", namealign, flags->name,
|
||||
(int)sizeof(uint64_t)*CHAR_BIT/4, flags->value,
|
||||
flags->desc ? " " : "", flags->desc ? flags->desc : "");
|
||||
}
|
||||
@@ -290,9 +291,9 @@ debug_get_flags_option(const char *name,
|
||||
|
||||
if (debug_get_option_should_print()) {
|
||||
if (str) {
|
||||
debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str);
|
||||
debug_printf("%s: %s = 0x%"PRIu64" (%s)\n", __FUNCTION__, name, result, str);
|
||||
} else {
|
||||
debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result);
|
||||
debug_printf("%s: %s = 0x%"PRIu64"\n", __FUNCTION__, name, result);
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63785 bytes, from 2015-08-14 18:27:06)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63785 bytes, from 2015-08-14 18:27:06)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
@@ -563,10 +563,29 @@ fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
|
||||
val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
|
||||
val |= COND(fp->frag_coord, A3XX_GRAS_CL_CLIP_CNTL_ZCOORD |
|
||||
A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
|
||||
/* TODO only use if prog doesn't use clipvertex/clipdist */
|
||||
val |= MIN2(util_bitcount(ctx->rasterizer->clip_plane_enable), 6) << 26;
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
|
||||
OUT_RING(ring, val);
|
||||
}
|
||||
|
||||
if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_UCP)) {
|
||||
uint32_t planes = ctx->rasterizer->clip_plane_enable;
|
||||
int count = 0;
|
||||
|
||||
while (planes && count < 6) {
|
||||
int i = ffs(planes) - 1;
|
||||
|
||||
planes &= ~(1U << i);
|
||||
fd_wfi(ctx, ring);
|
||||
OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
|
||||
OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
|
||||
}
|
||||
}
|
||||
|
||||
/* NOTE: since primitive_restart is not actually part of any
|
||||
* state object, we need to make sure that we always emit
|
||||
* PRIM_VTX_CNTL.. either that or be more clever and detect
|
||||
|
@@ -355,6 +355,8 @@ fd3_fs_output_format(enum pipe_format format)
|
||||
case PIPE_FORMAT_R16G16_FLOAT:
|
||||
case PIPE_FORMAT_R11G11B10_FLOAT:
|
||||
return RB_R16G16B16A16_FLOAT;
|
||||
case PIPE_FORMAT_L8_UNORM:
|
||||
return RB_R8G8B8A8_UNORM;
|
||||
default:
|
||||
return fd3_pipe2color(format);
|
||||
}
|
||||
|
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63785 bytes, from 2015-08-14 18:27:06)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
@@ -162,10 +162,13 @@ enum a4xx_tex_fmt {
|
||||
TFMT4_8_UNORM = 4,
|
||||
TFMT4_8_8_UNORM = 14,
|
||||
TFMT4_8_8_8_8_UNORM = 28,
|
||||
TFMT4_8_SNORM = 5,
|
||||
TFMT4_8_8_SNORM = 15,
|
||||
TFMT4_8_8_8_8_SNORM = 29,
|
||||
TFMT4_8_UINT = 6,
|
||||
TFMT4_8_8_UINT = 16,
|
||||
TFMT4_8_8_8_8_UINT = 30,
|
||||
TFMT4_8_SINT = 7,
|
||||
TFMT4_8_8_SINT = 17,
|
||||
TFMT4_8_8_8_8_SINT = 31,
|
||||
TFMT4_16_UINT = 21,
|
||||
|
@@ -79,9 +79,9 @@ struct fd4_format {
|
||||
static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
|
||||
/* 8-bit */
|
||||
VT(R8_UNORM, 8_UNORM, R8_UNORM, WZYX),
|
||||
V_(R8_SNORM, 8_SNORM, NONE, WZYX),
|
||||
V_(R8_UINT, 8_UINT, NONE, WZYX),
|
||||
V_(R8_SINT, 8_SINT, NONE, WZYX),
|
||||
VT(R8_SNORM, 8_SNORM, NONE, WZYX),
|
||||
VT(R8_UINT, 8_UINT, NONE, WZYX),
|
||||
VT(R8_SINT, 8_SINT, NONE, WZYX),
|
||||
V_(R8_USCALED, 8_UINT, NONE, WZYX),
|
||||
V_(R8_SSCALED, 8_UINT, NONE, WZYX),
|
||||
|
||||
@@ -115,8 +115,8 @@ static struct fd4_format formats[PIPE_FORMAT_COUNT] = {
|
||||
|
||||
VT(R8G8_UNORM, 8_8_UNORM, R8G8_UNORM, WZYX),
|
||||
VT(R8G8_SNORM, 8_8_SNORM, R8G8_SNORM, WZYX),
|
||||
VT(R8G8_UINT, 8_8_UINT, NONE, WZYX),
|
||||
VT(R8G8_SINT, 8_8_SINT, NONE, WZYX),
|
||||
VT(R8G8_UINT, 8_8_UINT, R8G8_UINT, WZYX),
|
||||
VT(R8G8_SINT, 8_8_SINT, R8G8_SINT, WZYX),
|
||||
V_(R8G8_USCALED, 8_8_UINT, NONE, WZYX),
|
||||
V_(R8G8_SSCALED, 8_8_SINT, NONE, WZYX),
|
||||
|
||||
|
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63785 bytes, from 2015-08-14 18:27:06)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
@@ -14,7 +14,7 @@ The rules-ng-ng source files this header was generated from are:
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10551 bytes, from 2015-05-20 20:03:14)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 67120 bytes, from 2015-08-14 23:22:03)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63785 bytes, from 2015-08-14 18:27:06)
|
||||
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 63915 bytes, from 2015-08-24 16:56:28)
|
||||
|
||||
Copyright (C) 2013-2015 by the following authors:
|
||||
- Rob Clark <robdclark@gmail.com> (robclark)
|
||||
|
@@ -334,6 +334,7 @@ struct fd_context {
|
||||
FD_DIRTY_INDEXBUF = (1 << 16),
|
||||
FD_DIRTY_SCISSOR = (1 << 17),
|
||||
FD_DIRTY_STREAMOUT = (1 << 18),
|
||||
FD_DIRTY_UCP = (1 << 19),
|
||||
} dirty;
|
||||
|
||||
struct pipe_blend_state *blend;
|
||||
@@ -355,6 +356,7 @@ struct fd_context {
|
||||
struct fd_constbuf_stateobj constbuf[PIPE_SHADER_TYPES];
|
||||
struct pipe_index_buffer indexbuf;
|
||||
struct fd_streamout_stateobj streamout;
|
||||
struct pipe_clip_state ucp;
|
||||
|
||||
/* GMEM/tile handling fxns: */
|
||||
void (*emit_tile_init)(struct fd_context *ctx);
|
||||
|
@@ -65,7 +65,9 @@ static void
|
||||
fd_set_clip_state(struct pipe_context *pctx,
|
||||
const struct pipe_clip_state *clip)
|
||||
{
|
||||
DBG("TODO: ");
|
||||
struct fd_context *ctx = fd_context(pctx);
|
||||
ctx->ucp = *clip;
|
||||
ctx->dirty |= FD_DIRTY_UCP;
|
||||
}
|
||||
|
||||
static void
|
||||
|
@@ -884,7 +884,7 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
|
||||
defId(i->def(0), 2);
|
||||
srcAddr8(i->src(0), 16);
|
||||
|
||||
if (i->getInterpMode() == NV50_IR_INTERP_FLAT) {
|
||||
if (i->encSize != 8 && i->getInterpMode() == NV50_IR_INTERP_FLAT) {
|
||||
code[0] |= 1 << 8;
|
||||
} else {
|
||||
if (i->op == OP_PINTERP) {
|
||||
@@ -896,10 +896,11 @@ CodeEmitterNV50::emitINTERP(const Instruction *i)
|
||||
}
|
||||
|
||||
if (i->encSize == 8) {
|
||||
code[1] =
|
||||
(code[0] & (3 << 24)) >> (24 - 16) |
|
||||
(code[0] & (1 << 8)) << (18 - 8);
|
||||
code[0] &= ~0x03000100;
|
||||
if (i->getInterpMode() == NV50_IR_INTERP_FLAT)
|
||||
code[1] = 4 << 16;
|
||||
else
|
||||
code[1] = (code[0] & (3 << 24)) >> (24 - 16);
|
||||
code[0] &= ~0x03000000;
|
||||
code[0] |= 1;
|
||||
emitFlagsRd(i);
|
||||
}
|
||||
|
@@ -202,7 +202,10 @@ NV50LegalizePostRA::visit(Function *fn)
|
||||
Program *prog = fn->getProgram();
|
||||
|
||||
r63 = new_LValue(fn, FILE_GPR);
|
||||
r63->reg.data.id = 63;
|
||||
if (prog->maxGPR < 63)
|
||||
r63->reg.data.id = 63;
|
||||
else
|
||||
r63->reg.data.id = 127;
|
||||
|
||||
// this is actually per-program, but we can do it all on visiting main()
|
||||
std::list<Instruction *> *outWrites =
|
||||
|
@@ -2602,6 +2602,10 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
|
||||
!isFloatType(i->dType))
|
||||
break;
|
||||
|
||||
if (i->getDef(0)->reg.data.id >= 64 ||
|
||||
i->getSrc(0)->reg.data.id >= 64)
|
||||
break;
|
||||
|
||||
def = i->getSrc(1)->getInsn();
|
||||
if (def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
|
||||
vtmp = i->getSrc(1);
|
||||
|
@@ -411,7 +411,7 @@ int ImmediateValue::print(char *buf, size_t size, DataType ty) const
|
||||
case TYPE_U64:
|
||||
case TYPE_S64:
|
||||
default:
|
||||
PRINT("0x%016"PRIx64, reg.data.u64);
|
||||
PRINT("0x%016" PRIx64, reg.data.u64);
|
||||
break;
|
||||
}
|
||||
return pos;
|
||||
|
@@ -25,6 +25,7 @@
|
||||
|
||||
#include <stack>
|
||||
#include <limits>
|
||||
#include <tr1/unordered_map>
|
||||
|
||||
namespace nv50_ir {
|
||||
|
||||
@@ -222,6 +223,7 @@ private:
|
||||
private:
|
||||
virtual bool visit(BasicBlock *);
|
||||
inline bool needNewElseBlock(BasicBlock *b, BasicBlock *p);
|
||||
inline void splitEdges(BasicBlock *b);
|
||||
};
|
||||
|
||||
class ArgumentMovesPass : public Pass {
|
||||
@@ -345,28 +347,55 @@ RegAlloc::PhiMovesPass::needNewElseBlock(BasicBlock *b, BasicBlock *p)
|
||||
return (n == 2);
|
||||
}
|
||||
|
||||
// For each operand of each PHI in b, generate a new value by inserting a MOV
|
||||
// at the end of the block it is coming from and replace the operand with its
|
||||
// result. This eliminates liveness conflicts and enables us to let values be
|
||||
// copied to the right register if such a conflict exists nonetheless.
|
||||
// Hash functor for (instruction, basic block) pointer pairs. Combines the
// std::tr1 pointer hashes of both members as hash(first) * 31 + hash(second).
struct PhiMapHash {
|
||||
size_t operator()(const std::pair<Instruction *, BasicBlock *>& val) const {
|
||||
return std::tr1::hash<Instruction*>()(val.first) * 31 +
|
||||
std::tr1::hash<BasicBlock*>()(val.second);
|
||||
}
|
||||
};
|
||||
|
||||
// Associates a (phi instruction, incident block) pair with the Value that
// the phi takes from that block.
typedef std::tr1::unordered_map<
|
||||
std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
|
||||
|
||||
// Critical edges need to be split up so that work can be inserted along
|
||||
// specific edge transitions. Unfortunately manipulating incident edges into a
|
||||
// BB invalidates all the PHI nodes since their sources are implicitly ordered
|
||||
// by incident edge order.
|
||||
//
|
||||
// These MOVs are also crucial in making sure the live intervals of phi sources
|
||||
// are extended until the end of the loop, since they are not included in the
|
||||
// live-in sets.
|
||||
bool
|
||||
RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
|
||||
// TODO: Make it so that that is not the case, and PHI nodes store pointers to
|
||||
// the original BBs.
|
||||
void
|
||||
RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
|
||||
{
|
||||
Instruction *phi, *mov;
|
||||
BasicBlock *pb, *pn;
|
||||
|
||||
Instruction *phi;
|
||||
Graph::EdgeIterator ei;
|
||||
std::stack<BasicBlock *> stack;
|
||||
int j = 0;
|
||||
|
||||
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
|
||||
for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
|
||||
pb = BasicBlock::get(ei.getNode());
|
||||
assert(pb);
|
||||
if (needNewElseBlock(bb, pb))
|
||||
stack.push(pb);
|
||||
}
|
||||
|
||||
// No critical edges were found, no need to perform any work.
|
||||
if (stack.empty())
|
||||
return;
|
||||
|
||||
// We're about to, potentially, reorder the inbound edges. This means that
|
||||
// we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
|
||||
// nodes after the graph has been modified.
|
||||
PhiMap phis;
|
||||
|
||||
j = 0;
|
||||
for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
|
||||
pb = BasicBlock::get(ei.getNode());
|
||||
for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
|
||||
phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
|
||||
}
|
||||
|
||||
while (!stack.empty()) {
|
||||
pb = stack.top();
|
||||
pn = new BasicBlock(func);
|
||||
@@ -379,12 +408,47 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
|
||||
assert(pb->getExit()->op != OP_CALL);
|
||||
if (pb->getExit()->asFlow()->target.bb == bb)
|
||||
pb->getExit()->asFlow()->target.bb = pn;
|
||||
|
||||
for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
|
||||
PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
|
||||
assert(it != phis.end());
|
||||
phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
|
||||
phis.erase(it);
|
||||
}
|
||||
}
|
||||
|
||||
// Now go through and fix up all of the phi node sources.
|
||||
j = 0;
|
||||
for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
|
||||
pb = BasicBlock::get(ei.getNode());
|
||||
for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
|
||||
PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
|
||||
assert(it != phis.end());
|
||||
|
||||
phi->setSrc(j, it->second);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// For each operand of each PHI in b, generate a new value by inserting a MOV
|
||||
// at the end of the block it is coming from and replace the operand with its
|
||||
// result. This eliminates liveness conflicts and enables us to let values be
|
||||
// copied to the right register if such a conflict exists nonetheless.
|
||||
//
|
||||
// These MOVs are also crucial in making sure the live intervals of phi sources
|
||||
// are extended until the end of the loop, since they are not included in the
|
||||
// live-in sets.
|
||||
bool
|
||||
RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
|
||||
{
|
||||
Instruction *phi, *mov;
|
||||
|
||||
splitEdges(bb);
|
||||
|
||||
// insert MOVs (phi->src(j) should stem from j-th in-BB)
|
||||
int j = 0;
|
||||
for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
|
||||
pb = BasicBlock::get(ei.getNode());
|
||||
BasicBlock *pb = BasicBlock::get(ei.getNode());
|
||||
if (!pb->isTerminated())
|
||||
pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
|
||||
|
||||
|
@@ -206,8 +206,8 @@ nouveau_transfer_write(struct nouveau_context *nv, struct nouveau_transfer *tx,
|
||||
nv->copy_data(nv, buf->bo, buf->offset + base, buf->domain,
|
||||
tx->bo, tx->offset + offset, NOUVEAU_BO_GART, size);
|
||||
else
|
||||
if ((buf->base.bind & PIPE_BIND_CONSTANT_BUFFER) && nv->push_cb && can_cb)
|
||||
nv->push_cb(nv, buf->bo, buf->domain, buf->offset, buf->base.width0,
|
||||
if (nv->push_cb && can_cb)
|
||||
nv->push_cb(nv, buf,
|
||||
base, size / 4, (const uint32_t *)data);
|
||||
else
|
||||
nv->push_data(nv, buf->bo, buf->offset + base, buf->domain, size, data);
|
||||
@@ -532,8 +532,13 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
|
||||
struct nv04_resource *buf = nv04_resource(transfer->resource);
|
||||
|
||||
if (tx->base.usage & PIPE_TRANSFER_WRITE) {
|
||||
if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT) && tx->map)
|
||||
nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
|
||||
if (!(tx->base.usage & PIPE_TRANSFER_FLUSH_EXPLICIT)) {
|
||||
if (tx->map)
|
||||
nouveau_transfer_write(nv, tx, 0, tx->base.box.width);
|
||||
|
||||
util_range_add(&buf->valid_buffer_range,
|
||||
tx->base.box.x, tx->base.box.x + tx->base.box.width);
|
||||
}
|
||||
|
||||
if (likely(buf->domain)) {
|
||||
const uint8_t bind = buf->base.bind;
|
||||
@@ -541,9 +546,6 @@ nouveau_buffer_transfer_unmap(struct pipe_context *pipe,
|
||||
if (bind & (PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER))
|
||||
nv->vbo_dirty = true;
|
||||
}
|
||||
|
||||
util_range_add(&buf->valid_buffer_range,
|
||||
tx->base.box.x, tx->base.box.x + tx->base.box.width);
|
||||
}
|
||||
|
||||
if (!tx->bo && (tx->base.usage & PIPE_TRANSFER_WRITE))
|
||||
|
@@ -41,6 +41,8 @@ struct nv04_resource {
|
||||
uint8_t status;
|
||||
uint8_t domain;
|
||||
|
||||
uint16_t cb_bindings[6]; /* per-shader per-slot bindings */
|
||||
|
||||
struct nouveau_fence *fence;
|
||||
struct nouveau_fence *fence_wr;
|
||||
|
||||
|
@@ -6,6 +6,8 @@
|
||||
|
||||
#define NOUVEAU_MAX_SCRATCH_BUFS 4
|
||||
|
||||
struct nv04_resource;
|
||||
|
||||
struct nouveau_context {
|
||||
struct pipe_context pipe;
|
||||
struct nouveau_screen *screen;
|
||||
@@ -23,8 +25,7 @@ struct nouveau_context {
|
||||
unsigned, const void *);
|
||||
/* base, size refer to the whole constant buffer */
|
||||
void (*push_cb)(struct nouveau_context *,
|
||||
struct nouveau_bo *, unsigned domain,
|
||||
unsigned base, unsigned size,
|
||||
struct nv04_resource *,
|
||||
unsigned offset, unsigned words, const uint32_t *);
|
||||
|
||||
/* @return: @ref reduced by nr of references found in context */
|
||||
|
@@ -28,6 +28,7 @@
|
||||
#include "util/u_surface.h"
|
||||
|
||||
#include "nv_m2mf.xml.h"
|
||||
#include "nv_object.xml.h"
|
||||
#include "nv30/nv30_screen.h"
|
||||
#include "nv30/nv30_context.h"
|
||||
#include "nv30/nv30_resource.h"
|
||||
@@ -144,21 +145,54 @@ nv30_resource_copy_region(struct pipe_context *pipe,
|
||||
nv30_transfer_rect(nv30, NEAREST, &src, &dst);
|
||||
}
|
||||
|
||||
void
|
||||
nv30_resource_resolve(struct pipe_context *pipe,
|
||||
const struct pipe_resolve_info *info)
|
||||
static void
|
||||
nv30_resource_resolve(struct nv30_context *nv30,
|
||||
const struct pipe_blit_info *info)
|
||||
{
|
||||
#if 0
|
||||
struct nv30_context *nv30 = nv30_context(pipe);
|
||||
struct nv30_miptree *src_mt = nv30_miptree(info->src.resource);
|
||||
struct nv30_rect src, dst;
|
||||
unsigned x, x0, x1, y, y1, w, h;
|
||||
|
||||
define_rect(info->src.res, 0, 0, info->src.x0, info->src.y0,
|
||||
info->src.x1 - info->src.x0, info->src.y1 - info->src.y0, &src);
|
||||
define_rect(info->dst.res, info->dst.level, 0, info->dst.x0, info->dst.y0,
|
||||
info->dst.x1 - info->dst.x0, info->dst.y1 - info->dst.y0, &dst);
|
||||
define_rect(info->src.resource, 0, info->src.box.z, info->src.box.x,
|
||||
info->src.box.y, info->src.box.width, info->src.box.height, &src);
|
||||
define_rect(info->dst.resource, 0, info->dst.box.z, info->dst.box.x,
|
||||
info->dst.box.y, info->dst.box.width, info->dst.box.height, &dst);
|
||||
|
||||
nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
|
||||
#endif
|
||||
x0 = src.x0;
|
||||
x1 = src.x1;
|
||||
y1 = src.y1;
|
||||
|
||||
/* On nv3x we must use sifm which is restricted to 1024x1024 tiles */
|
||||
for (y = src.y0; y < y1; y += h) {
|
||||
h = y1 - y;
|
||||
if (h > 1024)
|
||||
h = 1024;
|
||||
|
||||
src.y0 = 0;
|
||||
src.y1 = h;
|
||||
src.h = h;
|
||||
|
||||
dst.y1 = dst.y0 + (h >> src_mt->ms_y);
|
||||
dst.h = h >> src_mt->ms_y;
|
||||
|
||||
for (x = x0; x < x1; x += w) {
|
||||
w = x1 - x;
|
||||
if (w > 1024)
|
||||
w = 1024;
|
||||
|
||||
src.offset = y * src.pitch + x * src.cpp;
|
||||
src.x0 = 0;
|
||||
src.x1 = w;
|
||||
src.w = w;
|
||||
|
||||
dst.offset = (y >> src_mt->ms_y) * dst.pitch +
|
||||
(x >> src_mt->ms_x) * dst.cpp;
|
||||
dst.x1 = dst.x0 + (w >> src_mt->ms_x);
|
||||
dst.w = w >> src_mt->ms_x;
|
||||
|
||||
nv30_transfer_rect(nv30, BILINEAR, &src, &dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@@ -172,7 +206,7 @@ nv30_blit(struct pipe_context *pipe,
|
||||
info.dst.resource->nr_samples <= 1 &&
|
||||
!util_format_is_depth_or_stencil(info.src.resource->format) &&
|
||||
!util_format_is_pure_integer(info.src.resource->format)) {
|
||||
debug_printf("nv30: color resolve unimplemented\n");
|
||||
nv30_resource_resolve(nv30, blit_info);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -362,6 +396,7 @@ nv30_miptree_create(struct pipe_screen *pscreen,
|
||||
blocksz = util_format_get_blocksize(pt->format);
|
||||
|
||||
if ((pt->target == PIPE_TEXTURE_RECT) ||
|
||||
(pt->bind & PIPE_BIND_SCANOUT) ||
|
||||
!util_is_power_of_two(pt->width0) ||
|
||||
!util_is_power_of_two(pt->height0) ||
|
||||
!util_is_power_of_two(pt->depth0) ||
|
||||
@@ -369,6 +404,14 @@ nv30_miptree_create(struct pipe_screen *pscreen,
|
||||
util_format_is_float(pt->format) || mt->ms_mode) {
|
||||
mt->uniform_pitch = util_format_get_nblocksx(pt->format, w) * blocksz;
|
||||
mt->uniform_pitch = align(mt->uniform_pitch, 64);
|
||||
if (pt->bind & PIPE_BIND_SCANOUT) {
|
||||
struct nv30_screen *screen = nv30_screen(pscreen);
|
||||
int pitch_align = MAX2(
|
||||
screen->eng3d->oclass >= NV40_3D_CLASS ? 1024 : 256,
|
||||
/* round_down_pow2(mt->uniform_pitch / 4) */
|
||||
1 << (util_last_bit(mt->uniform_pitch / 4) - 1));
|
||||
mt->uniform_pitch = align(mt->uniform_pitch, pitch_align);
|
||||
}
|
||||
}
|
||||
|
||||
if (!mt->uniform_pitch)
|
||||
|
@@ -65,9 +65,6 @@ nv30_resource_copy_region(struct pipe_context *pipe,
|
||||
struct pipe_resource *src, unsigned src_level,
|
||||
const struct pipe_box *src_box);
|
||||
|
||||
void
|
||||
nv30_resource_resolve(struct pipe_context *, const struct pipe_resolve_info *);
|
||||
|
||||
void
|
||||
nv30_blit(struct pipe_context *pipe,
|
||||
const struct pipe_blit_info *blit_info);
|
||||
|
@@ -319,8 +319,9 @@ nv30_screen_is_format_supported(struct pipe_screen *pscreen,
|
||||
unsigned sample_count,
|
||||
unsigned bindings)
|
||||
{
|
||||
if (sample_count > 4)
|
||||
if (sample_count > nv30_screen(pscreen)->max_sample_count)
|
||||
return false;
|
||||
|
||||
if (!(0x00000017 & (1 << sample_count)))
|
||||
return false;
|
||||
|
||||
@@ -450,6 +451,23 @@ nv30_screen_create(struct nouveau_device *dev)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Some modern apps try to use msaa without keeping in mind the
|
||||
* restrictions on videomem of older cards. Resulting in dmesg saying:
|
||||
* [ 1197.850642] nouveau E[soffice.bin[3785]] fail ttm_validate
|
||||
* [ 1197.850648] nouveau E[soffice.bin[3785]] validating bo list
|
||||
* [ 1197.850654] nouveau E[soffice.bin[3785]] validate: -12
|
||||
*
|
||||
* Because we are running out of video memory, after which the program
|
||||
* using the msaa visual freezes, and eventually the entire system freezes.
|
||||
*
|
||||
* To work around this we do not allow msaa visuals by default and allow
|
||||
* the user to override this via NV30_MAX_MSAA.
|
||||
*/
|
||||
screen->max_sample_count = debug_get_num_option("NV30_MAX_MSAA", 0);
|
||||
if (screen->max_sample_count > 4)
|
||||
screen->max_sample_count = 4;
|
||||
|
||||
pscreen = &screen->base.base;
|
||||
pscreen->destroy = nv30_screen_destroy;
|
||||
pscreen->get_param = nv30_screen_get_param;
|
||||
|
@@ -38,6 +38,8 @@ struct nv30_screen {
|
||||
/*XXX: nvfx state */
|
||||
struct nouveau_heap *vp_exec_heap;
|
||||
struct nouveau_heap *vp_data_heap;
|
||||
|
||||
unsigned max_sample_count;
|
||||
};
|
||||
|
||||
static inline struct nv30_screen *
|
||||
|
@@ -371,7 +371,7 @@ nv30_transfer_rect_blit(XFER_ARGS)
|
||||
static bool
|
||||
nv30_transfer_sifm(XFER_ARGS)
|
||||
{
|
||||
if (!src->pitch || (src->w | src->h) > 1024 || src->w < 2 || src->h < 2)
|
||||
if (!src->pitch || src->w > 1024 || src->h > 1024 || src->w < 2 || src->h < 2)
|
||||
return false;
|
||||
|
||||
if (src->d > 1 || dst->d > 1)
|
||||
@@ -381,7 +381,7 @@ nv30_transfer_sifm(XFER_ARGS)
|
||||
return false;
|
||||
|
||||
if (!dst->pitch) {
|
||||
if ((dst->w | dst->h) > 2048 || dst->w < 2 || dst->h < 2)
|
||||
if (dst->w > 2048 || dst->h > 2048 || dst->w < 2 || dst->h < 2)
|
||||
return false;
|
||||
} else {
|
||||
if (dst->domain != NOUVEAU_BO_VRAM)
|
||||
|
@@ -199,9 +199,13 @@ nv50_invalidate_resource_storage(struct nouveau_context *ctx,
|
||||
}
|
||||
}
|
||||
|
||||
if (nv50->idxbuf.buffer == res)
|
||||
if (nv50->idxbuf.buffer == res) {
|
||||
/* Just rebind to the bufctx as there is no separate dirty bit */
|
||||
nouveau_bufctx_reset(nv50->bufctx_3d, NV50_BIND_INDEX);
|
||||
BCTX_REFN(nv50->bufctx_3d, INDEX, nv04_resource(res), RD);
|
||||
if (!--ref)
|
||||
return ref;
|
||||
}
|
||||
|
||||
for (s = 0; s < 3; ++s) {
|
||||
assert(nv50->num_textures[s] <= PIPE_MAX_SAMPLERS);
|
||||
|
@@ -197,7 +197,7 @@ extern struct draw_stage *nv50_draw_render_stage(struct nv50_context *);
|
||||
|
||||
/* nv50_query.c */
|
||||
void nv50_init_query_functions(struct nv50_context *);
|
||||
void nv50_query_pushbuf_submit(struct nouveau_pushbuf *,
|
||||
void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t method,
|
||||
struct pipe_query *, unsigned result_offset);
|
||||
void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
|
||||
void nva0_so_target_save_offset(struct pipe_context *,
|
||||
|
@@ -203,8 +203,10 @@ const struct nv50_format nv50_format_table[PIPE_FORMAT_COUNT] =
|
||||
F3B(B5G6R5_UNORM, B5G6R5_UNORM, C2, C1, C0, xx, UNORM, 5_6_5, TD),
|
||||
C4B(B5G5R5A1_UNORM, BGR5_A1_UNORM, C2, C1, C0, C3, UNORM, 5_5_5_1, TD),
|
||||
F3B(B5G5R5X1_UNORM, BGR5_X1_UNORM, C2, C1, C0, xx, UNORM, 5_5_5_1, TD),
|
||||
#if NOUVEAU_DRIVER != 0xc0
|
||||
C4B(B4G4R4A4_UNORM, NONE, C2, C1, C0, C3, UNORM, 4_4_4_4, T),
|
||||
F3B(B4G4R4X4_UNORM, NONE, C2, C1, C0, xx, UNORM, 4_4_4_4, T),
|
||||
#endif
|
||||
F3B(R9G9B9E5_FLOAT, NONE, C0, C1, C2, xx, FLOAT, 9_9_9_E5, T),
|
||||
|
||||
C4A(R10G10B10A2_UNORM, RGB10_A2_UNORM, C0, C1, C2, C3, UNORM, 10_10_10_2,
|
||||
|
@@ -66,6 +66,7 @@ nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info)
|
||||
case TGSI_SEMANTIC_VERTEXID:
|
||||
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID;
|
||||
prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID_DRAW_ARRAYS_ADD_START;
|
||||
prog->vp.vertexid = 1;
|
||||
continue;
|
||||
default:
|
||||
break;
|
||||
|
@@ -76,6 +76,7 @@ struct nv50_program {
|
||||
ubyte psiz; /* output slot of point size */
|
||||
ubyte bfc[2]; /* indices into varying for FFC (FP) or BFC (VP) */
|
||||
ubyte edgeflag;
|
||||
ubyte vertexid;
|
||||
ubyte clpd[2]; /* output slot of clip distance[i]'s 1st component */
|
||||
ubyte clpd_nr;
|
||||
} vp;
|
||||
|
@@ -266,6 +266,7 @@ nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq)
|
||||
nv50_query_get(push, q, 0, 0x1000f010);
|
||||
break;
|
||||
case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET:
|
||||
q->sequence++;
|
||||
nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5));
|
||||
break;
|
||||
case PIPE_QUERY_TIMESTAMP_DISJOINT:
|
||||
@@ -451,18 +452,18 @@ nv50_render_condition(struct pipe_context *pipe,
|
||||
}
|
||||
|
||||
void
|
||||
nv50_query_pushbuf_submit(struct nouveau_pushbuf *push,
|
||||
nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method,
|
||||
struct pipe_query *pq, unsigned result_offset)
|
||||
{
|
||||
struct nv50_query *q = nv50_query(pq);
|
||||
|
||||
/* XXX: does this exist ? */
|
||||
#define NV50_IB_ENTRY_1_NO_PREFETCH (0 << (31 - 8))
|
||||
nv50_query_update(q);
|
||||
if (q->state != NV50_QUERY_STATE_READY)
|
||||
nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client);
|
||||
q->state = NV50_QUERY_STATE_READY;
|
||||
|
||||
PUSH_REFN(push, q->bo, NOUVEAU_BO_RD | NOUVEAU_BO_GART);
|
||||
nouveau_pushbuf_space(push, 0, 0, 1);
|
||||
nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
|
||||
NV50_IB_ENTRY_1_NO_PREFETCH);
|
||||
BEGIN_NV04(push, SUBC_3D(method), 1);
|
||||
PUSH_DATA (push, q->data[result_offset / 4]);
|
||||
}
|
||||
|
||||
void
|
||||
|
@@ -100,7 +100,7 @@ nv50_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_MAX_TEXEL_OFFSET:
|
||||
return 7;
|
||||
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
|
||||
return 65536;
|
||||
return 128 * 1024 * 1024;
|
||||
case PIPE_CAP_GLSL_FEATURE_LEVEL:
|
||||
return 330;
|
||||
case PIPE_CAP_MAX_RENDER_TARGETS:
|
||||
|
@@ -641,12 +641,12 @@ nv50_stream_output_validate(struct nv50_context *nv50)
|
||||
PUSH_DATA (push, so->num_attribs[i]);
|
||||
if (n == 4) {
|
||||
PUSH_DATA(push, targ->pipe.buffer_size);
|
||||
|
||||
BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
|
||||
if (!targ->clean) {
|
||||
assert(targ->pq);
|
||||
nv50_query_pushbuf_submit(push, targ->pq, 0x4);
|
||||
nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i),
|
||||
targ->pq, 0x4);
|
||||
} else {
|
||||
BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1);
|
||||
PUSH_DATA(push, 0);
|
||||
targ->clean = false;
|
||||
}
|
||||
@@ -655,6 +655,7 @@ nv50_stream_output_validate(struct nv50_context *nv50)
|
||||
(so->stride[i] * nv50->state.prim_size);
|
||||
prims = MIN2(prims, limit);
|
||||
}
|
||||
targ->stride = so->stride[i];
|
||||
BCTX_REFN(nv50->bufctx_3d, SO, buf, WR);
|
||||
}
|
||||
if (prims != ~0) {
|
||||
|
@@ -117,7 +117,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
|
||||
struct nv50_blend_stateobj *so = CALLOC_STRUCT(nv50_blend_stateobj);
|
||||
int i;
|
||||
bool emit_common_func = cso->rt[0].blend_enable;
|
||||
uint32_t ms;
|
||||
|
||||
if (nv50_context(pipe)->screen->tesla->oclass >= NVA3_3D_CLASS) {
|
||||
SB_BEGIN_3D(so, BLEND_INDEPENDENT, 1);
|
||||
@@ -189,15 +188,6 @@ nv50_blend_state_create(struct pipe_context *pipe,
|
||||
SB_DATA (so, nv50_colormask(cso->rt[0].colormask));
|
||||
}
|
||||
|
||||
ms = 0;
|
||||
if (cso->alpha_to_coverage)
|
||||
ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
|
||||
if (cso->alpha_to_one)
|
||||
ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
|
||||
|
||||
SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
|
||||
SB_DATA (so, ms);
|
||||
|
||||
assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
|
||||
return so;
|
||||
}
|
||||
|
@@ -1,4 +1,6 @@
|
||||
|
||||
#include "util/u_format.h"
|
||||
|
||||
#include "nv50/nv50_context.h"
|
||||
#include "nv50/nv50_defs.xml.h"
|
||||
|
||||
@@ -313,6 +315,25 @@ nv50_validate_derived_2(struct nv50_context *nv50)
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_validate_derived_3(struct nv50_context *nv50)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct pipe_framebuffer_state *fb = &nv50->framebuffer;
|
||||
uint32_t ms = 0;
|
||||
|
||||
if ((!fb->nr_cbufs || !fb->cbufs[0] ||
|
||||
!util_format_is_pure_integer(fb->cbufs[0]->format)) && nv50->blend) {
|
||||
if (nv50->blend->pipe.alpha_to_coverage)
|
||||
ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
|
||||
if (nv50->blend->pipe.alpha_to_one)
|
||||
ms |= NV50_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
|
||||
}
|
||||
|
||||
BEGIN_NV04(push, NV50_3D(MULTISAMPLE_CTRL), 1);
|
||||
PUSH_DATA (push, ms);
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_validate_clip(struct nv50_context *nv50)
|
||||
{
|
||||
@@ -474,6 +495,7 @@ static struct state_validate {
|
||||
{ nv50_validate_derived_rs, NV50_NEW_FRAGPROG | NV50_NEW_RASTERIZER |
|
||||
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
|
||||
{ nv50_validate_derived_2, NV50_NEW_ZSA | NV50_NEW_FRAMEBUFFER },
|
||||
{ nv50_validate_derived_3, NV50_NEW_BLEND | NV50_NEW_FRAMEBUFFER },
|
||||
{ nv50_validate_clip, NV50_NEW_CLIP | NV50_NEW_RASTERIZER |
|
||||
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
|
||||
{ nv50_constbufs_validate, NV50_NEW_CONSTBUF },
|
||||
@@ -481,7 +503,8 @@ static struct state_validate {
|
||||
{ nv50_validate_samplers, NV50_NEW_SAMPLERS },
|
||||
{ nv50_stream_output_validate, NV50_NEW_STRMOUT |
|
||||
NV50_NEW_VERTPROG | NV50_NEW_GMTYPROG },
|
||||
{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS },
|
||||
{ nv50_vertex_arrays_validate, NV50_NEW_VERTEX | NV50_NEW_ARRAYS |
|
||||
NV50_NEW_VERTPROG },
|
||||
{ nv50_validate_min_samples, NV50_NEW_MIN_SAMPLES },
|
||||
};
|
||||
#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
|
||||
|
@@ -19,7 +19,7 @@
|
||||
struct nv50_blend_stateobj {
|
||||
struct pipe_blend_state pipe;
|
||||
int size;
|
||||
uint32_t state[84]; // TODO: allocate less if !independent_blend_enable
|
||||
uint32_t state[82]; // TODO: allocate less if !independent_blend_enable
|
||||
};
|
||||
|
||||
struct nv50_rasterizer_stateobj {
|
||||
|
@@ -68,6 +68,10 @@ nv50_2d_format(enum pipe_format format, bool dst, bool dst_src_equal)
|
||||
return NV50_SURFACE_FORMAT_R16_UNORM;
|
||||
case 4:
|
||||
return NV50_SURFACE_FORMAT_BGRA8_UNORM;
|
||||
case 8:
|
||||
return NV50_SURFACE_FORMAT_RGBA16_FLOAT;
|
||||
case 16:
|
||||
return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
@@ -1003,6 +1007,8 @@ nv50_blitctx_prepare_state(struct nv50_blitctx *blit)
|
||||
/* zsa state */
|
||||
BEGIN_NV04(push, NV50_3D(DEPTH_TEST_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_3D(DEPTH_BOUNDS_EN), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_3D(STENCIL_ENABLE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NV50_3D(ALPHA_TEST_ENABLE), 1);
|
||||
|
@@ -221,6 +221,26 @@ nv50_create_texture_view(struct pipe_context *pipe,
|
||||
return &view->pipe;
|
||||
}
|
||||
|
||||
static void
|
||||
nv50_update_tic(struct nv50_context *nv50, struct nv50_tic_entry *tic,
|
||||
struct nv04_resource *res)
|
||||
{
|
||||
uint64_t address = res->address;
|
||||
if (res->base.target != PIPE_BUFFER)
|
||||
return;
|
||||
address += tic->pipe.u.buf.first_element *
|
||||
util_format_get_blocksize(tic->pipe.format);
|
||||
if (tic->tic[1] == (uint32_t)address &&
|
||||
(tic->tic[2] & 0xff) == address >> 32)
|
||||
return;
|
||||
|
||||
nv50_screen_tic_unlock(nv50->screen, tic);
|
||||
tic->id = -1;
|
||||
tic->tic[1] = address;
|
||||
tic->tic[2] &= 0xffffff00;
|
||||
tic->tic[2] |= address >> 32;
|
||||
}
|
||||
|
||||
static bool
|
||||
nv50_validate_tic(struct nv50_context *nv50, int s)
|
||||
{
|
||||
@@ -240,6 +260,7 @@ nv50_validate_tic(struct nv50_context *nv50, int s)
|
||||
continue;
|
||||
}
|
||||
res = &nv50_miptree(tic->pipe.texture)->base;
|
||||
nv50_update_tic(nv50, tic, res);
|
||||
|
||||
if (tic->id < 0) {
|
||||
tic->id = nv50_screen_tic_alloc(nv50->screen, tic);
|
||||
|
@@ -293,7 +293,8 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
|
||||
uint64_t addrs[PIPE_MAX_ATTRIBS];
|
||||
uint32_t limits[PIPE_MAX_ATTRIBS];
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
struct nv50_vertex_stateobj *vertex = nv50->vertex;
|
||||
struct nv50_vertex_stateobj dummy = {};
|
||||
struct nv50_vertex_stateobj *vertex = nv50->vertex ? nv50->vertex : &dummy;
|
||||
struct pipe_vertex_buffer *vb;
|
||||
struct nv50_vertex_element *ve;
|
||||
uint32_t mask;
|
||||
@@ -301,6 +302,14 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
|
||||
unsigned i;
|
||||
const unsigned n = MAX2(vertex->num_elements, nv50->state.num_vtxelts);
|
||||
|
||||
/* A vertexid is not generated for inline data uploads. Have to use a
|
||||
* VBO. This check must come after the vertprog has been validated,
|
||||
* otherwise vertexid may be unset.
|
||||
*/
|
||||
assert(nv50->vertprog->translated);
|
||||
if (nv50->vertprog->vp.vertexid)
|
||||
nv50->vbo_push_hint = 0;
|
||||
|
||||
if (unlikely(vertex->need_conversion))
|
||||
nv50->vbo_fifo = ~0;
|
||||
else
|
||||
@@ -317,7 +326,6 @@ nv50_vertex_arrays_validate(struct nv50_context *nv50)
|
||||
if (buf && buf->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
|
||||
buf->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
|
||||
nv50->base.vbo_dirty = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -736,9 +744,8 @@ nva0_draw_stream_output(struct nv50_context *nv50,
|
||||
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BASE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
BEGIN_NV04(push, NVA0_3D(DRAW_TFB_BYTES), 1);
|
||||
nv50_query_pushbuf_submit(push, so->pq, 0x4);
|
||||
PUSH_DATA (push, so->stride);
|
||||
nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, so->pq, 0x4);
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
||||
@@ -761,6 +768,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
{
|
||||
struct nv50_context *nv50 = nv50_context(pipe);
|
||||
struct nouveau_pushbuf *push = nv50->base.pushbuf;
|
||||
bool tex_dirty = false;
|
||||
int i, s;
|
||||
|
||||
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
|
||||
@@ -790,6 +798,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
|
||||
push->kick_notify = nv50_draw_vbo_kick_notify;
|
||||
|
||||
/* TODO: Instead of iterating over all the buffer resources looking for
|
||||
* coherent buffers, keep track of a context-wide count.
|
||||
*/
|
||||
for (s = 0; s < 3 && !nv50->cb_dirty; ++s) {
|
||||
uint32_t valid = nv50->constbuf_valid[s];
|
||||
|
||||
@@ -817,6 +828,21 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
nv50->cb_dirty = false;
|
||||
}
|
||||
|
||||
for (s = 0; s < 3 && !tex_dirty; ++s) {
|
||||
for (i = 0; i < nv50->num_textures[s] && !tex_dirty; ++i) {
|
||||
if (!nv50->textures[s][i] ||
|
||||
nv50->textures[s][i]->texture->target != PIPE_BUFFER)
|
||||
continue;
|
||||
if (nv50->textures[s][i]->texture->flags &
|
||||
PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
tex_dirty = true;
|
||||
}
|
||||
}
|
||||
if (tex_dirty) {
|
||||
BEGIN_NV04(push, NV50_3D(TEX_CACHE_CTL), 1);
|
||||
PUSH_DATA (push, 0x20);
|
||||
}
|
||||
|
||||
if (nv50->vbo_fifo) {
|
||||
nv50_push_vbo(nv50, info);
|
||||
push->kick_notify = nv50_default_kick_notify;
|
||||
@@ -838,10 +864,6 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
nv50->base.vbo_dirty = true;
|
||||
}
|
||||
|
||||
if (!nv50->base.vbo_dirty && nv50->idxbuf.buffer &&
|
||||
nv50->idxbuf.buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
|
||||
nv50->base.vbo_dirty = true;
|
||||
|
||||
if (nv50->base.vbo_dirty) {
|
||||
BEGIN_NV04(push, NV50_3D(VERTEX_ARRAY_FLUSH), 1);
|
||||
PUSH_DATA (push, 0);
|
||||
|
@@ -299,10 +299,10 @@ nve4_p2mf_push_linear(struct nouveau_context *nv,
|
||||
struct nouveau_bo *dst, unsigned offset, unsigned domain,
|
||||
unsigned size, const void *data);
|
||||
void
|
||||
nvc0_cb_push(struct nouveau_context *,
|
||||
struct nouveau_bo *bo, unsigned domain,
|
||||
unsigned base, unsigned size,
|
||||
unsigned offset, unsigned words, const uint32_t *data);
|
||||
nvc0_cb_bo_push(struct nouveau_context *,
|
||||
struct nouveau_bo *bo, unsigned domain,
|
||||
unsigned base, unsigned size,
|
||||
unsigned offset, unsigned words, const uint32_t *data);
|
||||
|
||||
/* nvc0_vbo.c */
|
||||
void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
|
||||
|
@@ -449,7 +449,7 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
|
||||
|
||||
for (i = 0; i < info->numOutputs; ++i) {
|
||||
if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
|
||||
fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
|
||||
fp->hdr[18] |= 0xf << info->out[i].slot[0];
|
||||
}
|
||||
|
||||
fp->fp.early_z = info->prop.fp.earlyFragTests;
|
||||
|
@@ -87,7 +87,7 @@ nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
|
||||
case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
|
||||
return 31;
|
||||
case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
|
||||
return 65536;
|
||||
return 128 * 1024 * 1024;
|
||||
case PIPE_CAP_GLSL_FEATURE_LEVEL:
|
||||
return 410;
|
||||
case PIPE_CAP_MAX_RENDER_TARGETS:
|
||||
|
@@ -831,6 +831,8 @@ nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
|
||||
}
|
||||
nvc0->constbuf_dirty[s] |= 1 << i;
|
||||
|
||||
if (nvc0->constbuf[s][i].u.buf)
|
||||
nv04_resource(nvc0->constbuf[s][i].u.buf)->cb_bindings[s] &= ~(1 << i);
|
||||
pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
|
||||
|
||||
nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? true : false;
|
||||
|
@@ -440,7 +440,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
|
||||
PUSH_DATA (push, (0 << 4) | 1);
|
||||
}
|
||||
nvc0_cb_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
|
||||
nvc0_cb_bo_push(&nvc0->base, bo, NV_VRAM_DOMAIN(&nvc0->screen->base),
|
||||
base, nvc0->state.uniform_buffer_bound[s],
|
||||
0, (size + 3) / 4,
|
||||
nvc0->constbuf[s][0].u.data);
|
||||
@@ -458,6 +458,7 @@ nvc0_constbufs_validate(struct nvc0_context *nvc0)
|
||||
BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
|
||||
|
||||
nvc0->cb_dirty = 1; /* Force cache flush for UBO. */
|
||||
res->cb_bindings[s] |= 1 << i;
|
||||
} else {
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
|
||||
PUSH_DATA (push, (i << 4) | 0);
|
||||
|
@@ -887,6 +887,7 @@ nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
|
||||
|
||||
/* zsa state */
|
||||
IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
|
||||
IMMED_NVC0(push, NVC0_3D(DEPTH_BOUNDS_EN), 0);
|
||||
IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
|
||||
IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);
|
||||
|
||||
|
@@ -226,6 +226,26 @@ nvc0_create_texture_view(struct pipe_context *pipe,
|
||||
return &view->pipe;
|
||||
}
|
||||
|
||||
static void
|
||||
nvc0_update_tic(struct nvc0_context *nvc0, struct nv50_tic_entry *tic,
|
||||
struct nv04_resource *res)
|
||||
{
|
||||
uint64_t address = res->address;
|
||||
if (res->base.target != PIPE_BUFFER)
|
||||
return;
|
||||
address += tic->pipe.u.buf.first_element *
|
||||
util_format_get_blocksize(tic->pipe.format);
|
||||
if (tic->tic[1] == (uint32_t)address &&
|
||||
(tic->tic[2] & 0xff) == address >> 32)
|
||||
return;
|
||||
|
||||
nvc0_screen_tic_unlock(nvc0->screen, tic);
|
||||
tic->id = -1;
|
||||
tic->tic[1] = address;
|
||||
tic->tic[2] &= 0xffffff00;
|
||||
tic->tic[2] |= address >> 32;
|
||||
}
|
||||
|
||||
static bool
|
||||
nvc0_validate_tic(struct nvc0_context *nvc0, int s)
|
||||
{
|
||||
@@ -247,6 +267,7 @@ nvc0_validate_tic(struct nvc0_context *nvc0, int s)
|
||||
continue;
|
||||
}
|
||||
res = nv04_resource(tic->pipe.texture);
|
||||
nvc0_update_tic(nvc0, tic, res);
|
||||
|
||||
if (tic->id < 0) {
|
||||
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
|
||||
@@ -313,6 +334,7 @@ nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
|
||||
continue;
|
||||
}
|
||||
res = nv04_resource(tic->pipe.texture);
|
||||
nvc0_update_tic(nvc0, tic, res);
|
||||
|
||||
if (tic->id < 0) {
|
||||
tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
|
||||
|
@@ -506,11 +506,48 @@ nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
|
||||
}
|
||||
|
||||
/* This happens rather often with DTD9/st. */
|
||||
void
|
||||
static void
|
||||
nvc0_cb_push(struct nouveau_context *nv,
|
||||
struct nouveau_bo *bo, unsigned domain,
|
||||
unsigned base, unsigned size,
|
||||
struct nv04_resource *res,
|
||||
unsigned offset, unsigned words, const uint32_t *data)
|
||||
{
|
||||
struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
|
||||
struct nvc0_constbuf *cb = NULL;
|
||||
int s;
|
||||
|
||||
/* Go through all the constbuf binding points of this buffer and try to
|
||||
* find one which contains the region to be updated.
|
||||
*/
|
||||
for (s = 0; s < 6 && !cb; s++) {
|
||||
uint16_t bindings = res->cb_bindings[s];
|
||||
while (bindings) {
|
||||
int i = ffs(bindings) - 1;
|
||||
uint32_t cb_offset = nvc0->constbuf[s][i].offset;
|
||||
|
||||
bindings &= ~(1 << i);
|
||||
if (cb_offset <= offset &&
|
||||
cb_offset + nvc0->constbuf[s][i].size >= offset + words * 4) {
|
||||
cb = &nvc0->constbuf[s][i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cb) {
|
||||
nvc0_cb_bo_push(nv, res->bo, res->domain,
|
||||
res->offset + cb->offset, cb->size,
|
||||
offset - cb->offset, words, data);
|
||||
} else {
|
||||
nv->push_data(nv, res->bo, res->offset + offset, res->domain,
|
||||
words * 4, data);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nvc0_cb_bo_push(struct nouveau_context *nv,
|
||||
struct nouveau_bo *bo, unsigned domain,
|
||||
unsigned base, unsigned size,
|
||||
unsigned offset, unsigned words, const uint32_t *data)
|
||||
{
|
||||
struct nouveau_pushbuf *push = nv->pushbuf;
|
||||
|
||||
@@ -520,6 +557,9 @@ nvc0_cb_push(struct nouveau_context *nv,
|
||||
assert(!(offset & 3));
|
||||
size = align(size, 0x100);
|
||||
|
||||
assert(offset < size);
|
||||
assert(offset + words * 4 <= size);
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
|
||||
PUSH_DATA (push, size);
|
||||
PUSH_DATAh(push, bo->offset + base);
|
||||
|
@@ -899,6 +899,9 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
|
||||
push->kick_notify = nvc0_draw_vbo_kick_notify;
|
||||
|
||||
/* TODO: Instead of iterating over all the buffer resources looking for
|
||||
* coherent buffers, keep track of a context-wide count.
|
||||
*/
|
||||
for (s = 0; s < 5 && !nvc0->cb_dirty; ++s) {
|
||||
uint32_t valid = nvc0->constbuf_valid[s];
|
||||
|
||||
@@ -924,6 +927,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
|
||||
nvc0->cb_dirty = false;
|
||||
}
|
||||
|
||||
for (s = 0; s < 5; ++s) {
|
||||
for (int i = 0; i < nvc0->num_textures[s]; ++i) {
|
||||
struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
|
||||
struct pipe_resource *res;
|
||||
if (!tic)
|
||||
continue;
|
||||
res = nvc0->textures[s][i]->texture;
|
||||
if (res->target != PIPE_BUFFER ||
|
||||
!(res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
|
||||
continue;
|
||||
|
||||
BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
|
||||
PUSH_DATA (push, (tic->id << 4) | 1);
|
||||
NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
|
||||
}
|
||||
}
|
||||
|
||||
if (nvc0->state.vbo_mode) {
|
||||
nvc0_push_vbo(nvc0, info);
|
||||
push->kick_notify = nvc0_default_kick_notify;
|
||||
|
@@ -1853,7 +1853,7 @@ static void evergreen_emit_vertex_buffers(struct r600_context *rctx,
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
|
||||
radeon_emit(cs, (resource_offset + buffer_index) * 8);
|
||||
radeon_emit(cs, va); /* RESOURCEi_WORD0 */
|
||||
radeon_emit(cs, rbuffer->buf->size - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, rbuffer->b.b.width0 - vb->buffer_offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, /* RESOURCEi_WORD2 */
|
||||
S_030008_ENDIAN_SWAP(r600_endian_swap(32)) |
|
||||
S_030008_STRIDE(vb->stride) |
|
||||
@@ -1923,7 +1923,7 @@ static void evergreen_emit_constant_buffers(struct r600_context *rctx,
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 8, 0) | pkt_flags);
|
||||
radeon_emit(cs, (buffer_id_base + buffer_index) * 8);
|
||||
radeon_emit(cs, va); /* RESOURCEi_WORD0 */
|
||||
radeon_emit(cs, rbuffer->buf->size - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, rbuffer->b.b.width0 - cb->buffer_offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, /* RESOURCEi_WORD2 */
|
||||
S_030008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
|
||||
S_030008_STRIDE(gs_ring_buffer ? 4 : 16) |
|
||||
|
@@ -2029,6 +2029,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
|
||||
fprintf(stderr, "CND:%X ", cf->cond);
|
||||
if (cf->pop_count)
|
||||
fprintf(stderr, "POP:%X ", cf->pop_count);
|
||||
if (cf->end_of_program)
|
||||
fprintf(stderr, "EOP ");
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
@@ -936,28 +936,5 @@ static inline bool r600_can_read_depth(struct r600_texture *rtex)
|
||||
#define V_028A6C_OUTPRIM_TYPE_LINESTRIP 1
|
||||
#define V_028A6C_OUTPRIM_TYPE_TRISTRIP 2
|
||||
|
||||
static inline unsigned r600_conv_prim_to_gs_out(unsigned mode)
|
||||
{
|
||||
static const int prim_conv[] = {
|
||||
V_028A6C_OUTPRIM_TYPE_POINTLIST,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
V_028A6C_OUTPRIM_TYPE_TRISTRIP
|
||||
};
|
||||
assert(mode < Elements(prim_conv));
|
||||
|
||||
return prim_conv[mode];
|
||||
}
|
||||
|
||||
unsigned r600_conv_prim_to_gs_out(unsigned mode);
|
||||
#endif
|
||||
|
@@ -141,7 +141,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
|
||||
bool dump = r600_can_dump_shader(&rctx->screen->b, sel->tokens);
|
||||
unsigned use_sb = !(rctx->screen->b.debug_flags & DBG_NO_SB);
|
||||
unsigned sb_disasm = use_sb || (rctx->screen->b.debug_flags & DBG_SB_DISASM);
|
||||
unsigned export_shader = key.vs.as_es;
|
||||
unsigned export_shader;
|
||||
|
||||
shader->shader.bc.isa = rctx->isa;
|
||||
|
||||
@@ -220,6 +220,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
|
||||
}
|
||||
break;
|
||||
case TGSI_PROCESSOR_VERTEX:
|
||||
export_shader = key.vs.as_es;
|
||||
if (rctx->b.chip_class >= EVERGREEN) {
|
||||
if (export_shader)
|
||||
evergreen_update_es_state(ctx, shader);
|
||||
@@ -1830,8 +1831,6 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
ctx.shader = shader;
|
||||
ctx.native_integers = true;
|
||||
|
||||
shader->vs_as_gs_a = key.vs.as_gs_a;
|
||||
shader->vs_as_es = key.vs.as_es;
|
||||
|
||||
r600_bytecode_init(ctx.bc, rscreen->b.chip_class, rscreen->b.family,
|
||||
rscreen->has_compressed_msaa_texturing);
|
||||
@@ -1844,9 +1843,14 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
shader->processor_type = ctx.type;
|
||||
ctx.bc->type = shader->processor_type;
|
||||
|
||||
ring_outputs = key.vs.as_es || (ctx.type == TGSI_PROCESSOR_GEOMETRY);
|
||||
if (ctx.type == TGSI_PROCESSOR_VERTEX) {
|
||||
shader->vs_as_gs_a = key.vs.as_gs_a;
|
||||
shader->vs_as_es = key.vs.as_es;
|
||||
}
|
||||
|
||||
if (key.vs.as_es) {
|
||||
ring_outputs = shader->vs_as_es || ctx.type == TGSI_PROCESSOR_GEOMETRY;
|
||||
|
||||
if (shader->vs_as_es) {
|
||||
ctx.gs_for_vs = &rctx->gs_shader->current->shader;
|
||||
} else {
|
||||
ctx.gs_for_vs = NULL;
|
||||
@@ -1866,7 +1870,8 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
shader->nr_ps_color_exports = 0;
|
||||
shader->nr_ps_max_color_exports = 0;
|
||||
|
||||
shader->two_side = key.ps.color_two_side;
|
||||
if (ctx.type == TGSI_PROCESSOR_FRAGMENT)
|
||||
shader->two_side = key.ps.color_two_side;
|
||||
|
||||
/* register allocations */
|
||||
/* Values [0,127] correspond to GPR[0..127].
|
||||
@@ -2270,7 +2275,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
|
||||
convert_edgeflag_to_int(&ctx);
|
||||
|
||||
if (ring_outputs) {
|
||||
if (key.vs.as_es)
|
||||
if (shader->vs_as_es)
|
||||
emit_gs_ring_writes(&ctx, FALSE);
|
||||
} else {
|
||||
/* Export output */
|
||||
@@ -6151,10 +6156,10 @@ static int tgsi_cmp(struct r600_shader_ctx *ctx)
|
||||
r = tgsi_make_src_for_op3(ctx, temp_regs[0], i, &alu.src[0], &ctx->src[0]);
|
||||
if (r)
|
||||
return r;
|
||||
r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[1], &ctx->src[2]);
|
||||
r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[1], &ctx->src[2]);
|
||||
if (r)
|
||||
return r;
|
||||
r = tgsi_make_src_for_op3(ctx, temp_regs[2], i, &alu.src[2], &ctx->src[1]);
|
||||
r = tgsi_make_src_for_op3(ctx, temp_regs[1], i, &alu.src[2], &ctx->src[1]);
|
||||
if (r)
|
||||
return r;
|
||||
tgsi_dst(ctx, &inst->Dst[0], i, &alu.dst);
|
||||
|
@@ -1007,7 +1007,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
|
||||
|
||||
/* CMASK. */
|
||||
if (!rctx->dummy_cmask ||
|
||||
rctx->dummy_cmask->buf->size < cmask.size ||
|
||||
rctx->dummy_cmask->b.b.width0 < cmask.size ||
|
||||
rctx->dummy_cmask->buf->alignment % cmask.alignment != 0) {
|
||||
struct pipe_transfer *transfer;
|
||||
void *ptr;
|
||||
@@ -1025,7 +1025,7 @@ static void r600_init_color_surface(struct r600_context *rctx,
|
||||
|
||||
/* FMASK. */
|
||||
if (!rctx->dummy_fmask ||
|
||||
rctx->dummy_fmask->buf->size < fmask.size ||
|
||||
rctx->dummy_fmask->b.b.width0 < fmask.size ||
|
||||
rctx->dummy_fmask->buf->alignment % fmask.alignment != 0) {
|
||||
pipe_resource_reference((struct pipe_resource**)&rctx->dummy_fmask, NULL);
|
||||
rctx->dummy_fmask = r600_buffer_create_helper(rscreen, fmask.size, fmask.alignment);
|
||||
@@ -1694,7 +1694,7 @@ static void r600_emit_vertex_buffers(struct r600_context *rctx, struct r600_atom
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
|
||||
radeon_emit(cs, (320 + buffer_index) * 7);
|
||||
radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
|
||||
radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, /* RESOURCEi_WORD2 */
|
||||
S_038008_ENDIAN_SWAP(r600_endian_swap(32)) |
|
||||
S_038008_STRIDE(vb->stride));
|
||||
@@ -1743,7 +1743,7 @@ static void r600_emit_constant_buffers(struct r600_context *rctx,
|
||||
radeon_emit(cs, PKT3(PKT3_SET_RESOURCE, 7, 0));
|
||||
radeon_emit(cs, (buffer_id_base + buffer_index) * 7);
|
||||
radeon_emit(cs, offset); /* RESOURCEi_WORD0 */
|
||||
radeon_emit(cs, rbuffer->buf->size - offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, rbuffer->b.b.width0 - offset - 1); /* RESOURCEi_WORD1 */
|
||||
radeon_emit(cs, /* RESOURCEi_WORD2 */
|
||||
S_038008_ENDIAN_SWAP(gs_ring_buffer ? ENDIAN_NONE : r600_endian_swap(32)) |
|
||||
S_038008_STRIDE(gs_ring_buffer ? 4 : 16));
|
||||
@@ -2051,7 +2051,7 @@ bool r600_adjust_gprs(struct r600_context *rctx)
|
||||
/* always privilege vs stage so that at worst we have the
|
||||
* pixel stage producing wrong output (not the vertex
|
||||
* stage) */
|
||||
new_num_ps_gprs = max_gprs - ((new_num_vs_gprs - new_num_es_gprs - new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
|
||||
new_num_ps_gprs = max_gprs - ((new_num_vs_gprs + new_num_es_gprs + new_num_gs_gprs) + def_num_clause_temp_gprs * 2);
|
||||
new_num_vs_gprs = num_vs_gprs;
|
||||
new_num_gs_gprs = num_gs_gprs;
|
||||
new_num_es_gprs = num_es_gprs;
|
||||
|
@@ -123,6 +123,31 @@ static unsigned r600_conv_pipe_prim(unsigned prim)
|
||||
return prim_conv[prim];
|
||||
}
|
||||
|
||||
unsigned r600_conv_prim_to_gs_out(unsigned mode)
|
||||
{
|
||||
static const int prim_conv[] = {
|
||||
[PIPE_PRIM_POINTS] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
|
||||
[PIPE_PRIM_LINES] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_LINE_LOOP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_LINE_STRIP] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_TRIANGLES] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_TRIANGLE_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_TRIANGLE_FAN] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_QUADS] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_QUAD_STRIP] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_POLYGON] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_LINES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_LINE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_LINESTRIP,
|
||||
[PIPE_PRIM_TRIANGLES_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY] = V_028A6C_OUTPRIM_TYPE_TRISTRIP,
|
||||
[PIPE_PRIM_PATCHES] = V_028A6C_OUTPRIM_TYPE_POINTLIST,
|
||||
[R600_PRIM_RECTANGLE_LIST] = V_028A6C_OUTPRIM_TYPE_TRISTRIP
|
||||
};
|
||||
assert(mode < Elements(prim_conv));
|
||||
|
||||
return prim_conv[mode];
|
||||
}
|
||||
|
||||
/* common state between evergreen and r600 */
|
||||
|
||||
static void r600_bind_blend_state_internal(struct r600_context *rctx,
|
||||
|
@@ -32,6 +32,7 @@ int bc_decoder::decode_cf(unsigned &i, bc_cf& bc) {
|
||||
int r = 0;
|
||||
uint32_t dw0 = dw[i];
|
||||
uint32_t dw1 = dw[i+1];
|
||||
assert(i+1 <= ndw);
|
||||
|
||||
if ((dw1 >> 29) & 1) { // CF_ALU
|
||||
return decode_cf_alu(i, bc);
|
||||
|
@@ -199,6 +199,9 @@ void bc_finalizer::finalize_if(region_node* r) {
|
||||
cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
|
||||
cf_node *if_pop = sh.create_cf(CF_OP_POP);
|
||||
|
||||
if (!last_cf || last_cf->get_parent_region() == r) {
|
||||
last_cf = if_pop;
|
||||
}
|
||||
if_pop->bc.pop_count = 1;
|
||||
if_pop->jump_after(if_pop);
|
||||
|
||||
|
@@ -95,7 +95,7 @@ int bc_parser::decode_shader() {
|
||||
if ((r = decode_cf(i, eop)))
|
||||
return r;
|
||||
|
||||
} while (!eop || (i >> 1) <= max_cf);
|
||||
} while (!eop || (i >> 1) < max_cf);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -769,6 +769,7 @@ int bc_parser::prepare_ir() {
|
||||
}
|
||||
|
||||
int bc_parser::prepare_loop(cf_node* c) {
|
||||
assert(c->bc.addr-1 < cf_map.size());
|
||||
|
||||
cf_node *end = cf_map[c->bc.addr - 1];
|
||||
assert(end->bc.op == CF_OP_LOOP_END);
|
||||
@@ -788,8 +789,12 @@ int bc_parser::prepare_loop(cf_node* c) {
|
||||
}
|
||||
|
||||
int bc_parser::prepare_if(cf_node* c) {
|
||||
assert(c->bc.addr-1 < cf_map.size());
|
||||
cf_node *c_else = NULL, *end = cf_map[c->bc.addr];
|
||||
|
||||
if (!end)
|
||||
return 0; // not quite sure how this happens, malformed input?
|
||||
|
||||
BCP_DUMP(
|
||||
sblog << "parsing JUMP @" << c->bc.id;
|
||||
sblog << "\n";
|
||||
@@ -815,7 +820,7 @@ int bc_parser::prepare_if(cf_node* c) {
|
||||
if (c_else->parent != c->parent)
|
||||
c_else = NULL;
|
||||
|
||||
if (end->parent != c->parent)
|
||||
if (end && end->parent != c->parent)
|
||||
end = NULL;
|
||||
|
||||
region_node *reg = sh->create_region();
|
||||
|
@@ -236,7 +236,7 @@ void rp_gpr_tracker::unreserve(alu_node* n) {
|
||||
|
||||
for (i = 0; i < nsrc; ++i) {
|
||||
value *v = n->src[i];
|
||||
if (v->is_readonly())
|
||||
if (v->is_readonly() || v->is_undef())
|
||||
continue;
|
||||
if (i == 1 && opt)
|
||||
continue;
|
||||
|
@@ -197,7 +197,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
@@ -206,13 +206,13 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, 0);
|
||||
break;
|
||||
@@ -220,7 +220,7 @@ static void r600_emit_query_begin(struct r600_common_context *ctx, struct r600_q
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
@@ -254,7 +254,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_PRIMITIVES_EMITTED:
|
||||
case PIPE_QUERY_PRIMITIVES_GENERATED:
|
||||
@@ -264,7 +264,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
case PIPE_QUERY_TIME_ELAPSED:
|
||||
va += query->buffer.results_end + query->result_size/2;
|
||||
@@ -273,7 +273,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32UL) & 0xFF));
|
||||
radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
|
||||
radeon_emit(cs, 0);
|
||||
radeon_emit(cs, 0);
|
||||
break;
|
||||
@@ -282,7 +282,7 @@ static void r600_emit_query_end(struct r600_common_context *ctx, struct r600_que
|
||||
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
|
||||
radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
|
||||
radeon_emit(cs, va);
|
||||
radeon_emit(cs, (va >> 32UL) & 0xFF);
|
||||
radeon_emit(cs, (va >> 32) & 0xFFFF);
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
|
@@ -33,14 +33,6 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
|
||||
struct radeon_winsys_cs *cs = ctx->b.rings.gfx.cs;
|
||||
int i;
|
||||
|
||||
/* If the CS is sufficiently large, don't count the space needed
|
||||
* and just flush if there is less than 8096 dwords left. */
|
||||
if (cs->max_dw >= 24 * 1024) {
|
||||
if (cs->cdw > cs->max_dw - 8 * 1024)
|
||||
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
/* There are two memory usage counters in the winsys for all buffers
|
||||
* that have been added (cs_add_reloc) and two counters in the pipe
|
||||
* driver for those that haven't been added yet.
|
||||
@@ -54,6 +46,15 @@ void si_need_cs_space(struct si_context *ctx, unsigned num_dw,
|
||||
ctx->b.gtt = 0;
|
||||
ctx->b.vram = 0;
|
||||
|
||||
/* If the CS is sufficiently large, don't count the space needed
|
||||
* and just flush if there is less than 8096 dwords left.
|
||||
*/
|
||||
if (cs->max_dw >= 24 * 1024) {
|
||||
if (cs->cdw > cs->max_dw - 8 * 1024)
|
||||
ctx->b.rings.gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL);
|
||||
return;
|
||||
}
|
||||
|
||||
/* The number of dwords we already used in the CS so far. */
|
||||
num_dw += cs->cdw;
|
||||
|
||||
|
@@ -195,9 +195,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
|
||||
r600_target = radeon_llvm_get_r600_target(triple);
|
||||
sctx->tm = LLVMCreateTargetMachine(r600_target, triple,
|
||||
r600_get_llvm_processor_name(sscreen->b.family),
|
||||
sctx->b.chip_class >= VI ?
|
||||
"+DumpCode" :
|
||||
"+DumpCode,+vgpr-spilling",
|
||||
"+DumpCode,+vgpr-spilling",
|
||||
LLVMCodeGenLevelDefault,
|
||||
LLVMRelocDefault,
|
||||
LLVMCodeModelDefault);
|
||||
|
@@ -2300,7 +2300,7 @@ static void tex_fetch_args(
|
||||
lp_build_const_int32(gallivm,
|
||||
SI_FMASK_TEX_OFFSET), "");
|
||||
fmask_ptr = LLVMGetParam(si_shader_ctx->radeon_bld.main_fn, SI_PARAM_RESOURCE);
|
||||
fmask_ptr = build_indexed_load_const(si_shader_ctx, res_ptr, ind_index);
|
||||
fmask_ptr = build_indexed_load_const(si_shader_ctx, fmask_ptr, ind_index);
|
||||
}
|
||||
} else {
|
||||
res_ptr = si_shader_ctx->resources[sampler_index];
|
||||
|
@@ -190,6 +190,7 @@ struct si_shader_selector {
|
||||
uint64_t inputs_read;
|
||||
uint64_t outputs_written;
|
||||
uint32_t patch_outputs_written;
|
||||
uint32_t ps_colors_written;
|
||||
};
|
||||
|
||||
/* Valid shader configurations:
|
||||
|
@@ -29,6 +29,7 @@
|
||||
#include "sid.h"
|
||||
#include "radeon/r600_cs.h"
|
||||
|
||||
#include "util/u_dual_blend.h"
|
||||
#include "util/u_format.h"
|
||||
#include "util/u_format_s3tc.h"
|
||||
#include "util/u_memory.h"
|
||||
@@ -233,8 +234,10 @@ static unsigned si_pack_float_12p4(float x)
|
||||
* - The COLOR1 format isn't INVALID because of possible dual-source blending,
|
||||
* so COLOR1 is enabled pretty much all the time.
|
||||
* So CB_TARGET_MASK is the only register that can disable COLOR1.
|
||||
*
|
||||
* Another reason is to avoid a hang with dual source blending.
|
||||
*/
|
||||
static void si_update_fb_blend_state(struct si_context *sctx)
|
||||
void si_update_fb_blend_state(struct si_context *sctx)
|
||||
{
|
||||
struct si_pm4_state *pm4;
|
||||
struct si_state_blend *blend = sctx->queued.named.blend;
|
||||
@@ -252,6 +255,16 @@ static void si_update_fb_blend_state(struct si_context *sctx)
|
||||
mask |= 0xf << (4*i);
|
||||
mask &= blend->cb_target_mask;
|
||||
|
||||
/* Avoid a hang that happens when dual source blending is enabled
|
||||
* but there is not enough color outputs. This is undefined behavior,
|
||||
* so disable color writes completely.
|
||||
*
|
||||
* Reproducible with Unigine Heaven 4.0 and drirc missing.
|
||||
*/
|
||||
if (blend->dual_src_blend &&
|
||||
(sctx->ps_shader->ps_colors_written & 0x3) != 0x3)
|
||||
mask = 0;
|
||||
|
||||
si_pm4_set_reg(pm4, R_028238_CB_TARGET_MASK, mask);
|
||||
si_pm4_set_state(sctx, fb_blend, pm4);
|
||||
}
|
||||
@@ -343,6 +356,7 @@ static void *si_create_blend_state_mode(struct pipe_context *ctx,
|
||||
return NULL;
|
||||
|
||||
blend->alpha_to_one = state->alpha_to_one;
|
||||
blend->dual_src_blend = util_blend_state_is_dual(state, 0);
|
||||
|
||||
if (state->logicop_enable) {
|
||||
color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4));
|
||||
@@ -3166,6 +3180,7 @@ static void si_init_config(struct si_context *sctx)
|
||||
unsigned rb_mask = sctx->screen->b.info.si_backend_enabled_mask;
|
||||
unsigned raster_config, raster_config_1;
|
||||
struct si_pm4_state *pm4 = CALLOC_STRUCT(si_pm4_state);
|
||||
int i;
|
||||
|
||||
if (pm4 == NULL)
|
||||
return;
|
||||
@@ -3196,6 +3211,11 @@ static void si_init_config(struct si_context *sctx)
|
||||
|
||||
si_pm4_set_reg(pm4, R_02882C_PA_SU_PRIM_FILTER_CNTL, 0);
|
||||
|
||||
for (i = 0; i < 16; i++) {
|
||||
si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0 + i*8, 0);
|
||||
si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0 + i*8, fui(1.0));
|
||||
}
|
||||
|
||||
switch (sctx->screen->b.family) {
|
||||
case CHIP_TAHITI:
|
||||
case CHIP_PITCAIRN:
|
||||
@@ -3282,8 +3302,6 @@ static void si_init_config(struct si_context *sctx)
|
||||
si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 0xAAAAAAAA);
|
||||
/* PA_SU_HARDWARE_SCREEN_OFFSET must be 0 due to hw bug on SI */
|
||||
si_pm4_set_reg(pm4, R_028234_PA_SU_HARDWARE_SCREEN_OFFSET, 0);
|
||||
si_pm4_set_reg(pm4, R_0282D0_PA_SC_VPORT_ZMIN_0, 0);
|
||||
si_pm4_set_reg(pm4, R_0282D4_PA_SC_VPORT_ZMAX_0, fui(1.0));
|
||||
si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0);
|
||||
si_pm4_set_reg(pm4, R_028BE8_PA_CL_GB_VERT_CLIP_ADJ, fui(1.0));
|
||||
si_pm4_set_reg(pm4, R_028BEC_PA_CL_GB_VERT_DISC_ADJ, fui(1.0));
|
||||
|
@@ -39,6 +39,7 @@ struct si_state_blend {
|
||||
struct si_pm4_state pm4;
|
||||
uint32_t cb_target_mask;
|
||||
bool alpha_to_one;
|
||||
bool dual_src_blend;
|
||||
};
|
||||
|
||||
struct si_state_sample_mask {
|
||||
@@ -251,6 +252,7 @@ void si_shader_change_notify(struct si_context *sctx);
|
||||
/* si_state.c */
|
||||
struct si_shader_selector;
|
||||
|
||||
void si_update_fb_blend_state(struct si_context *sctx);
|
||||
boolean si_is_format_supported(struct pipe_screen *screen,
|
||||
enum pipe_format format,
|
||||
enum pipe_texture_target target,
|
||||
|
@@ -713,6 +713,15 @@ static void *si_create_shader_state(struct pipe_context *ctx,
|
||||
}
|
||||
}
|
||||
break;
|
||||
case PIPE_SHADER_FRAGMENT:
|
||||
for (i = 0; i < sel->info.num_outputs; i++) {
|
||||
unsigned name = sel->info.output_semantic_name[i];
|
||||
unsigned index = sel->info.output_semantic_index[i];
|
||||
|
||||
if (name == TGSI_SEMANTIC_COLOR)
|
||||
sel->ps_colors_written |= 1 << index;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (sscreen->b.debug_flags & DBG_PRECOMPILE)
|
||||
@@ -840,6 +849,7 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
|
||||
}
|
||||
|
||||
sctx->ps_shader = sel;
|
||||
si_update_fb_blend_state(sctx);
|
||||
}
|
||||
|
||||
static void si_delete_shader_selector(struct pipe_context *ctx,
|
||||
|
@@ -314,6 +314,7 @@ qir_get_temp(struct vc4_compile *c)
|
||||
|
||||
reg.file = QFILE_TEMP;
|
||||
reg.index = c->num_temps++;
|
||||
reg.pack = 0;
|
||||
|
||||
if (c->num_temps > c->defs_array_size) {
|
||||
uint32_t old_size = c->defs_array_size;
|
||||
|
@@ -465,7 +465,7 @@ namespace {
|
||||
const bool is_write_only = access_qual == "write_only";
|
||||
const bool is_read_only = access_qual == "read_only";
|
||||
|
||||
typename module::argument::type marg_type;
|
||||
enum module::argument::type marg_type;
|
||||
if (is_image2d && is_read_only) {
|
||||
marg_type = module::argument::image2d_rd;
|
||||
} else if (is_image2d && is_write_only) {
|
||||
|
@@ -684,6 +684,9 @@ static boolean amdgpu_bo_get_handle(struct pb_buffer *buffer,
|
||||
enum amdgpu_bo_handle_type type;
|
||||
int r;
|
||||
|
||||
if ((void*)bo != (void*)buffer)
|
||||
pb_cache_manager_remove_buffer(buffer);
|
||||
|
||||
switch (whandle->type) {
|
||||
case DRM_API_HANDLE_TYPE_SHARED:
|
||||
type = amdgpu_bo_handle_type_gem_flink_name;
|
||||
|
@@ -110,7 +110,7 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
|
||||
struct amdgpu_heap_info vram, gtt;
|
||||
struct drm_amdgpu_info_hw_ip dma = {}, uvd = {}, vce = {};
|
||||
uint32_t vce_version = 0, vce_feature = 0;
|
||||
int r;
|
||||
int r, i, j;
|
||||
|
||||
/* Query hardware and driver information. */
|
||||
r = amdgpu_query_gpu_info(ws->dev, &ws->amdinfo);
|
||||
@@ -248,7 +248,6 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
|
||||
ws->info.vram_size = vram.heap_size;
|
||||
/* convert the shader clock from KHz to MHz */
|
||||
ws->info.max_sclk = ws->amdinfo.max_engine_clk / 1000;
|
||||
ws->info.max_compute_units = 1; /* TODO */
|
||||
ws->info.max_se = ws->amdinfo.num_shader_engines;
|
||||
ws->info.max_sh_per_se = ws->amdinfo.num_shader_arrays_per_engine;
|
||||
ws->info.has_uvd = uvd.available_rings != 0;
|
||||
@@ -263,6 +262,18 @@ static boolean do_winsys_init(struct amdgpu_winsys *ws)
|
||||
ws->info.r600_virtual_address = TRUE;
|
||||
ws->info.r600_has_dma = dma.available_rings != 0;
|
||||
|
||||
/* Guess what the maximum compute unit number is by looking at the mask
|
||||
* of enabled CUs.
|
||||
*/
|
||||
for (i = 0; i < ws->info.max_se; i++)
|
||||
for (j = 0; j < ws->info.max_sh_per_se; j++) {
|
||||
unsigned max = util_last_bit(ws->amdinfo.cu_bitmap[i][j]);
|
||||
|
||||
if (ws->info.max_compute_units < max)
|
||||
ws->info.max_compute_units = max;
|
||||
}
|
||||
ws->info.max_compute_units *= ws->info.max_se * ws->info.max_sh_per_se;
|
||||
|
||||
memcpy(ws->info.si_tile_mode_array, ws->amdinfo.gb_tile_mode,
|
||||
sizeof(ws->amdinfo.gb_tile_mode));
|
||||
ws->info.si_tile_mode_array_valid = TRUE;
|
||||
|
@@ -1126,6 +1126,9 @@ static boolean radeon_winsys_bo_get_handle(struct pb_buffer *buffer,
|
||||
|
||||
memset(&flink, 0, sizeof(flink));
|
||||
|
||||
if ((void*)bo != (void*)buffer)
|
||||
pb_cache_manager_remove_buffer(buffer);
|
||||
|
||||
if (whandle->type == DRM_API_HANDLE_TYPE_SHARED) {
|
||||
if (!bo->flink_name) {
|
||||
flink.handle = bo->handle;
|
||||
|
@@ -97,22 +97,17 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc,
|
||||
{
|
||||
int i;
|
||||
|
||||
csc->buf = MALLOC(ws->ib_max_size);
|
||||
if (!csc->buf)
|
||||
return FALSE;
|
||||
csc->fd = ws->fd;
|
||||
csc->nrelocs = 512;
|
||||
csc->relocs_bo = (struct radeon_bo**)
|
||||
CALLOC(1, csc->nrelocs * sizeof(struct radeon_bo*));
|
||||
if (!csc->relocs_bo) {
|
||||
FREE(csc->buf);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
csc->relocs = (struct drm_radeon_cs_reloc*)
|
||||
CALLOC(1, csc->nrelocs * sizeof(struct drm_radeon_cs_reloc));
|
||||
if (!csc->relocs) {
|
||||
FREE(csc->buf);
|
||||
FREE(csc->relocs_bo);
|
||||
return FALSE;
|
||||
}
|
||||
@@ -165,7 +160,6 @@ static void radeon_destroy_cs_context(struct radeon_cs_context *csc)
|
||||
radeon_cs_context_cleanup(csc);
|
||||
FREE(csc->relocs_bo);
|
||||
FREE(csc->relocs);
|
||||
FREE(csc->buf);
|
||||
}
|
||||
|
||||
|
||||
@@ -206,7 +200,7 @@ radeon_drm_cs_create(struct radeon_winsys_ctx *ctx,
|
||||
cs->cst = &cs->csc2;
|
||||
cs->base.buf = cs->csc->buf;
|
||||
cs->base.ring_type = ring_type;
|
||||
cs->base.max_dw = ws->ib_max_size / 4;
|
||||
cs->base.max_dw = ARRAY_SIZE(cs->csc->buf);
|
||||
|
||||
p_atomic_inc(&ws->num_cs);
|
||||
return &cs->base;
|
||||
|
@@ -30,7 +30,7 @@
|
||||
#include "radeon_drm_bo.h"
|
||||
|
||||
struct radeon_cs_context {
|
||||
uint32_t *buf;
|
||||
uint32_t buf[16 * 1024];
|
||||
|
||||
int fd;
|
||||
struct drm_radeon_cs cs;
|
||||
|
@@ -395,20 +395,16 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
|
||||
}
|
||||
|
||||
ws->info.r600_virtual_address = FALSE;
|
||||
ws->ib_max_size = 64 * 1024;
|
||||
|
||||
if (ws->info.drm_minor >= 13) {
|
||||
uint32_t ib_vm_max_size;
|
||||
|
||||
ws->info.r600_virtual_address = TRUE;
|
||||
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_VA_START, NULL,
|
||||
&ws->va_start))
|
||||
ws->info.r600_virtual_address = FALSE;
|
||||
|
||||
if (radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
|
||||
&ws->ib_max_size))
|
||||
ws->ib_max_size *= 4; /* the kernel returns the size in dwords */
|
||||
else
|
||||
if (!radeon_get_drm_value(ws->fd, RADEON_INFO_IB_VM_MAX_SIZE, NULL,
|
||||
&ib_vm_max_size))
|
||||
ws->info.r600_virtual_address = FALSE;
|
||||
|
||||
radeon_get_drm_value(ws->fd, RADEON_INFO_VA_UNMAP_WORKING, NULL,
|
||||
&ws->va_unmap_working);
|
||||
}
|
||||
|
@@ -73,7 +73,6 @@ struct radeon_drm_winsys {
|
||||
|
||||
enum radeon_generation gen;
|
||||
struct radeon_info info;
|
||||
uint32_t ib_max_size;
|
||||
uint32_t va_start;
|
||||
uint32_t va_unmap_working;
|
||||
uint32_t accel_working2;
|
||||
|
@@ -706,14 +706,30 @@ gbm_dri_bo_import(struct gbm_device *gbm,
|
||||
{
|
||||
struct gbm_import_fd_data *fd_data = buffer;
|
||||
int stride = fd_data->stride, offset = 0;
|
||||
int dri_format;
|
||||
|
||||
switch (fd_data->format) {
|
||||
case GBM_BO_FORMAT_XRGB8888:
|
||||
dri_format = GBM_FORMAT_XRGB8888;
|
||||
break;
|
||||
case GBM_BO_FORMAT_ARGB8888:
|
||||
dri_format = GBM_FORMAT_ARGB8888;
|
||||
break;
|
||||
default:
|
||||
dri_format = fd_data->format;
|
||||
}
|
||||
|
||||
image = dri->image->createImageFromFds(dri->screen,
|
||||
fd_data->width,
|
||||
fd_data->height,
|
||||
fd_data->format,
|
||||
dri_format,
|
||||
&fd_data->fd, 1,
|
||||
&stride, &offset,
|
||||
NULL);
|
||||
if (image == NULL) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
gbm_format = fd_data->format;
|
||||
break;
|
||||
}
|
||||
|
@@ -29,18 +29,7 @@ endif
|
||||
|
||||
intermediates := $(call local-generated-sources-dir)
|
||||
|
||||
sources := \
|
||||
glsl_lexer.cpp \
|
||||
glsl_parser.cpp \
|
||||
glcpp/glcpp-lex.c \
|
||||
glcpp/glcpp-parse.c \
|
||||
nir/nir_builder_opcodes.h \
|
||||
nir/nir_constant_expressions.c \
|
||||
nir/nir_opcodes.c \
|
||||
nir/nir_opcodes.h \
|
||||
nir/nir_opt_algebraic.c
|
||||
|
||||
LOCAL_SRC_FILES := $(filter-out $(sources), $(LOCAL_SRC_FILES))
|
||||
LOCAL_SRC_FILES := $(LOCAL_SRC_FILES)
|
||||
|
||||
LOCAL_C_INCLUDES += \
|
||||
$(intermediates)/glcpp \
|
||||
@@ -51,8 +40,10 @@ LOCAL_C_INCLUDES += \
|
||||
LOCAL_EXPORT_C_INCLUDE_DIRS += \
|
||||
$(intermediates)/nir
|
||||
|
||||
sources := $(addprefix $(intermediates)/, $(sources))
|
||||
LOCAL_GENERATED_SOURCES += $(sources)
|
||||
LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \
|
||||
$(LIBGLCPP_GENERATED_FILES) \
|
||||
$(NIR_GENERATED_FILES) \
|
||||
$(LIBGLSL_GENERATED_CXX_FILES))
|
||||
|
||||
define local-l-or-ll-to-c-or-cpp
|
||||
@mkdir -p $(dir $@)
|
||||
@@ -102,8 +93,7 @@ $(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps)
|
||||
nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py
|
||||
nir_constant_expressions_deps := \
|
||||
$(LOCAL_PATH)/nir/nir_opcodes.py \
|
||||
$(LOCAL_PATH)/nir/nir_constant_expressions.py \
|
||||
$(LOCAL_PATH)/nir/nir_constant_expressions.h
|
||||
$(LOCAL_PATH)/nir/nir_constant_expressions.py
|
||||
|
||||
$(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps)
|
||||
@mkdir -p $(dir $@)
|
||||
|
@@ -140,13 +140,16 @@ libglsl_la_SOURCES = \
|
||||
glsl_parser.cpp \
|
||||
glsl_parser.h \
|
||||
$(LIBGLSL_FILES) \
|
||||
$(NIR_FILES)
|
||||
$(NIR_FILES) \
|
||||
$(NIR_GENERATED_FILES)
|
||||
|
||||
|
||||
libnir_la_SOURCES = \
|
||||
glsl_types.cpp \
|
||||
builtin_types.cpp \
|
||||
glsl_symbol_table.cpp \
|
||||
$(NIR_FILES)
|
||||
$(NIR_FILES) \
|
||||
$(NIR_GENERATED_FILES)
|
||||
|
||||
glsl_compiler_SOURCES = \
|
||||
$(GLSL_COMPILER_CXX_FILES)
|
||||
@@ -197,19 +200,23 @@ am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
|
||||
am__v_YACC_0 = @echo " YACC " $@;
|
||||
am__v_YACC_1 =
|
||||
|
||||
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
|
||||
YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
|
||||
LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
|
||||
|
||||
glsl_parser.cpp glsl_parser.h: glsl_parser.yy
|
||||
$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $<
|
||||
$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
|
||||
|
||||
glsl_lexer.cpp: glsl_lexer.ll
|
||||
$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
|
||||
$(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
|
||||
|
||||
glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
|
||||
$(AM_V_at)$(MKDIR_P) glcpp
|
||||
$(AM_V_YACC) $(YACC) $(YFLAGS) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $<
|
||||
$(MKDIR_GEN)
|
||||
$(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
|
||||
|
||||
glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
|
||||
$(AM_V_at)$(MKDIR_P) glcpp
|
||||
$(AM_V_LEX) $(LEX) $(LFLAGS) -o $@ $<
|
||||
$(MKDIR_GEN)
|
||||
$(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
|
||||
|
||||
# Only the parsers (specifically the header files generated at the same time)
|
||||
# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is
|
||||
@@ -222,11 +229,7 @@ BUILT_SOURCES = \
|
||||
glsl_lexer.cpp \
|
||||
glcpp/glcpp-parse.c \
|
||||
glcpp/glcpp-lex.c \
|
||||
nir/nir_builder_opcodes.h \
|
||||
nir/nir_constant_expressions.c \
|
||||
nir/nir_opcodes.c \
|
||||
nir/nir_opcodes.h \
|
||||
nir/nir_opt_algebraic.c
|
||||
$(NIR_GENERATED_FILES)
|
||||
CLEANFILES = \
|
||||
glcpp/glcpp-parse.h \
|
||||
glsl_parser.h \
|
||||
@@ -239,22 +242,24 @@ dist-hook:
|
||||
$(RM) glcpp/tests/*.out
|
||||
$(RM) glcpp/tests/subtest*/*.out
|
||||
|
||||
nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
|
||||
$(AM_V_at)$(MKDIR_P) nir
|
||||
$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
|
||||
PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS)
|
||||
|
||||
nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py nir/nir_constant_expressions.h
|
||||
$(AM_V_at)$(MKDIR_P) nir
|
||||
$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_constant_expressions.py > $@
|
||||
nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@
|
||||
|
||||
nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@
|
||||
|
||||
nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py
|
||||
$(AM_V_at)$(MKDIR_P) nir
|
||||
$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_h.py > $@
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@
|
||||
|
||||
nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py
|
||||
$(AM_V_at)$(MKDIR_P) nir
|
||||
$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opcodes_c.py > $@
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@
|
||||
|
||||
nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py
|
||||
$(AM_V_at)$(MKDIR_P) nir
|
||||
$(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) $(srcdir)/nir/nir_opt_algebraic.py > $@
|
||||
$(MKDIR_GEN)
|
||||
$(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@
|
||||
|
@@ -69,8 +69,7 @@ NIR_FILES = \
|
||||
nir/nir_vla.h \
|
||||
nir/nir_worklist.c \
|
||||
nir/nir_worklist.h \
|
||||
nir/nir_types.cpp \
|
||||
$(NIR_GENERATED_FILES)
|
||||
nir/nir_types.cpp
|
||||
|
||||
# libglsl
|
||||
|
||||
|
@@ -2339,6 +2339,7 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
|
||||
*/
|
||||
unsigned used_locations = (max_index >= 32)
|
||||
? ~0 : ~((1 << max_index) - 1);
|
||||
unsigned double_storage_locations = 0;
|
||||
|
||||
assert((target_index == MESA_SHADER_VERTEX)
|
||||
|| (target_index == MESA_SHADER_FRAGMENT));
|
||||
@@ -2452,34 +2453,6 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
|
||||
|
||||
const unsigned slots = var->type->count_attribute_slots();
|
||||
|
||||
/* From GL4.5 core spec, section 11.1.1 (Vertex Attributes):
|
||||
*
|
||||
* "A program with more than the value of MAX_VERTEX_ATTRIBS active
|
||||
* attribute variables may fail to link, unless device-dependent
|
||||
* optimizations are able to make the program fit within available
|
||||
* hardware resources. For the purposes of this test, attribute variables
|
||||
* of the type dvec3, dvec4, dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3,
|
||||
* and dmat4 may count as consuming twice as many attributes as equivalent
|
||||
* single-precision types. While these types use the same number of
|
||||
* generic attributes as their single-precision equivalents,
|
||||
* implementations are permitted to consume two single-precision vectors
|
||||
* of internal storage for each three- or four-component double-precision
|
||||
* vector."
|
||||
* Until someone has a good reason in Mesa, enforce that now.
|
||||
*/
|
||||
if (target_index == MESA_SHADER_VERTEX) {
|
||||
total_attribs_size += slots;
|
||||
if (var->type->without_array() == glsl_type::dvec3_type ||
|
||||
var->type->without_array() == glsl_type::dvec4_type ||
|
||||
var->type->without_array() == glsl_type::dmat2x3_type ||
|
||||
var->type->without_array() == glsl_type::dmat2x4_type ||
|
||||
var->type->without_array() == glsl_type::dmat3_type ||
|
||||
var->type->without_array() == glsl_type::dmat3x4_type ||
|
||||
var->type->without_array() == glsl_type::dmat4x3_type ||
|
||||
var->type->without_array() == glsl_type::dmat4_type)
|
||||
total_attribs_size += slots;
|
||||
}
|
||||
|
||||
/* If the variable is not a built-in and has a location statically
|
||||
* assigned in the shader (presumably via a layout qualifier), make sure
|
||||
* that it doesn't collide with other assigned locations. Otherwise,
|
||||
@@ -2594,6 +2567,38 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
|
||||
}
|
||||
|
||||
used_locations |= (use_mask << attr);
|
||||
|
||||
/* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
|
||||
*
|
||||
* "A program with more than the value of MAX_VERTEX_ATTRIBS
|
||||
* active attribute variables may fail to link, unless
|
||||
* device-dependent optimizations are able to make the program
|
||||
* fit within available hardware resources. For the purposes
|
||||
* of this test, attribute variables of the type dvec3, dvec4,
|
||||
* dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
|
||||
* count as consuming twice as many attributes as equivalent
|
||||
* single-precision types. While these types use the same number
|
||||
* of generic attributes as their single-precision equivalents,
|
||||
* implementations are permitted to consume two single-precision
|
||||
* vectors of internal storage for each three- or four-component
|
||||
* double-precision vector."
|
||||
*
|
||||
* Mark this attribute slot as taking up twice as much space
|
||||
* so we can count it properly against limits. According to
|
||||
* issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
|
||||
* is optional behavior, but it seems preferable.
|
||||
*/
|
||||
const glsl_type *type = var->type->without_array();
|
||||
if (type == glsl_type::dvec3_type ||
|
||||
type == glsl_type::dvec4_type ||
|
||||
type == glsl_type::dmat2x3_type ||
|
||||
type == glsl_type::dmat2x4_type ||
|
||||
type == glsl_type::dmat3_type ||
|
||||
type == glsl_type::dmat3x4_type ||
|
||||
type == glsl_type::dmat4x3_type ||
|
||||
type == glsl_type::dmat4_type) {
|
||||
double_storage_locations |= (use_mask << attr);
|
||||
}
|
||||
}
|
||||
|
||||
continue;
|
||||
@@ -2605,6 +2610,9 @@ assign_attribute_or_color_locations(gl_shader_program *prog,
|
||||
}
|
||||
|
||||
if (target_index == MESA_SHADER_VERTEX) {
|
||||
unsigned total_attribs_size =
|
||||
_mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
|
||||
_mesa_bitcount(double_storage_locations);
|
||||
if (total_attribs_size > max_index) {
|
||||
linker_error(prog,
|
||||
"attempt to use %d vertex attribute slots only %d available ",
|
||||
|
@@ -145,7 +145,7 @@ void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx)
|
||||
}
|
||||
}
|
||||
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr)
|
||||
{
|
||||
/* Copying an SSA definition makes no sense whatsoever. */
|
||||
assert(!src->is_ssa);
|
||||
@@ -155,17 +155,18 @@ void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx)
|
||||
dest->reg.base_offset = src->reg.base_offset;
|
||||
dest->reg.reg = src->reg.reg;
|
||||
if (src->reg.indirect) {
|
||||
dest->reg.indirect = ralloc(mem_ctx, nir_src);
|
||||
nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx);
|
||||
dest->reg.indirect = ralloc(instr, nir_src);
|
||||
nir_src_copy(dest->reg.indirect, src->reg.indirect, instr);
|
||||
} else {
|
||||
dest->reg.indirect = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
|
||||
nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
|
||||
nir_alu_instr *instr)
|
||||
{
|
||||
nir_src_copy(&dest->src, &src->src, mem_ctx);
|
||||
nir_src_copy(&dest->src, &src->src, &instr->instr);
|
||||
dest->abs = src->abs;
|
||||
dest->negate = src->negate;
|
||||
for (unsigned i = 0; i < 4; i++)
|
||||
@@ -173,9 +174,10 @@ nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx)
|
||||
}
|
||||
|
||||
void
|
||||
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, void *mem_ctx)
|
||||
nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
|
||||
nir_alu_instr *instr)
|
||||
{
|
||||
nir_dest_copy(&dest->dest, &src->dest, mem_ctx);
|
||||
nir_dest_copy(&dest->dest, &src->dest, &instr->instr);
|
||||
dest->write_mask = src->write_mask;
|
||||
dest->saturate = src->saturate;
|
||||
}
|
||||
@@ -1921,14 +1923,14 @@ nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src, void *mem_ctx)
|
||||
nir_foreach_use_safe(def, use_src) {
|
||||
nir_instr *src_parent_instr = use_src->parent_instr;
|
||||
list_del(&use_src->use_link);
|
||||
nir_src_copy(use_src, &new_src, mem_ctx);
|
||||
nir_src_copy(use_src, &new_src, src_parent_instr);
|
||||
src_add_all_uses(use_src, src_parent_instr, NULL);
|
||||
}
|
||||
|
||||
nir_foreach_if_use_safe(def, use_src) {
|
||||
nir_if *src_parent_if = use_src->parent_if;
|
||||
list_del(&use_src->use_link);
|
||||
nir_src_copy(use_src, &new_src, mem_ctx);
|
||||
nir_src_copy(use_src, &new_src, src_parent_if);
|
||||
src_add_all_uses(use_src, NULL, src_parent_if);
|
||||
}
|
||||
}
|
||||
|
@@ -580,8 +580,8 @@ nir_dest_for_reg(nir_register *reg)
|
||||
return dest;
|
||||
}
|
||||
|
||||
void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx);
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, void *mem_ctx);
|
||||
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
|
||||
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
|
||||
|
||||
typedef struct {
|
||||
nir_src src;
|
||||
@@ -630,10 +630,6 @@ typedef struct {
|
||||
unsigned write_mask : 4; /* ignored if dest.is_ssa is true */
|
||||
} nir_alu_dest;
|
||||
|
||||
void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, void *mem_ctx);
|
||||
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
|
||||
void *mem_ctx);
|
||||
|
||||
typedef enum {
|
||||
nir_type_invalid = 0, /* Not a valid type */
|
||||
nir_type_float,
|
||||
@@ -702,6 +698,11 @@ typedef struct nir_alu_instr {
|
||||
nir_alu_src src[];
|
||||
} nir_alu_instr;
|
||||
|
||||
void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src,
|
||||
nir_alu_instr *instr);
|
||||
void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src,
|
||||
nir_alu_instr *instr);
|
||||
|
||||
/* is this source channel used? */
|
||||
static inline bool
|
||||
nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel)
|
||||
|
@@ -561,7 +561,7 @@ emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src,
|
||||
assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components);
|
||||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
|
||||
nir_src_copy(&mov->src[0].src, &src, mem_ctx);
|
||||
nir_src_copy(&mov->src[0].src, &src, mov);
|
||||
mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg);
|
||||
mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1;
|
||||
|
||||
|
@@ -46,11 +46,11 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
|
||||
for (unsigned i = 0; i < num_components; i++) {
|
||||
nir_alu_instr *chan = nir_alu_instr_create(mem_ctx, chan_op);
|
||||
nir_alu_ssa_dest_init(chan, 1);
|
||||
nir_alu_src_copy(&chan->src[0], &instr->src[0], mem_ctx);
|
||||
nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
|
||||
chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
|
||||
if (nir_op_infos[chan_op].num_inputs > 1) {
|
||||
assert(nir_op_infos[chan_op].num_inputs == 2);
|
||||
nir_alu_src_copy(&chan->src[1], &instr->src[1], mem_ctx);
|
||||
nir_alu_src_copy(&chan->src[1], &instr->src[1], chan);
|
||||
chan->src[1].swizzle[0] = chan->src[1].swizzle[i];
|
||||
}
|
||||
|
||||
@@ -153,7 +153,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, void *mem_ctx)
|
||||
unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ?
|
||||
0 : chan);
|
||||
|
||||
nir_alu_src_copy(&lower->src[i], &instr->src[i], mem_ctx);
|
||||
nir_alu_src_copy(&lower->src[i], &instr->src[i], lower);
|
||||
for (int j = 0; j < 4; j++)
|
||||
lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
|
||||
}
|
||||
|
@@ -91,7 +91,7 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
|
||||
nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
|
||||
nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
|
||||
mul->dest.write_mask = 0x1;
|
||||
nir_src_copy(&mul->src[0].src, &deref_array->indirect, mem_ctx);
|
||||
nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
|
||||
mul->src[1].src.is_ssa = true;
|
||||
mul->src[1].src.ssa = &atomic_counter_size->def;
|
||||
nir_instr_insert_before(&instr->instr, &mul->instr);
|
||||
|
@@ -376,7 +376,7 @@ nir_lower_io_block(nir_block *block, void *void_state)
|
||||
|
||||
store->const_index[0] = offset;
|
||||
|
||||
nir_src_copy(&store->src[0], &intrin->src[0], state->mem_ctx);
|
||||
nir_src_copy(&store->src[0], &intrin->src[0], store);
|
||||
|
||||
if (has_indirect)
|
||||
store->src[1] = indirect;
|
||||
|
@@ -183,8 +183,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
|
||||
nir_alu_instr *add = nir_alu_instr_create(state->shader,
|
||||
nir_op_iadd);
|
||||
add->src[0].src = *src.reg.indirect;
|
||||
nir_src_copy(&add->src[1].src, &deref_array->indirect,
|
||||
state->shader);
|
||||
nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
|
||||
add->dest.write_mask = 1;
|
||||
nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
|
||||
nir_instr_insert_before(instr, &add->instr);
|
||||
@@ -225,7 +224,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
|
||||
nir_src_for_ssa(&mov->dest.dest.ssa),
|
||||
state->shader);
|
||||
} else {
|
||||
nir_dest_copy(&mov->dest.dest, &intrin->dest, state->shader);
|
||||
nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
|
||||
}
|
||||
nir_instr_insert_before(&intrin->instr, &mov->instr);
|
||||
|
||||
@@ -241,7 +240,7 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
|
||||
&intrin->instr, state);
|
||||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
|
||||
nir_src_copy(&mov->src[0].src, &intrin->src[0], state->shader);
|
||||
nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
|
||||
mov->dest.write_mask = (1 << intrin->num_components) - 1;
|
||||
mov->dest.dest.is_ssa = false;
|
||||
mov->dest.dest.reg.reg = reg_src.reg.reg;
|
||||
|
@@ -60,8 +60,8 @@ insert_mov(nir_alu_instr *vec, unsigned start_channel,
|
||||
assert(src_idx < nir_op_infos[vec->op].num_inputs);
|
||||
|
||||
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
|
||||
nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mem_ctx);
|
||||
nir_alu_dest_copy(&mov->dest, &vec->dest, mem_ctx);
|
||||
nir_alu_src_copy(&mov->src[0], &vec->src[src_idx], mov);
|
||||
nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
|
||||
|
||||
mov->dest.write_mask = (1u << start_channel);
|
||||
mov->src[0].swizzle[start_channel] = vec->src[src_idx].swizzle[0];
|
||||
|
@@ -216,8 +216,7 @@ nir_opt_peephole_ffma_block(nir_block *block, void *void_state)
|
||||
for (unsigned j = 0; j < add->dest.dest.ssa.num_components; j++)
|
||||
ffma->src[i].swizzle[j] = mul->src[i].swizzle[swizzle[j]];
|
||||
}
|
||||
nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src],
|
||||
state->mem_ctx);
|
||||
nir_alu_src_copy(&ffma->src[2], &add->src[1 - add_mul_src], ffma);
|
||||
|
||||
assert(add->dest.dest.is_ssa);
|
||||
|
||||
|
@@ -195,7 +195,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
|
||||
|
||||
nir_phi_instr *phi = nir_instr_as_phi(instr);
|
||||
nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel);
|
||||
nir_src_copy(&sel->src[0].src, &if_stmt->condition, state->mem_ctx);
|
||||
nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel);
|
||||
/* Splat the condition to all channels */
|
||||
memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle);
|
||||
|
||||
@@ -205,7 +205,7 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
|
||||
assert(src->src.is_ssa);
|
||||
|
||||
unsigned idx = src->pred == then_block ? 1 : 2;
|
||||
nir_src_copy(&sel->src[idx].src, &src->src, state->mem_ctx);
|
||||
nir_src_copy(&sel->src[idx].src, &src->src, sel);
|
||||
}
|
||||
|
||||
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
|
||||
|
@@ -40,6 +40,7 @@
|
||||
#include "ir_basic_block.h"
|
||||
#include "ir_optimization.h"
|
||||
#include "glsl_types.h"
|
||||
#include "util/hash_table.h"
|
||||
|
||||
namespace {
|
||||
|
||||
@@ -95,7 +96,8 @@ public:
|
||||
killed_all = false;
|
||||
mem_ctx = ralloc_context(0);
|
||||
this->acp = new(mem_ctx) exec_list;
|
||||
this->kills = new(mem_ctx) exec_list;
|
||||
this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
}
|
||||
~ir_constant_propagation_visitor()
|
||||
{
|
||||
@@ -123,7 +125,7 @@ public:
|
||||
* List of kill_entry: The masks of variables whose values were
|
||||
* killed in this block.
|
||||
*/
|
||||
exec_list *kills;
|
||||
hash_table *kills;
|
||||
|
||||
bool progress;
|
||||
|
||||
@@ -263,11 +265,12 @@ ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
|
||||
* main() at link time, so they're irrelevant to us.
|
||||
*/
|
||||
exec_list *orig_acp = this->acp;
|
||||
exec_list *orig_kills = this->kills;
|
||||
hash_table *orig_kills = this->kills;
|
||||
bool orig_killed_all = this->killed_all;
|
||||
|
||||
this->acp = new(mem_ctx) exec_list;
|
||||
this->kills = new(mem_ctx) exec_list;
|
||||
this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
this->killed_all = false;
|
||||
|
||||
visit_list_elements(this, &ir->body);
|
||||
@@ -352,11 +355,12 @@ void
|
||||
ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
|
||||
{
|
||||
exec_list *orig_acp = this->acp;
|
||||
exec_list *orig_kills = this->kills;
|
||||
hash_table *orig_kills = this->kills;
|
||||
bool orig_killed_all = this->killed_all;
|
||||
|
||||
this->acp = new(mem_ctx) exec_list;
|
||||
this->kills = new(mem_ctx) exec_list;
|
||||
this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
this->killed_all = false;
|
||||
|
||||
/* Populate the initial acp with a constant of the original */
|
||||
@@ -370,12 +374,14 @@ ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
|
||||
orig_acp->make_empty();
|
||||
}
|
||||
|
||||
exec_list *new_kills = this->kills;
|
||||
hash_table *new_kills = this->kills;
|
||||
this->kills = orig_kills;
|
||||
this->acp = orig_acp;
|
||||
this->killed_all = this->killed_all || orig_killed_all;
|
||||
|
||||
foreach_in_list(kill_entry, k, new_kills) {
|
||||
hash_entry *htk;
|
||||
hash_table_foreach(new_kills, htk) {
|
||||
kill_entry *k = (kill_entry *) htk->data;
|
||||
kill(k->var, k->write_mask);
|
||||
}
|
||||
}
|
||||
@@ -397,7 +403,7 @@ ir_visitor_status
|
||||
ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
|
||||
{
|
||||
exec_list *orig_acp = this->acp;
|
||||
exec_list *orig_kills = this->kills;
|
||||
hash_table *orig_kills = this->kills;
|
||||
bool orig_killed_all = this->killed_all;
|
||||
|
||||
/* FINISHME: For now, the initial acp for loops is totally empty.
|
||||
@@ -405,7 +411,8 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
|
||||
* cloned minus the killed entries after the first run through.
|
||||
*/
|
||||
this->acp = new(mem_ctx) exec_list;
|
||||
this->kills = new(mem_ctx) exec_list;
|
||||
this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
|
||||
_mesa_key_pointer_equal);
|
||||
this->killed_all = false;
|
||||
|
||||
visit_list_elements(this, &ir->body_instructions);
|
||||
@@ -414,12 +421,14 @@ ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
|
||||
orig_acp->make_empty();
|
||||
}
|
||||
|
||||
exec_list *new_kills = this->kills;
|
||||
hash_table *new_kills = this->kills;
|
||||
this->kills = orig_kills;
|
||||
this->acp = orig_acp;
|
||||
this->killed_all = this->killed_all || orig_killed_all;
|
||||
|
||||
foreach_in_list(kill_entry, k, new_kills) {
|
||||
hash_entry *htk;
|
||||
hash_table_foreach(new_kills, htk) {
|
||||
kill_entry *k = (kill_entry *) htk->data;
|
||||
kill(k->var, k->write_mask);
|
||||
}
|
||||
|
||||
@@ -448,14 +457,15 @@ ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
|
||||
/* Add this writemask of the variable to the list of killed
|
||||
* variables in this block.
|
||||
*/
|
||||
foreach_in_list(kill_entry, entry, this->kills) {
|
||||
if (entry->var == var) {
|
||||
entry->write_mask |= write_mask;
|
||||
return;
|
||||
}
|
||||
hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
|
||||
if (kill_hash_entry) {
|
||||
kill_entry *entry = (kill_entry *) kill_hash_entry->data;
|
||||
entry->write_mask |= write_mask;
|
||||
return;
|
||||
}
|
||||
/* Not already in the list. Make new entry. */
|
||||
this->kills->push_tail(new(this->mem_ctx) kill_entry(var, write_mask));
|
||||
_mesa_hash_table_insert(this->kills, var,
|
||||
new(this->mem_ctx) kill_entry(var, write_mask));
|
||||
}
|
||||
|
||||
/**
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user